gcc.git: gcc/config/i386/i386.c (blob at commit 66200216d3fe7200963daddef2bcb744ea1b9387)
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "hash-set.h"
26 #include "machmode.h"
27 #include "vec.h"
28 #include "double-int.h"
29 #include "input.h"
30 #include "alias.h"
31 #include "symtab.h"
32 #include "wide-int.h"
33 #include "inchash.h"
34 #include "tree.h"
35 #include "fold-const.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "calls.h"
39 #include "stor-layout.h"
40 #include "varasm.h"
41 #include "tm_p.h"
42 #include "regs.h"
43 #include "hard-reg-set.h"
44 #include "insn-config.h"
45 #include "conditions.h"
46 #include "output.h"
47 #include "insn-codes.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "except.h"
51 #include "function.h"
52 #include "recog.h"
53 #include "hashtab.h"
54 #include "statistics.h"
55 #include "real.h"
56 #include "fixed-value.h"
57 #include "expmed.h"
58 #include "dojump.h"
59 #include "explow.h"
60 #include "emit-rtl.h"
61 #include "stmt.h"
62 #include "expr.h"
63 #include "optabs.h"
64 #include "diagnostic-core.h"
65 #include "toplev.h"
66 #include "predict.h"
67 #include "dominance.h"
68 #include "cfg.h"
69 #include "cfgrtl.h"
70 #include "cfganal.h"
71 #include "lcm.h"
72 #include "cfgbuild.h"
73 #include "cfgcleanup.h"
74 #include "basic-block.h"
75 #include "ggc.h"
76 #include "target.h"
77 #include "target-def.h"
78 #include "common/common-target.h"
79 #include "langhooks.h"
80 #include "reload.h"
81 #include "hash-map.h"
82 #include "is-a.h"
83 #include "plugin-api.h"
84 #include "ipa-ref.h"
85 #include "cgraph.h"
86 #include "hash-table.h"
87 #include "tree-ssa-alias.h"
88 #include "internal-fn.h"
89 #include "gimple-fold.h"
90 #include "tree-eh.h"
91 #include "gimple-expr.h"
92 #include "gimple.h"
93 #include "gimplify.h"
94 #include "cfgloop.h"
95 #include "dwarf2.h"
96 #include "df.h"
97 #include "tm-constrs.h"
98 #include "params.h"
99 #include "cselib.h"
100 #include "debug.h"
101 #include "sched-int.h"
102 #include "sbitmap.h"
103 #include "fibheap.h"
104 #include "opts.h"
105 #include "diagnostic.h"
106 #include "dumpfile.h"
107 #include "tree-pass.h"
108 #include "context.h"
109 #include "pass_manager.h"
110 #include "target-globals.h"
111 #include "tree-vectorizer.h"
112 #include "shrink-wrap.h"
113 #include "builtins.h"
114 #include "rtl-iter.h"
115 #include "tree-iterator.h"
116 #include "tree-chkp.h"
117 #include "rtl-chkp.h"
118
119 static rtx legitimize_dllimport_symbol (rtx, bool);
120 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
121 static rtx legitimize_pe_coff_symbol (rtx, bool);
122
123 #ifndef CHECK_STACK_LIMIT
124 #define CHECK_STACK_LIMIT (-1)
125 #endif
126
127 /* Return index of given mode in mult and division cost tables. */
128 #define MODE_INDEX(mode) \
129 ((mode) == QImode ? 0 \
130 : (mode) == HImode ? 1 \
131 : (mode) == SImode ? 2 \
132 : (mode) == DImode ? 3 \
133 : 4)
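/* Editorial note (not in the original source): MODE_INDEX selects an entry in
   the five-element multiply and divide cost arrays of the processor_costs
   tables below; for example, MODE_INDEX (SImode) is 2 and picks the "SI"
   slot, while any mode other than QI/HI/SI/DI falls through to index 4
   ("other").  */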
134
135 /* Processor costs (relative to an add) */
136 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
137 #define COSTS_N_BYTES(N) ((N) * 2)
138
139 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
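/* Editorial note (not in the original source): each stringop_algs initializer
   below is a fallback algorithm followed by {maximum size, algorithm, flag}
   entries that are tried in order, with a size of -1 covering any remaining
   length; the exact field names are defined in i386.h.  The two-element
   arrays appear to hold separate strategies for 32-bit and 64-bit code, and
   DUMMY_STRINGOP_ALGS fills the slot a given tuning does not care about.  */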
140
141 static stringop_algs ix86_size_memcpy[2] = {
142 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
143 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
144 static stringop_algs ix86_size_memset[2] = {
145 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
146 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
147
148 const
149 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
150 COSTS_N_BYTES (2), /* cost of an add instruction */
151 COSTS_N_BYTES (3), /* cost of a lea instruction */
152 COSTS_N_BYTES (2), /* variable shift costs */
153 COSTS_N_BYTES (3), /* constant shift costs */
154 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
155 COSTS_N_BYTES (3), /* HI */
156 COSTS_N_BYTES (3), /* SI */
157 COSTS_N_BYTES (3), /* DI */
158 COSTS_N_BYTES (5)}, /* other */
159 0, /* cost of multiply per each bit set */
160 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
161 COSTS_N_BYTES (3), /* HI */
162 COSTS_N_BYTES (3), /* SI */
163 COSTS_N_BYTES (3), /* DI */
164 COSTS_N_BYTES (5)}, /* other */
165 COSTS_N_BYTES (3), /* cost of movsx */
166 COSTS_N_BYTES (3), /* cost of movzx */
167 0, /* "large" insn */
168 2, /* MOVE_RATIO */
169 2, /* cost for loading QImode using movzbl */
170 {2, 2, 2}, /* cost of loading integer registers
171 in QImode, HImode and SImode.
172 Relative to reg-reg move (2). */
173 {2, 2, 2}, /* cost of storing integer registers */
174 2, /* cost of reg,reg fld/fst */
175 {2, 2, 2}, /* cost of loading fp registers
176 in SFmode, DFmode and XFmode */
177 {2, 2, 2}, /* cost of storing fp registers
178 in SFmode, DFmode and XFmode */
179 3, /* cost of moving MMX register */
180 {3, 3}, /* cost of loading MMX registers
181 in SImode and DImode */
182 {3, 3}, /* cost of storing MMX registers
183 in SImode and DImode */
184 3, /* cost of moving SSE register */
185 {3, 3, 3}, /* cost of loading SSE registers
186 in SImode, DImode and TImode */
187 {3, 3, 3}, /* cost of storing SSE registers
188 in SImode, DImode and TImode */
189 3, /* MMX or SSE register to integer */
190 0, /* size of l1 cache */
191 0, /* size of l2 cache */
192 0, /* size of prefetch block */
193 0, /* number of parallel prefetches */
194 2, /* Branch cost */
195 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
196 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
197 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
198 COSTS_N_BYTES (2), /* cost of FABS instruction. */
199 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
200 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
201 ix86_size_memcpy,
202 ix86_size_memset,
203 1, /* scalar_stmt_cost. */
204 1, /* scalar load_cost. */
205 1, /* scalar_store_cost. */
206 1, /* vec_stmt_cost. */
207 1, /* vec_to_scalar_cost. */
208 1, /* scalar_to_vec_cost. */
209 1, /* vec_align_load_cost. */
210 1, /* vec_unalign_load_cost. */
211 1, /* vec_store_cost. */
212 1, /* cond_taken_branch_cost. */
213 1, /* cond_not_taken_branch_cost. */
214 };
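/* Editorial note (not in the original source): ix86_size_cost is the table
   used when tuning for size; its entries are code-size estimates built with
   COSTS_N_BYTES, unlike the per-processor tables below, which use the
   cycle-oriented COSTS_N_INSNS scale.  */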
215
216 /* Processor costs (relative to an add) */
217 static stringop_algs i386_memcpy[2] = {
218 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
219 DUMMY_STRINGOP_ALGS};
220 static stringop_algs i386_memset[2] = {
221 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
222 DUMMY_STRINGOP_ALGS};
223
224 static const
225 struct processor_costs i386_cost = { /* 386 specific costs */
226 COSTS_N_INSNS (1), /* cost of an add instruction */
227 COSTS_N_INSNS (1), /* cost of a lea instruction */
228 COSTS_N_INSNS (3), /* variable shift costs */
229 COSTS_N_INSNS (2), /* constant shift costs */
230 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
231 COSTS_N_INSNS (6), /* HI */
232 COSTS_N_INSNS (6), /* SI */
233 COSTS_N_INSNS (6), /* DI */
234 COSTS_N_INSNS (6)}, /* other */
235 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
236 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
237 COSTS_N_INSNS (23), /* HI */
238 COSTS_N_INSNS (23), /* SI */
239 COSTS_N_INSNS (23), /* DI */
240 COSTS_N_INSNS (23)}, /* other */
241 COSTS_N_INSNS (3), /* cost of movsx */
242 COSTS_N_INSNS (2), /* cost of movzx */
243 15, /* "large" insn */
244 3, /* MOVE_RATIO */
245 4, /* cost for loading QImode using movzbl */
246 {2, 4, 2}, /* cost of loading integer registers
247 in QImode, HImode and SImode.
248 Relative to reg-reg move (2). */
249 {2, 4, 2}, /* cost of storing integer registers */
250 2, /* cost of reg,reg fld/fst */
251 {8, 8, 8}, /* cost of loading fp registers
252 in SFmode, DFmode and XFmode */
253 {8, 8, 8}, /* cost of storing fp registers
254 in SFmode, DFmode and XFmode */
255 2, /* cost of moving MMX register */
256 {4, 8}, /* cost of loading MMX registers
257 in SImode and DImode */
258 {4, 8}, /* cost of storing MMX registers
259 in SImode and DImode */
260 2, /* cost of moving SSE register */
261 {4, 8, 16}, /* cost of loading SSE registers
262 in SImode, DImode and TImode */
263 {4, 8, 16}, /* cost of storing SSE registers
264 in SImode, DImode and TImode */
265 3, /* MMX or SSE register to integer */
266 0, /* size of l1 cache */
267 0, /* size of l2 cache */
268 0, /* size of prefetch block */
269 0, /* number of parallel prefetches */
270 1, /* Branch cost */
271 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
272 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
273 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
274 COSTS_N_INSNS (22), /* cost of FABS instruction. */
275 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
276 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
277 i386_memcpy,
278 i386_memset,
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
290 };
291
292 static stringop_algs i486_memcpy[2] = {
293 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
294 DUMMY_STRINGOP_ALGS};
295 static stringop_algs i486_memset[2] = {
296 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
297 DUMMY_STRINGOP_ALGS};
298
299 static const
300 struct processor_costs i486_cost = { /* 486 specific costs */
301 COSTS_N_INSNS (1), /* cost of an add instruction */
302 COSTS_N_INSNS (1), /* cost of a lea instruction */
303 COSTS_N_INSNS (3), /* variable shift costs */
304 COSTS_N_INSNS (2), /* constant shift costs */
305 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
306 COSTS_N_INSNS (12), /* HI */
307 COSTS_N_INSNS (12), /* SI */
308 COSTS_N_INSNS (12), /* DI */
309 COSTS_N_INSNS (12)}, /* other */
310 1, /* cost of multiply per each bit set */
311 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
312 COSTS_N_INSNS (40), /* HI */
313 COSTS_N_INSNS (40), /* SI */
314 COSTS_N_INSNS (40), /* DI */
315 COSTS_N_INSNS (40)}, /* other */
316 COSTS_N_INSNS (3), /* cost of movsx */
317 COSTS_N_INSNS (2), /* cost of movzx */
318 15, /* "large" insn */
319 3, /* MOVE_RATIO */
320 4, /* cost for loading QImode using movzbl */
321 {2, 4, 2}, /* cost of loading integer registers
322 in QImode, HImode and SImode.
323 Relative to reg-reg move (2). */
324 {2, 4, 2}, /* cost of storing integer registers */
325 2, /* cost of reg,reg fld/fst */
326 {8, 8, 8}, /* cost of loading fp registers
327 in SFmode, DFmode and XFmode */
328 {8, 8, 8}, /* cost of storing fp registers
329 in SFmode, DFmode and XFmode */
330 2, /* cost of moving MMX register */
331 {4, 8}, /* cost of loading MMX registers
332 in SImode and DImode */
333 {4, 8}, /* cost of storing MMX registers
334 in SImode and DImode */
335 2, /* cost of moving SSE register */
336 {4, 8, 16}, /* cost of loading SSE registers
337 in SImode, DImode and TImode */
338 {4, 8, 16}, /* cost of storing SSE registers
339 in SImode, DImode and TImode */
340 3, /* MMX or SSE register to integer */
341 4, /* size of l1 cache. 486 has 8kB cache
342 shared for code and data, so 4kB is
343 not really precise. */
344 4, /* size of l2 cache */
345 0, /* size of prefetch block */
346 0, /* number of parallel prefetches */
347 1, /* Branch cost */
348 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
349 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
350 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
351 COSTS_N_INSNS (3), /* cost of FABS instruction. */
352 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
353 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
354 i486_memcpy,
355 i486_memset,
356 1, /* scalar_stmt_cost. */
357 1, /* scalar load_cost. */
358 1, /* scalar_store_cost. */
359 1, /* vec_stmt_cost. */
360 1, /* vec_to_scalar_cost. */
361 1, /* scalar_to_vec_cost. */
362 1, /* vec_align_load_cost. */
363 2, /* vec_unalign_load_cost. */
364 1, /* vec_store_cost. */
365 3, /* cond_taken_branch_cost. */
366 1, /* cond_not_taken_branch_cost. */
367 };
368
369 static stringop_algs pentium_memcpy[2] = {
370 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
371 DUMMY_STRINGOP_ALGS};
372 static stringop_algs pentium_memset[2] = {
373 {libcall, {{-1, rep_prefix_4_byte, false}}},
374 DUMMY_STRINGOP_ALGS};
375
376 static const
377 struct processor_costs pentium_cost = {
378 COSTS_N_INSNS (1), /* cost of an add instruction */
379 COSTS_N_INSNS (1), /* cost of a lea instruction */
380 COSTS_N_INSNS (4), /* variable shift costs */
381 COSTS_N_INSNS (1), /* constant shift costs */
382 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
383 COSTS_N_INSNS (11), /* HI */
384 COSTS_N_INSNS (11), /* SI */
385 COSTS_N_INSNS (11), /* DI */
386 COSTS_N_INSNS (11)}, /* other */
387 0, /* cost of multiply per each bit set */
388 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
389 COSTS_N_INSNS (25), /* HI */
390 COSTS_N_INSNS (25), /* SI */
391 COSTS_N_INSNS (25), /* DI */
392 COSTS_N_INSNS (25)}, /* other */
393 COSTS_N_INSNS (3), /* cost of movsx */
394 COSTS_N_INSNS (2), /* cost of movzx */
395 8, /* "large" insn */
396 6, /* MOVE_RATIO */
397 6, /* cost for loading QImode using movzbl */
398 {2, 4, 2}, /* cost of loading integer registers
399 in QImode, HImode and SImode.
400 Relative to reg-reg move (2). */
401 {2, 4, 2}, /* cost of storing integer registers */
402 2, /* cost of reg,reg fld/fst */
403 {2, 2, 6}, /* cost of loading fp registers
404 in SFmode, DFmode and XFmode */
405 {4, 4, 6}, /* cost of storing fp registers
406 in SFmode, DFmode and XFmode */
407 8, /* cost of moving MMX register */
408 {8, 8}, /* cost of loading MMX registers
409 in SImode and DImode */
410 {8, 8}, /* cost of storing MMX registers
411 in SImode and DImode */
412 2, /* cost of moving SSE register */
413 {4, 8, 16}, /* cost of loading SSE registers
414 in SImode, DImode and TImode */
415 {4, 8, 16}, /* cost of storing SSE registers
416 in SImode, DImode and TImode */
417 3, /* MMX or SSE register to integer */
418 8, /* size of l1 cache. */
419 8, /* size of l2 cache */
420 0, /* size of prefetch block */
421 0, /* number of parallel prefetches */
422 2, /* Branch cost */
423 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
424 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
425 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
426 COSTS_N_INSNS (1), /* cost of FABS instruction. */
427 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
428 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
429 pentium_memcpy,
430 pentium_memset,
431 1, /* scalar_stmt_cost. */
432 1, /* scalar load_cost. */
433 1, /* scalar_store_cost. */
434 1, /* vec_stmt_cost. */
435 1, /* vec_to_scalar_cost. */
436 1, /* scalar_to_vec_cost. */
437 1, /* vec_align_load_cost. */
438 2, /* vec_unalign_load_cost. */
439 1, /* vec_store_cost. */
440 3, /* cond_taken_branch_cost. */
441 1, /* cond_not_taken_branch_cost. */
442 };
443
444 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
445 (we ensure the alignment).  For small blocks an inline loop is still a
446 noticeable win; for bigger blocks either rep movsl or rep movsb is the
447 way to go.  Rep movsb apparently has a more expensive startup time in the
448 CPU, but after 4K the difference is down in the noise. */
449 static stringop_algs pentiumpro_memcpy[2] = {
450 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
451 {8192, rep_prefix_4_byte, false},
452 {-1, rep_prefix_1_byte, false}}},
453 DUMMY_STRINGOP_ALGS};
454 static stringop_algs pentiumpro_memset[2] = {
455 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
456 {8192, rep_prefix_4_byte, false},
457 {-1, libcall, false}}},
458 DUMMY_STRINGOP_ALGS};
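/* Editorial note (not in the original source): the pentiumpro_memcpy table
   above appears to read as follows: the leading rep_prefix_4_byte entry is
   used when the block size is not known at compile time, while known sizes
   up to 128 bytes use an inline loop, up to 1024 an unrolled loop, up to
   8192 rep movsl (rep_prefix_4_byte), and anything larger rep movsb
   (rep_prefix_1_byte); pentiumpro_memset is read the same way.  */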
459 static const
460 struct processor_costs pentiumpro_cost = {
461 COSTS_N_INSNS (1), /* cost of an add instruction */
462 COSTS_N_INSNS (1), /* cost of a lea instruction */
463 COSTS_N_INSNS (1), /* variable shift costs */
464 COSTS_N_INSNS (1), /* constant shift costs */
465 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
466 COSTS_N_INSNS (4), /* HI */
467 COSTS_N_INSNS (4), /* SI */
468 COSTS_N_INSNS (4), /* DI */
469 COSTS_N_INSNS (4)}, /* other */
470 0, /* cost of multiply per each bit set */
471 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
472 COSTS_N_INSNS (17), /* HI */
473 COSTS_N_INSNS (17), /* SI */
474 COSTS_N_INSNS (17), /* DI */
475 COSTS_N_INSNS (17)}, /* other */
476 COSTS_N_INSNS (1), /* cost of movsx */
477 COSTS_N_INSNS (1), /* cost of movzx */
478 8, /* "large" insn */
479 6, /* MOVE_RATIO */
480 2, /* cost for loading QImode using movzbl */
481 {4, 4, 4}, /* cost of loading integer registers
482 in QImode, HImode and SImode.
483 Relative to reg-reg move (2). */
484 {2, 2, 2}, /* cost of storing integer registers */
485 2, /* cost of reg,reg fld/fst */
486 {2, 2, 6}, /* cost of loading fp registers
487 in SFmode, DFmode and XFmode */
488 {4, 4, 6}, /* cost of storing fp registers
489 in SFmode, DFmode and XFmode */
490 2, /* cost of moving MMX register */
491 {2, 2}, /* cost of loading MMX registers
492 in SImode and DImode */
493 {2, 2}, /* cost of storing MMX registers
494 in SImode and DImode */
495 2, /* cost of moving SSE register */
496 {2, 2, 8}, /* cost of loading SSE registers
497 in SImode, DImode and TImode */
498 {2, 2, 8}, /* cost of storing SSE registers
499 in SImode, DImode and TImode */
500 3, /* MMX or SSE register to integer */
501 8, /* size of l1 cache. */
502 256, /* size of l2 cache */
503 32, /* size of prefetch block */
504 6, /* number of parallel prefetches */
505 2, /* Branch cost */
506 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
507 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
508 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
509 COSTS_N_INSNS (2), /* cost of FABS instruction. */
510 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
511 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
512 pentiumpro_memcpy,
513 pentiumpro_memset,
514 1, /* scalar_stmt_cost. */
515 1, /* scalar load_cost. */
516 1, /* scalar_store_cost. */
517 1, /* vec_stmt_cost. */
518 1, /* vec_to_scalar_cost. */
519 1, /* scalar_to_vec_cost. */
520 1, /* vec_align_load_cost. */
521 2, /* vec_unalign_load_cost. */
522 1, /* vec_store_cost. */
523 3, /* cond_taken_branch_cost. */
524 1, /* cond_not_taken_branch_cost. */
525 };
526
527 static stringop_algs geode_memcpy[2] = {
528 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
529 DUMMY_STRINGOP_ALGS};
530 static stringop_algs geode_memset[2] = {
531 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
532 DUMMY_STRINGOP_ALGS};
533 static const
534 struct processor_costs geode_cost = {
535 COSTS_N_INSNS (1), /* cost of an add instruction */
536 COSTS_N_INSNS (1), /* cost of a lea instruction */
537 COSTS_N_INSNS (2), /* variable shift costs */
538 COSTS_N_INSNS (1), /* constant shift costs */
539 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
540 COSTS_N_INSNS (4), /* HI */
541 COSTS_N_INSNS (7), /* SI */
542 COSTS_N_INSNS (7), /* DI */
543 COSTS_N_INSNS (7)}, /* other */
544 0, /* cost of multiply per each bit set */
545 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
546 COSTS_N_INSNS (23), /* HI */
547 COSTS_N_INSNS (39), /* SI */
548 COSTS_N_INSNS (39), /* DI */
549 COSTS_N_INSNS (39)}, /* other */
550 COSTS_N_INSNS (1), /* cost of movsx */
551 COSTS_N_INSNS (1), /* cost of movzx */
552 8, /* "large" insn */
553 4, /* MOVE_RATIO */
554 1, /* cost for loading QImode using movzbl */
555 {1, 1, 1}, /* cost of loading integer registers
556 in QImode, HImode and SImode.
557 Relative to reg-reg move (2). */
558 {1, 1, 1}, /* cost of storing integer registers */
559 1, /* cost of reg,reg fld/fst */
560 {1, 1, 1}, /* cost of loading fp registers
561 in SFmode, DFmode and XFmode */
562 {4, 6, 6}, /* cost of storing fp registers
563 in SFmode, DFmode and XFmode */
564
565 1, /* cost of moving MMX register */
566 {1, 1}, /* cost of loading MMX registers
567 in SImode and DImode */
568 {1, 1}, /* cost of storing MMX registers
569 in SImode and DImode */
570 1, /* cost of moving SSE register */
571 {1, 1, 1}, /* cost of loading SSE registers
572 in SImode, DImode and TImode */
573 {1, 1, 1}, /* cost of storing SSE registers
574 in SImode, DImode and TImode */
575 1, /* MMX or SSE register to integer */
576 64, /* size of l1 cache. */
577 128, /* size of l2 cache. */
578 32, /* size of prefetch block */
579 1, /* number of parallel prefetches */
580 1, /* Branch cost */
581 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
582 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
583 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
584 COSTS_N_INSNS (1), /* cost of FABS instruction. */
585 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
586 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
587 geode_memcpy,
588 geode_memset,
589 1, /* scalar_stmt_cost. */
590 1, /* scalar load_cost. */
591 1, /* scalar_store_cost. */
592 1, /* vec_stmt_cost. */
593 1, /* vec_to_scalar_cost. */
594 1, /* scalar_to_vec_cost. */
595 1, /* vec_align_load_cost. */
596 2, /* vec_unalign_load_cost. */
597 1, /* vec_store_cost. */
598 3, /* cond_taken_branch_cost. */
599 1, /* cond_not_taken_branch_cost. */
600 };
601
602 static stringop_algs k6_memcpy[2] = {
603 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
604 DUMMY_STRINGOP_ALGS};
605 static stringop_algs k6_memset[2] = {
606 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
607 DUMMY_STRINGOP_ALGS};
608 static const
609 struct processor_costs k6_cost = {
610 COSTS_N_INSNS (1), /* cost of an add instruction */
611 COSTS_N_INSNS (2), /* cost of a lea instruction */
612 COSTS_N_INSNS (1), /* variable shift costs */
613 COSTS_N_INSNS (1), /* constant shift costs */
614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
615 COSTS_N_INSNS (3), /* HI */
616 COSTS_N_INSNS (3), /* SI */
617 COSTS_N_INSNS (3), /* DI */
618 COSTS_N_INSNS (3)}, /* other */
619 0, /* cost of multiply per each bit set */
620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
621 COSTS_N_INSNS (18), /* HI */
622 COSTS_N_INSNS (18), /* SI */
623 COSTS_N_INSNS (18), /* DI */
624 COSTS_N_INSNS (18)}, /* other */
625 COSTS_N_INSNS (2), /* cost of movsx */
626 COSTS_N_INSNS (2), /* cost of movzx */
627 8, /* "large" insn */
628 4, /* MOVE_RATIO */
629 3, /* cost for loading QImode using movzbl */
630 {4, 5, 4}, /* cost of loading integer registers
631 in QImode, HImode and SImode.
632 Relative to reg-reg move (2). */
633 {2, 3, 2}, /* cost of storing integer registers */
634 4, /* cost of reg,reg fld/fst */
635 {6, 6, 6}, /* cost of loading fp registers
636 in SFmode, DFmode and XFmode */
637 {4, 4, 4}, /* cost of storing fp registers
638 in SFmode, DFmode and XFmode */
639 2, /* cost of moving MMX register */
640 {2, 2}, /* cost of loading MMX registers
641 in SImode and DImode */
642 {2, 2}, /* cost of storing MMX registers
643 in SImode and DImode */
644 2, /* cost of moving SSE register */
645 {2, 2, 8}, /* cost of loading SSE registers
646 in SImode, DImode and TImode */
647 {2, 2, 8}, /* cost of storing SSE registers
648 in SImode, DImode and TImode */
649 6, /* MMX or SSE register to integer */
650 32, /* size of l1 cache. */
651 32, /* size of l2 cache. Some models
652 have integrated l2 cache, but
653 optimizing for k6 is not important
654 enough to worry about that. */
655 32, /* size of prefetch block */
656 1, /* number of parallel prefetches */
657 1, /* Branch cost */
658 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
659 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
660 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
661 COSTS_N_INSNS (2), /* cost of FABS instruction. */
662 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
663 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
664 k6_memcpy,
665 k6_memset,
666 1, /* scalar_stmt_cost. */
667 1, /* scalar load_cost. */
668 1, /* scalar_store_cost. */
669 1, /* vec_stmt_cost. */
670 1, /* vec_to_scalar_cost. */
671 1, /* scalar_to_vec_cost. */
672 1, /* vec_align_load_cost. */
673 2, /* vec_unalign_load_cost. */
674 1, /* vec_store_cost. */
675 3, /* cond_taken_branch_cost. */
676 1, /* cond_not_taken_branch_cost. */
677 };
678
679 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
680 than K8 does.  Alignment becomes important after 8 bytes for memcpy and
681 128 bytes for memset. */
682 static stringop_algs athlon_memcpy[2] = {
683 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
684 DUMMY_STRINGOP_ALGS};
685 static stringop_algs athlon_memset[2] = {
686 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
687 DUMMY_STRINGOP_ALGS};
688 static const
689 struct processor_costs athlon_cost = {
690 COSTS_N_INSNS (1), /* cost of an add instruction */
691 COSTS_N_INSNS (2), /* cost of a lea instruction */
692 COSTS_N_INSNS (1), /* variable shift costs */
693 COSTS_N_INSNS (1), /* constant shift costs */
694 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
695 COSTS_N_INSNS (5), /* HI */
696 COSTS_N_INSNS (5), /* SI */
697 COSTS_N_INSNS (5), /* DI */
698 COSTS_N_INSNS (5)}, /* other */
699 0, /* cost of multiply per each bit set */
700 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
701 COSTS_N_INSNS (26), /* HI */
702 COSTS_N_INSNS (42), /* SI */
703 COSTS_N_INSNS (74), /* DI */
704 COSTS_N_INSNS (74)}, /* other */
705 COSTS_N_INSNS (1), /* cost of movsx */
706 COSTS_N_INSNS (1), /* cost of movzx */
707 8, /* "large" insn */
708 9, /* MOVE_RATIO */
709 4, /* cost for loading QImode using movzbl */
710 {3, 4, 3}, /* cost of loading integer registers
711 in QImode, HImode and SImode.
712 Relative to reg-reg move (2). */
713 {3, 4, 3}, /* cost of storing integer registers */
714 4, /* cost of reg,reg fld/fst */
715 {4, 4, 12}, /* cost of loading fp registers
716 in SFmode, DFmode and XFmode */
717 {6, 6, 8}, /* cost of storing fp registers
718 in SFmode, DFmode and XFmode */
719 2, /* cost of moving MMX register */
720 {4, 4}, /* cost of loading MMX registers
721 in SImode and DImode */
722 {4, 4}, /* cost of storing MMX registers
723 in SImode and DImode */
724 2, /* cost of moving SSE register */
725 {4, 4, 6}, /* cost of loading SSE registers
726 in SImode, DImode and TImode */
727 {4, 4, 5}, /* cost of storing SSE registers
728 in SImode, DImode and TImode */
729 5, /* MMX or SSE register to integer */
730 64, /* size of l1 cache. */
731 256, /* size of l2 cache. */
732 64, /* size of prefetch block */
733 6, /* number of parallel prefetches */
734 5, /* Branch cost */
735 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
736 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
737 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
738 COSTS_N_INSNS (2), /* cost of FABS instruction. */
739 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
740 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
741 athlon_memcpy,
742 athlon_memset,
743 1, /* scalar_stmt_cost. */
744 1, /* scalar load_cost. */
745 1, /* scalar_store_cost. */
746 1, /* vec_stmt_cost. */
747 1, /* vec_to_scalar_cost. */
748 1, /* scalar_to_vec_cost. */
749 1, /* vec_align_load_cost. */
750 2, /* vec_unalign_load_cost. */
751 1, /* vec_store_cost. */
752 3, /* cond_taken_branch_cost. */
753 1, /* cond_not_taken_branch_cost. */
754 };
755
756 /* K8 has an optimized REP instruction for medium sized blocks, but for very
757 small blocks it is better to use a loop.  For large blocks, a libcall can
758 do non-temporal accesses and beat inlined code considerably. */
759 static stringop_algs k8_memcpy[2] = {
760 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
761 {-1, rep_prefix_4_byte, false}}},
762 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
763 {-1, libcall, false}}}};
764 static stringop_algs k8_memset[2] = {
765 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
766 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
767 {libcall, {{48, unrolled_loop, false},
768 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
769 static const
770 struct processor_costs k8_cost = {
771 COSTS_N_INSNS (1), /* cost of an add instruction */
772 COSTS_N_INSNS (2), /* cost of a lea instruction */
773 COSTS_N_INSNS (1), /* variable shift costs */
774 COSTS_N_INSNS (1), /* constant shift costs */
775 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
776 COSTS_N_INSNS (4), /* HI */
777 COSTS_N_INSNS (3), /* SI */
778 COSTS_N_INSNS (4), /* DI */
779 COSTS_N_INSNS (5)}, /* other */
780 0, /* cost of multiply per each bit set */
781 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
782 COSTS_N_INSNS (26), /* HI */
783 COSTS_N_INSNS (42), /* SI */
784 COSTS_N_INSNS (74), /* DI */
785 COSTS_N_INSNS (74)}, /* other */
786 COSTS_N_INSNS (1), /* cost of movsx */
787 COSTS_N_INSNS (1), /* cost of movzx */
788 8, /* "large" insn */
789 9, /* MOVE_RATIO */
790 4, /* cost for loading QImode using movzbl */
791 {3, 4, 3}, /* cost of loading integer registers
792 in QImode, HImode and SImode.
793 Relative to reg-reg move (2). */
794 {3, 4, 3}, /* cost of storing integer registers */
795 4, /* cost of reg,reg fld/fst */
796 {4, 4, 12}, /* cost of loading fp registers
797 in SFmode, DFmode and XFmode */
798 {6, 6, 8}, /* cost of storing fp registers
799 in SFmode, DFmode and XFmode */
800 2, /* cost of moving MMX register */
801 {3, 3}, /* cost of loading MMX registers
802 in SImode and DImode */
803 {4, 4}, /* cost of storing MMX registers
804 in SImode and DImode */
805 2, /* cost of moving SSE register */
806 {4, 3, 6}, /* cost of loading SSE registers
807 in SImode, DImode and TImode */
808 {4, 4, 5}, /* cost of storing SSE registers
809 in SImode, DImode and TImode */
810 5, /* MMX or SSE register to integer */
811 64, /* size of l1 cache. */
812 512, /* size of l2 cache. */
813 64, /* size of prefetch block */
814 /* New AMD processors never drop prefetches; if they cannot be performed
815 immediately, they are queued. We set number of simultaneous prefetches
816 to a large constant to reflect this (it probably is not a good idea not
817 to limit number of prefetches at all, as their execution also takes some
818 time). */
819 100, /* number of parallel prefetches */
820 3, /* Branch cost */
821 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
822 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
823 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
824 COSTS_N_INSNS (2), /* cost of FABS instruction. */
825 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
826 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
827
828 k8_memcpy,
829 k8_memset,
830 4, /* scalar_stmt_cost. */
831 2, /* scalar load_cost. */
832 2, /* scalar_store_cost. */
833 5, /* vec_stmt_cost. */
834 0, /* vec_to_scalar_cost. */
835 2, /* scalar_to_vec_cost. */
836 2, /* vec_align_load_cost. */
837 3, /* vec_unalign_load_cost. */
838 3, /* vec_store_cost. */
839 3, /* cond_taken_branch_cost. */
840 2, /* cond_not_taken_branch_cost. */
841 };
842
843 /* AMDFAM10 has an optimized REP instruction for medium sized blocks, but for
844 very small blocks it is better to use a loop.  For large blocks, a libcall can
845 do non-temporal accesses and beat inlined code considerably. */
846 static stringop_algs amdfam10_memcpy[2] = {
847 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
848 {-1, rep_prefix_4_byte, false}}},
849 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
850 {-1, libcall, false}}}};
851 static stringop_algs amdfam10_memset[2] = {
852 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
853 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
854 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
855 {-1, libcall, false}}}};
856 struct processor_costs amdfam10_cost = {
857 COSTS_N_INSNS (1), /* cost of an add instruction */
858 COSTS_N_INSNS (2), /* cost of a lea instruction */
859 COSTS_N_INSNS (1), /* variable shift costs */
860 COSTS_N_INSNS (1), /* constant shift costs */
861 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
862 COSTS_N_INSNS (4), /* HI */
863 COSTS_N_INSNS (3), /* SI */
864 COSTS_N_INSNS (4), /* DI */
865 COSTS_N_INSNS (5)}, /* other */
866 0, /* cost of multiply per each bit set */
867 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
868 COSTS_N_INSNS (35), /* HI */
869 COSTS_N_INSNS (51), /* SI */
870 COSTS_N_INSNS (83), /* DI */
871 COSTS_N_INSNS (83)}, /* other */
872 COSTS_N_INSNS (1), /* cost of movsx */
873 COSTS_N_INSNS (1), /* cost of movzx */
874 8, /* "large" insn */
875 9, /* MOVE_RATIO */
876 4, /* cost for loading QImode using movzbl */
877 {3, 4, 3}, /* cost of loading integer registers
878 in QImode, HImode and SImode.
879 Relative to reg-reg move (2). */
880 {3, 4, 3}, /* cost of storing integer registers */
881 4, /* cost of reg,reg fld/fst */
882 {4, 4, 12}, /* cost of loading fp registers
883 in SFmode, DFmode and XFmode */
884 {6, 6, 8}, /* cost of storing fp registers
885 in SFmode, DFmode and XFmode */
886 2, /* cost of moving MMX register */
887 {3, 3}, /* cost of loading MMX registers
888 in SImode and DImode */
889 {4, 4}, /* cost of storing MMX registers
890 in SImode and DImode */
891 2, /* cost of moving SSE register */
892 {4, 4, 3}, /* cost of loading SSE registers
893 in SImode, DImode and TImode */
894 {4, 4, 5}, /* cost of storing SSE registers
895 in SImode, DImode and TImode */
896 3, /* MMX or SSE register to integer */
897 /* On K8:
898 MOVD reg64, xmmreg Double FSTORE 4
899 MOVD reg32, xmmreg Double FSTORE 4
900 On AMDFAM10:
901 MOVD reg64, xmmreg Double FADD 3
902 1/1 1/1
903 MOVD reg32, xmmreg Double FADD 3
904 1/1 1/1 */
905 64, /* size of l1 cache. */
906 512, /* size of l2 cache. */
907 64, /* size of prefetch block */
908 /* New AMD processors never drop prefetches; if they cannot be performed
909 immediately, they are queued. We set number of simultaneous prefetches
910 to a large constant to reflect this (it probably is not a good idea not
911 to limit number of prefetches at all, as their execution also takes some
912 time). */
913 100, /* number of parallel prefetches */
914 2, /* Branch cost */
915 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
916 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
917 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
918 COSTS_N_INSNS (2), /* cost of FABS instruction. */
919 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
920 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
921
922 amdfam10_memcpy,
923 amdfam10_memset,
924 4, /* scalar_stmt_cost. */
925 2, /* scalar load_cost. */
926 2, /* scalar_store_cost. */
927 6, /* vec_stmt_cost. */
928 0, /* vec_to_scalar_cost. */
929 2, /* scalar_to_vec_cost. */
930 2, /* vec_align_load_cost. */
931 2, /* vec_unalign_load_cost. */
932 2, /* vec_store_cost. */
933 2, /* cond_taken_branch_cost. */
934 1, /* cond_not_taken_branch_cost. */
935 };
936
937 /* BDVER1 has an optimized REP instruction for medium sized blocks, but for
938 very small blocks it is better to use a loop.  For large blocks, a libcall
939 can do non-temporal accesses and beat inlined code considerably. */
940 static stringop_algs bdver1_memcpy[2] = {
941 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
942 {-1, rep_prefix_4_byte, false}}},
943 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
944 {-1, libcall, false}}}};
945 static stringop_algs bdver1_memset[2] = {
946 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
947 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
948 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
949 {-1, libcall, false}}}};
950
951 const struct processor_costs bdver1_cost = {
952 COSTS_N_INSNS (1), /* cost of an add instruction */
953 COSTS_N_INSNS (1), /* cost of a lea instruction */
954 COSTS_N_INSNS (1), /* variable shift costs */
955 COSTS_N_INSNS (1), /* constant shift costs */
956 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
957 COSTS_N_INSNS (4), /* HI */
958 COSTS_N_INSNS (4), /* SI */
959 COSTS_N_INSNS (6), /* DI */
960 COSTS_N_INSNS (6)}, /* other */
961 0, /* cost of multiply per each bit set */
962 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
963 COSTS_N_INSNS (35), /* HI */
964 COSTS_N_INSNS (51), /* SI */
965 COSTS_N_INSNS (83), /* DI */
966 COSTS_N_INSNS (83)}, /* other */
967 COSTS_N_INSNS (1), /* cost of movsx */
968 COSTS_N_INSNS (1), /* cost of movzx */
969 8, /* "large" insn */
970 9, /* MOVE_RATIO */
971 4, /* cost for loading QImode using movzbl */
972 {5, 5, 4}, /* cost of loading integer registers
973 in QImode, HImode and SImode.
974 Relative to reg-reg move (2). */
975 {4, 4, 4}, /* cost of storing integer registers */
976 2, /* cost of reg,reg fld/fst */
977 {5, 5, 12}, /* cost of loading fp registers
978 in SFmode, DFmode and XFmode */
979 {4, 4, 8}, /* cost of storing fp registers
980 in SFmode, DFmode and XFmode */
981 2, /* cost of moving MMX register */
982 {4, 4}, /* cost of loading MMX registers
983 in SImode and DImode */
984 {4, 4}, /* cost of storing MMX registers
985 in SImode and DImode */
986 2, /* cost of moving SSE register */
987 {4, 4, 4}, /* cost of loading SSE registers
988 in SImode, DImode and TImode */
989 {4, 4, 4}, /* cost of storing SSE registers
990 in SImode, DImode and TImode */
991 2, /* MMX or SSE register to integer */
992 /* On K8:
993 MOVD reg64, xmmreg Double FSTORE 4
994 MOVD reg32, xmmreg Double FSTORE 4
995 On AMDFAM10:
996 MOVD reg64, xmmreg Double FADD 3
997 1/1 1/1
998 MOVD reg32, xmmreg Double FADD 3
999 1/1 1/1 */
1000 16, /* size of l1 cache. */
1001 2048, /* size of l2 cache. */
1002 64, /* size of prefetch block */
1003 /* New AMD processors never drop prefetches; if they cannot be performed
1004 immediately, they are queued. We set number of simultaneous prefetches
1005 to a large constant to reflect this (it probably is not a good idea not
1006 to limit number of prefetches at all, as their execution also takes some
1007 time). */
1008 100, /* number of parallel prefetches */
1009 2, /* Branch cost */
1010 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1011 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1012 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1013 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1014 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1015 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1016
1017 bdver1_memcpy,
1018 bdver1_memset,
1019 6, /* scalar_stmt_cost. */
1020 4, /* scalar load_cost. */
1021 4, /* scalar_store_cost. */
1022 6, /* vec_stmt_cost. */
1023 0, /* vec_to_scalar_cost. */
1024 2, /* scalar_to_vec_cost. */
1025 4, /* vec_align_load_cost. */
1026 4, /* vec_unalign_load_cost. */
1027 4, /* vec_store_cost. */
1028 2, /* cond_taken_branch_cost. */
1029 1, /* cond_not_taken_branch_cost. */
1030 };
1031
1032 /* BDVER2 has an optimized REP instruction for medium sized blocks, but for
1033 very small blocks it is better to use a loop.  For large blocks, a libcall
1034 can do non-temporal accesses and beat inlined code considerably. */
1035
1036 static stringop_algs bdver2_memcpy[2] = {
1037 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1038 {-1, rep_prefix_4_byte, false}}},
1039 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1040 {-1, libcall, false}}}};
1041 static stringop_algs bdver2_memset[2] = {
1042 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1043 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1044 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1045 {-1, libcall, false}}}};
1046
1047 const struct processor_costs bdver2_cost = {
1048 COSTS_N_INSNS (1), /* cost of an add instruction */
1049 COSTS_N_INSNS (1), /* cost of a lea instruction */
1050 COSTS_N_INSNS (1), /* variable shift costs */
1051 COSTS_N_INSNS (1), /* constant shift costs */
1052 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1053 COSTS_N_INSNS (4), /* HI */
1054 COSTS_N_INSNS (4), /* SI */
1055 COSTS_N_INSNS (6), /* DI */
1056 COSTS_N_INSNS (6)}, /* other */
1057 0, /* cost of multiply per each bit set */
1058 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1059 COSTS_N_INSNS (35), /* HI */
1060 COSTS_N_INSNS (51), /* SI */
1061 COSTS_N_INSNS (83), /* DI */
1062 COSTS_N_INSNS (83)}, /* other */
1063 COSTS_N_INSNS (1), /* cost of movsx */
1064 COSTS_N_INSNS (1), /* cost of movzx */
1065 8, /* "large" insn */
1066 9, /* MOVE_RATIO */
1067 4, /* cost for loading QImode using movzbl */
1068 {5, 5, 4}, /* cost of loading integer registers
1069 in QImode, HImode and SImode.
1070 Relative to reg-reg move (2). */
1071 {4, 4, 4}, /* cost of storing integer registers */
1072 2, /* cost of reg,reg fld/fst */
1073 {5, 5, 12}, /* cost of loading fp registers
1074 in SFmode, DFmode and XFmode */
1075 {4, 4, 8}, /* cost of storing fp registers
1076 in SFmode, DFmode and XFmode */
1077 2, /* cost of moving MMX register */
1078 {4, 4}, /* cost of loading MMX registers
1079 in SImode and DImode */
1080 {4, 4}, /* cost of storing MMX registers
1081 in SImode and DImode */
1082 2, /* cost of moving SSE register */
1083 {4, 4, 4}, /* cost of loading SSE registers
1084 in SImode, DImode and TImode */
1085 {4, 4, 4}, /* cost of storing SSE registers
1086 in SImode, DImode and TImode */
1087 2, /* MMX or SSE register to integer */
1088 /* On K8:
1089 MOVD reg64, xmmreg Double FSTORE 4
1090 MOVD reg32, xmmreg Double FSTORE 4
1091 On AMDFAM10:
1092 MOVD reg64, xmmreg Double FADD 3
1093 1/1 1/1
1094 MOVD reg32, xmmreg Double FADD 3
1095 1/1 1/1 */
1096 16, /* size of l1 cache. */
1097 2048, /* size of l2 cache. */
1098 64, /* size of prefetch block */
1099 /* New AMD processors never drop prefetches; if they cannot be performed
1100 immediately, they are queued. We set number of simultaneous prefetches
1101 to a large constant to reflect this (it probably is not a good idea not
1102 to limit number of prefetches at all, as their execution also takes some
1103 time). */
1104 100, /* number of parallel prefetches */
1105 2, /* Branch cost */
1106 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1107 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1108 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1109 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1110 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1111 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1112
1113 bdver2_memcpy,
1114 bdver2_memset,
1115 6, /* scalar_stmt_cost. */
1116 4, /* scalar load_cost. */
1117 4, /* scalar_store_cost. */
1118 6, /* vec_stmt_cost. */
1119 0, /* vec_to_scalar_cost. */
1120 2, /* scalar_to_vec_cost. */
1121 4, /* vec_align_load_cost. */
1122 4, /* vec_unalign_load_cost. */
1123 4, /* vec_store_cost. */
1124 2, /* cond_taken_branch_cost. */
1125 1, /* cond_not_taken_branch_cost. */
1126 };
1127
1128
1129 /* BDVER3 has an optimized REP instruction for medium sized blocks, but for
1130 very small blocks it is better to use a loop.  For large blocks, a libcall
1131 can do non-temporal accesses and beat inlined code considerably. */
1132 static stringop_algs bdver3_memcpy[2] = {
1133 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1134 {-1, rep_prefix_4_byte, false}}},
1135 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1136 {-1, libcall, false}}}};
1137 static stringop_algs bdver3_memset[2] = {
1138 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1139 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1140 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1141 {-1, libcall, false}}}};
1142 struct processor_costs bdver3_cost = {
1143 COSTS_N_INSNS (1), /* cost of an add instruction */
1144 COSTS_N_INSNS (1), /* cost of a lea instruction */
1145 COSTS_N_INSNS (1), /* variable shift costs */
1146 COSTS_N_INSNS (1), /* constant shift costs */
1147 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1148 COSTS_N_INSNS (4), /* HI */
1149 COSTS_N_INSNS (4), /* SI */
1150 COSTS_N_INSNS (6), /* DI */
1151 COSTS_N_INSNS (6)}, /* other */
1152 0, /* cost of multiply per each bit set */
1153 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1154 COSTS_N_INSNS (35), /* HI */
1155 COSTS_N_INSNS (51), /* SI */
1156 COSTS_N_INSNS (83), /* DI */
1157 COSTS_N_INSNS (83)}, /* other */
1158 COSTS_N_INSNS (1), /* cost of movsx */
1159 COSTS_N_INSNS (1), /* cost of movzx */
1160 8, /* "large" insn */
1161 9, /* MOVE_RATIO */
1162 4, /* cost for loading QImode using movzbl */
1163 {5, 5, 4}, /* cost of loading integer registers
1164 in QImode, HImode and SImode.
1165 Relative to reg-reg move (2). */
1166 {4, 4, 4}, /* cost of storing integer registers */
1167 2, /* cost of reg,reg fld/fst */
1168 {5, 5, 12}, /* cost of loading fp registers
1169 in SFmode, DFmode and XFmode */
1170 {4, 4, 8}, /* cost of storing fp registers
1171 in SFmode, DFmode and XFmode */
1172 2, /* cost of moving MMX register */
1173 {4, 4}, /* cost of loading MMX registers
1174 in SImode and DImode */
1175 {4, 4}, /* cost of storing MMX registers
1176 in SImode and DImode */
1177 2, /* cost of moving SSE register */
1178 {4, 4, 4}, /* cost of loading SSE registers
1179 in SImode, DImode and TImode */
1180 {4, 4, 4}, /* cost of storing SSE registers
1181 in SImode, DImode and TImode */
1182 2, /* MMX or SSE register to integer */
1183 16, /* size of l1 cache. */
1184 2048, /* size of l2 cache. */
1185 64, /* size of prefetch block */
1186 /* New AMD processors never drop prefetches; if they cannot be performed
1187 immediately, they are queued. We set number of simultaneous prefetches
1188 to a large constant to reflect this (it probably is not a good idea not
1189 to limit number of prefetches at all, as their execution also takes some
1190 time). */
1191 100, /* number of parallel prefetches */
1192 2, /* Branch cost */
1193 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1194 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1195 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1196 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1197 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1198 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1199
1200 bdver3_memcpy,
1201 bdver3_memset,
1202 6, /* scalar_stmt_cost. */
1203 4, /* scalar load_cost. */
1204 4, /* scalar_store_cost. */
1205 6, /* vec_stmt_cost. */
1206 0, /* vec_to_scalar_cost. */
1207 2, /* scalar_to_vec_cost. */
1208 4, /* vec_align_load_cost. */
1209 4, /* vec_unalign_load_cost. */
1210 4, /* vec_store_cost. */
1211 2, /* cond_taken_branch_cost. */
1212 1, /* cond_not_taken_branch_cost. */
1213 };
1214
1215 /* BDVER4 has an optimized REP instruction for medium sized blocks, but for
1216 very small blocks it is better to use a loop.  For large blocks, a libcall
1217 can do non-temporal accesses and beat inlined code considerably. */
1218 static stringop_algs bdver4_memcpy[2] = {
1219 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1220 {-1, rep_prefix_4_byte, false}}},
1221 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1222 {-1, libcall, false}}}};
1223 static stringop_algs bdver4_memset[2] = {
1224 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1225 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1226 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1227 {-1, libcall, false}}}};
1228 struct processor_costs bdver4_cost = {
1229 COSTS_N_INSNS (1), /* cost of an add instruction */
1230 COSTS_N_INSNS (1), /* cost of a lea instruction */
1231 COSTS_N_INSNS (1), /* variable shift costs */
1232 COSTS_N_INSNS (1), /* constant shift costs */
1233 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1234 COSTS_N_INSNS (4), /* HI */
1235 COSTS_N_INSNS (4), /* SI */
1236 COSTS_N_INSNS (6), /* DI */
1237 COSTS_N_INSNS (6)}, /* other */
1238 0, /* cost of multiply per each bit set */
1239 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1240 COSTS_N_INSNS (35), /* HI */
1241 COSTS_N_INSNS (51), /* SI */
1242 COSTS_N_INSNS (83), /* DI */
1243 COSTS_N_INSNS (83)}, /* other */
1244 COSTS_N_INSNS (1), /* cost of movsx */
1245 COSTS_N_INSNS (1), /* cost of movzx */
1246 8, /* "large" insn */
1247 9, /* MOVE_RATIO */
1248 4, /* cost for loading QImode using movzbl */
1249 {5, 5, 4}, /* cost of loading integer registers
1250 in QImode, HImode and SImode.
1251 Relative to reg-reg move (2). */
1252 {4, 4, 4}, /* cost of storing integer registers */
1253 2, /* cost of reg,reg fld/fst */
1254 {5, 5, 12}, /* cost of loading fp registers
1255 in SFmode, DFmode and XFmode */
1256 {4, 4, 8}, /* cost of storing fp registers
1257 in SFmode, DFmode and XFmode */
1258 2, /* cost of moving MMX register */
1259 {4, 4}, /* cost of loading MMX registers
1260 in SImode and DImode */
1261 {4, 4}, /* cost of storing MMX registers
1262 in SImode and DImode */
1263 2, /* cost of moving SSE register */
1264 {4, 4, 4}, /* cost of loading SSE registers
1265 in SImode, DImode and TImode */
1266 {4, 4, 4}, /* cost of storing SSE registers
1267 in SImode, DImode and TImode */
1268 2, /* MMX or SSE register to integer */
1269 16, /* size of l1 cache. */
1270 2048, /* size of l2 cache. */
1271 64, /* size of prefetch block */
1272 /* New AMD processors never drop prefetches; if they cannot be performed
1273 immediately, they are queued. We set number of simultaneous prefetches
1274 to a large constant to reflect this (it probably is not a good idea not
1275 to limit number of prefetches at all, as their execution also takes some
1276 time). */
1277 100, /* number of parallel prefetches */
1278 2, /* Branch cost */
1279 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1280 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1281 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1282 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1283 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1284 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1285
1286 bdver4_memcpy,
1287 bdver4_memset,
1288 6, /* scalar_stmt_cost. */
1289 4, /* scalar load_cost. */
1290 4, /* scalar_store_cost. */
1291 6, /* vec_stmt_cost. */
1292 0, /* vec_to_scalar_cost. */
1293 2, /* scalar_to_vec_cost. */
1294 4, /* vec_align_load_cost. */
1295 4, /* vec_unalign_load_cost. */
1296 4, /* vec_store_cost. */
1297 2, /* cond_taken_branch_cost. */
1298 1, /* cond_not_taken_branch_cost. */
1299 };
1300
1301 /* BTVER1 has an optimized REP instruction for medium sized blocks, but for
1302 very small blocks it is better to use a loop.  For large blocks, a libcall can
1303 do non-temporal accesses and beat inlined code considerably. */
1304 static stringop_algs btver1_memcpy[2] = {
1305 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1306 {-1, rep_prefix_4_byte, false}}},
1307 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1308 {-1, libcall, false}}}};
1309 static stringop_algs btver1_memset[2] = {
1310 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1311 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1312 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1313 {-1, libcall, false}}}};
1314 const struct processor_costs btver1_cost = {
1315 COSTS_N_INSNS (1), /* cost of an add instruction */
1316 COSTS_N_INSNS (2), /* cost of a lea instruction */
1317 COSTS_N_INSNS (1), /* variable shift costs */
1318 COSTS_N_INSNS (1), /* constant shift costs */
1319 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1320 COSTS_N_INSNS (4), /* HI */
1321 COSTS_N_INSNS (3), /* SI */
1322 COSTS_N_INSNS (4), /* DI */
1323 COSTS_N_INSNS (5)}, /* other */
1324 0, /* cost of multiply per each bit set */
1325 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1326 COSTS_N_INSNS (35), /* HI */
1327 COSTS_N_INSNS (51), /* SI */
1328 COSTS_N_INSNS (83), /* DI */
1329 COSTS_N_INSNS (83)}, /* other */
1330 COSTS_N_INSNS (1), /* cost of movsx */
1331 COSTS_N_INSNS (1), /* cost of movzx */
1332 8, /* "large" insn */
1333 9, /* MOVE_RATIO */
1334 4, /* cost for loading QImode using movzbl */
1335 {3, 4, 3}, /* cost of loading integer registers
1336 in QImode, HImode and SImode.
1337 Relative to reg-reg move (2). */
1338 {3, 4, 3}, /* cost of storing integer registers */
1339 4, /* cost of reg,reg fld/fst */
1340 {4, 4, 12}, /* cost of loading fp registers
1341 in SFmode, DFmode and XFmode */
1342 {6, 6, 8}, /* cost of storing fp registers
1343 in SFmode, DFmode and XFmode */
1344 2, /* cost of moving MMX register */
1345 {3, 3}, /* cost of loading MMX registers
1346 in SImode and DImode */
1347 {4, 4}, /* cost of storing MMX registers
1348 in SImode and DImode */
1349 2, /* cost of moving SSE register */
1350 {4, 4, 3}, /* cost of loading SSE registers
1351 in SImode, DImode and TImode */
1352 {4, 4, 5}, /* cost of storing SSE registers
1353 in SImode, DImode and TImode */
1354 3, /* MMX or SSE register to integer */
1355 /* On K8:
1356 MOVD reg64, xmmreg Double FSTORE 4
1357 MOVD reg32, xmmreg Double FSTORE 4
1358 On AMDFAM10:
1359 MOVD reg64, xmmreg Double FADD 3
1360 1/1 1/1
1361 MOVD reg32, xmmreg Double FADD 3
1362 1/1 1/1 */
1363 32, /* size of l1 cache. */
1364 512, /* size of l2 cache. */
1365 64, /* size of prefetch block */
1366 100, /* number of parallel prefetches */
1367 2, /* Branch cost */
1368 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1369 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1370 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1371 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1372 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1373 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1374
1375 btver1_memcpy,
1376 btver1_memset,
1377 4, /* scalar_stmt_cost. */
1378 2, /* scalar load_cost. */
1379 2, /* scalar_store_cost. */
1380 6, /* vec_stmt_cost. */
1381 0, /* vec_to_scalar_cost. */
1382 2, /* scalar_to_vec_cost. */
1383 2, /* vec_align_load_cost. */
1384 2, /* vec_unalign_load_cost. */
1385 2, /* vec_store_cost. */
1386 2, /* cond_taken_branch_cost. */
1387 1, /* cond_not_taken_branch_cost. */
1388 };
1389
1390 static stringop_algs btver2_memcpy[2] = {
1391 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1392 {-1, rep_prefix_4_byte, false}}},
1393 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1394 {-1, libcall, false}}}};
1395 static stringop_algs btver2_memset[2] = {
1396 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1397 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1398 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1399 {-1, libcall, false}}}};
1400 const struct processor_costs btver2_cost = {
1401 COSTS_N_INSNS (1), /* cost of an add instruction */
1402 COSTS_N_INSNS (2), /* cost of a lea instruction */
1403 COSTS_N_INSNS (1), /* variable shift costs */
1404 COSTS_N_INSNS (1), /* constant shift costs */
1405 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1406 COSTS_N_INSNS (4), /* HI */
1407 COSTS_N_INSNS (3), /* SI */
1408 COSTS_N_INSNS (4), /* DI */
1409 COSTS_N_INSNS (5)}, /* other */
1410 0, /* cost of multiply per each bit set */
1411 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1412 COSTS_N_INSNS (35), /* HI */
1413 COSTS_N_INSNS (51), /* SI */
1414 COSTS_N_INSNS (83), /* DI */
1415 COSTS_N_INSNS (83)}, /* other */
1416 COSTS_N_INSNS (1), /* cost of movsx */
1417 COSTS_N_INSNS (1), /* cost of movzx */
1418 8, /* "large" insn */
1419 9, /* MOVE_RATIO */
1420 4, /* cost for loading QImode using movzbl */
1421 {3, 4, 3}, /* cost of loading integer registers
1422 in QImode, HImode and SImode.
1423 Relative to reg-reg move (2). */
1424 {3, 4, 3}, /* cost of storing integer registers */
1425 4, /* cost of reg,reg fld/fst */
1426 {4, 4, 12}, /* cost of loading fp registers
1427 in SFmode, DFmode and XFmode */
1428 {6, 6, 8}, /* cost of storing fp registers
1429 in SFmode, DFmode and XFmode */
1430 2, /* cost of moving MMX register */
1431 {3, 3}, /* cost of loading MMX registers
1432 in SImode and DImode */
1433 {4, 4}, /* cost of storing MMX registers
1434 in SImode and DImode */
1435 2, /* cost of moving SSE register */
1436 {4, 4, 3}, /* cost of loading SSE registers
1437 in SImode, DImode and TImode */
1438 {4, 4, 5}, /* cost of storing SSE registers
1439 in SImode, DImode and TImode */
1440 3, /* MMX or SSE register to integer */
1441 /* On K8:
1442 MOVD reg64, xmmreg Double FSTORE 4
1443 MOVD reg32, xmmreg Double FSTORE 4
1444 On AMDFAM10:
1445 MOVD reg64, xmmreg Double FADD 3
1446 1/1 1/1
1447 MOVD reg32, xmmreg Double FADD 3
1448 1/1 1/1 */
1449 32, /* size of l1 cache. */
1450 2048, /* size of l2 cache. */
1451 64, /* size of prefetch block */
1452 100, /* number of parallel prefetches */
1453 2, /* Branch cost */
1454 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1455 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1456 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1457 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1458 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1459 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1460 btver2_memcpy,
1461 btver2_memset,
1462 4, /* scalar_stmt_cost. */
1463 2, /* scalar_load_cost. */
1464 2, /* scalar_store_cost. */
1465 6, /* vec_stmt_cost. */
1466 0, /* vec_to_scalar_cost. */
1467 2, /* scalar_to_vec_cost. */
1468 2, /* vec_align_load_cost. */
1469 2, /* vec_unalign_load_cost. */
1470 2, /* vec_store_cost. */
1471 2, /* cond_taken_branch_cost. */
1472 1, /* cond_not_taken_branch_cost. */
1473 };
1474
1475 static stringop_algs pentium4_memcpy[2] = {
1476 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1477 DUMMY_STRINGOP_ALGS};
1478 static stringop_algs pentium4_memset[2] = {
1479 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1480 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1481 DUMMY_STRINGOP_ALGS};
1482
1483 static const
1484 struct processor_costs pentium4_cost = {
1485 COSTS_N_INSNS (1), /* cost of an add instruction */
1486 COSTS_N_INSNS (3), /* cost of a lea instruction */
1487 COSTS_N_INSNS (4), /* variable shift costs */
1488 COSTS_N_INSNS (4), /* constant shift costs */
1489 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1490 COSTS_N_INSNS (15), /* HI */
1491 COSTS_N_INSNS (15), /* SI */
1492 COSTS_N_INSNS (15), /* DI */
1493 COSTS_N_INSNS (15)}, /* other */
1494 0, /* cost of multiply per each bit set */
1495 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1496 COSTS_N_INSNS (56), /* HI */
1497 COSTS_N_INSNS (56), /* SI */
1498 COSTS_N_INSNS (56), /* DI */
1499 COSTS_N_INSNS (56)}, /* other */
1500 COSTS_N_INSNS (1), /* cost of movsx */
1501 COSTS_N_INSNS (1), /* cost of movzx */
1502 16, /* "large" insn */
1503 6, /* MOVE_RATIO */
1504 2, /* cost for loading QImode using movzbl */
1505 {4, 5, 4}, /* cost of loading integer registers
1506 in QImode, HImode and SImode.
1507 Relative to reg-reg move (2). */
1508 {2, 3, 2}, /* cost of storing integer registers */
1509 2, /* cost of reg,reg fld/fst */
1510 {2, 2, 6}, /* cost of loading fp registers
1511 in SFmode, DFmode and XFmode */
1512 {4, 4, 6}, /* cost of storing fp registers
1513 in SFmode, DFmode and XFmode */
1514 2, /* cost of moving MMX register */
1515 {2, 2}, /* cost of loading MMX registers
1516 in SImode and DImode */
1517 {2, 2}, /* cost of storing MMX registers
1518 in SImode and DImode */
1519 12, /* cost of moving SSE register */
1520 {12, 12, 12}, /* cost of loading SSE registers
1521 in SImode, DImode and TImode */
1522 {2, 2, 8}, /* cost of storing SSE registers
1523 in SImode, DImode and TImode */
1524 10, /* MMX or SSE register to integer */
1525 8, /* size of l1 cache. */
1526 256, /* size of l2 cache. */
1527 64, /* size of prefetch block */
1528 6, /* number of parallel prefetches */
1529 2, /* Branch cost */
1530 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1531 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1532 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1533 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1534 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1535 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1536 pentium4_memcpy,
1537 pentium4_memset,
1538 1, /* scalar_stmt_cost. */
1539 1, /* scalar_load_cost. */
1540 1, /* scalar_store_cost. */
1541 1, /* vec_stmt_cost. */
1542 1, /* vec_to_scalar_cost. */
1543 1, /* scalar_to_vec_cost. */
1544 1, /* vec_align_load_cost. */
1545 2, /* vec_unalign_load_cost. */
1546 1, /* vec_store_cost. */
1547 3, /* cond_taken_branch_cost. */
1548 1, /* cond_not_taken_branch_cost. */
1549 };
1550
1551 static stringop_algs nocona_memcpy[2] = {
1552 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1553 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1554 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1555
1556 static stringop_algs nocona_memset[2] = {
1557 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1558 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1559 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1560 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1561
1562 static const
1563 struct processor_costs nocona_cost = {
1564 COSTS_N_INSNS (1), /* cost of an add instruction */
1565 COSTS_N_INSNS (1), /* cost of a lea instruction */
1566 COSTS_N_INSNS (1), /* variable shift costs */
1567 COSTS_N_INSNS (1), /* constant shift costs */
1568 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1569 COSTS_N_INSNS (10), /* HI */
1570 COSTS_N_INSNS (10), /* SI */
1571 COSTS_N_INSNS (10), /* DI */
1572 COSTS_N_INSNS (10)}, /* other */
1573 0, /* cost of multiply per each bit set */
1574 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1575 COSTS_N_INSNS (66), /* HI */
1576 COSTS_N_INSNS (66), /* SI */
1577 COSTS_N_INSNS (66), /* DI */
1578 COSTS_N_INSNS (66)}, /* other */
1579 COSTS_N_INSNS (1), /* cost of movsx */
1580 COSTS_N_INSNS (1), /* cost of movzx */
1581 16, /* "large" insn */
1582 17, /* MOVE_RATIO */
1583 4, /* cost for loading QImode using movzbl */
1584 {4, 4, 4}, /* cost of loading integer registers
1585 in QImode, HImode and SImode.
1586 Relative to reg-reg move (2). */
1587 {4, 4, 4}, /* cost of storing integer registers */
1588 3, /* cost of reg,reg fld/fst */
1589 {12, 12, 12}, /* cost of loading fp registers
1590 in SFmode, DFmode and XFmode */
1591 {4, 4, 4}, /* cost of storing fp registers
1592 in SFmode, DFmode and XFmode */
1593 6, /* cost of moving MMX register */
1594 {12, 12}, /* cost of loading MMX registers
1595 in SImode and DImode */
1596 {12, 12}, /* cost of storing MMX registers
1597 in SImode and DImode */
1598 6, /* cost of moving SSE register */
1599 {12, 12, 12}, /* cost of loading SSE registers
1600 in SImode, DImode and TImode */
1601 {12, 12, 12}, /* cost of storing SSE registers
1602 in SImode, DImode and TImode */
1603 8, /* MMX or SSE register to integer */
1604 8, /* size of l1 cache. */
1605 1024, /* size of l2 cache. */
1606 64, /* size of prefetch block */
1607 8, /* number of parallel prefetches */
1608 1, /* Branch cost */
1609 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1610 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1611 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1612 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1613 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1614 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1615 nocona_memcpy,
1616 nocona_memset,
1617 1, /* scalar_stmt_cost. */
1618 1, /* scalar_load_cost. */
1619 1, /* scalar_store_cost. */
1620 1, /* vec_stmt_cost. */
1621 1, /* vec_to_scalar_cost. */
1622 1, /* scalar_to_vec_cost. */
1623 1, /* vec_align_load_cost. */
1624 2, /* vec_unalign_load_cost. */
1625 1, /* vec_store_cost. */
1626 3, /* cond_taken_branch_cost. */
1627 1, /* cond_not_taken_branch_cost. */
1628 };
1629
1630 static stringop_algs atom_memcpy[2] = {
1631 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1632 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1633 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1634 static stringop_algs atom_memset[2] = {
1635 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1636 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1637 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1638 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1639 static const
1640 struct processor_costs atom_cost = {
1641 COSTS_N_INSNS (1), /* cost of an add instruction */
1642 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1643 COSTS_N_INSNS (1), /* variable shift costs */
1644 COSTS_N_INSNS (1), /* constant shift costs */
1645 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1646 COSTS_N_INSNS (4), /* HI */
1647 COSTS_N_INSNS (3), /* SI */
1648 COSTS_N_INSNS (4), /* DI */
1649 COSTS_N_INSNS (2)}, /* other */
1650 0, /* cost of multiply per each bit set */
1651 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1652 COSTS_N_INSNS (26), /* HI */
1653 COSTS_N_INSNS (42), /* SI */
1654 COSTS_N_INSNS (74), /* DI */
1655 COSTS_N_INSNS (74)}, /* other */
1656 COSTS_N_INSNS (1), /* cost of movsx */
1657 COSTS_N_INSNS (1), /* cost of movzx */
1658 8, /* "large" insn */
1659 17, /* MOVE_RATIO */
1660 4, /* cost for loading QImode using movzbl */
1661 {4, 4, 4}, /* cost of loading integer registers
1662 in QImode, HImode and SImode.
1663 Relative to reg-reg move (2). */
1664 {4, 4, 4}, /* cost of storing integer registers */
1665 4, /* cost of reg,reg fld/fst */
1666 {12, 12, 12}, /* cost of loading fp registers
1667 in SFmode, DFmode and XFmode */
1668 {6, 6, 8}, /* cost of storing fp registers
1669 in SFmode, DFmode and XFmode */
1670 2, /* cost of moving MMX register */
1671 {8, 8}, /* cost of loading MMX registers
1672 in SImode and DImode */
1673 {8, 8}, /* cost of storing MMX registers
1674 in SImode and DImode */
1675 2, /* cost of moving SSE register */
1676 {8, 8, 8}, /* cost of loading SSE registers
1677 in SImode, DImode and TImode */
1678 {8, 8, 8}, /* cost of storing SSE registers
1679 in SImode, DImode and TImode */
1680 5, /* MMX or SSE register to integer */
1681 32, /* size of l1 cache. */
1682 256, /* size of l2 cache. */
1683 64, /* size of prefetch block */
1684 6, /* number of parallel prefetches */
1685 3, /* Branch cost */
1686 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1687 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1688 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1689 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1690 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1691 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1692 atom_memcpy,
1693 atom_memset,
1694 1, /* scalar_stmt_cost. */
1695 1, /* scalar_load_cost. */
1696 1, /* scalar_store_cost. */
1697 1, /* vec_stmt_cost. */
1698 1, /* vec_to_scalar_cost. */
1699 1, /* scalar_to_vec_cost. */
1700 1, /* vec_align_load_cost. */
1701 2, /* vec_unalign_load_cost. */
1702 1, /* vec_store_cost. */
1703 3, /* cond_taken_branch_cost. */
1704 1, /* cond_not_taken_branch_cost. */
1705 };
1706
1707 static stringop_algs slm_memcpy[2] = {
1708 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1709 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1710 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1711 static stringop_algs slm_memset[2] = {
1712 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1713 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1714 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1715 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1716 static const
1717 struct processor_costs slm_cost = {
1718 COSTS_N_INSNS (1), /* cost of an add instruction */
1719 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1720 COSTS_N_INSNS (1), /* variable shift costs */
1721 COSTS_N_INSNS (1), /* constant shift costs */
1722 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1723 COSTS_N_INSNS (3), /* HI */
1724 COSTS_N_INSNS (3), /* SI */
1725 COSTS_N_INSNS (4), /* DI */
1726 COSTS_N_INSNS (2)}, /* other */
1727 0, /* cost of multiply per each bit set */
1728 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1729 COSTS_N_INSNS (26), /* HI */
1730 COSTS_N_INSNS (42), /* SI */
1731 COSTS_N_INSNS (74), /* DI */
1732 COSTS_N_INSNS (74)}, /* other */
1733 COSTS_N_INSNS (1), /* cost of movsx */
1734 COSTS_N_INSNS (1), /* cost of movzx */
1735 8, /* "large" insn */
1736 17, /* MOVE_RATIO */
1737 4, /* cost for loading QImode using movzbl */
1738 {4, 4, 4}, /* cost of loading integer registers
1739 in QImode, HImode and SImode.
1740 Relative to reg-reg move (2). */
1741 {4, 4, 4}, /* cost of storing integer registers */
1742 4, /* cost of reg,reg fld/fst */
1743 {12, 12, 12}, /* cost of loading fp registers
1744 in SFmode, DFmode and XFmode */
1745 {6, 6, 8}, /* cost of storing fp registers
1746 in SFmode, DFmode and XFmode */
1747 2, /* cost of moving MMX register */
1748 {8, 8}, /* cost of loading MMX registers
1749 in SImode and DImode */
1750 {8, 8}, /* cost of storing MMX registers
1751 in SImode and DImode */
1752 2, /* cost of moving SSE register */
1753 {8, 8, 8}, /* cost of loading SSE registers
1754 in SImode, DImode and TImode */
1755 {8, 8, 8}, /* cost of storing SSE registers
1756 in SImode, DImode and TImode */
1757 5, /* MMX or SSE register to integer */
1758 32, /* size of l1 cache. */
1759 256, /* size of l2 cache. */
1760 64, /* size of prefetch block */
1761 6, /* number of parallel prefetches */
1762 3, /* Branch cost */
1763 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1764 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1765 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1766 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1767 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1768 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1769 slm_memcpy,
1770 slm_memset,
1771 1, /* scalar_stmt_cost. */
1772 1, /* scalar_load_cost. */
1773 1, /* scalar_store_cost. */
1774 1, /* vec_stmt_cost. */
1775 4, /* vec_to_scalar_cost. */
1776 1, /* scalar_to_vec_cost. */
1777 1, /* vec_align_load_cost. */
1778 2, /* vec_unalign_load_cost. */
1779 1, /* vec_store_cost. */
1780 3, /* cond_taken_branch_cost. */
1781 1, /* cond_not_taken_branch_cost. */
1782 };
1783
1784 static stringop_algs intel_memcpy[2] = {
1785 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1786 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1787 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1788 static stringop_algs intel_memset[2] = {
1789 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1790 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1791 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1792 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1793 static const
1794 struct processor_costs intel_cost = {
1795 COSTS_N_INSNS (1), /* cost of an add instruction */
1796 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1797 COSTS_N_INSNS (1), /* variable shift costs */
1798 COSTS_N_INSNS (1), /* constant shift costs */
1799 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1800 COSTS_N_INSNS (3), /* HI */
1801 COSTS_N_INSNS (3), /* SI */
1802 COSTS_N_INSNS (4), /* DI */
1803 COSTS_N_INSNS (2)}, /* other */
1804 0, /* cost of multiply per each bit set */
1805 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1806 COSTS_N_INSNS (26), /* HI */
1807 COSTS_N_INSNS (42), /* SI */
1808 COSTS_N_INSNS (74), /* DI */
1809 COSTS_N_INSNS (74)}, /* other */
1810 COSTS_N_INSNS (1), /* cost of movsx */
1811 COSTS_N_INSNS (1), /* cost of movzx */
1812 8, /* "large" insn */
1813 17, /* MOVE_RATIO */
1814 4, /* cost for loading QImode using movzbl */
1815 {4, 4, 4}, /* cost of loading integer registers
1816 in QImode, HImode and SImode.
1817 Relative to reg-reg move (2). */
1818 {4, 4, 4}, /* cost of storing integer registers */
1819 4, /* cost of reg,reg fld/fst */
1820 {12, 12, 12}, /* cost of loading fp registers
1821 in SFmode, DFmode and XFmode */
1822 {6, 6, 8}, /* cost of storing fp registers
1823 in SFmode, DFmode and XFmode */
1824 2, /* cost of moving MMX register */
1825 {8, 8}, /* cost of loading MMX registers
1826 in SImode and DImode */
1827 {8, 8}, /* cost of storing MMX registers
1828 in SImode and DImode */
1829 2, /* cost of moving SSE register */
1830 {8, 8, 8}, /* cost of loading SSE registers
1831 in SImode, DImode and TImode */
1832 {8, 8, 8}, /* cost of storing SSE registers
1833 in SImode, DImode and TImode */
1834 5, /* MMX or SSE register to integer */
1835 32, /* size of l1 cache. */
1836 256, /* size of l2 cache. */
1837 64, /* size of prefetch block */
1838 6, /* number of parallel prefetches */
1839 3, /* Branch cost */
1840 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1841 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1842 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1843 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1844 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1845 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1846 intel_memcpy,
1847 intel_memset,
1848 1, /* scalar_stmt_cost. */
1849 1, /* scalar_load_cost. */
1850 1, /* scalar_store_cost. */
1851 1, /* vec_stmt_cost. */
1852 4, /* vec_to_scalar_cost. */
1853 1, /* scalar_to_vec_cost. */
1854 1, /* vec_align_load_cost. */
1855 2, /* vec_unalign_load_cost. */
1856 1, /* vec_store_cost. */
1857 3, /* cond_taken_branch_cost. */
1858 1, /* cond_not_taken_branch_cost. */
1859 };
1860
1861 /* Generic should produce code tuned for Core i7 (and newer)
1862 and btver1 (and newer) chips. */
1863
1864 static stringop_algs generic_memcpy[2] = {
1865 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1866 {-1, libcall, false}}},
1867 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1868 {-1, libcall, false}}}};
1869 static stringop_algs generic_memset[2] = {
1870 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1871 {-1, libcall, false}}},
1872 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1873 {-1, libcall, false}}}};
1874 static const
1875 struct processor_costs generic_cost = {
1876 COSTS_N_INSNS (1), /* cost of an add instruction */
1877 /* On all chips taken into consideration, lea is 2 cycles or more. With
1878 this cost, however, our current implementation of synth_mult results in
1879 the use of unnecessary temporary registers, causing regressions on several
1880 SPECfp benchmarks. */
1881 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1882 COSTS_N_INSNS (1), /* variable shift costs */
1883 COSTS_N_INSNS (1), /* constant shift costs */
1884 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1885 COSTS_N_INSNS (4), /* HI */
1886 COSTS_N_INSNS (3), /* SI */
1887 COSTS_N_INSNS (4), /* DI */
1888 COSTS_N_INSNS (2)}, /* other */
1889 0, /* cost of multiply per each bit set */
1890 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1891 COSTS_N_INSNS (26), /* HI */
1892 COSTS_N_INSNS (42), /* SI */
1893 COSTS_N_INSNS (74), /* DI */
1894 COSTS_N_INSNS (74)}, /* other */
1895 COSTS_N_INSNS (1), /* cost of movsx */
1896 COSTS_N_INSNS (1), /* cost of movzx */
1897 8, /* "large" insn */
1898 17, /* MOVE_RATIO */
1899 4, /* cost for loading QImode using movzbl */
1900 {4, 4, 4}, /* cost of loading integer registers
1901 in QImode, HImode and SImode.
1902 Relative to reg-reg move (2). */
1903 {4, 4, 4}, /* cost of storing integer registers */
1904 4, /* cost of reg,reg fld/fst */
1905 {12, 12, 12}, /* cost of loading fp registers
1906 in SFmode, DFmode and XFmode */
1907 {6, 6, 8}, /* cost of storing fp registers
1908 in SFmode, DFmode and XFmode */
1909 2, /* cost of moving MMX register */
1910 {8, 8}, /* cost of loading MMX registers
1911 in SImode and DImode */
1912 {8, 8}, /* cost of storing MMX registers
1913 in SImode and DImode */
1914 2, /* cost of moving SSE register */
1915 {8, 8, 8}, /* cost of loading SSE registers
1916 in SImode, DImode and TImode */
1917 {8, 8, 8}, /* cost of storing SSE registers
1918 in SImode, DImode and TImode */
1919 5, /* MMX or SSE register to integer */
1920 32, /* size of l1 cache. */
1921 512, /* size of l2 cache. */
1922 64, /* size of prefetch block */
1923 6, /* number of parallel prefetches */
1924 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1925 value is increased to the perhaps more appropriate value of 5. */
1926 3, /* Branch cost */
1927 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1928 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1929 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1930 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1931 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1932 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1933 generic_memcpy,
1934 generic_memset,
1935 1, /* scalar_stmt_cost. */
1936 1, /* scalar_load_cost. */
1937 1, /* scalar_store_cost. */
1938 1, /* vec_stmt_cost. */
1939 1, /* vec_to_scalar_cost. */
1940 1, /* scalar_to_vec_cost. */
1941 1, /* vec_align_load_cost. */
1942 2, /* vec_unalign_load_cost. */
1943 1, /* vec_store_cost. */
1944 3, /* cond_taken_branch_cost. */
1945 1, /* cond_not_taken_branch_cost. */
1946 };
1947
1948 /* core_cost should produce code tuned for the Core family of CPUs. */
1949 static stringop_algs core_memcpy[2] = {
1950 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1951 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1952 {-1, libcall, false}}}};
1953 static stringop_algs core_memset[2] = {
1954 {libcall, {{6, loop_1_byte, true},
1955 {24, loop, true},
1956 {8192, rep_prefix_4_byte, true},
1957 {-1, libcall, false}}},
1958 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1959 {-1, libcall, false}}}};
1960
1961 static const
1962 struct processor_costs core_cost = {
1963 COSTS_N_INSNS (1), /* cost of an add instruction */
1964 /* On all chips taken into consideration, lea is 2 cycles or more. With
1965 this cost, however, our current implementation of synth_mult results in
1966 the use of unnecessary temporary registers, causing regressions on several
1967 SPECfp benchmarks. */
1968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1969 COSTS_N_INSNS (1), /* variable shift costs */
1970 COSTS_N_INSNS (1), /* constant shift costs */
1971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1972 COSTS_N_INSNS (4), /* HI */
1973 COSTS_N_INSNS (3), /* SI */
1974 COSTS_N_INSNS (4), /* DI */
1975 COSTS_N_INSNS (2)}, /* other */
1976 0, /* cost of multiply per each bit set */
1977 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1978 COSTS_N_INSNS (26), /* HI */
1979 COSTS_N_INSNS (42), /* SI */
1980 COSTS_N_INSNS (74), /* DI */
1981 COSTS_N_INSNS (74)}, /* other */
1982 COSTS_N_INSNS (1), /* cost of movsx */
1983 COSTS_N_INSNS (1), /* cost of movzx */
1984 8, /* "large" insn */
1985 17, /* MOVE_RATIO */
1986 4, /* cost for loading QImode using movzbl */
1987 {4, 4, 4}, /* cost of loading integer registers
1988 in QImode, HImode and SImode.
1989 Relative to reg-reg move (2). */
1990 {4, 4, 4}, /* cost of storing integer registers */
1991 4, /* cost of reg,reg fld/fst */
1992 {12, 12, 12}, /* cost of loading fp registers
1993 in SFmode, DFmode and XFmode */
1994 {6, 6, 8}, /* cost of storing fp registers
1995 in SFmode, DFmode and XFmode */
1996 2, /* cost of moving MMX register */
1997 {8, 8}, /* cost of loading MMX registers
1998 in SImode and DImode */
1999 {8, 8}, /* cost of storing MMX registers
2000 in SImode and DImode */
2001 2, /* cost of moving SSE register */
2002 {8, 8, 8}, /* cost of loading SSE registers
2003 in SImode, DImode and TImode */
2004 {8, 8, 8}, /* cost of storing SSE registers
2005 in SImode, DImode and TImode */
2006 5, /* MMX or SSE register to integer */
2007 64, /* size of l1 cache. */
2008 512, /* size of l2 cache. */
2009 64, /* size of prefetch block */
2010 6, /* number of parallel prefetches */
2011 /* FIXME: perhaps a more appropriate value is 5. */
2012 3, /* Branch cost */
2013 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2014 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2015 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2016 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2017 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2018 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2019 core_memcpy,
2020 core_memset,
2021 1, /* scalar_stmt_cost. */
2022 1, /* scalar_load_cost. */
2023 1, /* scalar_store_cost. */
2024 1, /* vec_stmt_cost. */
2025 1, /* vec_to_scalar_cost. */
2026 1, /* scalar_to_vec_cost. */
2027 1, /* vec_align_load_cost. */
2028 2, /* vec_unalign_load_cost. */
2029 1, /* vec_store_cost. */
2030 3, /* cond_taken_branch_cost. */
2031 1, /* cond_not_taken_branch_cost. */
2032 };
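/* Note (an observation, not part of this file): COSTS_N_INSNS (N) is defined
   in rtl.h as ((N) * 4), so the costs above are expressed in quarters of an
   instruction; that is why small integer adjustments such as
   "COSTS_N_INSNS (1) + 1" for lea can be mixed in meaningfully.  */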
2033
2034
2035 /* Set by -mtune. */
2036 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2037
2038 /* Set by -mtune or -Os. */
2039 const struct processor_costs *ix86_cost = &pentium_cost;
2040
2041 /* Processor feature/optimization bitmasks. */
2042 #define m_386 (1<<PROCESSOR_I386)
2043 #define m_486 (1<<PROCESSOR_I486)
2044 #define m_PENT (1<<PROCESSOR_PENTIUM)
2045 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2046 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2047 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2048 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2049 #define m_CORE2 (1<<PROCESSOR_CORE2)
2050 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2051 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2052 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2053 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2054 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2055 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2056 #define m_KNL (1<<PROCESSOR_KNL)
2057 #define m_INTEL (1<<PROCESSOR_INTEL)
2058
2059 #define m_GEODE (1<<PROCESSOR_GEODE)
2060 #define m_K6 (1<<PROCESSOR_K6)
2061 #define m_K6_GEODE (m_K6 | m_GEODE)
2062 #define m_K8 (1<<PROCESSOR_K8)
2063 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2064 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2065 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2066 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2067 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2068 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2069 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2070 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2071 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2072 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2073 #define m_BTVER (m_BTVER1 | m_BTVER2)
2074 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2075
2076 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2077
2078 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2079 #undef DEF_TUNE
2080 #define DEF_TUNE(tune, name, selector) name,
2081 #include "x86-tune.def"
2082 #undef DEF_TUNE
2083 };
2084
2085 /* Feature tests against the various tunings. */
2086 unsigned char ix86_tune_features[X86_TUNE_LAST];
2087
2088 /* Feature tests against the various tunings used to create ix86_tune_features
2089 based on the processor mask. */
2090 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2091 #undef DEF_TUNE
2092 #define DEF_TUNE(tune, name, selector) selector,
2093 #include "x86-tune.def"
2094 #undef DEF_TUNE
2095 };
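/* For illustration only (a hypothetical entry, not copied from x86-tune.def):
   an entry of the form

     DEF_TUNE (X86_TUNE_EXAMPLE, "example", m_CORE_ALL | m_GENERIC)

   contributes the string "example" to ix86_tune_feature_names above and the
   selector mask (m_CORE_ALL | m_GENERIC) to initial_ix86_tune_features, so
   the two arrays stay index-aligned with the X86_TUNE_* enumerators.  */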
2096
2097 /* Feature tests against the various architecture variations. */
2098 unsigned char ix86_arch_features[X86_ARCH_LAST];
2099
2100 /* Feature tests against the various architecture variations, used to create
2101 ix86_arch_features based on the processor mask. */
2102 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2103 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2104 ~(m_386 | m_486 | m_PENT | m_K6),
2105
2106 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2107 ~m_386,
2108
2109 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2110 ~(m_386 | m_486),
2111
2112 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2113 ~m_386,
2114
2115 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2116 ~m_386,
2117 };
2118
2119 /* In case the average insn count for a single function invocation is
2120 lower than this constant, emit fast (but longer) prologue and
2121 epilogue code. */
2122 #define FAST_PROLOGUE_INSN_COUNT 20
2123
2124 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2125 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2126 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2127 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2128
2129 /* Array of the smallest class containing reg number REGNO, indexed by
2130 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2131
2132 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2133 {
2134 /* ax, dx, cx, bx */
2135 AREG, DREG, CREG, BREG,
2136 /* si, di, bp, sp */
2137 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2138 /* FP registers */
2139 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2140 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2141 /* arg pointer */
2142 NON_Q_REGS,
2143 /* flags, fpsr, fpcr, frame */
2144 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2145 /* SSE registers */
2146 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2147 SSE_REGS, SSE_REGS,
2148 /* MMX registers */
2149 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2150 MMX_REGS, MMX_REGS,
2151 /* REX registers */
2152 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2153 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2154 /* SSE REX registers */
2155 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2156 SSE_REGS, SSE_REGS,
2157 /* AVX-512 SSE registers */
2158 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2159 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2160 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2161 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2162 /* Mask registers. */
2163 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2164 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2165 /* MPX bound registers */
2166 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2167 };
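/* Usage sketch (an assumption about the i386.h side, for illustration only):
   REGNO_REG_CLASS is expected to be defined essentially as

     #define REGNO_REG_CLASS(REGNO)  (regclass_map[(REGNO)])

   so that, e.g., the regno of %eax maps to AREG and an SSE regno to
   SSE_REGS.  */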
2168
2169 /* The "default" register map used in 32bit mode. */
2170
2171 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2172 {
2173 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2174 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2175 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2176 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2177 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2178 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2179 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2180 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2181 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2182 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2183 101, 102, 103, 104, /* bound registers */
2184 };
2185
2186 /* The "default" register map used in 64bit mode. */
2187
2188 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2189 {
2190 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2191 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2192 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2193 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2194 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2195 8,9,10,11,12,13,14,15, /* extended integer registers */
2196 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2197 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2198 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2199 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2200 126, 127, 128, 129, /* bound registers */
2201 };
2202
2203 /* Define the register numbers to be used in Dwarf debugging information.
2204 The SVR4 reference port C compiler uses the following register numbers
2205 in its Dwarf output code:
2206 0 for %eax (gcc regno = 0)
2207 1 for %ecx (gcc regno = 2)
2208 2 for %edx (gcc regno = 1)
2209 3 for %ebx (gcc regno = 3)
2210 4 for %esp (gcc regno = 7)
2211 5 for %ebp (gcc regno = 6)
2212 6 for %esi (gcc regno = 4)
2213 7 for %edi (gcc regno = 5)
2214 The following three DWARF register numbers are never generated by
2215 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2216 believes these numbers have these meanings.
2217 8 for %eip (no gcc equivalent)
2218 9 for %eflags (gcc regno = 17)
2219 10 for %trapno (no gcc equivalent)
2220 It is not at all clear how we should number the FP stack registers
2221 for the x86 architecture. If the version of SDB on x86/svr4 were
2222 a bit less brain dead with respect to floating-point then we would
2223 have a precedent to follow with respect to DWARF register numbers
2224 for x86 FP registers, but the SDB on x86/svr4 is so completely
2225 broken with respect to FP registers that it is hardly worth thinking
2226 of it as something to strive for compatibility with.
2227 The version of x86/svr4 SDB I have at the moment does (partially)
2228 seem to believe that DWARF register number 11 is associated with
2229 the x86 register %st(0), but that's about all. Higher DWARF
2230 register numbers don't seem to be associated with anything in
2231 particular, and even for DWARF regno 11, SDB only seems to under-
2232 stand that it should say that a variable lives in %st(0) (when
2233 asked via an `=' command) if we said it was in DWARF regno 11,
2234 but SDB still prints garbage when asked for the value of the
2235 variable in question (via a `/' command).
2236 (Also note that the labels SDB prints for various FP stack regs
2237 when doing an `x' command are all wrong.)
2238 Note that these problems generally don't affect the native SVR4
2239 C compiler because it doesn't allow the use of -O with -g and
2240 because when it is *not* optimizing, it allocates a memory
2241 location for each floating-point variable, and the memory
2242 location is what gets described in the DWARF AT_location
2243 attribute for the variable in question.
2244 Regardless of the severe mental illness of the x86/svr4 SDB, we
2245 do something sensible here and we use the following DWARF
2246 register numbers. Note that these are all stack-top-relative
2247 numbers.
2248 11 for %st(0) (gcc regno = 8)
2249 12 for %st(1) (gcc regno = 9)
2250 13 for %st(2) (gcc regno = 10)
2251 14 for %st(3) (gcc regno = 11)
2252 15 for %st(4) (gcc regno = 12)
2253 16 for %st(5) (gcc regno = 13)
2254 17 for %st(6) (gcc regno = 14)
2255 18 for %st(7) (gcc regno = 15)
2256 */
2257 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2258 {
2259 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2260 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2261 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2262 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2263 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2264 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2265 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2267 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2268 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2269 101, 102, 103, 104, /* bound registers */
2270 };
2271
2272 /* Define parameter passing and return registers. */
2273
2274 static int const x86_64_int_parameter_registers[6] =
2275 {
2276 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2277 };
2278
2279 static int const x86_64_ms_abi_int_parameter_registers[4] =
2280 {
2281 CX_REG, DX_REG, R8_REG, R9_REG
2282 };
2283
2284 static int const x86_64_int_return_registers[4] =
2285 {
2286 AX_REG, DX_REG, DI_REG, SI_REG
2287 };
2288
2289 /* Additional registers that are clobbered by SYSV calls. */
2290
2291 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2292 {
2293 SI_REG, DI_REG,
2294 XMM6_REG, XMM7_REG,
2295 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2296 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2297 };
2298
2299 /* Define the structure for the machine field in struct function. */
2300
2301 struct GTY(()) stack_local_entry {
2302 unsigned short mode;
2303 unsigned short n;
2304 rtx rtl;
2305 struct stack_local_entry *next;
2306 };
2307
2308 /* Structure describing stack frame layout.
2309 Stack grows downward:
2310
2311 [arguments]
2312 <- ARG_POINTER
2313 saved pc
2314
2315 saved static chain if ix86_static_chain_on_stack
2316
2317 saved frame pointer if frame_pointer_needed
2318 <- HARD_FRAME_POINTER
2319 [saved regs]
2320 <- regs_save_offset
2321 [padding0]
2322
2323 [saved SSE regs]
2324 <- sse_regs_save_offset
2325 [padding1] |
2326 | <- FRAME_POINTER
2327 [va_arg registers] |
2328 |
2329 [frame] |
2330 |
2331 [padding2] | = to_allocate
2332 <- STACK_POINTER
2333 */
2334 struct ix86_frame
2335 {
2336 int nsseregs;
2337 int nregs;
2338 int va_arg_size;
2339 int red_zone_size;
2340 int outgoing_arguments_size;
2341
2342 /* The offsets relative to ARG_POINTER. */
2343 HOST_WIDE_INT frame_pointer_offset;
2344 HOST_WIDE_INT hard_frame_pointer_offset;
2345 HOST_WIDE_INT stack_pointer_offset;
2346 HOST_WIDE_INT hfp_save_offset;
2347 HOST_WIDE_INT reg_save_offset;
2348 HOST_WIDE_INT sse_reg_save_offset;
2349
2350 /* When save_regs_using_mov is set, emit prologue using
2351 move instead of push instructions. */
2352 bool save_regs_using_mov;
2353 };
2354
2355 /* Which cpu are we scheduling for. */
2356 enum attr_cpu ix86_schedule;
2357
2358 /* Which cpu are we optimizing for. */
2359 enum processor_type ix86_tune;
2360
2361 /* Which instruction set architecture to use. */
2362 enum processor_type ix86_arch;
2363
2364 /* True if processor has SSE prefetch instruction. */
2365 unsigned char x86_prefetch_sse;
2366
2367 /* -mstackrealign option */
2368 static const char ix86_force_align_arg_pointer_string[]
2369 = "force_align_arg_pointer";
2370
2371 static rtx (*ix86_gen_leave) (void);
2372 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2373 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2374 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2375 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2376 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2377 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2378 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2379 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2380 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2381 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2382 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2383
2384 /* Preferred alignment for stack boundary in bits. */
2385 unsigned int ix86_preferred_stack_boundary;
2386
2387 /* Alignment for incoming stack boundary in bits specified at
2388 command line. */
2389 static unsigned int ix86_user_incoming_stack_boundary;
2390
2391 /* Default alignment for incoming stack boundary in bits. */
2392 static unsigned int ix86_default_incoming_stack_boundary;
2393
2394 /* Alignment for incoming stack boundary in bits. */
2395 unsigned int ix86_incoming_stack_boundary;
2396
2397 /* Calling abi specific va_list type nodes. */
2398 static GTY(()) tree sysv_va_list_type_node;
2399 static GTY(()) tree ms_va_list_type_node;
2400
2401 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2402 char internal_label_prefix[16];
2403 int internal_label_prefix_len;
2404
2405 /* Fence to use after loop using movnt. */
2406 tree x86_mfence;
2407
2408 /* Register class used for passing a given 64-bit part of an argument.
2409 These represent the classes documented by the psABI, except that the
2410 SSESF and SSEDF classes are basically the SSE class; gcc just uses an
2411 SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2412 
2413 Similarly, we play games with INTEGERSI_CLASS to use cheaper SImode moves
2414 whenever possible (the upper half then contains only padding). */
2415 enum x86_64_reg_class
2416 {
2417 X86_64_NO_CLASS,
2418 X86_64_INTEGER_CLASS,
2419 X86_64_INTEGERSI_CLASS,
2420 X86_64_SSE_CLASS,
2421 X86_64_SSESF_CLASS,
2422 X86_64_SSEDF_CLASS,
2423 X86_64_SSEUP_CLASS,
2424 X86_64_X87_CLASS,
2425 X86_64_X87UP_CLASS,
2426 X86_64_COMPLEX_X87_CLASS,
2427 X86_64_MEMORY_CLASS
2428 };
2429
2430 #define MAX_CLASSES 8
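/* Illustrative example (an assumed typical case, not quoted from the psABI):
   a 16-byte struct { double d; int i; } passed by value classifies its first
   eightbyte as X86_64_SSEDF_CLASS and its second as X86_64_INTEGERSI_CLASS,
   so it is passed partly in an SSE register and partly in a general
   register.  */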
2431
2432 /* Table of constants used by fldpi, fldln2, etc. */
2433 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2434 static bool ext_80387_constants_init = 0;
2435
2436 \f
2437 static struct machine_function * ix86_init_machine_status (void);
2438 static rtx ix86_function_value (const_tree, const_tree, bool);
2439 static bool ix86_function_value_regno_p (const unsigned int);
2440 static unsigned int ix86_function_arg_boundary (machine_mode,
2441 const_tree);
2442 static rtx ix86_static_chain (const_tree, bool);
2443 static int ix86_function_regparm (const_tree, const_tree);
2444 static void ix86_compute_frame_layout (struct ix86_frame *);
2445 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2446 rtx, rtx, int);
2447 static void ix86_add_new_builtins (HOST_WIDE_INT);
2448 static tree ix86_canonical_va_list_type (tree);
2449 static void predict_jump (int);
2450 static unsigned int split_stack_prologue_scratch_regno (void);
2451 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2452
2453 enum ix86_function_specific_strings
2454 {
2455 IX86_FUNCTION_SPECIFIC_ARCH,
2456 IX86_FUNCTION_SPECIFIC_TUNE,
2457 IX86_FUNCTION_SPECIFIC_MAX
2458 };
2459
2460 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2461 const char *, enum fpmath_unit, bool);
2462 static void ix86_function_specific_save (struct cl_target_option *,
2463 struct gcc_options *opts);
2464 static void ix86_function_specific_restore (struct gcc_options *opts,
2465 struct cl_target_option *);
2466 static void ix86_function_specific_print (FILE *, int,
2467 struct cl_target_option *);
2468 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2469 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2470 struct gcc_options *,
2471 struct gcc_options *,
2472 struct gcc_options *);
2473 static bool ix86_can_inline_p (tree, tree);
2474 static void ix86_set_current_function (tree);
2475 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2476
2477 static enum calling_abi ix86_function_abi (const_tree);
2478
2479 \f
2480 #ifndef SUBTARGET32_DEFAULT_CPU
2481 #define SUBTARGET32_DEFAULT_CPU "i386"
2482 #endif
2483
2484 /* Whether -mtune= or -march= were specified */
2485 static int ix86_tune_defaulted;
2486 static int ix86_arch_specified;
2487
2488 /* Vectorization library interface and handlers. */
2489 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2490
2491 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2492 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2493
2494 /* Processor target table, indexed by processor number */
2495 struct ptt
2496 {
2497 const char *const name; /* processor name */
2498 const struct processor_costs *cost; /* Processor costs */
2499 const int align_loop; /* Default alignments. */
2500 const int align_loop_max_skip;
2501 const int align_jump;
2502 const int align_jump_max_skip;
2503 const int align_func;
2504 };
2505
2506 /* This table must be in sync with enum processor_type in i386.h. */
2507 static const struct ptt processor_target_table[PROCESSOR_max] =
2508 {
2509 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2510 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2511 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2512 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2513 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2514 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2515 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2516 {"core2", &core_cost, 16, 10, 16, 10, 16},
2517 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2518 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2519 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2520 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2521 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2522 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2523 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2524 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2525 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2526 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2527 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2528 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2529 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2530 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2531 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2532 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2533 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2534 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2535 };
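/* Usage sketch (for illustration; the real code lives in
   ix86_option_override_internal further below): the table is indexed by the
   selected processor, roughly as

     ix86_tune_cost = processor_target_table[ix86_tune].cost;
     align_functions = processor_target_table[ix86_tune].align_func;

   assuming ix86_tune holds a value of enum processor_type.  */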
2536 \f
2537 static unsigned int
2538 rest_of_handle_insert_vzeroupper (void)
2539 {
2540 int i;
2541
2542 /* vzeroupper instructions are inserted immediately after reload to
2543 account for possible spills from 256-bit registers. The pass
2544 reuses the mode switching infrastructure by re-running the mode
2545 insertion pass, so disable entities that have already been processed. */
2546 for (i = 0; i < MAX_386_ENTITIES; i++)
2547 ix86_optimize_mode_switching[i] = 0;
2548
2549 ix86_optimize_mode_switching[AVX_U128] = 1;
2550
2551 /* Call optimize_mode_switching. */
2552 g->get_passes ()->execute_pass_mode_switching ();
2553 return 0;
2554 }
2555
2556 namespace {
2557
2558 const pass_data pass_data_insert_vzeroupper =
2559 {
2560 RTL_PASS, /* type */
2561 "vzeroupper", /* name */
2562 OPTGROUP_NONE, /* optinfo_flags */
2563 TV_NONE, /* tv_id */
2564 0, /* properties_required */
2565 0, /* properties_provided */
2566 0, /* properties_destroyed */
2567 0, /* todo_flags_start */
2568 TODO_df_finish, /* todo_flags_finish */
2569 };
2570
2571 class pass_insert_vzeroupper : public rtl_opt_pass
2572 {
2573 public:
2574 pass_insert_vzeroupper(gcc::context *ctxt)
2575 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2576 {}
2577
2578 /* opt_pass methods: */
2579 virtual bool gate (function *)
2580 {
2581 return TARGET_AVX && !TARGET_AVX512F
2582 && TARGET_VZEROUPPER && flag_expensive_optimizations
2583 && !optimize_size;
2584 }
2585
2586 virtual unsigned int execute (function *)
2587 {
2588 return rest_of_handle_insert_vzeroupper ();
2589 }
2590
2591 }; // class pass_insert_vzeroupper
2592
2593 } // anon namespace
2594
2595 rtl_opt_pass *
2596 make_pass_insert_vzeroupper (gcc::context *ctxt)
2597 {
2598 return new pass_insert_vzeroupper (ctxt);
2599 }
2600
2601 /* Return true if a red-zone is in use. */
2602
2603 static inline bool
2604 ix86_using_red_zone (void)
2605 {
2606 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2607 }
2608 \f
2609 /* Return a string that documents the current -m options. The caller is
2610 responsible for freeing the string. */
2611
2612 static char *
2613 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2614 const char *tune, enum fpmath_unit fpmath,
2615 bool add_nl_p)
2616 {
2617 struct ix86_target_opts
2618 {
2619 const char *option; /* option string */
2620 HOST_WIDE_INT mask; /* isa mask options */
2621 };
2622
2623 /* This table is ordered so that options like -msse4.2, which imply
2624 other (preceding) options, are matched first. */
2625 static struct ix86_target_opts isa_opts[] =
2626 {
2627 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2628 { "-mfma", OPTION_MASK_ISA_FMA },
2629 { "-mxop", OPTION_MASK_ISA_XOP },
2630 { "-mlwp", OPTION_MASK_ISA_LWP },
2631 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2632 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2633 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2634 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2635 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2636 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2637 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2638 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2639 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2640 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2641 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2642 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2643 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2644 { "-msse3", OPTION_MASK_ISA_SSE3 },
2645 { "-msse2", OPTION_MASK_ISA_SSE2 },
2646 { "-msse", OPTION_MASK_ISA_SSE },
2647 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2648 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2649 { "-mmmx", OPTION_MASK_ISA_MMX },
2650 { "-mabm", OPTION_MASK_ISA_ABM },
2651 { "-mbmi", OPTION_MASK_ISA_BMI },
2652 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2653 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2654 { "-mhle", OPTION_MASK_ISA_HLE },
2655 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2656 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2657 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2658 { "-madx", OPTION_MASK_ISA_ADX },
2659 { "-mtbm", OPTION_MASK_ISA_TBM },
2660 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2661 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2662 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2663 { "-maes", OPTION_MASK_ISA_AES },
2664 { "-msha", OPTION_MASK_ISA_SHA },
2665 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2666 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2667 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2668 { "-mf16c", OPTION_MASK_ISA_F16C },
2669 { "-mrtm", OPTION_MASK_ISA_RTM },
2670 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2671 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2672 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2673 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2674 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2675 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2676 { "-mmpx", OPTION_MASK_ISA_MPX },
2677 { "-mclwb", OPTION_MASK_ISA_CLWB },
2678 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
2679 };
2680
2681 /* Flag options. */
2682 static struct ix86_target_opts flag_opts[] =
2683 {
2684 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2685 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2686 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2687 { "-m80387", MASK_80387 },
2688 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2689 { "-malign-double", MASK_ALIGN_DOUBLE },
2690 { "-mcld", MASK_CLD },
2691 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2692 { "-mieee-fp", MASK_IEEE_FP },
2693 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2694 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2695 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2696 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2697 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2698 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2699 { "-mno-red-zone", MASK_NO_RED_ZONE },
2700 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2701 { "-mrecip", MASK_RECIP },
2702 { "-mrtd", MASK_RTD },
2703 { "-msseregparm", MASK_SSEREGPARM },
2704 { "-mstack-arg-probe", MASK_STACK_PROBE },
2705 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2706 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2707 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2708 { "-mvzeroupper", MASK_VZEROUPPER },
2709 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2710 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2711 { "-mprefer-avx128", MASK_PREFER_AVX128},
2712 };
2713
2714 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2715
2716 char isa_other[40];
2717 char target_other[40];
2718 unsigned num = 0;
2719 unsigned i, j;
2720 char *ret;
2721 char *ptr;
2722 size_t len;
2723 size_t line_len;
2724 size_t sep_len;
2725 const char *abi;
2726
2727 memset (opts, '\0', sizeof (opts));
2728
2729 /* Add -march= option. */
2730 if (arch)
2731 {
2732 opts[num][0] = "-march=";
2733 opts[num++][1] = arch;
2734 }
2735
2736 /* Add -mtune= option. */
2737 if (tune)
2738 {
2739 opts[num][0] = "-mtune=";
2740 opts[num++][1] = tune;
2741 }
2742
2743 /* Add -m32/-m64/-mx32. */
2744 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2745 {
2746 if ((isa & OPTION_MASK_ABI_64) != 0)
2747 abi = "-m64";
2748 else
2749 abi = "-mx32";
2750 isa &= ~ (OPTION_MASK_ISA_64BIT
2751 | OPTION_MASK_ABI_64
2752 | OPTION_MASK_ABI_X32);
2753 }
2754 else
2755 abi = "-m32";
2756 opts[num++][0] = abi;
2757
2758 /* Pick out the options in isa options. */
2759 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2760 {
2761 if ((isa & isa_opts[i].mask) != 0)
2762 {
2763 opts[num++][0] = isa_opts[i].option;
2764 isa &= ~ isa_opts[i].mask;
2765 }
2766 }
2767
2768 if (isa && add_nl_p)
2769 {
2770 opts[num++][0] = isa_other;
2771 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2772 isa);
2773 }
2774
2775 /* Add flag options. */
2776 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2777 {
2778 if ((flags & flag_opts[i].mask) != 0)
2779 {
2780 opts[num++][0] = flag_opts[i].option;
2781 flags &= ~ flag_opts[i].mask;
2782 }
2783 }
2784
2785 if (flags && add_nl_p)
2786 {
2787 opts[num++][0] = target_other;
2788 sprintf (target_other, "(other flags: %#x)", flags);
2789 }
2790
2791 /* Add -fpmath= option. */
2792 if (fpmath)
2793 {
2794 opts[num][0] = "-mfpmath=";
2795 switch ((int) fpmath)
2796 {
2797 case FPMATH_387:
2798 opts[num++][1] = "387";
2799 break;
2800
2801 case FPMATH_SSE:
2802 opts[num++][1] = "sse";
2803 break;
2804
2805 case FPMATH_387 | FPMATH_SSE:
2806 opts[num++][1] = "sse+387";
2807 break;
2808
2809 default:
2810 gcc_unreachable ();
2811 }
2812 }
2813
2814 /* Any options? */
2815 if (num == 0)
2816 return NULL;
2817
2818 gcc_assert (num < ARRAY_SIZE (opts));
2819
2820 /* Size the string. */
2821 len = 0;
2822 sep_len = (add_nl_p) ? 3 : 1;
2823 for (i = 0; i < num; i++)
2824 {
2825 len += sep_len;
2826 for (j = 0; j < 2; j++)
2827 if (opts[i][j])
2828 len += strlen (opts[i][j]);
2829 }
2830
2831 /* Build the string. */
2832 ret = ptr = (char *) xmalloc (len);
2833 line_len = 0;
2834
2835 for (i = 0; i < num; i++)
2836 {
2837 size_t len2[2];
2838
2839 for (j = 0; j < 2; j++)
2840 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2841
2842 if (i != 0)
2843 {
2844 *ptr++ = ' ';
2845 line_len++;
2846
2847 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2848 {
2849 *ptr++ = '\\';
2850 *ptr++ = '\n';
2851 line_len = 0;
2852 }
2853 }
2854
2855 for (j = 0; j < 2; j++)
2856 if (opts[i][j])
2857 {
2858 memcpy (ptr, opts[i][j], len2[j]);
2859 ptr += len2[j];
2860 line_len += len2[j];
2861 }
2862 }
2863
2864 *ptr = '\0';
2865 gcc_assert (ret + len >= ptr);
2866
2867 return ret;
2868 }
2869
2870 /* Return true if profiling code should be emitted before the
2871 prologue, and false otherwise.
2872 Note: for x86 the "hotfix" case is rejected with sorry (). */
2873 static bool
2874 ix86_profile_before_prologue (void)
2875 {
2876 return flag_fentry != 0;
2877 }
2878
2879 /* Function that is callable from the debugger to print the current
2880 options. */
2881 void ATTRIBUTE_UNUSED
2882 ix86_debug_options (void)
2883 {
2884 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2885 ix86_arch_string, ix86_tune_string,
2886 ix86_fpmath, true);
2887
2888 if (opts)
2889 {
2890 fprintf (stderr, "%s\n\n", opts);
2891 free (opts);
2892 }
2893 else
2894 fputs ("<no options>\n\n", stderr);
2895
2896 return;
2897 }
2898
2899 static const char *stringop_alg_names[] = {
2900 #define DEF_ENUM
2901 #define DEF_ALG(alg, name) #name,
2902 #include "stringop.def"
2903 #undef DEF_ENUM
2904 #undef DEF_ALG
2905 };
2906
2907 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2908 The string is of the following form (or comma separated list of it):
2909
2910 strategy_alg:max_size:[align|noalign]
2911
2912 where the full size range for the strategy is either [0, max_size] or
2913 [min_size, max_size], in which min_size is the max_size + 1 of the
2914 preceding range. The last size range must have max_size == -1.
2915
2916 Examples:
2917
2918 1.
2919 -mmemcpy-strategy=libcall:-1:noalign
2920
2921 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2922
2923
2924 2.
2925 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2926
2927 This is to tell the compiler to use the following strategy for memset
2928 1) when the expected size is between [1, 16], use rep_8byte strategy;
2929 2) when the size is between [17, 2048], use vector_loop;
2930 3) when the size is > 2048, use libcall. */
2931
2932 struct stringop_size_range
2933 {
2934 int max;
2935 stringop_alg alg;
2936 bool noalign;
2937 };
2938
2939 static void
2940 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2941 {
2942 const struct stringop_algs *default_algs;
2943 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2944 char *curr_range_str, *next_range_str;
2945 int i = 0, n = 0;
2946
2947 if (is_memset)
2948 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2949 else
2950 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2951
2952 curr_range_str = strategy_str;
2953
2954 do
2955 {
2956 int maxs;
2957 char alg_name[128];
2958 char align[16];
2959 next_range_str = strchr (curr_range_str, ',');
2960 if (next_range_str)
2961 *next_range_str++ = '\0';
2962
2963 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2964 alg_name, &maxs, align))
2965 {
2966 error ("wrong arg %s to option %s", curr_range_str,
2967 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2968 return;
2969 }
2970
2971 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2972 {
2973 error ("size ranges of option %s should be increasing",
2974 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2975 return;
2976 }
2977
2978 for (i = 0; i < last_alg; i++)
2979 if (!strcmp (alg_name, stringop_alg_names[i]))
2980 break;
2981
2982 if (i == last_alg)
2983 {
2984 error ("wrong stringop strategy name %s specified for option %s",
2985 alg_name,
2986 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2987 return;
2988 }
2989
2990 input_ranges[n].max = maxs;
2991 input_ranges[n].alg = (stringop_alg) i;
2992 if (!strcmp (align, "align"))
2993 input_ranges[n].noalign = false;
2994 else if (!strcmp (align, "noalign"))
2995 input_ranges[n].noalign = true;
2996 else
2997 {
2998 error ("unknown alignment %s specified for option %s",
2999 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3000 return;
3001 }
3002 n++;
3003 curr_range_str = next_range_str;
3004 }
3005 while (curr_range_str);
3006
3007 if (input_ranges[n - 1].max != -1)
3008 {
3009 error ("the max value for the last size range should be -1"
3010 " for option %s",
3011 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3012 return;
3013 }
3014
3015 if (n > MAX_STRINGOP_ALGS)
3016 {
3017 error ("too many size ranges specified in option %s",
3018 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3019 return;
3020 }
3021
3022 /* Now override the default algs array. */
3023 for (i = 0; i < n; i++)
3024 {
3025 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3026 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3027 = input_ranges[i].alg;
3028 *const_cast<int *>(&default_algs->size[i].noalign)
3029 = input_ranges[i].noalign;
3030 }
3031 }
3032
3033 \f
3034 /* Parse the -mtune-ctrl= option. When DUMP is true,
3035 print the features that are explicitly set. */
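/* For example (the feature names here are illustrative),
   -mtune-ctrl=use_leave,^use_himode_fiop would set the "use_leave" tuning
   flag and clear "use_himode_fiop": entries are comma separated, and a
   leading '^' clears the named feature instead of setting it. */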
3036
3037 static void
3038 parse_mtune_ctrl_str (bool dump)
3039 {
3040 if (!ix86_tune_ctrl_string)
3041 return;
3042
3043 char *next_feature_string = NULL;
3044 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3045 char *orig = curr_feature_string;
3046 int i;
3047 do
3048 {
3049 bool clear = false;
3050
3051 next_feature_string = strchr (curr_feature_string, ',');
3052 if (next_feature_string)
3053 *next_feature_string++ = '\0';
3054 if (*curr_feature_string == '^')
3055 {
3056 curr_feature_string++;
3057 clear = true;
3058 }
3059 for (i = 0; i < X86_TUNE_LAST; i++)
3060 {
3061 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3062 {
3063 ix86_tune_features[i] = !clear;
3064 if (dump)
3065 fprintf (stderr, "Explicitly %s feature %s\n",
3066 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3067 break;
3068 }
3069 }
3070 if (i == X86_TUNE_LAST)
3071 error ("Unknown parameter to option -mtune-ctrl: %s",
3072 clear ? curr_feature_string - 1 : curr_feature_string);
3073 curr_feature_string = next_feature_string;
3074 }
3075 while (curr_feature_string);
3076 free (orig);
3077 }
3078
3079 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3080 processor type. */
3081
3082 static void
3083 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3084 {
3085 unsigned int ix86_tune_mask = 1u << ix86_tune;
3086 int i;
3087
3088 for (i = 0; i < X86_TUNE_LAST; ++i)
3089 {
3090 if (ix86_tune_no_default)
3091 ix86_tune_features[i] = 0;
3092 else
3093 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3094 }
3095
3096 if (dump)
3097 {
3098 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3099 for (i = 0; i < X86_TUNE_LAST; i++)
3100 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3101 ix86_tune_features[i] ? "on" : "off");
3102 }
3103
3104 parse_mtune_ctrl_str (dump);
3105 }
3106
3107
3108 /* Override various settings based on options. If MAIN_ARGS_P, the
3109 options are from the command line, otherwise they are from
3110 attributes. */
3111
3112 static void
3113 ix86_option_override_internal (bool main_args_p,
3114 struct gcc_options *opts,
3115 struct gcc_options *opts_set)
3116 {
3117 int i;
3118 unsigned int ix86_arch_mask;
3119 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3120 const char *prefix;
3121 const char *suffix;
3122 const char *sw;
3123
3124 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3125 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3126 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3127 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3128 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3129 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3130 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3131 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3132 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3133 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3134 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3135 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3136 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3137 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3138 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3139 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3140 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3141 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3142 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3143 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3144 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3145 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3146 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3147 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3148 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3149 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3150 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3151 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3152 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3153 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3154 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3155 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3156 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3157 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3158 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3159 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3160 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3161 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3162 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3163 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3164 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3165 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3166 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3167 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3168 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3169 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3170 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3171 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3172 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3173 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3174 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3175 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3176 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3177 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3178 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3179 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3180 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3181
3182 #define PTA_CORE2 \
3183 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3184 | PTA_CX16 | PTA_FXSR)
3185 #define PTA_NEHALEM \
3186 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3187 #define PTA_WESTMERE \
3188 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3189 #define PTA_SANDYBRIDGE \
3190 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3191 #define PTA_IVYBRIDGE \
3192 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3193 #define PTA_HASWELL \
3194 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3195 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3196 #define PTA_BROADWELL \
3197 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3198 #define PTA_KNL \
3199 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3200 #define PTA_BONNELL \
3201 (PTA_CORE2 | PTA_MOVBE)
3202 #define PTA_SILVERMONT \
3203 (PTA_WESTMERE | PTA_MOVBE)
3204
3205 /* if this reaches 64, need to widen struct pta flags below */
3206
3207 static struct pta
3208 {
3209 const char *const name; /* processor name or nickname. */
3210 const enum processor_type processor;
3211 const enum attr_cpu schedule;
3212 const unsigned HOST_WIDE_INT flags;
3213 }
3214 const processor_alias_table[] =
3215 {
3216 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3217 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3218 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3219 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3220 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3221 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3222 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3223 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3224 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3225 PTA_MMX | PTA_SSE | PTA_FXSR},
3226 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3227 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3228 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3229 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3230 PTA_MMX | PTA_SSE | PTA_FXSR},
3231 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3232 PTA_MMX | PTA_SSE | PTA_FXSR},
3233 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3234 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3235 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3236 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3237 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3238 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3239 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3240 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3241 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3242 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3243 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3244 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3245 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3246 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3247 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3248 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3249 PTA_SANDYBRIDGE},
3250 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3251 PTA_SANDYBRIDGE},
3252 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3253 PTA_IVYBRIDGE},
3254 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3255 PTA_IVYBRIDGE},
3256 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3257 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3258 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3259 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3260 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3261 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3262 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3263 {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL},
3264 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3265 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3266 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3267 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3268 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3269 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3270 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3271 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3272 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3273 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3274 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3275 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3276 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3277 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3278 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3279 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3280 {"x86-64", PROCESSOR_K8, CPU_K8,
3281 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3282 {"k8", PROCESSOR_K8, CPU_K8,
3283 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3284 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3285 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3286 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3287 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3288 {"opteron", PROCESSOR_K8, CPU_K8,
3289 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3290 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3291 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3292 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3293 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3294 {"athlon64", PROCESSOR_K8, CPU_K8,
3295 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3296 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3297 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3298 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3299 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3300 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3301 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3302 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3303 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3304 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3305 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3306 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3307 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3308 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3309 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3310 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3311 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3312 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3313 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3314 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3315 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3316 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3317 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3318 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3319 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3320 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3321 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3322 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3323 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3324 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3325 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3326 | PTA_XSAVEOPT | PTA_FSGSBASE},
3327 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3328 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3329 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3330 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3331 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3332 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3333 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3334 | PTA_MOVBE},
3335 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3336 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3337 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3338 | PTA_FXSR | PTA_XSAVE},
3339 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3340 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3341 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3342 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3343 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3344 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3345
3346 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3347 PTA_64BIT
3348 | PTA_HLE /* flags are only used for -march switch. */ },
3349 };
3350
3351 /* -mrecip options. */
3352 static struct
3353 {
3354 const char *string; /* option name */
3355 unsigned int mask; /* mask bits to set */
3356 }
3357 const recip_options[] =
3358 {
3359 { "all", RECIP_MASK_ALL },
3360 { "none", RECIP_MASK_NONE },
3361 { "div", RECIP_MASK_DIV },
3362 { "sqrt", RECIP_MASK_SQRT },
3363 { "vec-div", RECIP_MASK_VEC_DIV },
3364 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3365 };
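/* For example, -mrecip=all,!sqrt turns on every reciprocal approximation
   except the scalar square-root one: each comma-separated token selects a
   mask from the table above, and a leading '!' (handled in the parsing
   loop further down) clears those mask bits instead of setting them. */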
3366
3367 int const pta_size = ARRAY_SIZE (processor_alias_table);
3368
3369 /* Set up prefix/suffix so the error messages refer to either the command
3370 line argument, or the attribute(target). */
3371 if (main_args_p)
3372 {
3373 prefix = "-m";
3374 suffix = "";
3375 sw = "switch";
3376 }
3377 else
3378 {
3379 prefix = "option(\"";
3380 suffix = "\")";
3381 sw = "attribute";
3382 }
3383
3384 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3385 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3386 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3387 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3388 #ifdef TARGET_BI_ARCH
3389 else
3390 {
3391 #if TARGET_BI_ARCH == 1
3392 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3393 is on and OPTION_MASK_ABI_X32 is off. We turn off
3394 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3395 -mx32. */
3396 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3397 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3398 #else
3399 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3400 on and OPTION_MASK_ABI_64 is off. We turn off
3401 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3402 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3403 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3404 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3405 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3406 #endif
3407 }
3408 #endif
3409
3410 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3411 {
3412 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3413 OPTION_MASK_ABI_64 for TARGET_X32. */
3414 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3415 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3416 }
3417 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3418 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3419 | OPTION_MASK_ABI_X32
3420 | OPTION_MASK_ABI_64);
3421 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3422 {
3423 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3424 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3425 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3426 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3427 }
3428
3429 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3430 SUBTARGET_OVERRIDE_OPTIONS;
3431 #endif
3432
3433 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3434 SUBSUBTARGET_OVERRIDE_OPTIONS;
3435 #endif
3436
3437 /* -fPIC is the default for x86_64. */
3438 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3439 opts->x_flag_pic = 2;
3440
3441 /* Need to check -mtune=generic first. */
3442 if (opts->x_ix86_tune_string)
3443 {
3444 /* As special support for cross compilers we read -mtune=native
3445 as -mtune=generic. With native compilers we won't see
3446 -mtune=native, as it has already been rewritten by the driver. */
3447 if (!strcmp (opts->x_ix86_tune_string, "native"))
3448 {
3449 opts->x_ix86_tune_string = "generic";
3450 }
3451 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3452 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3453 "%stune=k8%s or %stune=generic%s instead as appropriate",
3454 prefix, suffix, prefix, suffix, prefix, suffix);
3455 }
3456 else
3457 {
3458 if (opts->x_ix86_arch_string)
3459 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3460 if (!opts->x_ix86_tune_string)
3461 {
3462 opts->x_ix86_tune_string
3463 = processor_target_table[TARGET_CPU_DEFAULT].name;
3464 ix86_tune_defaulted = 1;
3465 }
3466
3467 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3468 or defaulted. We need to use a sensible tune option. */
3469 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3470 {
3471 opts->x_ix86_tune_string = "generic";
3472 }
3473 }
3474
3475 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3476 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3477 {
3478 /* rep; movq isn't available in 32-bit code. */
3479 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3480 opts->x_ix86_stringop_alg = no_stringop;
3481 }
3482
3483 if (!opts->x_ix86_arch_string)
3484 opts->x_ix86_arch_string
3485 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3486 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3487 else
3488 ix86_arch_specified = 1;
3489
3490 if (opts_set->x_ix86_pmode)
3491 {
3492 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3493 && opts->x_ix86_pmode == PMODE_SI)
3494 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3495 && opts->x_ix86_pmode == PMODE_DI))
3496 error ("address mode %qs not supported in the %s bit mode",
3497 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3498 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3499 }
3500 else
3501 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3502 ? PMODE_DI : PMODE_SI;
3503
3504 if (!opts_set->x_ix86_abi)
3505 opts->x_ix86_abi = DEFAULT_ABI;
3506
3507 /* For targets using the MS ABI, enable ms-extensions unless it was
3508 explicitly turned off. For non-MS ABI targets we turn this
3509 option off. */
3510 if (!opts_set->x_flag_ms_extensions)
3511 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3512
3513 if (opts_set->x_ix86_cmodel)
3514 {
3515 switch (opts->x_ix86_cmodel)
3516 {
3517 case CM_SMALL:
3518 case CM_SMALL_PIC:
3519 if (opts->x_flag_pic)
3520 opts->x_ix86_cmodel = CM_SMALL_PIC;
3521 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3522 error ("code model %qs not supported in the %s bit mode",
3523 "small", "32");
3524 break;
3525
3526 case CM_MEDIUM:
3527 case CM_MEDIUM_PIC:
3528 if (opts->x_flag_pic)
3529 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3530 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3531 error ("code model %qs not supported in the %s bit mode",
3532 "medium", "32");
3533 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3534 error ("code model %qs not supported in x32 mode",
3535 "medium");
3536 break;
3537
3538 case CM_LARGE:
3539 case CM_LARGE_PIC:
3540 if (opts->x_flag_pic)
3541 opts->x_ix86_cmodel = CM_LARGE_PIC;
3542 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3543 error ("code model %qs not supported in the %s bit mode",
3544 "large", "32");
3545 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3546 error ("code model %qs not supported in x32 mode",
3547 "large");
3548 break;
3549
3550 case CM_32:
3551 if (opts->x_flag_pic)
3552 error ("code model %s does not support PIC mode", "32");
3553 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3554 error ("code model %qs not supported in the %s bit mode",
3555 "32", "64");
3556 break;
3557
3558 case CM_KERNEL:
3559 if (opts->x_flag_pic)
3560 {
3561 error ("code model %s does not support PIC mode", "kernel");
3562 opts->x_ix86_cmodel = CM_32;
3563 }
3564 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3565 error ("code model %qs not supported in the %s bit mode",
3566 "kernel", "32");
3567 break;
3568
3569 default:
3570 gcc_unreachable ();
3571 }
3572 }
3573 else
3574 {
3575 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3576 use of rip-relative addressing. This eliminates fixups that
3577 would otherwise be needed if this object is to be placed in a
3578 DLL, and is essentially just as efficient as direct addressing. */
3579 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3580 && (TARGET_RDOS || TARGET_PECOFF))
3581 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3582 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3583 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3584 else
3585 opts->x_ix86_cmodel = CM_32;
3586 }
3587 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3588 {
3589 error ("-masm=intel not supported in this configuration");
3590 opts->x_ix86_asm_dialect = ASM_ATT;
3591 }
3592 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3593 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3594 sorry ("%i-bit mode not compiled in",
3595 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3596
3597 for (i = 0; i < pta_size; i++)
3598 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3599 {
3600 ix86_schedule = processor_alias_table[i].schedule;
3601 ix86_arch = processor_alias_table[i].processor;
3602 /* Default cpu tuning to the architecture. */
3603 ix86_tune = ix86_arch;
3604
3605 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3606 && !(processor_alias_table[i].flags & PTA_64BIT))
3607 error ("CPU you selected does not support x86-64 "
3608 "instruction set");
3609
3610 if (processor_alias_table[i].flags & PTA_MMX
3611 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3612 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3613 if (processor_alias_table[i].flags & PTA_3DNOW
3614 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3615 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3616 if (processor_alias_table[i].flags & PTA_3DNOW_A
3617 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3618 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3619 if (processor_alias_table[i].flags & PTA_SSE
3620 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3621 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3622 if (processor_alias_table[i].flags & PTA_SSE2
3623 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3624 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3625 if (processor_alias_table[i].flags & PTA_SSE3
3626 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3627 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3628 if (processor_alias_table[i].flags & PTA_SSSE3
3629 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3630 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3631 if (processor_alias_table[i].flags & PTA_SSE4_1
3632 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3633 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3634 if (processor_alias_table[i].flags & PTA_SSE4_2
3635 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3636 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3637 if (processor_alias_table[i].flags & PTA_AVX
3638 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3639 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3640 if (processor_alias_table[i].flags & PTA_AVX2
3641 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3642 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3643 if (processor_alias_table[i].flags & PTA_FMA
3644 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3645 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3646 if (processor_alias_table[i].flags & PTA_SSE4A
3647 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3648 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3649 if (processor_alias_table[i].flags & PTA_FMA4
3650 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3651 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3652 if (processor_alias_table[i].flags & PTA_XOP
3653 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3654 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3655 if (processor_alias_table[i].flags & PTA_LWP
3656 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3657 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3658 if (processor_alias_table[i].flags & PTA_ABM
3659 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3660 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3661 if (processor_alias_table[i].flags & PTA_BMI
3662 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3663 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3664 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3665 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3666 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3667 if (processor_alias_table[i].flags & PTA_TBM
3668 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3669 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3670 if (processor_alias_table[i].flags & PTA_BMI2
3671 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3672 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3673 if (processor_alias_table[i].flags & PTA_CX16
3674 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3675 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3676 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3677 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3678 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3679 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3680 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3681 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3682 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3683 if (processor_alias_table[i].flags & PTA_MOVBE
3684 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3685 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3686 if (processor_alias_table[i].flags & PTA_AES
3687 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3688 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3689 if (processor_alias_table[i].flags & PTA_SHA
3690 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3691 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3692 if (processor_alias_table[i].flags & PTA_PCLMUL
3693 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3694 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3695 if (processor_alias_table[i].flags & PTA_FSGSBASE
3696 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3697 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3698 if (processor_alias_table[i].flags & PTA_RDRND
3699 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3700 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3701 if (processor_alias_table[i].flags & PTA_F16C
3702 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3703 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3704 if (processor_alias_table[i].flags & PTA_RTM
3705 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3706 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3707 if (processor_alias_table[i].flags & PTA_HLE
3708 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3709 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3710 if (processor_alias_table[i].flags & PTA_PRFCHW
3711 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3712 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3713 if (processor_alias_table[i].flags & PTA_RDSEED
3714 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3715 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3716 if (processor_alias_table[i].flags & PTA_ADX
3717 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3718 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3719 if (processor_alias_table[i].flags & PTA_FXSR
3720 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3721 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3722 if (processor_alias_table[i].flags & PTA_XSAVE
3723 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3724 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3725 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3726 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3727 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3728 if (processor_alias_table[i].flags & PTA_AVX512F
3729 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3730 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3731 if (processor_alias_table[i].flags & PTA_AVX512ER
3732 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3733 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3734 if (processor_alias_table[i].flags & PTA_AVX512PF
3735 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3736 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3737 if (processor_alias_table[i].flags & PTA_AVX512CD
3738 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3739 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3740 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3741 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3742 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3743 if (processor_alias_table[i].flags & PTA_PCOMMIT
3744 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3745 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3746 if (processor_alias_table[i].flags & PTA_CLWB
3747 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3748 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3749 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3750 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3751 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3752 if (processor_alias_table[i].flags & PTA_XSAVEC
3753 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3754 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3755 if (processor_alias_table[i].flags & PTA_XSAVES
3756 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3757 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3758 if (processor_alias_table[i].flags & PTA_AVX512DQ
3759 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3760 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3761 if (processor_alias_table[i].flags & PTA_AVX512BW
3762 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3763 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3764 if (processor_alias_table[i].flags & PTA_AVX512VL
3765 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3766 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3767 if (processor_alias_table[i].flags & PTA_MPX
3768 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3769 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3770 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3771 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3772 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3773 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3774 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3775 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3776 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3777 x86_prefetch_sse = true;
3778
3779 break;
3780 }
3781
3782 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3783 error ("Intel MPX does not support x32");
3784
3788 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3789 error ("generic CPU can be used only for %stune=%s %s",
3790 prefix, suffix, sw);
3791 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3792 error ("intel CPU can be used only for %stune=%s %s",
3793 prefix, suffix, sw);
3794 else if (i == pta_size)
3795 error ("bad value (%s) for %sarch=%s %s",
3796 opts->x_ix86_arch_string, prefix, suffix, sw);
3797
3798 ix86_arch_mask = 1u << ix86_arch;
3799 for (i = 0; i < X86_ARCH_LAST; ++i)
3800 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3801
3802 for (i = 0; i < pta_size; i++)
3803 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3804 {
3805 ix86_schedule = processor_alias_table[i].schedule;
3806 ix86_tune = processor_alias_table[i].processor;
3807 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3808 {
3809 if (!(processor_alias_table[i].flags & PTA_64BIT))
3810 {
3811 if (ix86_tune_defaulted)
3812 {
3813 opts->x_ix86_tune_string = "x86-64";
3814 for (i = 0; i < pta_size; i++)
3815 if (! strcmp (opts->x_ix86_tune_string,
3816 processor_alias_table[i].name))
3817 break;
3818 ix86_schedule = processor_alias_table[i].schedule;
3819 ix86_tune = processor_alias_table[i].processor;
3820 }
3821 else
3822 error ("CPU you selected does not support x86-64 "
3823 "instruction set");
3824 }
3825 }
3826 /* Intel CPUs have always interpreted SSE prefetch instructions as
3827 NOPs; so, we can enable SSE prefetch instructions even when
3828 -mtune (rather than -march) points us to a processor that has them.
3829 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3830 higher processors. */
3831 if (TARGET_CMOV
3832 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3833 x86_prefetch_sse = true;
3834 break;
3835 }
3836
3837 if (ix86_tune_specified && i == pta_size)
3838 error ("bad value (%s) for %stune=%s %s",
3839 opts->x_ix86_tune_string, prefix, suffix, sw);
3840
3841 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3842
3843 #ifndef USE_IX86_FRAME_POINTER
3844 #define USE_IX86_FRAME_POINTER 0
3845 #endif
3846
3847 #ifndef USE_X86_64_FRAME_POINTER
3848 #define USE_X86_64_FRAME_POINTER 0
3849 #endif
3850
3851 /* Set the default values for switches whose default depends on TARGET_64BIT
3852 in case they weren't overwritten by command line options. */
3853 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3854 {
3855 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3856 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3857 if (opts->x_flag_asynchronous_unwind_tables
3858 && !opts_set->x_flag_unwind_tables
3859 && TARGET_64BIT_MS_ABI)
3860 opts->x_flag_unwind_tables = 1;
3861 if (opts->x_flag_asynchronous_unwind_tables == 2)
3862 opts->x_flag_unwind_tables
3863 = opts->x_flag_asynchronous_unwind_tables = 1;
3864 if (opts->x_flag_pcc_struct_return == 2)
3865 opts->x_flag_pcc_struct_return = 0;
3866 }
3867 else
3868 {
3869 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3870 opts->x_flag_omit_frame_pointer
3871 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3872 if (opts->x_flag_asynchronous_unwind_tables == 2)
3873 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3874 if (opts->x_flag_pcc_struct_return == 2)
3875 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3876 }
3877
3878 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3879 /* TODO: ix86_cost should be chosen at instruction or function granularity,
3880 so that for cold code we use size_cost even in !optimize_size compilations. */
3881 if (opts->x_optimize_size)
3882 ix86_cost = &ix86_size_cost;
3883 else
3884 ix86_cost = ix86_tune_cost;
3885
3886 /* Arrange to set up i386_stack_locals for all functions. */
3887 init_machine_status = ix86_init_machine_status;
3888
3889 /* Validate -mregparm= value. */
3890 if (opts_set->x_ix86_regparm)
3891 {
3892 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3893 warning (0, "-mregparm is ignored in 64-bit mode");
3894 if (opts->x_ix86_regparm > REGPARM_MAX)
3895 {
3896 error ("-mregparm=%d is not between 0 and %d",
3897 opts->x_ix86_regparm, REGPARM_MAX);
3898 opts->x_ix86_regparm = 0;
3899 }
3900 }
3901 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3902 opts->x_ix86_regparm = REGPARM_MAX;
3903
3904 /* Default align_* from the processor table. */
3905 if (opts->x_align_loops == 0)
3906 {
3907 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3908 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3909 }
3910 if (opts->x_align_jumps == 0)
3911 {
3912 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3913 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3914 }
3915 if (opts->x_align_functions == 0)
3916 {
3917 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3918 }
3919
3920 /* Provide default for -mbranch-cost= value. */
3921 if (!opts_set->x_ix86_branch_cost)
3922 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
3923
3924 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3925 {
3926 opts->x_target_flags
3927 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3928
3929 /* Enable by default the SSE and MMX builtins. Do allow the user to
3930 explicitly disable any of these. In particular, disabling SSE and
3931 MMX for kernel code is extremely useful. */
3932 if (!ix86_arch_specified)
3933 opts->x_ix86_isa_flags
3934 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3935 | TARGET_SUBTARGET64_ISA_DEFAULT)
3936 & ~opts->x_ix86_isa_flags_explicit);
3937
3938 if (TARGET_RTD_P (opts->x_target_flags))
3939 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3940 }
3941 else
3942 {
3943 opts->x_target_flags
3944 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3945
3946 if (!ix86_arch_specified)
3947 opts->x_ix86_isa_flags
3948 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3949
3950 /* The i386 ABI does not specify a red zone. It still makes sense to use one
3951 when the programmer takes care to keep the stack from being destroyed. */
3952 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3953 opts->x_target_flags |= MASK_NO_RED_ZONE;
3954 }
3955
3956 /* Keep nonleaf frame pointers. */
3957 if (opts->x_flag_omit_frame_pointer)
3958 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3959 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3960 opts->x_flag_omit_frame_pointer = 1;
3961
3962 /* If we're doing fast math, we don't care about comparison order
3963 wrt NaNs. This lets us use a shorter comparison sequence. */
3964 if (opts->x_flag_finite_math_only)
3965 opts->x_target_flags &= ~MASK_IEEE_FP;
3966
3967 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3968 since the insns won't need emulation. */
3969 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3970 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3971
3972 /* Likewise, if the target doesn't have a 387, or we've specified
3973 software floating point, don't use 387 inline intrinsics. */
3974 if (!TARGET_80387_P (opts->x_target_flags))
3975 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3976
3977 /* Turn on MMX builtins for -msse. */
3978 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3979 opts->x_ix86_isa_flags
3980 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3981
3982 /* Enable SSE prefetch. */
3983 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3984 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3985 x86_prefetch_sse = true;
3986
3987 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3988 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3989 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3990 opts->x_ix86_isa_flags
3991 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3992
3993 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3994 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3995 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3996 opts->x_ix86_isa_flags
3997 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3998
3999 /* Enable lzcnt instruction for -mabm. */
4000 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
4001 opts->x_ix86_isa_flags
4002 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4003
4004 /* Validate -mpreferred-stack-boundary= value or default it to
4005 PREFERRED_STACK_BOUNDARY_DEFAULT. */
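/* For example, -mpreferred-stack-boundary=4 gives (1 << 4) * BITS_PER_UNIT
   = 128 bits, i.e. a 16-byte boundary, which is what the x86-64 psABI
   requires at function calls. */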
4006 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4007 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4008 {
4009 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4010 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4011 int max = (TARGET_SEH ? 4 : 12);
4012
4013 if (opts->x_ix86_preferred_stack_boundary_arg < min
4014 || opts->x_ix86_preferred_stack_boundary_arg > max)
4015 {
4016 if (min == max)
4017 error ("-mpreferred-stack-boundary is not supported "
4018 "for this target");
4019 else
4020 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4021 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4022 }
4023 else
4024 ix86_preferred_stack_boundary
4025 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4026 }
4027
4028 /* Set the default value for -mstackrealign. */
4029 if (opts->x_ix86_force_align_arg_pointer == -1)
4030 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4031
4032 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4033
4034 /* Validate -mincoming-stack-boundary= value or default it to
4035 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4036 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4037 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4038 {
4039 if (opts->x_ix86_incoming_stack_boundary_arg
4040 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
4041 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4042 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4043 opts->x_ix86_incoming_stack_boundary_arg,
4044 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
4045 else
4046 {
4047 ix86_user_incoming_stack_boundary
4048 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4049 ix86_incoming_stack_boundary
4050 = ix86_user_incoming_stack_boundary;
4051 }
4052 }
4053
4054 #ifndef NO_PROFILE_COUNTERS
4055 if (flag_nop_mcount)
4056 error ("-mnop-mcount is not compatible with this target");
4057 #endif
4058 if (flag_nop_mcount && flag_pic)
4059 error ("-mnop-mcount is not implemented for -fPIC");
4060
4061 /* Accept -msseregparm only if at least SSE support is enabled. */
4062 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4063 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4064 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4065
4066 if (opts_set->x_ix86_fpmath)
4067 {
4068 if (opts->x_ix86_fpmath & FPMATH_SSE)
4069 {
4070 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4071 {
4072 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4073 opts->x_ix86_fpmath = FPMATH_387;
4074 }
4075 else if ((opts->x_ix86_fpmath & FPMATH_387)
4076 && !TARGET_80387_P (opts->x_target_flags))
4077 {
4078 warning (0, "387 instruction set disabled, using SSE arithmetics");
4079 opts->x_ix86_fpmath = FPMATH_SSE;
4080 }
4081 }
4082 }
4083 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4084 -mfpmath=387. The latter is, however, the default on many targets, since
4085 the extra 80-bit precision of temporaries is considered to be part of the ABI.
4086 Overwrite the default at least for -ffast-math.
4087 TODO: -mfpmath=both seems to produce similarly performing code with
4088 slightly smaller binaries. It is, however, not clear whether register
4089 allocation is ready for this setting.
4090 Also, -mfpmath=387 codegen is overall a lot more compact (about 4-5%)
4091 than SSE codegen. We may switch to 387 with -ffast-math for
4092 size-optimized functions. */
4093 else if (fast_math_flags_set_p (&global_options)
4094 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4095 opts->x_ix86_fpmath = FPMATH_SSE;
4096 else
4097 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4098
4099 /* If the i387 is disabled, then do not return values in it. */
4100 if (!TARGET_80387_P (opts->x_target_flags))
4101 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4102
4103 /* Use external vectorized library in vectorizing intrinsics. */
4104 if (opts_set->x_ix86_veclibabi_type)
4105 switch (opts->x_ix86_veclibabi_type)
4106 {
4107 case ix86_veclibabi_type_svml:
4108 ix86_veclib_handler = ix86_veclibabi_svml;
4109 break;
4110
4111 case ix86_veclibabi_type_acml:
4112 ix86_veclib_handler = ix86_veclibabi_acml;
4113 break;
4114
4115 default:
4116 gcc_unreachable ();
4117 }
4118
4119 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4120 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4121 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4122
4123 /* If stack probes are required, the space used for large function
4124 arguments on the stack must also be probed, so enable
4125 -maccumulate-outgoing-args so this happens in the prologue. */
4126 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4127 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4128 {
4129 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4130 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4131 "for correctness", prefix, suffix);
4132 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4133 }
4134
4135 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4136 {
4137 char *p;
4138 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4139 p = strchr (internal_label_prefix, 'X');
4140 internal_label_prefix_len = p - internal_label_prefix;
4141 *p = '\0';
4142 }
4143
4144 /* When no scheduling description is available, disable the scheduler pass
4145 so that it does not slow down compilation or make x87 code slower. */
4146 if (!TARGET_SCHEDULE)
4147 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4148
4149 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4150 ix86_tune_cost->simultaneous_prefetches,
4151 opts->x_param_values,
4152 opts_set->x_param_values);
4153 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4154 ix86_tune_cost->prefetch_block,
4155 opts->x_param_values,
4156 opts_set->x_param_values);
4157 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4158 ix86_tune_cost->l1_cache_size,
4159 opts->x_param_values,
4160 opts_set->x_param_values);
4161 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4162 ix86_tune_cost->l2_cache_size,
4163 opts->x_param_values,
4164 opts_set->x_param_values);
4165
4166 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4167 if (opts->x_flag_prefetch_loop_arrays < 0
4168 && HAVE_prefetch
4169 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4170 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4171 opts->x_flag_prefetch_loop_arrays = 1;
4172
4173 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4174 can be optimized to ap = __builtin_next_arg (0). */
4175 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4176 targetm.expand_builtin_va_start = NULL;
4177
4178 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4179 {
4180 ix86_gen_leave = gen_leave_rex64;
4181 if (Pmode == DImode)
4182 {
4183 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4184 ix86_gen_tls_local_dynamic_base_64
4185 = gen_tls_local_dynamic_base_64_di;
4186 }
4187 else
4188 {
4189 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4190 ix86_gen_tls_local_dynamic_base_64
4191 = gen_tls_local_dynamic_base_64_si;
4192 }
4193 }
4194 else
4195 ix86_gen_leave = gen_leave;
4196
4197 if (Pmode == DImode)
4198 {
4199 ix86_gen_add3 = gen_adddi3;
4200 ix86_gen_sub3 = gen_subdi3;
4201 ix86_gen_sub3_carry = gen_subdi3_carry;
4202 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4203 ix86_gen_andsp = gen_anddi3;
4204 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4205 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4206 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4207 ix86_gen_monitor = gen_sse3_monitor_di;
4208 }
4209 else
4210 {
4211 ix86_gen_add3 = gen_addsi3;
4212 ix86_gen_sub3 = gen_subsi3;
4213 ix86_gen_sub3_carry = gen_subsi3_carry;
4214 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4215 ix86_gen_andsp = gen_andsi3;
4216 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4217 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4218 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4219 ix86_gen_monitor = gen_sse3_monitor_si;
4220 }
4221
4222 #ifdef USE_IX86_CLD
4223 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4224 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4225 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4226 #endif
4227
4228 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4229 {
4230 if (opts->x_flag_fentry > 0)
4231 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4232 "with -fpic");
4233 opts->x_flag_fentry = 0;
4234 }
4235 else if (TARGET_SEH)
4236 {
4237 if (opts->x_flag_fentry == 0)
4238 sorry ("-mno-fentry isn%'t compatible with SEH");
4239 opts->x_flag_fentry = 1;
4240 }
4241 else if (opts->x_flag_fentry < 0)
4242 {
4243 #if defined(PROFILE_BEFORE_PROLOGUE)
4244 opts->x_flag_fentry = 1;
4245 #else
4246 opts->x_flag_fentry = 0;
4247 #endif
4248 }
4249
4250 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4251 opts->x_target_flags |= MASK_VZEROUPPER;
4252 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4253 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4254 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4255 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4256 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4257 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4258 /* Enable 128-bit AVX instruction generation
4259 for the auto-vectorizer. */
4260 if (TARGET_AVX128_OPTIMAL
4261 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4262 opts->x_target_flags |= MASK_PREFER_AVX128;
4263
4264 if (opts->x_ix86_recip_name)
4265 {
4266 char *p = ASTRDUP (opts->x_ix86_recip_name);
4267 char *q;
4268 unsigned int mask, i;
4269 bool invert;
4270
4271 while ((q = strtok (p, ",")) != NULL)
4272 {
4273 p = NULL;
4274 if (*q == '!')
4275 {
4276 invert = true;
4277 q++;
4278 }
4279 else
4280 invert = false;
4281
4282 if (!strcmp (q, "default"))
4283 mask = RECIP_MASK_ALL;
4284 else
4285 {
4286 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4287 if (!strcmp (q, recip_options[i].string))
4288 {
4289 mask = recip_options[i].mask;
4290 break;
4291 }
4292
4293 if (i == ARRAY_SIZE (recip_options))
4294 {
4295 error ("unknown option for -mrecip=%s", q);
4296 invert = false;
4297 mask = RECIP_MASK_NONE;
4298 }
4299 }
4300
4301 opts->x_recip_mask_explicit |= mask;
4302 if (invert)
4303 opts->x_recip_mask &= ~mask;
4304 else
4305 opts->x_recip_mask |= mask;
4306 }
4307 }
4308
4309 if (TARGET_RECIP_P (opts->x_target_flags))
4310 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4311 else if (opts_set->x_target_flags & MASK_RECIP)
4312 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4313
4314 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4315 for 64-bit Bionic. */
4316 if (TARGET_HAS_BIONIC
4317 && !(opts_set->x_target_flags
4318 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4319 opts->x_target_flags |= (TARGET_64BIT
4320 ? MASK_LONG_DOUBLE_128
4321 : MASK_LONG_DOUBLE_64);
4322
4323 /* Only one of them can be active. */
4324 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4325 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4326
4327 /* Save the initial options in case the user does function specific
4328 options. */
4329 if (main_args_p)
4330 target_option_default_node = target_option_current_node
4331 = build_target_option_node (opts);
4332
4333 /* Handle stack protector */
4334 if (!opts_set->x_ix86_stack_protector_guard)
4335 opts->x_ix86_stack_protector_guard
4336 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4337
4338 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4339 if (opts->x_ix86_tune_memcpy_strategy)
4340 {
4341 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4342 ix86_parse_stringop_strategy_string (str, false);
4343 free (str);
4344 }
4345
4346 if (opts->x_ix86_tune_memset_strategy)
4347 {
4348 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4349 ix86_parse_stringop_strategy_string (str, true);
4350 free (str);
4351 }
4352 }
4353
4354 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4355
4356 static void
4357 ix86_option_override (void)
4358 {
4359 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4360 struct register_pass_info insert_vzeroupper_info
4361 = { pass_insert_vzeroupper, "reload",
4362 1, PASS_POS_INSERT_AFTER
4363 };
4364
4365 ix86_option_override_internal (true, &global_options, &global_options_set);
4366
4367
4368 /* This needs to be done at start up. It's convenient to do it here. */
4369 register_pass (&insert_vzeroupper_info);
4370 }
4371
4372 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4373 static char *
4374 ix86_offload_options (void)
4375 {
4376 if (TARGET_LP64)
4377 return xstrdup ("-foffload-abi=lp64");
4378 return xstrdup ("-foffload-abi=ilp32");
4379 }
4380
4381 /* Update register usage after having seen the compiler flags. */
4382
4383 static void
4384 ix86_conditional_register_usage (void)
4385 {
4386 int i, c_mask;
4387
4388 /* For 32-bit targets, squash the REX registers. */
4389 if (! TARGET_64BIT)
4390 {
4391 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4392 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4393 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4394 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4395 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4396 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4397 }
4398
4399 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4400 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4401 : TARGET_64BIT ? (1 << 2)
4402 : (1 << 1));
4403
4404 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4405
4406 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4407 {
4408 /* Set/reset conditionally defined registers from
4409 CALL_USED_REGISTERS initializer. */
4410 if (call_used_regs[i] > 1)
4411 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4412
4413 /* Calculate registers of CLOBBERED_REGS register set
4414 as call used registers from GENERAL_REGS register set. */
4415 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4416 && call_used_regs[i])
4417 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4418 }
4419
4420 /* If MMX is disabled, squash the registers. */
4421 if (! TARGET_MMX)
4422 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4423 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4424 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4425
4426 /* If SSE is disabled, squash the registers. */
4427 if (! TARGET_SSE)
4428 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4429 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4430 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4431
4432 /* If the FPU is disabled, squash the registers. */
4433 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4434 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4435 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4436 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4437
4438 /* If AVX512F is disabled, squash the registers. */
4439 if (! TARGET_AVX512F)
4440 {
4441 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4442 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4443
4444 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4445 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4446 }
4447
4448 /* If MPX is disabled, squash the registers. */
4449 if (! TARGET_MPX)
4450 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4451 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4452 }
4453
4454 \f
4455 /* Save the current options */
4456
4457 static void
4458 ix86_function_specific_save (struct cl_target_option *ptr,
4459 struct gcc_options *opts)
4460 {
4461 ptr->arch = ix86_arch;
4462 ptr->schedule = ix86_schedule;
4463 ptr->prefetch_sse = x86_prefetch_sse;
4464 ptr->tune = ix86_tune;
4465 ptr->branch_cost = ix86_branch_cost;
4466 ptr->tune_defaulted = ix86_tune_defaulted;
4467 ptr->arch_specified = ix86_arch_specified;
4468 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4469 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4470 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4471 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4472 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4473 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4474 ptr->x_ix86_abi = opts->x_ix86_abi;
4475 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4476 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4477 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4478 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4479 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4480 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4481 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4482 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4483 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4484 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4485 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4486 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4487 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4488 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4489 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4490 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4491 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4492 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4493 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4494 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4495
4496 /* The fields are char but the variables are not; make sure the
4497 values fit in the fields. */
4498 gcc_assert (ptr->arch == ix86_arch);
4499 gcc_assert (ptr->schedule == ix86_schedule);
4500 gcc_assert (ptr->tune == ix86_tune);
4501 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4502 }
4503
4504 /* Restore the current options */
4505
4506 static void
4507 ix86_function_specific_restore (struct gcc_options *opts,
4508 struct cl_target_option *ptr)
4509 {
4510 enum processor_type old_tune = ix86_tune;
4511 enum processor_type old_arch = ix86_arch;
4512 unsigned int ix86_arch_mask;
4513 int i;
4514
4515 /* We don't change -fPIC. */
4516 opts->x_flag_pic = flag_pic;
4517
4518 ix86_arch = (enum processor_type) ptr->arch;
4519 ix86_schedule = (enum attr_cpu) ptr->schedule;
4520 ix86_tune = (enum processor_type) ptr->tune;
4521 x86_prefetch_sse = ptr->prefetch_sse;
4522 opts->x_ix86_branch_cost = ptr->branch_cost;
4523 ix86_tune_defaulted = ptr->tune_defaulted;
4524 ix86_arch_specified = ptr->arch_specified;
4525 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4526 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4527 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4528 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4529 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4530 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4531 opts->x_ix86_abi = ptr->x_ix86_abi;
4532 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4533 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4534 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4535 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4536 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4537 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4538 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4539 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4540 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4541 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4542 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4543 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4544 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4545 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4546 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4547 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4548 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4549 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4550 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4551 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4552 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4553 /* TODO: ix86_cost should be chosen at instruction or function granularity
4554 so that for cold code we use size_cost even in !optimize_size compilation. */
4555 if (opts->x_optimize_size)
4556 ix86_cost = &ix86_size_cost;
4557 else
4558 ix86_cost = ix86_tune_cost;
4559
4560 /* Recreate the arch feature tests if the arch changed */
4561 if (old_arch != ix86_arch)
4562 {
4563 ix86_arch_mask = 1u << ix86_arch;
4564 for (i = 0; i < X86_ARCH_LAST; ++i)
4565 ix86_arch_features[i]
4566 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4567 }
4568
4569 /* Recreate the tune optimization tests */
4570 if (old_tune != ix86_tune)
4571 set_ix86_tune_features (ix86_tune, false);
4572 }
4573
4574 /* Print the current options */
4575
4576 static void
4577 ix86_function_specific_print (FILE *file, int indent,
4578 struct cl_target_option *ptr)
4579 {
4580 char *target_string
4581 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4582 NULL, NULL, ptr->x_ix86_fpmath, false);
4583
4584 gcc_assert (ptr->arch < PROCESSOR_max);
4585 fprintf (file, "%*sarch = %d (%s)\n",
4586 indent, "",
4587 ptr->arch, processor_target_table[ptr->arch].name);
4588
4589 gcc_assert (ptr->tune < PROCESSOR_max);
4590 fprintf (file, "%*stune = %d (%s)\n",
4591 indent, "",
4592 ptr->tune, processor_target_table[ptr->tune].name);
4593
4594 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4595
4596 if (target_string)
4597 {
4598 fprintf (file, "%*s%s\n", indent, "", target_string);
4599 free (target_string);
4600 }
4601 }
4602
4603 \f
4604 /* Inner function to process the attribute((target(...))): take an argument and
4605 set the current options from it. If we have a list, recursively go
4606 over the list. */
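/* Illustrative use of the attribute (names taken from the attrs[] table
   below; sketch only):

     __attribute__((target ("avx2,no-sse4a,arch=haswell,fpmath=sse")))
     void foo (void);

   Each comma-separated option is handled by one pass of the loop below. */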
4607
4608 static bool
4609 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4610 struct gcc_options *opts,
4611 struct gcc_options *opts_set,
4612 struct gcc_options *enum_opts_set)
4613 {
4614 char *next_optstr;
4615 bool ret = true;
4616
4617 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4618 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4619 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4620 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4621 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4622
4623 enum ix86_opt_type
4624 {
4625 ix86_opt_unknown,
4626 ix86_opt_yes,
4627 ix86_opt_no,
4628 ix86_opt_str,
4629 ix86_opt_enum,
4630 ix86_opt_isa
4631 };
4632
4633 static const struct
4634 {
4635 const char *string;
4636 size_t len;
4637 enum ix86_opt_type type;
4638 int opt;
4639 int mask;
4640 } attrs[] = {
4641 /* isa options */
4642 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4643 IX86_ATTR_ISA ("abm", OPT_mabm),
4644 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4645 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4646 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4647 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4648 IX86_ATTR_ISA ("aes", OPT_maes),
4649 IX86_ATTR_ISA ("sha", OPT_msha),
4650 IX86_ATTR_ISA ("avx", OPT_mavx),
4651 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4652 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4653 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4654 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4655 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4656 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4657 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4658 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4659 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4660 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4661 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4662 IX86_ATTR_ISA ("sse", OPT_msse),
4663 IX86_ATTR_ISA ("sse2", OPT_msse2),
4664 IX86_ATTR_ISA ("sse3", OPT_msse3),
4665 IX86_ATTR_ISA ("sse4", OPT_msse4),
4666 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4667 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4668 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4669 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4670 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4671 IX86_ATTR_ISA ("fma", OPT_mfma),
4672 IX86_ATTR_ISA ("xop", OPT_mxop),
4673 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4674 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4675 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4676 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4677 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4678 IX86_ATTR_ISA ("hle", OPT_mhle),
4679 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4680 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4681 IX86_ATTR_ISA ("adx", OPT_madx),
4682 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4683 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4684 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4685 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4686 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4687 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4688 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4689 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4690 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4691 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4692 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
4693
4694 /* enum options */
4695 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4696
4697 /* string options */
4698 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4699 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4700
4701 /* flag options */
4702 IX86_ATTR_YES ("cld",
4703 OPT_mcld,
4704 MASK_CLD),
4705
4706 IX86_ATTR_NO ("fancy-math-387",
4707 OPT_mfancy_math_387,
4708 MASK_NO_FANCY_MATH_387),
4709
4710 IX86_ATTR_YES ("ieee-fp",
4711 OPT_mieee_fp,
4712 MASK_IEEE_FP),
4713
4714 IX86_ATTR_YES ("inline-all-stringops",
4715 OPT_minline_all_stringops,
4716 MASK_INLINE_ALL_STRINGOPS),
4717
4718 IX86_ATTR_YES ("inline-stringops-dynamically",
4719 OPT_minline_stringops_dynamically,
4720 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4721
4722 IX86_ATTR_NO ("align-stringops",
4723 OPT_mno_align_stringops,
4724 MASK_NO_ALIGN_STRINGOPS),
4725
4726 IX86_ATTR_YES ("recip",
4727 OPT_mrecip,
4728 MASK_RECIP),
4729
4730 };
4731
4732 /* If this is a list, recurse to get the options. */
4733 if (TREE_CODE (args) == TREE_LIST)
4734 {
4735 bool ret = true;
4736
4737 for (; args; args = TREE_CHAIN (args))
4738 if (TREE_VALUE (args)
4739 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4740 p_strings, opts, opts_set,
4741 enum_opts_set))
4742 ret = false;
4743
4744 return ret;
4745 }
4746
4747 else if (TREE_CODE (args) != STRING_CST)
4748 {
4749 error ("attribute %<target%> argument not a string");
4750 return false;
4751 }
4752
4753 /* Handle multiple arguments separated by commas. */
4754 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4755
4756 while (next_optstr && *next_optstr != '\0')
4757 {
4758 char *p = next_optstr;
4759 char *orig_p = p;
4760 char *comma = strchr (next_optstr, ',');
4761 const char *opt_string;
4762 size_t len, opt_len;
4763 int opt;
4764 bool opt_set_p;
4765 char ch;
4766 unsigned i;
4767 enum ix86_opt_type type = ix86_opt_unknown;
4768 int mask = 0;
4769
4770 if (comma)
4771 {
4772 *comma = '\0';
4773 len = comma - next_optstr;
4774 next_optstr = comma + 1;
4775 }
4776 else
4777 {
4778 len = strlen (p);
4779 next_optstr = NULL;
4780 }
4781
4782 /* Recognize no-xxx. */
4783 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4784 {
4785 opt_set_p = false;
4786 p += 3;
4787 len -= 3;
4788 }
4789 else
4790 opt_set_p = true;
4791
4792 /* Find the option. */
4793 ch = *p;
4794 opt = N_OPTS;
4795 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4796 {
4797 type = attrs[i].type;
4798 opt_len = attrs[i].len;
4799 if (ch == attrs[i].string[0]
4800 && ((type != ix86_opt_str && type != ix86_opt_enum)
4801 ? len == opt_len
4802 : len > opt_len)
4803 && memcmp (p, attrs[i].string, opt_len) == 0)
4804 {
4805 opt = attrs[i].opt;
4806 mask = attrs[i].mask;
4807 opt_string = attrs[i].string;
4808 break;
4809 }
4810 }
4811
4812 /* Process the option. */
4813 if (opt == N_OPTS)
4814 {
4815 error ("attribute(target(\"%s\")) is unknown", orig_p);
4816 ret = false;
4817 }
4818
4819 else if (type == ix86_opt_isa)
4820 {
4821 struct cl_decoded_option decoded;
4822
4823 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4824 ix86_handle_option (opts, opts_set,
4825 &decoded, input_location);
4826 }
4827
4828 else if (type == ix86_opt_yes || type == ix86_opt_no)
4829 {
4830 if (type == ix86_opt_no)
4831 opt_set_p = !opt_set_p;
4832
4833 if (opt_set_p)
4834 opts->x_target_flags |= mask;
4835 else
4836 opts->x_target_flags &= ~mask;
4837 }
4838
4839 else if (type == ix86_opt_str)
4840 {
4841 if (p_strings[opt])
4842 {
4843 error ("option(\"%s\") was already specified", opt_string);
4844 ret = false;
4845 }
4846 else
4847 p_strings[opt] = xstrdup (p + opt_len);
4848 }
4849
4850 else if (type == ix86_opt_enum)
4851 {
4852 bool arg_ok;
4853 int value;
4854
4855 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4856 if (arg_ok)
4857 set_option (opts, enum_opts_set, opt, value,
4858 p + opt_len, DK_UNSPECIFIED, input_location,
4859 global_dc);
4860 else
4861 {
4862 error ("attribute(target(\"%s\")) is unknown", orig_p);
4863 ret = false;
4864 }
4865 }
4866
4867 else
4868 gcc_unreachable ();
4869 }
4870
4871 return ret;
4872 }
4873
4874 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4875
4876 tree
4877 ix86_valid_target_attribute_tree (tree args,
4878 struct gcc_options *opts,
4879 struct gcc_options *opts_set)
4880 {
4881 const char *orig_arch_string = opts->x_ix86_arch_string;
4882 const char *orig_tune_string = opts->x_ix86_tune_string;
4883 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4884 int orig_tune_defaulted = ix86_tune_defaulted;
4885 int orig_arch_specified = ix86_arch_specified;
4886 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4887 tree t = NULL_TREE;
4888 int i;
4889 struct cl_target_option *def
4890 = TREE_TARGET_OPTION (target_option_default_node);
4891 struct gcc_options enum_opts_set;
4892
4893 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4894
4895 /* Process each of the options on the chain. */
4896 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4897 opts_set, &enum_opts_set))
4898 return error_mark_node;
4899
4900 /* If the changed options are different from the default, rerun
4901 ix86_option_override_internal, and then save the options away.
4902 The string options are attribute options, and will be undone
4903 when we copy the save structure. */
4904 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4905 || opts->x_target_flags != def->x_target_flags
4906 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4907 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4908 || enum_opts_set.x_ix86_fpmath)
4909 {
4910 /* If we are using the default tune= or arch=, undo the string assigned,
4911 and use the default. */
4912 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4913 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4914 else if (!orig_arch_specified)
4915 opts->x_ix86_arch_string = NULL;
4916
4917 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4918 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4919 else if (orig_tune_defaulted)
4920 opts->x_ix86_tune_string = NULL;
4921
4922 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4923 if (enum_opts_set.x_ix86_fpmath)
4924 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4925 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4926 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4927 {
4928 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4929 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4930 }
4931
4932 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4933 ix86_option_override_internal (false, opts, opts_set);
4934
4935 /* Add any builtin functions with the new isa if any. */
4936 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4937
4938 /* Save the current options unless we are validating options for
4939 #pragma. */
4940 t = build_target_option_node (opts);
4941
4942 opts->x_ix86_arch_string = orig_arch_string;
4943 opts->x_ix86_tune_string = orig_tune_string;
4944 opts_set->x_ix86_fpmath = orig_fpmath_set;
4945
4946 /* Free up memory allocated to hold the strings */
4947 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4948 free (option_strings[i]);
4949 }
4950
4951 return t;
4952 }
4953
4954 /* Hook to validate attribute((target("string"))). */
4955
4956 static bool
4957 ix86_valid_target_attribute_p (tree fndecl,
4958 tree ARG_UNUSED (name),
4959 tree args,
4960 int ARG_UNUSED (flags))
4961 {
4962 struct gcc_options func_options;
4963 tree new_target, new_optimize;
4964 bool ret = true;
4965
4966 /* attribute((target("default"))) does nothing, beyond
4967 affecting multi-versioning. */
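/* Illustrative multi-versioning use (C++ front end; sketch only):

     __attribute__((target ("default"))) int foo () { return 0; }
     __attribute__((target ("avx2")))    int foo () { return 1; }

   The "default" version is the one callers fall back to at run time. */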
4968 if (TREE_VALUE (args)
4969 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4970 && TREE_CHAIN (args) == NULL_TREE
4971 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4972 return true;
4973
4974 tree old_optimize = build_optimization_node (&global_options);
4975
4976 /* Get the optimization options of the current function. */
4977 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4978
4979 if (!func_optimize)
4980 func_optimize = old_optimize;
4981
4982 /* Init func_options. */
4983 memset (&func_options, 0, sizeof (func_options));
4984 init_options_struct (&func_options, NULL);
4985 lang_hooks.init_options_struct (&func_options);
4986
4987 cl_optimization_restore (&func_options,
4988 TREE_OPTIMIZATION (func_optimize));
4989
4990 /* Initialize func_options to the default before its target options can
4991 be set. */
4992 cl_target_option_restore (&func_options,
4993 TREE_TARGET_OPTION (target_option_default_node));
4994
4995 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4996 &global_options_set);
4997
4998 new_optimize = build_optimization_node (&func_options);
4999
5000 if (new_target == error_mark_node)
5001 ret = false;
5002
5003 else if (fndecl && new_target)
5004 {
5005 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5006
5007 if (old_optimize != new_optimize)
5008 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5009 }
5010
5011 return ret;
5012 }
5013
5014 \f
5015 /* Hook to determine if one function can safely inline another. */
5016
5017 static bool
5018 ix86_can_inline_p (tree caller, tree callee)
5019 {
5020 bool ret = false;
5021 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5022 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5023
5024 /* If callee has no option attributes, then it is ok to inline. */
5025 if (!callee_tree)
5026 ret = true;
5027
5028 /* If caller has no option attributes, but callee does then it is not ok to
5029 inline. */
5030 else if (!caller_tree)
5031 ret = false;
5032
5033 else
5034 {
5035 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5036 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5037
5038 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
5039 function can inline an SSE2 function but an SSE2 function can't inline
5040 an SSE4 function. */
5041 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5042 != callee_opts->x_ix86_isa_flags)
5043 ret = false;
5044
5045 /* See if we have the same non-isa options. */
5046 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5047 ret = false;
5048
5049 /* See if arch, tune, etc. are the same. */
5050 else if (caller_opts->arch != callee_opts->arch)
5051 ret = false;
5052
5053 else if (caller_opts->tune != callee_opts->tune)
5054 ret = false;
5055
5056 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5057 ret = false;
5058
5059 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5060 ret = false;
5061
5062 else
5063 ret = true;
5064 }
5065
5066 return ret;
5067 }
5068
5069 \f
5070 /* Remember the last target of ix86_set_current_function. */
5071 static GTY(()) tree ix86_previous_fndecl;
5072
5073 /* Set targets globals to the default (or current #pragma GCC target
5074 if active). Invalidate ix86_previous_fndecl cache. */
5075
5076 void
5077 ix86_reset_previous_fndecl (void)
5078 {
5079 tree new_tree = target_option_current_node;
5080 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5081 if (TREE_TARGET_GLOBALS (new_tree))
5082 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5083 else if (new_tree == target_option_default_node)
5084 restore_target_globals (&default_target_globals);
5085 else
5086 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5087 ix86_previous_fndecl = NULL_TREE;
5088 }
5089
5090 /* Establish appropriate back-end context for processing the function
5091 FNDECL. The argument might be NULL to indicate processing at top
5092 level, outside of any function scope. */
5093 static void
5094 ix86_set_current_function (tree fndecl)
5095 {
5096 /* Only change the context if the function changes. This hook is called
5097 several times in the course of compiling a function, and we don't want to
5098 slow things down too much or call target_reinit when it isn't safe. */
5099 if (fndecl == ix86_previous_fndecl)
5100 return;
5101
5102 tree old_tree;
5103 if (ix86_previous_fndecl == NULL_TREE)
5104 old_tree = target_option_current_node;
5105 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5106 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5107 else
5108 old_tree = target_option_default_node;
5109
5110 if (fndecl == NULL_TREE)
5111 {
5112 if (old_tree != target_option_current_node)
5113 ix86_reset_previous_fndecl ();
5114 return;
5115 }
5116
5117 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5118 if (new_tree == NULL_TREE)
5119 new_tree = target_option_default_node;
5120
5121 if (old_tree != new_tree)
5122 {
5123 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5124 if (TREE_TARGET_GLOBALS (new_tree))
5125 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5126 else if (new_tree == target_option_default_node)
5127 restore_target_globals (&default_target_globals);
5128 else
5129 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5130 }
5131 ix86_previous_fndecl = fndecl;
5132 }
5133
5134 \f
5135 /* Return true if this goes in large data/bss. */
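/* Roughly (sketch): with -mcmodel=medium or medium PIC, global objects
   larger than ix86_section_threshold (the -mlarge-data-threshold= value)
   are placed in the 64-bit-addressed .ldata/.lbss sections; smaller
   objects stay in the normal sections. */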
5136
5137 static bool
5138 ix86_in_large_data_p (tree exp)
5139 {
5140 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5141 return false;
5142
5143 /* Functions are never large data. */
5144 if (TREE_CODE (exp) == FUNCTION_DECL)
5145 return false;
5146
5147 /* Automatic variables are never large data. */
5148 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5149 return false;
5150
5151 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5152 {
5153 const char *section = DECL_SECTION_NAME (exp);
5154 if (strcmp (section, ".ldata") == 0
5155 || strcmp (section, ".lbss") == 0)
5156 return true;
5157 return false;
5158 }
5159 else
5160 {
5161 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5162
5163 /* If this is an incomplete type with size 0, then we can't put it
5164 in data because it might be too big when completed. Also,
5165 int_size_in_bytes returns -1 if the size can vary or is larger than
5166 an integer, in which case it is also safer to assume that it goes in
5167 large data. */
5168 if (size <= 0 || size > ix86_section_threshold)
5169 return true;
5170 }
5171
5172 return false;
5173 }
5174
5175 /* Switch to the appropriate section for output of DECL.
5176 DECL is either a `VAR_DECL' node or a constant of some sort.
5177 RELOC indicates whether forming the initial value of DECL requires
5178 link-time relocations. */
5179
5180 ATTRIBUTE_UNUSED static section *
5181 x86_64_elf_select_section (tree decl, int reloc,
5182 unsigned HOST_WIDE_INT align)
5183 {
5184 if (ix86_in_large_data_p (decl))
5185 {
5186 const char *sname = NULL;
5187 unsigned int flags = SECTION_WRITE;
5188 switch (categorize_decl_for_section (decl, reloc))
5189 {
5190 case SECCAT_DATA:
5191 sname = ".ldata";
5192 break;
5193 case SECCAT_DATA_REL:
5194 sname = ".ldata.rel";
5195 break;
5196 case SECCAT_DATA_REL_LOCAL:
5197 sname = ".ldata.rel.local";
5198 break;
5199 case SECCAT_DATA_REL_RO:
5200 sname = ".ldata.rel.ro";
5201 break;
5202 case SECCAT_DATA_REL_RO_LOCAL:
5203 sname = ".ldata.rel.ro.local";
5204 break;
5205 case SECCAT_BSS:
5206 sname = ".lbss";
5207 flags |= SECTION_BSS;
5208 break;
5209 case SECCAT_RODATA:
5210 case SECCAT_RODATA_MERGE_STR:
5211 case SECCAT_RODATA_MERGE_STR_INIT:
5212 case SECCAT_RODATA_MERGE_CONST:
5213 sname = ".lrodata";
5214 flags = 0;
5215 break;
5216 case SECCAT_SRODATA:
5217 case SECCAT_SDATA:
5218 case SECCAT_SBSS:
5219 gcc_unreachable ();
5220 case SECCAT_TEXT:
5221 case SECCAT_TDATA:
5222 case SECCAT_TBSS:
5223 /* We don't split these for the medium model. Place them into
5224 default sections and hope for the best. */
5225 break;
5226 }
5227 if (sname)
5228 {
5229 /* We might get called with string constants, but get_named_section
5230 doesn't like them as they are not DECLs. Also, we need to set
5231 flags in that case. */
5232 if (!DECL_P (decl))
5233 return get_section (sname, flags, NULL);
5234 return get_named_section (decl, sname, reloc);
5235 }
5236 }
5237 return default_elf_select_section (decl, reloc, align);
5238 }
5239
5240 /* Select a set of attributes for section NAME based on the properties
5241 of DECL and whether or not RELOC indicates that DECL's initializer
5242 might contain runtime relocations. */
5243
5244 static unsigned int ATTRIBUTE_UNUSED
5245 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5246 {
5247 unsigned int flags = default_section_type_flags (decl, name, reloc);
5248
5249 if (decl == NULL_TREE
5250 && (strcmp (name, ".ldata.rel.ro") == 0
5251 || strcmp (name, ".ldata.rel.ro.local") == 0))
5252 flags |= SECTION_RELRO;
5253
5254 if (strcmp (name, ".lbss") == 0
5255 || strncmp (name, ".lbss.", 5) == 0
5256 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5257 flags |= SECTION_BSS;
5258
5259 return flags;
5260 }
5261
5262 /* Build up a unique section name, expressed as a
5263 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5264 RELOC indicates whether the initial value of EXP requires
5265 link-time relocations. */
5266
5267 static void ATTRIBUTE_UNUSED
5268 x86_64_elf_unique_section (tree decl, int reloc)
5269 {
5270 if (ix86_in_large_data_p (decl))
5271 {
5272 const char *prefix = NULL;
5273 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5274 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5275
5276 switch (categorize_decl_for_section (decl, reloc))
5277 {
5278 case SECCAT_DATA:
5279 case SECCAT_DATA_REL:
5280 case SECCAT_DATA_REL_LOCAL:
5281 case SECCAT_DATA_REL_RO:
5282 case SECCAT_DATA_REL_RO_LOCAL:
5283 prefix = one_only ? ".ld" : ".ldata";
5284 break;
5285 case SECCAT_BSS:
5286 prefix = one_only ? ".lb" : ".lbss";
5287 break;
5288 case SECCAT_RODATA:
5289 case SECCAT_RODATA_MERGE_STR:
5290 case SECCAT_RODATA_MERGE_STR_INIT:
5291 case SECCAT_RODATA_MERGE_CONST:
5292 prefix = one_only ? ".lr" : ".lrodata";
5293 break;
5294 case SECCAT_SRODATA:
5295 case SECCAT_SDATA:
5296 case SECCAT_SBSS:
5297 gcc_unreachable ();
5298 case SECCAT_TEXT:
5299 case SECCAT_TDATA:
5300 case SECCAT_TBSS:
5301 /* We don't split these for the medium model. Place them into
5302 default sections and hope for the best. */
5303 break;
5304 }
5305 if (prefix)
5306 {
5307 const char *name, *linkonce;
5308 char *string;
5309
5310 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5311 name = targetm.strip_name_encoding (name);
5312
5313 /* If we're using one_only, then there needs to be a .gnu.linkonce
5314 prefix to the section name. */
5315 linkonce = one_only ? ".gnu.linkonce" : "";
5316
5317 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5318
5319 set_decl_section_name (decl, string);
5320 return;
5321 }
5322 }
5323 default_unique_section (decl, reloc);
5324 }
5325
5326 #ifdef COMMON_ASM_OP
5327 /* This says how to output assembler code to declare an
5328 uninitialized external-linkage data object.
5329
5330 For medium model x86-64 we need to use the .largecomm directive for
5331 large objects. */
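/* E.g. (sketch; exact operands depend on the object): a 1 MiB common
   symbol under -mcmodel=medium would be emitted as

       .largecomm  buf,1048576,32

   rather than with the usual .comm directive. */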
5332 void
5333 x86_elf_aligned_common (FILE *file,
5334 const char *name, unsigned HOST_WIDE_INT size,
5335 int align)
5336 {
5337 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5338 && size > (unsigned int)ix86_section_threshold)
5339 fputs ("\t.largecomm\t", file);
5340 else
5341 fputs (COMMON_ASM_OP, file);
5342 assemble_name (file, name);
5343 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5344 size, align / BITS_PER_UNIT);
5345 }
5346 #endif
5347
5348 /* Utility function for targets to use in implementing
5349 ASM_OUTPUT_ALIGNED_BSS. */
5350
5351 void
5352 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5353 unsigned HOST_WIDE_INT size, int align)
5354 {
5355 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5356 && size > (unsigned int)ix86_section_threshold)
5357 switch_to_section (get_named_section (decl, ".lbss", 0));
5358 else
5359 switch_to_section (bss_section);
5360 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5361 #ifdef ASM_DECLARE_OBJECT_NAME
5362 last_assemble_variable_decl = decl;
5363 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5364 #else
5365 /* Standard thing is just output label for the object. */
5366 ASM_OUTPUT_LABEL (file, name);
5367 #endif /* ASM_DECLARE_OBJECT_NAME */
5368 ASM_OUTPUT_SKIP (file, size ? size : 1);
5369 }
5370 \f
5371 /* Decide whether we must probe the stack before any space allocation
5372 on this target. It's essentially TARGET_STACK_PROBE except when
5373 -fstack-check causes the stack to be already probed differently. */
5374
5375 bool
5376 ix86_target_stack_probe (void)
5377 {
5378 /* Do not probe the stack twice if static stack checking is enabled. */
5379 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5380 return false;
5381
5382 return TARGET_STACK_PROBE;
5383 }
5384 \f
5385 /* Decide whether we can make a sibling call to a function. DECL is the
5386 declaration of the function being targeted by the call and EXP is the
5387 CALL_EXPR representing the call. */
5388
5389 static bool
5390 ix86_function_ok_for_sibcall (tree decl, tree exp)
5391 {
5392 tree type, decl_or_type;
5393 rtx a, b;
5394
5395 /* If we are generating position-independent code, we cannot sibcall
5396 optimize any indirect call, or a direct call to a global function,
5397 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5398 if (!TARGET_MACHO
5399 && !TARGET_64BIT
5400 && flag_pic
5401 && (!decl || !targetm.binds_local_p (decl)))
5402 return false;
5403
5404 /* If we need to align the outgoing stack, then sibcalling would
5405 unalign the stack, which may break the called function. */
5406 if (ix86_minimum_incoming_stack_boundary (true)
5407 < PREFERRED_STACK_BOUNDARY)
5408 return false;
5409
5410 if (decl)
5411 {
5412 decl_or_type = decl;
5413 type = TREE_TYPE (decl);
5414 }
5415 else
5416 {
5417 /* We're looking at the CALL_EXPR, we need the type of the function. */
5418 type = CALL_EXPR_FN (exp); /* pointer expression */
5419 type = TREE_TYPE (type); /* pointer type */
5420 type = TREE_TYPE (type); /* function type */
5421 decl_or_type = type;
5422 }
5423
5424 /* Check that the return value locations are the same. Like
5425 if we are returning floats on the 80387 register stack, we cannot
5426 make a sibcall from a function that doesn't return a float to a
5427 function that does or, conversely, from a function that does return
5428 a float to a function that doesn't; the necessary stack adjustment
5429 would not be executed. This is also the place we notice
5430 differences in the return value ABI. Note that it is ok for one
5431 of the functions to have void return type as long as the return
5432 value of the other is passed in a register. */
5433 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5434 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5435 cfun->decl, false);
5436 if (STACK_REG_P (a) || STACK_REG_P (b))
5437 {
5438 if (!rtx_equal_p (a, b))
5439 return false;
5440 }
5441 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5442 ;
5443 else if (!rtx_equal_p (a, b))
5444 return false;
5445
5446 if (TARGET_64BIT)
5447 {
5448 /* The SYSV ABI has more call-clobbered registers;
5449 disallow sibcalls from MS to SYSV. */
5450 if (cfun->machine->call_abi == MS_ABI
5451 && ix86_function_type_abi (type) == SYSV_ABI)
5452 return false;
5453 }
5454 else
5455 {
5456 /* If this call is indirect, we'll need to be able to use a
5457 call-clobbered register for the address of the target function.
5458 Make sure that all such registers are not used for passing
5459 parameters. Note that DLLIMPORT functions are indirect. */
5460 if (!decl
5461 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5462 {
5463 if (ix86_function_regparm (type, NULL) >= 3)
5464 {
5465 /* ??? Need to count the actual number of registers to be used,
5466 not the possible number of registers. Fix later. */
5467 return false;
5468 }
5469 }
5470 }
5471
5472 /* Otherwise okay. That also includes certain types of indirect calls. */
5473 return true;
5474 }
5475
5476 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5477 and "sseregparm" calling convention attributes;
5478 arguments as in struct attribute_spec.handler. */
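/* Illustrative uses of these attributes on 32-bit targets (sketches only):

     int __attribute__((regparm (3))) f (int a, int b, int c);
        a, b and c are passed in EAX, EDX and ECX.
     int __attribute__((fastcall)) g (int a, int b);
        a in ECX, b in EDX; the callee pops any stack arguments.
     int __attribute__((stdcall)) h (int a);
        all arguments on the stack; the callee pops them.  */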
5479
5480 static tree
5481 ix86_handle_cconv_attribute (tree *node, tree name,
5482 tree args,
5483 int,
5484 bool *no_add_attrs)
5485 {
5486 if (TREE_CODE (*node) != FUNCTION_TYPE
5487 && TREE_CODE (*node) != METHOD_TYPE
5488 && TREE_CODE (*node) != FIELD_DECL
5489 && TREE_CODE (*node) != TYPE_DECL)
5490 {
5491 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5492 name);
5493 *no_add_attrs = true;
5494 return NULL_TREE;
5495 }
5496
5497 /* Can combine regparm with all attributes but fastcall and thiscall. */
5498 if (is_attribute_p ("regparm", name))
5499 {
5500 tree cst;
5501
5502 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5503 {
5504 error ("fastcall and regparm attributes are not compatible");
5505 }
5506
5507 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5508 {
5509 error ("regparm and thiscall attributes are not compatible");
5510 }
5511
5512 cst = TREE_VALUE (args);
5513 if (TREE_CODE (cst) != INTEGER_CST)
5514 {
5515 warning (OPT_Wattributes,
5516 "%qE attribute requires an integer constant argument",
5517 name);
5518 *no_add_attrs = true;
5519 }
5520 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5521 {
5522 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5523 name, REGPARM_MAX);
5524 *no_add_attrs = true;
5525 }
5526
5527 return NULL_TREE;
5528 }
5529
5530 if (TARGET_64BIT)
5531 {
5532 /* Do not warn when emulating the MS ABI. */
5533 if ((TREE_CODE (*node) != FUNCTION_TYPE
5534 && TREE_CODE (*node) != METHOD_TYPE)
5535 || ix86_function_type_abi (*node) != MS_ABI)
5536 warning (OPT_Wattributes, "%qE attribute ignored",
5537 name);
5538 *no_add_attrs = true;
5539 return NULL_TREE;
5540 }
5541
5542 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5543 if (is_attribute_p ("fastcall", name))
5544 {
5545 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5546 {
5547 error ("fastcall and cdecl attributes are not compatible");
5548 }
5549 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5550 {
5551 error ("fastcall and stdcall attributes are not compatible");
5552 }
5553 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5554 {
5555 error ("fastcall and regparm attributes are not compatible");
5556 }
5557 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5558 {
5559 error ("fastcall and thiscall attributes are not compatible");
5560 }
5561 }
5562
5563 /* Can combine stdcall with fastcall (redundant), regparm and
5564 sseregparm. */
5565 else if (is_attribute_p ("stdcall", name))
5566 {
5567 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5568 {
5569 error ("stdcall and cdecl attributes are not compatible");
5570 }
5571 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5572 {
5573 error ("stdcall and fastcall attributes are not compatible");
5574 }
5575 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5576 {
5577 error ("stdcall and thiscall attributes are not compatible");
5578 }
5579 }
5580
5581 /* Can combine cdecl with regparm and sseregparm. */
5582 else if (is_attribute_p ("cdecl", name))
5583 {
5584 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5585 {
5586 error ("stdcall and cdecl attributes are not compatible");
5587 }
5588 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5589 {
5590 error ("fastcall and cdecl attributes are not compatible");
5591 }
5592 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5593 {
5594 error ("cdecl and thiscall attributes are not compatible");
5595 }
5596 }
5597 else if (is_attribute_p ("thiscall", name))
5598 {
5599 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5600 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5601 name);
5602 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5603 {
5604 error ("stdcall and thiscall attributes are not compatible");
5605 }
5606 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5607 {
5608 error ("fastcall and thiscall attributes are not compatible");
5609 }
5610 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5611 {
5612 error ("cdecl and thiscall attributes are not compatible");
5613 }
5614 }
5615
5616 /* Can combine sseregparm with all attributes. */
5617
5618 return NULL_TREE;
5619 }
5620
5621 /* The transactional memory builtins are implicitly regparm or fastcall
5622 depending on the ABI. Override the generic do-nothing attribute that
5623 these builtins were declared with, and replace it with one of the two
5624 attributes that we expect elsewhere. */
5625
5626 static tree
5627 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5628 int flags, bool *no_add_attrs)
5629 {
5630 tree alt;
5631
5632 /* In no case do we want to add the placeholder attribute. */
5633 *no_add_attrs = true;
5634
5635 /* The 64-bit ABI is unchanged for transactional memory. */
5636 if (TARGET_64BIT)
5637 return NULL_TREE;
5638
5639 /* ??? Is there a better way to validate 32-bit windows? We have
5640 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5641 if (CHECK_STACK_LIMIT > 0)
5642 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5643 else
5644 {
5645 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5646 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5647 }
5648 decl_attributes (node, alt, flags);
5649
5650 return NULL_TREE;
5651 }
5652
5653 /* This function determines from TYPE the calling-convention. */
5654
5655 unsigned int
5656 ix86_get_callcvt (const_tree type)
5657 {
5658 unsigned int ret = 0;
5659 bool is_stdarg;
5660 tree attrs;
5661
5662 if (TARGET_64BIT)
5663 return IX86_CALLCVT_CDECL;
5664
5665 attrs = TYPE_ATTRIBUTES (type);
5666 if (attrs != NULL_TREE)
5667 {
5668 if (lookup_attribute ("cdecl", attrs))
5669 ret |= IX86_CALLCVT_CDECL;
5670 else if (lookup_attribute ("stdcall", attrs))
5671 ret |= IX86_CALLCVT_STDCALL;
5672 else if (lookup_attribute ("fastcall", attrs))
5673 ret |= IX86_CALLCVT_FASTCALL;
5674 else if (lookup_attribute ("thiscall", attrs))
5675 ret |= IX86_CALLCVT_THISCALL;
5676
5677 /* Regparm isn't allowed for thiscall and fastcall. */
5678 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5679 {
5680 if (lookup_attribute ("regparm", attrs))
5681 ret |= IX86_CALLCVT_REGPARM;
5682 if (lookup_attribute ("sseregparm", attrs))
5683 ret |= IX86_CALLCVT_SSEREGPARM;
5684 }
5685
5686 if (IX86_BASE_CALLCVT(ret) != 0)
5687 return ret;
5688 }
5689
5690 is_stdarg = stdarg_p (type);
5691 if (TARGET_RTD && !is_stdarg)
5692 return IX86_CALLCVT_STDCALL | ret;
5693
5694 if (ret != 0
5695 || is_stdarg
5696 || TREE_CODE (type) != METHOD_TYPE
5697 || ix86_function_type_abi (type) != MS_ABI)
5698 return IX86_CALLCVT_CDECL | ret;
5699
5700 return IX86_CALLCVT_THISCALL;
5701 }
5702
5703 /* Return 0 if the attributes for two types are incompatible, 1 if they
5704 are compatible, and 2 if they are nearly compatible (which causes a
5705 warning to be generated). */
5706
5707 static int
5708 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5709 {
5710 unsigned int ccvt1, ccvt2;
5711
5712 if (TREE_CODE (type1) != FUNCTION_TYPE
5713 && TREE_CODE (type1) != METHOD_TYPE)
5714 return 1;
5715
5716 ccvt1 = ix86_get_callcvt (type1);
5717 ccvt2 = ix86_get_callcvt (type2);
5718 if (ccvt1 != ccvt2)
5719 return 0;
5720 if (ix86_function_regparm (type1, NULL)
5721 != ix86_function_regparm (type2, NULL))
5722 return 0;
5723
5724 return 1;
5725 }
5726 \f
5727 /* Return the regparm value for a function with the indicated TYPE and DECL.
5728 DECL may be NULL when calling function indirectly
5729 or considering a libcall. */
5730
5731 static int
5732 ix86_function_regparm (const_tree type, const_tree decl)
5733 {
5734 tree attr;
5735 int regparm;
5736 unsigned int ccvt;
5737
5738 if (TARGET_64BIT)
5739 return (ix86_function_type_abi (type) == SYSV_ABI
5740 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5741 ccvt = ix86_get_callcvt (type);
5742 regparm = ix86_regparm;
5743
5744 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5745 {
5746 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5747 if (attr)
5748 {
5749 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5750 return regparm;
5751 }
5752 }
5753 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5754 return 2;
5755 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5756 return 1;
5757
5758 /* Use register calling convention for local functions when possible. */
5759 if (decl
5760 && TREE_CODE (decl) == FUNCTION_DECL)
5761 {
5762 cgraph_node *target = cgraph_node::get (decl);
5763 if (target)
5764 target = target->function_symbol ();
5765
5766 /* Caller and callee must agree on the calling convention, so
5767 checking just optimize here would mean that with
5768 __attribute__((optimize (...))) the caller could use the regparm convention
5769 and the callee not, or vice versa. Instead look at whether the callee
5770 is optimized or not. */
5771 if (target && opt_for_fn (target->decl, optimize)
5772 && !(profile_flag && !flag_fentry))
5773 {
5774 cgraph_local_info *i = &target->local;
5775 if (i && i->local && i->can_change_signature)
5776 {
5777 int local_regparm, globals = 0, regno;
5778
5779 /* Make sure no regparm register is taken by a
5780 fixed register variable. */
5781 for (local_regparm = 0; local_regparm < REGPARM_MAX;
5782 local_regparm++)
5783 if (fixed_regs[local_regparm])
5784 break;
5785
5786 /* We don't want to use regparm(3) for nested functions as
5787 these use a static chain pointer in the third argument. */
5788 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
5789 local_regparm = 2;
5790
5791 /* Save a register for the split stack. */
5792 if (local_regparm == 3 && flag_split_stack)
5793 local_regparm = 2;
5794
5795 /* Each fixed register usage increases register pressure,
5796 so fewer registers should be used for argument passing.
5797 This functionality can be overridden by an explicit
5798 regparm value. */
5799 for (regno = AX_REG; regno <= DI_REG; regno++)
5800 if (fixed_regs[regno])
5801 globals++;
5802
5803 local_regparm
5804 = globals < local_regparm ? local_regparm - globals : 0;
5805
5806 if (local_regparm > regparm)
5807 regparm = local_regparm;
5808 }
5809 }
5810 }
5811
5812 return regparm;
5813 }
5814
5815 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5816 DFmode (2) arguments in SSE registers for a function with the
5817 indicated TYPE and DECL. DECL may be NULL when calling function
5818 indirectly or considering a libcall. Otherwise return 0. */
5819
5820 static int
5821 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5822 {
5823 gcc_assert (!TARGET_64BIT);
5824
5825 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5826 by the sseregparm attribute. */
5827 if (TARGET_SSEREGPARM
5828 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5829 {
5830 if (!TARGET_SSE)
5831 {
5832 if (warn)
5833 {
5834 if (decl)
5835 error ("calling %qD with attribute sseregparm without "
5836 "SSE/SSE2 enabled", decl);
5837 else
5838 error ("calling %qT with attribute sseregparm without "
5839 "SSE/SSE2 enabled", type);
5840 }
5841 return 0;
5842 }
5843
5844 return 2;
5845 }
5846
5847 if (!decl)
5848 return 0;
5849
5850 cgraph_node *target = cgraph_node::get (decl);
5851 if (target)
5852 target = target->function_symbol ();
5853
5854 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5855 (and DFmode for SSE2) arguments in SSE registers. */
5856 if (target
5857 /* TARGET_SSE_MATH */
5858 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
5859 && opt_for_fn (target->decl, optimize)
5860 && !(profile_flag && !flag_fentry))
5861 {
5862 cgraph_local_info *i = &target->local;
5863 if (i && i->local && i->can_change_signature)
5864 {
5865 /* Refuse to produce wrong code when a local function with SSE enabled
5866 is called from an SSE-disabled function.
5867 We could work hard to work out these scenarios, but hopefully
5868 it does not matter in practice. */
5869 if (!TARGET_SSE && warn)
5870 {
5871 error ("calling %qD with SSE calling convention without "
5872 "SSE/SSE2 enabled", decl);
5873 return 0;
5874 }
5875 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
5876 ->x_ix86_isa_flags) ? 2 : 1;
5877 }
5878 }
5879
5880 return 0;
5881 }
5882
5883 /* Return true if EAX is live at the start of the function. Used by
5884 ix86_expand_prologue to determine if we need special help before
5885 calling allocate_stack_worker. */
5886
5887 static bool
5888 ix86_eax_live_at_start_p (void)
5889 {
5890 /* Cheat. Don't bother working forward from ix86_function_regparm
5891 to the function type to whether an actual argument is located in
5892 eax. Instead just look at cfg info, which is still close enough
5893 to correct at this point. This gives false positives for broken
5894 functions that might use uninitialized data that happens to be
5895 allocated in eax, but who cares? */
5896 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5897 }
5898
5899 static bool
5900 ix86_keep_aggregate_return_pointer (tree fntype)
5901 {
5902 tree attr;
5903
5904 if (!TARGET_64BIT)
5905 {
5906 attr = lookup_attribute ("callee_pop_aggregate_return",
5907 TYPE_ATTRIBUTES (fntype));
5908 if (attr)
5909 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5910
5911 /* For 32-bit MS-ABI the default is to keep the aggregate
5912 return pointer. */
5913 if (ix86_function_type_abi (fntype) == MS_ABI)
5914 return true;
5915 }
5916 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5917 }
5918
5919 /* Value is the number of bytes of arguments automatically
5920 popped when returning from a subroutine call.
5921 FUNDECL is the declaration node of the function (as a tree),
5922 FUNTYPE is the data type of the function (as a tree),
5923 or for a library call it is an identifier node for the subroutine name.
5924 SIZE is the number of bytes of arguments passed on the stack.
5925
5926 On the 80386, the RTD insn may be used to pop them if the number
5927 of args is fixed, but if the number is variable then the caller
5928 must pop them all. RTD can't be used for library calls now
5929 because the library is compiled with the Unix compiler.
5930 Use of RTD is a selectable option, since it is incompatible with
5931 standard Unix calling sequences. If the option is not selected,
5932 the caller must always pop the args.
5933
5934 The attribute stdcall is equivalent to RTD on a per module basis. */
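/* E.g. (sketch): for

     int __attribute__((stdcall)) f (int a, int b);

   this hook returns 8 and the callee returns with "ret $8"; for a plain
   cdecl function it returns 0 and the caller pops the arguments. */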
5935
5936 static int
5937 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5938 {
5939 unsigned int ccvt;
5940
5941 /* None of the 64-bit ABIs pop arguments. */
5942 if (TARGET_64BIT)
5943 return 0;
5944
5945 ccvt = ix86_get_callcvt (funtype);
5946
5947 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5948 | IX86_CALLCVT_THISCALL)) != 0
5949 && ! stdarg_p (funtype))
5950 return size;
5951
5952 /* Lose any fake structure return argument if it is passed on the stack. */
5953 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5954 && !ix86_keep_aggregate_return_pointer (funtype))
5955 {
5956 int nregs = ix86_function_regparm (funtype, fundecl);
5957 if (nregs == 0)
5958 return GET_MODE_SIZE (Pmode);
5959 }
5960
5961 return 0;
5962 }
5963
5964 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5965
5966 static bool
5967 ix86_legitimate_combined_insn (rtx_insn *insn)
5968 {
5969 /* Check operand constraints in case hard registers were propagated
5970 into insn pattern. This check prevents combine pass from
5971 generating insn patterns with invalid hard register operands.
5972 These invalid insns can eventually confuse reload to error out
5973 with a spill failure. See also PRs 46829 and 46843. */
5974 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5975 {
5976 int i;
5977
5978 extract_insn (insn);
5979 preprocess_constraints (insn);
5980
5981 int n_operands = recog_data.n_operands;
5982 int n_alternatives = recog_data.n_alternatives;
5983 for (i = 0; i < n_operands; i++)
5984 {
5985 rtx op = recog_data.operand[i];
5986 machine_mode mode = GET_MODE (op);
5987 const operand_alternative *op_alt;
5988 int offset = 0;
5989 bool win;
5990 int j;
5991
5992 /* For pre-AVX disallow unaligned loads/stores where the
5993 instructions don't support it. */
5994 if (!TARGET_AVX
5995 && VECTOR_MODE_P (GET_MODE (op))
5996 && misaligned_operand (op, GET_MODE (op)))
5997 {
5998 int min_align = get_attr_ssememalign (insn);
5999 if (min_align == 0)
6000 return false;
6001 }
6002
6003 /* A unary operator may be accepted by the predicate, but it
6004 is irrelevant for matching constraints. */
6005 if (UNARY_P (op))
6006 op = XEXP (op, 0);
6007
6008 if (GET_CODE (op) == SUBREG)
6009 {
6010 if (REG_P (SUBREG_REG (op))
6011 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6012 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6013 GET_MODE (SUBREG_REG (op)),
6014 SUBREG_BYTE (op),
6015 GET_MODE (op));
6016 op = SUBREG_REG (op);
6017 }
6018
6019 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6020 continue;
6021
6022 op_alt = recog_op_alt;
6023
6024 /* Operand has no constraints, anything is OK. */
6025 win = !n_alternatives;
6026
6027 alternative_mask preferred = get_preferred_alternatives (insn);
6028 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6029 {
6030 if (!TEST_BIT (preferred, j))
6031 continue;
6032 if (op_alt[i].anything_ok
6033 || (op_alt[i].matches != -1
6034 && operands_match_p
6035 (recog_data.operand[i],
6036 recog_data.operand[op_alt[i].matches]))
6037 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6038 {
6039 win = true;
6040 break;
6041 }
6042 }
6043
6044 if (!win)
6045 return false;
6046 }
6047 }
6048
6049 return true;
6050 }
6051 \f
6052 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
6053
6054 static unsigned HOST_WIDE_INT
6055 ix86_asan_shadow_offset (void)
6056 {
6057 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6058 : HOST_WIDE_INT_C (0x7fff8000))
6059 : (HOST_WIDE_INT_1 << 29);
6060 }
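/* For reference (AddressSanitizer's usual mapping, stated as a sketch):
   shadow_address = (address >> 3) + ix86_asan_shadow_offset (), so the
   values above place the shadow at offset 0x7fff8000 on 64-bit Linux,
   1 << 44 on 64-bit Mach-O and 1 << 29 on 32-bit targets. */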
6061 \f
6062 /* Argument support functions. */
6063
6064 /* Return true when register may be used to pass function parameters. */
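/* For orientation (well-known register sets; sketch only): 32-bit regparm
   uses EAX, EDX and ECX; 64-bit SysV passes integer arguments in RDI, RSI,
   RDX, RCX, R8 and R9 (with RAX/AL carrying the vararg SSE-register count);
   64-bit MS uses RCX, RDX, R8 and R9. */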
6065 bool
6066 ix86_function_arg_regno_p (int regno)
6067 {
6068 int i;
6069 const int *parm_regs;
6070
6071 if (!TARGET_64BIT)
6072 {
6073 if (TARGET_MACHO)
6074 return (regno < REGPARM_MAX
6075 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6076 else
6077 return (regno < REGPARM_MAX
6078 || (TARGET_MMX && MMX_REGNO_P (regno)
6079 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6080 || (TARGET_SSE && SSE_REGNO_P (regno)
6081 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6082 }
6083
6084 if (TARGET_SSE && SSE_REGNO_P (regno)
6085 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6086 return true;
6087
6088 /* TODO: The function should depend on current function ABI but
6089 builtins.c would need updating then. Therefore we use the
6090 default ABI. */
6091
6092 /* RAX is used as hidden argument to va_arg functions. */
6093 if (ix86_abi == SYSV_ABI && regno == AX_REG)
6094 return true;
6095
6096 if (ix86_abi == MS_ABI)
6097 parm_regs = x86_64_ms_abi_int_parameter_registers;
6098 else
6099 parm_regs = x86_64_int_parameter_registers;
6100 for (i = 0; i < (ix86_abi == MS_ABI
6101 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6102 if (regno == parm_regs[i])
6103 return true;
6104 return false;
6105 }
6106
6107 /* Return if we do not know how to pass TYPE solely in registers. */
6108
6109 static bool
6110 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6111 {
6112 if (must_pass_in_stack_var_size_or_pad (mode, type))
6113 return true;
6114
6115 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6116 The layout_type routine is crafty and tries to trick us into passing
6117 currently unsupported vector types on the stack by using TImode. */
6118 return (!TARGET_64BIT && mode == TImode
6119 && type && TREE_CODE (type) != VECTOR_TYPE);
6120 }
6121
6122 /* Return the size, in bytes, of the area reserved for arguments passed
6123 in registers for the function represented by FNDECL, depending on the
6124 ABI used. */
6125 int
6126 ix86_reg_parm_stack_space (const_tree fndecl)
6127 {
6128 enum calling_abi call_abi = SYSV_ABI;
6129 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6130 call_abi = ix86_function_abi (fndecl);
6131 else
6132 call_abi = ix86_function_type_abi (fndecl);
6133 if (TARGET_64BIT && call_abi == MS_ABI)
6134 return 32;
6135 return 0;
6136 }
6137
6138 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
6139    call ABI used.  */
6140 enum calling_abi
6141 ix86_function_type_abi (const_tree fntype)
6142 {
6143 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6144 {
6145 enum calling_abi abi = ix86_abi;
6146 if (abi == SYSV_ABI)
6147 {
6148 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6149 {
6150 if (TARGET_X32)
6151 {
6152 static bool warned = false;
6153 if (!warned)
6154 {
6155 error ("X32 does not support ms_abi attribute");
6156 warned = true;
6157 }
6158 }
6159 abi = MS_ABI;
6160 }
6161 }
6162 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6163 abi = SYSV_ABI;
6164 return abi;
6165 }
6166 return ix86_abi;
6167 }
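/* Illustrative note (annotation, not part of the original file): the
   attributes looked up above are the ones users attach to function types,
   e.g.

       // Force the Microsoft x64 calling convention for one callee.
       int __attribute__ ((ms_abi)) win_style (int a, int b);

       // Force the System V convention even if the default ABI is MS.
       int __attribute__ ((sysv_abi)) unix_style (int a, int b);

   With the default ix86_abi == SYSV_ABI, the first declaration makes this
   function return MS_ABI; the second is redundant but still accepted.  */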
6168
6169 /* We add this as a workaround in order to use libc_has_function
6170 hook in i386.md. */
6171 bool
6172 ix86_libc_has_function (enum function_class fn_class)
6173 {
6174 return targetm.libc_has_function (fn_class);
6175 }
6176
6177 static bool
6178 ix86_function_ms_hook_prologue (const_tree fn)
6179 {
6180 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6181 {
6182 if (decl_function_context (fn) != NULL_TREE)
6183 error_at (DECL_SOURCE_LOCATION (fn),
6184 "ms_hook_prologue is not compatible with nested function");
6185 else
6186 return true;
6187 }
6188 return false;
6189 }
6190
6191 static enum calling_abi
6192 ix86_function_abi (const_tree fndecl)
6193 {
6194 if (! fndecl)
6195 return ix86_abi;
6196 return ix86_function_type_abi (TREE_TYPE (fndecl));
6197 }
6198
6199 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
6200    call ABI used.  */
6201 enum calling_abi
6202 ix86_cfun_abi (void)
6203 {
6204 if (! cfun)
6205 return ix86_abi;
6206 return cfun->machine->call_abi;
6207 }
6208
6209 /* Write the extra assembler code needed to declare a function properly. */
6210
6211 void
6212 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6213 tree decl)
6214 {
6215 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6216
6217 if (is_ms_hook)
6218 {
6219 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6220 unsigned int filler_cc = 0xcccccccc;
6221
6222 for (i = 0; i < filler_count; i += 4)
6223 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6224 }
6225
6226 #ifdef SUBTARGET_ASM_UNWIND_INIT
6227 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6228 #endif
6229
6230 ASM_OUTPUT_LABEL (asm_out_file, fname);
6231
6232 /* Output magic byte marker, if hot-patch attribute is set. */
6233 if (is_ms_hook)
6234 {
6235 if (TARGET_64BIT)
6236 {
6237 /* leaq [%rsp + 0], %rsp */
6238 asm_fprintf (asm_out_file, ASM_BYTE
6239 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6240 }
6241 else
6242 {
6243 /* movl.s %edi, %edi
6244 push %ebp
6245 movl.s %esp, %ebp */
6246 asm_fprintf (asm_out_file, ASM_BYTE
6247 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6248 }
6249 }
6250 }
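/* Illustrative note (annotation, not part of the original file): the
   hot-patch sequence emitted above is requested with

       void __attribute__ ((ms_hook_prologue)) patchable (void) { }

   On a 32-bit target this produces 16 bytes of 0xCC filler before the
   label and the 5-byte "mov.s %edi,%edi; push %ebp; mov.s %esp,%ebp"
   sequence after it, which a runtime patcher can later overwrite with a
   short jump into the filler area.  */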
6251
6252 /* regclass.c */
6253 extern void init_regs (void);
6254
6255 /* Implementation of the call ABI switching target hook.  The call
6256    register sets specific to FNDECL are selected.  See also
6257    ix86_conditional_register_usage for more details.  */
6258 void
6259 ix86_call_abi_override (const_tree fndecl)
6260 {
6261 if (fndecl == NULL_TREE)
6262 cfun->machine->call_abi = ix86_abi;
6263 else
6264 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6265 }
6266
6267 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
6268    Avoid expensive re-initialization of init_regs each time we switch
6269    function context, since this is needed only during RTL expansion.  */
6270 static void
6271 ix86_maybe_switch_abi (void)
6272 {
6273 if (TARGET_64BIT &&
6274 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6275 reinit_regs ();
6276 }
6277
6278 /* Return true if a pseudo register should be created and used to hold
6279    the GOT address for PIC code.  */
6280 bool
6281 ix86_use_pseudo_pic_reg (void)
6282 {
6283 if ((TARGET_64BIT
6284 && (ix86_cmodel == CM_SMALL_PIC
6285 || TARGET_PECOFF))
6286 || !flag_pic)
6287 return false;
6288 return true;
6289 }
6290
6291 /* Initialize large model PIC register. */
6292
6293 static void
6294 ix86_init_large_pic_reg (unsigned int tmp_regno)
6295 {
6296 rtx_code_label *label;
6297 rtx tmp_reg;
6298
6299 gcc_assert (Pmode == DImode);
6300 label = gen_label_rtx ();
6301 emit_label (label);
6302 LABEL_PRESERVE_P (label) = 1;
6303 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6304 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6305 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6306 label));
6307 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6308 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6309 pic_offset_table_rtx, tmp_reg));
6310 }
6311
6312 /* Create and initialize PIC register if required. */
6313 static void
6314 ix86_init_pic_reg (void)
6315 {
6316 edge entry_edge;
6317 rtx_insn *seq;
6318
6319 if (!ix86_use_pseudo_pic_reg ())
6320 return;
6321
6322 start_sequence ();
6323
6324 if (TARGET_64BIT)
6325 {
6326 if (ix86_cmodel == CM_LARGE_PIC)
6327 ix86_init_large_pic_reg (R11_REG);
6328 else
6329 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6330 }
6331 else
6332 {
6333       /* If there is a future mcount call in the function, it is more profitable
6334 	 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
6335 rtx reg = crtl->profile
6336 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6337 : pic_offset_table_rtx;
6338 rtx insn = emit_insn (gen_set_got (reg));
6339 RTX_FRAME_RELATED_P (insn) = 1;
6340 if (crtl->profile)
6341 emit_move_insn (pic_offset_table_rtx, reg);
6342 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6343 }
6344
6345 seq = get_insns ();
6346 end_sequence ();
6347
6348 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6349 insert_insn_on_edge (seq, entry_edge);
6350 commit_one_edge_insertion (entry_edge);
6351 }
6352
6353 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6354 for a call to a function whose data type is FNTYPE.
6355 For a library call, FNTYPE is 0. */
6356
6357 void
6358 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6359 tree fntype, /* tree ptr for function decl */
6360 rtx libname, /* SYMBOL_REF of library name or 0 */
6361 tree fndecl,
6362 int caller)
6363 {
6364 struct cgraph_local_info *i = NULL;
6365 struct cgraph_node *target = NULL;
6366
6367 memset (cum, 0, sizeof (*cum));
6368
6369 if (fndecl)
6370 {
6371 target = cgraph_node::get (fndecl);
6372 if (target)
6373 {
6374 target = target->function_symbol ();
6375 i = cgraph_node::local_info (target->decl);
6376 cum->call_abi = ix86_function_abi (target->decl);
6377 }
6378 else
6379 cum->call_abi = ix86_function_abi (fndecl);
6380 }
6381 else
6382 cum->call_abi = ix86_function_type_abi (fntype);
6383
6384 cum->caller = caller;
6385
6386 /* Set up the number of registers to use for passing arguments. */
6387 cum->nregs = ix86_regparm;
6388 if (TARGET_64BIT)
6389 {
6390 cum->nregs = (cum->call_abi == SYSV_ABI
6391 ? X86_64_REGPARM_MAX
6392 : X86_64_MS_REGPARM_MAX);
6393 }
6394 if (TARGET_SSE)
6395 {
6396 cum->sse_nregs = SSE_REGPARM_MAX;
6397 if (TARGET_64BIT)
6398 {
6399 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6400 ? X86_64_SSE_REGPARM_MAX
6401 : X86_64_MS_SSE_REGPARM_MAX);
6402 }
6403 }
6404 if (TARGET_MMX)
6405 cum->mmx_nregs = MMX_REGPARM_MAX;
6406 cum->warn_avx512f = true;
6407 cum->warn_avx = true;
6408 cum->warn_sse = true;
6409 cum->warn_mmx = true;
6410
6411   /* Because the type might mismatch between caller and callee, we need to
6412      use the actual type of the function for local calls.
6413      FIXME: cgraph_analyze can be told to actually record if a function uses
6414      va_start, so for local functions maybe_vaarg can be made more
6415      aggressive, helping K&R code.
6416      FIXME: once the type system is fixed, we won't need this code anymore.  */
6417 if (i && i->local && i->can_change_signature)
6418 fntype = TREE_TYPE (target->decl);
6419 cum->stdarg = stdarg_p (fntype);
6420 cum->maybe_vaarg = (fntype
6421 ? (!prototype_p (fntype) || stdarg_p (fntype))
6422 : !libname);
6423
6424 cum->bnd_regno = FIRST_BND_REG;
6425 cum->bnds_in_bt = 0;
6426 cum->force_bnd_pass = 0;
6427
6428 if (!TARGET_64BIT)
6429 {
6430 /* If there are variable arguments, then we won't pass anything
6431 in registers in 32-bit mode. */
6432 if (stdarg_p (fntype))
6433 {
6434 cum->nregs = 0;
6435 cum->sse_nregs = 0;
6436 cum->mmx_nregs = 0;
6437 cum->warn_avx512f = false;
6438 cum->warn_avx = false;
6439 cum->warn_sse = false;
6440 cum->warn_mmx = false;
6441 return;
6442 }
6443
6444 /* Use ecx and edx registers if function has fastcall attribute,
6445 else look for regparm information. */
6446 if (fntype)
6447 {
6448 unsigned int ccvt = ix86_get_callcvt (fntype);
6449 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6450 {
6451 cum->nregs = 1;
6452 cum->fastcall = 1; /* Same first register as in fastcall. */
6453 }
6454 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6455 {
6456 cum->nregs = 2;
6457 cum->fastcall = 1;
6458 }
6459 else
6460 cum->nregs = ix86_function_regparm (fntype, fndecl);
6461 }
6462
6463 /* Set up the number of SSE registers used for passing SFmode
6464 and DFmode arguments. Warn for mismatching ABI. */
6465 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6466 }
6467 }
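/* Illustrative note (annotation, not part of the original file): the
   32-bit register-passing state initialized above reacts to declarations
   such as

       int __attribute__ ((fastcall)) f (int a, int b, int c);
       // a in %ecx, b in %edx, c on the stack   (cum->nregs == 2)

       int __attribute__ ((regparm (3))) g (int a, int b, int c);
       // a in %eax, b in %edx, c in %ecx        (cum->nregs == 3)

   A variadic prototype instead zeroes all the register counters, so every
   argument of a stdarg function goes on the stack in 32-bit mode.  */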
6468
6469 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6470 But in the case of vector types, it is some vector mode.
6471
6472 When we have only some of our vector isa extensions enabled, then there
6473 are some modes for which vector_mode_supported_p is false. For these
6474 modes, the generic vector support in gcc will choose some non-vector mode
6475 in order to implement the type. By computing the natural mode, we'll
6476 select the proper ABI location for the operand and not depend on whatever
6477 the middle-end decides to do with these vector types.
6478
6479    The middle-end can't deal with vector types > 16 bytes.  In this
6480    case, we return the original mode and warn about the ABI change if CUM
6481    isn't NULL.
6482
6483    If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6484    available for the function return value.  */
6485
6486 static machine_mode
6487 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6488 bool in_return)
6489 {
6490 machine_mode mode = TYPE_MODE (type);
6491
6492 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6493 {
6494 HOST_WIDE_INT size = int_size_in_bytes (type);
6495 if ((size == 8 || size == 16 || size == 32 || size == 64)
6496 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6497 && TYPE_VECTOR_SUBPARTS (type) > 1)
6498 {
6499 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6500
6501 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6502 mode = MIN_MODE_VECTOR_FLOAT;
6503 else
6504 mode = MIN_MODE_VECTOR_INT;
6505
6506 /* Get the mode which has this inner mode and number of units. */
6507 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6508 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6509 && GET_MODE_INNER (mode) == innermode)
6510 {
6511 if (size == 64 && !TARGET_AVX512F)
6512 {
6513 static bool warnedavx512f;
6514 static bool warnedavx512f_ret;
6515
6516 if (cum && cum->warn_avx512f && !warnedavx512f)
6517 {
6518 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6519 "without AVX512F enabled changes the ABI"))
6520 warnedavx512f = true;
6521 }
6522 else if (in_return && !warnedavx512f_ret)
6523 {
6524 if (warning (OPT_Wpsabi, "AVX512F vector return "
6525 "without AVX512F enabled changes the ABI"))
6526 warnedavx512f_ret = true;
6527 }
6528
6529 return TYPE_MODE (type);
6530 }
6531 else if (size == 32 && !TARGET_AVX)
6532 {
6533 static bool warnedavx;
6534 static bool warnedavx_ret;
6535
6536 if (cum && cum->warn_avx && !warnedavx)
6537 {
6538 if (warning (OPT_Wpsabi, "AVX vector argument "
6539 "without AVX enabled changes the ABI"))
6540 warnedavx = true;
6541 }
6542 else if (in_return && !warnedavx_ret)
6543 {
6544 if (warning (OPT_Wpsabi, "AVX vector return "
6545 "without AVX enabled changes the ABI"))
6546 warnedavx_ret = true;
6547 }
6548
6549 return TYPE_MODE (type);
6550 }
6551 else if (((size == 8 && TARGET_64BIT) || size == 16)
6552 && !TARGET_SSE)
6553 {
6554 static bool warnedsse;
6555 static bool warnedsse_ret;
6556
6557 if (cum && cum->warn_sse && !warnedsse)
6558 {
6559 if (warning (OPT_Wpsabi, "SSE vector argument "
6560 "without SSE enabled changes the ABI"))
6561 warnedsse = true;
6562 }
6563 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6564 {
6565 if (warning (OPT_Wpsabi, "SSE vector return "
6566 "without SSE enabled changes the ABI"))
6567 warnedsse_ret = true;
6568 }
6569 }
6570 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6571 {
6572 static bool warnedmmx;
6573 static bool warnedmmx_ret;
6574
6575 if (cum && cum->warn_mmx && !warnedmmx)
6576 {
6577 if (warning (OPT_Wpsabi, "MMX vector argument "
6578 "without MMX enabled changes the ABI"))
6579 warnedmmx = true;
6580 }
6581 else if (in_return && !warnedmmx_ret)
6582 {
6583 if (warning (OPT_Wpsabi, "MMX vector return "
6584 "without MMX enabled changes the ABI"))
6585 warnedmmx_ret = true;
6586 }
6587 }
6588 return mode;
6589 }
6590
6591 gcc_unreachable ();
6592 }
6593 }
6594
6595 return mode;
6596 }
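/* Illustrative note (annotation, not part of the original file): the
   -Wpsabi diagnostics above fire for generic vector types whose natural
   mode needs an ISA extension that is not enabled, e.g. without -mavx:

       typedef double v4df __attribute__ ((vector_size (32)));
       v4df add (v4df a, v4df b) { return a + b; }
       // warning: AVX vector argument without AVX enabled changes the ABI

   In that case the argument keeps TYPE_MODE (type) instead of V4DFmode and
   ends up being passed in memory rather than in a %ymm register.  */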
6597
6598 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6599 this may not agree with the mode that the type system has chosen for the
6600 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6601 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6602
6603 static rtx
6604 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6605 unsigned int regno)
6606 {
6607 rtx tmp;
6608
6609 if (orig_mode != BLKmode)
6610 tmp = gen_rtx_REG (orig_mode, regno);
6611 else
6612 {
6613 tmp = gen_rtx_REG (mode, regno);
6614 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6615 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6616 }
6617
6618 return tmp;
6619 }
6620
6621 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6622 of this code is to classify each 8bytes of incoming argument by the register
6623 class and assign registers accordingly. */
6624
6625 /* Return the union class of CLASS1 and CLASS2.
6626 See the x86-64 PS ABI for details. */
6627
6628 static enum x86_64_reg_class
6629 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6630 {
6631 /* Rule #1: If both classes are equal, this is the resulting class. */
6632 if (class1 == class2)
6633 return class1;
6634
6635 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6636 the other class. */
6637 if (class1 == X86_64_NO_CLASS)
6638 return class2;
6639 if (class2 == X86_64_NO_CLASS)
6640 return class1;
6641
6642 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6643 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6644 return X86_64_MEMORY_CLASS;
6645
6646 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6647 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6648 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6649 return X86_64_INTEGERSI_CLASS;
6650 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6651 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6652 return X86_64_INTEGER_CLASS;
6653
6654 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6655 MEMORY is used. */
6656 if (class1 == X86_64_X87_CLASS
6657 || class1 == X86_64_X87UP_CLASS
6658 || class1 == X86_64_COMPLEX_X87_CLASS
6659 || class2 == X86_64_X87_CLASS
6660 || class2 == X86_64_X87UP_CLASS
6661 || class2 == X86_64_COMPLEX_X87_CLASS)
6662 return X86_64_MEMORY_CLASS;
6663
6664 /* Rule #6: Otherwise class SSE is used. */
6665 return X86_64_SSE_CLASS;
6666 }
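/* Illustrative worked example (annotation, not part of the original file):
   for

       struct s { int i; float f; };   // 8 bytes, a single eightbyte

   the int contributes X86_64_INTEGERSI_CLASS and the float, sitting in the
   upper half of the eightbyte, contributes X86_64_SSE_CLASS; rule #4 above
   merges them to X86_64_INTEGER_CLASS, so the whole struct travels in one
   general-purpose register.  */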
6667
6668 /* Classify the argument of type TYPE and mode MODE.
6669 CLASSES will be filled by the register class used to pass each word
6670 of the operand. The number of words is returned. In case the parameter
6671 should be passed in memory, 0 is returned. As a special case for zero
6672 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6673
6674    BIT_OFFSET is used internally for handling records; it specifies the
6675    offset in bits modulo 512 to avoid overflow cases.
6676
6677 See the x86-64 PS ABI for details.
6678 */
6679
6680 static int
6681 classify_argument (machine_mode mode, const_tree type,
6682 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6683 {
6684 HOST_WIDE_INT bytes =
6685 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6686 int words
6687 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6688
6689 /* Variable sized entities are always passed/returned in memory. */
6690 if (bytes < 0)
6691 return 0;
6692
6693 if (mode != VOIDmode
6694 && targetm.calls.must_pass_in_stack (mode, type))
6695 return 0;
6696
6697 if (type && AGGREGATE_TYPE_P (type))
6698 {
6699 int i;
6700 tree field;
6701 enum x86_64_reg_class subclasses[MAX_CLASSES];
6702
6703 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6704 if (bytes > 64)
6705 return 0;
6706
6707 for (i = 0; i < words; i++)
6708 classes[i] = X86_64_NO_CLASS;
6709
6710       /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
6711 	 signal the memory class, so handle it as a special case.  */
6712 if (!words)
6713 {
6714 classes[0] = X86_64_NO_CLASS;
6715 return 1;
6716 }
6717
6718 /* Classify each field of record and merge classes. */
6719 switch (TREE_CODE (type))
6720 {
6721 case RECORD_TYPE:
6722 /* And now merge the fields of structure. */
6723 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6724 {
6725 if (TREE_CODE (field) == FIELD_DECL)
6726 {
6727 int num;
6728
6729 if (TREE_TYPE (field) == error_mark_node)
6730 continue;
6731
6732 /* Bitfields are always classified as integer. Handle them
6733 early, since later code would consider them to be
6734 misaligned integers. */
6735 if (DECL_BIT_FIELD (field))
6736 {
6737 for (i = (int_bit_position (field)
6738 + (bit_offset % 64)) / 8 / 8;
6739 i < ((int_bit_position (field) + (bit_offset % 64))
6740 + tree_to_shwi (DECL_SIZE (field))
6741 + 63) / 8 / 8; i++)
6742 classes[i] =
6743 merge_classes (X86_64_INTEGER_CLASS,
6744 classes[i]);
6745 }
6746 else
6747 {
6748 int pos;
6749
6750 type = TREE_TYPE (field);
6751
6752 /* Flexible array member is ignored. */
6753 if (TYPE_MODE (type) == BLKmode
6754 && TREE_CODE (type) == ARRAY_TYPE
6755 && TYPE_SIZE (type) == NULL_TREE
6756 && TYPE_DOMAIN (type) != NULL_TREE
6757 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6758 == NULL_TREE))
6759 {
6760 static bool warned;
6761
6762 if (!warned && warn_psabi)
6763 {
6764 warned = true;
6765 inform (input_location,
6766 "the ABI of passing struct with"
6767 " a flexible array member has"
6768 " changed in GCC 4.4");
6769 }
6770 continue;
6771 }
6772 num = classify_argument (TYPE_MODE (type), type,
6773 subclasses,
6774 (int_bit_position (field)
6775 + bit_offset) % 512);
6776 if (!num)
6777 return 0;
6778 pos = (int_bit_position (field)
6779 + (bit_offset % 64)) / 8 / 8;
6780 for (i = 0; i < num && (i + pos) < words; i++)
6781 classes[i + pos] =
6782 merge_classes (subclasses[i], classes[i + pos]);
6783 }
6784 }
6785 }
6786 break;
6787
6788 case ARRAY_TYPE:
6789 /* Arrays are handled as small records. */
6790 {
6791 int num;
6792 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6793 TREE_TYPE (type), subclasses, bit_offset);
6794 if (!num)
6795 return 0;
6796
6797 /* The partial classes are now full classes. */
6798 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6799 subclasses[0] = X86_64_SSE_CLASS;
6800 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6801 && !((bit_offset % 64) == 0 && bytes == 4))
6802 subclasses[0] = X86_64_INTEGER_CLASS;
6803
6804 for (i = 0; i < words; i++)
6805 classes[i] = subclasses[i % num];
6806
6807 break;
6808 }
6809 case UNION_TYPE:
6810 case QUAL_UNION_TYPE:
6811 	/* Unions are similar to RECORD_TYPE but the offset is always
6812 	   0.  */
6813 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6814 {
6815 if (TREE_CODE (field) == FIELD_DECL)
6816 {
6817 int num;
6818
6819 if (TREE_TYPE (field) == error_mark_node)
6820 continue;
6821
6822 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6823 TREE_TYPE (field), subclasses,
6824 bit_offset);
6825 if (!num)
6826 return 0;
6827 for (i = 0; i < num && i < words; i++)
6828 classes[i] = merge_classes (subclasses[i], classes[i]);
6829 }
6830 }
6831 break;
6832
6833 default:
6834 gcc_unreachable ();
6835 }
6836
6837 if (words > 2)
6838 {
6839 	  /* When the size is > 16 bytes, if the first eightbyte isn't
6840 	     X86_64_SSE_CLASS or any of the other ones aren't
6841 	     X86_64_SSEUP_CLASS, everything should be passed in
6842 	     memory.  */
6843 if (classes[0] != X86_64_SSE_CLASS)
6844 return 0;
6845
6846 for (i = 1; i < words; i++)
6847 if (classes[i] != X86_64_SSEUP_CLASS)
6848 return 0;
6849 }
6850
6851 /* Final merger cleanup. */
6852 for (i = 0; i < words; i++)
6853 {
6854 /* If one class is MEMORY, everything should be passed in
6855 memory. */
6856 if (classes[i] == X86_64_MEMORY_CLASS)
6857 return 0;
6858
6859 /* The X86_64_SSEUP_CLASS should be always preceded by
6860 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6861 if (classes[i] == X86_64_SSEUP_CLASS
6862 && classes[i - 1] != X86_64_SSE_CLASS
6863 && classes[i - 1] != X86_64_SSEUP_CLASS)
6864 {
6865 /* The first one should never be X86_64_SSEUP_CLASS. */
6866 gcc_assert (i != 0);
6867 classes[i] = X86_64_SSE_CLASS;
6868 }
6869
6870 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6871 everything should be passed in memory. */
6872 if (classes[i] == X86_64_X87UP_CLASS
6873 && (classes[i - 1] != X86_64_X87_CLASS))
6874 {
6875 static bool warned;
6876
6877 /* The first one should never be X86_64_X87UP_CLASS. */
6878 gcc_assert (i != 0);
6879 if (!warned && warn_psabi)
6880 {
6881 warned = true;
6882 inform (input_location,
6883 "the ABI of passing union with long double"
6884 " has changed in GCC 4.4");
6885 }
6886 return 0;
6887 }
6888 }
6889 return words;
6890 }
6891
6892 /* Compute alignment needed. We align all types to natural boundaries with
6893 exception of XFmode that is aligned to 64bits. */
6894 if (mode != VOIDmode && mode != BLKmode)
6895 {
6896 int mode_alignment = GET_MODE_BITSIZE (mode);
6897
6898 if (mode == XFmode)
6899 mode_alignment = 128;
6900 else if (mode == XCmode)
6901 mode_alignment = 256;
6902 if (COMPLEX_MODE_P (mode))
6903 mode_alignment /= 2;
6904 /* Misaligned fields are always returned in memory. */
6905 if (bit_offset % mode_alignment)
6906 return 0;
6907 }
6908
6909   /* For V1xx modes, just use the base mode.  */
6910 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6911 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6912 mode = GET_MODE_INNER (mode);
6913
6914 /* Classification of atomic types. */
6915 switch (mode)
6916 {
6917 case SDmode:
6918 case DDmode:
6919 classes[0] = X86_64_SSE_CLASS;
6920 return 1;
6921 case TDmode:
6922 classes[0] = X86_64_SSE_CLASS;
6923 classes[1] = X86_64_SSEUP_CLASS;
6924 return 2;
6925 case DImode:
6926 case SImode:
6927 case HImode:
6928 case QImode:
6929 case CSImode:
6930 case CHImode:
6931 case CQImode:
6932 {
6933 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6934
6935 /* Analyze last 128 bits only. */
6936 size = (size - 1) & 0x7f;
6937
6938 if (size < 32)
6939 {
6940 classes[0] = X86_64_INTEGERSI_CLASS;
6941 return 1;
6942 }
6943 else if (size < 64)
6944 {
6945 classes[0] = X86_64_INTEGER_CLASS;
6946 return 1;
6947 }
6948 else if (size < 64+32)
6949 {
6950 classes[0] = X86_64_INTEGER_CLASS;
6951 classes[1] = X86_64_INTEGERSI_CLASS;
6952 return 2;
6953 }
6954 else if (size < 64+64)
6955 {
6956 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6957 return 2;
6958 }
6959 else
6960 gcc_unreachable ();
6961 }
6962 case CDImode:
6963 case TImode:
6964 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6965 return 2;
6966 case COImode:
6967 case OImode:
6968 /* OImode shouldn't be used directly. */
6969 gcc_unreachable ();
6970 case CTImode:
6971 return 0;
6972 case SFmode:
6973 if (!(bit_offset % 64))
6974 classes[0] = X86_64_SSESF_CLASS;
6975 else
6976 classes[0] = X86_64_SSE_CLASS;
6977 return 1;
6978 case DFmode:
6979 classes[0] = X86_64_SSEDF_CLASS;
6980 return 1;
6981 case XFmode:
6982 classes[0] = X86_64_X87_CLASS;
6983 classes[1] = X86_64_X87UP_CLASS;
6984 return 2;
6985 case TFmode:
6986 classes[0] = X86_64_SSE_CLASS;
6987 classes[1] = X86_64_SSEUP_CLASS;
6988 return 2;
6989 case SCmode:
6990 classes[0] = X86_64_SSE_CLASS;
6991 if (!(bit_offset % 64))
6992 return 1;
6993 else
6994 {
6995 static bool warned;
6996
6997 if (!warned && warn_psabi)
6998 {
6999 warned = true;
7000 inform (input_location,
7001 "the ABI of passing structure with complex float"
7002 " member has changed in GCC 4.4");
7003 }
7004 classes[1] = X86_64_SSESF_CLASS;
7005 return 2;
7006 }
7007 case DCmode:
7008 classes[0] = X86_64_SSEDF_CLASS;
7009 classes[1] = X86_64_SSEDF_CLASS;
7010 return 2;
7011 case XCmode:
7012 classes[0] = X86_64_COMPLEX_X87_CLASS;
7013 return 1;
7014 case TCmode:
7015       /* This mode is larger than 16 bytes.  */
7016 return 0;
7017 case V8SFmode:
7018 case V8SImode:
7019 case V32QImode:
7020 case V16HImode:
7021 case V4DFmode:
7022 case V4DImode:
7023 classes[0] = X86_64_SSE_CLASS;
7024 classes[1] = X86_64_SSEUP_CLASS;
7025 classes[2] = X86_64_SSEUP_CLASS;
7026 classes[3] = X86_64_SSEUP_CLASS;
7027 return 4;
7028 case V8DFmode:
7029 case V16SFmode:
7030 case V8DImode:
7031 case V16SImode:
7032 case V32HImode:
7033 case V64QImode:
7034 classes[0] = X86_64_SSE_CLASS;
7035 classes[1] = X86_64_SSEUP_CLASS;
7036 classes[2] = X86_64_SSEUP_CLASS;
7037 classes[3] = X86_64_SSEUP_CLASS;
7038 classes[4] = X86_64_SSEUP_CLASS;
7039 classes[5] = X86_64_SSEUP_CLASS;
7040 classes[6] = X86_64_SSEUP_CLASS;
7041 classes[7] = X86_64_SSEUP_CLASS;
7042 return 8;
7043 case V4SFmode:
7044 case V4SImode:
7045 case V16QImode:
7046 case V8HImode:
7047 case V2DFmode:
7048 case V2DImode:
7049 classes[0] = X86_64_SSE_CLASS;
7050 classes[1] = X86_64_SSEUP_CLASS;
7051 return 2;
7052 case V1TImode:
7053 case V1DImode:
7054 case V2SFmode:
7055 case V2SImode:
7056 case V4HImode:
7057 case V8QImode:
7058 classes[0] = X86_64_SSE_CLASS;
7059 return 1;
7060 case BLKmode:
7061 case VOIDmode:
7062 return 0;
7063 default:
7064 gcc_assert (VECTOR_MODE_P (mode));
7065
7066 if (bytes > 16)
7067 return 0;
7068
7069 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7070
7071 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7072 classes[0] = X86_64_INTEGERSI_CLASS;
7073 else
7074 classes[0] = X86_64_INTEGER_CLASS;
7075 classes[1] = X86_64_INTEGER_CLASS;
7076 return 1 + (bytes > 8);
7077 }
7078 }
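/* Illustrative worked examples (annotation, not part of the original file):

       struct p { double x, y; };      // classes = { SSEDF, SSEDF }, returns 2
       struct q { long a; double d; }; // classes = { INTEGER, SSEDF }, returns 2
       struct r { char c[24]; };       // > 16 bytes, no SSE class -> returns 0

   A return value of 0 always means "pass or return in memory"; a non-zero
   value is the number of eightbytes described in CLASSES.  */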
7079
7080 /* Examine the argument and record the number of registers required in
7081    each class.  Return true iff the parameter should be passed in memory.  */
7082
7083 static bool
7084 examine_argument (machine_mode mode, const_tree type, int in_return,
7085 int *int_nregs, int *sse_nregs)
7086 {
7087 enum x86_64_reg_class regclass[MAX_CLASSES];
7088 int n = classify_argument (mode, type, regclass, 0);
7089
7090 *int_nregs = 0;
7091 *sse_nregs = 0;
7092
7093 if (!n)
7094 return true;
7095 for (n--; n >= 0; n--)
7096 switch (regclass[n])
7097 {
7098 case X86_64_INTEGER_CLASS:
7099 case X86_64_INTEGERSI_CLASS:
7100 (*int_nregs)++;
7101 break;
7102 case X86_64_SSE_CLASS:
7103 case X86_64_SSESF_CLASS:
7104 case X86_64_SSEDF_CLASS:
7105 (*sse_nregs)++;
7106 break;
7107 case X86_64_NO_CLASS:
7108 case X86_64_SSEUP_CLASS:
7109 break;
7110 case X86_64_X87_CLASS:
7111 case X86_64_X87UP_CLASS:
7112 case X86_64_COMPLEX_X87_CLASS:
7113 if (!in_return)
7114 return true;
7115 break;
7116 case X86_64_MEMORY_CLASS:
7117 gcc_unreachable ();
7118 }
7119
7120 return false;
7121 }
7122
7123 /* Construct container for the argument used by GCC interface. See
7124 FUNCTION_ARG for the detailed description. */
7125
7126 static rtx
7127 construct_container (machine_mode mode, machine_mode orig_mode,
7128 const_tree type, int in_return, int nintregs, int nsseregs,
7129 const int *intreg, int sse_regno)
7130 {
7131 /* The following variables hold the static issued_error state. */
7132 static bool issued_sse_arg_error;
7133 static bool issued_sse_ret_error;
7134 static bool issued_x87_ret_error;
7135
7136 machine_mode tmpmode;
7137 int bytes =
7138 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7139 enum x86_64_reg_class regclass[MAX_CLASSES];
7140 int n;
7141 int i;
7142 int nexps = 0;
7143 int needed_sseregs, needed_intregs;
7144 rtx exp[MAX_CLASSES];
7145 rtx ret;
7146
7147 n = classify_argument (mode, type, regclass, 0);
7148 if (!n)
7149 return NULL;
7150 if (examine_argument (mode, type, in_return, &needed_intregs,
7151 &needed_sseregs))
7152 return NULL;
7153 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7154 return NULL;
7155
7156 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7157 some less clueful developer tries to use floating-point anyway. */
7158 if (needed_sseregs && !TARGET_SSE)
7159 {
7160 if (in_return)
7161 {
7162 if (!issued_sse_ret_error)
7163 {
7164 error ("SSE register return with SSE disabled");
7165 issued_sse_ret_error = true;
7166 }
7167 }
7168 else if (!issued_sse_arg_error)
7169 {
7170 error ("SSE register argument with SSE disabled");
7171 issued_sse_arg_error = true;
7172 }
7173 return NULL;
7174 }
7175
7176 /* Likewise, error if the ABI requires us to return values in the
7177 x87 registers and the user specified -mno-80387. */
7178 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7179 for (i = 0; i < n; i++)
7180 if (regclass[i] == X86_64_X87_CLASS
7181 || regclass[i] == X86_64_X87UP_CLASS
7182 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7183 {
7184 if (!issued_x87_ret_error)
7185 {
7186 error ("x87 register return with x87 disabled");
7187 issued_x87_ret_error = true;
7188 }
7189 return NULL;
7190 }
7191
7192 /* First construct simple cases. Avoid SCmode, since we want to use
7193 single register to pass this type. */
7194 if (n == 1 && mode != SCmode)
7195 switch (regclass[0])
7196 {
7197 case X86_64_INTEGER_CLASS:
7198 case X86_64_INTEGERSI_CLASS:
7199 return gen_rtx_REG (mode, intreg[0]);
7200 case X86_64_SSE_CLASS:
7201 case X86_64_SSESF_CLASS:
7202 case X86_64_SSEDF_CLASS:
7203 if (mode != BLKmode)
7204 return gen_reg_or_parallel (mode, orig_mode,
7205 SSE_REGNO (sse_regno));
7206 break;
7207 case X86_64_X87_CLASS:
7208 case X86_64_COMPLEX_X87_CLASS:
7209 return gen_rtx_REG (mode, FIRST_STACK_REG);
7210 case X86_64_NO_CLASS:
7211 /* Zero sized array, struct or class. */
7212 return NULL;
7213 default:
7214 gcc_unreachable ();
7215 }
7216 if (n == 2
7217 && regclass[0] == X86_64_SSE_CLASS
7218 && regclass[1] == X86_64_SSEUP_CLASS
7219 && mode != BLKmode)
7220 return gen_reg_or_parallel (mode, orig_mode,
7221 SSE_REGNO (sse_regno));
7222 if (n == 4
7223 && regclass[0] == X86_64_SSE_CLASS
7224 && regclass[1] == X86_64_SSEUP_CLASS
7225 && regclass[2] == X86_64_SSEUP_CLASS
7226 && regclass[3] == X86_64_SSEUP_CLASS
7227 && mode != BLKmode)
7228 return gen_reg_or_parallel (mode, orig_mode,
7229 SSE_REGNO (sse_regno));
7230 if (n == 8
7231 && regclass[0] == X86_64_SSE_CLASS
7232 && regclass[1] == X86_64_SSEUP_CLASS
7233 && regclass[2] == X86_64_SSEUP_CLASS
7234 && regclass[3] == X86_64_SSEUP_CLASS
7235 && regclass[4] == X86_64_SSEUP_CLASS
7236 && regclass[5] == X86_64_SSEUP_CLASS
7237 && regclass[6] == X86_64_SSEUP_CLASS
7238 && regclass[7] == X86_64_SSEUP_CLASS
7239 && mode != BLKmode)
7240 return gen_reg_or_parallel (mode, orig_mode,
7241 SSE_REGNO (sse_regno));
7242 if (n == 2
7243 && regclass[0] == X86_64_X87_CLASS
7244 && regclass[1] == X86_64_X87UP_CLASS)
7245 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7246
7247 if (n == 2
7248 && regclass[0] == X86_64_INTEGER_CLASS
7249 && regclass[1] == X86_64_INTEGER_CLASS
7250 && (mode == CDImode || mode == TImode)
7251 && intreg[0] + 1 == intreg[1])
7252 return gen_rtx_REG (mode, intreg[0]);
7253
7254 /* Otherwise figure out the entries of the PARALLEL. */
7255 for (i = 0; i < n; i++)
7256 {
7257 int pos;
7258
7259 switch (regclass[i])
7260 {
7261 case X86_64_NO_CLASS:
7262 break;
7263 case X86_64_INTEGER_CLASS:
7264 case X86_64_INTEGERSI_CLASS:
7265 /* Merge TImodes on aligned occasions here too. */
7266 if (i * 8 + 8 > bytes)
7267 tmpmode
7268 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7269 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7270 tmpmode = SImode;
7271 else
7272 tmpmode = DImode;
7273 	  /* We've requested 24 bytes for which we
7274 	     don't have a mode.  Use DImode.  */
7275 if (tmpmode == BLKmode)
7276 tmpmode = DImode;
7277 exp [nexps++]
7278 = gen_rtx_EXPR_LIST (VOIDmode,
7279 gen_rtx_REG (tmpmode, *intreg),
7280 GEN_INT (i*8));
7281 intreg++;
7282 break;
7283 case X86_64_SSESF_CLASS:
7284 exp [nexps++]
7285 = gen_rtx_EXPR_LIST (VOIDmode,
7286 gen_rtx_REG (SFmode,
7287 SSE_REGNO (sse_regno)),
7288 GEN_INT (i*8));
7289 sse_regno++;
7290 break;
7291 case X86_64_SSEDF_CLASS:
7292 exp [nexps++]
7293 = gen_rtx_EXPR_LIST (VOIDmode,
7294 gen_rtx_REG (DFmode,
7295 SSE_REGNO (sse_regno)),
7296 GEN_INT (i*8));
7297 sse_regno++;
7298 break;
7299 case X86_64_SSE_CLASS:
7300 pos = i;
7301 switch (n)
7302 {
7303 case 1:
7304 tmpmode = DImode;
7305 break;
7306 case 2:
7307 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7308 {
7309 tmpmode = TImode;
7310 i++;
7311 }
7312 else
7313 tmpmode = DImode;
7314 break;
7315 case 4:
7316 gcc_assert (i == 0
7317 && regclass[1] == X86_64_SSEUP_CLASS
7318 && regclass[2] == X86_64_SSEUP_CLASS
7319 && regclass[3] == X86_64_SSEUP_CLASS);
7320 tmpmode = OImode;
7321 i += 3;
7322 break;
7323 case 8:
7324 gcc_assert (i == 0
7325 && regclass[1] == X86_64_SSEUP_CLASS
7326 && regclass[2] == X86_64_SSEUP_CLASS
7327 && regclass[3] == X86_64_SSEUP_CLASS
7328 && regclass[4] == X86_64_SSEUP_CLASS
7329 && regclass[5] == X86_64_SSEUP_CLASS
7330 && regclass[6] == X86_64_SSEUP_CLASS
7331 && regclass[7] == X86_64_SSEUP_CLASS);
7332 tmpmode = XImode;
7333 i += 7;
7334 break;
7335 default:
7336 gcc_unreachable ();
7337 }
7338 exp [nexps++]
7339 = gen_rtx_EXPR_LIST (VOIDmode,
7340 gen_rtx_REG (tmpmode,
7341 SSE_REGNO (sse_regno)),
7342 GEN_INT (pos*8));
7343 sse_regno++;
7344 break;
7345 default:
7346 gcc_unreachable ();
7347 }
7348 }
7349
7350 /* Empty aligned struct, union or class. */
7351 if (nexps == 0)
7352 return NULL;
7353
7354 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7355 for (i = 0; i < nexps; i++)
7356 XVECEXP (ret, 0, i) = exp [i];
7357 return ret;
7358 }
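/* Illustrative note (annotation, not part of the original file): for a two
   eightbyte argument classified as { INTEGER, SSEDF } (e.g. the struct q
   sketched above), and assuming the first integer and SSE argument
   registers are still free, the PARALLEL built here is conceptually

       (parallel [(expr_list (reg:DI di)   (const_int 0))
                  (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. the low eightbyte travels in an integer register and the high
   eightbyte in an SSE register, each tagged with its byte offset.  */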
7359
7360 /* Update the data in CUM to advance over an argument of mode MODE
7361 and data type TYPE. (TYPE is null for libcalls where that information
7362 may not be available.)
7363
7364    Return the number of integer registers advanced over.  */
7365
7366 static int
7367 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7368 const_tree type, HOST_WIDE_INT bytes,
7369 HOST_WIDE_INT words)
7370 {
7371 int res = 0;
7372
7373 switch (mode)
7374 {
7375 default:
7376 break;
7377
7378 case BLKmode:
7379 if (bytes < 0)
7380 break;
7381 /* FALLTHRU */
7382
7383 case DImode:
7384 case SImode:
7385 case HImode:
7386 case QImode:
7387 cum->words += words;
7388 cum->nregs -= words;
7389 cum->regno += words;
7390 if (cum->nregs >= 0)
7391 res = words;
7392 if (cum->nregs <= 0)
7393 {
7394 cum->nregs = 0;
7395 cum->regno = 0;
7396 }
7397 break;
7398
7399 case OImode:
7400 /* OImode shouldn't be used directly. */
7401 gcc_unreachable ();
7402
7403 case DFmode:
7404 if (cum->float_in_sse < 2)
7405 break;
7406 case SFmode:
7407 if (cum->float_in_sse < 1)
7408 break;
7409 /* FALLTHRU */
7410
7411 case V8SFmode:
7412 case V8SImode:
7413 case V64QImode:
7414 case V32HImode:
7415 case V16SImode:
7416 case V8DImode:
7417 case V16SFmode:
7418 case V8DFmode:
7419 case V32QImode:
7420 case V16HImode:
7421 case V4DFmode:
7422 case V4DImode:
7423 case TImode:
7424 case V16QImode:
7425 case V8HImode:
7426 case V4SImode:
7427 case V2DImode:
7428 case V4SFmode:
7429 case V2DFmode:
7430 if (!type || !AGGREGATE_TYPE_P (type))
7431 {
7432 cum->sse_words += words;
7433 cum->sse_nregs -= 1;
7434 cum->sse_regno += 1;
7435 if (cum->sse_nregs <= 0)
7436 {
7437 cum->sse_nregs = 0;
7438 cum->sse_regno = 0;
7439 }
7440 }
7441 break;
7442
7443 case V8QImode:
7444 case V4HImode:
7445 case V2SImode:
7446 case V2SFmode:
7447 case V1TImode:
7448 case V1DImode:
7449 if (!type || !AGGREGATE_TYPE_P (type))
7450 {
7451 cum->mmx_words += words;
7452 cum->mmx_nregs -= 1;
7453 cum->mmx_regno += 1;
7454 if (cum->mmx_nregs <= 0)
7455 {
7456 cum->mmx_nregs = 0;
7457 cum->mmx_regno = 0;
7458 }
7459 }
7460 break;
7461 }
7462
7463 return res;
7464 }
7465
7466 static int
7467 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7468 const_tree type, HOST_WIDE_INT words, bool named)
7469 {
7470 int int_nregs, sse_nregs;
7471
7472 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
7473 if (!named && (VALID_AVX512F_REG_MODE (mode)
7474 || VALID_AVX256_REG_MODE (mode)))
7475 return 0;
7476
7477 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7478 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7479 {
7480 cum->nregs -= int_nregs;
7481 cum->sse_nregs -= sse_nregs;
7482 cum->regno += int_nregs;
7483 cum->sse_regno += sse_nregs;
7484 return int_nregs;
7485 }
7486 else
7487 {
7488 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7489 cum->words = (cum->words + align - 1) & ~(align - 1);
7490 cum->words += words;
7491 return 0;
7492 }
7493 }
7494
7495 static int
7496 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7497 HOST_WIDE_INT words)
7498 {
7499   /* Otherwise, this should be passed indirectly.  */
7500 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7501
7502 cum->words += words;
7503 if (cum->nregs > 0)
7504 {
7505 cum->nregs -= 1;
7506 cum->regno += 1;
7507 return 1;
7508 }
7509 return 0;
7510 }
7511
7512 /* Update the data in CUM to advance over an argument of mode MODE and
7513 data type TYPE. (TYPE is null for libcalls where that information
7514 may not be available.) */
7515
7516 static void
7517 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7518 const_tree type, bool named)
7519 {
7520 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7521 HOST_WIDE_INT bytes, words;
7522 int nregs;
7523
7524 if (mode == BLKmode)
7525 bytes = int_size_in_bytes (type);
7526 else
7527 bytes = GET_MODE_SIZE (mode);
7528 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7529
7530 if (type)
7531 mode = type_natural_mode (type, NULL, false);
7532
7533 if ((type && POINTER_BOUNDS_TYPE_P (type))
7534 || POINTER_BOUNDS_MODE_P (mode))
7535 {
7536 /* If we pass bounds in BT then just update remained bounds count. */
7537 if (cum->bnds_in_bt)
7538 {
7539 cum->bnds_in_bt--;
7540 return;
7541 }
7542
7543 /* Update remained number of bounds to force. */
7544 if (cum->force_bnd_pass)
7545 cum->force_bnd_pass--;
7546
7547 cum->bnd_regno++;
7548
7549 return;
7550 }
7551
7552 /* The first arg not going to Bounds Tables resets this counter. */
7553 cum->bnds_in_bt = 0;
7554   /* For unnamed args we always pass bounds, to avoid bounds confusion when
7555      the passed and received types do not match.  If bounds do not follow an
7556      unnamed arg, still pretend the required number of bounds were passed.  */
7557 if (cum->force_bnd_pass)
7558 {
7559 cum->bnd_regno += cum->force_bnd_pass;
7560 cum->force_bnd_pass = 0;
7561 }
7562
7563 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7564 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7565 else if (TARGET_64BIT)
7566 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7567 else
7568 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7569
7570 /* For stdarg we expect bounds to be passed for each value passed
7571 in register. */
7572 if (cum->stdarg)
7573 cum->force_bnd_pass = nregs;
7574 /* For pointers passed in memory we expect bounds passed in Bounds
7575 Table. */
7576 if (!nregs)
7577 cum->bnds_in_bt = chkp_type_bounds_count (type);
7578 }
7579
7580 /* Define where to put the arguments to a function.
7581 Value is zero to push the argument on the stack,
7582 or a hard register in which to store the argument.
7583
7584 MODE is the argument's machine mode.
7585 TYPE is the data type of the argument (as a tree).
7586 This is null for libcalls where that information may
7587 not be available.
7588 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7589 the preceding args and about the function being called.
7590 NAMED is nonzero if this argument is a named parameter
7591 (otherwise it is an extra parameter matching an ellipsis). */
7592
7593 static rtx
7594 function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7595 machine_mode orig_mode, const_tree type,
7596 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7597 {
7598 /* Avoid the AL settings for the Unix64 ABI. */
7599 if (mode == VOIDmode)
7600 return constm1_rtx;
7601
7602 switch (mode)
7603 {
7604 default:
7605 break;
7606
7607 case BLKmode:
7608 if (bytes < 0)
7609 break;
7610 /* FALLTHRU */
7611 case DImode:
7612 case SImode:
7613 case HImode:
7614 case QImode:
7615 if (words <= cum->nregs)
7616 {
7617 int regno = cum->regno;
7618
7619 /* Fastcall allocates the first two DWORD (SImode) or
7620 smaller arguments to ECX and EDX if it isn't an
7621 	     aggregate type.  */
7622 if (cum->fastcall)
7623 {
7624 if (mode == BLKmode
7625 || mode == DImode
7626 || (type && AGGREGATE_TYPE_P (type)))
7627 break;
7628
7629 /* ECX not EAX is the first allocated register. */
7630 if (regno == AX_REG)
7631 regno = CX_REG;
7632 }
7633 return gen_rtx_REG (mode, regno);
7634 }
7635 break;
7636
7637 case DFmode:
7638 if (cum->float_in_sse < 2)
7639 break;
7640 case SFmode:
7641 if (cum->float_in_sse < 1)
7642 break;
7643 /* FALLTHRU */
7644 case TImode:
7645 /* In 32bit, we pass TImode in xmm registers. */
7646 case V16QImode:
7647 case V8HImode:
7648 case V4SImode:
7649 case V2DImode:
7650 case V4SFmode:
7651 case V2DFmode:
7652 if (!type || !AGGREGATE_TYPE_P (type))
7653 {
7654 if (cum->sse_nregs)
7655 return gen_reg_or_parallel (mode, orig_mode,
7656 cum->sse_regno + FIRST_SSE_REG);
7657 }
7658 break;
7659
7660 case OImode:
7661 case XImode:
7662 /* OImode and XImode shouldn't be used directly. */
7663 gcc_unreachable ();
7664
7665 case V64QImode:
7666 case V32HImode:
7667 case V16SImode:
7668 case V8DImode:
7669 case V16SFmode:
7670 case V8DFmode:
7671 case V8SFmode:
7672 case V8SImode:
7673 case V32QImode:
7674 case V16HImode:
7675 case V4DFmode:
7676 case V4DImode:
7677 if (!type || !AGGREGATE_TYPE_P (type))
7678 {
7679 if (cum->sse_nregs)
7680 return gen_reg_or_parallel (mode, orig_mode,
7681 cum->sse_regno + FIRST_SSE_REG);
7682 }
7683 break;
7684
7685 case V8QImode:
7686 case V4HImode:
7687 case V2SImode:
7688 case V2SFmode:
7689 case V1TImode:
7690 case V1DImode:
7691 if (!type || !AGGREGATE_TYPE_P (type))
7692 {
7693 if (cum->mmx_nregs)
7694 return gen_reg_or_parallel (mode, orig_mode,
7695 cum->mmx_regno + FIRST_MMX_REG);
7696 }
7697 break;
7698 }
7699
7700 return NULL_RTX;
7701 }
7702
7703 static rtx
7704 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7705 machine_mode orig_mode, const_tree type, bool named)
7706 {
7707 /* Handle a hidden AL argument containing number of registers
7708 for varargs x86-64 functions. */
7709 if (mode == VOIDmode)
7710 return GEN_INT (cum->maybe_vaarg
7711 ? (cum->sse_nregs < 0
7712 ? X86_64_SSE_REGPARM_MAX
7713 : cum->sse_regno)
7714 : -1);
7715
7716 switch (mode)
7717 {
7718 default:
7719 break;
7720
7721 case V8SFmode:
7722 case V8SImode:
7723 case V32QImode:
7724 case V16HImode:
7725 case V4DFmode:
7726 case V4DImode:
7727 case V16SFmode:
7728 case V16SImode:
7729 case V64QImode:
7730 case V32HImode:
7731 case V8DFmode:
7732 case V8DImode:
7733 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
7734 if (!named)
7735 return NULL;
7736 break;
7737 }
7738
7739 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7740 cum->sse_nregs,
7741 &x86_64_int_parameter_registers [cum->regno],
7742 cum->sse_regno);
7743 }
7744
7745 static rtx
7746 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7747 machine_mode orig_mode, bool named,
7748 HOST_WIDE_INT bytes)
7749 {
7750 unsigned int regno;
7751
7752   /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7753      We use the value -2 to specify that the current function call is MS ABI.  */
7754 if (mode == VOIDmode)
7755 return GEN_INT (-2);
7756
7757 /* If we've run out of registers, it goes on the stack. */
7758 if (cum->nregs == 0)
7759 return NULL_RTX;
7760
7761 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7762
7763 /* Only floating point modes are passed in anything but integer regs. */
7764 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7765 {
7766 if (named)
7767 regno = cum->regno + FIRST_SSE_REG;
7768 else
7769 {
7770 rtx t1, t2;
7771
7772 /* Unnamed floating parameters are passed in both the
7773 SSE and integer registers. */
7774 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7775 t2 = gen_rtx_REG (mode, regno);
7776 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7777 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7778 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7779 }
7780 }
7781   /* Handle aggregate types passed in registers.  */
7782 if (orig_mode == BLKmode)
7783 {
7784 if (bytes > 0 && bytes <= 8)
7785 mode = (bytes > 4 ? DImode : SImode);
7786 if (mode == BLKmode)
7787 mode = DImode;
7788 }
7789
7790 return gen_reg_or_parallel (mode, orig_mode, regno);
7791 }
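/* Illustrative note (annotation, not part of the original file): the
   two-element PARALLEL above implements the MS x64 rule that unnamed
   floating-point arguments are duplicated, e.g. for

       void log_it (const char *fmt, ...);
       log_it ("%f", 3.14);   // the double is an unnamed argument

   the caller materializes 3.14 both in the SSE register and in the integer
   register of the corresponding parameter slot, so the variadic callee can
   reload it from either place.  */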
7792
7793 /* Return where to put the arguments to a function.
7794 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7795
7796 MODE is the argument's machine mode. TYPE is the data type of the
7797 argument. It is null for libcalls where that information may not be
7798 available. CUM gives information about the preceding args and about
7799 the function being called. NAMED is nonzero if this argument is a
7800 named parameter (otherwise it is an extra parameter matching an
7801 ellipsis). */
7802
7803 static rtx
7804 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7805 const_tree type, bool named)
7806 {
7807 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7808 machine_mode mode = omode;
7809 HOST_WIDE_INT bytes, words;
7810 rtx arg;
7811
7812   /* All pointer bounds arguments are handled separately here.  */
7813 if ((type && POINTER_BOUNDS_TYPE_P (type))
7814 || POINTER_BOUNDS_MODE_P (mode))
7815 {
7816 /* Return NULL if bounds are forced to go in Bounds Table. */
7817 if (cum->bnds_in_bt)
7818 arg = NULL;
7819 /* Return the next available bound reg if any. */
7820 else if (cum->bnd_regno <= LAST_BND_REG)
7821 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7822 /* Return the next special slot number otherwise. */
7823 else
7824 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7825
7826 return arg;
7827 }
7828
7829 if (mode == BLKmode)
7830 bytes = int_size_in_bytes (type);
7831 else
7832 bytes = GET_MODE_SIZE (mode);
7833 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7834
7835 /* To simplify the code below, represent vector types with a vector mode
7836 even if MMX/SSE are not active. */
7837 if (type && TREE_CODE (type) == VECTOR_TYPE)
7838 mode = type_natural_mode (type, cum, false);
7839
7840 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7841 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7842 else if (TARGET_64BIT)
7843 arg = function_arg_64 (cum, mode, omode, type, named);
7844 else
7845 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7846
7847 return arg;
7848 }
7849
7850 /* A C expression that indicates when an argument must be passed by
7851 reference. If nonzero for an argument, a copy of that argument is
7852 made in memory and a pointer to the argument is passed instead of
7853 the argument itself. The pointer is passed in whatever way is
7854 appropriate for passing a pointer to that type. */
7855
7856 static bool
7857 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7858 const_tree type, bool)
7859 {
7860 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7861
7862 /* See Windows x64 Software Convention. */
7863 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7864 {
7865 int msize = (int) GET_MODE_SIZE (mode);
7866 if (type)
7867 {
7868 /* Arrays are passed by reference. */
7869 if (TREE_CODE (type) == ARRAY_TYPE)
7870 return true;
7871
7872 if (AGGREGATE_TYPE_P (type))
7873 {
7874 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7875 are passed by reference. */
7876 msize = int_size_in_bytes (type);
7877 }
7878 }
7879
7880 /* __m128 is passed by reference. */
7881 switch (msize) {
7882 case 1: case 2: case 4: case 8:
7883 break;
7884 default:
7885 return true;
7886 }
7887 }
7888 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7889 return 1;
7890
7891 return 0;
7892 }
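/* Illustrative note (annotation, not part of the original file): under the
   MS x64 convention handled above,

       struct small { int a, b; };        // 8 bytes  -> passed by value
       struct big   { int a, b, c; };     // 12 bytes -> passed by reference
       typedef float v4sf __attribute__ ((vector_size (16)));
                                          // 16 bytes (__m128) -> by reference

   whereas on the SYSV side only variable-sized objects take the
   by-reference path in this hook.  */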
7893
7894 /* Return true when TYPE should be 128bit aligned for 32bit argument
7895 passing ABI. XXX: This function is obsolete and is only used for
7896 checking psABI compatibility with previous versions of GCC. */
7897
7898 static bool
7899 ix86_compat_aligned_value_p (const_tree type)
7900 {
7901 machine_mode mode = TYPE_MODE (type);
7902 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7903 || mode == TDmode
7904 || mode == TFmode
7905 || mode == TCmode)
7906 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7907 return true;
7908 if (TYPE_ALIGN (type) < 128)
7909 return false;
7910
7911 if (AGGREGATE_TYPE_P (type))
7912 {
7913 /* Walk the aggregates recursively. */
7914 switch (TREE_CODE (type))
7915 {
7916 case RECORD_TYPE:
7917 case UNION_TYPE:
7918 case QUAL_UNION_TYPE:
7919 {
7920 tree field;
7921
7922 /* Walk all the structure fields. */
7923 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7924 {
7925 if (TREE_CODE (field) == FIELD_DECL
7926 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7927 return true;
7928 }
7929 break;
7930 }
7931
7932 case ARRAY_TYPE:
7933       /* Just for use if some languages pass arrays by value.  */
7934 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7935 return true;
7936 break;
7937
7938 default:
7939 gcc_unreachable ();
7940 }
7941 }
7942 return false;
7943 }
7944
7945 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7946 XXX: This function is obsolete and is only used for checking psABI
7947 compatibility with previous versions of GCC. */
7948
7949 static unsigned int
7950 ix86_compat_function_arg_boundary (machine_mode mode,
7951 const_tree type, unsigned int align)
7952 {
7953 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7954 natural boundaries. */
7955 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7956 {
7957 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7958 make an exception for SSE modes since these require 128bit
7959 alignment.
7960
7961 The handling here differs from field_alignment. ICC aligns MMX
7962 arguments to 4 byte boundaries, while structure fields are aligned
7963 to 8 byte boundaries. */
7964 if (!type)
7965 {
7966 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7967 align = PARM_BOUNDARY;
7968 }
7969 else
7970 {
7971 if (!ix86_compat_aligned_value_p (type))
7972 align = PARM_BOUNDARY;
7973 }
7974 }
7975 if (align > BIGGEST_ALIGNMENT)
7976 align = BIGGEST_ALIGNMENT;
7977 return align;
7978 }
7979
7980 /* Return true when TYPE should be 128bit aligned for 32bit argument
7981 passing ABI. */
7982
7983 static bool
7984 ix86_contains_aligned_value_p (const_tree type)
7985 {
7986 machine_mode mode = TYPE_MODE (type);
7987
7988 if (mode == XFmode || mode == XCmode)
7989 return false;
7990
7991 if (TYPE_ALIGN (type) < 128)
7992 return false;
7993
7994 if (AGGREGATE_TYPE_P (type))
7995 {
7996 /* Walk the aggregates recursively. */
7997 switch (TREE_CODE (type))
7998 {
7999 case RECORD_TYPE:
8000 case UNION_TYPE:
8001 case QUAL_UNION_TYPE:
8002 {
8003 tree field;
8004
8005 /* Walk all the structure fields. */
8006 for (field = TYPE_FIELDS (type);
8007 field;
8008 field = DECL_CHAIN (field))
8009 {
8010 if (TREE_CODE (field) == FIELD_DECL
8011 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8012 return true;
8013 }
8014 break;
8015 }
8016
8017 case ARRAY_TYPE:
8018 	/* Just for use if some languages pass arrays by value.  */
8019 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8020 return true;
8021 break;
8022
8023 default:
8024 gcc_unreachable ();
8025 }
8026 }
8027 else
8028 return TYPE_ALIGN (type) >= 128;
8029
8030 return false;
8031 }
8032
8033 /* Gives the alignment boundary, in bits, of an argument with the
8034 specified mode and type. */
8035
8036 static unsigned int
8037 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8038 {
8039 unsigned int align;
8040 if (type)
8041 {
8042       /* Since the main variant type is used for the call, convert the
8043 	 passed type to its main variant.  */
8044 type = TYPE_MAIN_VARIANT (type);
8045 align = TYPE_ALIGN (type);
8046 }
8047 else
8048 align = GET_MODE_ALIGNMENT (mode);
8049 if (align < PARM_BOUNDARY)
8050 align = PARM_BOUNDARY;
8051 else
8052 {
8053 static bool warned;
8054 unsigned int saved_align = align;
8055
8056 if (!TARGET_64BIT)
8057 {
8058 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8059 if (!type)
8060 {
8061 if (mode == XFmode || mode == XCmode)
8062 align = PARM_BOUNDARY;
8063 }
8064 else if (!ix86_contains_aligned_value_p (type))
8065 align = PARM_BOUNDARY;
8066
8067 if (align < 128)
8068 align = PARM_BOUNDARY;
8069 }
8070
8071 if (warn_psabi
8072 && !warned
8073 && align != ix86_compat_function_arg_boundary (mode, type,
8074 saved_align))
8075 {
8076 warned = true;
8077 inform (input_location,
8078 "The ABI for passing parameters with %d-byte"
8079 " alignment has changed in GCC 4.6",
8080 align / BITS_PER_UNIT);
8081 }
8082 }
8083
8084 return align;
8085 }
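/* Illustrative note (annotation, not part of the original file): on 32-bit
   x86 a "long double" argument (XFmode) only gets PARM_BOUNDARY (32-bit)
   alignment from this hook, so

       void f (long double x);

   pushes x at a 4-byte boundary, while an SSE vector argument such as
   __m128 keeps its natural 128-bit boundary.  */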
8086
8087 /* Return true if N is a possible register number of function value. */
8088
8089 static bool
8090 ix86_function_value_regno_p (const unsigned int regno)
8091 {
8092 switch (regno)
8093 {
8094 case AX_REG:
8095 return true;
8096 case DX_REG:
8097 return (!TARGET_64BIT || ix86_abi != MS_ABI);
8098 case DI_REG:
8099 case SI_REG:
8100 return TARGET_64BIT && ix86_abi != MS_ABI;
8101
8102 case FIRST_BND_REG:
8103 return chkp_function_instrumented_p (current_function_decl);
8104
8105 /* Complex values are returned in %st(0)/%st(1) pair. */
8106 case ST0_REG:
8107 case ST1_REG:
8108 /* TODO: The function should depend on current function ABI but
8109 builtins.c would need updating then. Therefore we use the
8110 default ABI. */
8111 if (TARGET_64BIT && ix86_abi == MS_ABI)
8112 return false;
8113 return TARGET_FLOAT_RETURNS_IN_80387;
8114
8115 /* Complex values are returned in %xmm0/%xmm1 pair. */
8116 case XMM0_REG:
8117 case XMM1_REG:
8118 return TARGET_SSE;
8119
8120 case MM0_REG:
8121 if (TARGET_MACHO || TARGET_64BIT)
8122 return false;
8123 return TARGET_MMX;
8124 }
8125
8126 return false;
8127 }
8128
8129 /* Define how to find the value returned by a function.
8130 VALTYPE is the data type of the value (as a tree).
8131 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8132 otherwise, FUNC is 0. */
8133
8134 static rtx
8135 function_value_32 (machine_mode orig_mode, machine_mode mode,
8136 const_tree fntype, const_tree fn)
8137 {
8138 unsigned int regno;
8139
8140 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8141 we normally prevent this case when mmx is not available. However
8142 some ABIs may require the result to be returned like DImode. */
8143 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8144 regno = FIRST_MMX_REG;
8145
8146 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8147 we prevent this case when sse is not available. However some ABIs
8148 may require the result to be returned like integer TImode. */
8149 else if (mode == TImode
8150 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8151 regno = FIRST_SSE_REG;
8152
8153 /* 32-byte vector modes in %ymm0. */
8154 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8155 regno = FIRST_SSE_REG;
8156
8157 /* 64-byte vector modes in %zmm0. */
8158 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8159 regno = FIRST_SSE_REG;
8160
8161 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8162 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8163 regno = FIRST_FLOAT_REG;
8164 else
8165 /* Most things go in %eax. */
8166 regno = AX_REG;
8167
8168 /* Override FP return register with %xmm0 for local functions when
8169 SSE math is enabled or for functions with sseregparm attribute. */
8170 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8171 {
8172 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8173 if ((sse_level >= 1 && mode == SFmode)
8174 || (sse_level == 2 && mode == DFmode))
8175 regno = FIRST_SSE_REG;
8176 }
8177
8178 /* OImode shouldn't be used directly. */
8179 gcc_assert (mode != OImode);
8180
8181 return gen_rtx_REG (orig_mode, regno);
8182 }
8183
8184 static rtx
8185 function_value_64 (machine_mode orig_mode, machine_mode mode,
8186 const_tree valtype)
8187 {
8188 rtx ret;
8189
8190 /* Handle libcalls, which don't provide a type node. */
8191 if (valtype == NULL)
8192 {
8193 unsigned int regno;
8194
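/* Binary and decimal floating-point results go in %xmm0, 80-bit long
   double (and its complex variant) on the x87 stack, complex long double
   has no register home here, and everything else is returned in %rax.  */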
8195 switch (mode)
8196 {
8197 case SFmode:
8198 case SCmode:
8199 case DFmode:
8200 case DCmode:
8201 case TFmode:
8202 case SDmode:
8203 case DDmode:
8204 case TDmode:
8205 regno = FIRST_SSE_REG;
8206 break;
8207 case XFmode:
8208 case XCmode:
8209 regno = FIRST_FLOAT_REG;
8210 break;
8211 case TCmode:
8212 return NULL;
8213 default:
8214 regno = AX_REG;
8215 }
8216
8217 return gen_rtx_REG (mode, regno);
8218 }
8219 else if (POINTER_TYPE_P (valtype))
8220 {
8221 /* Pointers are always returned in word_mode. */
8222 mode = word_mode;
8223 }
8224
8225 ret = construct_container (mode, orig_mode, valtype, 1,
8226 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8227 x86_64_int_return_registers, 0);
8228
8229 /* For zero-sized structures, construct_container returns NULL, but we
8230 need to keep the rest of the compiler happy by returning a meaningful value. */
8231 if (!ret)
8232 ret = gen_rtx_REG (orig_mode, AX_REG);
8233
8234 return ret;
8235 }
8236
8237 static rtx
8238 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8239 const_tree valtype)
8240 {
8241 unsigned int regno = AX_REG;
8242
8243 if (TARGET_SSE)
8244 {
8245 switch (GET_MODE_SIZE (mode))
8246 {
8247 case 16:
8248 if (valtype != NULL_TREE
8249 && !VECTOR_INTEGER_TYPE_P (valtype)
8251 && !INTEGRAL_TYPE_P (valtype)
8252 && !VECTOR_FLOAT_TYPE_P (valtype))
8253 break;
8254 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8255 && !COMPLEX_MODE_P (mode))
8256 regno = FIRST_SSE_REG;
8257 break;
8258 case 8:
8259 case 4:
8260 if (mode == SFmode || mode == DFmode)
8261 regno = FIRST_SSE_REG;
8262 break;
8263 default:
8264 break;
8265 }
8266 }
8267 return gen_rtx_REG (orig_mode, regno);
8268 }
8269
8270 static rtx
8271 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8272 machine_mode orig_mode, machine_mode mode)
8273 {
8274 const_tree fn, fntype;
8275
8276 fn = NULL_TREE;
8277 if (fntype_or_decl && DECL_P (fntype_or_decl))
8278 fn = fntype_or_decl;
8279 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8280
8281 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8282 || POINTER_BOUNDS_MODE_P (mode))
8283 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8284 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8285 return function_value_ms_64 (orig_mode, mode, valtype);
8286 else if (TARGET_64BIT)
8287 return function_value_64 (orig_mode, mode, valtype);
8288 else
8289 return function_value_32 (orig_mode, mode, fntype, fn);
8290 }
8291
8292 static rtx
8293 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8294 {
8295 machine_mode mode, orig_mode;
8296
8297 orig_mode = TYPE_MODE (valtype);
8298 mode = type_natural_mode (valtype, NULL, true);
8299 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8300 }
8301
8302 /* Return an RTX representing a place where a function returns
8303 or receives pointer bounds or NULL if no bounds are returned.
8304
8305 VALTYPE is a data type of a value returned by the function.
8306
8307 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8308 or FUNCTION_TYPE of the function.
8309
8310 If OUTGOING is false, return a place in which the caller will
8311 see the return value. Otherwise, return a place where a
8312 function returns a value. */
8313
8314 static rtx
8315 ix86_function_value_bounds (const_tree valtype,
8316 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8317 bool outgoing ATTRIBUTE_UNUSED)
8318 {
8319 rtx res = NULL_RTX;
8320
8321 if (BOUNDED_TYPE_P (valtype))
8322 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8323 else if (chkp_type_has_pointer (valtype))
8324 {
8325 bitmap slots;
8326 rtx bounds[2];
8327 bitmap_iterator bi;
8328 unsigned i, bnd_no = 0;
8329
8330 bitmap_obstack_initialize (NULL);
8331 slots = BITMAP_ALLOC (NULL);
8332 chkp_find_bound_slots (valtype, slots);
8333
8334 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8335 {
8336 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8337 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8338 gcc_assert (bnd_no < 2);
8339 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8340 }
8341
8342 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8343
8344 BITMAP_FREE (slots);
8345 bitmap_obstack_release (NULL);
8346 }
8347 else
8348 res = NULL_RTX;
8349
8350 return res;
8351 }
8352
8353 /* Pointer function arguments and return values are promoted to
8354 word_mode. */
8355
8356 static machine_mode
8357 ix86_promote_function_mode (const_tree type, machine_mode mode,
8358 int *punsignedp, const_tree fntype,
8359 int for_return)
8360 {
8361 if (type != NULL_TREE && POINTER_TYPE_P (type))
8362 {
8363 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8364 return word_mode;
8365 }
8366 return default_promote_function_mode (type, mode, punsignedp, fntype,
8367 for_return);
8368 }
8369
8370 /* Return true if a structure, union or array with MODE containing FIELD
8371 should be accessed using BLKmode. */
8372
8373 static bool
8374 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8375 {
8376 /* Union with XFmode must be in BLKmode. */
8377 return (mode == XFmode
8378 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8379 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8380 }
8381
8382 rtx
8383 ix86_libcall_value (machine_mode mode)
8384 {
8385 return ix86_function_value_1 (NULL, NULL, mode, mode);
8386 }
8387
8388 /* Return true iff type is returned in memory. */
8389
8390 static bool
8391 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8392 {
8393 #ifdef SUBTARGET_RETURN_IN_MEMORY
8394 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8395 #else
8396 const machine_mode mode = type_natural_mode (type, NULL, true);
8397 HOST_WIDE_INT size;
8398
8399 if (POINTER_BOUNDS_TYPE_P (type))
8400 return false;
8401
8402 if (TARGET_64BIT)
8403 {
8404 if (ix86_function_type_abi (fntype) == MS_ABI)
8405 {
8406 size = int_size_in_bytes (type);
8407
8408 /* __m128 is returned in xmm0. */
8409 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8410 || INTEGRAL_TYPE_P (type)
8411 || VECTOR_FLOAT_TYPE_P (type))
8412 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8413 && !COMPLEX_MODE_P (mode)
8414 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8415 return false;
8416
8417 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8418 return size != 1 && size != 2 && size != 4 && size != 8;
8419 }
8420 else
8421 {
8422 int needed_intregs, needed_sseregs;
8423
8424 return examine_argument (mode, type, 1,
8425 &needed_intregs, &needed_sseregs);
8426 }
8427 }
8428 else
8429 {
8430 if (mode == BLKmode)
8431 return true;
8432
8433 size = int_size_in_bytes (type);
8434
8435 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8436 return false;
8437
8438 if (VECTOR_MODE_P (mode) || mode == TImode)
8439 {
8440 /* User-created vectors small enough to fit in EAX. */
8441 if (size < 8)
8442 return false;
8443
8444 /* Unless the ABI prescribes otherwise,
8445 MMX/3dNow values are returned in MM0 if available. */
8446
8447 if (size == 8)
8448 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8449
8450 /* SSE values are returned in XMM0 if available. */
8451 if (size == 16)
8452 return !TARGET_SSE;
8453
8454 /* AVX values are returned in YMM0 if available. */
8455 if (size == 32)
8456 return !TARGET_AVX;
8457
8458 /* AVX512F values are returned in ZMM0 if available. */
8459 if (size == 64)
8460 return !TARGET_AVX512F;
8461 }
8462
8463 if (mode == XFmode)
8464 return false;
8465
8466 if (size > 12)
8467 return true;
8468
8469 /* OImode shouldn't be used directly. */
8470 gcc_assert (mode != OImode);
8471
8472 return false;
8473 }
8474 #endif
8475 }
8476
8477 \f
8478 /* Create the va_list data type. */
8479
8480 /* Returns the calling convention specific va_list data type.
8481 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8482
8483 static tree
8484 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8485 {
8486 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8487
8488 /* For i386 we use plain pointer to argument area. */
8489 if (!TARGET_64BIT || abi == MS_ABI)
8490 return build_pointer_type (char_type_node);
8491
8492 record = lang_hooks.types.make_type (RECORD_TYPE);
8493 type_decl = build_decl (BUILTINS_LOCATION,
8494 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8495
8496 f_gpr = build_decl (BUILTINS_LOCATION,
8497 FIELD_DECL, get_identifier ("gp_offset"),
8498 unsigned_type_node);
8499 f_fpr = build_decl (BUILTINS_LOCATION,
8500 FIELD_DECL, get_identifier ("fp_offset"),
8501 unsigned_type_node);
8502 f_ovf = build_decl (BUILTINS_LOCATION,
8503 FIELD_DECL, get_identifier ("overflow_arg_area"),
8504 ptr_type_node);
8505 f_sav = build_decl (BUILTINS_LOCATION,
8506 FIELD_DECL, get_identifier ("reg_save_area"),
8507 ptr_type_node);
8508
8509 va_list_gpr_counter_field = f_gpr;
8510 va_list_fpr_counter_field = f_fpr;
8511
8512 DECL_FIELD_CONTEXT (f_gpr) = record;
8513 DECL_FIELD_CONTEXT (f_fpr) = record;
8514 DECL_FIELD_CONTEXT (f_ovf) = record;
8515 DECL_FIELD_CONTEXT (f_sav) = record;
8516
8517 TYPE_STUB_DECL (record) = type_decl;
8518 TYPE_NAME (record) = type_decl;
8519 TYPE_FIELDS (record) = f_gpr;
8520 DECL_CHAIN (f_gpr) = f_fpr;
8521 DECL_CHAIN (f_fpr) = f_ovf;
8522 DECL_CHAIN (f_ovf) = f_sav;
8523
8524 layout_type (record);
8525
8526 /* The correct type is an array type of one element. */
8527 return build_array_type (record, build_index_type (size_zero_node));
8528 }
8529
8530 /* Set up the builtin va_list data type and, for 64-bit, the additional
8531 calling convention specific va_list data types. */
8532
8533 static tree
8534 ix86_build_builtin_va_list (void)
8535 {
8536 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8537
8538 /* Initialize abi specific va_list builtin types. */
8539 if (TARGET_64BIT)
8540 {
8541 tree t;
8542 if (ix86_abi == MS_ABI)
8543 {
8544 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8545 if (TREE_CODE (t) != RECORD_TYPE)
8546 t = build_variant_type_copy (t);
8547 sysv_va_list_type_node = t;
8548 }
8549 else
8550 {
8551 t = ret;
8552 if (TREE_CODE (t) != RECORD_TYPE)
8553 t = build_variant_type_copy (t);
8554 sysv_va_list_type_node = t;
8555 }
8556 if (ix86_abi != MS_ABI)
8557 {
8558 t = ix86_build_builtin_va_list_abi (MS_ABI);
8559 if (TREE_CODE (t) != RECORD_TYPE)
8560 t = build_variant_type_copy (t);
8561 ms_va_list_type_node = t;
8562 }
8563 else
8564 {
8565 t = ret;
8566 if (TREE_CODE (t) != RECORD_TYPE)
8567 t = build_variant_type_copy (t);
8568 ms_va_list_type_node = t;
8569 }
8570 }
8571
8572 return ret;
8573 }
8574
8575 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8576
8577 static void
8578 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8579 {
8580 rtx save_area, mem;
8581 alias_set_type set;
8582 int i, max;
8583
8584 /* GPR size of varargs save area. */
8585 if (cfun->va_list_gpr_size)
8586 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8587 else
8588 ix86_varargs_gpr_size = 0;
8589
8590 /* FPR size of varargs save area. We don't need it if we don't pass
8591 anything in SSE registers. */
8592 if (TARGET_SSE && cfun->va_list_fpr_size)
8593 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8594 else
8595 ix86_varargs_fpr_size = 0;
8596
8597 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8598 return;
8599
8600 save_area = frame_pointer_rtx;
8601 set = get_varargs_alias_set ();
8602
8603 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8604 if (max > X86_64_REGPARM_MAX)
8605 max = X86_64_REGPARM_MAX;
8606
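/* Spill the integer parameter registers that were not consumed by named
   arguments into the register save area, where va_arg will look for them.  */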
8607 for (i = cum->regno; i < max; i++)
8608 {
8609 mem = gen_rtx_MEM (word_mode,
8610 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8611 MEM_NOTRAP_P (mem) = 1;
8612 set_mem_alias_set (mem, set);
8613 emit_move_insn (mem,
8614 gen_rtx_REG (word_mode,
8615 x86_64_int_parameter_registers[i]));
8616 }
8617
8618 if (ix86_varargs_fpr_size)
8619 {
8620 machine_mode smode;
8621 rtx_code_label *label;
8622 rtx test;
8623
8624 /* Now emit code to save SSE registers. The AX parameter contains the number
8625 of SSE parameter registers used to call this function, though all we
8626 actually check here is the zero/non-zero status. */
8627
8628 label = gen_label_rtx ();
8629 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8630 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8631 label));
8632
8633 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8634 we used movdqa (i.e. TImode) instead? Perhaps even better would
8635 be if we could determine the real mode of the data, via a hook
8636 into pass_stdarg. Ignore all that for now. */
8637 smode = V4SFmode;
8638 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8639 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8640
8641 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8642 if (max > X86_64_SSE_REGPARM_MAX)
8643 max = X86_64_SSE_REGPARM_MAX;
8644
8645 for (i = cum->sse_regno; i < max; ++i)
8646 {
8647 mem = plus_constant (Pmode, save_area,
8648 i * 16 + ix86_varargs_gpr_size);
8649 mem = gen_rtx_MEM (smode, mem);
8650 MEM_NOTRAP_P (mem) = 1;
8651 set_mem_alias_set (mem, set);
8652 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8653
8654 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8655 }
8656
8657 emit_label (label);
8658 }
8659 }
8660
8661 static void
8662 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8663 {
8664 alias_set_type set = get_varargs_alias_set ();
8665 int i;
8666
8667 /* Reset to zero, as a SysV va_arg may have been used
8668 before. */
8669 ix86_varargs_gpr_size = 0;
8670 ix86_varargs_fpr_size = 0;
8671
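/* Store each remaining parameter register into its home slot in the
   register parameter area the caller allocated above the return address.  */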
8672 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8673 {
8674 rtx reg, mem;
8675
8676 mem = gen_rtx_MEM (Pmode,
8677 plus_constant (Pmode, virtual_incoming_args_rtx,
8678 i * UNITS_PER_WORD));
8679 MEM_NOTRAP_P (mem) = 1;
8680 set_mem_alias_set (mem, set);
8681
8682 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8683 emit_move_insn (mem, reg);
8684 }
8685 }
8686
8687 static void
8688 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8689 tree type, int *, int no_rtl)
8690 {
8691 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8692 CUMULATIVE_ARGS next_cum;
8693 tree fntype;
8694
8695 /* This argument doesn't appear to be used anymore, which is good,
8696 because the old code here didn't suppress rtl generation. */
8697 gcc_assert (!no_rtl);
8698
8699 if (!TARGET_64BIT)
8700 return;
8701
8702 fntype = TREE_TYPE (current_function_decl);
8703
8704 /* For varargs, we do not want to skip the dummy va_dcl argument.
8705 For stdargs, we do want to skip the last named argument. */
8706 next_cum = *cum;
8707 if (stdarg_p (fntype))
8708 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8709 true);
8710
8711 if (cum->call_abi == MS_ABI)
8712 setup_incoming_varargs_ms_64 (&next_cum);
8713 else
8714 setup_incoming_varargs_64 (&next_cum);
8715 }
8716
8717 static void
8718 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8719 enum machine_mode mode,
8720 tree type,
8721 int *pretend_size ATTRIBUTE_UNUSED,
8722 int no_rtl)
8723 {
8724 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8725 CUMULATIVE_ARGS next_cum;
8726 tree fntype;
8727 rtx save_area;
8728 int bnd_reg, i, max;
8729
8730 gcc_assert (!no_rtl);
8731
8732 /* Do nothing if we use plain pointer to argument area. */
8733 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8734 return;
8735
8736 fntype = TREE_TYPE (current_function_decl);
8737
8738 /* For varargs, we do not want to skip the dummy va_dcl argument.
8739 For stdargs, we do want to skip the last named argument. */
8740 next_cum = *cum;
8741 if (stdarg_p (fntype))
8742 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8743 true);
8744 save_area = frame_pointer_rtx;
8745
8746 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8747 if (max > X86_64_REGPARM_MAX)
8748 max = X86_64_REGPARM_MAX;
8749
8750 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8751 if (chkp_function_instrumented_p (current_function_decl))
8752 for (i = cum->regno; i < max; i++)
8753 {
8754 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8755 rtx reg = gen_rtx_REG (DImode,
8756 x86_64_int_parameter_registers[i]);
8757 rtx ptr = reg;
8758 rtx bounds;
8759
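/* Bounds for the first pointer arguments arrive in bound registers; once
   those are exhausted, the remaining bounds are reloaded with BNDLDX.
   Either way, the bounds are then stored with BNDSTX for the register
   save area slot.  */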
8760 if (bnd_reg <= LAST_BND_REG)
8761 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8762 else
8763 {
8764 rtx ldx_addr =
8765 plus_constant (Pmode, arg_pointer_rtx,
8766 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8767 bounds = gen_reg_rtx (BNDmode);
8768 emit_insn (BNDmode == BND64mode
8769 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8770 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8771 }
8772
8773 emit_insn (BNDmode == BND64mode
8774 ? gen_bnd64_stx (addr, ptr, bounds)
8775 : gen_bnd32_stx (addr, ptr, bounds));
8776
8777 bnd_reg++;
8778 }
8779 }
8780
8781
8782 /* Check whether TYPE is a va_list of kind char *. */
8783
8784 static bool
8785 is_va_list_char_pointer (tree type)
8786 {
8787 tree canonic;
8788
8789 /* For 32-bit it is always true. */
8790 if (!TARGET_64BIT)
8791 return true;
8792 canonic = ix86_canonical_va_list_type (type);
8793 return (canonic == ms_va_list_type_node
8794 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8795 }
8796
8797 /* Implement va_start. */
8798
8799 static void
8800 ix86_va_start (tree valist, rtx nextarg)
8801 {
8802 HOST_WIDE_INT words, n_gpr, n_fpr;
8803 tree f_gpr, f_fpr, f_ovf, f_sav;
8804 tree gpr, fpr, ovf, sav, t;
8805 tree type;
8806 rtx ovf_rtx;
8807
8808 if (flag_split_stack
8809 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8810 {
8811 unsigned int scratch_regno;
8812
8813 /* When we are splitting the stack, we can't refer to the stack
8814 arguments using internal_arg_pointer, because they may be on
8815 the old stack. The split stack prologue will arrange to
8816 leave a pointer to the old stack arguments in a scratch
8817 register, which we here copy to a pseudo-register. The split
8818 stack prologue can't set the pseudo-register directly because
8819 it (the prologue) runs before any registers have been saved. */
8820
8821 scratch_regno = split_stack_prologue_scratch_regno ();
8822 if (scratch_regno != INVALID_REGNUM)
8823 {
8824 rtx reg;
8825 rtx_insn *seq;
8826
8827 reg = gen_reg_rtx (Pmode);
8828 cfun->machine->split_stack_varargs_pointer = reg;
8829
8830 start_sequence ();
8831 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8832 seq = get_insns ();
8833 end_sequence ();
8834
8835 push_topmost_sequence ();
8836 emit_insn_after (seq, entry_of_function ());
8837 pop_topmost_sequence ();
8838 }
8839 }
8840
8841 /* Only 64bit target needs something special. */
8842 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8843 {
8844 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8845 std_expand_builtin_va_start (valist, nextarg);
8846 else
8847 {
8848 rtx va_r, next;
8849
8850 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8851 next = expand_binop (ptr_mode, add_optab,
8852 cfun->machine->split_stack_varargs_pointer,
8853 crtl->args.arg_offset_rtx,
8854 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8855 convert_move (va_r, next, 0);
8856
8857 /* Store zero bounds for va_list. */
8858 if (chkp_function_instrumented_p (current_function_decl))
8859 chkp_expand_bounds_reset_for_mem (valist,
8860 make_tree (TREE_TYPE (valist),
8861 next));
8862
8863 }
8864 return;
8865 }
8866
8867 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8868 f_fpr = DECL_CHAIN (f_gpr);
8869 f_ovf = DECL_CHAIN (f_fpr);
8870 f_sav = DECL_CHAIN (f_ovf);
8871
8872 valist = build_simple_mem_ref (valist);
8873 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8874 /* The following should be folded into the MEM_REF offset. */
8875 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8876 f_gpr, NULL_TREE);
8877 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8878 f_fpr, NULL_TREE);
8879 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8880 f_ovf, NULL_TREE);
8881 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8882 f_sav, NULL_TREE);
8883
8884 /* Count number of gp and fp argument registers used. */
8885 words = crtl->args.info.words;
8886 n_gpr = crtl->args.info.regno;
8887 n_fpr = crtl->args.info.sse_regno;
8888
8889 if (cfun->va_list_gpr_size)
8890 {
8891 type = TREE_TYPE (gpr);
8892 t = build2 (MODIFY_EXPR, type,
8893 gpr, build_int_cst (type, n_gpr * 8));
8894 TREE_SIDE_EFFECTS (t) = 1;
8895 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8896 }
8897
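/* fp_offset starts past the GPR save area: X86_64_REGPARM_MAX 8-byte
   GPR slots followed by the 16-byte SSE slots.  */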
8898 if (TARGET_SSE && cfun->va_list_fpr_size)
8899 {
8900 type = TREE_TYPE (fpr);
8901 t = build2 (MODIFY_EXPR, type, fpr,
8902 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8903 TREE_SIDE_EFFECTS (t) = 1;
8904 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8905 }
8906
8907 /* Find the overflow area. */
8908 type = TREE_TYPE (ovf);
8909 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8910 ovf_rtx = crtl->args.internal_arg_pointer;
8911 else
8912 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8913 t = make_tree (type, ovf_rtx);
8914 if (words != 0)
8915 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8916
8917 /* Store zero bounds for overflow area pointer. */
8918 if (chkp_function_instrumented_p (current_function_decl))
8919 chkp_expand_bounds_reset_for_mem (ovf, t);
8920
8921 t = build2 (MODIFY_EXPR, type, ovf, t);
8922 TREE_SIDE_EFFECTS (t) = 1;
8923 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8924
8925 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8926 {
8927 /* Find the register save area.
8928 The function prologue saves it right above the stack frame. */
8929 type = TREE_TYPE (sav);
8930 t = make_tree (type, frame_pointer_rtx);
8931 if (!ix86_varargs_gpr_size)
8932 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8933
8934 /* Store zero bounds for save area pointer. */
8935 if (chkp_function_instrumented_p (current_function_decl))
8936 chkp_expand_bounds_reset_for_mem (sav, t);
8937
8938 t = build2 (MODIFY_EXPR, type, sav, t);
8939 TREE_SIDE_EFFECTS (t) = 1;
8940 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8941 }
8942 }
8943
8944 /* Implement va_arg. */
8945
8946 static tree
8947 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8948 gimple_seq *post_p)
8949 {
8950 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8951 tree f_gpr, f_fpr, f_ovf, f_sav;
8952 tree gpr, fpr, ovf, sav, t;
8953 int size, rsize;
8954 tree lab_false, lab_over = NULL_TREE;
8955 tree addr, t2;
8956 rtx container;
8957 int indirect_p = 0;
8958 tree ptrtype;
8959 machine_mode nat_mode;
8960 unsigned int arg_boundary;
8961
8962 /* Only 64bit target needs something special. */
8963 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8964 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8965
8966 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8967 f_fpr = DECL_CHAIN (f_gpr);
8968 f_ovf = DECL_CHAIN (f_fpr);
8969 f_sav = DECL_CHAIN (f_ovf);
8970
8971 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8972 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8973 valist = build_va_arg_indirect_ref (valist);
8974 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8975 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8976 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8977
8978 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8979 if (indirect_p)
8980 type = build_pointer_type (type);
8981 size = int_size_in_bytes (type);
8982 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8983
8984 nat_mode = type_natural_mode (type, NULL, false);
8985 switch (nat_mode)
8986 {
8987 case V8SFmode:
8988 case V8SImode:
8989 case V32QImode:
8990 case V16HImode:
8991 case V4DFmode:
8992 case V4DImode:
8993 case V16SFmode:
8994 case V16SImode:
8995 case V64QImode:
8996 case V32HImode:
8997 case V8DFmode:
8998 case V8DImode:
8999 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
9000 if (!TARGET_64BIT_MS_ABI)
9001 {
9002 container = NULL;
9003 break;
9004 }
9005
9006 default:
9007 container = construct_container (nat_mode, TYPE_MODE (type),
9008 type, 0, X86_64_REGPARM_MAX,
9009 X86_64_SSE_REGPARM_MAX, intreg,
9010 0);
9011 break;
9012 }
9013
9014 /* Pull the value out of the saved registers. */
9015
9016 addr = create_tmp_var (ptr_type_node, "addr");
9017
9018 if (container)
9019 {
9020 int needed_intregs, needed_sseregs;
9021 bool need_temp;
9022 tree int_addr, sse_addr;
9023
9024 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9025 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9026
9027 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9028
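/* A stack temporary is needed when the value is split across several
   registers and its alignment exceeds what the 8-byte integer (or
   16-byte SSE) slots of the register save area can provide.  */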
9029 need_temp = (!REG_P (container)
9030 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9031 || TYPE_ALIGN (type) > 128));
9032
9033 /* If we are passing a structure, verify that it is a consecutive block
9034 in the register save area. If not, we need to do moves. */
9035 if (!need_temp && !REG_P (container))
9036 {
9037 /* Verify that all registers are strictly consecutive. */
9038 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9039 {
9040 int i;
9041
9042 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9043 {
9044 rtx slot = XVECEXP (container, 0, i);
9045 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9046 || INTVAL (XEXP (slot, 1)) != i * 16)
9047 need_temp = true;
9048 }
9049 }
9050 else
9051 {
9052 int i;
9053
9054 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9055 {
9056 rtx slot = XVECEXP (container, 0, i);
9057 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9058 || INTVAL (XEXP (slot, 1)) != i * 8)
9059 need_temp = true;
9060 }
9061 }
9062 }
9063 if (!need_temp)
9064 {
9065 int_addr = addr;
9066 sse_addr = addr;
9067 }
9068 else
9069 {
9070 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9071 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9072 }
9073
9074 /* First ensure that we fit completely in registers. */
9075 if (needed_intregs)
9076 {
9077 t = build_int_cst (TREE_TYPE (gpr),
9078 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9079 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9080 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9081 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9082 gimplify_and_add (t, pre_p);
9083 }
9084 if (needed_sseregs)
9085 {
9086 t = build_int_cst (TREE_TYPE (fpr),
9087 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9088 + X86_64_REGPARM_MAX * 8);
9089 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9090 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9091 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9092 gimplify_and_add (t, pre_p);
9093 }
9094
9095 /* Compute index to start of area used for integer regs. */
9096 if (needed_intregs)
9097 {
9098 /* int_addr = gpr + sav; */
9099 t = fold_build_pointer_plus (sav, gpr);
9100 gimplify_assign (int_addr, t, pre_p);
9101 }
9102 if (needed_sseregs)
9103 {
9104 /* sse_addr = fpr + sav; */
9105 t = fold_build_pointer_plus (sav, fpr);
9106 gimplify_assign (sse_addr, t, pre_p);
9107 }
9108 if (need_temp)
9109 {
9110 int i, prev_size = 0;
9111 tree temp = create_tmp_var (type, "va_arg_tmp");
9112
9113 /* addr = &temp; */
9114 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9115 gimplify_assign (addr, t, pre_p);
9116
9117 for (i = 0; i < XVECLEN (container, 0); i++)
9118 {
9119 rtx slot = XVECEXP (container, 0, i);
9120 rtx reg = XEXP (slot, 0);
9121 machine_mode mode = GET_MODE (reg);
9122 tree piece_type;
9123 tree addr_type;
9124 tree daddr_type;
9125 tree src_addr, src;
9126 int src_offset;
9127 tree dest_addr, dest;
9128 int cur_size = GET_MODE_SIZE (mode);
9129
9130 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9131 prev_size = INTVAL (XEXP (slot, 1));
9132 if (prev_size + cur_size > size)
9133 {
9134 cur_size = size - prev_size;
9135 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9136 if (mode == BLKmode)
9137 mode = QImode;
9138 }
9139 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9140 if (mode == GET_MODE (reg))
9141 addr_type = build_pointer_type (piece_type);
9142 else
9143 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9144 true);
9145 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9146 true);
9147
9148 if (SSE_REGNO_P (REGNO (reg)))
9149 {
9150 src_addr = sse_addr;
9151 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9152 }
9153 else
9154 {
9155 src_addr = int_addr;
9156 src_offset = REGNO (reg) * 8;
9157 }
9158 src_addr = fold_convert (addr_type, src_addr);
9159 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9160
9161 dest_addr = fold_convert (daddr_type, addr);
9162 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9163 if (cur_size == GET_MODE_SIZE (mode))
9164 {
9165 src = build_va_arg_indirect_ref (src_addr);
9166 dest = build_va_arg_indirect_ref (dest_addr);
9167
9168 gimplify_assign (dest, src, pre_p);
9169 }
9170 else
9171 {
9172 tree copy
9173 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9174 3, dest_addr, src_addr,
9175 size_int (cur_size));
9176 gimplify_and_add (copy, pre_p);
9177 }
9178 prev_size += cur_size;
9179 }
9180 }
9181
9182 if (needed_intregs)
9183 {
9184 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9185 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9186 gimplify_assign (gpr, t, pre_p);
9187 }
9188
9189 if (needed_sseregs)
9190 {
9191 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9192 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9193 gimplify_assign (fpr, t, pre_p);
9194 }
9195
9196 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9197
9198 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9199 }
9200
9201 /* ... otherwise out of the overflow area. */
9202
9203 /* When the caller aligns a parameter on the stack, a parameter whose
9204 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT will only be
9205 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee
9206 here with the caller. */
9207 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9208 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9209 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9210
9211 /* Care for on-stack alignment if needed. */
9212 if (arg_boundary <= 64 || size == 0)
9213 t = ovf;
9214 else
9215 {
9216 HOST_WIDE_INT align = arg_boundary / 8;
9217 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9218 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9219 build_int_cst (TREE_TYPE (t), -align));
9220 }
9221
9222 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9223 gimplify_assign (addr, t, pre_p);
9224
9225 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9226 gimplify_assign (unshare_expr (ovf), t, pre_p);
9227
9228 if (container)
9229 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9230
9231 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9232 addr = fold_convert (ptrtype, addr);
9233
9234 if (indirect_p)
9235 addr = build_va_arg_indirect_ref (addr);
9236 return build_va_arg_indirect_ref (addr);
9237 }
9238 \f
9239 /* Return true if OPNUM's MEM should be matched
9240 in movabs* patterns. */
9241
9242 bool
9243 ix86_check_movabs (rtx insn, int opnum)
9244 {
9245 rtx set, mem;
9246
9247 set = PATTERN (insn);
9248 if (GET_CODE (set) == PARALLEL)
9249 set = XVECEXP (set, 0, 0);
9250 gcc_assert (GET_CODE (set) == SET);
9251 mem = XEXP (set, opnum);
9252 while (GET_CODE (mem) == SUBREG)
9253 mem = SUBREG_REG (mem);
9254 gcc_assert (MEM_P (mem));
9255 return volatile_ok || !MEM_VOLATILE_P (mem);
9256 }
9257 \f
9258 /* Initialize the table of extra 80387 mathematical constants. */
9259
9260 static void
9261 init_ext_80387_constants (void)
9262 {
9263 static const char * cst[5] =
9264 {
9265 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9266 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9267 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9268 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9269 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9270 };
9271 int i;
9272
9273 for (i = 0; i < 5; i++)
9274 {
9275 real_from_string (&ext_80387_constants_table[i], cst[i]);
9276 /* Ensure each constant is rounded to XFmode precision. */
9277 real_convert (&ext_80387_constants_table[i],
9278 XFmode, &ext_80387_constants_table[i]);
9279 }
9280
9281 ext_80387_constants_init = 1;
9282 }
9283
9284 /* Return non-zero if the constant is something that
9285 can be loaded with a special instruction. */
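/* The return value selects the load sequence: 1 is fldz, 2 is fld1,
   3..7 index ext_80387_constants_table (fldlg2, fldln2, fldl2e, fldl2t,
   fldpi), 8 and 9 stand for -0.0 and -1.0 (split into fldz/fld1 followed
   by fchs), 0 means no special constant and -1 means X is not an 80387
   constant at all.  */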
9286
9287 int
9288 standard_80387_constant_p (rtx x)
9289 {
9290 machine_mode mode = GET_MODE (x);
9291
9292 REAL_VALUE_TYPE r;
9293
9294 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
9295 return -1;
9296
9297 if (x == CONST0_RTX (mode))
9298 return 1;
9299 if (x == CONST1_RTX (mode))
9300 return 2;
9301
9302 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9303
9304 /* For XFmode constants, try to find a special 80387 instruction when
9305 optimizing for size or on those CPUs that benefit from them. */
9306 if (mode == XFmode
9307 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9308 {
9309 int i;
9310
9311 if (! ext_80387_constants_init)
9312 init_ext_80387_constants ();
9313
9314 for (i = 0; i < 5; i++)
9315 if (real_identical (&r, &ext_80387_constants_table[i]))
9316 return i + 3;
9317 }
9318
9319 /* A load of the constant -0.0 or -1.0 will be split into an
9320 fldz;fchs or fld1;fchs sequence. */
9321 if (real_isnegzero (&r))
9322 return 8;
9323 if (real_identical (&r, &dconstm1))
9324 return 9;
9325
9326 return 0;
9327 }
9328
9329 /* Return the opcode of the special instruction to be used to load
9330 the constant X. */
9331
9332 const char *
9333 standard_80387_constant_opcode (rtx x)
9334 {
9335 switch (standard_80387_constant_p (x))
9336 {
9337 case 1:
9338 return "fldz";
9339 case 2:
9340 return "fld1";
9341 case 3:
9342 return "fldlg2";
9343 case 4:
9344 return "fldln2";
9345 case 5:
9346 return "fldl2e";
9347 case 6:
9348 return "fldl2t";
9349 case 7:
9350 return "fldpi";
9351 case 8:
9352 case 9:
9353 return "#";
9354 default:
9355 gcc_unreachable ();
9356 }
9357 }
9358
9359 /* Return the CONST_DOUBLE representing the 80387 constant that is
9360 loaded by the specified special instruction. The argument IDX
9361 matches the return value from standard_80387_constant_p. */
9362
9363 rtx
9364 standard_80387_constant_rtx (int idx)
9365 {
9366 int i;
9367
9368 if (! ext_80387_constants_init)
9369 init_ext_80387_constants ();
9370
9371 switch (idx)
9372 {
9373 case 3:
9374 case 4:
9375 case 5:
9376 case 6:
9377 case 7:
9378 i = idx - 3;
9379 break;
9380
9381 default:
9382 gcc_unreachable ();
9383 }
9384
9385 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9386 XFmode);
9387 }
9388
9389 /* Return 1 if X is all 0s and 2 if X is all 1s
9390 in a supported SSE/AVX vector mode. */
9391
9392 int
9393 standard_sse_constant_p (rtx x)
9394 {
9395 machine_mode mode = GET_MODE (x);
9396
9397 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
9398 return 1;
9399 if (vector_all_ones_operand (x, mode))
9400 switch (mode)
9401 {
9402 case V16QImode:
9403 case V8HImode:
9404 case V4SImode:
9405 case V2DImode:
9406 if (TARGET_SSE2)
9407 return 2;
9408 case V32QImode:
9409 case V16HImode:
9410 case V8SImode:
9411 case V4DImode:
9412 if (TARGET_AVX2)
9413 return 2;
9414 case V64QImode:
9415 case V32HImode:
9416 case V16SImode:
9417 case V8DImode:
9418 if (TARGET_AVX512F)
9419 return 2;
9420 default:
9421 break;
9422 }
9423
9424 return 0;
9425 }
9426
9427 /* Return the opcode of the special instruction to be used to load
9428 the constant X. */
9429
9430 const char *
9431 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9432 {
9433 switch (standard_sse_constant_p (x))
9434 {
9435 case 1:
9436 switch (get_attr_mode (insn))
9437 {
9438 case MODE_XI:
9439 return "vpxord\t%g0, %g0, %g0";
9440 case MODE_V16SF:
9441 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9442 : "vpxord\t%g0, %g0, %g0";
9443 case MODE_V8DF:
9444 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9445 : "vpxorq\t%g0, %g0, %g0";
9446 case MODE_TI:
9447 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9448 : "%vpxor\t%0, %d0";
9449 case MODE_V2DF:
9450 return "%vxorpd\t%0, %d0";
9451 case MODE_V4SF:
9452 return "%vxorps\t%0, %d0";
9453
9454 case MODE_OI:
9455 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9456 : "vpxor\t%x0, %x0, %x0";
9457 case MODE_V4DF:
9458 return "vxorpd\t%x0, %x0, %x0";
9459 case MODE_V8SF:
9460 return "vxorps\t%x0, %x0, %x0";
9461
9462 default:
9463 break;
9464 }
9465
9466 case 2:
9467 if (TARGET_AVX512VL
9468 || get_attr_mode (insn) == MODE_XI
9469 || get_attr_mode (insn) == MODE_V8DF
9470 || get_attr_mode (insn) == MODE_V16SF)
9471 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9472 if (TARGET_AVX)
9473 return "vpcmpeqd\t%0, %0, %0";
9474 else
9475 return "pcmpeqd\t%0, %0";
9476
9477 default:
9478 break;
9479 }
9480 gcc_unreachable ();
9481 }
9482
9483 /* Return true if OP contains a symbol reference. */
9484
9485 bool
9486 symbolic_reference_mentioned_p (rtx op)
9487 {
9488 const char *fmt;
9489 int i;
9490
9491 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9492 return true;
9493
9494 fmt = GET_RTX_FORMAT (GET_CODE (op));
9495 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9496 {
9497 if (fmt[i] == 'E')
9498 {
9499 int j;
9500
9501 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9502 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9503 return true;
9504 }
9505
9506 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9507 return true;
9508 }
9509
9510 return false;
9511 }
9512
9513 /* Return true if it is appropriate to emit `ret' instructions in the
9514 body of a function. Do this only if the epilogue is simple, needing a
9515 couple of insns. Prior to reloading, we can't tell how many registers
9516 must be saved, so return false then. Return false if there is no frame
9517 marker to de-allocate. */
9518
9519 bool
9520 ix86_can_use_return_insn_p (void)
9521 {
9522 struct ix86_frame frame;
9523
9524 if (! reload_completed || frame_pointer_needed)
9525 return 0;
9526
9527 /* Don't allow more than 32k pop, since that's all we can do
9528 with one instruction. */
9529 if (crtl->args.pops_args && crtl->args.size >= 32768)
9530 return 0;
9531
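/* A bare `ret' is only usable when the frame consists of nothing but the
   return address: no registers to restore and no local stack to release.  */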
9532 ix86_compute_frame_layout (&frame);
9533 return (frame.stack_pointer_offset == UNITS_PER_WORD
9534 && (frame.nregs + frame.nsseregs) == 0);
9535 }
9536 \f
9537 /* Value should be nonzero if functions must have frame pointers.
9538 Zero means the frame pointer need not be set up (and parms may
9539 be accessed via the stack pointer) in functions that seem suitable. */
9540
9541 static bool
9542 ix86_frame_pointer_required (void)
9543 {
9544 /* If we accessed previous frames, then the generated code expects
9545 to be able to access the saved ebp value in our frame. */
9546 if (cfun->machine->accesses_prev_frame)
9547 return true;
9548
9549 /* Several x86 OSes need a frame pointer for other reasons,
9550 usually pertaining to setjmp. */
9551 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9552 return true;
9553
9554 /* For older 32-bit runtimes, setjmp requires a valid frame pointer. */
9555 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9556 return true;
9557
9558 /* With Win64 SEH, very large frames need a frame pointer, as the maximum
9559 stack allocation is 4GB. */
9560 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9561 return true;
9562
9563 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9564 turns off the frame pointer by default. Turn it back on now if
9565 we've not got a leaf function. */
9566 if (TARGET_OMIT_LEAF_FRAME_POINTER
9567 && (!crtl->is_leaf
9568 || ix86_current_function_calls_tls_descriptor))
9569 return true;
9570
9571 if (crtl->profile && !flag_fentry)
9572 return true;
9573
9574 return false;
9575 }
9576
9577 /* Record that the current function accesses previous call frames. */
9578
9579 void
9580 ix86_setup_frame_addresses (void)
9581 {
9582 cfun->machine->accesses_prev_frame = 1;
9583 }
9584 \f
9585 #ifndef USE_HIDDEN_LINKONCE
9586 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9587 # define USE_HIDDEN_LINKONCE 1
9588 # else
9589 # define USE_HIDDEN_LINKONCE 0
9590 # endif
9591 #endif
9592
9593 static int pic_labels_used;
9594
9595 /* Fills in the label name that should be used for a pc thunk for
9596 the given register. */
9597
9598 static void
9599 get_pc_thunk_name (char name[32], unsigned int regno)
9600 {
9601 gcc_assert (!TARGET_64BIT);
9602
9603 if (USE_HIDDEN_LINKONCE)
9604 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9605 else
9606 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9607 }
9608
9609
9610 /* This function generates the pc thunks used by -fpic code; each thunk
9611 loads its register with the return address of the caller and then returns. */
9612
9613 static void
9614 ix86_code_end (void)
9615 {
9616 rtx xops[2];
9617 int regno;
9618
9619 for (regno = AX_REG; regno <= SP_REG; regno++)
9620 {
9621 char name[32];
9622 tree decl;
9623
9624 if (!(pic_labels_used & (1 << regno)))
9625 continue;
9626
9627 get_pc_thunk_name (name, regno);
9628
9629 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9630 get_identifier (name),
9631 build_function_type_list (void_type_node, NULL_TREE));
9632 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9633 NULL_TREE, void_type_node);
9634 TREE_PUBLIC (decl) = 1;
9635 TREE_STATIC (decl) = 1;
9636 DECL_IGNORED_P (decl) = 1;
9637
9638 #if TARGET_MACHO
9639 if (TARGET_MACHO)
9640 {
9641 switch_to_section (darwin_sections[text_coal_section]);
9642 fputs ("\t.weak_definition\t", asm_out_file);
9643 assemble_name (asm_out_file, name);
9644 fputs ("\n\t.private_extern\t", asm_out_file);
9645 assemble_name (asm_out_file, name);
9646 putc ('\n', asm_out_file);
9647 ASM_OUTPUT_LABEL (asm_out_file, name);
9648 DECL_WEAK (decl) = 1;
9649 }
9650 else
9651 #endif
9652 if (USE_HIDDEN_LINKONCE)
9653 {
9654 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9655
9656 targetm.asm_out.unique_section (decl, 0);
9657 switch_to_section (get_named_section (decl, NULL, 0));
9658
9659 targetm.asm_out.globalize_label (asm_out_file, name);
9660 fputs ("\t.hidden\t", asm_out_file);
9661 assemble_name (asm_out_file, name);
9662 putc ('\n', asm_out_file);
9663 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9664 }
9665 else
9666 {
9667 switch_to_section (text_section);
9668 ASM_OUTPUT_LABEL (asm_out_file, name);
9669 }
9670
9671 DECL_INITIAL (decl) = make_node (BLOCK);
9672 current_function_decl = decl;
9673 init_function_start (decl);
9674 first_function_block_is_cold = false;
9675 /* Make sure unwind info is emitted for the thunk if needed. */
9676 final_start_function (emit_barrier (), asm_out_file, 1);
9677
9678 /* Pad stack IP move with 4 instructions (two NOPs count
9679 as one instruction). */
9680 if (TARGET_PAD_SHORT_FUNCTION)
9681 {
9682 int i = 8;
9683
9684 while (i--)
9685 fputs ("\tnop\n", asm_out_file);
9686 }
9687
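/* The thunk body: load the caller's return address (the word at the top
   of the stack) into the target register and return.  */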
9688 xops[0] = gen_rtx_REG (Pmode, regno);
9689 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9690 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9691 output_asm_insn ("%!ret", NULL);
9692 final_end_function ();
9693 init_insn_lengths ();
9694 free_after_compilation (cfun);
9695 set_cfun (NULL);
9696 current_function_decl = NULL;
9697 }
9698
9699 if (flag_split_stack)
9700 file_end_indicate_split_stack ();
9701 }
9702
9703 /* Emit code for the SET_GOT patterns. */
9704
9705 const char *
9706 output_set_got (rtx dest, rtx label)
9707 {
9708 rtx xops[3];
9709
9710 xops[0] = dest;
9711
9712 if (TARGET_VXWORKS_RTP && flag_pic)
9713 {
9714 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9715 xops[2] = gen_rtx_MEM (Pmode,
9716 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9717 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9718
9719 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9720 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9721 an unadorned address. */
9722 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9723 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9724 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9725 return "";
9726 }
9727
9728 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9729
9730 if (!flag_pic)
9731 {
9732 if (TARGET_MACHO)
9733 /* We don't need a pic base, we're not producing pic. */
9734 gcc_unreachable ();
9735
9736 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9737 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9738 targetm.asm_out.internal_label (asm_out_file, "L",
9739 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9740 }
9741 else
9742 {
9743 char name[32];
9744 get_pc_thunk_name (name, REGNO (dest));
9745 pic_labels_used |= 1 << REGNO (dest);
9746
9747 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9748 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9749 output_asm_insn ("%!call\t%X2", xops);
9750
9751 #if TARGET_MACHO
9752 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9753 This is what will be referenced by the Mach-O PIC subsystem. */
9754 if (machopic_should_output_picbase_label () || !label)
9755 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9756
9757 /* When we are restoring the pic base at the site of a nonlocal label,
9758 and we decided to emit the pic base above, we will still output a
9759 local label used for calculating the correction offset (even though
9760 the offset will be 0 in that case). */
9761 if (label)
9762 targetm.asm_out.internal_label (asm_out_file, "L",
9763 CODE_LABEL_NUMBER (label));
9764 #endif
9765 }
9766
9767 if (!TARGET_MACHO)
9768 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9769
9770 return "";
9771 }
9772
9773 /* Generate a "push" pattern for input ARG. */
9774
9775 static rtx
9776 gen_push (rtx arg)
9777 {
9778 struct machine_function *m = cfun->machine;
9779
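/* A push moves the stack pointer down one word; record that in the
   tracked frame state, including the CFA offset while the CFA is still
   computed from the stack pointer.  */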
9780 if (m->fs.cfa_reg == stack_pointer_rtx)
9781 m->fs.cfa_offset += UNITS_PER_WORD;
9782 m->fs.sp_offset += UNITS_PER_WORD;
9783
9784 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9785 arg = gen_rtx_REG (word_mode, REGNO (arg));
9786
9787 return gen_rtx_SET (VOIDmode,
9788 gen_rtx_MEM (word_mode,
9789 gen_rtx_PRE_DEC (Pmode,
9790 stack_pointer_rtx)),
9791 arg);
9792 }
9793
9794 /* Generate a "pop" pattern for input ARG. */
9795
9796 static rtx
9797 gen_pop (rtx arg)
9798 {
9799 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9800 arg = gen_rtx_REG (word_mode, REGNO (arg));
9801
9802 return gen_rtx_SET (VOIDmode,
9803 arg,
9804 gen_rtx_MEM (word_mode,
9805 gen_rtx_POST_INC (Pmode,
9806 stack_pointer_rtx)));
9807 }
9808
9809 /* Return the number of an unused call-clobbered register available
9810 for the entire function, or INVALID_REGNUM if there is none. */
9811
9812 static unsigned int
9813 ix86_select_alt_pic_regnum (void)
9814 {
9815 if (ix86_use_pseudo_pic_reg ())
9816 return INVALID_REGNUM;
9817
9818 if (crtl->is_leaf
9819 && !crtl->profile
9820 && !ix86_current_function_calls_tls_descriptor)
9821 {
9822 int i, drap;
9823 /* Can't use the same register for both PIC and DRAP. */
9824 if (crtl->drap_reg)
9825 drap = REGNO (crtl->drap_reg);
9826 else
9827 drap = -1;
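/* Scan the call-clobbered integer registers (hard regs 2 down to 0) for
   one that is never live in this function and is not the DRAP register.  */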
9828 for (i = 2; i >= 0; --i)
9829 if (i != drap && !df_regs_ever_live_p (i))
9830 return i;
9831 }
9832
9833 return INVALID_REGNUM;
9834 }
9835
9836 /* Return TRUE if we need to save REGNO. */
9837
9838 static bool
9839 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9840 {
9841 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9842 && pic_offset_table_rtx)
9843 {
9844 if (ix86_use_pseudo_pic_reg ())
9845 {
9846 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
9847 _mcount in prologue. */
9848 if (!TARGET_64BIT && flag_pic && crtl->profile)
9849 return true;
9850 }
9851 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9852 || crtl->profile
9853 || crtl->calls_eh_return
9854 || crtl->uses_const_pool
9855 || cfun->has_nonlocal_label)
9856 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9857 }
9858
9859 if (crtl->calls_eh_return && maybe_eh_return)
9860 {
9861 unsigned i;
9862 for (i = 0; ; i++)
9863 {
9864 unsigned test = EH_RETURN_DATA_REGNO (i);
9865 if (test == INVALID_REGNUM)
9866 break;
9867 if (test == regno)
9868 return true;
9869 }
9870 }
9871
9872 if (crtl->drap_reg
9873 && regno == REGNO (crtl->drap_reg)
9874 && !cfun->machine->no_drap_save_restore)
9875 return true;
9876
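/* Otherwise a register must be saved if it is ever live, is callee-saved
   (neither call-used nor fixed), and is not the hard frame pointer while
   a frame pointer is in use.  */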
9877 return (df_regs_ever_live_p (regno)
9878 && !call_used_regs[regno]
9879 && !fixed_regs[regno]
9880 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9881 }
9882
9883 /* Return the number of saved general purpose registers. */
9884
9885 static int
9886 ix86_nsaved_regs (void)
9887 {
9888 int nregs = 0;
9889 int regno;
9890
9891 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9892 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9893 nregs ++;
9894 return nregs;
9895 }
9896
9897 /* Return the number of saved SSE registers. */
9898
9899 static int
9900 ix86_nsaved_sseregs (void)
9901 {
9902 int nregs = 0;
9903 int regno;
9904
9905 if (!TARGET_64BIT_MS_ABI)
9906 return 0;
9907 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9908 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9909 nregs ++;
9910 return nregs;
9911 }
9912
9913 /* Given FROM and TO register numbers, say whether this elimination is
9914 allowed. If stack alignment is needed, we can only replace argument
9915 pointer with hard frame pointer, or replace frame pointer with stack
9916 pointer. Otherwise, frame pointer elimination is automatically
9917 handled and all other eliminations are valid. */
9918
9919 static bool
9920 ix86_can_eliminate (const int from, const int to)
9921 {
9922 if (stack_realign_fp)
9923 return ((from == ARG_POINTER_REGNUM
9924 && to == HARD_FRAME_POINTER_REGNUM)
9925 || (from == FRAME_POINTER_REGNUM
9926 && to == STACK_POINTER_REGNUM));
9927 else
9928 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9929 }
9930
9931 /* Return the offset between two registers, one to be eliminated, and the other
9932 its replacement, at the start of a routine. */
9933
9934 HOST_WIDE_INT
9935 ix86_initial_elimination_offset (int from, int to)
9936 {
9937 struct ix86_frame frame;
9938 ix86_compute_frame_layout (&frame);
9939
9940 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9941 return frame.hard_frame_pointer_offset;
9942 else if (from == FRAME_POINTER_REGNUM
9943 && to == HARD_FRAME_POINTER_REGNUM)
9944 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9945 else
9946 {
9947 gcc_assert (to == STACK_POINTER_REGNUM);
9948
9949 if (from == ARG_POINTER_REGNUM)
9950 return frame.stack_pointer_offset;
9951
9952 gcc_assert (from == FRAME_POINTER_REGNUM);
9953 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9954 }
9955 }
9956
9957 /* In a dynamically-aligned function, we can't know the offset from
9958 stack pointer to frame pointer, so we must ensure that setjmp
9959 eliminates fp against the hard fp (%ebp) rather than trying to
9960 index from %esp up to the top of the frame across a gap that is
9961 of unknown (at compile-time) size. */
9962 static rtx
9963 ix86_builtin_setjmp_frame_value (void)
9964 {
9965 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9966 }
9967
9968 /* When using -fsplit-stack, the allocation routines set a field in
9969 the TCB to the bottom of the stack plus this much space, measured
9970 in bytes. */
9971
9972 #define SPLIT_STACK_AVAILABLE 256
9973
9974 /* Fill the ix86_frame structure FRAME with information about the frame of the current function. */
9975
9976 static void
9977 ix86_compute_frame_layout (struct ix86_frame *frame)
9978 {
9979 unsigned HOST_WIDE_INT stack_alignment_needed;
9980 HOST_WIDE_INT offset;
9981 unsigned HOST_WIDE_INT preferred_alignment;
9982 HOST_WIDE_INT size = get_frame_size ();
9983 HOST_WIDE_INT to_allocate;
9984
9985 frame->nregs = ix86_nsaved_regs ();
9986 frame->nsseregs = ix86_nsaved_sseregs ();
9987
9988 /* The 64-bit MS ABI seems to require the stack alignment to always be 16, except
9989 within function prologues and in leaf functions. */
9990 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9991 && (!crtl->is_leaf || cfun->calls_alloca != 0
9992 || ix86_current_function_calls_tls_descriptor))
9993 {
9994 crtl->preferred_stack_boundary = 128;
9995 crtl->stack_alignment_needed = 128;
9996 }
9997 /* preferred_stack_boundary is never updated for calls
9998 expanded from a TLS descriptor. Update it here. We don't update it at
9999 expand time because, according to the comments before
10000 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
10001 away. */
10002 else if (ix86_current_function_calls_tls_descriptor
10003 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
10004 {
10005 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
10006 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
10007 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
10008 }
10009
10010 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
10011 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
10012
10013 gcc_assert (!size || stack_alignment_needed);
10014 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
10015 gcc_assert (preferred_alignment <= stack_alignment_needed);
10016
10017 /* For SEH we have to limit the amount of code movement into the prologue.
10018 At present we do this via a BLOCKAGE, at which point there's very little
10019 scheduling that can be done, which means that there's very little point
10020 in doing anything except PUSHs. */
10021 if (TARGET_SEH)
10022 cfun->machine->use_fast_prologue_epilogue = false;
10023
10024 /* During reload iterations the number of registers saved can change.
10025 Recompute the value as needed. Do not recompute when the number of registers
10026 didn't change, as reload calls this function multiple times and does not
10027 expect the decision to change within a single iteration. */
10028 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
10029 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
10030 {
10031 int count = frame->nregs;
10032 struct cgraph_node *node = cgraph_node::get (current_function_decl);
10033
10034 cfun->machine->use_fast_prologue_epilogue_nregs = count;
10035
10036 /* The fast prologue uses move instead of push to save registers. This
10037 is significantly longer, but also executes faster as modern hardware
10038 can execute the moves in parallel, but can't do that for push/pop.
10039
10040 Be careful about choosing which prologue to emit: when the function takes
10041 many instructions to execute we may use the slow version, as well as when
10042 the function is known to be outside a hot spot (this is known with
10043 profile feedback only). Weight the size of the function by the number of registers
10044 to save, as it is cheap to use one or two push instructions but very
10045 slow to use many of them. */
10046 if (count)
10047 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10048 if (node->frequency < NODE_FREQUENCY_NORMAL
10049 || (flag_branch_probabilities
10050 && node->frequency < NODE_FREQUENCY_HOT))
10051 cfun->machine->use_fast_prologue_epilogue = false;
10052 else
10053 cfun->machine->use_fast_prologue_epilogue
10054 = !expensive_function_p (count);
10055 }
10056
10057 frame->save_regs_using_mov
10058 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10059 /* If static stack checking is enabled and done with probes,
10060 the registers need to be saved before allocating the frame. */
10061 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10062
10063 /* Skip return address. */
10064 offset = UNITS_PER_WORD;
10065
10066 /* Skip pushed static chain. */
10067 if (ix86_static_chain_on_stack)
10068 offset += UNITS_PER_WORD;
10069
10070 /* Skip saved base pointer. */
10071 if (frame_pointer_needed)
10072 offset += UNITS_PER_WORD;
10073 frame->hfp_save_offset = offset;
10074
10075 /* The traditional frame pointer location is at the top of the frame. */
10076 frame->hard_frame_pointer_offset = offset;
10077
10078 /* Register save area */
10079 offset += frame->nregs * UNITS_PER_WORD;
10080 frame->reg_save_offset = offset;
10081
10082 /* On SEH targets, registers are pushed just before the frame pointer
10083 location. */
10084 if (TARGET_SEH)
10085 frame->hard_frame_pointer_offset = offset;
10086
10087 /* Align and set SSE register save area. */
10088 if (frame->nsseregs)
10089 {
10090 /* The only ABI that has saved SSE registers (Win64) also has a
10091 16-byte aligned default stack, and thus we don't need to be
10092 within the re-aligned local stack frame to save them. */
10093 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10094 offset = (offset + 16 - 1) & -16;
10095 offset += frame->nsseregs * 16;
10096 }
10097 frame->sse_reg_save_offset = offset;
10098
10099 /* The re-aligned stack starts here. Values before this point are not
10100 directly comparable with values below this point. In order to make
10101 sure that no value happens to be the same before and after, force
10102 the alignment computation below to add a non-zero value. */
10103 if (stack_realign_fp)
10104 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10105
10106 /* Va-arg area */
10107 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10108 offset += frame->va_arg_size;
10109
10110 /* Align start of frame for local function. */
10111 if (stack_realign_fp
10112 || offset != frame->sse_reg_save_offset
10113 || size != 0
10114 || !crtl->is_leaf
10115 || cfun->calls_alloca
10116 || ix86_current_function_calls_tls_descriptor)
10117 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
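  /* For illustration only (not part of the layout logic): the rounding idiom
     (X + ALIGN - 1) & -ALIGN rounds X up to the next multiple of ALIGN, e.g.
     with ALIGN == 16, X == 40 yields (40 + 15) & -16 == 48, while an already
     aligned X == 48 stays 48.  The stack_realign_fp case above intentionally
     uses (X + ALIGN) & -ALIGN instead, so an already aligned X == 48 becomes
     64, guaranteeing the non-zero bump described in the earlier comment.  */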
10118
10119 /* Frame pointer points here. */
10120 frame->frame_pointer_offset = offset;
10121
10122 offset += size;
10123
10124 /* Add outgoing arguments area. Can be skipped if we eliminated
10125 all the function calls as dead code.
10126 Skipping is however impossible when function calls alloca. Alloca
10127 expander assumes that last crtl->outgoing_args_size
10128 of stack frame are unused. */
10129 if (ACCUMULATE_OUTGOING_ARGS
10130 && (!crtl->is_leaf || cfun->calls_alloca
10131 || ix86_current_function_calls_tls_descriptor))
10132 {
10133 offset += crtl->outgoing_args_size;
10134 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10135 }
10136 else
10137 frame->outgoing_arguments_size = 0;
10138
10139 /* Align stack boundary. Only needed if we're calling another function
10140 or using alloca. */
10141 if (!crtl->is_leaf || cfun->calls_alloca
10142 || ix86_current_function_calls_tls_descriptor)
10143 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10144
10145 /* We've reached end of stack frame. */
10146 frame->stack_pointer_offset = offset;
10147
10148 /* Size prologue needs to allocate. */
10149 to_allocate = offset - frame->sse_reg_save_offset;
10150
10151 if ((!to_allocate && frame->nregs <= 1)
10152 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10153 frame->save_regs_using_mov = false;
10154
10155 if (ix86_using_red_zone ()
10156 && crtl->sp_is_unchanging
10157 && crtl->is_leaf
10158 && !ix86_current_function_calls_tls_descriptor)
10159 {
10160 frame->red_zone_size = to_allocate;
10161 if (frame->save_regs_using_mov)
10162 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10163 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10164 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10165 }
10166 else
10167 frame->red_zone_size = 0;
10168 frame->stack_pointer_offset -= frame->red_zone_size;
10169
10170 /* The SEH frame pointer location is near the bottom of the frame.
10171 This is enforced by the fact that the difference between the
10172 stack pointer and the frame pointer is limited to 240 bytes in
10173 the unwind data structure. */
10174 if (TARGET_SEH)
10175 {
10176 HOST_WIDE_INT diff;
10177
10178 /* If we can leave the frame pointer where it is, do so; this also returns
10179 the establisher frame for __builtin_frame_address (0). */
10180 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10181 if (diff <= SEH_MAX_FRAME_SIZE
10182 && (diff > 240 || (diff & 15) != 0)
10183 && !crtl->accesses_prior_frames)
10184 {
10185 /* Ideally we'd determine what portion of the local stack frame
10186 (within the constraint of the lowest 240) is most heavily used.
10187 But without that complication, simply bias the frame pointer
10188 by 128 bytes so as to maximize the amount of the local stack
10189 frame that is addressable with 8-bit offsets. */
10190 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
10191 }
10192 }
10193 }
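/* A worked example of the layout computed above (an illustrative sketch under
   stated assumptions, not taken from a particular testcase): assume 64-bit,
   UNITS_PER_WORD == 8, a frame pointer, two saved GPRs, no saved SSE
   registers, 40 bytes of locals, no varargs area, no accumulated outgoing
   arguments, no red zone, and a function that makes calls so the final
   alignment applies.  Then:

     return address                 offset = 8
     saved frame pointer            offset = 16  (hard_frame_pointer_offset)
     GPR save area (2 * 8 bytes)    offset = 32  (reg_save_offset and, with
                                                  no SSE saves,
                                                  sse_reg_save_offset)
     frame start, 16-byte aligned   frame_pointer_offset = 32
     locals (40 bytes)              offset = 72
     final 16-byte alignment        stack_pointer_offset = 80

   so the prologue allocates to_allocate = 80 - 32 = 48 bytes beyond the
   pushes.  */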
10194
10195 /* This is semi-inlined memory_address_length, but simplified
10196 since we know that we're always dealing with reg+offset, and
10197 to avoid having to create and discard all that rtl. */
10198
10199 static inline int
10200 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10201 {
10202 int len = 4;
10203
10204 if (offset == 0)
10205 {
10206 /* EBP and R13 cannot be encoded without an offset. */
10207 len = (regno == BP_REG || regno == R13_REG);
10208 }
10209 else if (IN_RANGE (offset, -128, 127))
10210 len = 1;
10211
10212 /* ESP and R12 must be encoded with a SIB byte. */
10213 if (regno == SP_REG || regno == R12_REG)
10214 len++;
10215
10216 return len;
10217 }
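/* Illustrative values of the computation above (the result counts the extra
   address bytes -- displacement and SIB -- needed beyond the opcode and ModRM
   byte):  (AX_REG, 0) -> 0;  (BP_REG, 0) -> 1, since %rbp/%r13 cannot be
   encoded without a displacement;  (SP_REG, 0) -> 1 for the SIB byte;
   (SP_REG, 100) -> 2;  (CX_REG, 200) -> 4 for a disp32.  */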
10218
10219 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10220 The valid base registers are taken from CFUN->MACHINE->FS. */
10221
10222 static rtx
10223 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10224 {
10225 const struct machine_function *m = cfun->machine;
10226 rtx base_reg = NULL;
10227 HOST_WIDE_INT base_offset = 0;
10228
10229 if (m->use_fast_prologue_epilogue)
10230 {
10231 /* Choose the base register most likely to allow the most scheduling
10232 opportunities. Generally FP is valid throughout the function,
10233 while DRAP must be reloaded within the epilogue. But choose either
10234 over the SP due to increased encoding size. */
10235
10236 if (m->fs.fp_valid)
10237 {
10238 base_reg = hard_frame_pointer_rtx;
10239 base_offset = m->fs.fp_offset - cfa_offset;
10240 }
10241 else if (m->fs.drap_valid)
10242 {
10243 base_reg = crtl->drap_reg;
10244 base_offset = 0 - cfa_offset;
10245 }
10246 else if (m->fs.sp_valid)
10247 {
10248 base_reg = stack_pointer_rtx;
10249 base_offset = m->fs.sp_offset - cfa_offset;
10250 }
10251 }
10252 else
10253 {
10254 HOST_WIDE_INT toffset;
10255 int len = 16, tlen;
10256
10257 /* Choose the base register with the smallest address encoding.
10258 With a tie, choose FP > DRAP > SP. */
10259 if (m->fs.sp_valid)
10260 {
10261 base_reg = stack_pointer_rtx;
10262 base_offset = m->fs.sp_offset - cfa_offset;
10263 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10264 }
10265 if (m->fs.drap_valid)
10266 {
10267 toffset = 0 - cfa_offset;
10268 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10269 if (tlen <= len)
10270 {
10271 base_reg = crtl->drap_reg;
10272 base_offset = toffset;
10273 len = tlen;
10274 }
10275 }
10276 if (m->fs.fp_valid)
10277 {
10278 toffset = m->fs.fp_offset - cfa_offset;
10279 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10280 if (tlen <= len)
10281 {
10282 base_reg = hard_frame_pointer_rtx;
10283 base_offset = toffset;
10284 len = tlen;
10285 }
10286 }
10287 }
10288 gcc_assert (base_reg != NULL);
10289
10290 return plus_constant (Pmode, base_reg, base_offset);
10291 }
10292
10293 /* Emit code to save registers in the prologue. */
10294
10295 static void
10296 ix86_emit_save_regs (void)
10297 {
10298 unsigned int regno;
10299 rtx insn;
10300
10301 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10302 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10303 {
10304 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10305 RTX_FRAME_RELATED_P (insn) = 1;
10306 }
10307 }
10308
10309 /* Emit a single register save at CFA - CFA_OFFSET. */
10310
10311 static void
10312 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10313 HOST_WIDE_INT cfa_offset)
10314 {
10315 struct machine_function *m = cfun->machine;
10316 rtx reg = gen_rtx_REG (mode, regno);
10317 rtx mem, addr, base, insn;
10318
10319 addr = choose_baseaddr (cfa_offset);
10320 mem = gen_frame_mem (mode, addr);
10321
10322 /* For SSE saves, we need to indicate the 128-bit alignment. */
10323 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10324
10325 insn = emit_move_insn (mem, reg);
10326 RTX_FRAME_RELATED_P (insn) = 1;
10327
10328 base = addr;
10329 if (GET_CODE (base) == PLUS)
10330 base = XEXP (base, 0);
10331 gcc_checking_assert (REG_P (base));
10332
10333 /* When saving registers into a re-aligned local stack frame, avoid
10334 any tricky guessing by dwarf2out. */
10335 if (m->fs.realigned)
10336 {
10337 gcc_checking_assert (stack_realign_drap);
10338
10339 if (regno == REGNO (crtl->drap_reg))
10340 {
10341 /* A bit of a hack. We force the DRAP register to be saved in
10342 the re-aligned stack frame, which provides us with a copy
10343 of the CFA that will last past the prologue. Install it. */
10344 gcc_checking_assert (cfun->machine->fs.fp_valid);
10345 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10346 cfun->machine->fs.fp_offset - cfa_offset);
10347 mem = gen_rtx_MEM (mode, addr);
10348 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10349 }
10350 else
10351 {
10352 /* The frame pointer is a stable reference within the
10353 aligned frame. Use it. */
10354 gcc_checking_assert (cfun->machine->fs.fp_valid);
10355 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10356 cfun->machine->fs.fp_offset - cfa_offset);
10357 mem = gen_rtx_MEM (mode, addr);
10358 add_reg_note (insn, REG_CFA_EXPRESSION,
10359 gen_rtx_SET (VOIDmode, mem, reg));
10360 }
10361 }
10362
10363 /* The memory may not be relative to the current CFA register,
10364 which means that we may need to generate a new pattern for
10365 use by the unwind info. */
10366 else if (base != m->fs.cfa_reg)
10367 {
10368 addr = plus_constant (Pmode, m->fs.cfa_reg,
10369 m->fs.cfa_offset - cfa_offset);
10370 mem = gen_rtx_MEM (mode, addr);
10371 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10372 }
10373 }
10374
10375 /* Emit code to save registers using MOV insns.
10376 First register is stored at CFA - CFA_OFFSET. */
10377 static void
10378 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10379 {
10380 unsigned int regno;
10381
10382 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10383 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10384 {
10385 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10386 cfa_offset -= UNITS_PER_WORD;
10387 }
10388 }
10389
10390 /* Emit code to save SSE registers using MOV insns.
10391 First register is stored at CFA - CFA_OFFSET. */
10392 static void
10393 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10394 {
10395 unsigned int regno;
10396
10397 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10398 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10399 {
10400 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10401 cfa_offset -= 16;
10402 }
10403 }
10404
10405 static GTY(()) rtx queued_cfa_restores;
10406
10407 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
10408 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10409 Don't add the note if the previously saved value will be left untouched
10410 within the stack red zone until return, as unwinders can find the same value
10411 in the register and on the stack. */
10412
10413 static void
10414 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
10415 {
10416 if (!crtl->shrink_wrapped
10417 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10418 return;
10419
10420 if (insn)
10421 {
10422 add_reg_note (insn, REG_CFA_RESTORE, reg);
10423 RTX_FRAME_RELATED_P (insn) = 1;
10424 }
10425 else
10426 queued_cfa_restores
10427 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10428 }
10429
10430 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10431
10432 static void
10433 ix86_add_queued_cfa_restore_notes (rtx insn)
10434 {
10435 rtx last;
10436 if (!queued_cfa_restores)
10437 return;
10438 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10439 ;
10440 XEXP (last, 1) = REG_NOTES (insn);
10441 REG_NOTES (insn) = queued_cfa_restores;
10442 queued_cfa_restores = NULL_RTX;
10443 RTX_FRAME_RELATED_P (insn) = 1;
10444 }
10445
10446 /* Expand prologue or epilogue stack adjustment.
10447 The pattern exists to put a dependency on all ebp-based memory accesses.
10448 STYLE should be negative if the instructions should be marked as frame related,
10449 zero if the %r11 register is live and cannot be freely used, and positive
10450 otherwise. */
10451
10452 static void
10453 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10454 int style, bool set_cfa)
10455 {
10456 struct machine_function *m = cfun->machine;
10457 rtx insn;
10458 bool add_frame_related_expr = false;
10459
10460 if (Pmode == SImode)
10461 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10462 else if (x86_64_immediate_operand (offset, DImode))
10463 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10464 else
10465 {
10466 rtx tmp;
10467 /* r11 is also used by indirect sibcall returns: it is set before the
10468 epilogue and used after it. */
10469 if (style)
10470 tmp = gen_rtx_REG (DImode, R11_REG);
10471 else
10472 {
10473 gcc_assert (src != hard_frame_pointer_rtx
10474 && dest != hard_frame_pointer_rtx);
10475 tmp = hard_frame_pointer_rtx;
10476 }
10477 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10478 if (style < 0)
10479 add_frame_related_expr = true;
10480
10481 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10482 }
10483
10484 insn = emit_insn (insn);
10485 if (style >= 0)
10486 ix86_add_queued_cfa_restore_notes (insn);
10487
10488 if (set_cfa)
10489 {
10490 rtx r;
10491
10492 gcc_assert (m->fs.cfa_reg == src);
10493 m->fs.cfa_offset += INTVAL (offset);
10494 m->fs.cfa_reg = dest;
10495
10496 r = gen_rtx_PLUS (Pmode, src, offset);
10497 r = gen_rtx_SET (VOIDmode, dest, r);
10498 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10499 RTX_FRAME_RELATED_P (insn) = 1;
10500 }
10501 else if (style < 0)
10502 {
10503 RTX_FRAME_RELATED_P (insn) = 1;
10504 if (add_frame_related_expr)
10505 {
10506 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10507 r = gen_rtx_SET (VOIDmode, dest, r);
10508 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10509 }
10510 }
10511
10512 if (dest == stack_pointer_rtx)
10513 {
10514 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10515 bool valid = m->fs.sp_valid;
10516
10517 if (src == hard_frame_pointer_rtx)
10518 {
10519 valid = m->fs.fp_valid;
10520 ooffset = m->fs.fp_offset;
10521 }
10522 else if (src == crtl->drap_reg)
10523 {
10524 valid = m->fs.drap_valid;
10525 ooffset = 0;
10526 }
10527 else
10528 {
10529 /* Else there are two possibilities: SP itself, which we set
10530 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10531 taken care of by hand along the eh_return path. */
10532 gcc_checking_assert (src == stack_pointer_rtx
10533 || offset == const0_rtx);
10534 }
10535
10536 m->fs.sp_offset = ooffset - INTVAL (offset);
10537 m->fs.sp_valid = valid;
10538 }
10539 }
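/* Typical prologue use of the helper above, as found later in this file
   (sketch only): allocate the local frame and update the CFA while the stack
   pointer is still the CFA register.

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                GEN_INT (-allocate), -1,
                                m->fs.cfa_reg == stack_pointer_rtx);

   A negative STYLE (-1) marks the adjustment as frame related; the final
   argument requests the REG_CFA_ADJUST_CFA note.  */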
10540
10541 /* Find an available register to be used as the dynamic realign argument
10542 pointer register. Such a register will be written in the prologue and
10543 used at the beginning of the body, so it must not be
10544 1. a parameter-passing register.
10545 2. the GOT pointer.
10546 We reuse the static-chain register if it is available. Otherwise we
10547 use DI for i386 and R13 for x86-64. We chose R13 since it has a
10548 shorter encoding.
10549
10550 Return: the regno of the chosen register. */
10551
10552 static unsigned int
10553 find_drap_reg (void)
10554 {
10555 tree decl = cfun->decl;
10556
10557 if (TARGET_64BIT)
10558 {
10559 /* Use R13 for a nested function or a function that needs a static chain.
10560 Since a function with a tail call may use any caller-saved
10561 register in the epilogue, DRAP must not use a caller-saved
10562 register in that case. */
10563 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10564 return R13_REG;
10565
10566 return R10_REG;
10567 }
10568 else
10569 {
10570 /* Use DI for a nested function or a function that needs a static chain.
10571 Since a function with a tail call may use any caller-saved
10572 register in the epilogue, DRAP must not use a caller-saved
10573 register in that case. */
10574 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10575 return DI_REG;
10576
10577 /* Reuse static chain register if it isn't used for parameter
10578 passing. */
10579 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10580 {
10581 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10582 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10583 return CX_REG;
10584 }
10585 return DI_REG;
10586 }
10587 }
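/* Illustrative outcomes of the selection above: a 64-bit function that needs
   a static chain or makes tail calls gets R13, while an ordinary 64-bit
   function gets R10; a 32-bit function with at most two register parameters
   and neither fastcall nor thiscall convention gets CX, and otherwise DI is
   used.  */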
10588
10589 /* Return minimum incoming stack alignment. */
10590
10591 static unsigned int
10592 ix86_minimum_incoming_stack_boundary (bool sibcall)
10593 {
10594 unsigned int incoming_stack_boundary;
10595
10596 /* Prefer the one specified at command line. */
10597 if (ix86_user_incoming_stack_boundary)
10598 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10599 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10600 if -mstackrealign is used, this is not a sibcall check, and the
10601 estimated stack alignment is 128 bits. */
10602 else if (!sibcall
10603 && !TARGET_64BIT
10604 && ix86_force_align_arg_pointer
10605 && crtl->stack_alignment_estimated == 128)
10606 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10607 else
10608 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10609
10610 /* Incoming stack alignment can be changed on individual functions
10611 via force_align_arg_pointer attribute. We use the smallest
10612 incoming stack boundary. */
10613 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10614 && lookup_attribute (ix86_force_align_arg_pointer_string,
10615 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10616 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10617
10618 /* The incoming stack frame has to be aligned at least at
10619 parm_stack_boundary. */
10620 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10621 incoming_stack_boundary = crtl->parm_stack_boundary;
10622
10623 /* The stack at the entry of main is aligned by the runtime. We use the
10624 smallest incoming stack boundary. */
10625 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10626 && DECL_NAME (current_function_decl)
10627 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10628 && DECL_FILE_SCOPE_P (current_function_decl))
10629 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10630
10631 return incoming_stack_boundary;
10632 }
10633
10634 /* Update incoming stack boundary and estimated stack alignment. */
10635
10636 static void
10637 ix86_update_stack_boundary (void)
10638 {
10639 ix86_incoming_stack_boundary
10640 = ix86_minimum_incoming_stack_boundary (false);
10641
10642 /* x86_64 varargs need a 16-byte aligned stack for the register save
10643 area. */
10644 if (TARGET_64BIT
10645 && cfun->stdarg
10646 && crtl->stack_alignment_estimated < 128)
10647 crtl->stack_alignment_estimated = 128;
10648 }
10649
10650 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10651 needed or an rtx for DRAP otherwise. */
10652
10653 static rtx
10654 ix86_get_drap_rtx (void)
10655 {
10656 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10657 crtl->need_drap = true;
10658
10659 if (stack_realign_drap)
10660 {
10661 /* Assign DRAP to vDRAP and return vDRAP. */
10662 unsigned int regno = find_drap_reg ();
10663 rtx drap_vreg;
10664 rtx arg_ptr;
10665 rtx_insn *seq, *insn;
10666
10667 arg_ptr = gen_rtx_REG (Pmode, regno);
10668 crtl->drap_reg = arg_ptr;
10669
10670 start_sequence ();
10671 drap_vreg = copy_to_reg (arg_ptr);
10672 seq = get_insns ();
10673 end_sequence ();
10674
10675 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10676 if (!optimize)
10677 {
10678 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10679 RTX_FRAME_RELATED_P (insn) = 1;
10680 }
10681 return drap_vreg;
10682 }
10683 else
10684 return NULL;
10685 }
10686
10687 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10688
10689 static rtx
10690 ix86_internal_arg_pointer (void)
10691 {
10692 return virtual_incoming_args_rtx;
10693 }
10694
10695 struct scratch_reg {
10696 rtx reg;
10697 bool saved;
10698 };
10699
10700 /* Return a short-lived scratch register for use on function entry.
10701 In 32-bit mode, it is valid only after the registers are saved
10702 in the prologue. This register must be released by means of
10703 release_scratch_register_on_entry once it is dead. */
10704
10705 static void
10706 get_scratch_register_on_entry (struct scratch_reg *sr)
10707 {
10708 int regno;
10709
10710 sr->saved = false;
10711
10712 if (TARGET_64BIT)
10713 {
10714 /* We always use R11 in 64-bit mode. */
10715 regno = R11_REG;
10716 }
10717 else
10718 {
10719 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10720 bool fastcall_p
10721 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10722 bool thiscall_p
10723 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10724 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10725 int regparm = ix86_function_regparm (fntype, decl);
10726 int drap_regno
10727 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10728
10729 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10730 for the static chain register. */
10731 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10732 && drap_regno != AX_REG)
10733 regno = AX_REG;
10734 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10735 for the static chain register. */
10736 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10737 regno = AX_REG;
10738 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10739 regno = DX_REG;
10740 /* ecx is the static chain register. */
10741 else if (regparm < 3 && !fastcall_p && !thiscall_p
10742 && !static_chain_p
10743 && drap_regno != CX_REG)
10744 regno = CX_REG;
10745 else if (ix86_save_reg (BX_REG, true))
10746 regno = BX_REG;
10747 /* esi is the static chain register. */
10748 else if (!(regparm == 3 && static_chain_p)
10749 && ix86_save_reg (SI_REG, true))
10750 regno = SI_REG;
10751 else if (ix86_save_reg (DI_REG, true))
10752 regno = DI_REG;
10753 else
10754 {
10755 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10756 sr->saved = true;
10757 }
10758 }
10759
10760 sr->reg = gen_rtx_REG (Pmode, regno);
10761 if (sr->saved)
10762 {
10763 rtx insn = emit_insn (gen_push (sr->reg));
10764 RTX_FRAME_RELATED_P (insn) = 1;
10765 }
10766 }
10767
10768 /* Release a scratch register obtained from the preceding function. */
10769
10770 static void
10771 release_scratch_register_on_entry (struct scratch_reg *sr)
10772 {
10773 if (sr->saved)
10774 {
10775 struct machine_function *m = cfun->machine;
10776 rtx x, insn = emit_insn (gen_pop (sr->reg));
10777
10778 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10779 RTX_FRAME_RELATED_P (insn) = 1;
10780 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10781 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10782 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10783 m->fs.sp_offset -= UNITS_PER_WORD;
10784 }
10785 }
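/* The two helpers above are used as a bracketed pair by the stack-probing
   routines below, e.g. (sketch):

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... emit probe insns that use sr.reg ...
     release_scratch_register_on_entry (&sr);

   If no free register was available, the pair also emits the push/pop that
   spills and reloads the chosen register.  */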
10786
10787 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
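/* With the usual default of STACK_CHECK_PROBE_INTERVAL_EXP == 12 (an
   assumption; targets may override it), PROBE_INTERVAL is 4096 bytes,
   i.e. one probe per page.  */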
10788
10789 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10790
10791 static void
10792 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10793 {
10794 /* We skip the probe for the first interval + a small dope of 4 words and
10795 probe that many bytes past the specified size to maintain a protection
10796 area at the bottom of the stack. */
10797 const int dope = 4 * UNITS_PER_WORD;
10798 rtx size_rtx = GEN_INT (size), last;
10799
10800 /* See if we have a constant small number of probes to generate. If so,
10801 that's the easy case. The run-time loop is made up of 11 insns in the
10802 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10803 for n # of intervals. */
10804 if (size <= 5 * PROBE_INTERVAL)
10805 {
10806 HOST_WIDE_INT i, adjust;
10807 bool first_probe = true;
10808
10809 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10810 values of N from 1 until it exceeds SIZE. If only one probe is
10811 needed, this will not generate any code. Then adjust and probe
10812 to PROBE_INTERVAL + SIZE. */
10813 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10814 {
10815 if (first_probe)
10816 {
10817 adjust = 2 * PROBE_INTERVAL + dope;
10818 first_probe = false;
10819 }
10820 else
10821 adjust = PROBE_INTERVAL;
10822
10823 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10824 plus_constant (Pmode, stack_pointer_rtx,
10825 -adjust)));
10826 emit_stack_probe (stack_pointer_rtx);
10827 }
10828
10829 if (first_probe)
10830 adjust = size + PROBE_INTERVAL + dope;
10831 else
10832 adjust = size + PROBE_INTERVAL - i;
10833
10834 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10835 plus_constant (Pmode, stack_pointer_rtx,
10836 -adjust)));
10837 emit_stack_probe (stack_pointer_rtx);
10838
10839 /* Adjust back to account for the additional first interval. */
10840 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10841 plus_constant (Pmode, stack_pointer_rtx,
10842 PROBE_INTERVAL + dope)));
10843 }
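  /* An illustrative trace of the unrolled case above, assuming
     PROBE_INTERVAL == 4096, dope == 32 (4 * UNITS_PER_WORD in 64-bit mode)
     and SIZE == 8192:

	SP -= 2*PROBE_INTERVAL + dope  (= 8224); probe at SP
	SP -= PROBE_INTERVAL           (= 4096); probe at SP
	SP += PROBE_INTERVAL + dope    (= 4128)

     leaving the stack pointer exactly SIZE == 8192 bytes lower.  */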
10844
10845 /* Otherwise, do the same as above, but in a loop. Note that we must be
10846 extra careful with variables wrapping around because we might be at
10847 the very top (or the very bottom) of the address space and we have
10848 to be able to handle this case properly; in particular, we use an
10849 equality test for the loop condition. */
10850 else
10851 {
10852 HOST_WIDE_INT rounded_size;
10853 struct scratch_reg sr;
10854
10855 get_scratch_register_on_entry (&sr);
10856
10857
10858 /* Step 1: round SIZE to the previous multiple of the interval. */
10859
10860 rounded_size = size & -PROBE_INTERVAL;
10861
10862
10863 /* Step 2: compute initial and final value of the loop counter. */
10864
10865 /* SP = SP_0 + PROBE_INTERVAL. */
10866 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10867 plus_constant (Pmode, stack_pointer_rtx,
10868 - (PROBE_INTERVAL + dope))));
10869
10870 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10871 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10872 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10873 gen_rtx_PLUS (Pmode, sr.reg,
10874 stack_pointer_rtx)));
10875
10876
10877 /* Step 3: the loop
10878
10879 while (SP != LAST_ADDR)
10880 {
10881 SP = SP + PROBE_INTERVAL
10882 probe at SP
10883 }
10884
10885 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10886 values of N from 1 until it is equal to ROUNDED_SIZE. */
10887
10888 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10889
10890
10891 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10892 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10893
10894 if (size != rounded_size)
10895 {
10896 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10897 plus_constant (Pmode, stack_pointer_rtx,
10898 rounded_size - size)));
10899 emit_stack_probe (stack_pointer_rtx);
10900 }
10901
10902 /* Adjust back to account for the additional first interval. */
10903 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10904 plus_constant (Pmode, stack_pointer_rtx,
10905 PROBE_INTERVAL + dope)));
10906
10907 release_scratch_register_on_entry (&sr);
10908 }
10909
10910 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10911
10912 /* Even if the stack pointer isn't the CFA register, we need to correctly
10913 describe the adjustments made to it, in particular differentiate the
10914 frame-related ones from the frame-unrelated ones. */
10915 if (size > 0)
10916 {
10917 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10918 XVECEXP (expr, 0, 0)
10919 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10920 plus_constant (Pmode, stack_pointer_rtx, -size));
10921 XVECEXP (expr, 0, 1)
10922 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10923 plus_constant (Pmode, stack_pointer_rtx,
10924 PROBE_INTERVAL + dope + size));
10925 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10926 RTX_FRAME_RELATED_P (last) = 1;
10927
10928 cfun->machine->fs.sp_offset += size;
10929 }
10930
10931 /* Make sure nothing is scheduled before we are done. */
10932 emit_insn (gen_blockage ());
10933 }
10934
10935 /* Adjust the stack pointer up to REG while probing it. */
10936
10937 const char *
10938 output_adjust_stack_and_probe (rtx reg)
10939 {
10940 static int labelno = 0;
10941 char loop_lab[32], end_lab[32];
10942 rtx xops[2];
10943
10944 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10945 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10946
10947 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10948
10949 /* Jump to END_LAB if SP == LAST_ADDR. */
10950 xops[0] = stack_pointer_rtx;
10951 xops[1] = reg;
10952 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10953 fputs ("\tje\t", asm_out_file);
10954 assemble_name_raw (asm_out_file, end_lab);
10955 fputc ('\n', asm_out_file);
10956
10957 /* SP = SP + PROBE_INTERVAL. */
10958 xops[1] = GEN_INT (PROBE_INTERVAL);
10959 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10960
10961 /* Probe at SP. */
10962 xops[1] = const0_rtx;
10963 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10964
10965 fprintf (asm_out_file, "\tjmp\t");
10966 assemble_name_raw (asm_out_file, loop_lab);
10967 fputc ('\n', asm_out_file);
10968
10969 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10970
10971 return "";
10972 }
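/* A sketch of the assembly the routine above produces (assuming 64-bit,
   AT&T syntax, %r11 as the scratch register and PROBE_INTERVAL == 4096;
   the actual label spelling comes from ASM_GENERATE_INTERNAL_LABEL):

   .LPSRL0:
	cmpq	%r11, %rsp
	je	.LPSRE0
	subq	$4096, %rsp
	orq	$0, (%rsp)
	jmp	.LPSRL0
   .LPSRE0:
*/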
10973
10974 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10975 inclusive. These are offsets from the current stack pointer. */
10976
10977 static void
10978 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10979 {
10980 /* See if we have a constant small number of probes to generate. If so,
10981 that's the easy case. The run-time loop is made up of 7 insns in the
10982 generic case while the compile-time loop is made up of n insns for n #
10983 of intervals. */
10984 if (size <= 7 * PROBE_INTERVAL)
10985 {
10986 HOST_WIDE_INT i;
10987
10988 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10989 it exceeds SIZE. If only one probe is needed, this will not
10990 generate any code. Then probe at FIRST + SIZE. */
10991 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10992 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10993 -(first + i)));
10994
10995 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10996 -(first + size)));
10997 }
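  /* An illustrative trace of the unrolled case above (assuming the default
     PROBE_INTERVAL == 4096): with FIRST == 8192 and SIZE == 5000, the loop
     emits one probe at sp - 12288 and the final statement a probe at
     sp - 13192, i.e. at FIRST + PROBE_INTERVAL and FIRST + SIZE below the
     current stack pointer.  */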
10998
10999 /* Otherwise, do the same as above, but in a loop. Note that we must be
11000 extra careful with variables wrapping around because we might be at
11001 the very top (or the very bottom) of the address space and we have
11002 to be able to handle this case properly; in particular, we use an
11003 equality test for the loop condition. */
11004 else
11005 {
11006 HOST_WIDE_INT rounded_size, last;
11007 struct scratch_reg sr;
11008
11009 get_scratch_register_on_entry (&sr);
11010
11011
11012 /* Step 1: round SIZE to the previous multiple of the interval. */
11013
11014 rounded_size = size & -PROBE_INTERVAL;
11015
11016
11017 /* Step 2: compute initial and final value of the loop counter. */
11018
11019 /* TEST_OFFSET = FIRST. */
11020 emit_move_insn (sr.reg, GEN_INT (-first));
11021
11022 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
11023 last = first + rounded_size;
11024
11025
11026 /* Step 3: the loop
11027
11028 while (TEST_ADDR != LAST_ADDR)
11029 {
11030 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
11031 probe at TEST_ADDR
11032 }
11033
11034 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
11035 until it is equal to ROUNDED_SIZE. */
11036
11037 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
11038
11039
11040 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
11041 that SIZE is equal to ROUNDED_SIZE. */
11042
11043 if (size != rounded_size)
11044 emit_stack_probe (plus_constant (Pmode,
11045 gen_rtx_PLUS (Pmode,
11046 stack_pointer_rtx,
11047 sr.reg),
11048 rounded_size - size));
11049
11050 release_scratch_register_on_entry (&sr);
11051 }
11052
11053 /* Make sure nothing is scheduled before we are done. */
11054 emit_insn (gen_blockage ());
11055 }
11056
11057 /* Probe a range of stack addresses from REG to END, inclusive. These are
11058 offsets from the current stack pointer. */
11059
11060 const char *
11061 output_probe_stack_range (rtx reg, rtx end)
11062 {
11063 static int labelno = 0;
11064 char loop_lab[32], end_lab[32];
11065 rtx xops[3];
11066
11067 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11068 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11069
11070 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11071
11072 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11073 xops[0] = reg;
11074 xops[1] = end;
11075 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11076 fputs ("\tje\t", asm_out_file);
11077 assemble_name_raw (asm_out_file, end_lab);
11078 fputc ('\n', asm_out_file);
11079
11080 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11081 xops[1] = GEN_INT (PROBE_INTERVAL);
11082 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11083
11084 /* Probe at TEST_ADDR. */
11085 xops[0] = stack_pointer_rtx;
11086 xops[1] = reg;
11087 xops[2] = const0_rtx;
11088 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11089
11090 fprintf (asm_out_file, "\tjmp\t");
11091 assemble_name_raw (asm_out_file, loop_lab);
11092 fputc ('\n', asm_out_file);
11093
11094 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11095
11096 return "";
11097 }
11098
11099 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
11100 to be generated in correct form. */
11101 static void
11102 ix86_finalize_stack_realign_flags (void)
11103 {
11104 /* Check if stack realignment is really needed after reload, and
11105 store the result in cfun. */
11106 unsigned int incoming_stack_boundary
11107 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11108 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11109 unsigned int stack_realign = (incoming_stack_boundary
11110 < (crtl->is_leaf
11111 ? crtl->max_used_stack_slot_alignment
11112 : crtl->stack_alignment_needed));
11113
11114 if (crtl->stack_realign_finalized)
11115 {
11116 /* After stack_realign_needed is finalized, we can no longer
11117 change it. */
11118 gcc_assert (crtl->stack_realign_needed == stack_realign);
11119 return;
11120 }
11121
11122 /* If the only reason for frame_pointer_needed is that we conservatively
11123 assumed stack realignment might be needed, but in the end nothing that
11124 needed the stack alignment had been spilled, clear frame_pointer_needed
11125 and say we don't need stack realignment. */
11126 if (stack_realign
11127 && frame_pointer_needed
11128 && crtl->is_leaf
11129 && flag_omit_frame_pointer
11130 && crtl->sp_is_unchanging
11131 && !ix86_current_function_calls_tls_descriptor
11132 && !crtl->accesses_prior_frames
11133 && !cfun->calls_alloca
11134 && !crtl->calls_eh_return
11135 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11136 && !ix86_frame_pointer_required ()
11137 && get_frame_size () == 0
11138 && ix86_nsaved_sseregs () == 0
11139 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11140 {
11141 HARD_REG_SET set_up_by_prologue, prologue_used;
11142 basic_block bb;
11143
11144 CLEAR_HARD_REG_SET (prologue_used);
11145 CLEAR_HARD_REG_SET (set_up_by_prologue);
11146 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11147 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11148 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11149 HARD_FRAME_POINTER_REGNUM);
11150 FOR_EACH_BB_FN (bb, cfun)
11151 {
11152 rtx_insn *insn;
11153 FOR_BB_INSNS (bb, insn)
11154 if (NONDEBUG_INSN_P (insn)
11155 && requires_stack_frame_p (insn, prologue_used,
11156 set_up_by_prologue))
11157 {
11158 crtl->stack_realign_needed = stack_realign;
11159 crtl->stack_realign_finalized = true;
11160 return;
11161 }
11162 }
11163
11164 /* If drap has been set, but it actually isn't live at the start
11165 of the function, there is no reason to set it up. */
11166 if (crtl->drap_reg)
11167 {
11168 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11169 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11170 {
11171 crtl->drap_reg = NULL_RTX;
11172 crtl->need_drap = false;
11173 }
11174 }
11175 else
11176 cfun->machine->no_drap_save_restore = true;
11177
11178 frame_pointer_needed = false;
11179 stack_realign = false;
11180 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11181 crtl->stack_alignment_needed = incoming_stack_boundary;
11182 crtl->stack_alignment_estimated = incoming_stack_boundary;
11183 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11184 crtl->preferred_stack_boundary = incoming_stack_boundary;
11185 df_finish_pass (true);
11186 df_scan_alloc (NULL);
11187 df_scan_blocks ();
11188 df_compute_regs_ever_live (true);
11189 df_analyze ();
11190 }
11191
11192 crtl->stack_realign_needed = stack_realign;
11193 crtl->stack_realign_finalized = true;
11194 }
11195
11196 /* Delete SET_GOT right after entry block if it is allocated to reg. */
11197
11198 static void
11199 ix86_elim_entry_set_got (rtx reg)
11200 {
11201 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11202 rtx_insn *c_insn = BB_HEAD (bb);
11203 if (!NONDEBUG_INSN_P (c_insn))
11204 c_insn = next_nonnote_nondebug_insn (c_insn);
11205 if (c_insn && NONJUMP_INSN_P (c_insn))
11206 {
11207 rtx pat = PATTERN (c_insn);
11208 if (GET_CODE (pat) == PARALLEL)
11209 {
11210 rtx vec = XVECEXP (pat, 0, 0);
11211 if (GET_CODE (vec) == SET
11212 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11213 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11214 delete_insn (c_insn);
11215 }
11216 }
11217 }
11218
11219 /* Expand the prologue into a bunch of separate insns. */
11220
11221 void
11222 ix86_expand_prologue (void)
11223 {
11224 struct machine_function *m = cfun->machine;
11225 rtx insn, t;
11226 struct ix86_frame frame;
11227 HOST_WIDE_INT allocate;
11228 bool int_registers_saved;
11229 bool sse_registers_saved;
11230
11231 ix86_finalize_stack_realign_flags ();
11232
11233 /* DRAP should not coexist with stack_realign_fp */
11234 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11235
11236 memset (&m->fs, 0, sizeof (m->fs));
11237
11238 /* Initialize CFA state for before the prologue. */
11239 m->fs.cfa_reg = stack_pointer_rtx;
11240 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11241
11242 /* Track SP offset to the CFA. We continue tracking this after we've
11243 swapped the CFA register away from SP. In the case of re-alignment
11244 this is fudged; we're interested in offsets within the local frame. */
11245 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11246 m->fs.sp_valid = true;
11247
11248 ix86_compute_frame_layout (&frame);
11249
11250 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11251 {
11252 /* We should have already generated an error for any use of
11253 ms_hook on a nested function. */
11254 gcc_checking_assert (!ix86_static_chain_on_stack);
11255
11256 /* Check if profiling is active and we shall use the profiling-before-
11257 prologue variant. If so, issue a sorry. */
11258 if (crtl->profile && flag_fentry != 0)
11259 sorry ("ms_hook_prologue attribute isn%'t compatible "
11260 "with -mfentry for 32-bit");
11261
11262 /* In ix86_asm_output_function_label we emitted:
11263 8b ff movl.s %edi,%edi
11264 55 push %ebp
11265 8b ec movl.s %esp,%ebp
11266
11267 This matches the hookable function prologue in Win32 API
11268 functions in Microsoft Windows XP Service Pack 2 and newer.
11269 Wine uses this to enable Windows apps to hook the Win32 API
11270 functions provided by Wine.
11271
11272 What that means is that we've already set up the frame pointer. */
11273
11274 if (frame_pointer_needed
11275 && !(crtl->drap_reg && crtl->stack_realign_needed))
11276 {
11277 rtx push, mov;
11278
11279 /* We've decided to use the frame pointer already set up.
11280 Describe this to the unwinder by pretending that both
11281 push and mov insns happen right here.
11282
11283 Putting the unwind info here at the end of the ms_hook
11284 is done so that we can make absolutely certain we get
11285 the required byte sequence at the start of the function,
11286 rather than relying on an assembler that can produce
11287 the exact encoding required.
11288
11289 However it does mean (in the unpatched case) that we have
11290 a 1 insn window where the asynchronous unwind info is
11291 incorrect. However, if we placed the unwind info at
11292 its correct location we would have incorrect unwind info
11293 in the patched case. Which is probably all moot since
11294 I don't expect Wine generates dwarf2 unwind info for the
11295 system libraries that use this feature. */
11296
11297 insn = emit_insn (gen_blockage ());
11298
11299 push = gen_push (hard_frame_pointer_rtx);
11300 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
11301 stack_pointer_rtx);
11302 RTX_FRAME_RELATED_P (push) = 1;
11303 RTX_FRAME_RELATED_P (mov) = 1;
11304
11305 RTX_FRAME_RELATED_P (insn) = 1;
11306 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11307 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11308
11309 /* Note that gen_push incremented m->fs.cfa_offset, even
11310 though we didn't emit the push insn here. */
11311 m->fs.cfa_reg = hard_frame_pointer_rtx;
11312 m->fs.fp_offset = m->fs.cfa_offset;
11313 m->fs.fp_valid = true;
11314 }
11315 else
11316 {
11317 /* The frame pointer is not needed so pop %ebp again.
11318 This leaves us with a pristine state. */
11319 emit_insn (gen_pop (hard_frame_pointer_rtx));
11320 }
11321 }
11322
11323 /* The first insn of a function that accepts its static chain on the
11324 stack is to push the register that would be filled in by a direct
11325 call. This insn will be skipped by the trampoline. */
11326 else if (ix86_static_chain_on_stack)
11327 {
11328 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11329 emit_insn (gen_blockage ());
11330
11331 /* We don't want to interpret this push insn as a register save,
11332 only as a stack adjustment. The real copy of the register as
11333 a save will be done later, if needed. */
11334 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11335 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
11336 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11337 RTX_FRAME_RELATED_P (insn) = 1;
11338 }
11339
11340 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
11341 DRAP is needed and stack realignment is really needed after reload. */
11342 if (stack_realign_drap)
11343 {
11344 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11345
11346 /* Only need to push parameter pointer reg if it is caller saved. */
11347 if (!call_used_regs[REGNO (crtl->drap_reg)])
11348 {
11349 /* Push arg pointer reg */
11350 insn = emit_insn (gen_push (crtl->drap_reg));
11351 RTX_FRAME_RELATED_P (insn) = 1;
11352 }
11353
11354 /* Grab the argument pointer. */
11355 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11356 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11357 RTX_FRAME_RELATED_P (insn) = 1;
11358 m->fs.cfa_reg = crtl->drap_reg;
11359 m->fs.cfa_offset = 0;
11360
11361 /* Align the stack. */
11362 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11363 stack_pointer_rtx,
11364 GEN_INT (-align_bytes)));
11365 RTX_FRAME_RELATED_P (insn) = 1;
11366
11367 /* Replicate the return address on the stack so that return
11368 address can be reached via (argp - 1) slot. This is needed
11369 to implement macro RETURN_ADDR_RTX and intrinsic function
11370 expand_builtin_return_addr etc. */
11371 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11372 t = gen_frame_mem (word_mode, t);
11373 insn = emit_insn (gen_push (t));
11374 RTX_FRAME_RELATED_P (insn) = 1;
11375
11376 /* For the purposes of frame and register save area addressing,
11377 we've started over with a new frame. */
11378 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11379 m->fs.realigned = true;
11380 }
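  /* For reference, the 32-bit code emitted by the block above looks roughly
     like the following (a sketch only, assuming %ecx was chosen as the DRAP
     register, no pushed static chain, and 16-byte alignment):

	leal	4(%esp), %ecx	# grab the incoming argument pointer
	andl	$-16, %esp	# align the stack
	pushl	-4(%ecx)	# replicate the return address

     after which sp_offset restarts at INCOMING_FRAME_SP_OFFSET and
     fs.realigned is set.  */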
11381
11382 int_registers_saved = (frame.nregs == 0);
11383 sse_registers_saved = (frame.nsseregs == 0);
11384
11385 if (frame_pointer_needed && !m->fs.fp_valid)
11386 {
11387 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11388 slower on all targets. Also sdb doesn't like it. */
11389 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11390 RTX_FRAME_RELATED_P (insn) = 1;
11391
11392 /* Push registers now, before setting the frame pointer
11393 on SEH target. */
11394 if (!int_registers_saved
11395 && TARGET_SEH
11396 && !frame.save_regs_using_mov)
11397 {
11398 ix86_emit_save_regs ();
11399 int_registers_saved = true;
11400 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11401 }
11402
11403 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11404 {
11405 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11406 RTX_FRAME_RELATED_P (insn) = 1;
11407
11408 if (m->fs.cfa_reg == stack_pointer_rtx)
11409 m->fs.cfa_reg = hard_frame_pointer_rtx;
11410 m->fs.fp_offset = m->fs.sp_offset;
11411 m->fs.fp_valid = true;
11412 }
11413 }
11414
11415 if (!int_registers_saved)
11416 {
11417 /* If saving registers via PUSH, do so now. */
11418 if (!frame.save_regs_using_mov)
11419 {
11420 ix86_emit_save_regs ();
11421 int_registers_saved = true;
11422 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11423 }
11424
11425 /* When using the red zone we may start register saving before allocating
11426 the stack frame, saving one cycle of the prologue. However, avoid
11427 doing this if we have to probe the stack; at least on x86_64 the
11428 stack probe can turn into a call that clobbers a red zone location. */
11429 else if (ix86_using_red_zone ()
11430 && (! TARGET_STACK_PROBE
11431 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11432 {
11433 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11434 int_registers_saved = true;
11435 }
11436 }
11437
11438 if (stack_realign_fp)
11439 {
11440 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11441 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11442
11443 /* The computation of the size of the re-aligned stack frame means
11444 that we must allocate the size of the register save area before
11445 performing the actual alignment. Otherwise we cannot guarantee
11446 that there's enough storage above the realignment point. */
11447 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11448 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11449 GEN_INT (m->fs.sp_offset
11450 - frame.sse_reg_save_offset),
11451 -1, false);
11452
11453 /* Align the stack. */
11454 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11455 stack_pointer_rtx,
11456 GEN_INT (-align_bytes)));
11457
11458 /* For the purposes of register save area addressing, the stack
11459 pointer is no longer valid. As for the value of sp_offset,
11460 see ix86_compute_frame_layout, which we need to match in order
11461 to pass verification of stack_pointer_offset at the end. */
11462 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11463 m->fs.sp_valid = false;
11464 }
11465
11466 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11467
11468 if (flag_stack_usage_info)
11469 {
11470 /* We start to count from ARG_POINTER. */
11471 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11472
11473 /* If it was realigned, take into account the fake frame. */
11474 if (stack_realign_drap)
11475 {
11476 if (ix86_static_chain_on_stack)
11477 stack_size += UNITS_PER_WORD;
11478
11479 if (!call_used_regs[REGNO (crtl->drap_reg)])
11480 stack_size += UNITS_PER_WORD;
11481
11482 /* This over-estimates by 1 minimal-stack-alignment-unit but
11483 mitigates that by counting in the new return address slot. */
11484 current_function_dynamic_stack_size
11485 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11486 }
11487
11488 current_function_static_stack_size = stack_size;
11489 }
11490
11491 /* On SEH target with very large frame size, allocate an area to save
11492 SSE registers (as the very large allocation won't be described). */
11493 if (TARGET_SEH
11494 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11495 && !sse_registers_saved)
11496 {
11497 HOST_WIDE_INT sse_size =
11498 frame.sse_reg_save_offset - frame.reg_save_offset;
11499
11500 gcc_assert (int_registers_saved);
11501
11502 /* No need to do stack checking as the area will be immediately
11503 written. */
11504 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11505 GEN_INT (-sse_size), -1,
11506 m->fs.cfa_reg == stack_pointer_rtx);
11507 allocate -= sse_size;
11508 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11509 sse_registers_saved = true;
11510 }
11511
11512 /* The stack has already been decremented by the instruction calling us
11513 so probe if the size is non-negative to preserve the protection area. */
11514 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11515 {
11516 /* We expect the registers to be saved when probes are used. */
11517 gcc_assert (int_registers_saved);
11518
11519 if (STACK_CHECK_MOVING_SP)
11520 {
11521 if (!(crtl->is_leaf && !cfun->calls_alloca
11522 && allocate <= PROBE_INTERVAL))
11523 {
11524 ix86_adjust_stack_and_probe (allocate);
11525 allocate = 0;
11526 }
11527 }
11528 else
11529 {
11530 HOST_WIDE_INT size = allocate;
11531
11532 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11533 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11534
11535 if (TARGET_STACK_PROBE)
11536 {
11537 if (crtl->is_leaf && !cfun->calls_alloca)
11538 {
11539 if (size > PROBE_INTERVAL)
11540 ix86_emit_probe_stack_range (0, size);
11541 }
11542 else
11543 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11544 }
11545 else
11546 {
11547 if (crtl->is_leaf && !cfun->calls_alloca)
11548 {
11549 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11550 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11551 size - STACK_CHECK_PROTECT);
11552 }
11553 else
11554 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11555 }
11556 }
11557 }
11558
11559 if (allocate == 0)
11560 ;
11561 else if (!ix86_target_stack_probe ()
11562 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11563 {
11564 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11565 GEN_INT (-allocate), -1,
11566 m->fs.cfa_reg == stack_pointer_rtx);
11567 }
11568 else
11569 {
11570 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11571 rtx r10 = NULL;
11572 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11573 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11574 bool eax_live = ix86_eax_live_at_start_p ();
11575 bool r10_live = false;
11576
11577 if (TARGET_64BIT)
11578 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11579
11580 if (eax_live)
11581 {
11582 insn = emit_insn (gen_push (eax));
11583 allocate -= UNITS_PER_WORD;
11584 /* Note that SEH directives need to continue tracking the stack
11585 pointer even after the frame pointer has been set up. */
11586 if (sp_is_cfa_reg || TARGET_SEH)
11587 {
11588 if (sp_is_cfa_reg)
11589 m->fs.cfa_offset += UNITS_PER_WORD;
11590 RTX_FRAME_RELATED_P (insn) = 1;
11591 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11592 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11593 plus_constant (Pmode, stack_pointer_rtx,
11594 -UNITS_PER_WORD)));
11595 }
11596 }
11597
11598 if (r10_live)
11599 {
11600 r10 = gen_rtx_REG (Pmode, R10_REG);
11601 insn = emit_insn (gen_push (r10));
11602 allocate -= UNITS_PER_WORD;
11603 if (sp_is_cfa_reg || TARGET_SEH)
11604 {
11605 if (sp_is_cfa_reg)
11606 m->fs.cfa_offset += UNITS_PER_WORD;
11607 RTX_FRAME_RELATED_P (insn) = 1;
11608 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11609 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11610 plus_constant (Pmode, stack_pointer_rtx,
11611 -UNITS_PER_WORD)));
11612 }
11613 }
11614
11615 emit_move_insn (eax, GEN_INT (allocate));
11616 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11617
11618 /* Use the fact that AX still contains ALLOCATE. */
11619 adjust_stack_insn = (Pmode == DImode
11620 ? gen_pro_epilogue_adjust_stack_di_sub
11621 : gen_pro_epilogue_adjust_stack_si_sub);
11622
11623 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11624 stack_pointer_rtx, eax));
11625
11626 if (sp_is_cfa_reg || TARGET_SEH)
11627 {
11628 if (sp_is_cfa_reg)
11629 m->fs.cfa_offset += allocate;
11630 RTX_FRAME_RELATED_P (insn) = 1;
11631 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11632 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11633 plus_constant (Pmode, stack_pointer_rtx,
11634 -allocate)));
11635 }
11636 m->fs.sp_offset += allocate;
11637
11638 /* Use stack_pointer_rtx for relative addressing so that code
11639 works for realigned stack, too. */
11640 if (r10_live && eax_live)
11641 {
11642 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11643 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11644 gen_frame_mem (word_mode, t));
11645 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11646 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11647 gen_frame_mem (word_mode, t));
11648 }
11649 else if (eax_live || r10_live)
11650 {
11651 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11652 emit_move_insn (gen_rtx_REG (word_mode,
11653 (eax_live ? AX_REG : R10_REG)),
11654 gen_frame_mem (word_mode, t));
11655 }
11656 }
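  /* For the large-allocation path above, the emitted sequence is roughly
     (a sketch; the exact stack-probe helper that
     ix86_gen_allocate_stack_worker expands to is target specific):

	mov	$allocate, %eax
	call	<stack probe helper>
	sub	%eax, %esp		# reuse the value still in %eax

     with %eax (and %r10 on 64-bit, when the static chain is live) pushed
     around the sequence and reloaded from the new frame afterwards.  */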
11657 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11658
11659 /* If we haven't already set up the frame pointer, do so now. */
11660 if (frame_pointer_needed && !m->fs.fp_valid)
11661 {
11662 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11663 GEN_INT (frame.stack_pointer_offset
11664 - frame.hard_frame_pointer_offset));
11665 insn = emit_insn (insn);
11666 RTX_FRAME_RELATED_P (insn) = 1;
11667 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11668
11669 if (m->fs.cfa_reg == stack_pointer_rtx)
11670 m->fs.cfa_reg = hard_frame_pointer_rtx;
11671 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11672 m->fs.fp_valid = true;
11673 }
11674
11675 if (!int_registers_saved)
11676 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11677 if (!sse_registers_saved)
11678 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11679
11680 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
11681 in PROLOGUE. */
11682 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11683 {
11684 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11685 insn = emit_insn (gen_set_got (pic));
11686 RTX_FRAME_RELATED_P (insn) = 1;
11687 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11688 emit_insn (gen_prologue_use (pic));
11689 /* Delete an already emitted SET_GOT if it exists and is allocated to
11690 REAL_PIC_OFFSET_TABLE_REGNUM. */
11691 ix86_elim_entry_set_got (pic);
11692 }
11693
11694 if (crtl->drap_reg && !crtl->stack_realign_needed)
11695 {
11696 /* vDRAP is set up, but after reload it turns out stack realignment
11697 isn't necessary; here we emit prologue code to set up DRAP
11698 without the stack realignment adjustment. */
11699 t = choose_baseaddr (0);
11700 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11701 }
11702
11703 /* Prevent instructions from being scheduled into the register save push
11704 sequence when access to the red-zone area is done through the frame pointer.
11705 The offset between the frame pointer and the stack pointer is calculated
11706 relative to the value of the stack pointer at the end of the function
11707 prologue, and moving instructions that access the red-zone area via the frame
11708 pointer inside the push sequence violates this assumption. */
11709 if (frame_pointer_needed && frame.red_zone_size)
11710 emit_insn (gen_memory_blockage ());
11711
11712 /* Emit cld instruction if stringops are used in the function. */
11713 if (TARGET_CLD && ix86_current_function_needs_cld)
11714 emit_insn (gen_cld ());
11715
11716 /* SEH requires that the prologue end within 256 bytes of the start of
11717 the function. Prevent instruction schedules that would extend that.
11718 Further, prevent alloca modifications to the stack pointer from being
11719 combined with prologue modifications. */
11720 if (TARGET_SEH)
11721 emit_insn (gen_prologue_use (stack_pointer_rtx));
11722 }
11723
11724 /* Emit code to restore REG using a POP insn. */
11725
11726 static void
11727 ix86_emit_restore_reg_using_pop (rtx reg)
11728 {
11729 struct machine_function *m = cfun->machine;
11730 rtx insn = emit_insn (gen_pop (reg));
11731
11732 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11733 m->fs.sp_offset -= UNITS_PER_WORD;
11734
11735 if (m->fs.cfa_reg == crtl->drap_reg
11736 && REGNO (reg) == REGNO (crtl->drap_reg))
11737 {
11738 /* Previously we'd represented the CFA as an expression
11739 like *(%ebp - 8). We've just popped that value from
11740 the stack, which means we need to reset the CFA to
11741 the drap register. This will remain until we restore
11742 the stack pointer. */
11743 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11744 RTX_FRAME_RELATED_P (insn) = 1;
11745
11746 /* This means that the DRAP register is valid for addressing too. */
11747 m->fs.drap_valid = true;
11748 return;
11749 }
11750
11751 if (m->fs.cfa_reg == stack_pointer_rtx)
11752 {
11753 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11754 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11755 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11756 RTX_FRAME_RELATED_P (insn) = 1;
11757
11758 m->fs.cfa_offset -= UNITS_PER_WORD;
11759 }
11760
11761 /* When the frame pointer is the CFA, and we pop it, we are
11762 swapping back to the stack pointer as the CFA. This happens
11763 for stack frames that don't allocate other data, so we assume
11764 the stack pointer is now pointing at the return address, i.e.
11765 the function entry state, which makes the offset one word. */
11766 if (reg == hard_frame_pointer_rtx)
11767 {
11768 m->fs.fp_valid = false;
11769 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11770 {
11771 m->fs.cfa_reg = stack_pointer_rtx;
11772 m->fs.cfa_offset -= UNITS_PER_WORD;
11773
11774 add_reg_note (insn, REG_CFA_DEF_CFA,
11775 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11776 GEN_INT (m->fs.cfa_offset)));
11777 RTX_FRAME_RELATED_P (insn) = 1;
11778 }
11779 }
11780 }
11781
11782 /* Emit code to restore saved registers using POP insns. */
11783
11784 static void
11785 ix86_emit_restore_regs_using_pop (void)
11786 {
11787 unsigned int regno;
11788
11789 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11790 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11791 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11792 }
11793
11794 /* Emit code and notes for the LEAVE instruction. */
11795
11796 static void
11797 ix86_emit_leave (void)
11798 {
11799 struct machine_function *m = cfun->machine;
11800 rtx insn = emit_insn (ix86_gen_leave ());
11801
11802 ix86_add_queued_cfa_restore_notes (insn);
11803
11804 gcc_assert (m->fs.fp_valid);
11805 m->fs.sp_valid = true;
11806 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11807 m->fs.fp_valid = false;
11808
11809 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11810 {
11811 m->fs.cfa_reg = stack_pointer_rtx;
11812 m->fs.cfa_offset = m->fs.sp_offset;
11813
11814 add_reg_note (insn, REG_CFA_DEF_CFA,
11815 plus_constant (Pmode, stack_pointer_rtx,
11816 m->fs.sp_offset));
11817 RTX_FRAME_RELATED_P (insn) = 1;
11818 }
11819 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11820 m->fs.fp_offset);
11821 }
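/* As a rough sketch of the bookkeeping above: "leave" behaves like

       mov  %rbp, %rsp		; %rsp now points at the saved %rbp slot
       pop  %rbp		; %rsp then moves up by one word

   (%ebp/%esp in 32-bit mode), so after the insn the stack pointer sits
   one word above where the frame pointer pointed, which is why
   sp_offset becomes fp_offset - UNITS_PER_WORD.  */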
11822
11823 /* Emit code to restore saved registers using MOV insns.
11824 First register is restored from CFA - CFA_OFFSET. */
11825 static void
11826 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11827 bool maybe_eh_return)
11828 {
11829 struct machine_function *m = cfun->machine;
11830 unsigned int regno;
11831
11832 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11833 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11834 {
11835 rtx reg = gen_rtx_REG (word_mode, regno);
11836 rtx insn, mem;
11837
11838 mem = choose_baseaddr (cfa_offset);
11839 mem = gen_frame_mem (word_mode, mem);
11840 insn = emit_move_insn (reg, mem);
11841
11842 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11843 {
11844 /* Previously we'd represented the CFA as an expression
11845 like *(%ebp - 8). We've just popped that value from
11846 the stack, which means we need to reset the CFA to
11847 the drap register. This will remain until we restore
11848 the stack pointer. */
11849 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11850 RTX_FRAME_RELATED_P (insn) = 1;
11851
11852 /* This means that the DRAP register is valid for addressing. */
11853 m->fs.drap_valid = true;
11854 }
11855 else
11856 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11857
11858 cfa_offset -= UNITS_PER_WORD;
11859 }
11860 }
11861
11862 /* Emit code to restore saved registers using MOV insns.
11863 First register is restored from CFA - CFA_OFFSET. */
11864 static void
11865 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11866 bool maybe_eh_return)
11867 {
11868 unsigned int regno;
11869
11870 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11871 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11872 {
11873 rtx reg = gen_rtx_REG (V4SFmode, regno);
11874 rtx mem;
11875
11876 mem = choose_baseaddr (cfa_offset);
11877 mem = gen_rtx_MEM (V4SFmode, mem);
11878 set_mem_align (mem, 128);
11879 emit_move_insn (reg, mem);
11880
11881 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11882
11883 cfa_offset -= 16;
11884 }
11885 }
11886
11887 /* Restore function stack, frame, and registers. */
11888
11889 void
11890 ix86_expand_epilogue (int style)
11891 {
11892 struct machine_function *m = cfun->machine;
11893 struct machine_frame_state frame_state_save = m->fs;
11894 struct ix86_frame frame;
11895 bool restore_regs_via_mov;
11896 bool using_drap;
11897
11898 ix86_finalize_stack_realign_flags ();
11899 ix86_compute_frame_layout (&frame);
11900
11901 m->fs.sp_valid = (!frame_pointer_needed
11902 || (crtl->sp_is_unchanging
11903 && !stack_realign_fp));
11904 gcc_assert (!m->fs.sp_valid
11905 || m->fs.sp_offset == frame.stack_pointer_offset);
11906
11907 /* The FP must be valid if the frame pointer is present. */
11908 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11909 gcc_assert (!m->fs.fp_valid
11910 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11911
11912 /* We must have *some* valid pointer to the stack frame. */
11913 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11914
11915 /* The DRAP is never valid at this point. */
11916 gcc_assert (!m->fs.drap_valid);
11917
11918 /* See the comment about red zone and frame
11919 pointer usage in ix86_expand_prologue. */
11920 if (frame_pointer_needed && frame.red_zone_size)
11921 emit_insn (gen_memory_blockage ());
11922
11923 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11924 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11925
11926 /* Determine the CFA offset of the end of the red-zone. */
11927 m->fs.red_zone_offset = 0;
11928 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11929 {
11930 /* The red-zone begins below the return address. */
11931 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11932
11933 /* When the register save area is in the aligned portion of
11934 the stack, determine the maximum runtime displacement that
11935 matches up with the aligned frame. */
11936 if (stack_realign_drap)
11937 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11938 + UNITS_PER_WORD);
11939 }
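/* As a concrete example, with the usual x86-64 values (RED_ZONE_SIZE
   of 128 and UNITS_PER_WORD of 8) and no DRAP realignment, the
   red zone ends 128 + 8 = 136 bytes below the CFA.  */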
11940
11941 /* Special care must be taken for the normal return case of a function
11942 using eh_return: the eax and edx registers are marked as saved, but
11943 not restored along this path. Adjust the save location to match. */
11944 if (crtl->calls_eh_return && style != 2)
11945 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11946
11947 /* EH_RETURN requires the use of moves to function properly. */
11948 if (crtl->calls_eh_return)
11949 restore_regs_via_mov = true;
11950 /* SEH requires the use of pops to identify the epilogue. */
11951 else if (TARGET_SEH)
11952 restore_regs_via_mov = false;
11953 /* If we're only restoring one register and sp is not valid, then
11954 use a move instruction to restore the register, since it's
11955 less work than reloading sp and popping the register. */
11956 else if (!m->fs.sp_valid && frame.nregs <= 1)
11957 restore_regs_via_mov = true;
11958 else if (TARGET_EPILOGUE_USING_MOVE
11959 && cfun->machine->use_fast_prologue_epilogue
11960 && (frame.nregs > 1
11961 || m->fs.sp_offset != frame.reg_save_offset))
11962 restore_regs_via_mov = true;
11963 else if (frame_pointer_needed
11964 && !frame.nregs
11965 && m->fs.sp_offset != frame.reg_save_offset)
11966 restore_regs_via_mov = true;
11967 else if (frame_pointer_needed
11968 && TARGET_USE_LEAVE
11969 && cfun->machine->use_fast_prologue_epilogue
11970 && frame.nregs == 1)
11971 restore_regs_via_mov = true;
11972 else
11973 restore_regs_via_mov = false;
11974
11975 if (restore_regs_via_mov || frame.nsseregs)
11976 {
11977 /* Ensure that the entire register save area is addressable via
11978 the stack pointer, if we will restore via sp. */
11979 if (TARGET_64BIT
11980 && m->fs.sp_offset > 0x7fffffff
11981 && !(m->fs.fp_valid || m->fs.drap_valid)
11982 && (frame.nsseregs + frame.nregs) != 0)
11983 {
11984 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11985 GEN_INT (m->fs.sp_offset
11986 - frame.sse_reg_save_offset),
11987 style,
11988 m->fs.cfa_reg == stack_pointer_rtx);
11989 }
11990 }
11991
11992 /* If there are any SSE registers to restore, then we have to do it
11993 via moves, since there's obviously no pop for SSE regs. */
11994 if (frame.nsseregs)
11995 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11996 style == 2);
11997
11998 if (restore_regs_via_mov)
11999 {
12000 rtx t;
12001
12002 if (frame.nregs)
12003 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
12004
12005 /* eh_return epilogues need %ecx added to the stack pointer. */
12006 if (style == 2)
12007 {
12008 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
12009
12010 /* Stack align doesn't work with eh_return. */
12011 gcc_assert (!stack_realign_drap);
12012 /* Neither do regparm nested functions. */
12013 gcc_assert (!ix86_static_chain_on_stack);
12014
12015 if (frame_pointer_needed)
12016 {
12017 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
12018 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
12019 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
12020
12021 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
12022 insn = emit_move_insn (hard_frame_pointer_rtx, t);
12023
12024 /* Note that we use SA as a temporary CFA, as the return
12025 address is at the proper place relative to it. We
12026 pretend this happens at the FP restore insn because
12027 prior to this insn the FP would be stored at the wrong
12028 offset relative to SA, and after this insn we have no
12029 other reasonable register to use for the CFA. We don't
12030 bother resetting the CFA to the SP for the duration of
12031 the return insn. */
12032 add_reg_note (insn, REG_CFA_DEF_CFA,
12033 plus_constant (Pmode, sa, UNITS_PER_WORD));
12034 ix86_add_queued_cfa_restore_notes (insn);
12035 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
12036 RTX_FRAME_RELATED_P (insn) = 1;
12037
12038 m->fs.cfa_reg = sa;
12039 m->fs.cfa_offset = UNITS_PER_WORD;
12040 m->fs.fp_valid = false;
12041
12042 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
12043 const0_rtx, style, false);
12044 }
12045 else
12046 {
12047 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
12048 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
12049 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
12050 ix86_add_queued_cfa_restore_notes (insn);
12051
12052 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12053 if (m->fs.cfa_offset != UNITS_PER_WORD)
12054 {
12055 m->fs.cfa_offset = UNITS_PER_WORD;
12056 add_reg_note (insn, REG_CFA_DEF_CFA,
12057 plus_constant (Pmode, stack_pointer_rtx,
12058 UNITS_PER_WORD));
12059 RTX_FRAME_RELATED_P (insn) = 1;
12060 }
12061 }
12062 m->fs.sp_offset = UNITS_PER_WORD;
12063 m->fs.sp_valid = true;
12064 }
12065 }
12066 else
12067 {
12068 /* SEH requires that the function end with (1) a stack adjustment
12069 if necessary, (2) a sequence of pops, and (3) a return or
12070 jump instruction. Prevent insns from the function body from
12071 being scheduled into this sequence. */
12072 if (TARGET_SEH)
12073 {
12074 /* Prevent a catch region from being adjacent to the standard
12075 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
12076 several other flags that would be interesting to test are
12077 set up yet. */
12078 if (flag_non_call_exceptions)
12079 emit_insn (gen_nops (const1_rtx));
12080 else
12081 emit_insn (gen_blockage ());
12082 }
12083
12084 /* The first step is to deallocate the stack frame so that we can
12085 pop the registers. Also do it on SEH targets for a very large
12086 frame, as the emitted instructions aren't allowed by the ABI in
12087 epilogues. */
12088 if (!m->fs.sp_valid
12089 || (TARGET_SEH
12090 && (m->fs.sp_offset - frame.reg_save_offset
12091 >= SEH_MAX_FRAME_SIZE)))
12092 {
12093 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12094 GEN_INT (m->fs.fp_offset
12095 - frame.reg_save_offset),
12096 style, false);
12097 }
12098 else if (m->fs.sp_offset != frame.reg_save_offset)
12099 {
12100 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12101 GEN_INT (m->fs.sp_offset
12102 - frame.reg_save_offset),
12103 style,
12104 m->fs.cfa_reg == stack_pointer_rtx);
12105 }
12106
12107 ix86_emit_restore_regs_using_pop ();
12108 }
12109
12110 /* If we used a frame pointer and haven't already got rid of it,
12111 then do so now. */
12112 if (m->fs.fp_valid)
12113 {
12114 /* If the stack pointer is valid and pointing at the frame
12115 pointer store address, then we only need a pop. */
12116 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12117 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12118 /* Leave results in shorter dependency chains on CPUs that are
12119 able to grok it fast. */
12120 else if (TARGET_USE_LEAVE
12121 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12122 || !cfun->machine->use_fast_prologue_epilogue)
12123 ix86_emit_leave ();
12124 else
12125 {
12126 pro_epilogue_adjust_stack (stack_pointer_rtx,
12127 hard_frame_pointer_rtx,
12128 const0_rtx, style, !using_drap);
12129 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12130 }
12131 }
12132
12133 if (using_drap)
12134 {
12135 int param_ptr_offset = UNITS_PER_WORD;
12136 rtx insn;
12137
12138 gcc_assert (stack_realign_drap);
12139
12140 if (ix86_static_chain_on_stack)
12141 param_ptr_offset += UNITS_PER_WORD;
12142 if (!call_used_regs[REGNO (crtl->drap_reg)])
12143 param_ptr_offset += UNITS_PER_WORD;
12144
12145 insn = emit_insn (gen_rtx_SET
12146 (VOIDmode, stack_pointer_rtx,
12147 gen_rtx_PLUS (Pmode,
12148 crtl->drap_reg,
12149 GEN_INT (-param_ptr_offset))));
12150 m->fs.cfa_reg = stack_pointer_rtx;
12151 m->fs.cfa_offset = param_ptr_offset;
12152 m->fs.sp_offset = param_ptr_offset;
12153 m->fs.realigned = false;
12154
12155 add_reg_note (insn, REG_CFA_DEF_CFA,
12156 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12157 GEN_INT (param_ptr_offset)));
12158 RTX_FRAME_RELATED_P (insn) = 1;
12159
12160 if (!call_used_regs[REGNO (crtl->drap_reg)])
12161 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12162 }
12163
12164 /* At this point the stack pointer must be valid, and we must have
12165 restored all of the registers. We may not have deallocated the
12166 entire stack frame. We've delayed this until now because it may
12167 be possible to merge the local stack deallocation with the
12168 deallocation forced by ix86_static_chain_on_stack. */
12169 gcc_assert (m->fs.sp_valid);
12170 gcc_assert (!m->fs.fp_valid);
12171 gcc_assert (!m->fs.realigned);
12172 if (m->fs.sp_offset != UNITS_PER_WORD)
12173 {
12174 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12175 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12176 style, true);
12177 }
12178 else
12179 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12180
12181 /* Sibcall epilogues don't want a return instruction. */
12182 if (style == 0)
12183 {
12184 m->fs = frame_state_save;
12185 return;
12186 }
12187
12188 if (crtl->args.pops_args && crtl->args.size)
12189 {
12190 rtx popc = GEN_INT (crtl->args.pops_args);
12191
12192 /* i386 can only pop 64K bytes. If asked to pop more, pop return
12193 address, do explicit add, and jump indirectly to the caller. */
12194
12195 if (crtl->args.pops_args >= 65536)
12196 {
12197 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12198 rtx insn;
12199
12200 /* There is no "pascal" calling convention in any 64bit ABI. */
12201 gcc_assert (!TARGET_64BIT);
12202
12203 insn = emit_insn (gen_pop (ecx));
12204 m->fs.cfa_offset -= UNITS_PER_WORD;
12205 m->fs.sp_offset -= UNITS_PER_WORD;
12206
12207 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12208 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
12209 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12210 add_reg_note (insn, REG_CFA_REGISTER,
12211 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
12212 RTX_FRAME_RELATED_P (insn) = 1;
12213
12214 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12215 popc, -1, true);
12216 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12217 }
12218 else
12219 emit_jump_insn (gen_simple_return_pop_internal (popc));
12220 }
12221 else
12222 emit_jump_insn (gen_simple_return_internal ());
12223
12224 /* Restore the state back to the state from the prologue,
12225 so that it's correct for the next epilogue. */
12226 m->fs = frame_state_save;
12227 }
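/* For the crtl->args.pops_args >= 65536 case above, the emitted
   sequence amounts to (32-bit only, sketch):

       pop   %ecx		; return address
       add   $pops_args, %esp	; drop the callee-popped arguments
       jmp   *%ecx		; return to the caller

   since "ret $imm16" can only encode a 16-bit pop count.  */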
12228
12229 /* Reset from the function's potential modifications. */
12230
12231 static void
12232 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12233 {
12234 if (pic_offset_table_rtx
12235 && !ix86_use_pseudo_pic_reg ())
12236 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12237 #if TARGET_MACHO
12238 /* Mach-O doesn't support labels at the end of objects, so if
12239 it looks like we might want one, insert a NOP. */
12240 {
12241 rtx_insn *insn = get_last_insn ();
12242 rtx_insn *deleted_debug_label = NULL;
12243 while (insn
12244 && NOTE_P (insn)
12245 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12246 {
12247 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
12248 notes only, instead set their CODE_LABEL_NUMBER to -1,
12249 otherwise there would be code generation differences
12250 in between -g and -g0. */
12251 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12252 deleted_debug_label = insn;
12253 insn = PREV_INSN (insn);
12254 }
12255 if (insn
12256 && (LABEL_P (insn)
12257 || (NOTE_P (insn)
12258 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12259 fputs ("\tnop\n", file);
12260 else if (deleted_debug_label)
12261 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12262 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12263 CODE_LABEL_NUMBER (insn) = -1;
12264 }
12265 #endif
12266
12267 }
12268
12269 /* Return a scratch register to use in the split stack prologue. The
12270 split stack prologue is used for -fsplit-stack. It consists of the first
12271 instructions in the function, even before the regular prologue.
12272 The scratch register can be any caller-saved register which is not
12273 used for parameters or for the static chain. */
12274
12275 static unsigned int
12276 split_stack_prologue_scratch_regno (void)
12277 {
12278 if (TARGET_64BIT)
12279 return R11_REG;
12280 else
12281 {
12282 bool is_fastcall, is_thiscall;
12283 int regparm;
12284
12285 is_fastcall = (lookup_attribute ("fastcall",
12286 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12287 != NULL);
12288 is_thiscall = (lookup_attribute ("thiscall",
12289 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12290 != NULL);
12291 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12292
12293 if (is_fastcall)
12294 {
12295 if (DECL_STATIC_CHAIN (cfun->decl))
12296 {
12297 sorry ("-fsplit-stack does not support fastcall with "
12298 "nested function");
12299 return INVALID_REGNUM;
12300 }
12301 return AX_REG;
12302 }
12303 else if (is_thiscall)
12304 {
12305 if (!DECL_STATIC_CHAIN (cfun->decl))
12306 return DX_REG;
12307 return AX_REG;
12308 }
12309 else if (regparm < 3)
12310 {
12311 if (!DECL_STATIC_CHAIN (cfun->decl))
12312 return CX_REG;
12313 else
12314 {
12315 if (regparm >= 2)
12316 {
12317 sorry ("-fsplit-stack does not support 2 register "
12318 "parameters for a nested function");
12319 return INVALID_REGNUM;
12320 }
12321 return DX_REG;
12322 }
12323 }
12324 else
12325 {
12326 /* FIXME: We could make this work by pushing a register
12327 around the addition and comparison. */
12328 sorry ("-fsplit-stack does not support 3 register parameters");
12329 return INVALID_REGNUM;
12330 }
12331 }
12332 }
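/* In short, the choices above come out to: %r11 for 64-bit code, %eax
   for fastcall, %edx for thiscall (%eax when a static chain is live),
   and %ecx otherwise (%edx when a static chain is live), with sorry ()
   for the combinations that would leave no free register.  */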
12333
12334 /* A SYMBOL_REF for the function which allocates new stack space for
12335 -fsplit-stack. */
12336
12337 static GTY(()) rtx split_stack_fn;
12338
12339 /* A SYMBOL_REF for the __morestack_large_model function, used with the
12340 large code model. */
12341
12342 static GTY(()) rtx split_stack_fn_large;
12343
12344 /* Handle -fsplit-stack. These are the first instructions in the
12345 function, even before the regular prologue. */
12346
12347 void
12348 ix86_expand_split_stack_prologue (void)
12349 {
12350 struct ix86_frame frame;
12351 HOST_WIDE_INT allocate;
12352 unsigned HOST_WIDE_INT args_size;
12353 rtx_code_label *label;
12354 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12355 rtx scratch_reg = NULL_RTX;
12356 rtx_code_label *varargs_label = NULL;
12357 rtx fn;
12358
12359 gcc_assert (flag_split_stack && reload_completed);
12360
12361 ix86_finalize_stack_realign_flags ();
12362 ix86_compute_frame_layout (&frame);
12363 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12364
12365 /* This is the label we will branch to if we have enough stack
12366 space. We expect the basic block reordering pass to reverse this
12367 branch if optimizing, so that we branch in the unlikely case. */
12368 label = gen_label_rtx ();
12369
12370 /* We need to compare the stack pointer minus the frame size with
12371 the stack boundary in the TCB. The stack boundary always gives
12372 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12373 can compare directly. Otherwise we need to do an addition. */
12374
12375 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12376 UNSPEC_STACK_CHECK);
12377 limit = gen_rtx_CONST (Pmode, limit);
12378 limit = gen_rtx_MEM (Pmode, limit);
12379 if (allocate < SPLIT_STACK_AVAILABLE)
12380 current = stack_pointer_rtx;
12381 else
12382 {
12383 unsigned int scratch_regno;
12384 rtx offset;
12385
12386 /* We need a scratch register to hold the stack pointer minus
12387 the required frame size. Since this is the very start of the
12388 function, the scratch register can be any caller-saved
12389 register which is not used for parameters. */
12390 offset = GEN_INT (- allocate);
12391 scratch_regno = split_stack_prologue_scratch_regno ();
12392 if (scratch_regno == INVALID_REGNUM)
12393 return;
12394 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12395 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12396 {
12397 /* We don't use ix86_gen_add3 in this case because it will
12398 want to split to lea, but when not optimizing the insn
12399 will not be split after this point. */
12400 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12401 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12402 offset)));
12403 }
12404 else
12405 {
12406 emit_move_insn (scratch_reg, offset);
12407 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12408 stack_pointer_rtx));
12409 }
12410 current = scratch_reg;
12411 }
12412
12413 ix86_expand_branch (GEU, current, limit, label);
12414 jump_insn = get_last_insn ();
12415 JUMP_LABEL (jump_insn) = label;
12416
12417 /* Mark the jump as very likely to be taken. */
12418 add_int_reg_note (jump_insn, REG_BR_PROB,
12419 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
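  /* With REG_BR_PROB_BASE at its usual value of 10000, the note above
     works out to a 99% predicted probability of taking the branch,
     i.e. of having enough stack and skipping the __morestack call.  */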
12420
12421 if (split_stack_fn == NULL_RTX)
12422 {
12423 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12424 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12425 }
12426 fn = split_stack_fn;
12427
12428 /* Get more stack space. We pass in the desired stack space and the
12429 size of the arguments to copy to the new stack. In 32-bit mode
12430 we push the parameters; __morestack will return on a new stack
12431 anyhow. In 64-bit mode we pass the parameters in r10 and
12432 r11. */
12433 allocate_rtx = GEN_INT (allocate);
12434 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12435 call_fusage = NULL_RTX;
12436 if (TARGET_64BIT)
12437 {
12438 rtx reg10, reg11;
12439
12440 reg10 = gen_rtx_REG (Pmode, R10_REG);
12441 reg11 = gen_rtx_REG (Pmode, R11_REG);
12442
12443 /* If this function uses a static chain, it will be in %r10.
12444 Preserve it across the call to __morestack. */
12445 if (DECL_STATIC_CHAIN (cfun->decl))
12446 {
12447 rtx rax;
12448
12449 rax = gen_rtx_REG (word_mode, AX_REG);
12450 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12451 use_reg (&call_fusage, rax);
12452 }
12453
12454 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12455 && !TARGET_PECOFF)
12456 {
12457 HOST_WIDE_INT argval;
12458
12459 gcc_assert (Pmode == DImode);
12460 /* When using the large model we need to load the address
12461 into a register, and we've run out of registers. So we
12462 switch to a different calling convention, and we call a
12463 different function: __morestack_large_model. We pass the
12464 argument size in the upper 32 bits of r10 and pass the
12465 frame size in the lower 32 bits. */
12466 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12467 gcc_assert ((args_size & 0xffffffff) == args_size);
12468
12469 if (split_stack_fn_large == NULL_RTX)
12470 {
12471 split_stack_fn_large =
12472 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12473 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12474 }
12475 if (ix86_cmodel == CM_LARGE_PIC)
12476 {
12477 rtx_code_label *label;
12478 rtx x;
12479
12480 label = gen_label_rtx ();
12481 emit_label (label);
12482 LABEL_PRESERVE_P (label) = 1;
12483 emit_insn (gen_set_rip_rex64 (reg10, label));
12484 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12485 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12486 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12487 UNSPEC_GOT);
12488 x = gen_rtx_CONST (Pmode, x);
12489 emit_move_insn (reg11, x);
12490 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12491 x = gen_const_mem (Pmode, x);
12492 emit_move_insn (reg11, x);
12493 }
12494 else
12495 emit_move_insn (reg11, split_stack_fn_large);
12496
12497 fn = reg11;
12498
12499 argval = ((args_size << 16) << 16) + allocate;
12500 emit_move_insn (reg10, GEN_INT (argval));
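	      /* As a worked example, a frame of 0x1000 bytes with 0x20
		 bytes of stack arguments gives argval = (0x20 << 32) + 0x1000
		 = 0x0000002000001000, i.e. the argument size in the upper
		 half and the allocation in the lower half of %r10.  */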
12501 }
12502 else
12503 {
12504 emit_move_insn (reg10, allocate_rtx);
12505 emit_move_insn (reg11, GEN_INT (args_size));
12506 use_reg (&call_fusage, reg11);
12507 }
12508
12509 use_reg (&call_fusage, reg10);
12510 }
12511 else
12512 {
12513 emit_insn (gen_push (GEN_INT (args_size)));
12514 emit_insn (gen_push (allocate_rtx));
12515 }
12516 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12517 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12518 NULL_RTX, false);
12519 add_function_usage_to (call_insn, call_fusage);
12520
12521 /* In order to make call/return prediction work right, we now need
12522 to execute a return instruction. See
12523 libgcc/config/i386/morestack.S for the details on how this works.
12524
12525 For flow purposes gcc must not see this as a return
12526 instruction--we need control flow to continue at the subsequent
12527 label. Therefore, we use an unspec. */
12528 gcc_assert (crtl->args.pops_args < 65536);
12529 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12530
12531 /* If we are in 64-bit mode and this function uses a static chain,
12532 we saved %r10 in %rax before calling __morestack. */
12533 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12534 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12535 gen_rtx_REG (word_mode, AX_REG));
12536
12537 /* If this function calls va_start, we need to store a pointer to
12538 the arguments on the old stack, because they may not have been
12539 all copied to the new stack. At this point the old stack can be
12540 found at the frame pointer value used by __morestack, because
12541 __morestack has set that up before calling back to us. Here we
12542 store that pointer in a scratch register, and in
12543 ix86_expand_prologue we store the scratch register in a stack
12544 slot. */
12545 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12546 {
12547 unsigned int scratch_regno;
12548 rtx frame_reg;
12549 int words;
12550
12551 scratch_regno = split_stack_prologue_scratch_regno ();
12552 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12553 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12554
12555 /* 64-bit:
12556 fp -> old fp value
12557 return address within this function
12558 return address of caller of this function
12559 stack arguments
12560 So we add three words to get to the stack arguments.
12561
12562 32-bit:
12563 fp -> old fp value
12564 return address within this function
12565 first argument to __morestack
12566 second argument to __morestack
12567 return address of caller of this function
12568 stack arguments
12569 So we add five words to get to the stack arguments.
12570 */
12571 words = TARGET_64BIT ? 3 : 5;
12572 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12573 gen_rtx_PLUS (Pmode, frame_reg,
12574 GEN_INT (words * UNITS_PER_WORD))));
12575
12576 varargs_label = gen_label_rtx ();
12577 emit_jump_insn (gen_jump (varargs_label));
12578 JUMP_LABEL (get_last_insn ()) = varargs_label;
12579
12580 emit_barrier ();
12581 }
12582
12583 emit_label (label);
12584 LABEL_NUSES (label) = 1;
12585
12586 /* If this function calls va_start, we now have to set the scratch
12587 register for the case where we do not call __morestack. In this
12588 case we need to set it based on the stack pointer. */
12589 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12590 {
12591 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12592 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12593 GEN_INT (UNITS_PER_WORD))));
12594
12595 emit_label (varargs_label);
12596 LABEL_NUSES (varargs_label) = 1;
12597 }
12598 }
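/* Putting the pieces above together, for a 64-bit function outside the
   large code model whose frame is too big to compare %rsp directly,
   the emitted split-stack prologue roughly has the shape

       lea	-FRAME(%rsp), %r11
       cmp	%fs:OFFSET, %r11
       jae	.Lenough
       mov	$FRAME, %r10
       mov	$ARGS, %r11
       call	__morestack
       ret
   .Lenough:
       ...regular prologue...

   where the exact register choices, the comparison operand and the
   varargs handling vary with the cases handled above.  */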
12599
12600 /* We may have to tell the dataflow pass that the split stack prologue
12601 is initializing a scratch register. */
12602
12603 static void
12604 ix86_live_on_entry (bitmap regs)
12605 {
12606 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12607 {
12608 gcc_assert (flag_split_stack);
12609 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12610 }
12611 }
12612 \f
12613 /* Extract the parts of an RTL expression that is a valid memory address
12614 for an instruction. Return 0 if the structure of the address is
12615 grossly off. Return -1 if the address contains ASHIFT, so it is not
12616 strictly valid, but is still used for computing the length of an lea instruction. */
12617
12618 int
12619 ix86_decompose_address (rtx addr, struct ix86_address *out)
12620 {
12621 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12622 rtx base_reg, index_reg;
12623 HOST_WIDE_INT scale = 1;
12624 rtx scale_rtx = NULL_RTX;
12625 rtx tmp;
12626 int retval = 1;
12627 enum ix86_address_seg seg = SEG_DEFAULT;
12628
12629 /* Allow zero-extended SImode addresses;
12630 they will be emitted with the addr32 prefix. */
12631 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12632 {
12633 if (GET_CODE (addr) == ZERO_EXTEND
12634 && GET_MODE (XEXP (addr, 0)) == SImode)
12635 {
12636 addr = XEXP (addr, 0);
12637 if (CONST_INT_P (addr))
12638 return 0;
12639 }
12640 else if (GET_CODE (addr) == AND
12641 && const_32bit_mask (XEXP (addr, 1), DImode))
12642 {
12643 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12644 if (addr == NULL_RTX)
12645 return 0;
12646
12647 if (CONST_INT_P (addr))
12648 return 0;
12649 }
12650 }
12651
12652 /* Allow SImode subregs of DImode addresses;
12653 they will be emitted with the addr32 prefix. */
12654 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12655 {
12656 if (GET_CODE (addr) == SUBREG
12657 && GET_MODE (SUBREG_REG (addr)) == DImode)
12658 {
12659 addr = SUBREG_REG (addr);
12660 if (CONST_INT_P (addr))
12661 return 0;
12662 }
12663 }
12664
12665 if (REG_P (addr))
12666 base = addr;
12667 else if (GET_CODE (addr) == SUBREG)
12668 {
12669 if (REG_P (SUBREG_REG (addr)))
12670 base = addr;
12671 else
12672 return 0;
12673 }
12674 else if (GET_CODE (addr) == PLUS)
12675 {
12676 rtx addends[4], op;
12677 int n = 0, i;
12678
12679 op = addr;
12680 do
12681 {
12682 if (n >= 4)
12683 return 0;
12684 addends[n++] = XEXP (op, 1);
12685 op = XEXP (op, 0);
12686 }
12687 while (GET_CODE (op) == PLUS);
12688 if (n >= 4)
12689 return 0;
12690 addends[n] = op;
12691
12692 for (i = n; i >= 0; --i)
12693 {
12694 op = addends[i];
12695 switch (GET_CODE (op))
12696 {
12697 case MULT:
12698 if (index)
12699 return 0;
12700 index = XEXP (op, 0);
12701 scale_rtx = XEXP (op, 1);
12702 break;
12703
12704 case ASHIFT:
12705 if (index)
12706 return 0;
12707 index = XEXP (op, 0);
12708 tmp = XEXP (op, 1);
12709 if (!CONST_INT_P (tmp))
12710 return 0;
12711 scale = INTVAL (tmp);
12712 if ((unsigned HOST_WIDE_INT) scale > 3)
12713 return 0;
12714 scale = 1 << scale;
12715 break;
12716
12717 case ZERO_EXTEND:
12718 op = XEXP (op, 0);
12719 if (GET_CODE (op) != UNSPEC)
12720 return 0;
12721 /* FALLTHRU */
12722
12723 case UNSPEC:
12724 if (XINT (op, 1) == UNSPEC_TP
12725 && TARGET_TLS_DIRECT_SEG_REFS
12726 && seg == SEG_DEFAULT)
12727 seg = DEFAULT_TLS_SEG_REG;
12728 else
12729 return 0;
12730 break;
12731
12732 case SUBREG:
12733 if (!REG_P (SUBREG_REG (op)))
12734 return 0;
12735 /* FALLTHRU */
12736
12737 case REG:
12738 if (!base)
12739 base = op;
12740 else if (!index)
12741 index = op;
12742 else
12743 return 0;
12744 break;
12745
12746 case CONST:
12747 case CONST_INT:
12748 case SYMBOL_REF:
12749 case LABEL_REF:
12750 if (disp)
12751 return 0;
12752 disp = op;
12753 break;
12754
12755 default:
12756 return 0;
12757 }
12758 }
12759 }
12760 else if (GET_CODE (addr) == MULT)
12761 {
12762 index = XEXP (addr, 0); /* index*scale */
12763 scale_rtx = XEXP (addr, 1);
12764 }
12765 else if (GET_CODE (addr) == ASHIFT)
12766 {
12767 /* We're called for lea too, which implements ashift on occasion. */
12768 index = XEXP (addr, 0);
12769 tmp = XEXP (addr, 1);
12770 if (!CONST_INT_P (tmp))
12771 return 0;
12772 scale = INTVAL (tmp);
12773 if ((unsigned HOST_WIDE_INT) scale > 3)
12774 return 0;
12775 scale = 1 << scale;
12776 retval = -1;
12777 }
12778 else
12779 disp = addr; /* displacement */
12780
12781 if (index)
12782 {
12783 if (REG_P (index))
12784 ;
12785 else if (GET_CODE (index) == SUBREG
12786 && REG_P (SUBREG_REG (index)))
12787 ;
12788 else
12789 return 0;
12790 }
12791
12792 /* Extract the integral value of scale. */
12793 if (scale_rtx)
12794 {
12795 if (!CONST_INT_P (scale_rtx))
12796 return 0;
12797 scale = INTVAL (scale_rtx);
12798 }
12799
12800 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12801 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12802
12803 /* Avoid useless 0 displacement. */
12804 if (disp == const0_rtx && (base || index))
12805 disp = NULL_RTX;
12806
12807 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
12808 if (base_reg && index_reg && scale == 1
12809 && (index_reg == arg_pointer_rtx
12810 || index_reg == frame_pointer_rtx
12811 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12812 {
12813 std::swap (base, index);
12814 std::swap (base_reg, index_reg);
12815 }
12816
12817 /* Special case: %ebp cannot be encoded as a base without a displacement.
12818 Similarly %r13. */
12819 if (!disp
12820 && base_reg
12821 && (base_reg == hard_frame_pointer_rtx
12822 || base_reg == frame_pointer_rtx
12823 || base_reg == arg_pointer_rtx
12824 || (REG_P (base_reg)
12825 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12826 || REGNO (base_reg) == R13_REG))))
12827 disp = const0_rtx;
12828
12829 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12830 Avoid this by transforming to [%esi+0].
12831 Reload calls address legitimization without cfun defined, so we need
12832 to test cfun for being non-NULL. */
12833 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12834 && base_reg && !index_reg && !disp
12835 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12836 disp = const0_rtx;
12837
12838 /* Special case: encode reg+reg instead of reg*2. */
12839 if (!base && index && scale == 2)
12840 base = index, base_reg = index_reg, scale = 1;
12841
12842 /* Special case: scaling cannot be encoded without base or displacement. */
12843 if (!base && !disp && index && scale != 1)
12844 disp = const0_rtx;
12845
12846 out->base = base;
12847 out->index = index;
12848 out->disp = disp;
12849 out->scale = scale;
12850 out->seg = seg;
12851
12852 return retval;
12853 }
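/* As a worked example of the decomposition above, the address

       (plus:SI (plus:SI (mult:SI (reg:SI %ecx) (const_int 4))
			 (reg:SI %ebx))
		(const_int 12))

   i.e. 12(%ebx,%ecx,4), comes back with base = %ebx, index = %ecx,
   scale = 4, disp = (const_int 12) and a return value of 1.  */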
12854 \f
12855 /* Return cost of the memory address x.
12856 For i386, it is better to use a complex address than let gcc copy
12857 the address into a reg and make a new pseudo. But not if the address
12858 requires two regs - that would mean more pseudos with longer
12859 lifetimes. */
12860 static int
12861 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
12862 {
12863 struct ix86_address parts;
12864 int cost = 1;
12865 int ok = ix86_decompose_address (x, &parts);
12866
12867 gcc_assert (ok);
12868
12869 if (parts.base && GET_CODE (parts.base) == SUBREG)
12870 parts.base = SUBREG_REG (parts.base);
12871 if (parts.index && GET_CODE (parts.index) == SUBREG)
12872 parts.index = SUBREG_REG (parts.index);
12873
12874 /* Attempt to minimize number of registers in the address. */
12875 if ((parts.base
12876 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12877 || (parts.index
12878 && (!REG_P (parts.index)
12879 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12880 cost++;
12881
12882 /* When address base or index is "pic_offset_table_rtx" we don't increase
12883 the address cost. When a memory operand with "pic_offset_table_rtx" is not invariant
12884 itself, it most likely means that the base or index is not invariant.
12885 Therefore only "pic_offset_table_rtx" could be hoisted out, which is not
12886 profitable for x86. */
12887 if (parts.base
12888 && (current_pass->type == GIMPLE_PASS
12889 || (!pic_offset_table_rtx
12890 || REGNO (pic_offset_table_rtx) != REGNO(parts.base)))
12891 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12892 && parts.index
12893 && (current_pass->type == GIMPLE_PASS
12894 || (!pic_offset_table_rtx
12895 || REGNO (pic_offset_table_rtx) != REGNO(parts.index)))
12896 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12897 && parts.base != parts.index)
12898 cost++;
12899
12900 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12901 since its predecode logic can't detect the length of instructions
12902 and they degenerate to vector decoded. Increase the cost of such
12903 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
12904 to split such addresses or even refuse such addresses at all.
12905
12906 Following addressing modes are affected:
12907 [base+scale*index]
12908 [scale*index+disp]
12909 [base+index]
12910
12911 The first and last case may be avoidable by explicitly coding the zero in
12912 the memory address, but I don't have an AMD-K6 machine handy to check this
12913 theory. */
12914
12915 if (TARGET_K6
12916 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12917 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12918 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12919 cost += 10;
12920
12921 return cost;
12922 }
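/* For instance, with the K6 tuning above an address such as
   (%ebx,%ecx,2) or (%ebx,%ecx) picks up the extra cost of 10, while
   4(%ebx,%ecx,2) does not, because the explicit displacement moves the
   ModR/M encoding away from 00_xxx_100b.  */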
12923 \f
12924 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12925 this is used to form addresses to local data when -fPIC is in
12926 use. */
12927
12928 static bool
12929 darwin_local_data_pic (rtx disp)
12930 {
12931 return (GET_CODE (disp) == UNSPEC
12932 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12933 }
12934
12935 /* Determine if a given RTX is a valid constant. We already know this
12936 satisfies CONSTANT_P. */
12937
12938 static bool
12939 ix86_legitimate_constant_p (machine_mode, rtx x)
12940 {
12941 /* Pointer bounds constants are not valid. */
12942 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
12943 return false;
12944
12945 switch (GET_CODE (x))
12946 {
12947 case CONST:
12948 x = XEXP (x, 0);
12949
12950 if (GET_CODE (x) == PLUS)
12951 {
12952 if (!CONST_INT_P (XEXP (x, 1)))
12953 return false;
12954 x = XEXP (x, 0);
12955 }
12956
12957 if (TARGET_MACHO && darwin_local_data_pic (x))
12958 return true;
12959
12960 /* Only some unspecs are valid as "constants". */
12961 if (GET_CODE (x) == UNSPEC)
12962 switch (XINT (x, 1))
12963 {
12964 case UNSPEC_GOT:
12965 case UNSPEC_GOTOFF:
12966 case UNSPEC_PLTOFF:
12967 return TARGET_64BIT;
12968 case UNSPEC_TPOFF:
12969 case UNSPEC_NTPOFF:
12970 x = XVECEXP (x, 0, 0);
12971 return (GET_CODE (x) == SYMBOL_REF
12972 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12973 case UNSPEC_DTPOFF:
12974 x = XVECEXP (x, 0, 0);
12975 return (GET_CODE (x) == SYMBOL_REF
12976 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12977 default:
12978 return false;
12979 }
12980
12981 /* We must have drilled down to a symbol. */
12982 if (GET_CODE (x) == LABEL_REF)
12983 return true;
12984 if (GET_CODE (x) != SYMBOL_REF)
12985 return false;
12986 /* FALLTHRU */
12987
12988 case SYMBOL_REF:
12989 /* TLS symbols are never valid. */
12990 if (SYMBOL_REF_TLS_MODEL (x))
12991 return false;
12992
12993 /* DLLIMPORT symbols are never valid. */
12994 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12995 && SYMBOL_REF_DLLIMPORT_P (x))
12996 return false;
12997
12998 #if TARGET_MACHO
12999 /* mdynamic-no-pic */
13000 if (MACHO_DYNAMIC_NO_PIC_P)
13001 return machopic_symbol_defined_p (x);
13002 #endif
13003 break;
13004
13005 case CONST_DOUBLE:
13006 if (GET_MODE (x) == TImode
13007 && x != CONST0_RTX (TImode)
13008 && !TARGET_64BIT)
13009 return false;
13010 break;
13011
13012 case CONST_VECTOR:
13013 if (!standard_sse_constant_p (x))
13014 return false;
13015
13016 default:
13017 break;
13018 }
13019
13020 /* Otherwise we handle everything else in the move patterns. */
13021 return true;
13022 }
13023
13024 /* Determine if it's legal to put X into the constant pool. This
13025 is not possible for the address of thread-local symbols, which
13026 is checked above. */
13027
13028 static bool
13029 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
13030 {
13031 /* We can always put integral constants and vectors in memory. */
13032 switch (GET_CODE (x))
13033 {
13034 case CONST_INT:
13035 case CONST_DOUBLE:
13036 case CONST_VECTOR:
13037 return false;
13038
13039 default:
13040 break;
13041 }
13042 return !ix86_legitimate_constant_p (mode, x);
13043 }
13044
13045 /* Return true if the symbol is marked as dllimport or as a stub variable,
13046 otherwise false. */
13047
13048 static bool
13049 is_imported_p (rtx x)
13050 {
13051 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
13052 || GET_CODE (x) != SYMBOL_REF)
13053 return false;
13054
13055 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13056 }
13057
13058
13059 /* Nonzero if the constant value X is a legitimate general operand
13060 when generating PIC code. It is given that flag_pic is on and
13061 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
13062
13063 bool
13064 legitimate_pic_operand_p (rtx x)
13065 {
13066 rtx inner;
13067
13068 switch (GET_CODE (x))
13069 {
13070 case CONST:
13071 inner = XEXP (x, 0);
13072 if (GET_CODE (inner) == PLUS
13073 && CONST_INT_P (XEXP (inner, 1)))
13074 inner = XEXP (inner, 0);
13075
13076 /* Only some unspecs are valid as "constants". */
13077 if (GET_CODE (inner) == UNSPEC)
13078 switch (XINT (inner, 1))
13079 {
13080 case UNSPEC_GOT:
13081 case UNSPEC_GOTOFF:
13082 case UNSPEC_PLTOFF:
13083 return TARGET_64BIT;
13084 case UNSPEC_TPOFF:
13085 x = XVECEXP (inner, 0, 0);
13086 return (GET_CODE (x) == SYMBOL_REF
13087 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13088 case UNSPEC_MACHOPIC_OFFSET:
13089 return legitimate_pic_address_disp_p (x);
13090 default:
13091 return false;
13092 }
13093 /* FALLTHRU */
13094
13095 case SYMBOL_REF:
13096 case LABEL_REF:
13097 return legitimate_pic_address_disp_p (x);
13098
13099 default:
13100 return true;
13101 }
13102 }
13103
13104 /* Determine if a given CONST RTX is a valid memory displacement
13105 in PIC mode. */
13106
13107 bool
13108 legitimate_pic_address_disp_p (rtx disp)
13109 {
13110 bool saw_plus;
13111
13112 /* In 64bit mode we can allow direct addresses of symbols and labels
13113 when they are not dynamic symbols. */
13114 if (TARGET_64BIT)
13115 {
13116 rtx op0 = disp, op1;
13117
13118 switch (GET_CODE (disp))
13119 {
13120 case LABEL_REF:
13121 return true;
13122
13123 case CONST:
13124 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13125 break;
13126 op0 = XEXP (XEXP (disp, 0), 0);
13127 op1 = XEXP (XEXP (disp, 0), 1);
13128 if (!CONST_INT_P (op1)
13129 || INTVAL (op1) >= 16*1024*1024
13130 || INTVAL (op1) < -16*1024*1024)
13131 break;
13132 if (GET_CODE (op0) == LABEL_REF)
13133 return true;
13134 if (GET_CODE (op0) == CONST
13135 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13136 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13137 return true;
13138 if (GET_CODE (op0) == UNSPEC
13139 && XINT (op0, 1) == UNSPEC_PCREL)
13140 return true;
13141 if (GET_CODE (op0) != SYMBOL_REF)
13142 break;
13143 /* FALLTHRU */
13144
13145 case SYMBOL_REF:
13146 /* TLS references should always be enclosed in UNSPEC.
13147 A dllimported symbol always needs to be resolved. */
13148 if (SYMBOL_REF_TLS_MODEL (op0)
13149 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13150 return false;
13151
13152 if (TARGET_PECOFF)
13153 {
13154 if (is_imported_p (op0))
13155 return true;
13156
13157 if (SYMBOL_REF_FAR_ADDR_P (op0)
13158 || !SYMBOL_REF_LOCAL_P (op0))
13159 break;
13160
13161 /* Function symbols need to be resolved only for the
13162 large model.
13163 For the small model we don't need to resolve anything
13164 here. */
13165 if ((ix86_cmodel != CM_LARGE_PIC
13166 && SYMBOL_REF_FUNCTION_P (op0))
13167 || ix86_cmodel == CM_SMALL_PIC)
13168 return true;
13169 /* Non-external symbols don't need to be resolved for the
13170 large and medium models. */
13171 if ((ix86_cmodel == CM_LARGE_PIC
13172 || ix86_cmodel == CM_MEDIUM_PIC)
13173 && !SYMBOL_REF_EXTERNAL_P (op0))
13174 return true;
13175 }
13176 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13177 && (SYMBOL_REF_LOCAL_P (op0)
13178 || (HAVE_LD_PIE_COPYRELOC
13179 && flag_pie
13180 && !SYMBOL_REF_WEAK (op0)
13181 && !SYMBOL_REF_FUNCTION_P (op0)))
13182 && ix86_cmodel != CM_LARGE_PIC)
13183 return true;
13184 break;
13185
13186 default:
13187 break;
13188 }
13189 }
13190 if (GET_CODE (disp) != CONST)
13191 return false;
13192 disp = XEXP (disp, 0);
13193
13194 if (TARGET_64BIT)
13195 {
13196 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
13197 into GOT tables, but we should not need these anyway. */
13198 if (GET_CODE (disp) != UNSPEC
13199 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13200 && XINT (disp, 1) != UNSPEC_GOTOFF
13201 && XINT (disp, 1) != UNSPEC_PCREL
13202 && XINT (disp, 1) != UNSPEC_PLTOFF))
13203 return false;
13204
13205 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13206 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13207 return false;
13208 return true;
13209 }
13210
13211 saw_plus = false;
13212 if (GET_CODE (disp) == PLUS)
13213 {
13214 if (!CONST_INT_P (XEXP (disp, 1)))
13215 return false;
13216 disp = XEXP (disp, 0);
13217 saw_plus = true;
13218 }
13219
13220 if (TARGET_MACHO && darwin_local_data_pic (disp))
13221 return true;
13222
13223 if (GET_CODE (disp) != UNSPEC)
13224 return false;
13225
13226 switch (XINT (disp, 1))
13227 {
13228 case UNSPEC_GOT:
13229 if (saw_plus)
13230 return false;
13231 /* We need to check for both symbols and labels because VxWorks loads
13232 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13233 details. */
13234 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13235 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13236 case UNSPEC_GOTOFF:
13237 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13238 While the ABI also specifies a 32bit relocation, we don't produce it in
13239 the small PIC model at all. */
13240 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13241 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13242 && !TARGET_64BIT)
13243 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13244 return false;
13245 case UNSPEC_GOTTPOFF:
13246 case UNSPEC_GOTNTPOFF:
13247 case UNSPEC_INDNTPOFF:
13248 if (saw_plus)
13249 return false;
13250 disp = XVECEXP (disp, 0, 0);
13251 return (GET_CODE (disp) == SYMBOL_REF
13252 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13253 case UNSPEC_NTPOFF:
13254 disp = XVECEXP (disp, 0, 0);
13255 return (GET_CODE (disp) == SYMBOL_REF
13256 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13257 case UNSPEC_DTPOFF:
13258 disp = XVECEXP (disp, 0, 0);
13259 return (GET_CODE (disp) == SYMBOL_REF
13260 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13261 }
13262
13263 return false;
13264 }
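/* A typical 32-bit PIC displacement accepted above looks like

       (const (unspec [(symbol_ref ("x"))] UNSPEC_GOTOFF))

   or, for "x + 4",

       (const (plus (unspec [(symbol_ref ("x"))] UNSPEC_GOTOFF)
		    (const_int 4)))

   both addressed relative to the PIC register.  */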
13265
13266 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
13267 replace the input X, or the original X if no replacement is called for.
13268 The output parameter *WIN is 1 if the calling macro should goto WIN,
13269 0 if it should not. */
13270
13271 bool
13272 ix86_legitimize_reload_address (rtx x, machine_mode, int opnum, int type,
13273 int)
13274 {
13275 /* Reload can generate:
13276
13277 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
13278 (reg:DI 97))
13279 (reg:DI 2 cx))
13280
13281 This RTX is rejected from ix86_legitimate_address_p due to
13282 non-strictness of base register 97. Following this rejection,
13283 reload pushes all three components into separate registers,
13284 creating invalid memory address RTX.
13285
13286 Following code reloads only the invalid part of the
13287 memory address RTX. */
13288
13289 if (GET_CODE (x) == PLUS
13290 && REG_P (XEXP (x, 1))
13291 && GET_CODE (XEXP (x, 0)) == PLUS
13292 && REG_P (XEXP (XEXP (x, 0), 1)))
13293 {
13294 rtx base, index;
13295 bool something_reloaded = false;
13296
13297 base = XEXP (XEXP (x, 0), 1);
13298 if (!REG_OK_FOR_BASE_STRICT_P (base))
13299 {
13300 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
13301 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13302 opnum, (enum reload_type) type);
13303 something_reloaded = true;
13304 }
13305
13306 index = XEXP (x, 1);
13307 if (!REG_OK_FOR_INDEX_STRICT_P (index))
13308 {
13309 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
13310 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13311 opnum, (enum reload_type) type);
13312 something_reloaded = true;
13313 }
13314
13315 gcc_assert (something_reloaded);
13316 return true;
13317 }
13318
13319 return false;
13320 }
13321
13322 /* Determine if OP is a suitable RTX for an address register.
13323 Return the naked register if a register or a register subreg is
13324 found, otherwise return NULL_RTX. */
13325
13326 static rtx
13327 ix86_validate_address_register (rtx op)
13328 {
13329 machine_mode mode = GET_MODE (op);
13330
13331 /* Only SImode or DImode registers can form the address. */
13332 if (mode != SImode && mode != DImode)
13333 return NULL_RTX;
13334
13335 if (REG_P (op))
13336 return op;
13337 else if (GET_CODE (op) == SUBREG)
13338 {
13339 rtx reg = SUBREG_REG (op);
13340
13341 if (!REG_P (reg))
13342 return NULL_RTX;
13343
13344 mode = GET_MODE (reg);
13345
13346 /* Don't allow SUBREGs that span more than a word. It can
13347 lead to spill failures when the register is one word out
13348 of a two word structure. */
13349 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13350 return NULL_RTX;
13351
13352 /* Allow only SUBREGs of non-eliminable hard registers. */
13353 if (register_no_elim_operand (reg, mode))
13354 return reg;
13355 }
13356
13357 /* Op is not a register. */
13358 return NULL_RTX;
13359 }
13360
13361 /* Recognizes RTL expressions that are valid memory addresses for an
13362 instruction. The MODE argument is the machine mode for the MEM
13363 expression that wants to use this address.
13364
13365 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13366 convert common non-canonical forms to canonical form so that they will
13367 be recognized. */
13368
13369 static bool
13370 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13371 {
13372 struct ix86_address parts;
13373 rtx base, index, disp;
13374 HOST_WIDE_INT scale;
13375 enum ix86_address_seg seg;
13376
13377 if (ix86_decompose_address (addr, &parts) <= 0)
13378 /* Decomposition failed. */
13379 return false;
13380
13381 base = parts.base;
13382 index = parts.index;
13383 disp = parts.disp;
13384 scale = parts.scale;
13385 seg = parts.seg;
13386
13387 /* Validate base register. */
13388 if (base)
13389 {
13390 rtx reg = ix86_validate_address_register (base);
13391
13392 if (reg == NULL_RTX)
13393 return false;
13394
13395 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13396 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13397 /* Base is not valid. */
13398 return false;
13399 }
13400
13401 /* Validate index register. */
13402 if (index)
13403 {
13404 rtx reg = ix86_validate_address_register (index);
13405
13406 if (reg == NULL_RTX)
13407 return false;
13408
13409 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13410 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13411 /* Index is not valid. */
13412 return false;
13413 }
13414
13415 /* Index and base should have the same mode. */
13416 if (base && index
13417 && GET_MODE (base) != GET_MODE (index))
13418 return false;
13419
13420 /* Address override works only on the (%reg) part of %fs:(%reg). */
13421 if (seg != SEG_DEFAULT
13422 && ((base && GET_MODE (base) != word_mode)
13423 || (index && GET_MODE (index) != word_mode)))
13424 return false;
13425
13426 /* Validate scale factor. */
13427 if (scale != 1)
13428 {
13429 if (!index)
13430 /* Scale without index. */
13431 return false;
13432
13433 if (scale != 2 && scale != 4 && scale != 8)
13434 /* Scale is not a valid multiplier. */
13435 return false;
13436 }
13437
13438 /* Validate displacement. */
13439 if (disp)
13440 {
13441 if (GET_CODE (disp) == CONST
13442 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13443 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13444 switch (XINT (XEXP (disp, 0), 1))
13445 {
13446 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13447 used. While the ABI also specifies 32bit relocations, we don't produce
13448 them at all and use IP-relative addressing instead. */
13449 case UNSPEC_GOT:
13450 case UNSPEC_GOTOFF:
13451 gcc_assert (flag_pic);
13452 if (!TARGET_64BIT)
13453 goto is_legitimate_pic;
13454
13455 /* 64bit address unspec. */
13456 return false;
13457
13458 case UNSPEC_GOTPCREL:
13459 case UNSPEC_PCREL:
13460 gcc_assert (flag_pic);
13461 goto is_legitimate_pic;
13462
13463 case UNSPEC_GOTTPOFF:
13464 case UNSPEC_GOTNTPOFF:
13465 case UNSPEC_INDNTPOFF:
13466 case UNSPEC_NTPOFF:
13467 case UNSPEC_DTPOFF:
13468 break;
13469
13470 case UNSPEC_STACK_CHECK:
13471 gcc_assert (flag_split_stack);
13472 break;
13473
13474 default:
13475 /* Invalid address unspec. */
13476 return false;
13477 }
13478
13479 else if (SYMBOLIC_CONST (disp)
13480 && (flag_pic
13481 || (TARGET_MACHO
13482 #if TARGET_MACHO
13483 && MACHOPIC_INDIRECT
13484 && !machopic_operand_p (disp)
13485 #endif
13486 )))
13487 {
13488
13489 is_legitimate_pic:
13490 if (TARGET_64BIT && (index || base))
13491 {
13492 /* foo@dtpoff(%rX) is ok. */
13493 if (GET_CODE (disp) != CONST
13494 || GET_CODE (XEXP (disp, 0)) != PLUS
13495 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13496 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13497 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13498 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13499 /* Non-constant pic memory reference. */
13500 return false;
13501 }
13502 else if ((!TARGET_MACHO || flag_pic)
13503 && ! legitimate_pic_address_disp_p (disp))
13504 /* Displacement is an invalid pic construct. */
13505 return false;
13506 #if TARGET_MACHO
13507 else if (MACHO_DYNAMIC_NO_PIC_P
13508 && !ix86_legitimate_constant_p (Pmode, disp))
13509 /* Displacement must be referenced via non_lazy_pointer. */
13510 return false;
13511 #endif
13512
13513 /* This code used to verify that a symbolic pic displacement
13514 includes the pic_offset_table_rtx register.
13515
13516 While this is a good idea, unfortunately these constructs may
13517 be created by the "adds using lea" optimization for incorrect
13518 code like:
13519
13520 int a;
13521 int foo(int i)
13522 {
13523 return *(&a+i);
13524 }
13525
13526 This code is nonsensical, but results in addressing the
13527 GOT table with a pic_offset_table_rtx base. We can't
13528 just refuse it easily, since it gets matched by the
13529 "addsi3" pattern, which later gets split to lea when the
13530 output register differs from the input. While this
13531 could be handled by a separate addsi pattern for this case
13532 that never results in lea, disabling this test seems to be
13533 the easier and correct fix for the crash. */
13534 }
13535 else if (GET_CODE (disp) != LABEL_REF
13536 && !CONST_INT_P (disp)
13537 && (GET_CODE (disp) != CONST
13538 || !ix86_legitimate_constant_p (Pmode, disp))
13539 && (GET_CODE (disp) != SYMBOL_REF
13540 || !ix86_legitimate_constant_p (Pmode, disp)))
13541 /* Displacement is not constant. */
13542 return false;
13543 else if (TARGET_64BIT
13544 && !x86_64_immediate_operand (disp, VOIDmode))
13545 /* Displacement is out of range. */
13546 return false;
13547 /* In x32 mode, constant addresses are sign extended to 64bit, so
13548 we have to reject addresses in the range 0x80000000 to 0xffffffff. */
13549 else if (TARGET_X32 && !(index || base)
13550 && CONST_INT_P (disp)
13551 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13552 return false;
13553 }
13554
13555 /* Everything looks valid. */
13556 return true;
13557 }
13558
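/* Illustrative sketch, not part of the original source: the canonical form
   accepted above is  base + index*scale + disp  with scale in {1, 2, 4, 8},
   e.g. in AT&T syntax

       movl 16(%ebx,%ecx,4), %eax        base %ebx, index %ecx, scale 4, disp 16

   A scale without an index register, a scale other than 2, 4 or 8, or a base
   and index of different modes is rejected above.  */
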
13559 /* Determine if a given RTX is a valid constant address. */
13560
13561 bool
13562 constant_address_p (rtx x)
13563 {
13564 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13565 }
13566 \f
13567 /* Return a unique alias set for the GOT. */
13568
13569 static alias_set_type
13570 ix86_GOT_alias_set (void)
13571 {
13572 static alias_set_type set = -1;
13573 if (set == -1)
13574 set = new_alias_set ();
13575 return set;
13576 }
13577
13578 /* Set regs_ever_live for PIC base address register
13579 to true if required. */
13580 static void
13581 set_pic_reg_ever_live ()
13582 {
13583 if (reload_in_progress)
13584 df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
13585 }
13586
13587 /* Return a legitimate reference for ORIG (an address) using the
13588 register REG. If REG is 0, a new pseudo is generated.
13589
13590 There are two types of references that must be handled:
13591
13592 1. Global data references must load the address from the GOT, via
13593 the PIC reg. An insn is emitted to do this load, and the reg is
13594 returned.
13595
13596 2. Static data references, constant pool addresses, and code labels
13597 compute the address as an offset from the GOT, whose base is in
13598 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13599 differentiate them from global data objects. The returned
13600 address is the PIC reg + an unspec constant.
13601
13602 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13603 reg also appears in the address. */
13604
13605 static rtx
13606 legitimize_pic_address (rtx orig, rtx reg)
13607 {
13608 rtx addr = orig;
13609 rtx new_rtx = orig;
13610
13611 #if TARGET_MACHO
13612 if (TARGET_MACHO && !TARGET_64BIT)
13613 {
13614 if (reg == 0)
13615 reg = gen_reg_rtx (Pmode);
13616 /* Use the generic Mach-O PIC machinery. */
13617 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13618 }
13619 #endif
13620
13621 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13622 {
13623 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13624 if (tmp)
13625 return tmp;
13626 }
13627
13628 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13629 new_rtx = addr;
13630 else if (TARGET_64BIT && !TARGET_PECOFF
13631 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13632 {
13633 rtx tmpreg;
13634 /* This symbol may be referenced via a displacement from the PIC
13635 base address (@GOTOFF). */
13636
13637 set_pic_reg_ever_live ();
13638 if (GET_CODE (addr) == CONST)
13639 addr = XEXP (addr, 0);
13640 if (GET_CODE (addr) == PLUS)
13641 {
13642 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13643 UNSPEC_GOTOFF);
13644 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13645 }
13646 else
13647 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13648 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13649 if (!reg)
13650 tmpreg = gen_reg_rtx (Pmode);
13651 else
13652 tmpreg = reg;
13653 emit_move_insn (tmpreg, new_rtx);
13654
13655 if (reg != 0)
13656 {
13657 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13658 tmpreg, 1, OPTAB_DIRECT);
13659 new_rtx = reg;
13660 }
13661 else
13662 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13663 }
13664 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13665 {
13666 /* This symbol may be referenced via a displacement from the PIC
13667 base address (@GOTOFF). */
13668
13669 set_pic_reg_ever_live ();
13670 if (GET_CODE (addr) == CONST)
13671 addr = XEXP (addr, 0);
13672 if (GET_CODE (addr) == PLUS)
13673 {
13674 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13675 UNSPEC_GOTOFF);
13676 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13677 }
13678 else
13679 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13680 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13681 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13682
13683 if (reg != 0)
13684 {
13685 emit_move_insn (reg, new_rtx);
13686 new_rtx = reg;
13687 }
13688 }
13689 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13690 /* We can't use @GOTOFF for text labels on VxWorks;
13691 see gotoff_operand. */
13692 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13693 {
13694 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13695 if (tmp)
13696 return tmp;
13697
13698 /* For x64 PE-COFF there is no GOT table, so we use the address
13699 directly. */
13700 if (TARGET_64BIT && TARGET_PECOFF)
13701 {
13702 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13703 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13704
13705 if (reg == 0)
13706 reg = gen_reg_rtx (Pmode);
13707 emit_move_insn (reg, new_rtx);
13708 new_rtx = reg;
13709 }
13710 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13711 {
13712 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13713 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13714 new_rtx = gen_const_mem (Pmode, new_rtx);
13715 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13716
13717 if (reg == 0)
13718 reg = gen_reg_rtx (Pmode);
13719 /* Use gen_movsi directly, otherwise the address is loaded
13720 into a register for CSE. We don't want to CSE this address;
13721 instead we CSE addresses from the GOT table, so skip this. */
13722 emit_insn (gen_movsi (reg, new_rtx));
13723 new_rtx = reg;
13724 }
13725 else
13726 {
13727 /* This symbol must be referenced via a load from the
13728 Global Offset Table (@GOT). */
13729
13730 set_pic_reg_ever_live ();
13731 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13732 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13733 if (TARGET_64BIT)
13734 new_rtx = force_reg (Pmode, new_rtx);
13735 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13736 new_rtx = gen_const_mem (Pmode, new_rtx);
13737 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13738
13739 if (reg == 0)
13740 reg = gen_reg_rtx (Pmode);
13741 emit_move_insn (reg, new_rtx);
13742 new_rtx = reg;
13743 }
13744 }
13745 else
13746 {
13747 if (CONST_INT_P (addr)
13748 && !x86_64_immediate_operand (addr, VOIDmode))
13749 {
13750 if (reg)
13751 {
13752 emit_move_insn (reg, addr);
13753 new_rtx = reg;
13754 }
13755 else
13756 new_rtx = force_reg (Pmode, addr);
13757 }
13758 else if (GET_CODE (addr) == CONST)
13759 {
13760 addr = XEXP (addr, 0);
13761
13762 /* We must match stuff we generate before. Assume the only
13763 unspecs that can get here are ours. Not that we could do
13764 anything with them anyway.... */
13765 if (GET_CODE (addr) == UNSPEC
13766 || (GET_CODE (addr) == PLUS
13767 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13768 return orig;
13769 gcc_assert (GET_CODE (addr) == PLUS);
13770 }
13771 if (GET_CODE (addr) == PLUS)
13772 {
13773 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13774
13775 /* Check first to see if this is a constant offset from a @GOTOFF
13776 symbol reference. */
13777 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13778 && CONST_INT_P (op1))
13779 {
13780 if (!TARGET_64BIT)
13781 {
13782 set_pic_reg_ever_live ();
13783 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13784 UNSPEC_GOTOFF);
13785 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13786 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13787 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13788
13789 if (reg != 0)
13790 {
13791 emit_move_insn (reg, new_rtx);
13792 new_rtx = reg;
13793 }
13794 }
13795 else
13796 {
13797 if (INTVAL (op1) < -16*1024*1024
13798 || INTVAL (op1) >= 16*1024*1024)
13799 {
13800 if (!x86_64_immediate_operand (op1, Pmode))
13801 op1 = force_reg (Pmode, op1);
13802 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13803 }
13804 }
13805 }
13806 else
13807 {
13808 rtx base = legitimize_pic_address (op0, reg);
13809 machine_mode mode = GET_MODE (base);
13810 new_rtx
13811 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13812
13813 if (CONST_INT_P (new_rtx))
13814 {
13815 if (INTVAL (new_rtx) < -16*1024*1024
13816 || INTVAL (new_rtx) >= 16*1024*1024)
13817 {
13818 if (!x86_64_immediate_operand (new_rtx, mode))
13819 new_rtx = force_reg (mode, new_rtx);
13820 new_rtx
13821 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13822 }
13823 else
13824 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13825 }
13826 else
13827 {
13828 if (GET_CODE (new_rtx) == PLUS
13829 && CONSTANT_P (XEXP (new_rtx, 1)))
13830 {
13831 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13832 new_rtx = XEXP (new_rtx, 1);
13833 }
13834 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13835 }
13836 }
13837 }
13838 }
13839 return new_rtx;
13840 }
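
/* Illustrative sketch, not from the original source: for -fpic on ia32 a
   reference to a local symbol "var" is typically legitimized above as

       (plus (reg pic_offset_table) (const (unspec [var] UNSPEC_GOTOFF)))

   which assembles to  movl var@GOTOFF(%ebx), %eax  while a global symbol
   is loaded through the GOT instead:

       movl var@GOT(%ebx), %ecx          load &var from the GOT slot
       movl (%ecx), %eax                 load var itself.  */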
13841 \f
13842 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13843
13844 static rtx
13845 get_thread_pointer (machine_mode tp_mode, bool to_reg)
13846 {
13847 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13848
13849 if (GET_MODE (tp) != tp_mode)
13850 {
13851 gcc_assert (GET_MODE (tp) == SImode);
13852 gcc_assert (tp_mode == DImode);
13853
13854 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13855 }
13856
13857 if (to_reg)
13858 tp = copy_to_mode_reg (tp_mode, tp);
13859
13860 return tp;
13861 }
13862
13863 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13864
13865 static GTY(()) rtx ix86_tls_symbol;
13866
13867 static rtx
13868 ix86_tls_get_addr (void)
13869 {
13870 if (!ix86_tls_symbol)
13871 {
13872 const char *sym
13873 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13874 ? "___tls_get_addr" : "__tls_get_addr");
13875
13876 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13877 }
13878
13879 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13880 {
13881 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13882 UNSPEC_PLTOFF);
13883 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13884 gen_rtx_CONST (Pmode, unspec));
13885 }
13886
13887 return ix86_tls_symbol;
13888 }
13889
13890 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13891
13892 static GTY(()) rtx ix86_tls_module_base_symbol;
13893
13894 rtx
13895 ix86_tls_module_base (void)
13896 {
13897 if (!ix86_tls_module_base_symbol)
13898 {
13899 ix86_tls_module_base_symbol
13900 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13901
13902 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13903 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13904 }
13905
13906 return ix86_tls_module_base_symbol;
13907 }
13908
13909 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13910 false if we expect this to be used for a memory address and true if
13911 we expect to load the address into a register. */
13912
13913 static rtx
13914 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13915 {
13916 rtx dest, base, off;
13917 rtx pic = NULL_RTX, tp = NULL_RTX;
13918 machine_mode tp_mode = Pmode;
13919 int type;
13920
13921 /* Fall back to the global dynamic model if the toolchain cannot support
13922 local dynamic. */
13923 if (TARGET_SUN_TLS && !TARGET_64BIT
13924 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13925 && model == TLS_MODEL_LOCAL_DYNAMIC)
13926 model = TLS_MODEL_GLOBAL_DYNAMIC;
13927
13928 switch (model)
13929 {
13930 case TLS_MODEL_GLOBAL_DYNAMIC:
13931 dest = gen_reg_rtx (Pmode);
13932
13933 if (!TARGET_64BIT)
13934 {
13935 if (flag_pic && !TARGET_PECOFF)
13936 pic = pic_offset_table_rtx;
13937 else
13938 {
13939 pic = gen_reg_rtx (Pmode);
13940 emit_insn (gen_set_got (pic));
13941 }
13942 }
13943
13944 if (TARGET_GNU2_TLS)
13945 {
13946 if (TARGET_64BIT)
13947 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13948 else
13949 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13950
13951 tp = get_thread_pointer (Pmode, true);
13952 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13953
13954 if (GET_MODE (x) != Pmode)
13955 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13956
13957 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13958 }
13959 else
13960 {
13961 rtx caddr = ix86_tls_get_addr ();
13962
13963 if (TARGET_64BIT)
13964 {
13965 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13966 rtx_insn *insns;
13967
13968 start_sequence ();
13969 emit_call_insn
13970 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13971 insns = get_insns ();
13972 end_sequence ();
13973
13974 if (GET_MODE (x) != Pmode)
13975 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13976
13977 RTL_CONST_CALL_P (insns) = 1;
13978 emit_libcall_block (insns, dest, rax, x);
13979 }
13980 else
13981 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13982 }
13983 break;
13984
13985 case TLS_MODEL_LOCAL_DYNAMIC:
13986 base = gen_reg_rtx (Pmode);
13987
13988 if (!TARGET_64BIT)
13989 {
13990 if (flag_pic)
13991 pic = pic_offset_table_rtx;
13992 else
13993 {
13994 pic = gen_reg_rtx (Pmode);
13995 emit_insn (gen_set_got (pic));
13996 }
13997 }
13998
13999 if (TARGET_GNU2_TLS)
14000 {
14001 rtx tmp = ix86_tls_module_base ();
14002
14003 if (TARGET_64BIT)
14004 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
14005 else
14006 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
14007
14008 tp = get_thread_pointer (Pmode, true);
14009 set_unique_reg_note (get_last_insn (), REG_EQUAL,
14010 gen_rtx_MINUS (Pmode, tmp, tp));
14011 }
14012 else
14013 {
14014 rtx caddr = ix86_tls_get_addr ();
14015
14016 if (TARGET_64BIT)
14017 {
14018 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14019 rtx_insn *insns;
14020 rtx eqv;
14021
14022 start_sequence ();
14023 emit_call_insn
14024 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
14025 insns = get_insns ();
14026 end_sequence ();
14027
14028 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
14029 share the LD_BASE result with other LD model accesses. */
14030 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
14031 UNSPEC_TLS_LD_BASE);
14032
14033 RTL_CONST_CALL_P (insns) = 1;
14034 emit_libcall_block (insns, base, rax, eqv);
14035 }
14036 else
14037 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
14038 }
14039
14040 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
14041 off = gen_rtx_CONST (Pmode, off);
14042
14043 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
14044
14045 if (TARGET_GNU2_TLS)
14046 {
14047 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
14048
14049 if (GET_MODE (x) != Pmode)
14050 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14051
14052 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14053 }
14054 break;
14055
14056 case TLS_MODEL_INITIAL_EXEC:
14057 if (TARGET_64BIT)
14058 {
14059 if (TARGET_SUN_TLS && !TARGET_X32)
14060 {
14061 /* The Sun linker took the AMD64 TLS spec literally
14062 and can only handle %rax as the destination of the
14063 initial-exec code sequence. */
14064
14065 dest = gen_reg_rtx (DImode);
14066 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14067 return dest;
14068 }
14069
14070 /* Generate DImode references to avoid %fs:(%reg32)
14071 problems and the linker IE->LE relaxation bug. */
14072 tp_mode = DImode;
14073 pic = NULL;
14074 type = UNSPEC_GOTNTPOFF;
14075 }
14076 else if (flag_pic)
14077 {
14078 set_pic_reg_ever_live ();
14079 pic = pic_offset_table_rtx;
14080 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14081 }
14082 else if (!TARGET_ANY_GNU_TLS)
14083 {
14084 pic = gen_reg_rtx (Pmode);
14085 emit_insn (gen_set_got (pic));
14086 type = UNSPEC_GOTTPOFF;
14087 }
14088 else
14089 {
14090 pic = NULL;
14091 type = UNSPEC_INDNTPOFF;
14092 }
14093
14094 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14095 off = gen_rtx_CONST (tp_mode, off);
14096 if (pic)
14097 off = gen_rtx_PLUS (tp_mode, pic, off);
14098 off = gen_const_mem (tp_mode, off);
14099 set_mem_alias_set (off, ix86_GOT_alias_set ());
14100
14101 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14102 {
14103 base = get_thread_pointer (tp_mode,
14104 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14105 off = force_reg (tp_mode, off);
14106 return gen_rtx_PLUS (tp_mode, base, off);
14107 }
14108 else
14109 {
14110 base = get_thread_pointer (Pmode, true);
14111 dest = gen_reg_rtx (Pmode);
14112 emit_insn (ix86_gen_sub3 (dest, base, off));
14113 }
14114 break;
14115
14116 case TLS_MODEL_LOCAL_EXEC:
14117 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14118 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14119 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14120 off = gen_rtx_CONST (Pmode, off);
14121
14122 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14123 {
14124 base = get_thread_pointer (Pmode,
14125 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14126 return gen_rtx_PLUS (Pmode, base, off);
14127 }
14128 else
14129 {
14130 base = get_thread_pointer (Pmode, true);
14131 dest = gen_reg_rtx (Pmode);
14132 emit_insn (ix86_gen_sub3 (dest, base, off));
14133 }
14134 break;
14135
14136 default:
14137 gcc_unreachable ();
14138 }
14139
14140 return dest;
14141 }
14142
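/* Illustrative sketch, not from the original source: under the local-exec
   model on x86_64 with GNU TLS the address of a thread variable "tvar" is
   formed above as  thread-pointer + tvar@tpoff, e.g.

       movq %fs:0, %rax
       leaq tvar@tpoff(%rax), %rax

   while the initial-exec model first loads the offset from the GOT:

       movq %fs:0, %rax
       addq tvar@gottpoff(%rip), %rax  */
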
14143 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14144 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14145 unique refptr-DECL symbol corresponding to symbol DECL. */
14146
14147 struct dllimport_hasher : ggc_cache_hasher<tree_map *>
14148 {
14149 static inline hashval_t hash (tree_map *m) { return m->hash; }
14150 static inline bool
14151 equal (tree_map *a, tree_map *b)
14152 {
14153 return a->base.from == b->base.from;
14154 }
14155
14156 static void
14157 handle_cache_entry (tree_map *&m)
14158 {
14159 extern void gt_ggc_mx (tree_map *&);
14160 if (m == HTAB_EMPTY_ENTRY || m == HTAB_DELETED_ENTRY)
14161 return;
14162 else if (ggc_marked_p (m->base.from))
14163 gt_ggc_mx (m);
14164 else
14165 m = static_cast<tree_map *> (HTAB_DELETED_ENTRY);
14166 }
14167 };
14168
14169 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14170
14171 static tree
14172 get_dllimport_decl (tree decl, bool beimport)
14173 {
14174 struct tree_map *h, in;
14175 const char *name;
14176 const char *prefix;
14177 size_t namelen, prefixlen;
14178 char *imp_name;
14179 tree to;
14180 rtx rtl;
14181
14182 if (!dllimport_map)
14183 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14184
14185 in.hash = htab_hash_pointer (decl);
14186 in.base.from = decl;
14187 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14188 h = *loc;
14189 if (h)
14190 return h->to;
14191
14192 *loc = h = ggc_alloc<tree_map> ();
14193 h->hash = in.hash;
14194 h->base.from = decl;
14195 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14196 VAR_DECL, NULL, ptr_type_node);
14197 DECL_ARTIFICIAL (to) = 1;
14198 DECL_IGNORED_P (to) = 1;
14199 DECL_EXTERNAL (to) = 1;
14200 TREE_READONLY (to) = 1;
14201
14202 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14203 name = targetm.strip_name_encoding (name);
14204 if (beimport)
14205 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14206 ? "*__imp_" : "*__imp__";
14207 else
14208 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14209 namelen = strlen (name);
14210 prefixlen = strlen (prefix);
14211 imp_name = (char *) alloca (namelen + prefixlen + 1);
14212 memcpy (imp_name, prefix, prefixlen);
14213 memcpy (imp_name + prefixlen, name, namelen + 1);
14214
14215 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14216 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14217 SET_SYMBOL_REF_DECL (rtl, to);
14218 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14219 if (!beimport)
14220 {
14221 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14222 #ifdef SUB_TARGET_RECORD_STUB
14223 SUB_TARGET_RECORD_STUB (name);
14224 #endif
14225 }
14226
14227 rtl = gen_const_mem (Pmode, rtl);
14228 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14229
14230 SET_DECL_RTL (to, rtl);
14231 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14232
14233 return to;
14234 }
14235
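/* Illustrative example, an assumption rather than part of the original
   source: for a dllimport'ed symbol "f" the routine above creates a
   companion decl whose assembler name is "__imp_f" (or "__imp__f" on
   targets with a user label prefix), and whose DECL_RTL is a
   (mem (symbol_ref "__imp_f")) resolved by the linker to the import
   table slot holding the address of f.  */
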
14236 /* Expand SYMBOL into its corresponding far-address symbol.
14237 WANT_REG is true if we require the result to be a register. */
14238
14239 static rtx
14240 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14241 {
14242 tree imp_decl;
14243 rtx x;
14244
14245 gcc_assert (SYMBOL_REF_DECL (symbol));
14246 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14247
14248 x = DECL_RTL (imp_decl);
14249 if (want_reg)
14250 x = force_reg (Pmode, x);
14251 return x;
14252 }
14253
14254 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14255 true if we require the result to be a register. */
14256
14257 static rtx
14258 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14259 {
14260 tree imp_decl;
14261 rtx x;
14262
14263 gcc_assert (SYMBOL_REF_DECL (symbol));
14264 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14265
14266 x = DECL_RTL (imp_decl);
14267 if (want_reg)
14268 x = force_reg (Pmode, x);
14269 return x;
14270 }
14271
14272 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
14273 is true if we require the result to be a register. */
14274
14275 static rtx
14276 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14277 {
14278 if (!TARGET_PECOFF)
14279 return NULL_RTX;
14280
14281 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14282 {
14283 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14284 return legitimize_dllimport_symbol (addr, inreg);
14285 if (GET_CODE (addr) == CONST
14286 && GET_CODE (XEXP (addr, 0)) == PLUS
14287 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14288 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14289 {
14290 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14291 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14292 }
14293 }
14294
14295 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14296 return NULL_RTX;
14297 if (GET_CODE (addr) == SYMBOL_REF
14298 && !is_imported_p (addr)
14299 && SYMBOL_REF_EXTERNAL_P (addr)
14300 && SYMBOL_REF_DECL (addr))
14301 return legitimize_pe_coff_extern_decl (addr, inreg);
14302
14303 if (GET_CODE (addr) == CONST
14304 && GET_CODE (XEXP (addr, 0)) == PLUS
14305 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14306 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14307 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14308 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14309 {
14310 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14311 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14312 }
14313 return NULL_RTX;
14314 }
14315
14316 /* Try machine-dependent ways of modifying an illegitimate address
14317 to be legitimate. If we find one, return the new, valid address.
14318 This macro is used in only one place: `memory_address' in explow.c.
14319
14320 OLDX is the address as it was before break_out_memory_refs was called.
14321 In some cases it is useful to look at this to decide what needs to be done.
14322
14323 It is always safe for this macro to do nothing. It exists to recognize
14324 opportunities to optimize the output.
14325
14326 For the 80386, we handle X+REG by loading X into a register R and
14327 using R+REG. R will go in a general reg and indexing will be used.
14328 However, if REG is a broken-out memory address or multiplication,
14329 nothing needs to be done because REG can certainly go in a general reg.
14330
14331 When -fpic is used, special handling is needed for symbolic references.
14332 See comments by legitimize_pic_address in i386.c for details. */
14333
14334 static rtx
14335 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14336 {
14337 bool changed = false;
14338 unsigned log;
14339
14340 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14341 if (log)
14342 return legitimize_tls_address (x, (enum tls_model) log, false);
14343 if (GET_CODE (x) == CONST
14344 && GET_CODE (XEXP (x, 0)) == PLUS
14345 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14346 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14347 {
14348 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14349 (enum tls_model) log, false);
14350 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14351 }
14352
14353 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14354 {
14355 rtx tmp = legitimize_pe_coff_symbol (x, true);
14356 if (tmp)
14357 return tmp;
14358 }
14359
14360 if (flag_pic && SYMBOLIC_CONST (x))
14361 return legitimize_pic_address (x, 0);
14362
14363 #if TARGET_MACHO
14364 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14365 return machopic_indirect_data_reference (x, 0);
14366 #endif
14367
14368 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14369 if (GET_CODE (x) == ASHIFT
14370 && CONST_INT_P (XEXP (x, 1))
14371 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14372 {
14373 changed = true;
14374 log = INTVAL (XEXP (x, 1));
14375 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14376 GEN_INT (1 << log));
14377 }
14378
14379 if (GET_CODE (x) == PLUS)
14380 {
14381 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14382
14383 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14384 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14385 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14386 {
14387 changed = true;
14388 log = INTVAL (XEXP (XEXP (x, 0), 1));
14389 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14390 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14391 GEN_INT (1 << log));
14392 }
14393
14394 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14395 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14396 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14397 {
14398 changed = true;
14399 log = INTVAL (XEXP (XEXP (x, 1), 1));
14400 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14401 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14402 GEN_INT (1 << log));
14403 }
14404
14405 /* Put multiply first if it isn't already. */
14406 if (GET_CODE (XEXP (x, 1)) == MULT)
14407 {
14408 std::swap (XEXP (x, 0), XEXP (x, 1));
14409 changed = true;
14410 }
14411
14412 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14413 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14414 created by virtual register instantiation, register elimination, and
14415 similar optimizations. */
14416 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14417 {
14418 changed = true;
14419 x = gen_rtx_PLUS (Pmode,
14420 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14421 XEXP (XEXP (x, 1), 0)),
14422 XEXP (XEXP (x, 1), 1));
14423 }
14424
14425 /* Canonicalize
14426 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14427 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14428 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14429 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14430 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14431 && CONSTANT_P (XEXP (x, 1)))
14432 {
14433 rtx constant;
14434 rtx other = NULL_RTX;
14435
14436 if (CONST_INT_P (XEXP (x, 1)))
14437 {
14438 constant = XEXP (x, 1);
14439 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14440 }
14441 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14442 {
14443 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14444 other = XEXP (x, 1);
14445 }
14446 else
14447 constant = 0;
14448
14449 if (constant)
14450 {
14451 changed = true;
14452 x = gen_rtx_PLUS (Pmode,
14453 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14454 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14455 plus_constant (Pmode, other,
14456 INTVAL (constant)));
14457 }
14458 }
14459
14460 if (changed && ix86_legitimate_address_p (mode, x, false))
14461 return x;
14462
14463 if (GET_CODE (XEXP (x, 0)) == MULT)
14464 {
14465 changed = true;
14466 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14467 }
14468
14469 if (GET_CODE (XEXP (x, 1)) == MULT)
14470 {
14471 changed = true;
14472 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14473 }
14474
14475 if (changed
14476 && REG_P (XEXP (x, 1))
14477 && REG_P (XEXP (x, 0)))
14478 return x;
14479
14480 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14481 {
14482 changed = true;
14483 x = legitimize_pic_address (x, 0);
14484 }
14485
14486 if (changed && ix86_legitimate_address_p (mode, x, false))
14487 return x;
14488
14489 if (REG_P (XEXP (x, 0)))
14490 {
14491 rtx temp = gen_reg_rtx (Pmode);
14492 rtx val = force_operand (XEXP (x, 1), temp);
14493 if (val != temp)
14494 {
14495 val = convert_to_mode (Pmode, val, 1);
14496 emit_move_insn (temp, val);
14497 }
14498
14499 XEXP (x, 1) = temp;
14500 return x;
14501 }
14502
14503 else if (REG_P (XEXP (x, 1)))
14504 {
14505 rtx temp = gen_reg_rtx (Pmode);
14506 rtx val = force_operand (XEXP (x, 0), temp);
14507 if (val != temp)
14508 {
14509 val = convert_to_mode (Pmode, val, 1);
14510 emit_move_insn (temp, val);
14511 }
14512
14513 XEXP (x, 0) = temp;
14514 return x;
14515 }
14516 }
14517
14518 return x;
14519 }
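
/* Illustrative sketch, not from the original source: the shift-to-multiply
   canonicalization above rewrites

       (plus (ashift (reg) (const_int 2)) (reg))

   into

       (plus (mult (reg) (const_int 4)) (reg))

   which ix86_legitimate_address_p then accepts as an index*scale + base
   address.  */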
14520 \f
14521 /* Print an integer constant expression in assembler syntax. Addition
14522 and subtraction are the only arithmetic that may appear in these
14523 expressions. FILE is the stdio stream to write to, X is the rtx, and
14524 CODE is the operand print code from the output string. */
14525
14526 static void
14527 output_pic_addr_const (FILE *file, rtx x, int code)
14528 {
14529 char buf[256];
14530
14531 switch (GET_CODE (x))
14532 {
14533 case PC:
14534 gcc_assert (flag_pic);
14535 putc ('.', file);
14536 break;
14537
14538 case SYMBOL_REF:
14539 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14540 output_addr_const (file, x);
14541 else
14542 {
14543 const char *name = XSTR (x, 0);
14544
14545 /* Mark the decl as referenced so that cgraph will
14546 output the function. */
14547 if (SYMBOL_REF_DECL (x))
14548 mark_decl_referenced (SYMBOL_REF_DECL (x));
14549
14550 #if TARGET_MACHO
14551 if (MACHOPIC_INDIRECT
14552 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14553 name = machopic_indirection_name (x, /*stub_p=*/true);
14554 #endif
14555 assemble_name (file, name);
14556 }
14557 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14558 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14559 fputs ("@PLT", file);
14560 break;
14561
14562 case LABEL_REF:
14563 x = XEXP (x, 0);
14564 /* FALLTHRU */
14565 case CODE_LABEL:
14566 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14567 assemble_name (asm_out_file, buf);
14568 break;
14569
14570 case CONST_INT:
14571 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14572 break;
14573
14574 case CONST:
14575 /* This used to output parentheses around the expression,
14576 but that does not work on the 386 (either ATT or BSD assembler). */
14577 output_pic_addr_const (file, XEXP (x, 0), code);
14578 break;
14579
14580 case CONST_DOUBLE:
14581 if (GET_MODE (x) == VOIDmode)
14582 {
14583 /* We can use %d if the number is <32 bits and positive. */
14584 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14585 fprintf (file, "0x%lx%08lx",
14586 (unsigned long) CONST_DOUBLE_HIGH (x),
14587 (unsigned long) CONST_DOUBLE_LOW (x));
14588 else
14589 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14590 }
14591 else
14592 /* We can't handle floating point constants;
14593 TARGET_PRINT_OPERAND must handle them. */
14594 output_operand_lossage ("floating constant misused");
14595 break;
14596
14597 case PLUS:
14598 /* Some assemblers need integer constants to appear first. */
14599 if (CONST_INT_P (XEXP (x, 0)))
14600 {
14601 output_pic_addr_const (file, XEXP (x, 0), code);
14602 putc ('+', file);
14603 output_pic_addr_const (file, XEXP (x, 1), code);
14604 }
14605 else
14606 {
14607 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14608 output_pic_addr_const (file, XEXP (x, 1), code);
14609 putc ('+', file);
14610 output_pic_addr_const (file, XEXP (x, 0), code);
14611 }
14612 break;
14613
14614 case MINUS:
14615 if (!TARGET_MACHO)
14616 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14617 output_pic_addr_const (file, XEXP (x, 0), code);
14618 putc ('-', file);
14619 output_pic_addr_const (file, XEXP (x, 1), code);
14620 if (!TARGET_MACHO)
14621 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14622 break;
14623
14624 case UNSPEC:
14625 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14626 {
14627 bool f = i386_asm_output_addr_const_extra (file, x);
14628 gcc_assert (f);
14629 break;
14630 }
14631
14632 gcc_assert (XVECLEN (x, 0) == 1);
14633 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14634 switch (XINT (x, 1))
14635 {
14636 case UNSPEC_GOT:
14637 fputs ("@GOT", file);
14638 break;
14639 case UNSPEC_GOTOFF:
14640 fputs ("@GOTOFF", file);
14641 break;
14642 case UNSPEC_PLTOFF:
14643 fputs ("@PLTOFF", file);
14644 break;
14645 case UNSPEC_PCREL:
14646 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14647 "(%rip)" : "[rip]", file);
14648 break;
14649 case UNSPEC_GOTPCREL:
14650 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14651 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14652 break;
14653 case UNSPEC_GOTTPOFF:
14654 /* FIXME: This might be @TPOFF in Sun ld too. */
14655 fputs ("@gottpoff", file);
14656 break;
14657 case UNSPEC_TPOFF:
14658 fputs ("@tpoff", file);
14659 break;
14660 case UNSPEC_NTPOFF:
14661 if (TARGET_64BIT)
14662 fputs ("@tpoff", file);
14663 else
14664 fputs ("@ntpoff", file);
14665 break;
14666 case UNSPEC_DTPOFF:
14667 fputs ("@dtpoff", file);
14668 break;
14669 case UNSPEC_GOTNTPOFF:
14670 if (TARGET_64BIT)
14671 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14672 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14673 else
14674 fputs ("@gotntpoff", file);
14675 break;
14676 case UNSPEC_INDNTPOFF:
14677 fputs ("@indntpoff", file);
14678 break;
14679 #if TARGET_MACHO
14680 case UNSPEC_MACHOPIC_OFFSET:
14681 putc ('-', file);
14682 machopic_output_function_base_name (file);
14683 break;
14684 #endif
14685 default:
14686 output_operand_lossage ("invalid UNSPEC as operand");
14687 break;
14688 }
14689 break;
14690
14691 default:
14692 output_operand_lossage ("invalid expression as operand");
14693 }
14694 }
14695
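/* Illustrative example, not from the original source: given
   (const (unspec [(symbol_ref "var")] UNSPEC_GOTOFF)) the routine above
   prints "var@GOTOFF", and for UNSPEC_GOTPCREL it prints
   "var@GOTPCREL(%rip)" in AT&T syntax or "var@GOTPCREL[rip]" in Intel
   syntax.  */
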
14696 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14697 We need to emit DTP-relative relocations. */
14698
14699 static void ATTRIBUTE_UNUSED
14700 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14701 {
14702 fputs (ASM_LONG, file);
14703 output_addr_const (file, x);
14704 fputs ("@dtpoff", file);
14705 switch (size)
14706 {
14707 case 4:
14708 break;
14709 case 8:
14710 fputs (", 0", file);
14711 break;
14712 default:
14713 gcc_unreachable ();
14714 }
14715 }
14716
14717 /* Return true if X is a representation of the PIC register. This copes
14718 with calls from ix86_find_base_term, where the register might have
14719 been replaced by a cselib value. */
14720
14721 static bool
14722 ix86_pic_register_p (rtx x)
14723 {
14724 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14725 return (pic_offset_table_rtx
14726 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14727 else if (!REG_P (x))
14728 return false;
14729 else if (pic_offset_table_rtx)
14730 {
14731 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14732 return true;
14733 if (HARD_REGISTER_P (x)
14734 && !HARD_REGISTER_P (pic_offset_table_rtx)
14735 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14736 return true;
14737 return false;
14738 }
14739 else
14740 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14741 }
14742
14743 /* Helper function for ix86_delegitimize_address.
14744 Attempt to delegitimize TLS local-exec accesses. */
14745
14746 static rtx
14747 ix86_delegitimize_tls_address (rtx orig_x)
14748 {
14749 rtx x = orig_x, unspec;
14750 struct ix86_address addr;
14751
14752 if (!TARGET_TLS_DIRECT_SEG_REFS)
14753 return orig_x;
14754 if (MEM_P (x))
14755 x = XEXP (x, 0);
14756 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14757 return orig_x;
14758 if (ix86_decompose_address (x, &addr) == 0
14759 || addr.seg != DEFAULT_TLS_SEG_REG
14760 || addr.disp == NULL_RTX
14761 || GET_CODE (addr.disp) != CONST)
14762 return orig_x;
14763 unspec = XEXP (addr.disp, 0);
14764 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14765 unspec = XEXP (unspec, 0);
14766 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14767 return orig_x;
14768 x = XVECEXP (unspec, 0, 0);
14769 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14770 if (unspec != XEXP (addr.disp, 0))
14771 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14772 if (addr.index)
14773 {
14774 rtx idx = addr.index;
14775 if (addr.scale != 1)
14776 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14777 x = gen_rtx_PLUS (Pmode, idx, x);
14778 }
14779 if (addr.base)
14780 x = gen_rtx_PLUS (Pmode, addr.base, x);
14781 if (MEM_P (orig_x))
14782 x = replace_equiv_address_nv (orig_x, x);
14783 return x;
14784 }
14785
14786 /* In the name of slightly smaller debug output, and to cater to
14787 general assembler lossage, recognize PIC+GOTOFF and turn it back
14788 into a direct symbol reference.
14789
14790 On Darwin, this is necessary to avoid a crash, because Darwin
14791 has a different PIC label for each routine but the DWARF debugging
14792 information is not associated with any particular routine, so it's
14793 necessary to remove references to the PIC label from RTL stored by
14794 the DWARF output code. */
14795
14796 static rtx
14797 ix86_delegitimize_address (rtx x)
14798 {
14799 rtx orig_x = delegitimize_mem_from_attrs (x);
14800 /* addend is NULL or some rtx if x is something+GOTOFF where
14801 something doesn't include the PIC register. */
14802 rtx addend = NULL_RTX;
14803 /* reg_addend is NULL or a multiple of some register. */
14804 rtx reg_addend = NULL_RTX;
14805 /* const_addend is NULL or a const_int. */
14806 rtx const_addend = NULL_RTX;
14807 /* This is the result, or NULL. */
14808 rtx result = NULL_RTX;
14809
14810 x = orig_x;
14811
14812 if (MEM_P (x))
14813 x = XEXP (x, 0);
14814
14815 if (TARGET_64BIT)
14816 {
14817 if (GET_CODE (x) == CONST
14818 && GET_CODE (XEXP (x, 0)) == PLUS
14819 && GET_MODE (XEXP (x, 0)) == Pmode
14820 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14821 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14822 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14823 {
14824 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14825 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14826 if (MEM_P (orig_x))
14827 x = replace_equiv_address_nv (orig_x, x);
14828 return x;
14829 }
14830
14831 if (GET_CODE (x) == CONST
14832 && GET_CODE (XEXP (x, 0)) == UNSPEC
14833 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14834 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14835 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14836 {
14837 x = XVECEXP (XEXP (x, 0), 0, 0);
14838 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14839 {
14840 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14841 GET_MODE (x), 0);
14842 if (x == NULL_RTX)
14843 return orig_x;
14844 }
14845 return x;
14846 }
14847
14848 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14849 return ix86_delegitimize_tls_address (orig_x);
14850
14851 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14852 and -mcmodel=medium -fpic. */
14853 }
14854
14855 if (GET_CODE (x) != PLUS
14856 || GET_CODE (XEXP (x, 1)) != CONST)
14857 return ix86_delegitimize_tls_address (orig_x);
14858
14859 if (ix86_pic_register_p (XEXP (x, 0)))
14860 /* %ebx + GOT/GOTOFF */
14861 ;
14862 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14863 {
14864 /* %ebx + %reg * scale + GOT/GOTOFF */
14865 reg_addend = XEXP (x, 0);
14866 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14867 reg_addend = XEXP (reg_addend, 1);
14868 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14869 reg_addend = XEXP (reg_addend, 0);
14870 else
14871 {
14872 reg_addend = NULL_RTX;
14873 addend = XEXP (x, 0);
14874 }
14875 }
14876 else
14877 addend = XEXP (x, 0);
14878
14879 x = XEXP (XEXP (x, 1), 0);
14880 if (GET_CODE (x) == PLUS
14881 && CONST_INT_P (XEXP (x, 1)))
14882 {
14883 const_addend = XEXP (x, 1);
14884 x = XEXP (x, 0);
14885 }
14886
14887 if (GET_CODE (x) == UNSPEC
14888 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14889 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14890 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14891 && !MEM_P (orig_x) && !addend)))
14892 result = XVECEXP (x, 0, 0);
14893
14894 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14895 && !MEM_P (orig_x))
14896 result = XVECEXP (x, 0, 0);
14897
14898 if (! result)
14899 return ix86_delegitimize_tls_address (orig_x);
14900
14901 if (const_addend)
14902 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14903 if (reg_addend)
14904 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14905 if (addend)
14906 {
14907 /* If the rest of the original X doesn't involve the PIC register, add
14908 the addend and subtract pic_offset_table_rtx. This can happen e.g.
14909 for code like:
14910 leal (%ebx, %ecx, 4), %ecx
14911 ...
14912 movl foo@GOTOFF(%ecx), %edx
14913 in which case we return (%ecx - %ebx) + foo
14914 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14915 and reload has completed. */
14916 if (pic_offset_table_rtx
14917 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14918 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14919 pic_offset_table_rtx),
14920 result);
14921 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14922 {
14923 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14924 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14925 result = gen_rtx_PLUS (Pmode, tmp, result);
14926 }
14927 else
14928 return orig_x;
14929 }
14930 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14931 {
14932 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14933 if (result == NULL_RTX)
14934 return orig_x;
14935 }
14936 return result;
14937 }
14938
14939 /* If X is a machine specific address (i.e. a symbol or label being
14940 referenced as a displacement from the GOT implemented using an
14941 UNSPEC), then return the base term. Otherwise return X. */
14942
14943 rtx
14944 ix86_find_base_term (rtx x)
14945 {
14946 rtx term;
14947
14948 if (TARGET_64BIT)
14949 {
14950 if (GET_CODE (x) != CONST)
14951 return x;
14952 term = XEXP (x, 0);
14953 if (GET_CODE (term) == PLUS
14954 && (CONST_INT_P (XEXP (term, 1))
14955 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14956 term = XEXP (term, 0);
14957 if (GET_CODE (term) != UNSPEC
14958 || (XINT (term, 1) != UNSPEC_GOTPCREL
14959 && XINT (term, 1) != UNSPEC_PCREL))
14960 return x;
14961
14962 return XVECEXP (term, 0, 0);
14963 }
14964
14965 return ix86_delegitimize_address (x);
14966 }
14967 \f
14968 static void
14969 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
14970 bool fp, FILE *file)
14971 {
14972 const char *suffix;
14973
14974 if (mode == CCFPmode || mode == CCFPUmode)
14975 {
14976 code = ix86_fp_compare_code_to_integer (code);
14977 mode = CCmode;
14978 }
14979 if (reverse)
14980 code = reverse_condition (code);
14981
14982 switch (code)
14983 {
14984 case EQ:
14985 switch (mode)
14986 {
14987 case CCAmode:
14988 suffix = "a";
14989 break;
14990
14991 case CCCmode:
14992 suffix = "c";
14993 break;
14994
14995 case CCOmode:
14996 suffix = "o";
14997 break;
14998
14999 case CCSmode:
15000 suffix = "s";
15001 break;
15002
15003 default:
15004 suffix = "e";
15005 }
15006 break;
15007 case NE:
15008 switch (mode)
15009 {
15010 case CCAmode:
15011 suffix = "na";
15012 break;
15013
15014 case CCCmode:
15015 suffix = "nc";
15016 break;
15017
15018 case CCOmode:
15019 suffix = "no";
15020 break;
15021
15022 case CCSmode:
15023 suffix = "ns";
15024 break;
15025
15026 default:
15027 suffix = "ne";
15028 }
15029 break;
15030 case GT:
15031 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
15032 suffix = "g";
15033 break;
15034 case GTU:
15035 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
15036 Those same assemblers have the same but opposite lossage on cmov. */
15037 if (mode == CCmode)
15038 suffix = fp ? "nbe" : "a";
15039 else
15040 gcc_unreachable ();
15041 break;
15042 case LT:
15043 switch (mode)
15044 {
15045 case CCNOmode:
15046 case CCGOCmode:
15047 suffix = "s";
15048 break;
15049
15050 case CCmode:
15051 case CCGCmode:
15052 suffix = "l";
15053 break;
15054
15055 default:
15056 gcc_unreachable ();
15057 }
15058 break;
15059 case LTU:
15060 if (mode == CCmode)
15061 suffix = "b";
15062 else if (mode == CCCmode)
15063 suffix = fp ? "b" : "c";
15064 else
15065 gcc_unreachable ();
15066 break;
15067 case GE:
15068 switch (mode)
15069 {
15070 case CCNOmode:
15071 case CCGOCmode:
15072 suffix = "ns";
15073 break;
15074
15075 case CCmode:
15076 case CCGCmode:
15077 suffix = "ge";
15078 break;
15079
15080 default:
15081 gcc_unreachable ();
15082 }
15083 break;
15084 case GEU:
15085 if (mode == CCmode)
15086 suffix = "nb";
15087 else if (mode == CCCmode)
15088 suffix = fp ? "nb" : "nc";
15089 else
15090 gcc_unreachable ();
15091 break;
15092 case LE:
15093 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15094 suffix = "le";
15095 break;
15096 case LEU:
15097 if (mode == CCmode)
15098 suffix = "be";
15099 else
15100 gcc_unreachable ();
15101 break;
15102 case UNORDERED:
15103 suffix = fp ? "u" : "p";
15104 break;
15105 case ORDERED:
15106 suffix = fp ? "nu" : "np";
15107 break;
15108 default:
15109 gcc_unreachable ();
15110 }
15111 fputs (suffix, file);
15112 }
15113
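/* Illustrative example, not from the original source: for
   (ge (reg:CCGC FLAGS_REG) (const_int 0)) the routine above emits the
   suffix "ge", so a conditional set is printed as "setge"; with REVERSE
   set, the condition becomes LT and "setl" is printed instead.  */
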
15114 /* Print the name of register X to FILE based on its machine mode and number.
15115 If CODE is 'w', pretend the mode is HImode.
15116 If CODE is 'b', pretend the mode is QImode.
15117 If CODE is 'k', pretend the mode is SImode.
15118 If CODE is 'q', pretend the mode is DImode.
15119 If CODE is 'x', pretend the mode is V4SFmode.
15120 If CODE is 't', pretend the mode is V8SFmode.
15121 If CODE is 'g', pretend the mode is V16SFmode.
15122 If CODE is 'h', pretend the reg is the 'high' byte register.
15123 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
15124 If CODE is 'd', duplicate the operand for AVX instruction.
15125 */
15126
15127 void
15128 print_reg (rtx x, int code, FILE *file)
15129 {
15130 const char *reg;
15131 unsigned int regno;
15132 bool duplicated = code == 'd' && TARGET_AVX;
15133
15134 if (ASSEMBLER_DIALECT == ASM_ATT)
15135 putc ('%', file);
15136
15137 if (x == pc_rtx)
15138 {
15139 gcc_assert (TARGET_64BIT);
15140 fputs ("rip", file);
15141 return;
15142 }
15143
15144 regno = true_regnum (x);
15145 gcc_assert (regno != ARG_POINTER_REGNUM
15146 && regno != FRAME_POINTER_REGNUM
15147 && regno != FLAGS_REG
15148 && regno != FPSR_REG
15149 && regno != FPCR_REG);
15150
15151 if (code == 'w' || MMX_REG_P (x))
15152 code = 2;
15153 else if (code == 'b')
15154 code = 1;
15155 else if (code == 'k')
15156 code = 4;
15157 else if (code == 'q')
15158 code = 8;
15159 else if (code == 'y')
15160 code = 3;
15161 else if (code == 'h')
15162 code = 0;
15163 else if (code == 'x')
15164 code = 16;
15165 else if (code == 't')
15166 code = 32;
15167 else if (code == 'g')
15168 code = 64;
15169 else
15170 code = GET_MODE_SIZE (GET_MODE (x));
15171
15172 /* Irritatingly, AMD extended registers use a different naming convention
15173 from the normal registers: "r%d[bwd]". */
15174 if (REX_INT_REGNO_P (regno))
15175 {
15176 gcc_assert (TARGET_64BIT);
15177 putc ('r', file);
15178 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
15179 switch (code)
15180 {
15181 case 0:
15182 error ("extended registers have no high halves");
15183 break;
15184 case 1:
15185 putc ('b', file);
15186 break;
15187 case 2:
15188 putc ('w', file);
15189 break;
15190 case 4:
15191 putc ('d', file);
15192 break;
15193 case 8:
15194 /* no suffix */
15195 break;
15196 default:
15197 error ("unsupported operand size for extended register");
15198 break;
15199 }
15200 return;
15201 }
15202
15203 reg = NULL;
15204 switch (code)
15205 {
15206 case 3:
15207 if (STACK_TOP_P (x))
15208 {
15209 reg = "st(0)";
15210 break;
15211 }
15212 /* FALLTHRU */
15213 case 8:
15214 case 4:
15215 case 12:
15216 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x) && ! ANY_BND_REG_P (x))
15217 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
15218 /* FALLTHRU */
15219 case 16:
15220 case 2:
15221 normal:
15222 reg = hi_reg_name[regno];
15223 break;
15224 case 1:
15225 if (regno >= ARRAY_SIZE (qi_reg_name))
15226 goto normal;
15227 reg = qi_reg_name[regno];
15228 break;
15229 case 0:
15230 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15231 goto normal;
15232 reg = qi_high_reg_name[regno];
15233 break;
15234 case 32:
15235 if (SSE_REG_P (x))
15236 {
15237 gcc_assert (!duplicated);
15238 putc ('y', file);
15239 fputs (hi_reg_name[regno] + 1, file);
15240 return;
15241 }
15242 case 64:
15243 if (SSE_REG_P (x))
15244 {
15245 gcc_assert (!duplicated);
15246 putc ('z', file);
15247 fputs (hi_reg_name[REGNO (x)] + 1, file);
15248 return;
15249 }
15250 break;
15251 default:
15252 gcc_unreachable ();
15253 }
15254
15255 fputs (reg, file);
15256 if (duplicated)
15257 {
15258 if (ASSEMBLER_DIALECT == ASM_ATT)
15259 fprintf (file, ", %%%s", reg);
15260 else
15261 fprintf (file, ", %s", reg);
15262 }
15263 }
15264
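/* Illustrative example, not from the original source: for the register
   operand %eax, code 'b' prints "%al", 'w' prints "%ax", 'k' prints
   "%eax" and 'q' prints "%rax" (AT&T syntax); the REX extended registers
   instead use the "r8b" / "r8w" / "r8d" / "r8" naming handled above.  */
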
15265 /* Meaning of CODE:
15266 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15267 C -- print opcode suffix for set/cmov insn.
15268 c -- like C, but print reversed condition
15269 F,f -- likewise, but for floating-point.
15270 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15271 otherwise nothing
15272 R -- print embedded rounding and sae.
15273 r -- print only sae.
15274 z -- print the opcode suffix for the size of the current operand.
15275 Z -- likewise, with special suffixes for x87 instructions.
15276 * -- print a star (in certain assembler syntax)
15277 A -- print an absolute memory reference.
15278 E -- print address with DImode register names if TARGET_64BIT.
15279 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15280 s -- print a shift double count, followed by the assembler's argument
15281 delimiter.
15282 b -- print the QImode name of the register for the indicated operand.
15283 %b0 would print %al if operands[0] is reg 0.
15284 w -- likewise, print the HImode name of the register.
15285 k -- likewise, print the SImode name of the register.
15286 q -- likewise, print the DImode name of the register.
15287 x -- likewise, print the V4SFmode name of the register.
15288 t -- likewise, print the V8SFmode name of the register.
15289 g -- likewise, print the V16SFmode name of the register.
15290 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15291 y -- print "st(0)" instead of "st" as a register.
15292 d -- print duplicated register operand for AVX instruction.
15293 D -- print condition for SSE cmp instruction.
15294 P -- if PIC, print an @PLT suffix.
15295 p -- print raw symbol name.
15296 X -- don't print any sort of PIC '@' suffix for a symbol.
15297 & -- print some in-use local-dynamic symbol name.
15298 H -- print a memory address offset by 8; used for sse high-parts
15299 Y -- print condition for XOP pcom* instruction.
15300 + -- print a branch hint as 'cs' or 'ds' prefix
15301 ; -- print a semicolon (after prefixes due to bug in older gas).
15302 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15303 @ -- print a segment register of thread base pointer load
15304 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15305 ! -- print MPX prefix for jxx/call/ret instructions if required.
15306 */
15307
15308 void
15309 ix86_print_operand (FILE *file, rtx x, int code)
15310 {
15311 if (code)
15312 {
15313 switch (code)
15314 {
15315 case 'A':
15316 switch (ASSEMBLER_DIALECT)
15317 {
15318 case ASM_ATT:
15319 putc ('*', file);
15320 break;
15321
15322 case ASM_INTEL:
15323 /* Intel syntax. For absolute addresses, registers should not
15324 be surrounded by brackets. */
15325 if (!REG_P (x))
15326 {
15327 putc ('[', file);
15328 ix86_print_operand (file, x, 0);
15329 putc (']', file);
15330 return;
15331 }
15332 break;
15333
15334 default:
15335 gcc_unreachable ();
15336 }
15337
15338 ix86_print_operand (file, x, 0);
15339 return;
15340
15341 case 'E':
15342 /* Wrap address in an UNSPEC to declare special handling. */
15343 if (TARGET_64BIT)
15344 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15345
15346 output_address (x);
15347 return;
15348
15349 case 'L':
15350 if (ASSEMBLER_DIALECT == ASM_ATT)
15351 putc ('l', file);
15352 return;
15353
15354 case 'W':
15355 if (ASSEMBLER_DIALECT == ASM_ATT)
15356 putc ('w', file);
15357 return;
15358
15359 case 'B':
15360 if (ASSEMBLER_DIALECT == ASM_ATT)
15361 putc ('b', file);
15362 return;
15363
15364 case 'Q':
15365 if (ASSEMBLER_DIALECT == ASM_ATT)
15366 putc ('l', file);
15367 return;
15368
15369 case 'S':
15370 if (ASSEMBLER_DIALECT == ASM_ATT)
15371 putc ('s', file);
15372 return;
15373
15374 case 'T':
15375 if (ASSEMBLER_DIALECT == ASM_ATT)
15376 putc ('t', file);
15377 return;
15378
15379 case 'O':
15380 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15381 if (ASSEMBLER_DIALECT != ASM_ATT)
15382 return;
15383
15384 switch (GET_MODE_SIZE (GET_MODE (x)))
15385 {
15386 case 2:
15387 putc ('w', file);
15388 break;
15389
15390 case 4:
15391 putc ('l', file);
15392 break;
15393
15394 case 8:
15395 putc ('q', file);
15396 break;
15397
15398 default:
15399 output_operand_lossage
15400 ("invalid operand size for operand code 'O'");
15401 return;
15402 }
15403
15404 putc ('.', file);
15405 #endif
15406 return;
15407
15408 case 'z':
15409 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15410 {
15411 /* Opcodes don't get size suffixes if using Intel syntax. */
15412 if (ASSEMBLER_DIALECT == ASM_INTEL)
15413 return;
15414
15415 switch (GET_MODE_SIZE (GET_MODE (x)))
15416 {
15417 case 1:
15418 putc ('b', file);
15419 return;
15420
15421 case 2:
15422 putc ('w', file);
15423 return;
15424
15425 case 4:
15426 putc ('l', file);
15427 return;
15428
15429 case 8:
15430 putc ('q', file);
15431 return;
15432
15433 default:
15434 output_operand_lossage
15435 ("invalid operand size for operand code 'z'");
15436 return;
15437 }
15438 }
15439
15440 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15441 warning
15442 (0, "non-integer operand used with operand code 'z'");
15443 /* FALLTHRU */
15444
15445 case 'Z':
15446 /* 387 opcodes don't get size suffixes if using Intel syntax. */
15447 if (ASSEMBLER_DIALECT == ASM_INTEL)
15448 return;
15449
15450 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15451 {
15452 switch (GET_MODE_SIZE (GET_MODE (x)))
15453 {
15454 case 2:
15455 #ifdef HAVE_AS_IX86_FILDS
15456 putc ('s', file);
15457 #endif
15458 return;
15459
15460 case 4:
15461 putc ('l', file);
15462 return;
15463
15464 case 8:
15465 #ifdef HAVE_AS_IX86_FILDQ
15466 putc ('q', file);
15467 #else
15468 fputs ("ll", file);
15469 #endif
15470 return;
15471
15472 default:
15473 break;
15474 }
15475 }
15476 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15477 {
15478 /* 387 opcodes don't get size suffixes
15479 if the operands are registers. */
15480 if (STACK_REG_P (x))
15481 return;
15482
15483 switch (GET_MODE_SIZE (GET_MODE (x)))
15484 {
15485 case 4:
15486 putc ('s', file);
15487 return;
15488
15489 case 8:
15490 putc ('l', file);
15491 return;
15492
15493 case 12:
15494 case 16:
15495 putc ('t', file);
15496 return;
15497
15498 default:
15499 break;
15500 }
15501 }
15502 else
15503 {
15504 output_operand_lossage
15505 ("invalid operand type used with operand code 'Z'");
15506 return;
15507 }
15508
15509 output_operand_lossage
15510 ("invalid operand size for operand code 'Z'");
15511 return;
15512
15513 case 'd':
15514 case 'b':
15515 case 'w':
15516 case 'k':
15517 case 'q':
15518 case 'h':
15519 case 't':
15520 case 'g':
15521 case 'y':
15522 case 'x':
15523 case 'X':
15524 case 'P':
15525 case 'p':
15526 break;
15527
15528 case 's':
15529 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15530 {
15531 ix86_print_operand (file, x, 0);
15532 fputs (", ", file);
15533 }
15534 return;
15535
15536 case 'Y':
15537 switch (GET_CODE (x))
15538 {
15539 case NE:
15540 fputs ("neq", file);
15541 break;
15542 case EQ:
15543 fputs ("eq", file);
15544 break;
15545 case GE:
15546 case GEU:
15547 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15548 break;
15549 case GT:
15550 case GTU:
15551 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15552 break;
15553 case LE:
15554 case LEU:
15555 fputs ("le", file);
15556 break;
15557 case LT:
15558 case LTU:
15559 fputs ("lt", file);
15560 break;
15561 case UNORDERED:
15562 fputs ("unord", file);
15563 break;
15564 case ORDERED:
15565 fputs ("ord", file);
15566 break;
15567 case UNEQ:
15568 fputs ("ueq", file);
15569 break;
15570 case UNGE:
15571 fputs ("nlt", file);
15572 break;
15573 case UNGT:
15574 fputs ("nle", file);
15575 break;
15576 case UNLE:
15577 fputs ("ule", file);
15578 break;
15579 case UNLT:
15580 fputs ("ult", file);
15581 break;
15582 case LTGT:
15583 fputs ("une", file);
15584 break;
15585 default:
15586 output_operand_lossage ("operand is not a condition code, "
15587 "invalid operand code 'Y'");
15588 return;
15589 }
15590 return;
15591
15592 case 'D':
15593 /* Little bit of braindamage here. The SSE compare instructions
15594 use completely different names for the comparisons than the
15595 fp conditional moves do. */
15596 switch (GET_CODE (x))
15597 {
15598 case UNEQ:
15599 if (TARGET_AVX)
15600 {
15601 fputs ("eq_us", file);
15602 break;
15603 }
15604 case EQ:
15605 fputs ("eq", file);
15606 break;
15607 case UNLT:
15608 if (TARGET_AVX)
15609 {
15610 fputs ("nge", file);
15611 break;
15612 }
15613 case LT:
15614 fputs ("lt", file);
15615 break;
15616 case UNLE:
15617 if (TARGET_AVX)
15618 {
15619 fputs ("ngt", file);
15620 break;
15621 }
15622 case LE:
15623 fputs ("le", file);
15624 break;
15625 case UNORDERED:
15626 fputs ("unord", file);
15627 break;
15628 case LTGT:
15629 if (TARGET_AVX)
15630 {
15631 fputs ("neq_oq", file);
15632 break;
15633 }
15634 case NE:
15635 fputs ("neq", file);
15636 break;
15637 case GE:
15638 if (TARGET_AVX)
15639 {
15640 fputs ("ge", file);
15641 break;
15642 }
15643 case UNGE:
15644 fputs ("nlt", file);
15645 break;
15646 case GT:
15647 if (TARGET_AVX)
15648 {
15649 fputs ("gt", file);
15650 break;
15651 }
15652 case UNGT:
15653 fputs ("nle", file);
15654 break;
15655 case ORDERED:
15656 fputs ("ord", file);
15657 break;
15658 default:
15659 output_operand_lossage ("operand is not a condition code, "
15660 "invalid operand code 'D'");
15661 return;
15662 }
15663 return;
15664
15665 case 'F':
15666 case 'f':
15667 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15668 if (ASSEMBLER_DIALECT == ASM_ATT)
15669 putc ('.', file);
15670 #endif
15671
15672 case 'C':
15673 case 'c':
15674 if (!COMPARISON_P (x))
15675 {
15676 output_operand_lossage ("operand is not a condition code, "
15677 "invalid operand code '%c'", code);
15678 return;
15679 }
15680 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15681 code == 'c' || code == 'f',
15682 code == 'F' || code == 'f',
15683 file);
15684 return;
15685
15686 case 'H':
15687 if (!offsettable_memref_p (x))
15688 {
15689 output_operand_lossage ("operand is not an offsettable memory "
15690 "reference, invalid operand code 'H'");
15691 return;
15692 }
15693 /* It doesn't actually matter what mode we use here, as we're
15694 only going to use this for printing. */
15695 x = adjust_address_nv (x, DImode, 8);
15696 /* Output 'qword ptr' for intel assembler dialect. */
15697 if (ASSEMBLER_DIALECT == ASM_INTEL)
15698 code = 'q';
15699 break;
15700
15701 case 'K':
15702 gcc_assert (CONST_INT_P (x));
15703
15704 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15705 #ifdef HAVE_AS_IX86_HLE
15706 fputs ("xacquire ", file);
15707 #else
15708 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15709 #endif
15710 else if (INTVAL (x) & IX86_HLE_RELEASE)
15711 #ifdef HAVE_AS_IX86_HLE
15712 fputs ("xrelease ", file);
15713 #else
15714 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15715 #endif
15716 /* We do not want to print the value of the operand. */
15717 return;
15718
15719 case 'N':
15720 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15721 fputs ("{z}", file);
15722 return;
15723
15724 case 'r':
15725 gcc_assert (CONST_INT_P (x));
15726 gcc_assert (INTVAL (x) == ROUND_SAE);
15727
15728 if (ASSEMBLER_DIALECT == ASM_INTEL)
15729 fputs (", ", file);
15730
15731 fputs ("{sae}", file);
15732
15733 if (ASSEMBLER_DIALECT == ASM_ATT)
15734 fputs (", ", file);
15735
15736 return;
15737
15738 case 'R':
15739 gcc_assert (CONST_INT_P (x));
15740
15741 if (ASSEMBLER_DIALECT == ASM_INTEL)
15742 fputs (", ", file);
15743
15744 switch (INTVAL (x))
15745 {
15746 case ROUND_NEAREST_INT | ROUND_SAE:
15747 fputs ("{rn-sae}", file);
15748 break;
15749 case ROUND_NEG_INF | ROUND_SAE:
15750 fputs ("{rd-sae}", file);
15751 break;
15752 case ROUND_POS_INF | ROUND_SAE:
15753 fputs ("{ru-sae}", file);
15754 break;
15755 case ROUND_ZERO | ROUND_SAE:
15756 fputs ("{rz-sae}", file);
15757 break;
15758 default:
15759 gcc_unreachable ();
15760 }
15761
15762 if (ASSEMBLER_DIALECT == ASM_ATT)
15763 fputs (", ", file);
15764
15765 return;
15766
15767 case '*':
15768 if (ASSEMBLER_DIALECT == ASM_ATT)
15769 putc ('*', file);
15770 return;
15771
15772 case '&':
15773 {
15774 const char *name = get_some_local_dynamic_name ();
15775 if (name == NULL)
15776 output_operand_lossage ("'%%&' used without any "
15777 "local dynamic TLS references");
15778 else
15779 assemble_name (file, name);
15780 return;
15781 }
15782
15783 case '+':
15784 {
15785 rtx x;
15786
15787 if (!optimize
15788 || optimize_function_for_size_p (cfun)
15789 || !TARGET_BRANCH_PREDICTION_HINTS)
15790 return;
15791
15792 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15793 if (x)
15794 {
15795 int pred_val = XINT (x, 0);
15796
15797 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15798 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15799 {
15800 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15801 bool cputaken
15802 = final_forward_branch_p (current_output_insn) == 0;
15803
15804 /* Emit hints only in the case the default branch prediction
15805 heuristics would fail. */
15806 if (taken != cputaken)
15807 {
15808 /* We use 3e (DS) prefix for taken branches and
15809 2e (CS) prefix for not taken branches. */
15810 if (taken)
15811 fputs ("ds ; ", file);
15812 else
15813 fputs ("cs ; ", file);
15814 }
15815 }
15816 }
15817 return;
15818 }
15819
15820 case ';':
15821 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15822 putc (';', file);
15823 #endif
15824 return;
15825
15826 case '@':
15827 if (ASSEMBLER_DIALECT == ASM_ATT)
15828 putc ('%', file);
15829
15830 /* The kernel uses a different segment register for performance
15831 reasons; a system call would not have to trash the userspace
15832 segment register, which would be expensive. */
15833 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15834 fputs ("fs", file);
15835 else
15836 fputs ("gs", file);
15837 return;
15838
15839 case '~':
15840 putc (TARGET_AVX2 ? 'i' : 'f', file);
15841 return;
15842
15843 case '^':
15844 if (TARGET_64BIT && Pmode != word_mode)
15845 fputs ("addr32 ", file);
15846 return;
15847
15848 case '!':
15849 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15850 fputs ("bnd ", file);
15851 return;
15852
15853 default:
15854 output_operand_lossage ("invalid operand code '%c'", code);
15855 }
15856 }
15857
15858 if (REG_P (x))
15859 print_reg (x, code, file);
15860
15861 else if (MEM_P (x))
15862 {
15863 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15864 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15865 && GET_MODE (x) != BLKmode)
15866 {
15867 const char * size;
15868 switch (GET_MODE_SIZE (GET_MODE (x)))
15869 {
15870 case 1: size = "BYTE"; break;
15871 case 2: size = "WORD"; break;
15872 case 4: size = "DWORD"; break;
15873 case 8: size = "QWORD"; break;
15874 case 12: size = "TBYTE"; break;
15875 case 16:
15876 if (GET_MODE (x) == XFmode)
15877 size = "TBYTE";
15878 else
15879 size = "XMMWORD";
15880 break;
15881 case 32: size = "YMMWORD"; break;
15882 case 64: size = "ZMMWORD"; break;
15883 default:
15884 gcc_unreachable ();
15885 }
15886
15887 /* Check for explicit size override (codes 'b', 'w', 'k',
15888 'q' and 'x') */
15889 if (code == 'b')
15890 size = "BYTE";
15891 else if (code == 'w')
15892 size = "WORD";
15893 else if (code == 'k')
15894 size = "DWORD";
15895 else if (code == 'q')
15896 size = "QWORD";
15897 else if (code == 'x')
15898 size = "XMMWORD";
15899
15900 fputs (size, file);
15901 fputs (" PTR ", file);
15902 }
15903
15904 x = XEXP (x, 0);
15905 /* Avoid (%rip) for call operands. */
15906 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15907 && !CONST_INT_P (x))
15908 output_addr_const (file, x);
15909 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15910 output_operand_lossage ("invalid constraints for operand");
15911 else
15912 output_address (x);
15913 }
15914
15915 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15916 {
15917 REAL_VALUE_TYPE r;
15918 long l;
15919
15920 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15921 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15922
15923 if (ASSEMBLER_DIALECT == ASM_ATT)
15924 putc ('$', file);
15925 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15926 if (code == 'q')
15927 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15928 (unsigned long long) (int) l);
15929 else
15930 fprintf (file, "0x%08x", (unsigned int) l);
15931 }
15932
15933 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15934 {
15935 REAL_VALUE_TYPE r;
15936 long l[2];
15937
15938 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15939 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15940
15941 if (ASSEMBLER_DIALECT == ASM_ATT)
15942 putc ('$', file);
15943 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15944 }
15945
15946 /* These float cases don't actually occur as immediate operands. */
15947 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15948 {
15949 char dstr[30];
15950
15951 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15952 fputs (dstr, file);
15953 }
15954
15955 else
15956 {
15957 /* We have patterns that allow zero sets of memory, for instance.
15958 In 64-bit mode, we should probably support all 8-byte vectors,
15959 since we can in fact encode that into an immediate. */
15960 if (GET_CODE (x) == CONST_VECTOR)
15961 {
15962 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15963 x = const0_rtx;
15964 }
15965
15966 if (code != 'P' && code != 'p')
15967 {
15968 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15969 {
15970 if (ASSEMBLER_DIALECT == ASM_ATT)
15971 putc ('$', file);
15972 }
15973 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15974 || GET_CODE (x) == LABEL_REF)
15975 {
15976 if (ASSEMBLER_DIALECT == ASM_ATT)
15977 putc ('$', file);
15978 else
15979 fputs ("OFFSET FLAT:", file);
15980 }
15981 }
15982 if (CONST_INT_P (x))
15983 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15984 else if (flag_pic || MACHOPIC_INDIRECT)
15985 output_pic_addr_const (file, x, code);
15986 else
15987 output_addr_const (file, x);
15988 }
15989 }
15990
15991 static bool
15992 ix86_print_operand_punct_valid_p (unsigned char code)
15993 {
15994 return (code == '@' || code == '*' || code == '+' || code == '&'
15995 || code == ';' || code == '~' || code == '^' || code == '!');
15996 }
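
/* As an illustration only (hedged sketches, not quotes from the machine
   description files): the punctuation codes validated here are written
   directly into output templates, for example

     "vbroadcast%~128\t{%1, %0|%0, %1}"   -- '~' picks the i/f letter
     "%!jmp\t%A0"                         -- '!' prepends "bnd " for MPX

   Both template strings are hypothetical; the real ones live in
   i386.md and friends.  */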
15997 \f
15998 /* Print a memory operand whose address is ADDR. */
15999
16000 static void
16001 ix86_print_operand_address (FILE *file, rtx addr)
16002 {
16003 struct ix86_address parts;
16004 rtx base, index, disp;
16005 int scale;
16006 int ok;
16007 bool vsib = false;
16008 int code = 0;
16009
16010 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
16011 {
16012 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16013 gcc_assert (parts.index == NULL_RTX);
16014 parts.index = XVECEXP (addr, 0, 1);
16015 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
16016 addr = XVECEXP (addr, 0, 0);
16017 vsib = true;
16018 }
16019 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
16020 {
16021 gcc_assert (TARGET_64BIT);
16022 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16023 code = 'q';
16024 }
16025 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
16026 {
16027 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
16028 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
16029 if (parts.base != NULL_RTX)
16030 {
16031 parts.index = parts.base;
16032 parts.scale = 1;
16033 }
16034 parts.base = XVECEXP (addr, 0, 0);
16035 addr = XVECEXP (addr, 0, 0);
16036 }
16037 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
16038 {
16039 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16040 gcc_assert (parts.index == NULL_RTX);
16041 parts.index = XVECEXP (addr, 0, 1);
16042 addr = XVECEXP (addr, 0, 0);
16043 }
16044 else
16045 ok = ix86_decompose_address (addr, &parts);
16046
16047 gcc_assert (ok);
16048
16049 base = parts.base;
16050 index = parts.index;
16051 disp = parts.disp;
16052 scale = parts.scale;
16053
16054 switch (parts.seg)
16055 {
16056 case SEG_DEFAULT:
16057 break;
16058 case SEG_FS:
16059 case SEG_GS:
16060 if (ASSEMBLER_DIALECT == ASM_ATT)
16061 putc ('%', file);
16062 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16063 break;
16064 default:
16065 gcc_unreachable ();
16066 }
16067
16068 /* Use one byte shorter RIP relative addressing for 64bit mode. */
16069 if (TARGET_64BIT && !base && !index)
16070 {
16071 rtx symbol = disp;
16072
16073 if (GET_CODE (disp) == CONST
16074 && GET_CODE (XEXP (disp, 0)) == PLUS
16075 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16076 symbol = XEXP (XEXP (disp, 0), 0);
16077
16078 if (GET_CODE (symbol) == LABEL_REF
16079 || (GET_CODE (symbol) == SYMBOL_REF
16080 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16081 base = pc_rtx;
16082 }
16083 if (!base && !index)
16084 {
16085 /* A displacement-only address requires special attention. */
16086
16087 if (CONST_INT_P (disp))
16088 {
16089 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16090 fputs ("ds:", file);
16091 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16092 }
16093 else if (flag_pic)
16094 output_pic_addr_const (file, disp, 0);
16095 else
16096 output_addr_const (file, disp);
16097 }
16098 else
16099 {
16100 /* Print SImode register names to force addr32 prefix. */
16101 if (SImode_address_operand (addr, VOIDmode))
16102 {
16103 #ifdef ENABLE_CHECKING
16104 gcc_assert (TARGET_64BIT);
16105 switch (GET_CODE (addr))
16106 {
16107 case SUBREG:
16108 gcc_assert (GET_MODE (addr) == SImode);
16109 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16110 break;
16111 case ZERO_EXTEND:
16112 case AND:
16113 gcc_assert (GET_MODE (addr) == DImode);
16114 break;
16115 default:
16116 gcc_unreachable ();
16117 }
16118 #endif
16119 gcc_assert (!code);
16120 code = 'k';
16121 }
16122 else if (code == 0
16123 && TARGET_X32
16124 && disp
16125 && CONST_INT_P (disp)
16126 && INTVAL (disp) < -16*1024*1024)
16127 {
16128 /* X32 runs in 64-bit mode, where displacement, DISP, in
16129 address DISP(%r64), is encoded as 32-bit immediate sign-
16130 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16131 address is %r64 + 0xffffffffbffffd00. When %r64 <
16132 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16133 which is invalid for x32. The correct address is %r64
16134 - 0x40000300 == 0xf7ffdd64. To properly encode
16135 -0x40000300(%r64) for x32, we zero-extend negative
16136 displacement by forcing addr32 prefix which truncates
16137 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16138 zero-extend all negative displacements, including -1(%rsp).
16139 However, for small negative displacements, sign-extension
16140 won't cause overflow. We only zero-extend negative
16141 displacements if they are < -16*1024*1024, which is also used
16142 to check legitimate address displacements for PIC. */
16143 code = 'k';
16144 }
16145
16146 if (ASSEMBLER_DIALECT == ASM_ATT)
16147 {
16148 if (disp)
16149 {
16150 if (flag_pic)
16151 output_pic_addr_const (file, disp, 0);
16152 else if (GET_CODE (disp) == LABEL_REF)
16153 output_asm_label (disp);
16154 else
16155 output_addr_const (file, disp);
16156 }
16157
16158 putc ('(', file);
16159 if (base)
16160 print_reg (base, code, file);
16161 if (index)
16162 {
16163 putc (',', file);
16164 print_reg (index, vsib ? 0 : code, file);
16165 if (scale != 1 || vsib)
16166 fprintf (file, ",%d", scale);
16167 }
16168 putc (')', file);
16169 }
16170 else
16171 {
16172 rtx offset = NULL_RTX;
16173
16174 if (disp)
16175 {
16176 /* Pull out the offset of a symbol; print any symbol itself. */
16177 if (GET_CODE (disp) == CONST
16178 && GET_CODE (XEXP (disp, 0)) == PLUS
16179 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16180 {
16181 offset = XEXP (XEXP (disp, 0), 1);
16182 disp = gen_rtx_CONST (VOIDmode,
16183 XEXP (XEXP (disp, 0), 0));
16184 }
16185
16186 if (flag_pic)
16187 output_pic_addr_const (file, disp, 0);
16188 else if (GET_CODE (disp) == LABEL_REF)
16189 output_asm_label (disp);
16190 else if (CONST_INT_P (disp))
16191 offset = disp;
16192 else
16193 output_addr_const (file, disp);
16194 }
16195
16196 putc ('[', file);
16197 if (base)
16198 {
16199 print_reg (base, code, file);
16200 if (offset)
16201 {
16202 if (INTVAL (offset) >= 0)
16203 putc ('+', file);
16204 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16205 }
16206 }
16207 else if (offset)
16208 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16209 else
16210 putc ('0', file);
16211
16212 if (index)
16213 {
16214 putc ('+', file);
16215 print_reg (index, vsib ? 0 : code, file);
16216 if (scale != 1 || vsib)
16217 fprintf (file, "*%d", scale);
16218 }
16219 putc (']', file);
16220 }
16221 }
16222 }
16223
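/* Illustration of the two dialects handled above (a sketch): the same
   address parts base=%rax, index=%rbx, scale=4, disp=16 print as

     AT&T:   16(%rax,%rbx,4)
     Intel:  [rax+16+rbx*4]

   matching the "disp(base,index,scale)" and "[base+disp+index*scale]"
   shapes emitted by the two branches.  */
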
16224 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
16225
16226 static bool
16227 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16228 {
16229 rtx op;
16230
16231 if (GET_CODE (x) != UNSPEC)
16232 return false;
16233
16234 op = XVECEXP (x, 0, 0);
16235 switch (XINT (x, 1))
16236 {
16237 case UNSPEC_GOTTPOFF:
16238 output_addr_const (file, op);
16239 /* FIXME: This might be @TPOFF in Sun ld. */
16240 fputs ("@gottpoff", file);
16241 break;
16242 case UNSPEC_TPOFF:
16243 output_addr_const (file, op);
16244 fputs ("@tpoff", file);
16245 break;
16246 case UNSPEC_NTPOFF:
16247 output_addr_const (file, op);
16248 if (TARGET_64BIT)
16249 fputs ("@tpoff", file);
16250 else
16251 fputs ("@ntpoff", file);
16252 break;
16253 case UNSPEC_DTPOFF:
16254 output_addr_const (file, op);
16255 fputs ("@dtpoff", file);
16256 break;
16257 case UNSPEC_GOTNTPOFF:
16258 output_addr_const (file, op);
16259 if (TARGET_64BIT)
16260 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16261 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16262 else
16263 fputs ("@gotntpoff", file);
16264 break;
16265 case UNSPEC_INDNTPOFF:
16266 output_addr_const (file, op);
16267 fputs ("@indntpoff", file);
16268 break;
16269 #if TARGET_MACHO
16270 case UNSPEC_MACHOPIC_OFFSET:
16271 output_addr_const (file, op);
16272 putc ('-', file);
16273 machopic_output_function_base_name (file);
16274 break;
16275 #endif
16276
16277 case UNSPEC_STACK_CHECK:
16278 {
16279 int offset;
16280
16281 gcc_assert (flag_split_stack);
16282
16283 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16284 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16285 #else
16286 gcc_unreachable ();
16287 #endif
16288
16289 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16290 }
16291 break;
16292
16293 default:
16294 return false;
16295 }
16296
16297 return true;
16298 }
16299 \f
16300 /* Split one or more double-mode RTL references into pairs of half-mode
16301 references. The RTL can be REG, offsettable MEM, integer constant, or
16302 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16303 split and "num" is its length. lo_half and hi_half are output arrays
16304 that parallel "operands". */
16305
16306 void
16307 split_double_mode (machine_mode mode, rtx operands[],
16308 int num, rtx lo_half[], rtx hi_half[])
16309 {
16310 machine_mode half_mode;
16311 unsigned int byte;
16312
16313 switch (mode)
16314 {
16315 case TImode:
16316 half_mode = DImode;
16317 break;
16318 case DImode:
16319 half_mode = SImode;
16320 break;
16321 default:
16322 gcc_unreachable ();
16323 }
16324
16325 byte = GET_MODE_SIZE (half_mode);
16326
16327 while (num--)
16328 {
16329 rtx op = operands[num];
16330
16331 /* simplify_subreg refuses to split volatile memory addresses,
16332 but we still have to handle them. */
16333 if (MEM_P (op))
16334 {
16335 lo_half[num] = adjust_address (op, half_mode, 0);
16336 hi_half[num] = adjust_address (op, half_mode, byte);
16337 }
16338 else
16339 {
16340 lo_half[num] = simplify_gen_subreg (half_mode, op,
16341 GET_MODE (op) == VOIDmode
16342 ? mode : GET_MODE (op), 0);
16343 hi_half[num] = simplify_gen_subreg (half_mode, op,
16344 GET_MODE (op) == VOIDmode
16345 ? mode : GET_MODE (op), byte);
16346 }
16347 }
16348 }
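
/* A minimal usage sketch (hypothetical caller, not from this file):
   splitting two DImode operands of a move on ia32 before emitting the
   SImode halves separately:

     rtx lo[2], hi[2];
     split_double_mode (DImode, operands, 2, lo, hi);
     emit_move_insn (lo[0], lo[1]);
     emit_move_insn (hi[0], hi[1]);
*/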
16349 \f
16350 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16351 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16352 is the expression of the binary operation. The output may either be
16353 emitted here, or returned to the caller, like all output_* functions.
16354
16355 There is no guarantee that the operands are the same mode, as they
16356 might be within FLOAT or FLOAT_EXTEND expressions. */
16357
16358 #ifndef SYSV386_COMPAT
16359 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16360 wants to fix the assemblers because that causes incompatibility
16361 with gcc. No-one wants to fix gcc because that causes
16362 incompatibility with assemblers... You can use the option of
16363 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16364 #define SYSV386_COMPAT 1
16365 #endif
16366
16367 const char *
16368 output_387_binary_op (rtx insn, rtx *operands)
16369 {
16370 static char buf[40];
16371 const char *p;
16372 const char *ssep;
16373 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16374
16375 #ifdef ENABLE_CHECKING
16376 /* Even if we do not want to check the inputs, this documents the input
16377 constraints, which helps in understanding the following code. */
16378 if (STACK_REG_P (operands[0])
16379 && ((REG_P (operands[1])
16380 && REGNO (operands[0]) == REGNO (operands[1])
16381 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16382 || (REG_P (operands[2])
16383 && REGNO (operands[0]) == REGNO (operands[2])
16384 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16385 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16386 ; /* ok */
16387 else
16388 gcc_assert (is_sse);
16389 #endif
16390
16391 switch (GET_CODE (operands[3]))
16392 {
16393 case PLUS:
16394 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16395 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16396 p = "fiadd";
16397 else
16398 p = "fadd";
16399 ssep = "vadd";
16400 break;
16401
16402 case MINUS:
16403 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16404 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16405 p = "fisub";
16406 else
16407 p = "fsub";
16408 ssep = "vsub";
16409 break;
16410
16411 case MULT:
16412 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16413 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16414 p = "fimul";
16415 else
16416 p = "fmul";
16417 ssep = "vmul";
16418 break;
16419
16420 case DIV:
16421 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16422 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16423 p = "fidiv";
16424 else
16425 p = "fdiv";
16426 ssep = "vdiv";
16427 break;
16428
16429 default:
16430 gcc_unreachable ();
16431 }
16432
16433 if (is_sse)
16434 {
16435 if (TARGET_AVX)
16436 {
16437 strcpy (buf, ssep);
16438 if (GET_MODE (operands[0]) == SFmode)
16439 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16440 else
16441 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16442 }
16443 else
16444 {
16445 strcpy (buf, ssep + 1);
16446 if (GET_MODE (operands[0]) == SFmode)
16447 strcat (buf, "ss\t{%2, %0|%0, %2}");
16448 else
16449 strcat (buf, "sd\t{%2, %0|%0, %2}");
16450 }
16451 return buf;
16452 }
16453 strcpy (buf, p);
16454
16455 switch (GET_CODE (operands[3]))
16456 {
16457 case MULT:
16458 case PLUS:
16459 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16460 std::swap (operands[1], operands[2]);
16461
16462 /* We know operands[0] == operands[1]. */
16463
16464 if (MEM_P (operands[2]))
16465 {
16466 p = "%Z2\t%2";
16467 break;
16468 }
16469
16470 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16471 {
16472 if (STACK_TOP_P (operands[0]))
16473 /* How is it that we are storing to a dead operand[2]?
16474 Well, presumably operands[1] is dead too. We can't
16475 store the result to st(0) as st(0) gets popped on this
16476 instruction. Instead store to operands[2] (which I
16477 think has to be st(1)). st(1) will be popped later.
16478 gcc <= 2.8.1 didn't have this check and generated
16479 assembly code that the Unixware assembler rejected. */
16480 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16481 else
16482 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16483 break;
16484 }
16485
16486 if (STACK_TOP_P (operands[0]))
16487 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16488 else
16489 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16490 break;
16491
16492 case MINUS:
16493 case DIV:
16494 if (MEM_P (operands[1]))
16495 {
16496 p = "r%Z1\t%1";
16497 break;
16498 }
16499
16500 if (MEM_P (operands[2]))
16501 {
16502 p = "%Z2\t%2";
16503 break;
16504 }
16505
16506 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16507 {
16508 #if SYSV386_COMPAT
16509 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16510 derived assemblers, confusingly reverse the direction of
16511 the operation for fsub{r} and fdiv{r} when the
16512 destination register is not st(0). The Intel assembler
16513 doesn't have this brain damage. Read !SYSV386_COMPAT to
16514 figure out what the hardware really does. */
16515 if (STACK_TOP_P (operands[0]))
16516 p = "{p\t%0, %2|rp\t%2, %0}";
16517 else
16518 p = "{rp\t%2, %0|p\t%0, %2}";
16519 #else
16520 if (STACK_TOP_P (operands[0]))
16521 /* As above for fmul/fadd, we can't store to st(0). */
16522 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16523 else
16524 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16525 #endif
16526 break;
16527 }
16528
16529 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16530 {
16531 #if SYSV386_COMPAT
16532 if (STACK_TOP_P (operands[0]))
16533 p = "{rp\t%0, %1|p\t%1, %0}";
16534 else
16535 p = "{p\t%1, %0|rp\t%0, %1}";
16536 #else
16537 if (STACK_TOP_P (operands[0]))
16538 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16539 else
16540 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16541 #endif
16542 break;
16543 }
16544
16545 if (STACK_TOP_P (operands[0]))
16546 {
16547 if (STACK_TOP_P (operands[1]))
16548 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16549 else
16550 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16551 break;
16552 }
16553 else if (STACK_TOP_P (operands[1]))
16554 {
16555 #if SYSV386_COMPAT
16556 p = "{\t%1, %0|r\t%0, %1}";
16557 #else
16558 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16559 #endif
16560 }
16561 else
16562 {
16563 #if SYSV386_COMPAT
16564 p = "{r\t%2, %0|\t%0, %2}";
16565 #else
16566 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16567 #endif
16568 }
16569 break;
16570
16571 default:
16572 gcc_unreachable ();
16573 }
16574
16575 strcat (buf, p);
16576 return buf;
16577 }
16578
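/* For illustration (derived from the code above, shown as a sketch):
   for an SFmode SSE add with AVX enabled the returned template is

     "vaddss\t{%2, %1, %0|%0, %1, %2}"

   while the same operation without AVX yields

     "addss\t{%2, %0|%0, %2}"

   and the 387 path composes "fadd"/"fiadd" with one of the
   suffix/operand fragments selected below.  */
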
16579 /* Check if a 256bit AVX register is referenced inside of EXP. */
16580
16581 static bool
16582 ix86_check_avx256_register (const_rtx exp)
16583 {
16584 if (GET_CODE (exp) == SUBREG)
16585 exp = SUBREG_REG (exp);
16586
16587 return (REG_P (exp)
16588 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16589 }
16590
16591 /* Return needed mode for entity in optimize_mode_switching pass. */
16592
16593 static int
16594 ix86_avx_u128_mode_needed (rtx_insn *insn)
16595 {
16596 if (CALL_P (insn))
16597 {
16598 rtx link;
16599
16600 /* Needed mode is set to AVX_U128_CLEAN if there are
16601 no 256bit modes used in function arguments. */
16602 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16603 link;
16604 link = XEXP (link, 1))
16605 {
16606 if (GET_CODE (XEXP (link, 0)) == USE)
16607 {
16608 rtx arg = XEXP (XEXP (link, 0), 0);
16609
16610 if (ix86_check_avx256_register (arg))
16611 return AVX_U128_DIRTY;
16612 }
16613 }
16614
16615 return AVX_U128_CLEAN;
16616 }
16617
16618 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16619 changes state only when a 256bit register is written to, but we need
16620 to prevent the compiler from moving the optimal insertion point above
16621 an eventual read from a 256bit register. */
16622 subrtx_iterator::array_type array;
16623 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16624 if (ix86_check_avx256_register (*iter))
16625 return AVX_U128_DIRTY;
16626
16627 return AVX_U128_ANY;
16628 }
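
/* Taken together with ix86_avx_u128_mode_after/entry/exit and
   ix86_emit_mode_set below, these hooks drive a simple state machine
   (sketch):

     AVX_U128_CLEAN  -- insn references a 256bit reg -->  AVX_U128_DIRTY
     AVX_U128_DIRTY  -- vzeroupper emitted on a
                        DIRTY->CLEAN transition      -->  AVX_U128_CLEAN

   so vzeroupper is inserted only where a transition back to the clean
   state is actually required.  */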
16629
16630 /* Return mode that i387 must be switched into
16631 prior to the execution of insn. */
16632
16633 static int
16634 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16635 {
16636 enum attr_i387_cw mode;
16637
16638 /* The mode UNINITIALIZED is used to store the control word after a
16639 function call or ASM pattern. The mode ANY specifies that the function
16640 has no requirements on the control word and makes no changes in the
16641 bits we are interested in. */
16642
16643 if (CALL_P (insn)
16644 || (NONJUMP_INSN_P (insn)
16645 && (asm_noperands (PATTERN (insn)) >= 0
16646 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16647 return I387_CW_UNINITIALIZED;
16648
16649 if (recog_memoized (insn) < 0)
16650 return I387_CW_ANY;
16651
16652 mode = get_attr_i387_cw (insn);
16653
16654 switch (entity)
16655 {
16656 case I387_TRUNC:
16657 if (mode == I387_CW_TRUNC)
16658 return mode;
16659 break;
16660
16661 case I387_FLOOR:
16662 if (mode == I387_CW_FLOOR)
16663 return mode;
16664 break;
16665
16666 case I387_CEIL:
16667 if (mode == I387_CW_CEIL)
16668 return mode;
16669 break;
16670
16671 case I387_MASK_PM:
16672 if (mode == I387_CW_MASK_PM)
16673 return mode;
16674 break;
16675
16676 default:
16677 gcc_unreachable ();
16678 }
16679
16680 return I387_CW_ANY;
16681 }
16682
16683 /* Return mode that entity must be switched into
16684 prior to the execution of insn. */
16685
16686 static int
16687 ix86_mode_needed (int entity, rtx_insn *insn)
16688 {
16689 switch (entity)
16690 {
16691 case AVX_U128:
16692 return ix86_avx_u128_mode_needed (insn);
16693 case I387_TRUNC:
16694 case I387_FLOOR:
16695 case I387_CEIL:
16696 case I387_MASK_PM:
16697 return ix86_i387_mode_needed (entity, insn);
16698 default:
16699 gcc_unreachable ();
16700 }
16701 return 0;
16702 }
16703
16704 /* Check if a 256bit AVX register is referenced in stores. */
16705
16706 static void
16707 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16708 {
16709 if (ix86_check_avx256_register (dest))
16710 {
16711 bool *used = (bool *) data;
16712 *used = true;
16713 }
16714 }
16715
16716 /* Calculate mode of upper 128bit AVX registers after the insn. */
16717
16718 static int
16719 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16720 {
16721 rtx pat = PATTERN (insn);
16722
16723 if (vzeroupper_operation (pat, VOIDmode)
16724 || vzeroall_operation (pat, VOIDmode))
16725 return AVX_U128_CLEAN;
16726
16727 /* We know that the state is clean after a CALL insn if no 256bit
16728 register is used in the function return value. */
16729 if (CALL_P (insn))
16730 {
16731 bool avx_reg256_found = false;
16732 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16733
16734 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16735 }
16736
16737 /* Otherwise, return current mode. Remember that if insn
16738 references AVX 256bit registers, the mode was already changed
16739 to DIRTY from MODE_NEEDED. */
16740 return mode;
16741 }
16742
16743 /* Return the mode that an insn results in. */
16744
16745 static int
16746 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16747 {
16748 switch (entity)
16749 {
16750 case AVX_U128:
16751 return ix86_avx_u128_mode_after (mode, insn);
16752 case I387_TRUNC:
16753 case I387_FLOOR:
16754 case I387_CEIL:
16755 case I387_MASK_PM:
16756 return mode;
16757 default:
16758 gcc_unreachable ();
16759 }
16760 }
16761
16762 static int
16763 ix86_avx_u128_mode_entry (void)
16764 {
16765 tree arg;
16766
16767 /* Entry mode is set to AVX_U128_DIRTY if there are
16768 256bit modes used in function arguments. */
16769 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16770 arg = TREE_CHAIN (arg))
16771 {
16772 rtx incoming = DECL_INCOMING_RTL (arg);
16773
16774 if (incoming && ix86_check_avx256_register (incoming))
16775 return AVX_U128_DIRTY;
16776 }
16777
16778 return AVX_U128_CLEAN;
16779 }
16780
16781 /* Return a mode that ENTITY is assumed to be
16782 switched to at function entry. */
16783
16784 static int
16785 ix86_mode_entry (int entity)
16786 {
16787 switch (entity)
16788 {
16789 case AVX_U128:
16790 return ix86_avx_u128_mode_entry ();
16791 case I387_TRUNC:
16792 case I387_FLOOR:
16793 case I387_CEIL:
16794 case I387_MASK_PM:
16795 return I387_CW_ANY;
16796 default:
16797 gcc_unreachable ();
16798 }
16799 }
16800
16801 static int
16802 ix86_avx_u128_mode_exit (void)
16803 {
16804 rtx reg = crtl->return_rtx;
16805
16806 /* Exit mode is set to AVX_U128_DIRTY if there are
16807 256bit modes used in the function return register. */
16808 if (reg && ix86_check_avx256_register (reg))
16809 return AVX_U128_DIRTY;
16810
16811 return AVX_U128_CLEAN;
16812 }
16813
16814 /* Return a mode that ENTITY is assumed to be
16815 switched to at function exit. */
16816
16817 static int
16818 ix86_mode_exit (int entity)
16819 {
16820 switch (entity)
16821 {
16822 case AVX_U128:
16823 return ix86_avx_u128_mode_exit ();
16824 case I387_TRUNC:
16825 case I387_FLOOR:
16826 case I387_CEIL:
16827 case I387_MASK_PM:
16828 return I387_CW_ANY;
16829 default:
16830 gcc_unreachable ();
16831 }
16832 }
16833
16834 static int
16835 ix86_mode_priority (int, int n)
16836 {
16837 return n;
16838 }
16839
16840 /* Output code to initialize the control word copies used by trunc?f?i and
16841 rounding patterns. The current control word is saved to a stack slot,
16842 and a copy modified for MODE is stored in the stack slot for MODE. */
16843
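/* For reference, the x87 control-word bits touched below (a summary of
   the hardware layout, not of anything defined in this file):

     bits 11:10  rounding control: 00 = nearest, 01 = down (toward -oo),
                 10 = up (toward +oo), 11 = toward zero (truncate)
     bit  5      precision-exception mask (PM)

   Hence 0x0c00 selects truncation, 0x0400 floor, 0x0800 ceiling, and
   0x0020 masks the precision exception for nearbyint().  */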
16844 static void
16845 emit_i387_cw_initialization (int mode)
16846 {
16847 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16848 rtx new_mode;
16849
16850 enum ix86_stack_slot slot;
16851
16852 rtx reg = gen_reg_rtx (HImode);
16853
16854 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16855 emit_move_insn (reg, copy_rtx (stored_mode));
16856
16857 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16858 || optimize_insn_for_size_p ())
16859 {
16860 switch (mode)
16861 {
16862 case I387_CW_TRUNC:
16863 /* round toward zero (truncate) */
16864 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16865 slot = SLOT_CW_TRUNC;
16866 break;
16867
16868 case I387_CW_FLOOR:
16869 /* round down toward -oo */
16870 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16871 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16872 slot = SLOT_CW_FLOOR;
16873 break;
16874
16875 case I387_CW_CEIL:
16876 /* round up toward +oo */
16877 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16878 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16879 slot = SLOT_CW_CEIL;
16880 break;
16881
16882 case I387_CW_MASK_PM:
16883 /* mask precision exception for nearbyint() */
16884 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16885 slot = SLOT_CW_MASK_PM;
16886 break;
16887
16888 default:
16889 gcc_unreachable ();
16890 }
16891 }
16892 else
16893 {
16894 switch (mode)
16895 {
16896 case I387_CW_TRUNC:
16897 /* round toward zero (truncate) */
16898 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16899 slot = SLOT_CW_TRUNC;
16900 break;
16901
16902 case I387_CW_FLOOR:
16903 /* round down toward -oo */
16904 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16905 slot = SLOT_CW_FLOOR;
16906 break;
16907
16908 case I387_CW_CEIL:
16909 /* round up toward +oo */
16910 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16911 slot = SLOT_CW_CEIL;
16912 break;
16913
16914 case I387_CW_MASK_PM:
16915 /* mask precision exception for nearbyint() */
16916 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16917 slot = SLOT_CW_MASK_PM;
16918 break;
16919
16920 default:
16921 gcc_unreachable ();
16922 }
16923 }
16924
16925 gcc_assert (slot < MAX_386_STACK_LOCALS);
16926
16927 new_mode = assign_386_stack_local (HImode, slot);
16928 emit_move_insn (new_mode, reg);
16929 }
16930
16931 /* Emit vzeroupper. */
16932
16933 void
16934 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16935 {
16936 int i;
16937
16938 /* Cancel automatic vzeroupper insertion if there are
16939 live call-saved SSE registers at the insertion point. */
16940
16941 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16942 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16943 return;
16944
16945 if (TARGET_64BIT)
16946 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16947 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16948 return;
16949
16950 emit_insn (gen_avx_vzeroupper ());
16951 }
16952
16955 /* Generate one or more insns to set ENTITY to MODE.  REGS_LIVE
16956 is the set of hard registers live at the point where the insn(s)
16957 are to be inserted. */
16958
16959 static void
16960 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16961 HARD_REG_SET regs_live)
16962 {
16963 switch (entity)
16964 {
16965 case AVX_U128:
16966 if (mode == AVX_U128_CLEAN)
16967 ix86_avx_emit_vzeroupper (regs_live);
16968 break;
16969 case I387_TRUNC:
16970 case I387_FLOOR:
16971 case I387_CEIL:
16972 case I387_MASK_PM:
16973 if (mode != I387_CW_ANY
16974 && mode != I387_CW_UNINITIALIZED)
16975 emit_i387_cw_initialization (mode);
16976 break;
16977 default:
16978 gcc_unreachable ();
16979 }
16980 }
16981
16982 /* Output code for INSN to convert a float to a signed int. OPERANDS
16983 are the insn operands. The output may be [HSD]Imode and the input
16984 operand may be [SDX]Fmode. */
16985
16986 const char *
16987 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
16988 {
16989 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16990 int dimode_p = GET_MODE (operands[0]) == DImode;
16991 int round_mode = get_attr_i387_cw (insn);
16992
16993 /* Jump through a hoop or two for DImode, since the hardware has no
16994 non-popping instruction. We used to do this a different way, but
16995 that was somewhat fragile and broke with post-reload splitters. */
16996 if ((dimode_p || fisttp) && !stack_top_dies)
16997 output_asm_insn ("fld\t%y1", operands);
16998
16999 gcc_assert (STACK_TOP_P (operands[1]));
17000 gcc_assert (MEM_P (operands[0]));
17001 gcc_assert (GET_MODE (operands[1]) != TFmode);
17002
17003 if (fisttp)
17004 output_asm_insn ("fisttp%Z0\t%0", operands);
17005 else
17006 {
17007 if (round_mode != I387_CW_ANY)
17008 output_asm_insn ("fldcw\t%3", operands);
17009 if (stack_top_dies || dimode_p)
17010 output_asm_insn ("fistp%Z0\t%0", operands);
17011 else
17012 output_asm_insn ("fist%Z0\t%0", operands);
17013 if (round_mode != I387_CW_ANY)
17014 output_asm_insn ("fldcw\t%2", operands);
17015 }
17016
17017 return "";
17018 }
17019
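/* As an illustration of the sequences produced above (AT&T syntax,
   hedged sketch; the size suffix actually comes from %Z0):

     with SSE3 fisttp:      fisttpl  dest         (truncates regardless of CW)
     without, CW juggling:  fldcw    %3           (control word prepared by
                            fistpl   dest          emit_i387_cw_initialization)
                            fldcw    %2           (restore the saved word)
*/
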
17020 /* Output code for x87 ffreep insn. The OPNO argument, which may only
17021 have the values zero or one, indicates the ffreep insn's operand
17022 from the OPERANDS array. */
17023
17024 static const char *
17025 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
17026 {
17027 if (TARGET_USE_FFREEP)
17028 #ifdef HAVE_AS_IX86_FFREEP
17029 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
17030 #else
17031 {
17032 static char retval[32];
17033 int regno = REGNO (operands[opno]);
17034
17035 gcc_assert (STACK_REGNO_P (regno));
17036
17037 regno -= FIRST_STACK_REG;
17038
17039 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
17040 return retval;
17041 }
17042 #endif
17043
17044 return opno ? "fstp\t%y1" : "fstp\t%y0";
17045 }
17046
17047
17048 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
17049 should be used. UNORDERED_P is true when fucom should be used. */
17050
17051 const char *
17052 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17053 {
17054 int stack_top_dies;
17055 rtx cmp_op0, cmp_op1;
17056 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17057
17058 if (eflags_p)
17059 {
17060 cmp_op0 = operands[0];
17061 cmp_op1 = operands[1];
17062 }
17063 else
17064 {
17065 cmp_op0 = operands[1];
17066 cmp_op1 = operands[2];
17067 }
17068
17069 if (is_sse)
17070 {
17071 if (GET_MODE (operands[0]) == SFmode)
17072 if (unordered_p)
17073 return "%vucomiss\t{%1, %0|%0, %1}";
17074 else
17075 return "%vcomiss\t{%1, %0|%0, %1}";
17076 else
17077 if (unordered_p)
17078 return "%vucomisd\t{%1, %0|%0, %1}";
17079 else
17080 return "%vcomisd\t{%1, %0|%0, %1}";
17081 }
17082
17083 gcc_assert (STACK_TOP_P (cmp_op0));
17084
17085 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17086
17087 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17088 {
17089 if (stack_top_dies)
17090 {
17091 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17092 return output_387_ffreep (operands, 1);
17093 }
17094 else
17095 return "ftst\n\tfnstsw\t%0";
17096 }
17097
17098 if (STACK_REG_P (cmp_op1)
17099 && stack_top_dies
17100 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17101 && REGNO (cmp_op1) != FIRST_STACK_REG)
17102 {
17103 /* If the top of the 387 stack dies and the other operand is also
17104 a stack register that dies, then this must be an
17105 `fcompp' float compare. */
17106
17107 if (eflags_p)
17108 {
17109 /* There is no double popping fcomi variant. Fortunately,
17110 eflags is immune from the fstp's cc clobbering. */
17111 if (unordered_p)
17112 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17113 else
17114 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17115 return output_387_ffreep (operands, 0);
17116 }
17117 else
17118 {
17119 if (unordered_p)
17120 return "fucompp\n\tfnstsw\t%0";
17121 else
17122 return "fcompp\n\tfnstsw\t%0";
17123 }
17124 }
17125 else
17126 {
17127 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
17128
17129 static const char * const alt[16] =
17130 {
17131 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17132 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17133 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17134 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17135
17136 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17137 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17138 NULL,
17139 NULL,
17140
17141 "fcomi\t{%y1, %0|%0, %y1}",
17142 "fcomip\t{%y1, %0|%0, %y1}",
17143 "fucomi\t{%y1, %0|%0, %y1}",
17144 "fucomip\t{%y1, %0|%0, %y1}",
17145
17146 NULL,
17147 NULL,
17148 NULL,
17149 NULL
17150 };
17151
17152 int mask;
17153 const char *ret;
17154
17155 mask = eflags_p << 3;
17156 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17157 mask |= unordered_p << 1;
17158 mask |= stack_top_dies;
17159
17160 gcc_assert (mask < 16);
17161 ret = alt[mask];
17162 gcc_assert (ret);
17163
17164 return ret;
17165 }
17166 }
17167
17168 void
17169 ix86_output_addr_vec_elt (FILE *file, int value)
17170 {
17171 const char *directive = ASM_LONG;
17172
17173 #ifdef ASM_QUAD
17174 if (TARGET_LP64)
17175 directive = ASM_QUAD;
17176 #else
17177 gcc_assert (!TARGET_64BIT);
17178 #endif
17179
17180 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17181 }
17182
17183 void
17184 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17185 {
17186 const char *directive = ASM_LONG;
17187
17188 #ifdef ASM_QUAD
17189 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17190 directive = ASM_QUAD;
17191 #else
17192 gcc_assert (!TARGET_64BIT);
17193 #endif
17194 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17195 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17196 fprintf (file, "%s%s%d-%s%d\n",
17197 directive, LPREFIX, value, LPREFIX, rel);
17198 else if (HAVE_AS_GOTOFF_IN_DATA)
17199 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17200 #if TARGET_MACHO
17201 else if (TARGET_MACHO)
17202 {
17203 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17204 machopic_output_function_base_name (file);
17205 putc ('\n', file);
17206 }
17207 #endif
17208 else
17209 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17210 GOT_SYMBOL_NAME, LPREFIX, value);
17211 }
17212 \f
17213 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17214 for the target. */
17215
17216 void
17217 ix86_expand_clear (rtx dest)
17218 {
17219 rtx tmp;
17220
17221 /* We play register width games, which are only valid after reload. */
17222 gcc_assert (reload_completed);
17223
17224 /* Avoid HImode and its attendant prefix byte. */
17225 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17226 dest = gen_rtx_REG (SImode, REGNO (dest));
17227 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
17228
17229 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17230 {
17231 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17232 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17233 }
17234
17235 emit_insn (tmp);
17236 }
17237
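/* Illustration (a sketch of what the expander above produces): for
   !TARGET_USE_MOV0, or when optimizing for size, clearing %eax becomes

     (parallel [(set (reg:SI ax) (const_int 0))
                (clobber (reg:CC flags))])

   which prints as "xorl %eax, %eax"; otherwise a plain
   "movl $0, %eax" set is emitted.  */
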
17238 /* X is an unchanging MEM. If it is a constant pool reference, return
17239 the constant pool rtx, else NULL. */
17240
17241 rtx
17242 maybe_get_pool_constant (rtx x)
17243 {
17244 x = ix86_delegitimize_address (XEXP (x, 0));
17245
17246 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17247 return get_pool_constant (x);
17248
17249 return NULL_RTX;
17250 }
17251
17252 void
17253 ix86_expand_move (machine_mode mode, rtx operands[])
17254 {
17255 rtx op0, op1;
17256 enum tls_model model;
17257
17258 op0 = operands[0];
17259 op1 = operands[1];
17260
17261 if (GET_CODE (op1) == SYMBOL_REF)
17262 {
17263 rtx tmp;
17264
17265 model = SYMBOL_REF_TLS_MODEL (op1);
17266 if (model)
17267 {
17268 op1 = legitimize_tls_address (op1, model, true);
17269 op1 = force_operand (op1, op0);
17270 if (op1 == op0)
17271 return;
17272 op1 = convert_to_mode (mode, op1, 1);
17273 }
17274 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17275 op1 = tmp;
17276 }
17277 else if (GET_CODE (op1) == CONST
17278 && GET_CODE (XEXP (op1, 0)) == PLUS
17279 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17280 {
17281 rtx addend = XEXP (XEXP (op1, 0), 1);
17282 rtx symbol = XEXP (XEXP (op1, 0), 0);
17283 rtx tmp;
17284
17285 model = SYMBOL_REF_TLS_MODEL (symbol);
17286 if (model)
17287 tmp = legitimize_tls_address (symbol, model, true);
17288 else
17289 tmp = legitimize_pe_coff_symbol (symbol, true);
17290
17291 if (tmp)
17292 {
17293 tmp = force_operand (tmp, NULL);
17294 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17295 op0, 1, OPTAB_DIRECT);
17296 if (tmp == op0)
17297 return;
17298 op1 = convert_to_mode (mode, tmp, 1);
17299 }
17300 }
17301
17302 if ((flag_pic || MACHOPIC_INDIRECT)
17303 && symbolic_operand (op1, mode))
17304 {
17305 if (TARGET_MACHO && !TARGET_64BIT)
17306 {
17307 #if TARGET_MACHO
17308 /* dynamic-no-pic */
17309 if (MACHOPIC_INDIRECT)
17310 {
17311 rtx temp = ((reload_in_progress
17312 || ((op0 && REG_P (op0))
17313 && mode == Pmode))
17314 ? op0 : gen_reg_rtx (Pmode));
17315 op1 = machopic_indirect_data_reference (op1, temp);
17316 if (MACHOPIC_PURE)
17317 op1 = machopic_legitimize_pic_address (op1, mode,
17318 temp == op1 ? 0 : temp);
17319 }
17320 if (op0 != op1 && GET_CODE (op0) != MEM)
17321 {
17322 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
17323 emit_insn (insn);
17324 return;
17325 }
17326 if (GET_CODE (op0) == MEM)
17327 op1 = force_reg (Pmode, op1);
17328 else
17329 {
17330 rtx temp = op0;
17331 if (GET_CODE (temp) != REG)
17332 temp = gen_reg_rtx (Pmode);
17333 temp = legitimize_pic_address (op1, temp);
17334 if (temp == op0)
17335 return;
17336 op1 = temp;
17337 }
17338 /* dynamic-no-pic */
17339 #endif
17340 }
17341 else
17342 {
17343 if (MEM_P (op0))
17344 op1 = force_reg (mode, op1);
17345 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17346 {
17347 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17348 op1 = legitimize_pic_address (op1, reg);
17349 if (op0 == op1)
17350 return;
17351 op1 = convert_to_mode (mode, op1, 1);
17352 }
17353 }
17354 }
17355 else
17356 {
17357 if (MEM_P (op0)
17358 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17359 || !push_operand (op0, mode))
17360 && MEM_P (op1))
17361 op1 = force_reg (mode, op1);
17362
17363 if (push_operand (op0, mode)
17364 && ! general_no_elim_operand (op1, mode))
17365 op1 = copy_to_mode_reg (mode, op1);
17366
17367 /* Force large constants in 64bit compilation into a register
17368 to get them CSEed. */
17369 if (can_create_pseudo_p ()
17370 && (mode == DImode) && TARGET_64BIT
17371 && immediate_operand (op1, mode)
17372 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17373 && !register_operand (op0, mode)
17374 && optimize)
17375 op1 = copy_to_mode_reg (mode, op1);
17376
17377 if (can_create_pseudo_p ()
17378 && FLOAT_MODE_P (mode)
17379 && GET_CODE (op1) == CONST_DOUBLE)
17380 {
17381 /* If we are loading a floating point constant to a register,
17382 force the value to memory now, since we'll get better code
17383 out the back end. */
17384
17385 op1 = validize_mem (force_const_mem (mode, op1));
17386 if (!register_operand (op0, mode))
17387 {
17388 rtx temp = gen_reg_rtx (mode);
17389 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
17390 emit_move_insn (op0, temp);
17391 return;
17392 }
17393 }
17394 }
17395
17396 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17397 }
17398
17399 void
17400 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17401 {
17402 rtx op0 = operands[0], op1 = operands[1];
17403 unsigned int align = GET_MODE_ALIGNMENT (mode);
17404
17405 if (push_operand (op0, VOIDmode))
17406 op0 = emit_move_resolve_push (mode, op0);
17407
17408 /* Force constants other than zero into memory. We do not know how
17409 the instructions used to build constants modify the upper 64 bits
17410 of the register; once we have that information we may be able
17411 to handle some of them more efficiently. */
17412 if (can_create_pseudo_p ()
17413 && register_operand (op0, mode)
17414 && (CONSTANT_P (op1)
17415 || (GET_CODE (op1) == SUBREG
17416 && CONSTANT_P (SUBREG_REG (op1))))
17417 && !standard_sse_constant_p (op1))
17418 op1 = validize_mem (force_const_mem (mode, op1));
17419
17420 /* We need to check memory alignment for SSE modes since an attribute
17421 can make operands unaligned. */
17422 if (can_create_pseudo_p ()
17423 && SSE_REG_MODE_P (mode)
17424 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17425 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17426 {
17427 rtx tmp[2];
17428
17429 /* ix86_expand_vector_move_misalign() does not like constants ... */
17430 if (CONSTANT_P (op1)
17431 || (GET_CODE (op1) == SUBREG
17432 && CONSTANT_P (SUBREG_REG (op1))))
17433 op1 = validize_mem (force_const_mem (mode, op1));
17434
17435 /* ... nor both arguments in memory. */
17436 if (!register_operand (op0, mode)
17437 && !register_operand (op1, mode))
17438 op1 = force_reg (mode, op1);
17439
17440 tmp[0] = op0; tmp[1] = op1;
17441 ix86_expand_vector_move_misalign (mode, tmp);
17442 return;
17443 }
17444
17445 /* Make operand1 a register if it isn't already. */
17446 if (can_create_pseudo_p ()
17447 && !register_operand (op0, mode)
17448 && !register_operand (op1, mode))
17449 {
17450 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17451 return;
17452 }
17453
17454 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17455 }
17456
17457 /* Split 32-byte AVX unaligned load and store if needed. */
17458
17459 static void
17460 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17461 {
17462 rtx m;
17463 rtx (*extract) (rtx, rtx, rtx);
17464 rtx (*load_unaligned) (rtx, rtx);
17465 rtx (*store_unaligned) (rtx, rtx);
17466 machine_mode mode;
17467
17468 switch (GET_MODE (op0))
17469 {
17470 default:
17471 gcc_unreachable ();
17472 case V32QImode:
17473 extract = gen_avx_vextractf128v32qi;
17474 load_unaligned = gen_avx_loaddquv32qi;
17475 store_unaligned = gen_avx_storedquv32qi;
17476 mode = V16QImode;
17477 break;
17478 case V8SFmode:
17479 extract = gen_avx_vextractf128v8sf;
17480 load_unaligned = gen_avx_loadups256;
17481 store_unaligned = gen_avx_storeups256;
17482 mode = V4SFmode;
17483 break;
17484 case V4DFmode:
17485 extract = gen_avx_vextractf128v4df;
17486 load_unaligned = gen_avx_loadupd256;
17487 store_unaligned = gen_avx_storeupd256;
17488 mode = V2DFmode;
17489 break;
17490 }
17491
17492 if (MEM_P (op1))
17493 {
17494 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
17495 && optimize_insn_for_speed_p ())
17496 {
17497 rtx r = gen_reg_rtx (mode);
17498 m = adjust_address (op1, mode, 0);
17499 emit_move_insn (r, m);
17500 m = adjust_address (op1, mode, 16);
17501 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17502 emit_move_insn (op0, r);
17503 }
17504 /* Normal *mov<mode>_internal pattern will handle
17505 unaligned loads just fine if misaligned_operand
17506 is true, and without the UNSPEC it can be combined
17507 with arithmetic instructions. */
17508 else if (misaligned_operand (op1, GET_MODE (op1)))
17509 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17510 else
17511 emit_insn (load_unaligned (op0, op1));
17512 }
17513 else if (MEM_P (op0))
17514 {
17515 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
17516 && optimize_insn_for_speed_p ())
17517 {
17518 m = adjust_address (op0, mode, 0);
17519 emit_insn (extract (m, op1, const0_rtx));
17520 m = adjust_address (op0, mode, 16);
17521 emit_insn (extract (m, op1, const1_rtx));
17522 }
17523 else
17524 emit_insn (store_unaligned (op0, op1));
17525 }
17526 else
17527 gcc_unreachable ();
17528 }
17529
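/* Illustration (sketch): with TARGET_AVX256_SPLIT_UNALIGNED_LOAD, a
   32-byte unaligned V8SF load is emitted roughly as

     vmovups      mem,            %xmm0
     vinsertf128  $0x1, mem+16,   %ymm0, %ymm0

   i.e. two 16-byte halves instead of a single 32-byte vmovups; the
   store path does the symmetric split with vextractf128.  */
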
17530 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17531 straight to ix86_expand_vector_move. */
17532 /* Code generation for scalar reg-reg moves of single and double precision data:
17533 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17534 movaps reg, reg
17535 else
17536 movss reg, reg
17537 if (x86_sse_partial_reg_dependency == true)
17538 movapd reg, reg
17539 else
17540 movsd reg, reg
17541
17542 Code generation for scalar loads of double precision data:
17543 if (x86_sse_split_regs == true)
17544 movlpd mem, reg (gas syntax)
17545 else
17546 movsd mem, reg
17547
17548 Code generation for unaligned packed loads of single precision data
17549 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17550 if (x86_sse_unaligned_move_optimal)
17551 movups mem, reg
17552
17553 if (x86_sse_partial_reg_dependency == true)
17554 {
17555 xorps reg, reg
17556 movlps mem, reg
17557 movhps mem+8, reg
17558 }
17559 else
17560 {
17561 movlps mem, reg
17562 movhps mem+8, reg
17563 }
17564
17565 Code generation for unaligned packed loads of double precision data
17566 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17567 if (x86_sse_unaligned_move_optimal)
17568 movupd mem, reg
17569
17570 if (x86_sse_split_regs == true)
17571 {
17572 movlpd mem, reg
17573 movhpd mem+8, reg
17574 }
17575 else
17576 {
17577 movsd mem, reg
17578 movhpd mem+8, reg
17579 }
17580 */
17581
17582 void
17583 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17584 {
17585 rtx op0, op1, orig_op0 = NULL_RTX, m;
17586 rtx (*load_unaligned) (rtx, rtx);
17587 rtx (*store_unaligned) (rtx, rtx);
17588
17589 op0 = operands[0];
17590 op1 = operands[1];
17591
17592 if (GET_MODE_SIZE (mode) == 64)
17593 {
17594 switch (GET_MODE_CLASS (mode))
17595 {
17596 case MODE_VECTOR_INT:
17597 case MODE_INT:
17598 if (GET_MODE (op0) != V16SImode)
17599 {
17600 if (!MEM_P (op0))
17601 {
17602 orig_op0 = op0;
17603 op0 = gen_reg_rtx (V16SImode);
17604 }
17605 else
17606 op0 = gen_lowpart (V16SImode, op0);
17607 }
17608 op1 = gen_lowpart (V16SImode, op1);
17609 /* FALLTHRU */
17610
17611 case MODE_VECTOR_FLOAT:
17612 switch (GET_MODE (op0))
17613 {
17614 default:
17615 gcc_unreachable ();
17616 case V16SImode:
17617 load_unaligned = gen_avx512f_loaddquv16si;
17618 store_unaligned = gen_avx512f_storedquv16si;
17619 break;
17620 case V16SFmode:
17621 load_unaligned = gen_avx512f_loadups512;
17622 store_unaligned = gen_avx512f_storeups512;
17623 break;
17624 case V8DFmode:
17625 load_unaligned = gen_avx512f_loadupd512;
17626 store_unaligned = gen_avx512f_storeupd512;
17627 break;
17628 }
17629
17630 if (MEM_P (op1))
17631 emit_insn (load_unaligned (op0, op1));
17632 else if (MEM_P (op0))
17633 emit_insn (store_unaligned (op0, op1));
17634 else
17635 gcc_unreachable ();
17636 if (orig_op0)
17637 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17638 break;
17639
17640 default:
17641 gcc_unreachable ();
17642 }
17643
17644 return;
17645 }
17646
17647 if (TARGET_AVX
17648 && GET_MODE_SIZE (mode) == 32)
17649 {
17650 switch (GET_MODE_CLASS (mode))
17651 {
17652 case MODE_VECTOR_INT:
17653 case MODE_INT:
17654 if (GET_MODE (op0) != V32QImode)
17655 {
17656 if (!MEM_P (op0))
17657 {
17658 orig_op0 = op0;
17659 op0 = gen_reg_rtx (V32QImode);
17660 }
17661 else
17662 op0 = gen_lowpart (V32QImode, op0);
17663 }
17664 op1 = gen_lowpart (V32QImode, op1);
17665 /* FALLTHRU */
17666
17667 case MODE_VECTOR_FLOAT:
17668 ix86_avx256_split_vector_move_misalign (op0, op1);
17669 if (orig_op0)
17670 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17671 break;
17672
17673 default:
17674 gcc_unreachable ();
17675 }
17676
17677 return;
17678 }
17679
17680 if (MEM_P (op1))
17681 {
17682 /* Normal *mov<mode>_internal pattern will handle
17683 unaligned loads just fine if misaligned_operand
17684 is true, and without the UNSPEC it can be combined
17685 with arithmetic instructions. */
17686 if (TARGET_AVX
17687 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17688 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17689 && misaligned_operand (op1, GET_MODE (op1)))
17690 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17691 /* ??? If we have typed data, then it would appear that using
17692 movdqu is the only way to get unaligned data loaded with
17693 integer type. */
17694 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17695 {
17696 if (GET_MODE (op0) != V16QImode)
17697 {
17698 orig_op0 = op0;
17699 op0 = gen_reg_rtx (V16QImode);
17700 }
17701 op1 = gen_lowpart (V16QImode, op1);
17702 /* We will eventually emit movups based on insn attributes. */
17703 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17704 if (orig_op0)
17705 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17706 }
17707 else if (TARGET_SSE2 && mode == V2DFmode)
17708 {
17709 rtx zero;
17710
17711 if (TARGET_AVX
17712 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17713 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17714 || optimize_insn_for_size_p ())
17715 {
17716 /* We will eventually emit movups based on insn attributes. */
17717 emit_insn (gen_sse2_loadupd (op0, op1));
17718 return;
17719 }
17720
17721 /* When SSE registers are split into halves, we can avoid
17722 writing to the top half twice. */
17723 if (TARGET_SSE_SPLIT_REGS)
17724 {
17725 emit_clobber (op0);
17726 zero = op0;
17727 }
17728 else
17729 {
17730 /* ??? Not sure about the best option for the Intel chips.
17731 The following would seem to satisfy; the register is
17732 entirely cleared, breaking the dependency chain. We
17733 then store to the upper half, with a dependency depth
17734 of one. A rumor has it that Intel recommends two movsd
17735 followed by an unpacklpd, but this is unconfirmed. And
17736 given that the dependency depth of the unpacklpd would
17737 still be one, I'm not sure why this would be better. */
17738 zero = CONST0_RTX (V2DFmode);
17739 }
17740
17741 m = adjust_address (op1, DFmode, 0);
17742 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17743 m = adjust_address (op1, DFmode, 8);
17744 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17745 }
17746 else
17747 {
17748 rtx t;
17749
17750 if (TARGET_AVX
17751 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17752 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17753 || optimize_insn_for_size_p ())
17754 {
17755 if (GET_MODE (op0) != V4SFmode)
17756 {
17757 orig_op0 = op0;
17758 op0 = gen_reg_rtx (V4SFmode);
17759 }
17760 op1 = gen_lowpart (V4SFmode, op1);
17761 emit_insn (gen_sse_loadups (op0, op1));
17762 if (orig_op0)
17763 emit_move_insn (orig_op0,
17764 gen_lowpart (GET_MODE (orig_op0), op0));
17765 return;
17766 }
17767
17768 if (mode != V4SFmode)
17769 t = gen_reg_rtx (V4SFmode);
17770 else
17771 t = op0;
17772
17773 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17774 emit_move_insn (t, CONST0_RTX (V4SFmode));
17775 else
17776 emit_clobber (t);
17777
17778 m = adjust_address (op1, V2SFmode, 0);
17779 emit_insn (gen_sse_loadlps (t, t, m));
17780 m = adjust_address (op1, V2SFmode, 8);
17781 emit_insn (gen_sse_loadhps (t, t, m));
17782 if (mode != V4SFmode)
17783 emit_move_insn (op0, gen_lowpart (mode, t));
17784 }
17785 }
17786 else if (MEM_P (op0))
17787 {
17788 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17789 {
17790 op0 = gen_lowpart (V16QImode, op0);
17791 op1 = gen_lowpart (V16QImode, op1);
17792 /* We will eventually emit movups based on insn attributes. */
17793 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17794 }
17795 else if (TARGET_SSE2 && mode == V2DFmode)
17796 {
17797 if (TARGET_AVX
17798 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17799 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17800 || optimize_insn_for_size_p ())
17801 /* We will eventually emit movups based on insn attributes. */
17802 emit_insn (gen_sse2_storeupd (op0, op1));
17803 else
17804 {
17805 m = adjust_address (op0, DFmode, 0);
17806 emit_insn (gen_sse2_storelpd (m, op1));
17807 m = adjust_address (op0, DFmode, 8);
17808 emit_insn (gen_sse2_storehpd (m, op1));
17809 }
17810 }
17811 else
17812 {
17813 if (mode != V4SFmode)
17814 op1 = gen_lowpart (V4SFmode, op1);
17815
17816 if (TARGET_AVX
17817 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17818 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17819 || optimize_insn_for_size_p ())
17820 {
17821 op0 = gen_lowpart (V4SFmode, op0);
17822 emit_insn (gen_sse_storeups (op0, op1));
17823 }
17824 else
17825 {
17826 m = adjust_address (op0, V2SFmode, 0);
17827 emit_insn (gen_sse_storelps (m, op1));
17828 m = adjust_address (op0, V2SFmode, 8);
17829 emit_insn (gen_sse_storehps (m, op1));
17830 }
17831 }
17832 }
17833 else
17834 gcc_unreachable ();
17835 }
17836
17837 /* Helper function of ix86_fixup_binary_operands to canonicalize
17838 operand order. Returns true if the operands should be swapped. */
17839
17840 static bool
17841 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17842 rtx operands[])
17843 {
17844 rtx dst = operands[0];
17845 rtx src1 = operands[1];
17846 rtx src2 = operands[2];
17847
17848 /* If the operation is not commutative, we can't do anything. */
17849 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17850 return false;
17851
17852 /* Highest priority is that src1 should match dst. */
17853 if (rtx_equal_p (dst, src1))
17854 return false;
17855 if (rtx_equal_p (dst, src2))
17856 return true;
17857
17858 /* Next highest priority is that immediate constants come second. */
17859 if (immediate_operand (src2, mode))
17860 return false;
17861 if (immediate_operand (src1, mode))
17862 return true;
17863
17864 /* Lowest priority is that memory references should come second. */
17865 if (MEM_P (src2))
17866 return false;
17867 if (MEM_P (src1))
17868 return true;
17869
17870 return false;
17871 }
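
/* For illustration: for a commutative two-address operation such as
   a = b + a, the predicate above requests a swap so that the expander sees
   a = a + b and src1 matches the destination; similarly, an immediate or
   memory src1 is pushed into the second operand slot when possible.  */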
17872
17873
17874 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17875 destination to use for the operation. If different from the true
17876 destination in operands[0], a copy operation will be required. */
17877
17878 rtx
17879 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
17880 rtx operands[])
17881 {
17882 rtx dst = operands[0];
17883 rtx src1 = operands[1];
17884 rtx src2 = operands[2];
17885
17886 /* Canonicalize operand order. */
17887 if (ix86_swap_binary_operands_p (code, mode, operands))
17888 {
17889 /* It is invalid to swap operands of different modes. */
17890 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17891
17892 std::swap (src1, src2);
17893 }
17894
17895 /* Both source operands cannot be in memory. */
17896 if (MEM_P (src1) && MEM_P (src2))
17897 {
17898 /* Optimization: Only read from memory once. */
17899 if (rtx_equal_p (src1, src2))
17900 {
17901 src2 = force_reg (mode, src2);
17902 src1 = src2;
17903 }
17904 else if (rtx_equal_p (dst, src1))
17905 src2 = force_reg (mode, src2);
17906 else
17907 src1 = force_reg (mode, src1);
17908 }
17909
17910 /* If the destination is memory, and we do not have matching source
17911 operands, do things in registers. */
17912 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17913 dst = gen_reg_rtx (mode);
17914
17915 /* Source 1 cannot be a constant. */
17916 if (CONSTANT_P (src1))
17917 src1 = force_reg (mode, src1);
17918
17919 /* Source 1 cannot be a non-matching memory. */
17920 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17921 src1 = force_reg (mode, src1);
17922
17923 /* Improve address combine. */
17924 if (code == PLUS
17925 && GET_MODE_CLASS (mode) == MODE_INT
17926 && MEM_P (src2))
17927 src2 = force_reg (mode, src2);
17928
17929 operands[1] = src1;
17930 operands[2] = src2;
17931 return dst;
17932 }
17933
17934 /* Similarly, but assume that the destination has already been
17935 set up properly. */
17936
17937 void
17938 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17939 machine_mode mode, rtx operands[])
17940 {
17941 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17942 gcc_assert (dst == operands[0]);
17943 }
17944
17945 /* Attempt to expand a binary operator.  Make the expansion closer to the
17946    actual machine than just general_operand, which would allow 3 separate
17947    memory references (one output, two input) in a single insn.  */
17948
17949 void
17950 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
17951 rtx operands[])
17952 {
17953 rtx src1, src2, dst, op, clob;
17954
17955 dst = ix86_fixup_binary_operands (code, mode, operands);
17956 src1 = operands[1];
17957 src2 = operands[2];
17958
17959 /* Emit the instruction. */
17960
17961 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17962 if (reload_in_progress)
17963 {
17964 /* Reload doesn't know about the flags register, and doesn't know that
17965 it doesn't want to clobber it. We can only do this with PLUS. */
17966 gcc_assert (code == PLUS);
17967 emit_insn (op);
17968 }
17969 else if (reload_completed
17970 && code == PLUS
17971 && !rtx_equal_p (dst, src1))
17972 {
17973 /* This is going to be an LEA; avoid splitting it later. */
17974 emit_insn (op);
17975 }
17976 else
17977 {
17978 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17979 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17980 }
17981
17982 /* Fix up the destination if needed. */
17983 if (dst != operands[0])
17984 emit_move_insn (operands[0], dst);
17985 }
17986
17987 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17988 the given OPERANDS. */
17989
17990 void
17991 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
17992 rtx operands[])
17993 {
17994 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17995 if (GET_CODE (operands[1]) == SUBREG)
17996 {
17997 op1 = operands[1];
17998 op2 = operands[2];
17999 }
18000 else if (GET_CODE (operands[2]) == SUBREG)
18001 {
18002 op1 = operands[2];
18003 op2 = operands[1];
18004 }
18005   /* Optimize (__m128i) d | (__m128i) e and similar code, where d and e
18006      are float vectors, into a float vector logical insn.  In C/C++,
18007      without using intrinsics, there is no other way to express a vector
18008      logical operation on float vectors than to cast them temporarily
18009      to integer vectors.  */
18010 if (op1
18011 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18012 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
18013 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
18014 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
18015 && SUBREG_BYTE (op1) == 0
18016 && (GET_CODE (op2) == CONST_VECTOR
18017 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
18018 && SUBREG_BYTE (op2) == 0))
18019 && can_create_pseudo_p ())
18020 {
18021 rtx dst;
18022 switch (GET_MODE (SUBREG_REG (op1)))
18023 {
18024 case V4SFmode:
18025 case V8SFmode:
18026 case V16SFmode:
18027 case V2DFmode:
18028 case V4DFmode:
18029 case V8DFmode:
18030 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
18031 if (GET_CODE (op2) == CONST_VECTOR)
18032 {
18033 op2 = gen_lowpart (GET_MODE (dst), op2);
18034 op2 = force_reg (GET_MODE (dst), op2);
18035 }
18036 else
18037 {
18038 op1 = operands[1];
18039 op2 = SUBREG_REG (operands[2]);
18040 if (!nonimmediate_operand (op2, GET_MODE (dst)))
18041 op2 = force_reg (GET_MODE (dst), op2);
18042 }
18043 op1 = SUBREG_REG (op1);
18044 if (!nonimmediate_operand (op1, GET_MODE (dst)))
18045 op1 = force_reg (GET_MODE (dst), op1);
18046 emit_insn (gen_rtx_SET (VOIDmode, dst,
18047 gen_rtx_fmt_ee (code, GET_MODE (dst),
18048 op1, op2)));
18049 emit_move_insn (operands[0], gen_lowpart (mode, dst));
18050 return;
18051 default:
18052 break;
18053 }
18054 }
18055 if (!nonimmediate_operand (operands[1], mode))
18056 operands[1] = force_reg (mode, operands[1]);
18057 if (!nonimmediate_operand (operands[2], mode))
18058 operands[2] = force_reg (mode, operands[2]);
18059 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18060 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18061 gen_rtx_fmt_ee (code, mode, operands[1],
18062 operands[2])));
18063 }
18064
18065 /* Return TRUE or FALSE depending on whether the binary operator meets the
18066 appropriate constraints. */
18067
18068 bool
18069 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18070 rtx operands[3])
18071 {
18072 rtx dst = operands[0];
18073 rtx src1 = operands[1];
18074 rtx src2 = operands[2];
18075
18076 /* Both source operands cannot be in memory. */
18077 if (MEM_P (src1) && MEM_P (src2))
18078 return false;
18079
18080 /* Canonicalize operand order for commutative operators. */
18081 if (ix86_swap_binary_operands_p (code, mode, operands))
18082 std::swap (src1, src2);
18083
18084 /* If the destination is memory, we must have a matching source operand. */
18085 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18086 return false;
18087
18088 /* Source 1 cannot be a constant. */
18089 if (CONSTANT_P (src1))
18090 return false;
18091
18092 /* Source 1 cannot be a non-matching memory. */
18093 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18094 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18095 return (code == AND
18096 && (mode == HImode
18097 || mode == SImode
18098 || (TARGET_64BIT && mode == DImode))
18099 && satisfies_constraint_L (src2));
18100
18101 return true;
18102 }
18103
18104 /* Attempt to expand a unary operator.  Make the expansion closer to the
18105    actual machine than just general_operand, which would allow 2 separate
18106    memory references (one output, one input) in a single insn.  */
18107
18108 void
18109 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18110 rtx operands[])
18111 {
18112 bool matching_memory = false;
18113 rtx src, dst, op, clob;
18114
18115 dst = operands[0];
18116 src = operands[1];
18117
18118 /* If the destination is memory, and we do not have matching source
18119 operands, do things in registers. */
18120 if (MEM_P (dst))
18121 {
18122 if (rtx_equal_p (dst, src))
18123 matching_memory = true;
18124 else
18125 dst = gen_reg_rtx (mode);
18126 }
18127
18128 /* When source operand is memory, destination must match. */
18129 if (MEM_P (src) && !matching_memory)
18130 src = force_reg (mode, src);
18131
18132 /* Emit the instruction. */
18133
18134 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
18135 if (reload_in_progress || code == NOT)
18136 {
18137       /* Reload doesn't know about the flags register, and doesn't know that
18138          it doesn't want to clobber it.  We can only do this with NOT.  */
18139 gcc_assert (code == NOT);
18140 emit_insn (op);
18141 }
18142 else
18143 {
18144 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18145 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18146 }
18147
18148 /* Fix up the destination if needed. */
18149 if (dst != operands[0])
18150 emit_move_insn (operands[0], dst);
18151 }
18152
18153 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18154 divisor are within the range [0-255]. */
18155
18156 void
18157 ix86_split_idivmod (machine_mode mode, rtx operands[],
18158 bool signed_p)
18159 {
18160 rtx_code_label *end_label, *qimode_label;
18161 rtx insn, div, mod;
18162 rtx scratch, tmp0, tmp1, tmp2;
18163 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18164 rtx (*gen_zero_extend) (rtx, rtx);
18165 rtx (*gen_test_ccno_1) (rtx, rtx);
18166
18167 switch (mode)
18168 {
18169 case SImode:
18170 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18171 gen_test_ccno_1 = gen_testsi_ccno_1;
18172 gen_zero_extend = gen_zero_extendqisi2;
18173 break;
18174 case DImode:
18175 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18176 gen_test_ccno_1 = gen_testdi_ccno_1;
18177 gen_zero_extend = gen_zero_extendqidi2;
18178 break;
18179 default:
18180 gcc_unreachable ();
18181 }
18182
18183 end_label = gen_label_rtx ();
18184 qimode_label = gen_label_rtx ();
18185
18186 scratch = gen_reg_rtx (mode);
18187
18188   /* Use 8bit unsigned divmod if dividend and divisor are within
18189      the range [0-255].  */
18190 emit_move_insn (scratch, operands[2]);
18191 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18192 scratch, 1, OPTAB_DIRECT);
18193 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18194 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18195 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18196 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18197 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18198 pc_rtx);
18199 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
18200 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18201 JUMP_LABEL (insn) = qimode_label;
18202
18203   /* Generate original signed/unsigned divmod.  */
18204 div = gen_divmod4_1 (operands[0], operands[1],
18205 operands[2], operands[3]);
18206 emit_insn (div);
18207
18208 /* Branch to the end. */
18209 emit_jump_insn (gen_jump (end_label));
18210 emit_barrier ();
18211
18212 /* Generate 8bit unsigned divide. */
18213 emit_label (qimode_label);
18214 /* Don't use operands[0] for result of 8bit divide since not all
18215 registers support QImode ZERO_EXTRACT. */
18216 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18217 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18218 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18219 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18220
18221 if (signed_p)
18222 {
18223 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18224 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18225 }
18226 else
18227 {
18228 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18229 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18230 }
18231
18232 /* Extract remainder from AH. */
18233 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18234 if (REG_P (operands[1]))
18235 insn = emit_move_insn (operands[1], tmp1);
18236 else
18237 {
18238 /* Need a new scratch register since the old one has result
18239 of 8bit divide. */
18240 scratch = gen_reg_rtx (mode);
18241 emit_move_insn (scratch, tmp1);
18242 insn = emit_move_insn (operands[1], scratch);
18243 }
18244 set_unique_reg_note (insn, REG_EQUAL, mod);
18245
18246 /* Zero extend quotient from AL. */
18247 tmp1 = gen_lowpart (QImode, tmp0);
18248 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18249 set_unique_reg_note (insn, REG_EQUAL, div);
18250
18251 emit_label (end_label);
18252 }
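
/* For illustration, the SImode case of the splitter above emits a runtime
   dispatch roughly equivalent to

       tmp = dividend | divisor;
       if ((tmp & ~0xff) == 0)
         use the 8bit unsigned divide: AL = quotient, AH = remainder;
       else
         use the full 32-bit signed/unsigned divmod;

   with the quotient zero-extended from AL and the remainder extracted
   from AH on the fast path.  */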
18253
18254 #define LEA_MAX_STALL (3)
18255 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18256
18257 /* Increase given DISTANCE in half-cycles according to
18258    dependencies between PREV and NEXT instructions.
18259    Add 1 half-cycle if there is no dependency, and
18260    go to the next cycle if there is some dependency.  */
18261
18262 static unsigned int
18263 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18264 {
18265 df_ref def, use;
18266
18267 if (!prev || !next)
18268 return distance + (distance & 1) + 2;
18269
18270 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18271 return distance + 1;
18272
18273 FOR_EACH_INSN_USE (use, next)
18274 FOR_EACH_INSN_DEF (def, prev)
18275 if (!DF_REF_IS_ARTIFICIAL (def)
18276 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18277 return distance + (distance & 1) + 2;
18278
18279 return distance + 1;
18280 }
18281
18282 /* Check whether instruction INSN defines register number
18283    REGNO1 or REGNO2.  */
18284
18285 static bool
18286 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18287 rtx insn)
18288 {
18289 df_ref def;
18290
18291 FOR_EACH_INSN_DEF (def, insn)
18292 if (DF_REF_REG_DEF_P (def)
18293 && !DF_REF_IS_ARTIFICIAL (def)
18294 && (regno1 == DF_REF_REGNO (def)
18295 || regno2 == DF_REF_REGNO (def)))
18296 return true;
18297
18298 return false;
18299 }
18300
18301 /* Check whether instruction INSN uses register number
18302    REGNO as part of an address expression.  */
18303
18304 static bool
18305 insn_uses_reg_mem (unsigned int regno, rtx insn)
18306 {
18307 df_ref use;
18308
18309 FOR_EACH_INSN_USE (use, insn)
18310 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18311 return true;
18312
18313 return false;
18314 }
18315
18316 /* Search backward for non-agu definition of register number REGNO1
18317 or register number REGNO2 in basic block starting from instruction
18318 START up to head of basic block or instruction INSN.
18319
18320    Put true into *FOUND if a definition was found
18321    and false otherwise.
18322
18323    The distance in half-cycles between START and the found instruction,
18324    or the head of the BB, is added to DISTANCE and returned.  */
18325
18326 static int
18327 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18328 rtx_insn *insn, int distance,
18329 rtx_insn *start, bool *found)
18330 {
18331 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18332 rtx_insn *prev = start;
18333 rtx_insn *next = NULL;
18334
18335 *found = false;
18336
18337 while (prev
18338 && prev != insn
18339 && distance < LEA_SEARCH_THRESHOLD)
18340 {
18341 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18342 {
18343 distance = increase_distance (prev, next, distance);
18344 if (insn_defines_reg (regno1, regno2, prev))
18345 {
18346 if (recog_memoized (prev) < 0
18347 || get_attr_type (prev) != TYPE_LEA)
18348 {
18349 *found = true;
18350 return distance;
18351 }
18352 }
18353
18354 next = prev;
18355 }
18356 if (prev == BB_HEAD (bb))
18357 break;
18358
18359 prev = PREV_INSN (prev);
18360 }
18361
18362 return distance;
18363 }
18364
18365 /* Search backward for non-agu definition of register number REGNO1
18366 or register number REGNO2 in INSN's basic block until
18367 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18368    2. Reach the boundary of a neighbouring BB, or
18369 3. Reach agu definition.
18370 Returns the distance between the non-agu definition point and INSN.
18371 If no definition point, returns -1. */
18372
18373 static int
18374 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18375 rtx_insn *insn)
18376 {
18377 basic_block bb = BLOCK_FOR_INSN (insn);
18378 int distance = 0;
18379 bool found = false;
18380
18381 if (insn != BB_HEAD (bb))
18382 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18383 distance, PREV_INSN (insn),
18384 &found);
18385
18386 if (!found && distance < LEA_SEARCH_THRESHOLD)
18387 {
18388 edge e;
18389 edge_iterator ei;
18390 bool simple_loop = false;
18391
18392 FOR_EACH_EDGE (e, ei, bb->preds)
18393 if (e->src == bb)
18394 {
18395 simple_loop = true;
18396 break;
18397 }
18398
18399 if (simple_loop)
18400 distance = distance_non_agu_define_in_bb (regno1, regno2,
18401 insn, distance,
18402 BB_END (bb), &found);
18403 else
18404 {
18405 int shortest_dist = -1;
18406 bool found_in_bb = false;
18407
18408 FOR_EACH_EDGE (e, ei, bb->preds)
18409 {
18410 int bb_dist
18411 = distance_non_agu_define_in_bb (regno1, regno2,
18412 insn, distance,
18413 BB_END (e->src),
18414 &found_in_bb);
18415 if (found_in_bb)
18416 {
18417 if (shortest_dist < 0)
18418 shortest_dist = bb_dist;
18419 else if (bb_dist > 0)
18420 shortest_dist = MIN (bb_dist, shortest_dist);
18421
18422 found = true;
18423 }
18424 }
18425
18426 distance = shortest_dist;
18427 }
18428 }
18429
18430 /* get_attr_type may modify recog data. We want to make sure
18431 that recog data is valid for instruction INSN, on which
18432 distance_non_agu_define is called. INSN is unchanged here. */
18433 extract_insn_cached (insn);
18434
18435 if (!found)
18436 return -1;
18437
18438 return distance >> 1;
18439 }
18440
18441 /* Return the distance in half-cycles between INSN and the next
18442    insn that uses register number REGNO in a memory address, added
18443    to DISTANCE.  Return -1 if REGNO is set.
18444
18445    Put true into *FOUND if a register use was found and
18446    false otherwise.
18447    Put true into *REDEFINED if a register redefinition was
18448    found and false otherwise.  */
18449
18450 static int
18451 distance_agu_use_in_bb (unsigned int regno,
18452 rtx_insn *insn, int distance, rtx_insn *start,
18453 bool *found, bool *redefined)
18454 {
18455 basic_block bb = NULL;
18456 rtx_insn *next = start;
18457 rtx_insn *prev = NULL;
18458
18459 *found = false;
18460 *redefined = false;
18461
18462 if (start != NULL_RTX)
18463 {
18464 bb = BLOCK_FOR_INSN (start);
18465 if (start != BB_HEAD (bb))
18466 /* If insn and start belong to the same bb, set prev to insn,
18467 so the call to increase_distance will increase the distance
18468 between insns by 1. */
18469 prev = insn;
18470 }
18471
18472 while (next
18473 && next != insn
18474 && distance < LEA_SEARCH_THRESHOLD)
18475 {
18476 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18477 {
18478 distance = increase_distance(prev, next, distance);
18479 if (insn_uses_reg_mem (regno, next))
18480 {
18481 /* Return DISTANCE if OP0 is used in memory
18482 address in NEXT. */
18483 *found = true;
18484 return distance;
18485 }
18486
18487 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18488 {
18489 /* Return -1 if OP0 is set in NEXT. */
18490 *redefined = true;
18491 return -1;
18492 }
18493
18494 prev = next;
18495 }
18496
18497 if (next == BB_END (bb))
18498 break;
18499
18500 next = NEXT_INSN (next);
18501 }
18502
18503 return distance;
18504 }
18505
18506 /* Return the distance between INSN and the next insn that uses
18507 register number REGNO0 in memory address. Return -1 if no such
18508    register number REGNO0 in a memory address.  Return -1 if no such
18509    use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
18510 static int
18511 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18512 {
18513 basic_block bb = BLOCK_FOR_INSN (insn);
18514 int distance = 0;
18515 bool found = false;
18516 bool redefined = false;
18517
18518 if (insn != BB_END (bb))
18519 distance = distance_agu_use_in_bb (regno0, insn, distance,
18520 NEXT_INSN (insn),
18521 &found, &redefined);
18522
18523 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18524 {
18525 edge e;
18526 edge_iterator ei;
18527 bool simple_loop = false;
18528
18529 FOR_EACH_EDGE (e, ei, bb->succs)
18530 if (e->dest == bb)
18531 {
18532 simple_loop = true;
18533 break;
18534 }
18535
18536 if (simple_loop)
18537 distance = distance_agu_use_in_bb (regno0, insn,
18538 distance, BB_HEAD (bb),
18539 &found, &redefined);
18540 else
18541 {
18542 int shortest_dist = -1;
18543 bool found_in_bb = false;
18544 bool redefined_in_bb = false;
18545
18546 FOR_EACH_EDGE (e, ei, bb->succs)
18547 {
18548 int bb_dist
18549 = distance_agu_use_in_bb (regno0, insn,
18550 distance, BB_HEAD (e->dest),
18551 &found_in_bb, &redefined_in_bb);
18552 if (found_in_bb)
18553 {
18554 if (shortest_dist < 0)
18555 shortest_dist = bb_dist;
18556 else if (bb_dist > 0)
18557 shortest_dist = MIN (bb_dist, shortest_dist);
18558
18559 found = true;
18560 }
18561 }
18562
18563 distance = shortest_dist;
18564 }
18565 }
18566
18567 if (!found || redefined)
18568 return -1;
18569
18570 return distance >> 1;
18571 }
18572
18573 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
18574    there is a dilemma of choosing LEA or ADD.
18575    Negative value: ADD is preferred over LEA
18576    Zero: Neutral
18577    Positive value: LEA is preferred over ADD  */
18578 #define IX86_LEA_PRIORITY 0
18579
18580 /* Return true if using the lea INSN has a performance advantage
18581    over the equivalent instruction sequence, which has SPLIT_COST
18582    cycles higher latency than the lea.  */
18583
18584 static bool
18585 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18586 unsigned int regno2, int split_cost, bool has_scale)
18587 {
18588 int dist_define, dist_use;
18589
18590   /* For Silvermont, if a 2-source or 3-source LEA is used for a
18591      non-destructive destination, or for the ability to use a
18592      scale, the use of LEA is justified.  */
18593 if (TARGET_SILVERMONT || TARGET_INTEL)
18594 {
18595 if (has_scale)
18596 return true;
18597 if (split_cost < 1)
18598 return false;
18599 if (regno0 == regno1 || regno0 == regno2)
18600 return false;
18601 return true;
18602 }
18603
18604 dist_define = distance_non_agu_define (regno1, regno2, insn);
18605 dist_use = distance_agu_use (regno0, insn);
18606
18607 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18608 {
18609       /* If there is no non-AGU operand definition, no AGU
18610          operand usage and the split cost is 0, then both the lea
18611          and non-lea variants have the same priority.  Currently
18612          we prefer lea for 64-bit code and non-lea for 32-bit
18613          code.  */
18614 if (dist_use < 0 && split_cost == 0)
18615 return TARGET_64BIT || IX86_LEA_PRIORITY;
18616 else
18617 return true;
18618 }
18619
18620   /* The longer the distance to the definition, the more preferable
18621      the lea becomes.  Adjust the distance here to take the splitting
18622      cost and lea priority into account.  */
18623 dist_define += split_cost + IX86_LEA_PRIORITY;
18624
18625   /* If there is no use in a memory address, just check whether
18626      the split cost exceeds the AGU stall.  */
18627 if (dist_use < 0)
18628 return dist_define > LEA_MAX_STALL;
18629
18630   /* If this insn has both a backward non-agu dependence and a forward
18631      agu dependence, the one with the shorter distance takes effect.  */
18632 return dist_define >= dist_use;
18633 }
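
/* Roughly speaking, outside the Silvermont/Intel special case the heuristic
   above prefers the lea whenever

       dist_define + SPLIT_COST + IX86_LEA_PRIORITY >= dist_use

   i.e. when the backward non-AGU definition is far enough away (or absent)
   relative to the forward AGU use.  */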
18634
18635 /* Return true if it is legal to clobber flags by INSN and
18636 false otherwise. */
18637
18638 static bool
18639 ix86_ok_to_clobber_flags (rtx_insn *insn)
18640 {
18641 basic_block bb = BLOCK_FOR_INSN (insn);
18642 df_ref use;
18643 bitmap live;
18644
18645 while (insn)
18646 {
18647 if (NONDEBUG_INSN_P (insn))
18648 {
18649 FOR_EACH_INSN_USE (use, insn)
18650 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18651 return false;
18652
18653 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18654 return true;
18655 }
18656
18657 if (insn == BB_END (bb))
18658 break;
18659
18660 insn = NEXT_INSN (insn);
18661 }
18662
18663 live = df_get_live_out(bb);
18664 return !REGNO_REG_SET_P (live, FLAGS_REG);
18665 }
18666
18667 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18668 move and add to avoid AGU stalls. */
18669
18670 bool
18671 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18672 {
18673 unsigned int regno0, regno1, regno2;
18674
18675 /* Check if we need to optimize. */
18676 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18677 return false;
18678
18679 /* Check it is correct to split here. */
18680 if (!ix86_ok_to_clobber_flags(insn))
18681 return false;
18682
18683 regno0 = true_regnum (operands[0]);
18684 regno1 = true_regnum (operands[1]);
18685 regno2 = true_regnum (operands[2]);
18686
18687   /* We only need to split adds with a non-destructive
18688      destination operand.  */
18689 if (regno0 == regno1 || regno0 == regno2)
18690 return false;
18691 else
18692 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18693 }
18694
18695 /* Return true if we should emit lea instruction instead of mov
18696 instruction. */
18697
18698 bool
18699 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18700 {
18701 unsigned int regno0, regno1;
18702
18703 /* Check if we need to optimize. */
18704 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18705 return false;
18706
18707 /* Use lea for reg to reg moves only. */
18708 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18709 return false;
18710
18711 regno0 = true_regnum (operands[0]);
18712 regno1 = true_regnum (operands[1]);
18713
18714 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18715 }
18716
18717 /* Return true if we need to split lea into a sequence of
18718 instructions to avoid AGU stalls. */
18719
18720 bool
18721 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18722 {
18723 unsigned int regno0, regno1, regno2;
18724 int split_cost;
18725 struct ix86_address parts;
18726 int ok;
18727
18728 /* Check we need to optimize. */
18729 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18730 return false;
18731
18732 /* The "at least two components" test below might not catch simple
18733 move or zero extension insns if parts.base is non-NULL and parts.disp
18734 is const0_rtx as the only components in the address, e.g. if the
18735 register is %rbp or %r13. As this test is much cheaper and moves or
18736 zero extensions are the common case, do this check first. */
18737 if (REG_P (operands[1])
18738 || (SImode_address_operand (operands[1], VOIDmode)
18739 && REG_P (XEXP (operands[1], 0))))
18740 return false;
18741
18742 /* Check if it is OK to split here. */
18743 if (!ix86_ok_to_clobber_flags (insn))
18744 return false;
18745
18746 ok = ix86_decompose_address (operands[1], &parts);
18747 gcc_assert (ok);
18748
18749 /* There should be at least two components in the address. */
18750 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18751 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18752 return false;
18753
18754   /* We should not split into an add if a non-legitimate PIC
18755      operand is used as the displacement.  */
18756 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18757 return false;
18758
18759 regno0 = true_regnum (operands[0]) ;
18760 regno1 = INVALID_REGNUM;
18761 regno2 = INVALID_REGNUM;
18762
18763 if (parts.base)
18764 regno1 = true_regnum (parts.base);
18765 if (parts.index)
18766 regno2 = true_regnum (parts.index);
18767
18768 split_cost = 0;
18769
18770   /* Compute how many cycles we will add to the execution time
18771      if we split the lea into a sequence of instructions.  */
18772 if (parts.base || parts.index)
18773 {
18774       /* Have to use a mov instruction if the non-destructive
18775          destination form is used.  */
18776 if (regno1 != regno0 && regno2 != regno0)
18777 split_cost += 1;
18778
18779 /* Have to add index to base if both exist. */
18780 if (parts.base && parts.index)
18781 split_cost += 1;
18782
18783 /* Have to use shift and adds if scale is 2 or greater. */
18784 if (parts.scale > 1)
18785 {
18786 if (regno0 != regno1)
18787 split_cost += 1;
18788 else if (regno2 == regno0)
18789 split_cost += 4;
18790 else
18791 split_cost += parts.scale;
18792 }
18793
18794       /* Have to use an add instruction with an immediate if
18795          disp is non-zero.  */
18796 if (parts.disp && parts.disp != const0_rtx)
18797 split_cost += 1;
18798
18799 /* Subtract the price of lea. */
18800 split_cost -= 1;
18801 }
18802
18803 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18804 parts.scale > 1);
18805 }
18806
18807 /* Emit the x86 binary operator CODE in mode MODE, where the first operand
18808    matches the destination.  The emitted RTX includes a clobber of FLAGS_REG.  */
18809
18810 static void
18811 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18812 rtx dst, rtx src)
18813 {
18814 rtx op, clob;
18815
18816 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18817 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18818
18819 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18820 }
18821
18822 /* Return true if the definition of REGNO1 is nearer to INSN than that of REGNO2.  */
18823
18824 static bool
18825 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18826 {
18827 rtx_insn *prev = insn;
18828 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18829
18830 if (insn == start)
18831 return false;
18832 while (prev && prev != start)
18833 {
18834 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18835 {
18836 prev = PREV_INSN (prev);
18837 continue;
18838 }
18839 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18840 return true;
18841 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18842 return false;
18843 prev = PREV_INSN (prev);
18844 }
18845
18846 /* None of the regs is defined in the bb. */
18847 return false;
18848 }
18849
18850 /* Split a lea instruction into a sequence of instructions
18851    which are executed on the ALU to avoid AGU stalls.
18852    It is assumed that the flags register may be clobbered
18853    at the lea's position.  */
18854
18855 void
18856 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18857 {
18858 unsigned int regno0, regno1, regno2;
18859 struct ix86_address parts;
18860 rtx target, tmp;
18861 int ok, adds;
18862
18863 ok = ix86_decompose_address (operands[1], &parts);
18864 gcc_assert (ok);
18865
18866 target = gen_lowpart (mode, operands[0]);
18867
18868 regno0 = true_regnum (target);
18869 regno1 = INVALID_REGNUM;
18870 regno2 = INVALID_REGNUM;
18871
18872 if (parts.base)
18873 {
18874 parts.base = gen_lowpart (mode, parts.base);
18875 regno1 = true_regnum (parts.base);
18876 }
18877
18878 if (parts.index)
18879 {
18880 parts.index = gen_lowpart (mode, parts.index);
18881 regno2 = true_regnum (parts.index);
18882 }
18883
18884 if (parts.disp)
18885 parts.disp = gen_lowpart (mode, parts.disp);
18886
18887 if (parts.scale > 1)
18888 {
18889 /* Case r1 = r1 + ... */
18890 if (regno1 == regno0)
18891 {
18892           /* In the case r1 = r1 + C * r2 we would have to
18893              use multiplication, which is very expensive.
18894              Assume the cost model is wrong if we
18895              reach such a case here.  */
18896 gcc_assert (regno2 != regno0);
18897
18898 for (adds = parts.scale; adds > 0; adds--)
18899 ix86_emit_binop (PLUS, mode, target, parts.index);
18900 }
18901 else
18902 {
18903 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18904 if (regno0 != regno2)
18905 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18906
18907 /* Use shift for scaling. */
18908 ix86_emit_binop (ASHIFT, mode, target,
18909 GEN_INT (exact_log2 (parts.scale)));
18910
18911 if (parts.base)
18912 ix86_emit_binop (PLUS, mode, target, parts.base);
18913
18914 if (parts.disp && parts.disp != const0_rtx)
18915 ix86_emit_binop (PLUS, mode, target, parts.disp);
18916 }
18917 }
18918 else if (!parts.base && !parts.index)
18919 {
18920 gcc_assert(parts.disp);
18921 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18922 }
18923 else
18924 {
18925 if (!parts.base)
18926 {
18927 if (regno0 != regno2)
18928 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18929 }
18930 else if (!parts.index)
18931 {
18932 if (regno0 != regno1)
18933 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18934 }
18935 else
18936 {
18937 if (regno0 == regno1)
18938 tmp = parts.index;
18939 else if (regno0 == regno2)
18940 tmp = parts.base;
18941 else
18942 {
18943 rtx tmp1;
18944
18945 /* Find better operand for SET instruction, depending
18946 on which definition is farther from the insn. */
18947 if (find_nearest_reg_def (insn, regno1, regno2))
18948 tmp = parts.index, tmp1 = parts.base;
18949 else
18950 tmp = parts.base, tmp1 = parts.index;
18951
18952 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18953
18954 if (parts.disp && parts.disp != const0_rtx)
18955 ix86_emit_binop (PLUS, mode, target, parts.disp);
18956
18957 ix86_emit_binop (PLUS, mode, target, tmp1);
18958 return;
18959 }
18960
18961 ix86_emit_binop (PLUS, mode, target, tmp);
18962 }
18963
18964 if (parts.disp && parts.disp != const0_rtx)
18965 ix86_emit_binop (PLUS, mode, target, parts.disp);
18966 }
18967 }
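
/* For illustration (gas syntax, a scale of 2 and a destination distinct from
   both address registers assumed), the splitter above turns, for example,

       lea 0x4(%rbx,%rcx,2), %rax

   into roughly

       mov %rcx, %rax
       shl $1, %rax
       add %rbx, %rax
       add $0x4, %rax

   so that the whole computation runs on the ALU instead of the AGU.  */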
18968
18969 /* Return true if it is OK to optimize an ADD operation into an LEA
18970    operation to avoid flag-register consumption.  For most processors,
18971    ADD is faster than LEA.  For processors like BONNELL, if the
18972    destination register of the LEA holds an actual address which will be
18973    used soon, LEA is better; otherwise ADD is better.  */
18974
18975 bool
18976 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
18977 {
18978 unsigned int regno0 = true_regnum (operands[0]);
18979 unsigned int regno1 = true_regnum (operands[1]);
18980 unsigned int regno2 = true_regnum (operands[2]);
18981
18982   /* If a = b + c with a != b and a != c, we must use the lea form.  */
18983 if (regno0 != regno1 && regno0 != regno2)
18984 return true;
18985
18986 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18987 return false;
18988
18989 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18990 }
18991
18992 /* Return true if destination reg of SET_BODY is shift count of
18993 USE_BODY. */
18994
18995 static bool
18996 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18997 {
18998 rtx set_dest;
18999 rtx shift_rtx;
19000 int i;
19001
19002 /* Retrieve destination of SET_BODY. */
19003 switch (GET_CODE (set_body))
19004 {
19005 case SET:
19006 set_dest = SET_DEST (set_body);
19007 if (!set_dest || !REG_P (set_dest))
19008 return false;
19009 break;
19010 case PARALLEL:
19011 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
19012 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
19013 use_body))
19014 return true;
19015 default:
19016 return false;
19017 break;
19018 }
19019
19020 /* Retrieve shift count of USE_BODY. */
19021 switch (GET_CODE (use_body))
19022 {
19023 case SET:
19024 shift_rtx = XEXP (use_body, 1);
19025 break;
19026 case PARALLEL:
19027 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
19028 if (ix86_dep_by_shift_count_body (set_body,
19029 XVECEXP (use_body, 0, i)))
19030 return true;
19031 default:
19032 return false;
19033 break;
19034 }
19035
19036 if (shift_rtx
19037 && (GET_CODE (shift_rtx) == ASHIFT
19038 || GET_CODE (shift_rtx) == LSHIFTRT
19039 || GET_CODE (shift_rtx) == ASHIFTRT
19040 || GET_CODE (shift_rtx) == ROTATE
19041 || GET_CODE (shift_rtx) == ROTATERT))
19042 {
19043 rtx shift_count = XEXP (shift_rtx, 1);
19044
19045 /* Return true if shift count is dest of SET_BODY. */
19046 if (REG_P (shift_count))
19047 {
19048           /* Add this check since the function can be invoked before
19049              register allocation by the pre-reload scheduler.  */
19050 if (reload_completed
19051 && true_regnum (set_dest) == true_regnum (shift_count))
19052 return true;
19053 else if (REGNO(set_dest) == REGNO(shift_count))
19054 return true;
19055 }
19056 }
19057
19058 return false;
19059 }
19060
19061 /* Return true if destination reg of SET_INSN is shift count of
19062 USE_INSN. */
19063
19064 bool
19065 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19066 {
19067 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19068 PATTERN (use_insn));
19069 }
19070
19071 /* Return TRUE or FALSE depending on whether the unary operator meets the
19072 appropriate constraints. */
19073
19074 bool
19075 ix86_unary_operator_ok (enum rtx_code,
19076 machine_mode,
19077 rtx operands[2])
19078 {
19079 /* If one of operands is memory, source and destination must match. */
19080 if ((MEM_P (operands[0])
19081 || MEM_P (operands[1]))
19082 && ! rtx_equal_p (operands[0], operands[1]))
19083 return false;
19084 return true;
19085 }
19086
19087 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19088 are ok, keeping in mind the possible movddup alternative. */
19089
19090 bool
19091 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19092 {
19093 if (MEM_P (operands[0]))
19094 return rtx_equal_p (operands[0], operands[1 + high]);
19095 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19096 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19097 return true;
19098 }
19099
19100 /* Post-reload splitter for converting an SF or DFmode value in an
19101 SSE register into an unsigned SImode. */
19102
19103 void
19104 ix86_split_convert_uns_si_sse (rtx operands[])
19105 {
19106 machine_mode vecmode;
19107 rtx value, large, zero_or_two31, input, two31, x;
19108
19109 large = operands[1];
19110 zero_or_two31 = operands[2];
19111 input = operands[3];
19112 two31 = operands[4];
19113 vecmode = GET_MODE (large);
19114 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19115
19116 /* Load up the value into the low element. We must ensure that the other
19117 elements are valid floats -- zero is the easiest such value. */
19118 if (MEM_P (input))
19119 {
19120 if (vecmode == V4SFmode)
19121 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19122 else
19123 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19124 }
19125 else
19126 {
19127 input = gen_rtx_REG (vecmode, REGNO (input));
19128 emit_move_insn (value, CONST0_RTX (vecmode));
19129 if (vecmode == V4SFmode)
19130 emit_insn (gen_sse_movss (value, value, input));
19131 else
19132 emit_insn (gen_sse2_movsd (value, value, input));
19133 }
19134
19135 emit_move_insn (large, two31);
19136 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19137
19138 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19139 emit_insn (gen_rtx_SET (VOIDmode, large, x));
19140
19141 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19142 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
19143
19144 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19145 emit_insn (gen_rtx_SET (VOIDmode, value, x));
19146
19147 large = gen_rtx_REG (V4SImode, REGNO (large));
19148 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19149
19150 x = gen_rtx_REG (V4SImode, REGNO (value));
19151 if (vecmode == V4SFmode)
19152 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19153 else
19154 emit_insn (gen_sse2_cvttpd2dq (x, value));
19155 value = x;
19156
19157 emit_insn (gen_xorv4si3 (value, value, large));
19158 }
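
/* Roughly, the splitter above computes, elementwise,

       mask  = (value >= 0x1p31) ? ~0 : 0;
       value = value - (mask & 0x1p31);
       uns   = (unsigned) (int) value ^ (mask << 31);

   so values below 2^31 take the plain signed-truncation path and larger
   values have the sign bit patched back in by the final xor.  */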
19159
19160 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19161 Expects the 64-bit DImode to be supplied in a pair of integral
19162 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19163 -mfpmath=sse, !optimize_size only. */
19164
19165 void
19166 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19167 {
19168 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19169 rtx int_xmm, fp_xmm;
19170 rtx biases, exponents;
19171 rtx x;
19172
19173 int_xmm = gen_reg_rtx (V4SImode);
19174 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19175 emit_insn (gen_movdi_to_sse (int_xmm, input));
19176 else if (TARGET_SSE_SPLIT_REGS)
19177 {
19178 emit_clobber (int_xmm);
19179 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19180 }
19181 else
19182 {
19183 x = gen_reg_rtx (V2DImode);
19184 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19185 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19186 }
19187
19188 x = gen_rtx_CONST_VECTOR (V4SImode,
19189 gen_rtvec (4, GEN_INT (0x43300000UL),
19190 GEN_INT (0x45300000UL),
19191 const0_rtx, const0_rtx));
19192 exponents = validize_mem (force_const_mem (V4SImode, x));
19193
19194 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19195 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19196
19197 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19198 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19199 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19200 (0x1.0p84 + double(fp_value_hi_xmm)).
19201 Note these exponents differ by 32. */
19202
19203 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19204
19205 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19206 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19207 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19208 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19209 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19210 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19211 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19212 biases = validize_mem (force_const_mem (V2DFmode, biases));
19213 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19214
19215 /* Add the upper and lower DFmode values together. */
19216 if (TARGET_SSE3)
19217 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19218 else
19219 {
19220 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19221 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19222 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19223 }
19224
19225 ix86_expand_vector_extract (false, target, fp_xmm, 0);
19226 }
19227
19228 /* Not used, but eases macroization of patterns. */
19229 void
19230 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19231 {
19232 gcc_unreachable ();
19233 }
19234
19235 /* Convert an unsigned SImode value into a DFmode. Only currently used
19236 for SSE, but applicable anywhere. */
19237
19238 void
19239 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19240 {
19241 REAL_VALUE_TYPE TWO31r;
19242 rtx x, fp;
19243
19244 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19245 NULL, 1, OPTAB_DIRECT);
19246
19247 fp = gen_reg_rtx (DFmode);
19248 emit_insn (gen_floatsidf2 (fp, x));
19249
19250 real_ldexp (&TWO31r, &dconst1, 31);
19251 x = const_double_from_real_value (TWO31r, DFmode);
19252
19253 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19254 if (x != target)
19255 emit_move_insn (target, x);
19256 }
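
/* Numerically, the sequence above computes, for a 32-bit unsigned input x,
   roughly

       (double) (int32_t) (x + 0x80000000u) + 2147483648.0

   i.e. the input is biased into the signed range, converted with the signed
   SImode->DFmode path, and the 2^31 bias is added back in DFmode.  */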
19257
19258 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19259 32-bit mode; otherwise we have a direct convert instruction. */
19260
19261 void
19262 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19263 {
19264 REAL_VALUE_TYPE TWO32r;
19265 rtx fp_lo, fp_hi, x;
19266
19267 fp_lo = gen_reg_rtx (DFmode);
19268 fp_hi = gen_reg_rtx (DFmode);
19269
19270 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19271
19272 real_ldexp (&TWO32r, &dconst1, 32);
19273 x = const_double_from_real_value (TWO32r, DFmode);
19274 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19275
19276 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19277
19278 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19279 0, OPTAB_DIRECT);
19280 if (x != target)
19281 emit_move_insn (target, x);
19282 }
19283
19284 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19285 For x86_32, -mfpmath=sse, !optimize_size only. */
19286 void
19287 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19288 {
19289 REAL_VALUE_TYPE ONE16r;
19290 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19291
19292 real_ldexp (&ONE16r, &dconst1, 16);
19293 x = const_double_from_real_value (ONE16r, SFmode);
19294 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
19295 NULL, 0, OPTAB_DIRECT);
19296 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
19297 NULL, 0, OPTAB_DIRECT);
19298 fp_hi = gen_reg_rtx (SFmode);
19299 fp_lo = gen_reg_rtx (SFmode);
19300 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19301 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19302 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19303 0, OPTAB_DIRECT);
19304 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19305 0, OPTAB_DIRECT);
19306 if (!rtx_equal_p (target, fp_hi))
19307 emit_move_insn (target, fp_hi);
19308 }
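
/* Roughly, the expansion above computes, for a 32-bit unsigned input x,

       (float) (x >> 16) * 0x1p16f + (float) (x & 0xffff)

   splitting the value into halves small enough for the signed
   SImode->SFmode conversion to be exact.  */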
19309
19310 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19311 a vector of unsigned ints VAL to vector of floats TARGET. */
19312
19313 void
19314 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19315 {
19316 rtx tmp[8];
19317 REAL_VALUE_TYPE TWO16r;
19318 machine_mode intmode = GET_MODE (val);
19319 machine_mode fltmode = GET_MODE (target);
19320 rtx (*cvt) (rtx, rtx);
19321
19322 if (intmode == V4SImode)
19323 cvt = gen_floatv4siv4sf2;
19324 else
19325 cvt = gen_floatv8siv8sf2;
19326 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19327 tmp[0] = force_reg (intmode, tmp[0]);
19328 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19329 OPTAB_DIRECT);
19330 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19331 NULL_RTX, 1, OPTAB_DIRECT);
19332 tmp[3] = gen_reg_rtx (fltmode);
19333 emit_insn (cvt (tmp[3], tmp[1]));
19334 tmp[4] = gen_reg_rtx (fltmode);
19335 emit_insn (cvt (tmp[4], tmp[2]));
19336 real_ldexp (&TWO16r, &dconst1, 16);
19337 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19338 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19339 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19340 OPTAB_DIRECT);
19341 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19342 OPTAB_DIRECT);
19343 if (tmp[7] != target)
19344 emit_move_insn (target, tmp[7]);
19345 }
19346
19347 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19348 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19349 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19350 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
19351
19352 rtx
19353 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19354 {
19355 REAL_VALUE_TYPE TWO31r;
19356 rtx two31r, tmp[4];
19357 machine_mode mode = GET_MODE (val);
19358 machine_mode scalarmode = GET_MODE_INNER (mode);
19359 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19360 rtx (*cmp) (rtx, rtx, rtx, rtx);
19361 int i;
19362
19363 for (i = 0; i < 3; i++)
19364 tmp[i] = gen_reg_rtx (mode);
19365 real_ldexp (&TWO31r, &dconst1, 31);
19366 two31r = const_double_from_real_value (TWO31r, scalarmode);
19367 two31r = ix86_build_const_vector (mode, 1, two31r);
19368 two31r = force_reg (mode, two31r);
19369 switch (mode)
19370 {
19371 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19372 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19373 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19374 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19375 default: gcc_unreachable ();
19376 }
19377 tmp[3] = gen_rtx_LE (mode, two31r, val);
19378 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19379 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19380 0, OPTAB_DIRECT);
19381 if (intmode == V4SImode || TARGET_AVX2)
19382 *xorp = expand_simple_binop (intmode, ASHIFT,
19383 gen_lowpart (intmode, tmp[0]),
19384 GEN_INT (31), NULL_RTX, 0,
19385 OPTAB_DIRECT);
19386 else
19387 {
19388 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
19389 two31 = ix86_build_const_vector (intmode, 1, two31);
19390 *xorp = expand_simple_binop (intmode, AND,
19391 gen_lowpart (intmode, tmp[0]),
19392 two31, NULL_RTX, 0,
19393 OPTAB_DIRECT);
19394 }
19395 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19396 0, OPTAB_DIRECT);
19397 }
19398
19399 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19400 then replicate the value for all elements of the vector
19401 register. */
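/* For example, ix86_build_const_vector (V4SImode, true, GEN_INT (0xffff))
   yields the CONST_VECTOR {0xffff, 0xffff, 0xffff, 0xffff}; with VECT false
   only element 0 is VALUE and the remaining elements are zero.  */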
19402
19403 rtx
19404 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19405 {
19406 int i, n_elt;
19407 rtvec v;
19408 machine_mode scalar_mode;
19409
19410 switch (mode)
19411 {
19412 case V64QImode:
19413 case V32QImode:
19414 case V16QImode:
19415 case V32HImode:
19416 case V16HImode:
19417 case V8HImode:
19418 case V16SImode:
19419 case V8SImode:
19420 case V4SImode:
19421 case V8DImode:
19422 case V4DImode:
19423 case V2DImode:
19424 gcc_assert (vect);
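/* FALLTHRU */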
19425 case V16SFmode:
19426 case V8SFmode:
19427 case V4SFmode:
19428 case V8DFmode:
19429 case V4DFmode:
19430 case V2DFmode:
19431 n_elt = GET_MODE_NUNITS (mode);
19432 v = rtvec_alloc (n_elt);
19433 scalar_mode = GET_MODE_INNER (mode);
19434
19435 RTVEC_ELT (v, 0) = value;
19436
19437 for (i = 1; i < n_elt; ++i)
19438 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19439
19440 return gen_rtx_CONST_VECTOR (mode, v);
19441
19442 default:
19443 gcc_unreachable ();
19444 }
19445 }
19446
19447 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19448 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19449 for an SSE register. If VECT is true, then replicate the mask for
19450 all elements of the vector register. If INVERT is true, then create
19451 a mask excluding the sign bit. */
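/* For example, for V4SFmode this yields a vector whose elements all carry
   the SFmode bit pattern 0x80000000 (just the sign bit); with INVERT set
   the pattern is 0x7fffffff instead, suitable for clearing the sign bit.  */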
19452
19453 rtx
19454 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19455 {
19456 machine_mode vec_mode, imode;
19457 HOST_WIDE_INT hi, lo;
19458 int shift = 63;
19459 rtx v;
19460 rtx mask;
19461
19462 /* Find the sign bit, sign extended to 2*HWI. */
19463 switch (mode)
19464 {
19465 case V16SImode:
19466 case V16SFmode:
19467 case V8SImode:
19468 case V4SImode:
19469 case V8SFmode:
19470 case V4SFmode:
19471 vec_mode = mode;
19472 mode = GET_MODE_INNER (mode);
19473 imode = SImode;
19474 lo = 0x80000000, hi = lo < 0;
19475 break;
19476
19477 case V8DImode:
19478 case V4DImode:
19479 case V2DImode:
19480 case V8DFmode:
19481 case V4DFmode:
19482 case V2DFmode:
19483 vec_mode = mode;
19484 mode = GET_MODE_INNER (mode);
19485 imode = DImode;
19486 if (HOST_BITS_PER_WIDE_INT >= 64)
19487 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19488 else
19489 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19490 break;
19491
19492 case TImode:
19493 case TFmode:
19494 vec_mode = VOIDmode;
19495 if (HOST_BITS_PER_WIDE_INT >= 64)
19496 {
19497 imode = TImode;
19498 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19499 }
19500 else
19501 {
19502 rtvec vec;
19503
19504 imode = DImode;
19505 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19506
19507 if (invert)
19508 {
19509 lo = ~lo, hi = ~hi;
19510 v = constm1_rtx;
19511 }
19512 else
19513 v = const0_rtx;
19514
19515 mask = immed_double_const (lo, hi, imode);
19516
19517 vec = gen_rtvec (2, v, mask);
19518 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19519 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19520
19521 return v;
19522 }
19523 break;
19524
19525 default:
19526 gcc_unreachable ();
19527 }
19528
19529 if (invert)
19530 lo = ~lo, hi = ~hi;
19531
19532 /* Force this value into the low part of a fp vector constant. */
19533 mask = immed_double_const (lo, hi, imode);
19534 mask = gen_lowpart (mode, mask);
19535
19536 if (vec_mode == VOIDmode)
19537 return force_reg (mode, mask);
19538
19539 v = ix86_build_const_vector (vec_mode, vect, mask);
19540 return force_reg (vec_mode, v);
19541 }
19542
19543 /* Generate code for floating point ABS or NEG. */
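/* With SSE these reduce to bitwise operations on the sign bit: NEG is an
   XOR with the sign-bit mask and ABS is an AND with the inverted mask
   (e.g. xorps/andps for SFmode).  The mask built below is attached to the
   insn via a USE so that the later split can emit the actual logic
   operation.  */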
19544
19545 void
19546 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19547 rtx operands[])
19548 {
19549 rtx mask, set, dst, src;
19550 bool use_sse = false;
19551 bool vector_mode = VECTOR_MODE_P (mode);
19552 machine_mode vmode = mode;
19553
19554 if (vector_mode)
19555 use_sse = true;
19556 else if (mode == TFmode)
19557 use_sse = true;
19558 else if (TARGET_SSE_MATH)
19559 {
19560 use_sse = SSE_FLOAT_MODE_P (mode);
19561 if (mode == SFmode)
19562 vmode = V4SFmode;
19563 else if (mode == DFmode)
19564 vmode = V2DFmode;
19565 }
19566
19567 /* NEG and ABS performed with SSE use bitwise mask operations.
19568 Create the appropriate mask now. */
19569 if (use_sse)
19570 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19571 else
19572 mask = NULL_RTX;
19573
19574 dst = operands[0];
19575 src = operands[1];
19576
19577 set = gen_rtx_fmt_e (code, mode, src);
19578 set = gen_rtx_SET (VOIDmode, dst, set);
19579
19580 if (mask)
19581 {
19582 rtx use, clob;
19583 rtvec par;
19584
19585 use = gen_rtx_USE (VOIDmode, mask);
19586 if (vector_mode)
19587 par = gen_rtvec (2, set, use);
19588 else
19589 {
19590 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19591 par = gen_rtvec (3, set, use, clob);
19592 }
19593 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19594 }
19595 else
19596 emit_insn (set);
19597 }
19598
19599 /* Expand a copysign operation. Special case operand 0 being a constant. */
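/* The expansion follows the usual bit-mask identity
   copysign (x, y) = (x & ~signbit) | (y & signbit).  When the first source
   operand is a constant, its absolute value is pre-built as a vector
   constant so only the sign of the other operand has to be masked in at
   run time.  */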
19600
19601 void
19602 ix86_expand_copysign (rtx operands[])
19603 {
19604 machine_mode mode, vmode;
19605 rtx dest, op0, op1, mask, nmask;
19606
19607 dest = operands[0];
19608 op0 = operands[1];
19609 op1 = operands[2];
19610
19611 mode = GET_MODE (dest);
19612
19613 if (mode == SFmode)
19614 vmode = V4SFmode;
19615 else if (mode == DFmode)
19616 vmode = V2DFmode;
19617 else
19618 vmode = mode;
19619
19620 if (GET_CODE (op0) == CONST_DOUBLE)
19621 {
19622 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19623
19624 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19625 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19626
19627 if (mode == SFmode || mode == DFmode)
19628 {
19629 if (op0 == CONST0_RTX (mode))
19630 op0 = CONST0_RTX (vmode);
19631 else
19632 {
19633 rtx v = ix86_build_const_vector (vmode, false, op0);
19634
19635 op0 = force_reg (vmode, v);
19636 }
19637 }
19638 else if (op0 != CONST0_RTX (mode))
19639 op0 = force_reg (mode, op0);
19640
19641 mask = ix86_build_signbit_mask (vmode, 0, 0);
19642
19643 if (mode == SFmode)
19644 copysign_insn = gen_copysignsf3_const;
19645 else if (mode == DFmode)
19646 copysign_insn = gen_copysigndf3_const;
19647 else
19648 copysign_insn = gen_copysigntf3_const;
19649
19650 emit_insn (copysign_insn (dest, op0, op1, mask));
19651 }
19652 else
19653 {
19654 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19655
19656 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19657 mask = ix86_build_signbit_mask (vmode, 0, 0);
19658
19659 if (mode == SFmode)
19660 copysign_insn = gen_copysignsf3_var;
19661 else if (mode == DFmode)
19662 copysign_insn = gen_copysigndf3_var;
19663 else
19664 copysign_insn = gen_copysigntf3_var;
19665
19666 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19667 }
19668 }
19669
19670 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19671 be a constant, and so has already been expanded into a vector constant. */
19672
19673 void
19674 ix86_split_copysign_const (rtx operands[])
19675 {
19676 machine_mode mode, vmode;
19677 rtx dest, op0, mask, x;
19678
19679 dest = operands[0];
19680 op0 = operands[1];
19681 mask = operands[3];
19682
19683 mode = GET_MODE (dest);
19684 vmode = GET_MODE (mask);
19685
19686 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19687 x = gen_rtx_AND (vmode, dest, mask);
19688 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19689
19690 if (op0 != CONST0_RTX (vmode))
19691 {
19692 x = gen_rtx_IOR (vmode, dest, op0);
19693 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19694 }
19695 }
19696
19697 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19698 so we have to do two masks. */
19699
19700 void
19701 ix86_split_copysign_var (rtx operands[])
19702 {
19703 machine_mode mode, vmode;
19704 rtx dest, scratch, op0, op1, mask, nmask, x;
19705
19706 dest = operands[0];
19707 scratch = operands[1];
19708 op0 = operands[2];
19709 op1 = operands[3];
19710 nmask = operands[4];
19711 mask = operands[5];
19712
19713 mode = GET_MODE (dest);
19714 vmode = GET_MODE (mask);
19715
19716 if (rtx_equal_p (op0, op1))
19717 {
19718 /* Shouldn't happen often (it's useless, obviously), but when it does
19719 we'd generate incorrect code if we continue below. */
19720 emit_move_insn (dest, op0);
19721 return;
19722 }
19723
19724 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19725 {
19726 gcc_assert (REGNO (op1) == REGNO (scratch));
19727
19728 x = gen_rtx_AND (vmode, scratch, mask);
19729 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19730
19731 dest = mask;
19732 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19733 x = gen_rtx_NOT (vmode, dest);
19734 x = gen_rtx_AND (vmode, x, op0);
19735 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19736 }
19737 else
19738 {
19739 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19740 {
19741 x = gen_rtx_AND (vmode, scratch, mask);
19742 }
19743 else /* alternative 2,4 */
19744 {
19745 gcc_assert (REGNO (mask) == REGNO (scratch));
19746 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19747 x = gen_rtx_AND (vmode, scratch, op1);
19748 }
19749 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19750
19751 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19752 {
19753 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19754 x = gen_rtx_AND (vmode, dest, nmask);
19755 }
19756 else /* alternative 3,4 */
19757 {
19758 gcc_assert (REGNO (nmask) == REGNO (dest));
19759 dest = nmask;
19760 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19761 x = gen_rtx_AND (vmode, dest, op0);
19762 }
19763 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19764 }
19765
19766 x = gen_rtx_IOR (vmode, dest, scratch);
19767 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19768 }
19769
19770 /* Return TRUE or FALSE depending on whether the first SET in INSN
19771 has source and destination with matching CC modes, and whether the
19772 CC mode is at least as constrained as REQ_MODE. */
19773
19774 bool
19775 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19776 {
19777 rtx set;
19778 machine_mode set_mode;
19779
19780 set = PATTERN (insn);
19781 if (GET_CODE (set) == PARALLEL)
19782 set = XVECEXP (set, 0, 0);
19783 gcc_assert (GET_CODE (set) == SET);
19784 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19785
19786 set_mode = GET_MODE (SET_DEST (set));
19787 switch (set_mode)
19788 {
19789 case CCNOmode:
19790 if (req_mode != CCNOmode
19791 && (req_mode != CCmode
19792 || XEXP (SET_SRC (set), 1) != const0_rtx))
19793 return false;
19794 break;
19795 case CCmode:
19796 if (req_mode == CCGCmode)
19797 return false;
19798 /* FALLTHRU */
19799 case CCGCmode:
19800 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19801 return false;
19802 /* FALLTHRU */
19803 case CCGOCmode:
19804 if (req_mode == CCZmode)
19805 return false;
19806 /* FALLTHRU */
19807 case CCZmode:
19808 break;
19809
19810 case CCAmode:
19811 case CCCmode:
19812 case CCOmode:
19813 case CCSmode:
19814 if (set_mode != req_mode)
19815 return false;
19816 break;
19817
19818 default:
19819 gcc_unreachable ();
19820 }
19821
19822 return GET_MODE (SET_SRC (set)) == set_mode;
19823 }
19824
19825 /* Generate insn patterns to do an integer compare of OPERANDS. */
19826
19827 static rtx
19828 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19829 {
19830 machine_mode cmpmode;
19831 rtx tmp, flags;
19832
19833 cmpmode = SELECT_CC_MODE (code, op0, op1);
19834 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19835
19836 /* This is very simple, but making the interface the same as in the
19837 FP case makes the rest of the code easier. */
19838 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19839 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19840
19841 /* Return the test that should be put into the flags user, i.e.
19842 the bcc, scc, or cmov instruction. */
19843 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19844 }
19845
19846 /* Figure out whether to use ordered or unordered fp comparisons.
19847 Return the appropriate mode to use. */
19848
19849 machine_mode
19850 ix86_fp_compare_mode (enum rtx_code)
19851 {
19852 /* ??? In order to make all comparisons reversible, we do all comparisons
19853 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19854 all forms of trapping and non-trapping comparisons, we can make inequality
19855 comparisons trapping again, since it results in better code when using
19856 FCOM based compares. */
19857 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19858 }
19859
19860 machine_mode
19861 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19862 {
19863 machine_mode mode = GET_MODE (op0);
19864
19865 if (SCALAR_FLOAT_MODE_P (mode))
19866 {
19867 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19868 return ix86_fp_compare_mode (code);
19869 }
19870
19871 switch (code)
19872 {
19873 /* Only zero flag is needed. */
19874 case EQ: /* ZF=0 */
19875 case NE: /* ZF!=0 */
19876 return CCZmode;
19877 /* Codes needing carry flag. */
19878 case GEU: /* CF=0 */
19879 case LTU: /* CF=1 */
19880 /* Detect overflow checks. They need just the carry flag. */
19881 if (GET_CODE (op0) == PLUS
19882 && rtx_equal_p (op1, XEXP (op0, 0)))
19883 return CCCmode;
19884 else
19885 return CCmode;
19886 case GTU: /* CF=0 & ZF=0 */
19887 case LEU: /* CF=1 | ZF=1 */
19888 return CCmode;
19889 /* Codes possibly doable only with sign flag when
19890 comparing against zero. */
19891 case GE: /* SF=OF or SF=0 */
19892 case LT: /* SF<>OF or SF=1 */
19893 if (op1 == const0_rtx)
19894 return CCGOCmode;
19895 else
19896 /* For other cases the carry flag is not required. */
19897 return CCGCmode;
19898 /* Codes doable only with the sign flag when comparing
19899 against zero, but we lack a jump instruction for it,
19900 so we need to use relational tests against the overflow
19901 flag, which thus needs to be zero. */
19902 case GT: /* ZF=0 & SF=OF */
19903 case LE: /* ZF=1 | SF<>OF */
19904 if (op1 == const0_rtx)
19905 return CCNOmode;
19906 else
19907 return CCGCmode;
19908 /* The strcmp pattern does (use flags) and combine may ask us for the
19909 proper mode. */
19910 case USE:
19911 return CCmode;
19912 default:
19913 gcc_unreachable ();
19914 }
19915 }
19916
19917 /* Return the fixed registers used for condition codes. */
19918
19919 static bool
19920 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19921 {
19922 *p1 = FLAGS_REG;
19923 *p2 = FPSR_REG;
19924 return true;
19925 }
19926
19927 /* If two condition code modes are compatible, return a condition code
19928 mode which is compatible with both. Otherwise, return
19929 VOIDmode. */
19930
19931 static machine_mode
19932 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
19933 {
19934 if (m1 == m2)
19935 return m1;
19936
19937 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19938 return VOIDmode;
19939
19940 if ((m1 == CCGCmode && m2 == CCGOCmode)
19941 || (m1 == CCGOCmode && m2 == CCGCmode))
19942 return CCGCmode;
19943
19944 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19945 return m2;
19946 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19947 return m1;
19948
19949 switch (m1)
19950 {
19951 default:
19952 gcc_unreachable ();
19953
19954 case CCmode:
19955 case CCGCmode:
19956 case CCGOCmode:
19957 case CCNOmode:
19958 case CCAmode:
19959 case CCCmode:
19960 case CCOmode:
19961 case CCSmode:
19962 case CCZmode:
19963 switch (m2)
19964 {
19965 default:
19966 return VOIDmode;
19967
19968 case CCmode:
19969 case CCGCmode:
19970 case CCGOCmode:
19971 case CCNOmode:
19972 case CCAmode:
19973 case CCCmode:
19974 case CCOmode:
19975 case CCSmode:
19976 case CCZmode:
19977 return CCmode;
19978 }
19979
19980 case CCFPmode:
19981 case CCFPUmode:
19982 /* These are only compatible with themselves, which we already
19983 checked above. */
19984 return VOIDmode;
19985 }
19986 }
19987
19988
19989 /* Return a comparison we can do that is equivalent to
19990 swap_condition (code), apart possibly from orderedness.
19991 Never change orderedness if TARGET_IEEE_FP, returning
19992 UNKNOWN in that case if necessary. */
19993
19994 static enum rtx_code
19995 ix86_fp_swap_condition (enum rtx_code code)
19996 {
19997 switch (code)
19998 {
19999 case GT: /* GTU - CF=0 & ZF=0 */
20000 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
20001 case GE: /* GEU - CF=0 */
20002 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
20003 case UNLT: /* LTU - CF=1 */
20004 return TARGET_IEEE_FP ? UNKNOWN : GT;
20005 case UNLE: /* LEU - CF=1 | ZF=1 */
20006 return TARGET_IEEE_FP ? UNKNOWN : GE;
20007 default:
20008 return swap_condition (code);
20009 }
20010 }
20011
20012 /* Return the cost of comparison CODE using the best strategy for performance.
20013 All following functions use the number of instructions as a cost metric.
20014 In the future this should be tweaked to compute bytes for optimize_size and
20015 to take into account the performance of various instructions on various CPUs. */
20016
20017 static int
20018 ix86_fp_comparison_cost (enum rtx_code code)
20019 {
20020 int arith_cost;
20021
20022 /* The cost of code using bit-twiddling on %ah. */
20023 switch (code)
20024 {
20025 case UNLE:
20026 case UNLT:
20027 case LTGT:
20028 case GT:
20029 case GE:
20030 case UNORDERED:
20031 case ORDERED:
20032 case UNEQ:
20033 arith_cost = 4;
20034 break;
20035 case LT:
20036 case NE:
20037 case EQ:
20038 case UNGE:
20039 arith_cost = TARGET_IEEE_FP ? 5 : 4;
20040 break;
20041 case LE:
20042 case UNGT:
20043 arith_cost = TARGET_IEEE_FP ? 6 : 4;
20044 break;
20045 default:
20046 gcc_unreachable ();
20047 }
20048
20049 switch (ix86_fp_comparison_strategy (code))
20050 {
20051 case IX86_FPCMP_COMI:
20052 return arith_cost > 4 ? 3 : 2;
20053 case IX86_FPCMP_SAHF:
20054 return arith_cost > 4 ? 4 : 3;
20055 default:
20056 return arith_cost;
20057 }
20058 }
20059
20060 /* Return the strategy to use for floating-point comparison. We assume fcomi
20061 is always preferable where available, since that is also true when looking
20062 at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
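/* In other words: with fcomi (TARGET_CMOVE) the compare result goes
   straight into the integer flags; otherwise fnstsw copies the FPU status
   word into %ax and either sahf or explicit test/and arithmetic on %ah
   recreates the condition, which is what IX86_FPCMP_SAHF and
   IX86_FPCMP_ARITH stand for.  */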
20063
20064 enum ix86_fpcmp_strategy
20065 ix86_fp_comparison_strategy (enum rtx_code)
20066 {
20067 /* Do fcomi/sahf based test when profitable. */
20068
20069 if (TARGET_CMOVE)
20070 return IX86_FPCMP_COMI;
20071
20072 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20073 return IX86_FPCMP_SAHF;
20074
20075 return IX86_FPCMP_ARITH;
20076 }
20077
20078 /* Swap, force into registers, or otherwise massage the two operands
20079 to a fp comparison. The operands are updated in place; the new
20080 comparison code is returned. */
20081
20082 static enum rtx_code
20083 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20084 {
20085 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20086 rtx op0 = *pop0, op1 = *pop1;
20087 machine_mode op_mode = GET_MODE (op0);
20088 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20089
20090 /* All of the unordered compare instructions only work on registers.
20091 The same is true of the fcomi compare instructions. The XFmode
20092 compare instructions require registers except when comparing
20093 against zero or when converting operand 1 from fixed point to
20094 floating point. */
20095
20096 if (!is_sse
20097 && (fpcmp_mode == CCFPUmode
20098 || (op_mode == XFmode
20099 && ! (standard_80387_constant_p (op0) == 1
20100 || standard_80387_constant_p (op1) == 1)
20101 && GET_CODE (op1) != FLOAT)
20102 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20103 {
20104 op0 = force_reg (op_mode, op0);
20105 op1 = force_reg (op_mode, op1);
20106 }
20107 else
20108 {
20109 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20110 things around if they appear profitable, otherwise force op0
20111 into a register. */
20112
20113 if (standard_80387_constant_p (op0) == 0
20114 || (MEM_P (op0)
20115 && ! (standard_80387_constant_p (op1) == 0
20116 || MEM_P (op1))))
20117 {
20118 enum rtx_code new_code = ix86_fp_swap_condition (code);
20119 if (new_code != UNKNOWN)
20120 {
20121 std::swap (op0, op1);
20122 code = new_code;
20123 }
20124 }
20125
20126 if (!REG_P (op0))
20127 op0 = force_reg (op_mode, op0);
20128
20129 if (CONSTANT_P (op1))
20130 {
20131 int tmp = standard_80387_constant_p (op1);
20132 if (tmp == 0)
20133 op1 = validize_mem (force_const_mem (op_mode, op1));
20134 else if (tmp == 1)
20135 {
20136 if (TARGET_CMOVE)
20137 op1 = force_reg (op_mode, op1);
20138 }
20139 else
20140 op1 = force_reg (op_mode, op1);
20141 }
20142 }
20143
20144 /* Try to rearrange the comparison to make it cheaper. */
20145 if (ix86_fp_comparison_cost (code)
20146 > ix86_fp_comparison_cost (swap_condition (code))
20147 && (REG_P (op1) || can_create_pseudo_p ()))
20148 {
20149 std::swap (op0, op1);
20150 code = swap_condition (code);
20151 if (!REG_P (op0))
20152 op0 = force_reg (op_mode, op0);
20153 }
20154
20155 *pop0 = op0;
20156 *pop1 = op1;
20157 return code;
20158 }
20159
20160 /* Convert a comparison code we use to represent an FP comparison to the
20161 integer code that will result in a proper branch. Return UNKNOWN if no
20162 such code is available. */
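/* After an fcomi/comi style compare the FP relation is encoded only in CF
   and ZF, so the unsigned branch conditions are the ones that test it;
   that is why GT maps to GTU (ja) and GE to GEU (jae) below.  */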
20163
20164 enum rtx_code
20165 ix86_fp_compare_code_to_integer (enum rtx_code code)
20166 {
20167 switch (code)
20168 {
20169 case GT:
20170 return GTU;
20171 case GE:
20172 return GEU;
20173 case ORDERED:
20174 case UNORDERED:
20175 return code;
20176 break;
20177 case UNEQ:
20178 return EQ;
20179 break;
20180 case UNLT:
20181 return LTU;
20182 break;
20183 case UNLE:
20184 return LEU;
20185 break;
20186 case LTGT:
20187 return NE;
20188 break;
20189 default:
20190 return UNKNOWN;
20191 }
20192 }
20193
20194 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20195
20196 static rtx
20197 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20198 {
20199 machine_mode fpcmp_mode, intcmp_mode;
20200 rtx tmp, tmp2;
20201
20202 fpcmp_mode = ix86_fp_compare_mode (code);
20203 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20204
20205 /* Do fcomi/sahf based test when profitable. */
20206 switch (ix86_fp_comparison_strategy (code))
20207 {
20208 case IX86_FPCMP_COMI:
20209 intcmp_mode = fpcmp_mode;
20210 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20211 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20212 tmp);
20213 emit_insn (tmp);
20214 break;
20215
20216 case IX86_FPCMP_SAHF:
20217 intcmp_mode = fpcmp_mode;
20218 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20219 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20220 tmp);
20221
20222 if (!scratch)
20223 scratch = gen_reg_rtx (HImode);
20224 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20225 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20226 break;
20227
20228 case IX86_FPCMP_ARITH:
20229 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20230 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20231 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20232 if (!scratch)
20233 scratch = gen_reg_rtx (HImode);
20234 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
20235
20236 /* In the unordered case, we have to check C2 for NaN's, which
20237 doesn't happen to work out to anything nice combination-wise.
20238 So do some bit twiddling on the value we've got in AH to come
20239 up with an appropriate set of condition codes. */
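/* After fnstsw the relevant FPU status bits land in %ah as C0 = 0x01,
   C2 = 0x04 and C3 = 0x40, so the constants below read as 0x45 = C3|C2|C0,
   0x44 = C3|C2, 0x40 = C3 (equal), 0x05 = C2|C0 and 0x04 = C2
   (unordered).  */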
20240
20241 intcmp_mode = CCNOmode;
20242 switch (code)
20243 {
20244 case GT:
20245 case UNGT:
20246 if (code == GT || !TARGET_IEEE_FP)
20247 {
20248 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20249 code = EQ;
20250 }
20251 else
20252 {
20253 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20254 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20255 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20256 intcmp_mode = CCmode;
20257 code = GEU;
20258 }
20259 break;
20260 case LT:
20261 case UNLT:
20262 if (code == LT && TARGET_IEEE_FP)
20263 {
20264 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20265 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20266 intcmp_mode = CCmode;
20267 code = EQ;
20268 }
20269 else
20270 {
20271 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20272 code = NE;
20273 }
20274 break;
20275 case GE:
20276 case UNGE:
20277 if (code == GE || !TARGET_IEEE_FP)
20278 {
20279 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20280 code = EQ;
20281 }
20282 else
20283 {
20284 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20285 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20286 code = NE;
20287 }
20288 break;
20289 case LE:
20290 case UNLE:
20291 if (code == LE && TARGET_IEEE_FP)
20292 {
20293 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20294 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20295 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20296 intcmp_mode = CCmode;
20297 code = LTU;
20298 }
20299 else
20300 {
20301 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20302 code = NE;
20303 }
20304 break;
20305 case EQ:
20306 case UNEQ:
20307 if (code == EQ && TARGET_IEEE_FP)
20308 {
20309 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20310 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20311 intcmp_mode = CCmode;
20312 code = EQ;
20313 }
20314 else
20315 {
20316 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20317 code = NE;
20318 }
20319 break;
20320 case NE:
20321 case LTGT:
20322 if (code == NE && TARGET_IEEE_FP)
20323 {
20324 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20325 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20326 GEN_INT (0x40)));
20327 code = NE;
20328 }
20329 else
20330 {
20331 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20332 code = EQ;
20333 }
20334 break;
20335
20336 case UNORDERED:
20337 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20338 code = NE;
20339 break;
20340 case ORDERED:
20341 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20342 code = EQ;
20343 break;
20344
20345 default:
20346 gcc_unreachable ();
20347 }
20348 break;
20349
20350 default:
20351 gcc_unreachable();
20352 }
20353
20354 /* Return the test that should be put into the flags user, i.e.
20355 the bcc, scc, or cmov instruction. */
20356 return gen_rtx_fmt_ee (code, VOIDmode,
20357 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20358 const0_rtx);
20359 }
20360
20361 static rtx
20362 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20363 {
20364 rtx ret;
20365
20366 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20367 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20368
20369 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20370 {
20371 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20372 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20373 }
20374 else
20375 ret = ix86_expand_int_compare (code, op0, op1);
20376
20377 return ret;
20378 }
20379
20380 void
20381 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20382 {
20383 machine_mode mode = GET_MODE (op0);
20384 rtx tmp;
20385
20386 switch (mode)
20387 {
20388 case SFmode:
20389 case DFmode:
20390 case XFmode:
20391 case QImode:
20392 case HImode:
20393 case SImode:
20394 simple:
20395 tmp = ix86_expand_compare (code, op0, op1);
20396 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20397 gen_rtx_LABEL_REF (VOIDmode, label),
20398 pc_rtx);
20399 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20400 return;
20401
20402 case DImode:
20403 if (TARGET_64BIT)
20404 goto simple;
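/* FALLTHRU */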
20405 case TImode:
20406 /* Expand DImode/TImode branch into multiple compare+branch. */
20407 {
20408 rtx lo[2], hi[2];
20409 rtx_code_label *label2;
20410 enum rtx_code code1, code2, code3;
20411 machine_mode submode;
20412
20413 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20414 {
20415 std::swap (op0, op1);
20416 code = swap_condition (code);
20417 }
20418
20419 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20420 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20421
20422 submode = mode == DImode ? SImode : DImode;
20423
20424 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20425 avoid two branches. This costs one extra insn, so disable when
20426 optimizing for size. */
20427
20428 if ((code == EQ || code == NE)
20429 && (!optimize_insn_for_size_p ()
20430 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20431 {
20432 rtx xor0, xor1;
20433
20434 xor1 = hi[0];
20435 if (hi[1] != const0_rtx)
20436 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20437 NULL_RTX, 0, OPTAB_WIDEN);
20438
20439 xor0 = lo[0];
20440 if (lo[1] != const0_rtx)
20441 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20442 NULL_RTX, 0, OPTAB_WIDEN);
20443
20444 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20445 NULL_RTX, 0, OPTAB_WIDEN);
20446
20447 ix86_expand_branch (code, tmp, const0_rtx, label);
20448 return;
20449 }
20450
20451 /* Otherwise, if we are doing a less-than or greater-or-equal-than
20452 comparison, op1 is a constant and its low word is zero, then we can
20453 just examine the high word. Similarly for a low word of -1 and
20454 less-or-equal-than or greater-than. */
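/* For example, a DImode compare a < 0x200000000 has a zero low word in the
   constant, so a single compare of the high words against 2 decides the
   branch.  */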
20455
20456 if (CONST_INT_P (hi[1]))
20457 switch (code)
20458 {
20459 case LT: case LTU: case GE: case GEU:
20460 if (lo[1] == const0_rtx)
20461 {
20462 ix86_expand_branch (code, hi[0], hi[1], label);
20463 return;
20464 }
20465 break;
20466 case LE: case LEU: case GT: case GTU:
20467 if (lo[1] == constm1_rtx)
20468 {
20469 ix86_expand_branch (code, hi[0], hi[1], label);
20470 return;
20471 }
20472 break;
20473 default:
20474 break;
20475 }
20476
20477 /* Otherwise, we need two or three jumps. */
20478
20479 label2 = gen_label_rtx ();
20480
20481 code1 = code;
20482 code2 = swap_condition (code);
20483 code3 = unsigned_condition (code);
20484
20485 switch (code)
20486 {
20487 case LT: case GT: case LTU: case GTU:
20488 break;
20489
20490 case LE: code1 = LT; code2 = GT; break;
20491 case GE: code1 = GT; code2 = LT; break;
20492 case LEU: code1 = LTU; code2 = GTU; break;
20493 case GEU: code1 = GTU; code2 = LTU; break;
20494
20495 case EQ: code1 = UNKNOWN; code2 = NE; break;
20496 case NE: code2 = UNKNOWN; break;
20497
20498 default:
20499 gcc_unreachable ();
20500 }
20501
20502 /*
20503 * a < b =>
20504 * if (hi(a) < hi(b)) goto true;
20505 * if (hi(a) > hi(b)) goto false;
20506 * if (lo(a) < lo(b)) goto true;
20507 * false:
20508 */
20509
20510 if (code1 != UNKNOWN)
20511 ix86_expand_branch (code1, hi[0], hi[1], label);
20512 if (code2 != UNKNOWN)
20513 ix86_expand_branch (code2, hi[0], hi[1], label2);
20514
20515 ix86_expand_branch (code3, lo[0], lo[1], label);
20516
20517 if (code2 != UNKNOWN)
20518 emit_label (label2);
20519 return;
20520 }
20521
20522 default:
20523 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20524 goto simple;
20525 }
20526 }
20527
20528 /* Split branch based on floating point condition. */
20529 void
20530 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20531 rtx target1, rtx target2, rtx tmp)
20532 {
20533 rtx condition;
20534 rtx i;
20535
20536 if (target2 != pc_rtx)
20537 {
20538 std::swap (target1, target2);
20539 code = reverse_condition_maybe_unordered (code);
20540 }
20541
20542 condition = ix86_expand_fp_compare (code, op1, op2,
20543 tmp);
20544
20545 i = emit_jump_insn (gen_rtx_SET
20546 (VOIDmode, pc_rtx,
20547 gen_rtx_IF_THEN_ELSE (VOIDmode,
20548 condition, target1, target2)));
20549 if (split_branch_probability >= 0)
20550 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20551 }
20552
20553 void
20554 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20555 {
20556 rtx ret;
20557
20558 gcc_assert (GET_MODE (dest) == QImode);
20559
20560 ret = ix86_expand_compare (code, op0, op1);
20561 PUT_MODE (ret, QImode);
20562 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20563 }
20564
20565 /* Expand a comparison setting or clearing the carry flag. Return true when
20566 successful and set *POP to the comparison operation. */
20567 static bool
20568 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20569 {
20570 machine_mode mode =
20571 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20572
20573 /* Do not handle double-mode compares that go through the special path. */
20574 if (mode == (TARGET_64BIT ? TImode : DImode))
20575 return false;
20576
20577 if (SCALAR_FLOAT_MODE_P (mode))
20578 {
20579 rtx compare_op;
20580 rtx_insn *compare_seq;
20581
20582 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20583
20584 /* Shortcut: the following common codes never translate
20585 into carry flag compares. */
20586 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20587 || code == ORDERED || code == UNORDERED)
20588 return false;
20589
20590 /* These comparisons require the zero flag; swap operands so they no longer do. */
20591 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20592 && !TARGET_IEEE_FP)
20593 {
20594 std::swap (op0, op1);
20595 code = swap_condition (code);
20596 }
20597
20598 /* Try to expand the comparison and verify that we end up with a
20599 carry flag based comparison. This fails to be true only when
20600 we decide to expand the comparison using arithmetic, which is
20601 not a common scenario. */
20602 start_sequence ();
20603 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20604 compare_seq = get_insns ();
20605 end_sequence ();
20606
20607 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20608 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20609 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20610 else
20611 code = GET_CODE (compare_op);
20612
20613 if (code != LTU && code != GEU)
20614 return false;
20615
20616 emit_insn (compare_seq);
20617 *pop = compare_op;
20618 return true;
20619 }
20620
20621 if (!INTEGRAL_MODE_P (mode))
20622 return false;
20623
20624 switch (code)
20625 {
20626 case LTU:
20627 case GEU:
20628 break;
20629
20630 /* Convert a==0 into (unsigned)a<1. */
20631 case EQ:
20632 case NE:
20633 if (op1 != const0_rtx)
20634 return false;
20635 op1 = const1_rtx;
20636 code = (code == EQ ? LTU : GEU);
20637 break;
20638
20639 /* Convert a>b into b<a or a>=b+1. */
20640 case GTU:
20641 case LEU:
20642 if (CONST_INT_P (op1))
20643 {
20644 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20645 /* Bail out on overflow. We could still swap the operands, but that
20646 would force loading of the constant into a register. */
20647 if (op1 == const0_rtx
20648 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20649 return false;
20650 code = (code == GTU ? GEU : LTU);
20651 }
20652 else
20653 {
20654 std::swap (op0, op1);
20655 code = (code == GTU ? LTU : GEU);
20656 }
20657 break;
20658
20659 /* Convert a>=0 into (unsigned)a<0x80000000. */
20660 case LT:
20661 case GE:
20662 if (mode == DImode || op1 != const0_rtx)
20663 return false;
20664 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20665 code = (code == LT ? GEU : LTU);
20666 break;
20667 case LE:
20668 case GT:
20669 if (mode == DImode || op1 != constm1_rtx)
20670 return false;
20671 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20672 code = (code == LE ? GEU : LTU);
20673 break;
20674
20675 default:
20676 return false;
20677 }
20678 /* Swapping operands may cause a constant to appear as the first operand. */
20679 if (!nonimmediate_operand (op0, VOIDmode))
20680 {
20681 if (!can_create_pseudo_p ())
20682 return false;
20683 op0 = force_reg (mode, op0);
20684 }
20685 *pop = ix86_expand_compare (code, op0, op1);
20686 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20687 return true;
20688 }
20689
20690 bool
20691 ix86_expand_int_movcc (rtx operands[])
20692 {
20693 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20694 rtx_insn *compare_seq;
20695 rtx compare_op;
20696 machine_mode mode = GET_MODE (operands[0]);
20697 bool sign_bit_compare_p = false;
20698 rtx op0 = XEXP (operands[1], 0);
20699 rtx op1 = XEXP (operands[1], 1);
20700
20701 if (GET_MODE (op0) == TImode
20702 || (GET_MODE (op0) == DImode
20703 && !TARGET_64BIT))
20704 return false;
20705
20706 start_sequence ();
20707 compare_op = ix86_expand_compare (code, op0, op1);
20708 compare_seq = get_insns ();
20709 end_sequence ();
20710
20711 compare_code = GET_CODE (compare_op);
20712
20713 if ((op1 == const0_rtx && (code == GE || code == LT))
20714 || (op1 == constm1_rtx && (code == GT || code == LE)))
20715 sign_bit_compare_p = true;
20716
20717 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20718 HImode insns, we'd be swallowed in word prefix ops. */
20719
20720 if ((mode != HImode || TARGET_FAST_PREFIX)
20721 && (mode != (TARGET_64BIT ? TImode : DImode))
20722 && CONST_INT_P (operands[2])
20723 && CONST_INT_P (operands[3]))
20724 {
20725 rtx out = operands[0];
20726 HOST_WIDE_INT ct = INTVAL (operands[2]);
20727 HOST_WIDE_INT cf = INTVAL (operands[3]);
20728 HOST_WIDE_INT diff;
20729
20730 diff = ct - cf;
20731 /* Sign bit compares are better done using shifts than by using
20732 sbb. */
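/* The sbb idiom relied on below: after "cmp op0, op1" the carry flag holds
   the unsigned compare result, and "sbb dest, dest" computes
   dest - dest - CF, i.e. all-ones when the carry is set and zero otherwise.
   That -1/0 value is then turned into ct/cf with the add/or/not/and
   sequences shown in the comments that follow.  */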
20733 if (sign_bit_compare_p
20734 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20735 {
20736 /* Detect overlap between destination and compare sources. */
20737 rtx tmp = out;
20738
20739 if (!sign_bit_compare_p)
20740 {
20741 rtx flags;
20742 bool fpcmp = false;
20743
20744 compare_code = GET_CODE (compare_op);
20745
20746 flags = XEXP (compare_op, 0);
20747
20748 if (GET_MODE (flags) == CCFPmode
20749 || GET_MODE (flags) == CCFPUmode)
20750 {
20751 fpcmp = true;
20752 compare_code
20753 = ix86_fp_compare_code_to_integer (compare_code);
20754 }
20755
20756 /* To simplify rest of code, restrict to the GEU case. */
20757 if (compare_code == LTU)
20758 {
20759 std::swap (ct, cf);
20760 compare_code = reverse_condition (compare_code);
20761 code = reverse_condition (code);
20762 }
20763 else
20764 {
20765 if (fpcmp)
20766 PUT_CODE (compare_op,
20767 reverse_condition_maybe_unordered
20768 (GET_CODE (compare_op)));
20769 else
20770 PUT_CODE (compare_op,
20771 reverse_condition (GET_CODE (compare_op)));
20772 }
20773 diff = ct - cf;
20774
20775 if (reg_overlap_mentioned_p (out, op0)
20776 || reg_overlap_mentioned_p (out, op1))
20777 tmp = gen_reg_rtx (mode);
20778
20779 if (mode == DImode)
20780 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20781 else
20782 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20783 flags, compare_op));
20784 }
20785 else
20786 {
20787 if (code == GT || code == GE)
20788 code = reverse_condition (code);
20789 else
20790 {
20791 std::swap (ct, cf);
20792 diff = ct - cf;
20793 }
20794 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20795 }
20796
20797 if (diff == 1)
20798 {
20799 /*
20800 * cmpl op0,op1
20801 * sbbl dest,dest
20802 * [addl dest, ct]
20803 *
20804 * Size 5 - 8.
20805 */
20806 if (ct)
20807 tmp = expand_simple_binop (mode, PLUS,
20808 tmp, GEN_INT (ct),
20809 copy_rtx (tmp), 1, OPTAB_DIRECT);
20810 }
20811 else if (cf == -1)
20812 {
20813 /*
20814 * cmpl op0,op1
20815 * sbbl dest,dest
20816 * orl $ct, dest
20817 *
20818 * Size 8.
20819 */
20820 tmp = expand_simple_binop (mode, IOR,
20821 tmp, GEN_INT (ct),
20822 copy_rtx (tmp), 1, OPTAB_DIRECT);
20823 }
20824 else if (diff == -1 && ct)
20825 {
20826 /*
20827 * cmpl op0,op1
20828 * sbbl dest,dest
20829 * notl dest
20830 * [addl dest, cf]
20831 *
20832 * Size 8 - 11.
20833 */
20834 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20835 if (cf)
20836 tmp = expand_simple_binop (mode, PLUS,
20837 copy_rtx (tmp), GEN_INT (cf),
20838 copy_rtx (tmp), 1, OPTAB_DIRECT);
20839 }
20840 else
20841 {
20842 /*
20843 * cmpl op0,op1
20844 * sbbl dest,dest
20845 * [notl dest]
20846 * andl cf - ct, dest
20847 * [addl dest, ct]
20848 *
20849 * Size 8 - 11.
20850 */
20851
20852 if (cf == 0)
20853 {
20854 cf = ct;
20855 ct = 0;
20856 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20857 }
20858
20859 tmp = expand_simple_binop (mode, AND,
20860 copy_rtx (tmp),
20861 gen_int_mode (cf - ct, mode),
20862 copy_rtx (tmp), 1, OPTAB_DIRECT);
20863 if (ct)
20864 tmp = expand_simple_binop (mode, PLUS,
20865 copy_rtx (tmp), GEN_INT (ct),
20866 copy_rtx (tmp), 1, OPTAB_DIRECT);
20867 }
20868
20869 if (!rtx_equal_p (tmp, out))
20870 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20871
20872 return true;
20873 }
20874
20875 if (diff < 0)
20876 {
20877 machine_mode cmp_mode = GET_MODE (op0);
20878 enum rtx_code new_code;
20879
20880 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20881 {
20882 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20883
20884 /* We may be reversing an unordered compare to a normal compare, which
20885 is not valid in general (we may convert a non-trapping condition
20886 to a trapping one); however, on i386 we currently emit all
20887 comparisons unordered. */
20888 new_code = reverse_condition_maybe_unordered (code);
20889 }
20890 else
20891 new_code = ix86_reverse_condition (code, cmp_mode);
20892 if (new_code != UNKNOWN)
20893 {
20894 std::swap (ct, cf);
20895 diff = -diff;
20896 code = new_code;
20897 }
20898 }
20899
20900 compare_code = UNKNOWN;
20901 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20902 && CONST_INT_P (op1))
20903 {
20904 if (op1 == const0_rtx
20905 && (code == LT || code == GE))
20906 compare_code = code;
20907 else if (op1 == constm1_rtx)
20908 {
20909 if (code == LE)
20910 compare_code = LT;
20911 else if (code == GT)
20912 compare_code = GE;
20913 }
20914 }
20915
20916 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20917 if (compare_code != UNKNOWN
20918 && GET_MODE (op0) == GET_MODE (out)
20919 && (cf == -1 || ct == -1))
20920 {
20921 /* If the lea code below could be used, only optimize
20922 if it results in a 2-insn sequence. */
20923
20924 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20925 || diff == 3 || diff == 5 || diff == 9)
20926 || (compare_code == LT && ct == -1)
20927 || (compare_code == GE && cf == -1))
20928 {
20929 /*
20930 * notl op1 (if necessary)
20931 * sarl $31, op1
20932 * orl cf, op1
20933 */
20934 if (ct != -1)
20935 {
20936 cf = ct;
20937 ct = -1;
20938 code = reverse_condition (code);
20939 }
20940
20941 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20942
20943 out = expand_simple_binop (mode, IOR,
20944 out, GEN_INT (cf),
20945 out, 1, OPTAB_DIRECT);
20946 if (out != operands[0])
20947 emit_move_insn (operands[0], out);
20948
20949 return true;
20950 }
20951 }
20952
20953
20954 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20955 || diff == 3 || diff == 5 || diff == 9)
20956 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20957 && (mode != DImode
20958 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20959 {
20960 /*
20961 * xorl dest,dest
20962 * cmpl op1,op2
20963 * setcc dest
20964 * lea cf(dest*(ct-cf)),dest
20965 *
20966 * Size 14.
20967 *
20968 * This also catches the degenerate setcc-only case.
20969 */
20970
20971 rtx tmp;
20972 int nops;
20973
20974 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20975
20976 nops = 0;
20977 /* On x86_64 the lea instruction operates on Pmode, so we need
20978 to get the arithmetic done in the proper mode to match. */
20979 if (diff == 1)
20980 tmp = copy_rtx (out);
20981 else
20982 {
20983 rtx out1;
20984 out1 = copy_rtx (out);
20985 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20986 nops++;
20987 if (diff & 1)
20988 {
20989 tmp = gen_rtx_PLUS (mode, tmp, out1);
20990 nops++;
20991 }
20992 }
20993 if (cf != 0)
20994 {
20995 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20996 nops++;
20997 }
20998 if (!rtx_equal_p (tmp, out))
20999 {
21000 if (nops == 1)
21001 out = force_operand (tmp, copy_rtx (out));
21002 else
21003 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
21004 }
21005 if (!rtx_equal_p (out, operands[0]))
21006 emit_move_insn (operands[0], copy_rtx (out));
21007
21008 return true;
21009 }
21010
21011 /*
21012 * General case: Jumpful:
21013 * xorl dest,dest cmpl op1, op2
21014 * cmpl op1, op2 movl ct, dest
21015 * setcc dest jcc 1f
21016 * decl dest movl cf, dest
21017 * andl (cf-ct),dest 1:
21018 * addl ct,dest
21019 *
21020 * Size 20. Size 14.
21021 *
21022 * This is reasonably steep, but branch mispredict costs are
21023 * high on modern CPUs, so consider failing only if optimizing
21024 * for space.
21025 */
21026
21027 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21028 && BRANCH_COST (optimize_insn_for_speed_p (),
21029 false) >= 2)
21030 {
21031 if (cf == 0)
21032 {
21033 machine_mode cmp_mode = GET_MODE (op0);
21034 enum rtx_code new_code;
21035
21036 if (SCALAR_FLOAT_MODE_P (cmp_mode))
21037 {
21038 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21039
21040 /* We may be reversing an unordered compare to a normal compare,
21041 which is not valid in general (we may convert a non-trapping
21042 condition to a trapping one); however, on i386 we currently
21043 emit all comparisons unordered. */
21044 new_code = reverse_condition_maybe_unordered (code);
21045 }
21046 else
21047 {
21048 new_code = ix86_reverse_condition (code, cmp_mode);
21049 if (compare_code != UNKNOWN && new_code != UNKNOWN)
21050 compare_code = reverse_condition (compare_code);
21051 }
21052
21053 if (new_code != UNKNOWN)
21054 {
21055 cf = ct;
21056 ct = 0;
21057 code = new_code;
21058 }
21059 }
21060
21061 if (compare_code != UNKNOWN)
21062 {
21063 /* notl op1 (if needed)
21064 sarl $31, op1
21065 andl (cf-ct), op1
21066 addl ct, op1
21067
21068 For x < 0 (resp. x <= -1) there will be no notl,
21069 so if possible swap the constants to get rid of the
21070 complement.
21071 True/false will be -1/0 while code below (store flag
21072 followed by decrement) is 0/-1, so the constants need
21073 to be exchanged once more. */
21074
21075 if (compare_code == GE || !cf)
21076 {
21077 code = reverse_condition (code);
21078 compare_code = LT;
21079 }
21080 else
21081 std::swap (ct, cf);
21082
21083 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21084 }
21085 else
21086 {
21087 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21088
21089 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21090 constm1_rtx,
21091 copy_rtx (out), 1, OPTAB_DIRECT);
21092 }
21093
21094 out = expand_simple_binop (mode, AND, copy_rtx (out),
21095 gen_int_mode (cf - ct, mode),
21096 copy_rtx (out), 1, OPTAB_DIRECT);
21097 if (ct)
21098 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21099 copy_rtx (out), 1, OPTAB_DIRECT);
21100 if (!rtx_equal_p (out, operands[0]))
21101 emit_move_insn (operands[0], copy_rtx (out));
21102
21103 return true;
21104 }
21105 }
21106
21107 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21108 {
21109 /* Try a few things more with specific constants and a variable. */
21110
21111 optab op;
21112 rtx var, orig_out, out, tmp;
21113
21114 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21115 return false;
21116
21117 /* If one of the two operands is an interesting constant, load a
21118 constant with the above and mask it in with a logical operation. */
21119
21120 if (CONST_INT_P (operands[2]))
21121 {
21122 var = operands[3];
21123 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21124 operands[3] = constm1_rtx, op = and_optab;
21125 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21126 operands[3] = const0_rtx, op = ior_optab;
21127 else
21128 return false;
21129 }
21130 else if (CONST_INT_P (operands[3]))
21131 {
21132 var = operands[2];
21133 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21134 operands[2] = constm1_rtx, op = and_optab;
21135 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
21136 operands[2] = const0_rtx, op = ior_optab;
21137 else
21138 return false;
21139 }
21140 else
21141 return false;
21142
21143 orig_out = operands[0];
21144 tmp = gen_reg_rtx (mode);
21145 operands[0] = tmp;
21146
21147 /* Recurse to get the constant loaded. */
21148 if (ix86_expand_int_movcc (operands) == 0)
21149 return false;
21150
21151 /* Mask in the interesting variable. */
21152 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21153 OPTAB_WIDEN);
21154 if (!rtx_equal_p (out, orig_out))
21155 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21156
21157 return true;
21158 }
21159
21160 /*
21161 * For comparison with above,
21162 *
21163 * movl cf,dest
21164 * movl ct,tmp
21165 * cmpl op1,op2
21166 * cmovcc tmp,dest
21167 *
21168 * Size 15.
21169 */
21170
21171 if (! nonimmediate_operand (operands[2], mode))
21172 operands[2] = force_reg (mode, operands[2]);
21173 if (! nonimmediate_operand (operands[3], mode))
21174 operands[3] = force_reg (mode, operands[3]);
21175
21176 if (! register_operand (operands[2], VOIDmode)
21177 && (mode == QImode
21178 || ! register_operand (operands[3], VOIDmode)))
21179 operands[2] = force_reg (mode, operands[2]);
21180
21181 if (mode == QImode
21182 && ! register_operand (operands[3], VOIDmode))
21183 operands[3] = force_reg (mode, operands[3]);
21184
21185 emit_insn (compare_seq);
21186 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21187 gen_rtx_IF_THEN_ELSE (mode,
21188 compare_op, operands[2],
21189 operands[3])));
21190 return true;
21191 }
21192
21193 /* Swap, force into registers, or otherwise massage the two operands
21194 to an sse comparison with a mask result. Thus we differ a bit from
21195 ix86_prepare_fp_compare_args which expects to produce a flags result.
21196
21197 The DEST operand exists to help determine whether to commute commutative
21198 operators. The POP0/POP1 operands are updated in place. The new
21199 comparison code is returned, or UNKNOWN if not implementable. */
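/* Before AVX the SSE compare instructions only provide the EQ/LT/LE/UNORD
   predicates and their negations, so e.g. GT or GE has to be rewritten by
   swapping the operands into LT resp. LE; that is what the GE/GT/UNLE/UNLT
   arm below does.  */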
21200
21201 static enum rtx_code
21202 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21203 rtx *pop0, rtx *pop1)
21204 {
21205 switch (code)
21206 {
21207 case LTGT:
21208 case UNEQ:
21209 /* AVX supports all the needed comparisons. */
21210 if (TARGET_AVX)
21211 break;
21212 /* We have no LTGT as an operator. We could implement it with
21213 NE & ORDERED, but this requires an extra temporary. It's
21214 not clear that it's worth it. */
21215 return UNKNOWN;
21216
21217 case LT:
21218 case LE:
21219 case UNGT:
21220 case UNGE:
21221 /* These are supported directly. */
21222 break;
21223
21224 case EQ:
21225 case NE:
21226 case UNORDERED:
21227 case ORDERED:
21228 /* AVX has 3 operand comparisons, no need to swap anything. */
21229 if (TARGET_AVX)
21230 break;
21231 /* For commutative operators, try to canonicalize the destination
21232 operand to be first in the comparison - this helps reload to
21233 avoid extra moves. */
21234 if (!dest || !rtx_equal_p (dest, *pop1))
21235 break;
21236 /* FALLTHRU */
21237
21238 case GE:
21239 case GT:
21240 case UNLE:
21241 case UNLT:
21242 /* These are not supported directly before AVX, and furthermore
21243 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21244 comparison operands to transform into something that is
21245 supported. */
21246 std::swap (*pop0, *pop1);
21247 code = swap_condition (code);
21248 break;
21249
21250 default:
21251 gcc_unreachable ();
21252 }
21253
21254 return code;
21255 }
21256
21257 /* Detect conditional moves that exactly match min/max operational
21258 semantics. Note that this is IEEE safe, as long as we don't
21259 interchange the operands.
21260
21261 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21262 and TRUE if the operation is successful and instructions are emitted. */
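/* For example, dest = (a < b) ? a : b maps onto a single minss/minps.
   Those instructions return the second operand when the operands are
   unordered or compare equal, so the match is only IEEE safe as long as
   the original operand order is kept, which is why the operands are never
   interchanged here.  */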
21263
21264 static bool
21265 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21266 rtx cmp_op1, rtx if_true, rtx if_false)
21267 {
21268 machine_mode mode;
21269 bool is_min;
21270 rtx tmp;
21271
21272 if (code == LT)
21273 ;
21274 else if (code == UNGE)
21275 std::swap (if_true, if_false);
21276 else
21277 return false;
21278
21279 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21280 is_min = true;
21281 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21282 is_min = false;
21283 else
21284 return false;
21285
21286 mode = GET_MODE (dest);
21287
21288 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21289 but MODE may be a vector mode and thus not appropriate. */
21290 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21291 {
21292 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21293 rtvec v;
21294
21295 if_true = force_reg (mode, if_true);
21296 v = gen_rtvec (2, if_true, if_false);
21297 tmp = gen_rtx_UNSPEC (mode, v, u);
21298 }
21299 else
21300 {
21301 code = is_min ? SMIN : SMAX;
21302 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21303 }
21304
21305 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
21306 return true;
21307 }
21308
21309 /* Expand an sse vector comparison. Return the register with the result. */
21310
21311 static rtx
21312 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21313 rtx op_true, rtx op_false)
21314 {
21315 machine_mode mode = GET_MODE (dest);
21316 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21317
21318 /* In the general case the result of the comparison can differ from the operands' type. */
21319 machine_mode cmp_mode;
21320
21321 /* In AVX512F the result of comparison is an integer mask. */
21322 bool maskcmp = false;
21323 rtx x;
21324
21325 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21326 {
21327 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21328 gcc_assert (cmp_mode != BLKmode);
21329
21330 maskcmp = true;
21331 }
21332 else
21333 cmp_mode = cmp_ops_mode;
21334
21335
21336 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21337 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21338 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21339
21340 if (optimize
21341 || reg_overlap_mentioned_p (dest, op_true)
21342 || reg_overlap_mentioned_p (dest, op_false))
21343 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21344
21345 /* Compare patterns for int modes are unspec in AVX512F only. */
21346 if (maskcmp && (code == GT || code == EQ))
21347 {
21348 rtx (*gen)(rtx, rtx, rtx);
21349
21350 switch (cmp_ops_mode)
21351 {
21352 case V64QImode:
21353 gcc_assert (TARGET_AVX512BW);
21354 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
21355 break;
21356 case V32HImode:
21357 gcc_assert (TARGET_AVX512BW);
21358 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
21359 break;
21360 case V16SImode:
21361 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21362 break;
21363 case V8DImode:
21364 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21365 break;
21366 default:
21367 gen = NULL;
21368 }
21369
21370 if (gen)
21371 {
21372 emit_insn (gen (dest, cmp_op0, cmp_op1));
21373 return dest;
21374 }
21375 }
21376 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21377
21378 if (cmp_mode != mode && !maskcmp)
21379 {
21380 x = force_reg (cmp_ops_mode, x);
21381 convert_move (dest, x, false);
21382 }
21383 else
21384 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21385
21386 return dest;
21387 }
21388
21389 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21390 operations. This is used for both scalar and vector conditional moves. */
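/* The generic fallback at the end is the classic mask-and-merge sequence
   dest = (cmp & op_true) | (~cmp & op_false); the earlier cases shortcut
   this when one arm is all-zeros or all-ones, use the blendv family on
   SSE4.1/AVX/AVX2, or the blendm instructions when the comparison produced
   an AVX512 mask.  */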
21391
21392 static void
21393 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21394 {
21395 machine_mode mode = GET_MODE (dest);
21396 machine_mode cmpmode = GET_MODE (cmp);
21397
21398 /* In AVX512F the result of comparison is an integer mask. */
21399 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21400
21401 rtx t2, t3, x;
21402
21403 if (vector_all_ones_operand (op_true, mode)
21404 && rtx_equal_p (op_false, CONST0_RTX (mode))
21405 && !maskcmp)
21406 {
21407 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
21408 }
21409 else if (op_false == CONST0_RTX (mode)
21410 && !maskcmp)
21411 {
21412 op_true = force_reg (mode, op_true);
21413 x = gen_rtx_AND (mode, cmp, op_true);
21414 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21415 }
21416 else if (op_true == CONST0_RTX (mode)
21417 && !maskcmp)
21418 {
21419 op_false = force_reg (mode, op_false);
21420 x = gen_rtx_NOT (mode, cmp);
21421 x = gen_rtx_AND (mode, x, op_false);
21422 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21423 }
21424 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21425 && !maskcmp)
21426 {
21427 op_false = force_reg (mode, op_false);
21428 x = gen_rtx_IOR (mode, cmp, op_false);
21429 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21430 }
21431 else if (TARGET_XOP
21432 && !maskcmp)
21433 {
21434 op_true = force_reg (mode, op_true);
21435
21436 if (!nonimmediate_operand (op_false, mode))
21437 op_false = force_reg (mode, op_false);
21438
21439 emit_insn (gen_rtx_SET (mode, dest,
21440 gen_rtx_IF_THEN_ELSE (mode, cmp,
21441 op_true,
21442 op_false)));
21443 }
21444 else
21445 {
21446 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21447 rtx d = dest;
21448
21449 if (!nonimmediate_operand (op_true, mode))
21450 op_true = force_reg (mode, op_true);
21451
21452 op_false = force_reg (mode, op_false);
21453
21454 switch (mode)
21455 {
21456 case V4SFmode:
21457 if (TARGET_SSE4_1)
21458 gen = gen_sse4_1_blendvps;
21459 break;
21460 case V2DFmode:
21461 if (TARGET_SSE4_1)
21462 gen = gen_sse4_1_blendvpd;
21463 break;
21464 case V16QImode:
21465 case V8HImode:
21466 case V4SImode:
21467 case V2DImode:
21468 if (TARGET_SSE4_1)
21469 {
21470 gen = gen_sse4_1_pblendvb;
21471 if (mode != V16QImode)
21472 d = gen_reg_rtx (V16QImode);
21473 op_false = gen_lowpart (V16QImode, op_false);
21474 op_true = gen_lowpart (V16QImode, op_true);
21475 cmp = gen_lowpart (V16QImode, cmp);
21476 }
21477 break;
21478 case V8SFmode:
21479 if (TARGET_AVX)
21480 gen = gen_avx_blendvps256;
21481 break;
21482 case V4DFmode:
21483 if (TARGET_AVX)
21484 gen = gen_avx_blendvpd256;
21485 break;
21486 case V32QImode:
21487 case V16HImode:
21488 case V8SImode:
21489 case V4DImode:
21490 if (TARGET_AVX2)
21491 {
21492 gen = gen_avx2_pblendvb;
21493 if (mode != V32QImode)
21494 d = gen_reg_rtx (V32QImode);
21495 op_false = gen_lowpart (V32QImode, op_false);
21496 op_true = gen_lowpart (V32QImode, op_true);
21497 cmp = gen_lowpart (V32QImode, cmp);
21498 }
21499 break;
21500
21501 case V64QImode:
21502 gen = gen_avx512bw_blendmv64qi;
21503 break;
21504 case V32HImode:
21505 gen = gen_avx512bw_blendmv32hi;
21506 break;
21507 case V16SImode:
21508 gen = gen_avx512f_blendmv16si;
21509 break;
21510 case V8DImode:
21511 gen = gen_avx512f_blendmv8di;
21512 break;
21513 case V8DFmode:
21514 gen = gen_avx512f_blendmv8df;
21515 break;
21516 case V16SFmode:
21517 gen = gen_avx512f_blendmv16sf;
21518 break;
21519
21520 default:
21521 break;
21522 }
21523
21524 if (gen != NULL)
21525 {
21526 emit_insn (gen (d, op_false, op_true, cmp));
21527 if (d != dest)
21528 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21529 }
21530 else
21531 {
21532 op_true = force_reg (mode, op_true);
21533
21534 t2 = gen_reg_rtx (mode);
21535 if (optimize)
21536 t3 = gen_reg_rtx (mode);
21537 else
21538 t3 = dest;
21539
21540 x = gen_rtx_AND (mode, op_true, cmp);
21541 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21542
21543 x = gen_rtx_NOT (mode, cmp);
21544 x = gen_rtx_AND (mode, x, op_false);
21545 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21546
21547 x = gen_rtx_IOR (mode, t3, t2);
21548 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21549 }
21550 }
21551 }
21552
21553 /* Expand a floating-point conditional move. Return true if successful. */
21554
21555 bool
21556 ix86_expand_fp_movcc (rtx operands[])
21557 {
21558 machine_mode mode = GET_MODE (operands[0]);
21559 enum rtx_code code = GET_CODE (operands[1]);
21560 rtx tmp, compare_op;
21561 rtx op0 = XEXP (operands[1], 0);
21562 rtx op1 = XEXP (operands[1], 1);
21563
21564 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21565 {
21566 machine_mode cmode;
21567
21568 /* Since we've no cmove for sse registers, don't force bad register
21569 allocation just to gain access to it. Deny movcc when the
21570 comparison mode doesn't match the move mode. */
21571 cmode = GET_MODE (op0);
21572 if (cmode == VOIDmode)
21573 cmode = GET_MODE (op1);
21574 if (cmode != mode)
21575 return false;
21576
21577 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21578 if (code == UNKNOWN)
21579 return false;
21580
21581 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21582 operands[2], operands[3]))
21583 return true;
21584
21585 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21586 operands[2], operands[3]);
21587 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21588 return true;
21589 }
21590
21591 if (GET_MODE (op0) == TImode
21592 || (GET_MODE (op0) == DImode
21593 && !TARGET_64BIT))
21594 return false;
21595
21596 /* The floating point conditional move instructions don't directly
21597 support conditions resulting from a signed integer comparison. */
21598
21599 compare_op = ix86_expand_compare (code, op0, op1);
21600 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21601 {
21602 tmp = gen_reg_rtx (QImode);
21603 ix86_expand_setcc (tmp, code, op0, op1);
21604
21605 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21606 }
21607
21608 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21609 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21610 operands[2], operands[3])));
21611
21612 return true;
21613 }
21614
21615 /* Expand a floating-point vector conditional move; a vcond operation
21616 rather than a movcc operation. */
21617
21618 bool
21619 ix86_expand_fp_vcond (rtx operands[])
21620 {
21621 enum rtx_code code = GET_CODE (operands[3]);
21622 rtx cmp;
21623
21624 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21625 &operands[4], &operands[5]);
21626 if (code == UNKNOWN)
21627 {
21628 rtx temp;
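/* LTGT and UNEQ have no direct SSE comparison; the switch below builds
   them from two comparisons: LTGT == ORDERED & NE, UNEQ == UNORDERED | EQ.  */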
21629 switch (GET_CODE (operands[3]))
21630 {
21631 case LTGT:
21632 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21633 operands[5], operands[0], operands[0]);
21634 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21635 operands[5], operands[1], operands[2]);
21636 code = AND;
21637 break;
21638 case UNEQ:
21639 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21640 operands[5], operands[0], operands[0]);
21641 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21642 operands[5], operands[1], operands[2]);
21643 code = IOR;
21644 break;
21645 default:
21646 gcc_unreachable ();
21647 }
21648 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21649 OPTAB_DIRECT);
21650 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21651 return true;
21652 }
21653
21654 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21655 operands[5], operands[1], operands[2]))
21656 return true;
21657
21658 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21659 operands[1], operands[2]);
21660 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21661 return true;
21662 }
21663
21664 /* Expand a signed/unsigned integral vector conditional move. */
21665
21666 bool
21667 ix86_expand_int_vcond (rtx operands[])
21668 {
21669 machine_mode data_mode = GET_MODE (operands[0]);
21670 machine_mode mode = GET_MODE (operands[4]);
21671 enum rtx_code code = GET_CODE (operands[3]);
21672 bool negate = false;
21673 rtx x, cop0, cop1;
21674
21675 cop0 = operands[4];
21676 cop1 = operands[5];
21677
21678 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21679 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21680 if ((code == LT || code == GE)
21681 && data_mode == mode
21682 && cop1 == CONST0_RTX (mode)
21683 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21684 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21685 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21686 && (GET_MODE_SIZE (data_mode) == 16
21687 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21688 {
21689 rtx negop = operands[2 - (code == LT)];
21690 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21691 if (negop == CONST1_RTX (data_mode))
21692 {
21693 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21694 operands[0], 1, OPTAB_DIRECT);
21695 if (res != operands[0])
21696 emit_move_insn (operands[0], res);
21697 return true;
21698 }
21699 else if (GET_MODE_INNER (data_mode) != DImode
21700 && vector_all_ones_operand (negop, data_mode))
21701 {
21702 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21703 operands[0], 0, OPTAB_DIRECT);
21704 if (res != operands[0])
21705 emit_move_insn (operands[0], res);
21706 return true;
21707 }
21708 }
21709
21710 if (!nonimmediate_operand (cop1, mode))
21711 cop1 = force_reg (mode, cop1);
21712 if (!general_operand (operands[1], data_mode))
21713 operands[1] = force_reg (data_mode, operands[1]);
21714 if (!general_operand (operands[2], data_mode))
21715 operands[2] = force_reg (data_mode, operands[2]);
21716
21717 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21718 if (TARGET_XOP
21719 && (mode == V16QImode || mode == V8HImode
21720 || mode == V4SImode || mode == V2DImode))
21721 ;
21722 else
21723 {
21724 /* Canonicalize the comparison to EQ, GT, GTU. */
21725 switch (code)
21726 {
21727 case EQ:
21728 case GT:
21729 case GTU:
21730 break;
21731
21732 case NE:
21733 case LE:
21734 case LEU:
21735 code = reverse_condition (code);
21736 negate = true;
21737 break;
21738
21739 case GE:
21740 case GEU:
21741 code = reverse_condition (code);
21742 negate = true;
21743 /* FALLTHRU */
21744
21745 case LT:
21746 case LTU:
21747 std::swap (cop0, cop1);
21748 code = swap_condition (code);
21749 break;
21750
21751 default:
21752 gcc_unreachable ();
21753 }
21754
21755 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21756 if (mode == V2DImode)
21757 {
21758 switch (code)
21759 {
21760 case EQ:
21761 /* SSE4.1 supports EQ. */
21762 if (!TARGET_SSE4_1)
21763 return false;
21764 break;
21765
21766 case GT:
21767 case GTU:
21768 /* SSE4.2 supports GT/GTU. */
21769 if (!TARGET_SSE4_2)
21770 return false;
21771 break;
21772
21773 default:
21774 gcc_unreachable ();
21775 }
21776 }
21777
21778 /* Unsigned parallel compare is not supported by the hardware.
21779 Play some tricks to turn this into a signed comparison
21780 against 0. */
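/* Illustration for 32-bit elements: a >u b is equivalent to
   (a - 0x80000000) >s (b - 0x80000000); subtracting the sign-bit constant
   from both operands flips the top bit only and thereby maps the unsigned
   ordering onto the signed ordering.  */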
21781 if (code == GTU)
21782 {
21783 cop0 = force_reg (mode, cop0);
21784
21785 switch (mode)
21786 {
21787 case V16SImode:
21788 case V8DImode:
21789 case V8SImode:
21790 case V4DImode:
21791 case V4SImode:
21792 case V2DImode:
21793 {
21794 rtx t1, t2, mask;
21795 rtx (*gen_sub3) (rtx, rtx, rtx);
21796
21797 switch (mode)
21798 {
21799 case V16SImode: gen_sub3 = gen_subv16si3; break;
21800 case V8DImode: gen_sub3 = gen_subv8di3; break;
21801 case V8SImode: gen_sub3 = gen_subv8si3; break;
21802 case V4DImode: gen_sub3 = gen_subv4di3; break;
21803 case V4SImode: gen_sub3 = gen_subv4si3; break;
21804 case V2DImode: gen_sub3 = gen_subv2di3; break;
21805 default:
21806 gcc_unreachable ();
21807 }
21808 /* Subtract (-(INT MAX) - 1) from both operands to make
21809 them signed. */
21810 mask = ix86_build_signbit_mask (mode, true, false);
21811 t1 = gen_reg_rtx (mode);
21812 emit_insn (gen_sub3 (t1, cop0, mask));
21813
21814 t2 = gen_reg_rtx (mode);
21815 emit_insn (gen_sub3 (t2, cop1, mask));
21816
21817 cop0 = t1;
21818 cop1 = t2;
21819 code = GT;
21820 }
21821 break;
21822
21823 case V64QImode:
21824 case V32HImode:
21825 case V32QImode:
21826 case V16HImode:
21827 case V16QImode:
21828 case V8HImode:
21829 /* Perform a parallel unsigned saturating subtraction. */
21830 x = gen_reg_rtx (mode);
21831 emit_insn (gen_rtx_SET (VOIDmode, x,
21832 gen_rtx_US_MINUS (mode, cop0, cop1)));
21833
21834 cop0 = x;
21835 cop1 = CONST0_RTX (mode);
21836 code = EQ;
21837 negate = !negate;
21838 break;
21839
21840 default:
21841 gcc_unreachable ();
21842 }
21843 }
21844 }
21845
21846 /* Allow the comparison to be done in one mode, but the movcc to
21847 happen in another mode. */
21848 if (data_mode == mode)
21849 {
21850 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21851 operands[1+negate], operands[2-negate]);
21852 }
21853 else
21854 {
21855 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21856 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21857 operands[1+negate], operands[2-negate]);
21858 if (GET_MODE (x) == mode)
21859 x = gen_lowpart (data_mode, x);
21860 }
21861
21862 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21863 operands[2-negate]);
21864 return true;
21865 }
21866
21867 /* AVX512F does support 64-byte integer vector operations,
21868 thus the longest vector we are faced with is V64QImode. */
21869 #define MAX_VECT_LEN 64
21870
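/* Describes one constant permutation being expanded: the destination and the
   one or two source operands, the requested element permutation (one index
   per element of the result), the vector mode and element count, whether
   both sources are the same (one_operand_p), and whether we are only testing
   that the permutation is implementable (testing_p) rather than emitting
   code.  */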
21871 struct expand_vec_perm_d
21872 {
21873 rtx target, op0, op1;
21874 unsigned char perm[MAX_VECT_LEN];
21875 machine_mode vmode;
21876 unsigned char nelt;
21877 bool one_operand_p;
21878 bool testing_p;
21879 };
21880
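/* Try to expand the permutation with one of the AVX-512 VPERMI2 variable
   two-source permute patterns.  Returns true and emits the insn when the
   mode is supported by the enabled ISA, false otherwise so the caller can
   fall back to other strategies.  */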
21881 static bool
21882 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21883 struct expand_vec_perm_d *d)
21884 {
21885 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21886 expander, so args are either in d, or in op0, op1 etc. */
21887 machine_mode mode = GET_MODE (d ? d->op0 : op0);
21888 machine_mode maskmode = mode;
21889 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21890
21891 switch (mode)
21892 {
21893 case V8HImode:
21894 if (TARGET_AVX512VL && TARGET_AVX512BW)
21895 gen = gen_avx512vl_vpermi2varv8hi3;
21896 break;
21897 case V16HImode:
21898 if (TARGET_AVX512VL && TARGET_AVX512BW)
21899 gen = gen_avx512vl_vpermi2varv16hi3;
21900 break;
21901 case V64QImode:
21902 if (TARGET_AVX512VBMI)
21903 gen = gen_avx512bw_vpermi2varv64qi3;
21904 break;
21905 case V32HImode:
21906 if (TARGET_AVX512BW)
21907 gen = gen_avx512bw_vpermi2varv32hi3;
21908 break;
21909 case V4SImode:
21910 if (TARGET_AVX512VL)
21911 gen = gen_avx512vl_vpermi2varv4si3;
21912 break;
21913 case V8SImode:
21914 if (TARGET_AVX512VL)
21915 gen = gen_avx512vl_vpermi2varv8si3;
21916 break;
21917 case V16SImode:
21918 if (TARGET_AVX512F)
21919 gen = gen_avx512f_vpermi2varv16si3;
21920 break;
21921 case V4SFmode:
21922 if (TARGET_AVX512VL)
21923 {
21924 gen = gen_avx512vl_vpermi2varv4sf3;
21925 maskmode = V4SImode;
21926 }
21927 break;
21928 case V8SFmode:
21929 if (TARGET_AVX512VL)
21930 {
21931 gen = gen_avx512vl_vpermi2varv8sf3;
21932 maskmode = V8SImode;
21933 }
21934 break;
21935 case V16SFmode:
21936 if (TARGET_AVX512F)
21937 {
21938 gen = gen_avx512f_vpermi2varv16sf3;
21939 maskmode = V16SImode;
21940 }
21941 break;
21942 case V2DImode:
21943 if (TARGET_AVX512VL)
21944 gen = gen_avx512vl_vpermi2varv2di3;
21945 break;
21946 case V4DImode:
21947 if (TARGET_AVX512VL)
21948 gen = gen_avx512vl_vpermi2varv4di3;
21949 break;
21950 case V8DImode:
21951 if (TARGET_AVX512F)
21952 gen = gen_avx512f_vpermi2varv8di3;
21953 break;
21954 case V2DFmode:
21955 if (TARGET_AVX512VL)
21956 {
21957 gen = gen_avx512vl_vpermi2varv2df3;
21958 maskmode = V2DImode;
21959 }
21960 break;
21961 case V4DFmode:
21962 if (TARGET_AVX512VL)
21963 {
21964 gen = gen_avx512vl_vpermi2varv4df3;
21965 maskmode = V4DImode;
21966 }
21967 break;
21968 case V8DFmode:
21969 if (TARGET_AVX512F)
21970 {
21971 gen = gen_avx512f_vpermi2varv8df3;
21972 maskmode = V8DImode;
21973 }
21974 break;
21975 default:
21976 break;
21977 }
21978
21979 if (gen == NULL)
21980 return false;
21981
21982 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21983 expander, so args are either in d, or in op0, op1 etc. */
21984 if (d)
21985 {
21986 rtx vec[64];
21987 target = d->target;
21988 op0 = d->op0;
21989 op1 = d->op1;
21990 for (int i = 0; i < d->nelt; ++i)
21991 vec[i] = GEN_INT (d->perm[i]);
21992 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
21993 }
21994
21995 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
21996 return true;
21997 }
21998
21999 /* Expand a variable vector permutation. */
22000
22001 void
22002 ix86_expand_vec_perm (rtx operands[])
22003 {
22004 rtx target = operands[0];
22005 rtx op0 = operands[1];
22006 rtx op1 = operands[2];
22007 rtx mask = operands[3];
22008 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
22009 machine_mode mode = GET_MODE (op0);
22010 machine_mode maskmode = GET_MODE (mask);
22011 int w, e, i;
22012 bool one_operand_shuffle = rtx_equal_p (op0, op1);
22013
22014 /* Number of elements in the vector. */
22015 w = GET_MODE_NUNITS (mode);
22016 e = GET_MODE_UNIT_SIZE (mode);
22017 gcc_assert (w <= 64);
22018
22019 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
22020 return;
22021
22022 if (TARGET_AVX2)
22023 {
22024 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
22025 {
22026 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
22027 a constant shuffle operand.  With a tiny bit of effort we can
22028 use VPERMD instead.  A re-interpretation stall for V4DFmode is
22029 unfortunate but there's no avoiding it.
22030 Similarly, for V16HImode we don't have instructions for variable
22031 shuffling, while for V32QImode we can, after preparing suitable
22032 masks, use vpshufb; vpshufb; vpermq; vpor.  */
22033
22034 if (mode == V16HImode)
22035 {
22036 maskmode = mode = V32QImode;
22037 w = 32;
22038 e = 1;
22039 }
22040 else
22041 {
22042 maskmode = mode = V8SImode;
22043 w = 8;
22044 e = 4;
22045 }
22046 t1 = gen_reg_rtx (maskmode);
22047
22048 /* Replicate the low bits of the V4DImode mask into V8SImode:
22049 mask = { A B C D }
22050 t1 = { A A B B C C D D }. */
22051 for (i = 0; i < w / 2; ++i)
22052 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
22053 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22054 vt = force_reg (maskmode, vt);
22055 mask = gen_lowpart (maskmode, mask);
22056 if (maskmode == V8SImode)
22057 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22058 else
22059 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22060
22061 /* Multiply the shuffle indices by two.  */
22062 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22063 OPTAB_DIRECT);
22064
22065 /* Add one to the odd shuffle indices:
22066 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22067 for (i = 0; i < w / 2; ++i)
22068 {
22069 vec[i * 2] = const0_rtx;
22070 vec[i * 2 + 1] = const1_rtx;
22071 }
22072 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22073 vt = validize_mem (force_const_mem (maskmode, vt));
22074 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22075 OPTAB_DIRECT);
22076
22077 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22078 operands[3] = mask = t1;
22079 target = gen_reg_rtx (mode);
22080 op0 = gen_lowpart (mode, op0);
22081 op1 = gen_lowpart (mode, op1);
22082 }
22083
22084 switch (mode)
22085 {
22086 case V8SImode:
22087 /* The VPERMD and VPERMPS instructions already properly ignore
22088 the high bits of the shuffle elements. No need for us to
22089 perform an AND ourselves. */
22090 if (one_operand_shuffle)
22091 {
22092 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22093 if (target != operands[0])
22094 emit_move_insn (operands[0],
22095 gen_lowpart (GET_MODE (operands[0]), target));
22096 }
22097 else
22098 {
22099 t1 = gen_reg_rtx (V8SImode);
22100 t2 = gen_reg_rtx (V8SImode);
22101 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22102 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22103 goto merge_two;
22104 }
22105 return;
22106
22107 case V8SFmode:
22108 mask = gen_lowpart (V8SImode, mask);
22109 if (one_operand_shuffle)
22110 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22111 else
22112 {
22113 t1 = gen_reg_rtx (V8SFmode);
22114 t2 = gen_reg_rtx (V8SFmode);
22115 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22116 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22117 goto merge_two;
22118 }
22119 return;
22120
22121 case V4SImode:
22122 /* By combining the two 128-bit input vectors into one 256-bit
22123 input vector, we can use VPERMD and VPERMPS for the full
22124 two-operand shuffle. */
22125 t1 = gen_reg_rtx (V8SImode);
22126 t2 = gen_reg_rtx (V8SImode);
22127 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22128 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22129 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22130 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22131 return;
22132
22133 case V4SFmode:
22134 t1 = gen_reg_rtx (V8SFmode);
22135 t2 = gen_reg_rtx (V8SImode);
22136 mask = gen_lowpart (V4SImode, mask);
22137 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22138 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22139 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22140 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
22141 return;
22142
22143 case V32QImode:
22144 t1 = gen_reg_rtx (V32QImode);
22145 t2 = gen_reg_rtx (V32QImode);
22146 t3 = gen_reg_rtx (V32QImode);
22147 vt2 = GEN_INT (-128);
22148 for (i = 0; i < 32; i++)
22149 vec[i] = vt2;
22150 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22151 vt = force_reg (V32QImode, vt);
22152 for (i = 0; i < 32; i++)
22153 vec[i] = i < 16 ? vt2 : const0_rtx;
22154 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22155 vt2 = force_reg (V32QImode, vt2);
22156 /* From mask create two adjusted masks, which contain the same
22157 bits as mask in the low 7 bits of each vector element.
22158 The first mask will have the most significant bit clear
22159 if it requests element from the same 128-bit lane
22160 and MSB set if it requests element from the other 128-bit lane.
22161 The second mask will have the opposite values of the MSB,
22162 and additionally will have its 128-bit lanes swapped.
22163 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22164 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22165 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22166 stands for the other 12 bytes.  */
22167 /* Whether an element comes from the same lane or the other lane is
22168 indicated by bit 4, so shift it up by 3 to the MSB position.  */
22169 t5 = gen_reg_rtx (V4DImode);
22170 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22171 GEN_INT (3)));
22172 /* Clear MSB bits from the mask just in case it had them set. */
22173 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22174 /* After this t1 will have MSB set for elements from other lane. */
22175 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22176 /* Clear bits other than MSB. */
22177 emit_insn (gen_andv32qi3 (t1, t1, vt));
22178 /* Or in the lower bits from mask into t3. */
22179 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22180 /* And invert MSB bits in t1, so MSB is set for elements from the same
22181 lane. */
22182 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22183 /* Swap 128-bit lanes in t3. */
22184 t6 = gen_reg_rtx (V4DImode);
22185 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22186 const2_rtx, GEN_INT (3),
22187 const0_rtx, const1_rtx));
22188 /* And or in the lower bits from mask into t1. */
22189 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22190 if (one_operand_shuffle)
22191 {
22192 /* Each of these shuffles will put 0s in places where an element
22193 from the other 128-bit lane is needed; otherwise it will shuffle
22194 in the requested value.  */
22195 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22196 gen_lowpart (V32QImode, t6)));
22197 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22198 /* For t3 the 128-bit lanes are swapped again. */
22199 t7 = gen_reg_rtx (V4DImode);
22200 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22201 const2_rtx, GEN_INT (3),
22202 const0_rtx, const1_rtx));
22203 /* And ORing both together yields the result.  */
22204 emit_insn (gen_iorv32qi3 (target, t1,
22205 gen_lowpart (V32QImode, t7)));
22206 if (target != operands[0])
22207 emit_move_insn (operands[0],
22208 gen_lowpart (GET_MODE (operands[0]), target));
22209 return;
22210 }
22211
22212 t4 = gen_reg_rtx (V32QImode);
22213 /* Similar to the one_operand_shuffle code above, just repeated
22214 twice, once for each operand; the merge_two: code below will
22215 merge the two results together.  */
22216 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22217 gen_lowpart (V32QImode, t6)));
22218 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22219 gen_lowpart (V32QImode, t6)));
22220 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22221 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22222 t7 = gen_reg_rtx (V4DImode);
22223 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22224 const2_rtx, GEN_INT (3),
22225 const0_rtx, const1_rtx));
22226 t8 = gen_reg_rtx (V4DImode);
22227 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22228 const2_rtx, GEN_INT (3),
22229 const0_rtx, const1_rtx));
22230 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22231 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22232 t1 = t4;
22233 t2 = t3;
22234 goto merge_two;
22235
22236 default:
22237 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22238 break;
22239 }
22240 }
22241
22242 if (TARGET_XOP)
22243 {
22244 /* The XOP VPPERM insn supports three inputs. By ignoring the
22245 one_operand_shuffle special case, we avoid creating another
22246 set of constant vectors in memory. */
22247 one_operand_shuffle = false;
22248
22249 /* mask = mask & {2*w-1, ...} */
22250 vt = GEN_INT (2*w - 1);
22251 }
22252 else
22253 {
22254 /* mask = mask & {w-1, ...} */
22255 vt = GEN_INT (w - 1);
22256 }
22257
22258 for (i = 0; i < w; i++)
22259 vec[i] = vt;
22260 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22261 mask = expand_simple_binop (maskmode, AND, mask, vt,
22262 NULL_RTX, 0, OPTAB_DIRECT);
22263
22264 /* For non-QImode operations, convert the word permutation control
22265 into a byte permutation control. */
22266 if (mode != V16QImode)
22267 {
22268 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22269 GEN_INT (exact_log2 (e)),
22270 NULL_RTX, 0, OPTAB_DIRECT);
22271
22272 /* Convert mask to vector of chars. */
22273 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22274
22275 /* Replicate each of the input bytes into byte positions:
22276 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22277 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22278 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22279 for (i = 0; i < 16; ++i)
22280 vec[i] = GEN_INT (i/e * e);
22281 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22282 vt = validize_mem (force_const_mem (V16QImode, vt));
22283 if (TARGET_XOP)
22284 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22285 else
22286 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22287
22288 /* Convert it into the byte positions by doing
22289 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22290 for (i = 0; i < 16; ++i)
22291 vec[i] = GEN_INT (i % e);
22292 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22293 vt = validize_mem (force_const_mem (V16QImode, vt));
22294 emit_insn (gen_addv16qi3 (mask, mask, vt));
22295 }
22296
22297 /* The actual shuffle operations all operate on V16QImode. */
22298 op0 = gen_lowpart (V16QImode, op0);
22299 op1 = gen_lowpart (V16QImode, op1);
22300
22301 if (TARGET_XOP)
22302 {
22303 if (GET_MODE (target) != V16QImode)
22304 target = gen_reg_rtx (V16QImode);
22305 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22306 if (target != operands[0])
22307 emit_move_insn (operands[0],
22308 gen_lowpart (GET_MODE (operands[0]), target));
22309 }
22310 else if (one_operand_shuffle)
22311 {
22312 if (GET_MODE (target) != V16QImode)
22313 target = gen_reg_rtx (V16QImode);
22314 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22315 if (target != operands[0])
22316 emit_move_insn (operands[0],
22317 gen_lowpart (GET_MODE (operands[0]), target));
22318 }
22319 else
22320 {
22321 rtx xops[6];
22322 bool ok;
22323
22324 /* Shuffle the two input vectors independently. */
22325 t1 = gen_reg_rtx (V16QImode);
22326 t2 = gen_reg_rtx (V16QImode);
22327 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22328 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22329
22330 merge_two:
22331 /* Then merge them together. The key is whether any given control
22332 element contained a bit set that indicates the second word. */
22333 mask = operands[3];
22334 vt = GEN_INT (w);
22335 if (maskmode == V2DImode && !TARGET_SSE4_1)
22336 {
22337 /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
22338 more shuffle to convert the V2DI input mask into a V4SI
22339 input mask; at that point the masking that expand_int_vcond
22340 performs will work as desired.  */
22341 rtx t3 = gen_reg_rtx (V4SImode);
22342 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22343 const0_rtx, const0_rtx,
22344 const2_rtx, const2_rtx));
22345 mask = t3;
22346 maskmode = V4SImode;
22347 e = w = 4;
22348 }
22349
22350 for (i = 0; i < w; i++)
22351 vec[i] = vt;
22352 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22353 vt = force_reg (maskmode, vt);
22354 mask = expand_simple_binop (maskmode, AND, mask, vt,
22355 NULL_RTX, 0, OPTAB_DIRECT);
22356
22357 if (GET_MODE (target) != mode)
22358 target = gen_reg_rtx (mode);
22359 xops[0] = target;
22360 xops[1] = gen_lowpart (mode, t2);
22361 xops[2] = gen_lowpart (mode, t1);
22362 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22363 xops[4] = mask;
22364 xops[5] = vt;
22365 ok = ix86_expand_int_vcond (xops);
22366 gcc_assert (ok);
22367 if (target != operands[0])
22368 emit_move_insn (operands[0],
22369 gen_lowpart (GET_MODE (operands[0]), target));
22370 }
22371 }
22372
22373 /* Unpack SRC into the next wider integer vector type.  UNSIGNED_P is
22374 true if we should do zero extension, else sign extension. HIGH_P is
22375 true if we want the N/2 high elements, else the low elements. */
22376
22377 void
22378 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22379 {
22380 machine_mode imode = GET_MODE (src);
22381 rtx tmp;
22382
22383 if (TARGET_SSE4_1)
22384 {
22385 rtx (*unpack)(rtx, rtx);
22386 rtx (*extract)(rtx, rtx) = NULL;
22387 machine_mode halfmode = BLKmode;
22388
22389 switch (imode)
22390 {
22391 case V64QImode:
22392 if (unsigned_p)
22393 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22394 else
22395 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22396 halfmode = V32QImode;
22397 extract
22398 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22399 break;
22400 case V32QImode:
22401 if (unsigned_p)
22402 unpack = gen_avx2_zero_extendv16qiv16hi2;
22403 else
22404 unpack = gen_avx2_sign_extendv16qiv16hi2;
22405 halfmode = V16QImode;
22406 extract
22407 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22408 break;
22409 case V32HImode:
22410 if (unsigned_p)
22411 unpack = gen_avx512f_zero_extendv16hiv16si2;
22412 else
22413 unpack = gen_avx512f_sign_extendv16hiv16si2;
22414 halfmode = V16HImode;
22415 extract
22416 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22417 break;
22418 case V16HImode:
22419 if (unsigned_p)
22420 unpack = gen_avx2_zero_extendv8hiv8si2;
22421 else
22422 unpack = gen_avx2_sign_extendv8hiv8si2;
22423 halfmode = V8HImode;
22424 extract
22425 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22426 break;
22427 case V16SImode:
22428 if (unsigned_p)
22429 unpack = gen_avx512f_zero_extendv8siv8di2;
22430 else
22431 unpack = gen_avx512f_sign_extendv8siv8di2;
22432 halfmode = V8SImode;
22433 extract
22434 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22435 break;
22436 case V8SImode:
22437 if (unsigned_p)
22438 unpack = gen_avx2_zero_extendv4siv4di2;
22439 else
22440 unpack = gen_avx2_sign_extendv4siv4di2;
22441 halfmode = V4SImode;
22442 extract
22443 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22444 break;
22445 case V16QImode:
22446 if (unsigned_p)
22447 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22448 else
22449 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22450 break;
22451 case V8HImode:
22452 if (unsigned_p)
22453 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22454 else
22455 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22456 break;
22457 case V4SImode:
22458 if (unsigned_p)
22459 unpack = gen_sse4_1_zero_extendv2siv2di2;
22460 else
22461 unpack = gen_sse4_1_sign_extendv2siv2di2;
22462 break;
22463 default:
22464 gcc_unreachable ();
22465 }
22466
22467 if (GET_MODE_SIZE (imode) >= 32)
22468 {
22469 tmp = gen_reg_rtx (halfmode);
22470 emit_insn (extract (tmp, src));
22471 }
22472 else if (high_p)
22473 {
22474 /* Shift higher 8 bytes to lower 8 bytes. */
22475 tmp = gen_reg_rtx (V1TImode);
22476 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22477 GEN_INT (64)));
22478 tmp = gen_lowpart (imode, tmp);
22479 }
22480 else
22481 tmp = src;
22482
22483 emit_insn (unpack (dest, tmp));
22484 }
22485 else
22486 {
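/* Pre-SSE4.1 fallback: interleave SRC with either a zero vector (zero
   extension) or with a mask of SRC's sign bits obtained by comparing
   zero > SRC (sign extension), which yields the widened elements.  */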
22487 rtx (*unpack)(rtx, rtx, rtx);
22488
22489 switch (imode)
22490 {
22491 case V16QImode:
22492 if (high_p)
22493 unpack = gen_vec_interleave_highv16qi;
22494 else
22495 unpack = gen_vec_interleave_lowv16qi;
22496 break;
22497 case V8HImode:
22498 if (high_p)
22499 unpack = gen_vec_interleave_highv8hi;
22500 else
22501 unpack = gen_vec_interleave_lowv8hi;
22502 break;
22503 case V4SImode:
22504 if (high_p)
22505 unpack = gen_vec_interleave_highv4si;
22506 else
22507 unpack = gen_vec_interleave_lowv4si;
22508 break;
22509 default:
22510 gcc_unreachable ();
22511 }
22512
22513 if (unsigned_p)
22514 tmp = force_reg (imode, CONST0_RTX (imode));
22515 else
22516 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22517 src, pc_rtx, pc_rtx);
22518
22519 rtx tmp2 = gen_reg_rtx (imode);
22520 emit_insn (unpack (tmp2, src, tmp));
22521 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22522 }
22523 }
22524
22525 /* Expand conditional increment or decrement using adc/sbb instructions.
22526 The default case using setcc followed by the conditional move can be
22527 done by generic code. */
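/* For example, "x += (a < b)" with unsigned operands becomes, roughly, a
   compare of a against b that leaves the carry flag set when a < b,
   followed by "adc x, 0", which adds one exactly when the carry is set;
   the decrement and reversed-condition cases use sbb or the negated flag
   in the same way.  */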
22528 bool
22529 ix86_expand_int_addcc (rtx operands[])
22530 {
22531 enum rtx_code code = GET_CODE (operands[1]);
22532 rtx flags;
22533 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22534 rtx compare_op;
22535 rtx val = const0_rtx;
22536 bool fpcmp = false;
22537 machine_mode mode;
22538 rtx op0 = XEXP (operands[1], 0);
22539 rtx op1 = XEXP (operands[1], 1);
22540
22541 if (operands[3] != const1_rtx
22542 && operands[3] != constm1_rtx)
22543 return false;
22544 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22545 return false;
22546 code = GET_CODE (compare_op);
22547
22548 flags = XEXP (compare_op, 0);
22549
22550 if (GET_MODE (flags) == CCFPmode
22551 || GET_MODE (flags) == CCFPUmode)
22552 {
22553 fpcmp = true;
22554 code = ix86_fp_compare_code_to_integer (code);
22555 }
22556
22557 if (code != LTU)
22558 {
22559 val = constm1_rtx;
22560 if (fpcmp)
22561 PUT_CODE (compare_op,
22562 reverse_condition_maybe_unordered
22563 (GET_CODE (compare_op)));
22564 else
22565 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22566 }
22567
22568 mode = GET_MODE (operands[0]);
22569
22570 /* Construct either adc or sbb insn. */
22571 if ((code == LTU) == (operands[3] == constm1_rtx))
22572 {
22573 switch (mode)
22574 {
22575 case QImode:
22576 insn = gen_subqi3_carry;
22577 break;
22578 case HImode:
22579 insn = gen_subhi3_carry;
22580 break;
22581 case SImode:
22582 insn = gen_subsi3_carry;
22583 break;
22584 case DImode:
22585 insn = gen_subdi3_carry;
22586 break;
22587 default:
22588 gcc_unreachable ();
22589 }
22590 }
22591 else
22592 {
22593 switch (mode)
22594 {
22595 case QImode:
22596 insn = gen_addqi3_carry;
22597 break;
22598 case HImode:
22599 insn = gen_addhi3_carry;
22600 break;
22601 case SImode:
22602 insn = gen_addsi3_carry;
22603 break;
22604 case DImode:
22605 insn = gen_adddi3_carry;
22606 break;
22607 default:
22608 gcc_unreachable ();
22609 }
22610 }
22611 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22612
22613 return true;
22614 }
22615
22616
22617 /* Split OPERAND into half-mode parts stored in PARTS.  Similar to
22618 split_double_mode, but works for floating point parameters and
22619 non-offsettable memories.  For pushes, it returns just stack offsets;
22620 the values will be saved in the right order.  At most four parts are generated.  */
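/* For example, on a 32-bit target an XFmode value is returned as three
   SImode parts and a TFmode value as four, while on a 64-bit target
   XFmode and TFmode are returned as two parts (a DImode low part plus
   an SImode or DImode upper part).  */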
22621
22622 static int
22623 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22624 {
22625 int size;
22626
22627 if (!TARGET_64BIT)
22628 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22629 else
22630 size = (GET_MODE_SIZE (mode) + 4) / 8;
22631
22632 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22633 gcc_assert (size >= 2 && size <= 4);
22634
22635 /* Optimize constant pool reference to immediates.  This is used by fp
22636 moves, which force all constants to memory to allow combining.  */
22637 if (MEM_P (operand) && MEM_READONLY_P (operand))
22638 {
22639 rtx tmp = maybe_get_pool_constant (operand);
22640 if (tmp)
22641 operand = tmp;
22642 }
22643
22644 if (MEM_P (operand) && !offsettable_memref_p (operand))
22645 {
22646 /* The only non-offsettable memories we handle are pushes.  */
22647 int ok = push_operand (operand, VOIDmode);
22648
22649 gcc_assert (ok);
22650
22651 operand = copy_rtx (operand);
22652 PUT_MODE (operand, word_mode);
22653 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22654 return size;
22655 }
22656
22657 if (GET_CODE (operand) == CONST_VECTOR)
22658 {
22659 machine_mode imode = int_mode_for_mode (mode);
22660 /* Caution: if we looked through a constant pool memory above,
22661 the operand may actually have a different mode now. That's
22662 ok, since we want to pun this all the way back to an integer. */
22663 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22664 gcc_assert (operand != NULL);
22665 mode = imode;
22666 }
22667
22668 if (!TARGET_64BIT)
22669 {
22670 if (mode == DImode)
22671 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22672 else
22673 {
22674 int i;
22675
22676 if (REG_P (operand))
22677 {
22678 gcc_assert (reload_completed);
22679 for (i = 0; i < size; i++)
22680 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22681 }
22682 else if (offsettable_memref_p (operand))
22683 {
22684 operand = adjust_address (operand, SImode, 0);
22685 parts[0] = operand;
22686 for (i = 1; i < size; i++)
22687 parts[i] = adjust_address (operand, SImode, 4 * i);
22688 }
22689 else if (GET_CODE (operand) == CONST_DOUBLE)
22690 {
22691 REAL_VALUE_TYPE r;
22692 long l[4];
22693
22694 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22695 switch (mode)
22696 {
22697 case TFmode:
22698 real_to_target (l, &r, mode);
22699 parts[3] = gen_int_mode (l[3], SImode);
22700 parts[2] = gen_int_mode (l[2], SImode);
22701 break;
22702 case XFmode:
22703 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22704 long double may not be 80-bit. */
22705 real_to_target (l, &r, mode);
22706 parts[2] = gen_int_mode (l[2], SImode);
22707 break;
22708 case DFmode:
22709 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22710 break;
22711 default:
22712 gcc_unreachable ();
22713 }
22714 parts[1] = gen_int_mode (l[1], SImode);
22715 parts[0] = gen_int_mode (l[0], SImode);
22716 }
22717 else
22718 gcc_unreachable ();
22719 }
22720 }
22721 else
22722 {
22723 if (mode == TImode)
22724 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22725 if (mode == XFmode || mode == TFmode)
22726 {
22727 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22728 if (REG_P (operand))
22729 {
22730 gcc_assert (reload_completed);
22731 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22732 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22733 }
22734 else if (offsettable_memref_p (operand))
22735 {
22736 operand = adjust_address (operand, DImode, 0);
22737 parts[0] = operand;
22738 parts[1] = adjust_address (operand, upper_mode, 8);
22739 }
22740 else if (GET_CODE (operand) == CONST_DOUBLE)
22741 {
22742 REAL_VALUE_TYPE r;
22743 long l[4];
22744
22745 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22746 real_to_target (l, &r, mode);
22747
22748 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22749 if (HOST_BITS_PER_WIDE_INT >= 64)
22750 parts[0]
22751 = gen_int_mode
22752 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22753 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22754 DImode);
22755 else
22756 parts[0] = immed_double_const (l[0], l[1], DImode);
22757
22758 if (upper_mode == SImode)
22759 parts[1] = gen_int_mode (l[2], SImode);
22760 else if (HOST_BITS_PER_WIDE_INT >= 64)
22761 parts[1]
22762 = gen_int_mode
22763 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22764 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22765 DImode);
22766 else
22767 parts[1] = immed_double_const (l[2], l[3], DImode);
22768 }
22769 else
22770 gcc_unreachable ();
22771 }
22772 }
22773
22774 return size;
22775 }
22776
22777 /* Emit insns to perform a move or push of DI, DF, XF, and TF values
22778 by splitting them into half-mode parts and emitting the part moves
22779 in the correct order.  Operands 2 and up are used internally to hold
22780 the destination and source parts.  */
22781
22782 void
22783 ix86_split_long_move (rtx operands[])
22784 {
22785 rtx part[2][4];
22786 int nparts, i, j;
22787 int push = 0;
22788 int collisions = 0;
22789 machine_mode mode = GET_MODE (operands[0]);
22790 bool collisionparts[4];
22791
22792 /* The DFmode expanders may ask us to move a double.
22793 For a 64-bit target this is a single move.  By hiding that fact
22794 here we simplify the i386.md splitters.  */
22795 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22796 {
22797 /* Optimize constant pool reference to immediates.  This is used by
22798 fp moves, which force all constants to memory to allow combining.  */
22799
22800 if (MEM_P (operands[1])
22801 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22802 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22803 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22804 if (push_operand (operands[0], VOIDmode))
22805 {
22806 operands[0] = copy_rtx (operands[0]);
22807 PUT_MODE (operands[0], word_mode);
22808 }
22809 else
22810 operands[0] = gen_lowpart (DImode, operands[0]);
22811 operands[1] = gen_lowpart (DImode, operands[1]);
22812 emit_move_insn (operands[0], operands[1]);
22813 return;
22814 }
22815
22816 /* The only non-offsettable memory we handle is push. */
22817 if (push_operand (operands[0], VOIDmode))
22818 push = 1;
22819 else
22820 gcc_assert (!MEM_P (operands[0])
22821 || offsettable_memref_p (operands[0]));
22822
22823 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22824 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22825
22826 /* When emitting a push, take care of source operands on the stack.  */
22827 if (push && MEM_P (operands[1])
22828 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22829 {
22830 rtx src_base = XEXP (part[1][nparts - 1], 0);
22831
22832 /* Compensate for the stack decrement by 4. */
22833 if (!TARGET_64BIT && nparts == 3
22834 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22835 src_base = plus_constant (Pmode, src_base, 4);
22836
22837 /* src_base refers to the stack pointer and is
22838 automatically decreased by emitted push. */
22839 for (i = 0; i < nparts; i++)
22840 part[1][i] = change_address (part[1][i],
22841 GET_MODE (part[1][i]), src_base);
22842 }
22843
22844 /* We need to do copy in the right order in case an address register
22845 of the source overlaps the destination. */
22846 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22847 {
22848 rtx tmp;
22849
22850 for (i = 0; i < nparts; i++)
22851 {
22852 collisionparts[i]
22853 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22854 if (collisionparts[i])
22855 collisions++;
22856 }
22857
22858 /* Collision in the middle part can be handled by reordering. */
22859 if (collisions == 1 && nparts == 3 && collisionparts [1])
22860 {
22861 std::swap (part[0][1], part[0][2]);
22862 std::swap (part[1][1], part[1][2]);
22863 }
22864 else if (collisions == 1
22865 && nparts == 4
22866 && (collisionparts [1] || collisionparts [2]))
22867 {
22868 if (collisionparts [1])
22869 {
22870 std::swap (part[0][1], part[0][2]);
22871 std::swap (part[1][1], part[1][2]);
22872 }
22873 else
22874 {
22875 std::swap (part[0][2], part[0][3]);
22876 std::swap (part[1][2], part[1][3]);
22877 }
22878 }
22879
22880 /* If there are more collisions, we can't handle it by reordering.
22881 Do an lea to the last part and use only one colliding move. */
22882 else if (collisions > 1)
22883 {
22884 rtx base;
22885
22886 collisions = 1;
22887
22888 base = part[0][nparts - 1];
22889
22890 /* Handle the case when the last part isn't valid for lea.
22891 Happens in 64-bit mode storing the 12-byte XFmode. */
22892 if (GET_MODE (base) != Pmode)
22893 base = gen_rtx_REG (Pmode, REGNO (base));
22894
22895 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22896 part[1][0] = replace_equiv_address (part[1][0], base);
22897 for (i = 1; i < nparts; i++)
22898 {
22899 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22900 part[1][i] = replace_equiv_address (part[1][i], tmp);
22901 }
22902 }
22903 }
22904
22905 if (push)
22906 {
22907 if (!TARGET_64BIT)
22908 {
22909 if (nparts == 3)
22910 {
22911 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22912 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22913 stack_pointer_rtx, GEN_INT (-4)));
22914 emit_move_insn (part[0][2], part[1][2]);
22915 }
22916 else if (nparts == 4)
22917 {
22918 emit_move_insn (part[0][3], part[1][3]);
22919 emit_move_insn (part[0][2], part[1][2]);
22920 }
22921 }
22922 else
22923 {
22924 /* In 64-bit mode we don't have a 32-bit push available.  If this is a
22925 register, that is OK - we will just use the larger counterpart.  We also
22926 retype memory - these come from an attempt to avoid a REX prefix on
22927 moving the second half of a TFmode value.  */
22928 if (GET_MODE (part[1][1]) == SImode)
22929 {
22930 switch (GET_CODE (part[1][1]))
22931 {
22932 case MEM:
22933 part[1][1] = adjust_address (part[1][1], DImode, 0);
22934 break;
22935
22936 case REG:
22937 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22938 break;
22939
22940 default:
22941 gcc_unreachable ();
22942 }
22943
22944 if (GET_MODE (part[1][0]) == SImode)
22945 part[1][0] = part[1][1];
22946 }
22947 }
22948 emit_move_insn (part[0][1], part[1][1]);
22949 emit_move_insn (part[0][0], part[1][0]);
22950 return;
22951 }
22952
22953 /* Choose correct order to not overwrite the source before it is copied. */
22954 if ((REG_P (part[0][0])
22955 && REG_P (part[1][1])
22956 && (REGNO (part[0][0]) == REGNO (part[1][1])
22957 || (nparts == 3
22958 && REGNO (part[0][0]) == REGNO (part[1][2]))
22959 || (nparts == 4
22960 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22961 || (collisions > 0
22962 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22963 {
22964 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22965 {
22966 operands[2 + i] = part[0][j];
22967 operands[6 + i] = part[1][j];
22968 }
22969 }
22970 else
22971 {
22972 for (i = 0; i < nparts; i++)
22973 {
22974 operands[2 + i] = part[0][i];
22975 operands[6 + i] = part[1][i];
22976 }
22977 }
22978
22979 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22980 if (optimize_insn_for_size_p ())
22981 {
22982 for (j = 0; j < nparts - 1; j++)
22983 if (CONST_INT_P (operands[6 + j])
22984 && operands[6 + j] != const0_rtx
22985 && REG_P (operands[2 + j]))
22986 for (i = j; i < nparts - 1; i++)
22987 if (CONST_INT_P (operands[7 + i])
22988 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22989 operands[7 + i] = operands[2 + j];
22990 }
22991
22992 for (i = 0; i < nparts; i++)
22993 emit_move_insn (operands[2 + i], operands[6 + i]);
22994
22995 return;
22996 }
22997
22998 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22999 left shift by a constant, either using a single shift or
23000 a sequence of add instructions. */
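/* Adding a register to itself is a left shift by one, so a shift by a
   small constant can be replaced by COUNT additions when that is cheaper
   per the cost tables (count * add cost <= constant-shift cost) and we
   are not optimizing for size.  */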
23001
23002 static void
23003 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
23004 {
23005 rtx (*insn)(rtx, rtx, rtx);
23006
23007 if (count == 1
23008 || (count * ix86_cost->add <= ix86_cost->shift_const
23009 && !optimize_insn_for_size_p ()))
23010 {
23011 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
23012 while (count-- > 0)
23013 emit_insn (insn (operand, operand, operand));
23014 }
23015 else
23016 {
23017 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23018 emit_insn (insn (operand, operand, GEN_INT (count)));
23019 }
23020 }
23021
23022 void
23023 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
23024 {
23025 rtx (*gen_ashl3)(rtx, rtx, rtx);
23026 rtx (*gen_shld)(rtx, rtx, rtx);
23027 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23028
23029 rtx low[2], high[2];
23030 int count;
23031
23032 if (CONST_INT_P (operands[2]))
23033 {
23034 split_double_mode (mode, operands, 2, low, high);
23035 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23036
23037 if (count >= half_width)
23038 {
23039 emit_move_insn (high[0], low[1]);
23040 emit_move_insn (low[0], const0_rtx);
23041
23042 if (count > half_width)
23043 ix86_expand_ashl_const (high[0], count - half_width, mode);
23044 }
23045 else
23046 {
23047 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23048
23049 if (!rtx_equal_p (operands[0], operands[1]))
23050 emit_move_insn (operands[0], operands[1]);
23051
23052 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
23053 ix86_expand_ashl_const (low[0], count, mode);
23054 }
23055 return;
23056 }
23057
23058 split_double_mode (mode, operands, 1, low, high);
23059
23060 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23061
23062 if (operands[1] == const1_rtx)
23063 {
23064 /* Assuming we've chosen QImode-capable registers, 1 << N
23065 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
23066 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23067 {
23068 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23069
23070 ix86_expand_clear (low[0]);
23071 ix86_expand_clear (high[0]);
23072 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23073
23074 d = gen_lowpart (QImode, low[0]);
23075 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23076 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23077 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23078
23079 d = gen_lowpart (QImode, high[0]);
23080 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23081 s = gen_rtx_NE (QImode, flags, const0_rtx);
23082 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23083 }
23084
23085 /* Otherwise, we can get the same results by manually performing
23086 a bit extract operation on bit 5/6, and then performing the two
23087 shifts. The two methods of getting 0/1 into low/high are exactly
23088 the same size. Avoiding the shift in the bit extract case helps
23089 pentium4 a bit; no one else seems to care much either way. */
23090 else
23091 {
23092 machine_mode half_mode;
23093 rtx (*gen_lshr3)(rtx, rtx, rtx);
23094 rtx (*gen_and3)(rtx, rtx, rtx);
23095 rtx (*gen_xor3)(rtx, rtx, rtx);
23096 HOST_WIDE_INT bits;
23097 rtx x;
23098
23099 if (mode == DImode)
23100 {
23101 half_mode = SImode;
23102 gen_lshr3 = gen_lshrsi3;
23103 gen_and3 = gen_andsi3;
23104 gen_xor3 = gen_xorsi3;
23105 bits = 5;
23106 }
23107 else
23108 {
23109 half_mode = DImode;
23110 gen_lshr3 = gen_lshrdi3;
23111 gen_and3 = gen_anddi3;
23112 gen_xor3 = gen_xordi3;
23113 bits = 6;
23114 }
23115
23116 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23117 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23118 else
23119 x = gen_lowpart (half_mode, operands[2]);
23120 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
23121
23122 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23123 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23124 emit_move_insn (low[0], high[0]);
23125 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23126 }
23127
23128 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23129 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23130 return;
23131 }
23132
23133 if (operands[1] == constm1_rtx)
23134 {
23135 /* For -1 << N, we can avoid the shld instruction, because we
23136 know that we're shifting 0...31/63 ones into a -1. */
23137 emit_move_insn (low[0], constm1_rtx);
23138 if (optimize_insn_for_size_p ())
23139 emit_move_insn (high[0], low[0]);
23140 else
23141 emit_move_insn (high[0], constm1_rtx);
23142 }
23143 else
23144 {
23145 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23146
23147 if (!rtx_equal_p (operands[0], operands[1]))
23148 emit_move_insn (operands[0], operands[1]);
23149
23150 split_double_mode (mode, operands, 1, low, high);
23151 emit_insn (gen_shld (high[0], low[0], operands[2]));
23152 }
23153
23154 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23155
23156 if (TARGET_CMOVE && scratch)
23157 {
23158 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23159 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23160
23161 ix86_expand_clear (scratch);
23162 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23163 }
23164 else
23165 {
23166 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23167 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23168
23169 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
23170 }
23171 }
23172
23173 void
23174 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23175 {
23176 rtx (*gen_ashr3)(rtx, rtx, rtx)
23177 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23178 rtx (*gen_shrd)(rtx, rtx, rtx);
23179 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23180
23181 rtx low[2], high[2];
23182 int count;
23183
23184 if (CONST_INT_P (operands[2]))
23185 {
23186 split_double_mode (mode, operands, 2, low, high);
23187 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23188
23189 if (count == GET_MODE_BITSIZE (mode) - 1)
23190 {
23191 emit_move_insn (high[0], high[1]);
23192 emit_insn (gen_ashr3 (high[0], high[0],
23193 GEN_INT (half_width - 1)));
23194 emit_move_insn (low[0], high[0]);
23195
23196 }
23197 else if (count >= half_width)
23198 {
23199 emit_move_insn (low[0], high[1]);
23200 emit_move_insn (high[0], low[0]);
23201 emit_insn (gen_ashr3 (high[0], high[0],
23202 GEN_INT (half_width - 1)));
23203
23204 if (count > half_width)
23205 emit_insn (gen_ashr3 (low[0], low[0],
23206 GEN_INT (count - half_width)));
23207 }
23208 else
23209 {
23210 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23211
23212 if (!rtx_equal_p (operands[0], operands[1]))
23213 emit_move_insn (operands[0], operands[1]);
23214
23215 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23216 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23217 }
23218 }
23219 else
23220 {
23221 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23222
23223 if (!rtx_equal_p (operands[0], operands[1]))
23224 emit_move_insn (operands[0], operands[1]);
23225
23226 split_double_mode (mode, operands, 1, low, high);
23227
23228 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23229 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23230
23231 if (TARGET_CMOVE && scratch)
23232 {
23233 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23234 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23235
23236 emit_move_insn (scratch, high[0]);
23237 emit_insn (gen_ashr3 (scratch, scratch,
23238 GEN_INT (half_width - 1)));
23239 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23240 scratch));
23241 }
23242 else
23243 {
23244 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23245 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23246
23247 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
23248 }
23249 }
23250 }
23251
23252 void
23253 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23254 {
23255 rtx (*gen_lshr3)(rtx, rtx, rtx)
23256 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23257 rtx (*gen_shrd)(rtx, rtx, rtx);
23258 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23259
23260 rtx low[2], high[2];
23261 int count;
23262
23263 if (CONST_INT_P (operands[2]))
23264 {
23265 split_double_mode (mode, operands, 2, low, high);
23266 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23267
23268 if (count >= half_width)
23269 {
23270 emit_move_insn (low[0], high[1]);
23271 ix86_expand_clear (high[0]);
23272
23273 if (count > half_width)
23274 emit_insn (gen_lshr3 (low[0], low[0],
23275 GEN_INT (count - half_width)));
23276 }
23277 else
23278 {
23279 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23280
23281 if (!rtx_equal_p (operands[0], operands[1]))
23282 emit_move_insn (operands[0], operands[1]);
23283
23284 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23285 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23286 }
23287 }
23288 else
23289 {
23290 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23291
23292 if (!rtx_equal_p (operands[0], operands[1]))
23293 emit_move_insn (operands[0], operands[1]);
23294
23295 split_double_mode (mode, operands, 1, low, high);
23296
23297 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23298 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23299
23300 if (TARGET_CMOVE && scratch)
23301 {
23302 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23303 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23304
23305 ix86_expand_clear (scratch);
23306 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23307 scratch));
23308 }
23309 else
23310 {
23311 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23312 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23313
23314 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23315 }
23316 }
23317 }
23318
23319 /* Predict just emitted jump instruction to be taken with probability PROB. */
23320 static void
23321 predict_jump (int prob)
23322 {
23323 rtx insn = get_last_insn ();
23324 gcc_assert (JUMP_P (insn));
23325 add_int_reg_note (insn, REG_BR_PROB, prob);
23326 }
23327
23328 /* Helper function for the string operations below. Test whether the bits of
23329 VARIABLE selected by VALUE are zero; if so, jump to the returned label. */
23330 static rtx_code_label *
23331 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23332 {
23333 rtx_code_label *label = gen_label_rtx ();
23334 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23335 if (GET_MODE (variable) == DImode)
23336 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23337 else
23338 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23339 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23340 1, label);
23341 if (epilogue)
23342 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23343 else
23344 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23345 return label;
23346 }
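/* For illustration: a call such as ix86_expand_aligntest (count, 4, true)
   emits the equivalent of "if ((count & 4) == 0) goto label;" and returns
   LABEL, so whatever the caller emits between this call and emit_label (label)
   executes only when the tested bit is set.  */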
23347
23348 /* Decrement COUNTREG by VALUE. */
23349 static void
23350 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23351 {
23352 rtx (*gen_add)(rtx, rtx, rtx)
23353 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23354
23355 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23356 }
23357
23358 /* Zero extend possibly SImode EXP to Pmode register. */
23359 rtx
23360 ix86_zero_extend_to_Pmode (rtx exp)
23361 {
23362 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23363 }
23364
23365 /* Divide COUNTREG by SCALE. */
23366 static rtx
23367 scale_counter (rtx countreg, int scale)
23368 {
23369 rtx sc;
23370
23371 if (scale == 1)
23372 return countreg;
23373 if (CONST_INT_P (countreg))
23374 return GEN_INT (INTVAL (countreg) / scale);
23375 gcc_assert (REG_P (countreg));
23376
23377 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23378 GEN_INT (exact_log2 (scale)),
23379 NULL, 1, OPTAB_DIRECT);
23380 return sc;
23381 }
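/* For illustration: scale_counter (countreg, 4) returns INTVAL / 4 for a
   constant count and otherwise emits a logical shift right by 2; SCALE is
   assumed to be a power of two, which the exact_log2 call above relies on.  */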
23382
23383 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23384 DImode for constant loop counts. */
23385
23386 static machine_mode
23387 counter_mode (rtx count_exp)
23388 {
23389 if (GET_MODE (count_exp) != VOIDmode)
23390 return GET_MODE (count_exp);
23391 if (!CONST_INT_P (count_exp))
23392 return Pmode;
23393 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23394 return DImode;
23395 return SImode;
23396 }
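/* For illustration: a count already in a register keeps that register's mode,
   a constant that fits in 32 bits yields SImode, and a larger constant on a
   64-bit target yields DImode.  */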
23397
23398 /* Copy the address to a Pmode register. This is used for x32 to
23399 truncate DImode TLS address to a SImode register. */
23400
23401 static rtx
23402 ix86_copy_addr_to_reg (rtx addr)
23403 {
23404 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23405 return copy_addr_to_reg (addr);
23406 else
23407 {
23408 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23409 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
23410 }
23411 }
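/* For illustration: on x32, where Pmode is SImode, a DImode TLS address is
   first copied into a DImode register and then accessed through a SImode
   SUBREG; on all other targets this is just copy_addr_to_reg.  */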
23412
23413 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by
23414 SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall size
23415 is COUNT bytes. When ISSETMEM is TRUE, output the equivalent loop to set
23416 memory to VALUE (assumed to be in MODE).
23417
23418 The size is rounded down to a whole number of chunks moved at once.
23419 SRCMEM and DESTMEM provide MEM rtxes to supply proper aliasing info. */
23420
23421
23422 static void
23423 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23424 rtx destptr, rtx srcptr, rtx value,
23425 rtx count, machine_mode mode, int unroll,
23426 int expected_size, bool issetmem)
23427 {
23428 rtx_code_label *out_label, *top_label;
23429 rtx iter, tmp;
23430 machine_mode iter_mode = counter_mode (count);
23431 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23432 rtx piece_size = GEN_INT (piece_size_n);
23433 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23434 rtx size;
23435 int i;
23436
23437 top_label = gen_label_rtx ();
23438 out_label = gen_label_rtx ();
23439 iter = gen_reg_rtx (iter_mode);
23440
23441 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23442 NULL, 1, OPTAB_DIRECT);
23443 /* Those two should combine. */
23444 if (piece_size == const1_rtx)
23445 {
23446 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23447 true, out_label);
23448 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23449 }
23450 emit_move_insn (iter, const0_rtx);
23451
23452 emit_label (top_label);
23453
23454 tmp = convert_modes (Pmode, iter_mode, iter, true);
23455
23456 /* This assert could be relaxed - in that case we'd need to compute the
23457 smallest power of two containing PIECE_SIZE_N and pass it to
23458 offset_address. */
23459 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23460 destmem = offset_address (destmem, tmp, piece_size_n);
23461 destmem = adjust_address (destmem, mode, 0);
23462
23463 if (!issetmem)
23464 {
23465 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23466 srcmem = adjust_address (srcmem, mode, 0);
23467
23468 /* When unrolling for chips that reorder memory reads and writes,
23469 we can save registers by using a single temporary.
23470 Also, using 4 temporaries is overkill in 32-bit mode. */
23471 if (!TARGET_64BIT && 0)
23472 {
23473 for (i = 0; i < unroll; i++)
23474 {
23475 if (i)
23476 {
23477 destmem =
23478 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23479 srcmem =
23480 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23481 }
23482 emit_move_insn (destmem, srcmem);
23483 }
23484 }
23485 else
23486 {
23487 rtx tmpreg[4];
23488 gcc_assert (unroll <= 4);
23489 for (i = 0; i < unroll; i++)
23490 {
23491 tmpreg[i] = gen_reg_rtx (mode);
23492 if (i)
23493 {
23494 srcmem =
23495 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23496 }
23497 emit_move_insn (tmpreg[i], srcmem);
23498 }
23499 for (i = 0; i < unroll; i++)
23500 {
23501 if (i)
23502 {
23503 destmem =
23504 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23505 }
23506 emit_move_insn (destmem, tmpreg[i]);
23507 }
23508 }
23509 }
23510 else
23511 for (i = 0; i < unroll; i++)
23512 {
23513 if (i)
23514 destmem =
23515 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23516 emit_move_insn (destmem, value);
23517 }
23518
23519 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23520 true, OPTAB_LIB_WIDEN);
23521 if (tmp != iter)
23522 emit_move_insn (iter, tmp);
23523
23524 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23525 true, top_label);
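/* Predict the loop-back branch just emitted: it is taken with probability
   roughly 1 - 1/n, where n is the expected number of iterations; e.g. an
   expected trip count of 10 marks it as about 90% taken, and an unknown
   count defaults to 80%.  */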
23526 if (expected_size != -1)
23527 {
23528 expected_size /= GET_MODE_SIZE (mode) * unroll;
23529 if (expected_size == 0)
23530 predict_jump (0);
23531 else if (expected_size > REG_BR_PROB_BASE)
23532 predict_jump (REG_BR_PROB_BASE - 1);
23533 else
23534 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23535 }
23536 else
23537 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23538 iter = ix86_zero_extend_to_Pmode (iter);
23539 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23540 true, OPTAB_LIB_WIDEN);
23541 if (tmp != destptr)
23542 emit_move_insn (destptr, tmp);
23543 if (!issetmem)
23544 {
23545 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23546 true, OPTAB_LIB_WIDEN);
23547 if (tmp != srcptr)
23548 emit_move_insn (srcptr, tmp);
23549 }
23550 emit_label (out_label);
23551 }
23552
23553 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23554 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23555 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23556 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23557 ORIG_VALUE is the original value passed to memset to fill the memory with.
23558 Other arguments have the same meaning as for the previous function. */
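/* For illustration: zeroing a 16-byte block with MODE == SImode scales the
   count down to 4 and emits roughly "xor %eax,%eax; mov $4,%ecx; rep stosl";
   the DESTEXP/SRCEXP expressions built below describe the final pointer
   values used by the rep_stos/rep_mov insn patterns.  */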
23559
23560 static void
23561 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23562 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23563 rtx count,
23564 machine_mode mode, bool issetmem)
23565 {
23566 rtx destexp;
23567 rtx srcexp;
23568 rtx countreg;
23569 HOST_WIDE_INT rounded_count;
23570
23571 /* If possible, it is shorter to use rep movs.
23572 TODO: Maybe it is better to move this logic to decide_alg. */
23573 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23574 && (!issetmem || orig_value == const0_rtx))
23575 mode = SImode;
23576
23577 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23578 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23579
23580 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23581 GET_MODE_SIZE (mode)));
23582 if (mode != QImode)
23583 {
23584 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23585 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23586 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23587 }
23588 else
23589 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23590 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23591 {
23592 rounded_count = (INTVAL (count)
23593 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23594 destmem = shallow_copy_rtx (destmem);
23595 set_mem_size (destmem, rounded_count);
23596 }
23597 else if (MEM_SIZE_KNOWN_P (destmem))
23598 clear_mem_size (destmem);
23599
23600 if (issetmem)
23601 {
23602 value = force_reg (mode, gen_lowpart (mode, value));
23603 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23604 }
23605 else
23606 {
23607 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23608 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23609 if (mode != QImode)
23610 {
23611 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23612 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23613 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23614 }
23615 else
23616 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23617 if (CONST_INT_P (count))
23618 {
23619 rounded_count = (INTVAL (count)
23620 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23621 srcmem = shallow_copy_rtx (srcmem);
23622 set_mem_size (srcmem, rounded_count);
23623 }
23624 else
23625 {
23626 if (MEM_SIZE_KNOWN_P (srcmem))
23627 clear_mem_size (srcmem);
23628 }
23629 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23630 destexp, srcexp));
23631 }
23632 }
23633
23634 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23635 DESTMEM.
23636 SRCMEM is passed by pointer so it can be updated on return.
23637 The return value is the updated DESTMEM. */
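/* For illustration: SIZE_TO_MOVE of 8 bytes on a 64-bit target typically
   becomes a single DImode load/store pair through a temporary register,
   while 16 bytes may use one 16-byte vector move when such a move pattern
   is available; otherwise the piece size is halved until a supported mode
   is found.  */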
23638 static rtx
23639 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23640 HOST_WIDE_INT size_to_move)
23641 {
23642 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23643 enum insn_code code;
23644 machine_mode move_mode;
23645 int piece_size, i;
23646
23647 /* Find the widest mode in which we could perform moves.
23648 Start with the biggest power of 2 not larger than SIZE_TO_MOVE and halve
23649 it until a move of that size is supported. */
23650 piece_size = 1 << floor_log2 (size_to_move);
23651 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23652 code = optab_handler (mov_optab, move_mode);
23653 while (code == CODE_FOR_nothing && piece_size > 1)
23654 {
23655 piece_size >>= 1;
23656 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23657 code = optab_handler (mov_optab, move_mode);
23658 }
23659
23660 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23661 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23662 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23663 {
23664 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23665 move_mode = mode_for_vector (word_mode, nunits);
23666 code = optab_handler (mov_optab, move_mode);
23667 if (code == CODE_FOR_nothing)
23668 {
23669 move_mode = word_mode;
23670 piece_size = GET_MODE_SIZE (move_mode);
23671 code = optab_handler (mov_optab, move_mode);
23672 }
23673 }
23674 gcc_assert (code != CODE_FOR_nothing);
23675
23676 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23677 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23678
23679 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23680 gcc_assert (size_to_move % piece_size == 0);
23681 adjust = GEN_INT (piece_size);
23682 for (i = 0; i < size_to_move; i += piece_size)
23683 {
23684 /* We move from memory to memory, so we'll need to do it via
23685 a temporary register. */
23686 tempreg = gen_reg_rtx (move_mode);
23687 emit_insn (GEN_FCN (code) (tempreg, src));
23688 emit_insn (GEN_FCN (code) (dst, tempreg));
23689
23690 emit_move_insn (destptr,
23691 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23692 emit_move_insn (srcptr,
23693 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23694
23695 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23696 piece_size);
23697 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23698 piece_size);
23699 }
23700
23701 /* Update DST and SRC rtx. */
23702 *srcmem = src;
23703 return dst;
23704 }
23705
23706 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23707 static void
23708 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23709 rtx destptr, rtx srcptr, rtx count, int max_size)
23710 {
23711 rtx src, dest;
23712 if (CONST_INT_P (count))
23713 {
23714 HOST_WIDE_INT countval = INTVAL (count);
23715 HOST_WIDE_INT epilogue_size = countval % max_size;
23716 int i;
23717
23718 /* For now MAX_SIZE should be a power of 2. This assert could be
23719 relaxed, but it'll require a bit more complicated epilogue
23720 expanding. */
23721 gcc_assert ((max_size & (max_size - 1)) == 0);
23722 for (i = max_size; i >= 1; i >>= 1)
23723 {
23724 if (epilogue_size & i)
23725 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23726 }
23727 return;
23728 }
23729 if (max_size > 8)
23730 {
23731 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23732 count, 1, OPTAB_DIRECT);
23733 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23734 count, QImode, 1, 4, false);
23735 return;
23736 }
23737
23738 /* When single-instruction stringops are available (TARGET_SINGLE_STRINGOP),
23739 we can cheaply advance the dest and src pointers. Otherwise we save code
23740 size by maintaining an offset (zero is readily available from the preceding
23741 rep operation) and using x86 addressing modes. */
23742 if (TARGET_SINGLE_STRINGOP)
23743 {
23744 if (max_size > 4)
23745 {
23746 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23747 src = change_address (srcmem, SImode, srcptr);
23748 dest = change_address (destmem, SImode, destptr);
23749 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23750 emit_label (label);
23751 LABEL_NUSES (label) = 1;
23752 }
23753 if (max_size > 2)
23754 {
23755 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23756 src = change_address (srcmem, HImode, srcptr);
23757 dest = change_address (destmem, HImode, destptr);
23758 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23759 emit_label (label);
23760 LABEL_NUSES (label) = 1;
23761 }
23762 if (max_size > 1)
23763 {
23764 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23765 src = change_address (srcmem, QImode, srcptr);
23766 dest = change_address (destmem, QImode, destptr);
23767 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23768 emit_label (label);
23769 LABEL_NUSES (label) = 1;
23770 }
23771 }
23772 else
23773 {
23774 rtx offset = force_reg (Pmode, const0_rtx);
23775 rtx tmp;
23776
23777 if (max_size > 4)
23778 {
23779 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23780 src = change_address (srcmem, SImode, srcptr);
23781 dest = change_address (destmem, SImode, destptr);
23782 emit_move_insn (dest, src);
23783 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23784 true, OPTAB_LIB_WIDEN);
23785 if (tmp != offset)
23786 emit_move_insn (offset, tmp);
23787 emit_label (label);
23788 LABEL_NUSES (label) = 1;
23789 }
23790 if (max_size > 2)
23791 {
23792 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23793 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23794 src = change_address (srcmem, HImode, tmp);
23795 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23796 dest = change_address (destmem, HImode, tmp);
23797 emit_move_insn (dest, src);
23798 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23799 true, OPTAB_LIB_WIDEN);
23800 if (tmp != offset)
23801 emit_move_insn (offset, tmp);
23802 emit_label (label);
23803 LABEL_NUSES (label) = 1;
23804 }
23805 if (max_size > 1)
23806 {
23807 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23808 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23809 src = change_address (srcmem, QImode, tmp);
23810 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23811 dest = change_address (destmem, QImode, tmp);
23812 emit_move_insn (dest, src);
23813 emit_label (label);
23814 LABEL_NUSES (label) = 1;
23815 }
23816 }
23817 }
23818
23819 /* This function emits moves to fill SIZE_TO_MOVE bytes starting at DESTMEM
23820 with value PROMOTED_VAL.
23821 The return value is the updated DESTMEM. */
23823 static rtx
23824 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23825 HOST_WIDE_INT size_to_move)
23826 {
23827 rtx dst = destmem, adjust;
23828 enum insn_code code;
23829 machine_mode move_mode;
23830 int piece_size, i;
23831
23832 /* Pick the mode of PROMOTED_VAL for the moves; if SIZE_TO_MOVE is smaller
23833 than that mode, fall back to an integer mode of SIZE_TO_MOVE bytes and
23834 take the low part of PROMOTED_VAL. */
23835 move_mode = GET_MODE (promoted_val);
23836 if (move_mode == VOIDmode)
23837 move_mode = QImode;
23838 if (size_to_move < GET_MODE_SIZE (move_mode))
23839 {
23840 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23841 promoted_val = gen_lowpart (move_mode, promoted_val);
23842 }
23843 piece_size = GET_MODE_SIZE (move_mode);
23844 code = optab_handler (mov_optab, move_mode);
23845 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23846
23847 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23848
23849 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23850 gcc_assert (size_to_move % piece_size == 0);
23851 adjust = GEN_INT (piece_size);
23852 for (i = 0; i < size_to_move; i += piece_size)
23853 {
23854 if (piece_size <= GET_MODE_SIZE (word_mode))
23855 {
23856 emit_insn (gen_strset (destptr, dst, promoted_val));
23857 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23858 piece_size);
23859 continue;
23860 }
23861
23862 emit_insn (GEN_FCN (code) (dst, promoted_val));
23863
23864 emit_move_insn (destptr,
23865 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23866
23867 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23868 piece_size);
23869 }
23870
23871 /* Update DST rtx. */
23872 return dst;
23873 }
23874 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23875 static void
23876 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23877 rtx count, int max_size)
23878 {
23879 count =
23880 expand_simple_binop (counter_mode (count), AND, count,
23881 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23882 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23883 gen_lowpart (QImode, value), count, QImode,
23884 1, max_size / 2, true);
23885 }
23886
23887 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23888 static void
23889 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23890 rtx count, int max_size)
23891 {
23892 rtx dest;
23893
23894 if (CONST_INT_P (count))
23895 {
23896 HOST_WIDE_INT countval = INTVAL (count);
23897 HOST_WIDE_INT epilogue_size = countval % max_size;
23898 int i;
23899
23900 /* For now MAX_SIZE should be a power of 2. This assert could be
23901 relaxed, but it'll require a bit more complicated epilogue
23902 expanding. */
23903 gcc_assert ((max_size & (max_size - 1)) == 0);
23904 for (i = max_size; i >= 1; i >>= 1)
23905 {
23906 if (epilogue_size & i)
23907 {
23908 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23909 destmem = emit_memset (destmem, destptr, vec_value, i);
23910 else
23911 destmem = emit_memset (destmem, destptr, value, i);
23912 }
23913 }
23914 return;
23915 }
23916 if (max_size > 32)
23917 {
23918 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23919 return;
23920 }
23921 if (max_size > 16)
23922 {
23923 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23924 if (TARGET_64BIT)
23925 {
23926 dest = change_address (destmem, DImode, destptr);
23927 emit_insn (gen_strset (destptr, dest, value));
23928 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23929 emit_insn (gen_strset (destptr, dest, value));
23930 }
23931 else
23932 {
23933 dest = change_address (destmem, SImode, destptr);
23934 emit_insn (gen_strset (destptr, dest, value));
23935 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23936 emit_insn (gen_strset (destptr, dest, value));
23937 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23938 emit_insn (gen_strset (destptr, dest, value));
23939 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23940 emit_insn (gen_strset (destptr, dest, value));
23941 }
23942 emit_label (label);
23943 LABEL_NUSES (label) = 1;
23944 }
23945 if (max_size > 8)
23946 {
23947 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23948 if (TARGET_64BIT)
23949 {
23950 dest = change_address (destmem, DImode, destptr);
23951 emit_insn (gen_strset (destptr, dest, value));
23952 }
23953 else
23954 {
23955 dest = change_address (destmem, SImode, destptr);
23956 emit_insn (gen_strset (destptr, dest, value));
23957 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23958 emit_insn (gen_strset (destptr, dest, value));
23959 }
23960 emit_label (label);
23961 LABEL_NUSES (label) = 1;
23962 }
23963 if (max_size > 4)
23964 {
23965 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23966 dest = change_address (destmem, SImode, destptr);
23967 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23968 emit_label (label);
23969 LABEL_NUSES (label) = 1;
23970 }
23971 if (max_size > 2)
23972 {
23973 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23974 dest = change_address (destmem, HImode, destptr);
23975 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23976 emit_label (label);
23977 LABEL_NUSES (label) = 1;
23978 }
23979 if (max_size > 1)
23980 {
23981 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23982 dest = change_address (destmem, QImode, destptr);
23983 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23984 emit_label (label);
23985 LABEL_NUSES (label) = 1;
23986 }
23987 }
23988
23989 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
23990 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23991 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23992 ignored.
23993 Return value is updated DESTMEM. */
23994 static rtx
23995 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23996 rtx destptr, rtx srcptr, rtx value,
23997 rtx vec_value, rtx count, int align,
23998 int desired_alignment, bool issetmem)
23999 {
24000 int i;
24001 for (i = 1; i < desired_alignment; i <<= 1)
24002 {
24003 if (align <= i)
24004 {
24005 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
24006 if (issetmem)
24007 {
24008 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24009 destmem = emit_memset (destmem, destptr, vec_value, i);
24010 else
24011 destmem = emit_memset (destmem, destptr, value, i);
24012 }
24013 else
24014 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
24015 ix86_adjust_counter (count, i);
24016 emit_label (label);
24017 LABEL_NUSES (label) = 1;
24018 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
24019 }
24020 }
24021 return destmem;
24022 }
24023
24024 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
24025 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
24026 and jump to DONE_LABEL. */
24027 static void
24028 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
24029 rtx destptr, rtx srcptr,
24030 rtx value, rtx vec_value,
24031 rtx count, int size,
24032 rtx done_label, bool issetmem)
24033 {
24034 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
24035 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
24036 rtx modesize;
24037 int n;
24038
24039 /* If we do not have vector value to copy, we must reduce size. */
24040 if (issetmem)
24041 {
24042 if (!vec_value)
24043 {
24044 if (GET_MODE (value) == VOIDmode && size > 8)
24045 mode = Pmode;
24046 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
24047 mode = GET_MODE (value);
24048 }
24049 else
24050 mode = GET_MODE (vec_value), value = vec_value;
24051 }
24052 else
24053 {
24054 /* Choose appropriate vector mode. */
24055 if (size >= 32)
24056 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24057 else if (size >= 16)
24058 mode = TARGET_SSE ? V16QImode : DImode;
24059 srcmem = change_address (srcmem, mode, srcptr);
24060 }
24061 destmem = change_address (destmem, mode, destptr);
24062 modesize = GEN_INT (GET_MODE_SIZE (mode));
24063 gcc_assert (GET_MODE_SIZE (mode) <= size);
24064 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24065 {
24066 if (issetmem)
24067 emit_move_insn (destmem, gen_lowpart (mode, value));
24068 else
24069 {
24070 emit_move_insn (destmem, srcmem);
24071 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24072 }
24073 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24074 }
24075
24076 destmem = offset_address (destmem, count, 1);
24077 destmem = offset_address (destmem, GEN_INT (-2 * size),
24078 GET_MODE_SIZE (mode));
24079 if (!issetmem)
24080 {
24081 srcmem = offset_address (srcmem, count, 1);
24082 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24083 GET_MODE_SIZE (mode));
24084 }
24085 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24086 {
24087 if (issetmem)
24088 emit_move_insn (destmem, gen_lowpart (mode, value));
24089 else
24090 {
24091 emit_move_insn (destmem, srcmem);
24092 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24093 }
24094 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24095 }
24096 emit_jump_insn (gen_jump (done_label));
24097 emit_barrier ();
24098
24099 emit_label (label);
24100 LABEL_NUSES (label) = 1;
24101 }
24102
24103 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
24104 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
24105 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
24106 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
24107 DONE_LABEL is a label after the whole copying sequence. The label is created
24108 on demand if *DONE_LABEL is NULL.
24109 MIN_SIZE is the minimal size of the block copied. This value gets adjusted for
24110 new bounds after the initial copies.
24111
24112 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
24113 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
24114 we will dispatch to a library call for large blocks.
24115
24116 In pseudocode we do:
24117
24118 if (COUNT < SIZE)
24119 {
24120 Assume that SIZE is 4. Bigger sizes are handled analogously
24121 if (COUNT & 4)
24122 {
24123 copy 4 bytes from SRCPTR to DESTPTR
24124 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24125 goto done_label
24126 }
24127 if (!COUNT)
24128 goto done_label;
24129 copy 1 byte from SRCPTR to DESTPTR
24130 if (COUNT & 2)
24131 {
24132 copy 2 bytes from SRCPTR to DESTPTR
24133 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24134 }
24135 }
24136 else
24137 {
24138 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24139 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
24140
24141 OLD_DESTPTR = DESTPTR;
24142 Align DESTPTR up to DESIRED_ALIGN
24143 SRCPTR += DESTPTR - OLD_DESTPTR
24144 COUNT -= DESTPTR - OLD_DESTPTR
24145 if (DYNAMIC_CHECK)
24146 Round COUNT down to multiple of SIZE
24147 << optional caller supplied zero size guard is here >>
24148 << optional caller supplied dynamic check is here >>
24149 << caller supplied main copy loop is here >>
24150 }
24151 done_label:
24152 */
24153 static void
24154 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24155 rtx *destptr, rtx *srcptr,
24156 machine_mode mode,
24157 rtx value, rtx vec_value,
24158 rtx *count,
24159 rtx_code_label **done_label,
24160 int size,
24161 int desired_align,
24162 int align,
24163 unsigned HOST_WIDE_INT *min_size,
24164 bool dynamic_check,
24165 bool issetmem)
24166 {
24167 rtx_code_label *loop_label = NULL, *label;
24168 int n;
24169 rtx modesize;
24170 int prolog_size = 0;
24171 rtx mode_value;
24172
24173 /* Choose the proper value to copy. */
24174 if (issetmem && VECTOR_MODE_P (mode))
24175 mode_value = vec_value;
24176 else
24177 mode_value = value;
24178 gcc_assert (GET_MODE_SIZE (mode) <= size);
24179
24180 /* See if block is big or small, handle small blocks. */
24181 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24182 {
24183 int size2 = size;
24184 loop_label = gen_label_rtx ();
24185
24186 if (!*done_label)
24187 *done_label = gen_label_rtx ();
24188
24189 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24190 1, loop_label);
24191 size2 >>= 1;
24192
24193 /* Handle sizes > 3. */
24194 for (;size2 > 2; size2 >>= 1)
24195 expand_small_movmem_or_setmem (destmem, srcmem,
24196 *destptr, *srcptr,
24197 value, vec_value,
24198 *count,
24199 size2, *done_label, issetmem);
24200 /* Nothing to copy? Jump to DONE_LABEL if so */
24201 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24202 1, *done_label);
24203
24204 /* Do a byte copy. */
24205 destmem = change_address (destmem, QImode, *destptr);
24206 if (issetmem)
24207 emit_move_insn (destmem, gen_lowpart (QImode, value));
24208 else
24209 {
24210 srcmem = change_address (srcmem, QImode, *srcptr);
24211 emit_move_insn (destmem, srcmem);
24212 }
24213
24214 /* Handle sizes 2 and 3. */
24215 label = ix86_expand_aligntest (*count, 2, false);
24216 destmem = change_address (destmem, HImode, *destptr);
24217 destmem = offset_address (destmem, *count, 1);
24218 destmem = offset_address (destmem, GEN_INT (-2), 2);
24219 if (issetmem)
24220 emit_move_insn (destmem, gen_lowpart (HImode, value));
24221 else
24222 {
24223 srcmem = change_address (srcmem, HImode, *srcptr);
24224 srcmem = offset_address (srcmem, *count, 1);
24225 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24226 emit_move_insn (destmem, srcmem);
24227 }
24228
24229 emit_label (label);
24230 LABEL_NUSES (label) = 1;
24231 emit_jump_insn (gen_jump (*done_label));
24232 emit_barrier ();
24233 }
24234 else
24235 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24236 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24237
24238 /* Start memcpy for COUNT >= SIZE. */
24239 if (loop_label)
24240 {
24241 emit_label (loop_label);
24242 LABEL_NUSES (loop_label) = 1;
24243 }
24244
24245 /* Copy first desired_align bytes. */
24246 if (!issetmem)
24247 srcmem = change_address (srcmem, mode, *srcptr);
24248 destmem = change_address (destmem, mode, *destptr);
24249 modesize = GEN_INT (GET_MODE_SIZE (mode));
24250 for (n = 0; prolog_size < desired_align - align; n++)
24251 {
24252 if (issetmem)
24253 emit_move_insn (destmem, mode_value);
24254 else
24255 {
24256 emit_move_insn (destmem, srcmem);
24257 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24258 }
24259 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24260 prolog_size += GET_MODE_SIZE (mode);
24261 }
24262
24263
24264 /* Copy last SIZE bytes. */
24265 destmem = offset_address (destmem, *count, 1);
24266 destmem = offset_address (destmem,
24267 GEN_INT (-size - prolog_size),
24268 1);
24269 if (issetmem)
24270 emit_move_insn (destmem, mode_value);
24271 else
24272 {
24273 srcmem = offset_address (srcmem, *count, 1);
24274 srcmem = offset_address (srcmem,
24275 GEN_INT (-size - prolog_size),
24276 1);
24277 emit_move_insn (destmem, srcmem);
24278 }
24279 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24280 {
24281 destmem = offset_address (destmem, modesize, 1);
24282 if (issetmem)
24283 emit_move_insn (destmem, mode_value);
24284 else
24285 {
24286 srcmem = offset_address (srcmem, modesize, 1);
24287 emit_move_insn (destmem, srcmem);
24288 }
24289 }
24290
24291 /* Align destination. */
24292 if (desired_align > 1 && desired_align > align)
24293 {
24294 rtx saveddest = *destptr;
24295
24296 gcc_assert (desired_align <= size);
24297 /* Align destptr up, place it to new register. */
24298 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24299 GEN_INT (prolog_size),
24300 NULL_RTX, 1, OPTAB_DIRECT);
24301 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24302 GEN_INT (-desired_align),
24303 *destptr, 1, OPTAB_DIRECT);
24304 /* See how many bytes we skipped. */
24305 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24306 *destptr,
24307 saveddest, 1, OPTAB_DIRECT);
24308 /* Adjust srcptr and count. */
24309 if (!issetmem)
24310 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
24311 *srcptr, 1, OPTAB_DIRECT);
24312 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24313 saveddest, *count, 1, OPTAB_DIRECT);
24314 /* We copied at most size + prolog_size. */
24315 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24316 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24317 else
24318 *min_size = 0;
24319
24320 /* Our loops always round down the block size, but for dispatch to a library
24321 call we need the precise value. */
24322 if (dynamic_check)
24323 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24324 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24325 }
24326 else
24327 {
24328 gcc_assert (prolog_size == 0);
24329 /* Decrease count, so we won't end up copying last word twice. */
24330 if (!CONST_INT_P (*count))
24331 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24332 constm1_rtx, *count, 1, OPTAB_DIRECT);
24333 else
24334 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24335 if (*min_size)
24336 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24337 }
24338 }
24339
24340
24341 /* This function is like the previous one, except here we know how many bytes
24342 need to be copied. That allows us to update alignment not only of DST, which
24343 is returned, but also of SRC, which is passed as a pointer for that
24344 reason. */
24345 static rtx
24346 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24347 rtx srcreg, rtx value, rtx vec_value,
24348 int desired_align, int align_bytes,
24349 bool issetmem)
24350 {
24351 rtx src = NULL;
24352 rtx orig_dst = dst;
24353 rtx orig_src = NULL;
24354 int piece_size = 1;
24355 int copied_bytes = 0;
24356
24357 if (!issetmem)
24358 {
24359 gcc_assert (srcp != NULL);
24360 src = *srcp;
24361 orig_src = src;
24362 }
24363
24364 for (piece_size = 1;
24365 piece_size <= desired_align && copied_bytes < align_bytes;
24366 piece_size <<= 1)
24367 {
24368 if (align_bytes & piece_size)
24369 {
24370 if (issetmem)
24371 {
24372 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24373 dst = emit_memset (dst, destreg, vec_value, piece_size);
24374 else
24375 dst = emit_memset (dst, destreg, value, piece_size);
24376 }
24377 else
24378 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24379 copied_bytes += piece_size;
24380 }
24381 }
24382 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24383 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24384 if (MEM_SIZE_KNOWN_P (orig_dst))
24385 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24386
24387 if (!issetmem)
24388 {
24389 int src_align_bytes = get_mem_align_offset (src, desired_align
24390 * BITS_PER_UNIT);
24391 if (src_align_bytes >= 0)
24392 src_align_bytes = desired_align - src_align_bytes;
24393 if (src_align_bytes >= 0)
24394 {
24395 unsigned int src_align;
24396 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24397 {
24398 if ((src_align_bytes & (src_align - 1))
24399 == (align_bytes & (src_align - 1)))
24400 break;
24401 }
24402 if (src_align > (unsigned int) desired_align)
24403 src_align = desired_align;
24404 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24405 set_mem_align (src, src_align * BITS_PER_UNIT);
24406 }
24407 if (MEM_SIZE_KNOWN_P (orig_src))
24408 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24409 *srcp = src;
24410 }
24411
24412 return dst;
24413 }
24414
24415 /* Return true if ALG can be used in current context.
24416 Assume we expand memset if MEMSET is true. */
24417 static bool
24418 alg_usable_p (enum stringop_alg alg, bool memset)
24419 {
24420 if (alg == no_stringop)
24421 return false;
24422 if (alg == vector_loop)
24423 return TARGET_SSE || TARGET_AVX;
24424 /* Algorithms using the rep prefix want at least edi and ecx;
24425 additionally, memset wants eax and memcpy wants esi. Don't
24426 consider such algorithms if the user has appropriated those
24427 registers for their own purposes. */
24428 if (alg == rep_prefix_1_byte
24429 || alg == rep_prefix_4_byte
24430 || alg == rep_prefix_8_byte)
24431 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24432 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24433 return true;
24434 }
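/* For illustration: with -ffixed-ecx the rep_prefix_* algorithms are rejected
   here, and decide_alg below has to fall back to a loop variant or to a
   library call.  */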
24435
24436 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
24437 static enum stringop_alg
24438 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24439 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24440 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24441 {
24442 const struct stringop_algs * algs;
24443 bool optimize_for_speed;
24444 int max = 0;
24445 const struct processor_costs *cost;
24446 int i;
24447 bool any_alg_usable_p = false;
24448
24449 *noalign = false;
24450 *dynamic_check = -1;
24451
24452 /* Even if the string operation call is cold, we still might spend a lot
24453 of time processing large blocks. */
24454 if (optimize_function_for_size_p (cfun)
24455 || (optimize_insn_for_size_p ()
24456 && (max_size < 256
24457 || (expected_size != -1 && expected_size < 256))))
24458 optimize_for_speed = false;
24459 else
24460 optimize_for_speed = true;
24461
24462 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24463 if (memset)
24464 algs = &cost->memset[TARGET_64BIT != 0];
24465 else
24466 algs = &cost->memcpy[TARGET_64BIT != 0];
24467
24468 /* See maximal size for user defined algorithm. */
24469 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24470 {
24471 enum stringop_alg candidate = algs->size[i].alg;
24472 bool usable = alg_usable_p (candidate, memset);
24473 any_alg_usable_p |= usable;
24474
24475 if (candidate != libcall && candidate && usable)
24476 max = algs->size[i].max;
24477 }
24478
24479 /* If the expected size is not known but the max size is small enough
24480 so that the inline version is a win, set the expected size into
24481 the range. */
24482 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24483 && expected_size == -1)
24484 expected_size = min_size / 2 + max_size / 2;
24485
24486 /* If the user specified the algorithm, honor it if possible. */
24487 if (ix86_stringop_alg != no_stringop
24488 && alg_usable_p (ix86_stringop_alg, memset))
24489 return ix86_stringop_alg;
24490 /* rep; movq or rep; movl is the smallest variant. */
24491 else if (!optimize_for_speed)
24492 {
24493 *noalign = true;
24494 if (!count || (count & 3) || (memset && !zero_memset))
24495 return alg_usable_p (rep_prefix_1_byte, memset)
24496 ? rep_prefix_1_byte : loop_1_byte;
24497 else
24498 return alg_usable_p (rep_prefix_4_byte, memset)
24499 ? rep_prefix_4_byte : loop;
24500 }
24501 /* Very tiny blocks are best handled via the loop; REP is expensive to
24502 set up. */
24503 else if (expected_size != -1 && expected_size < 4)
24504 return loop_1_byte;
24505 else if (expected_size != -1)
24506 {
24507 enum stringop_alg alg = libcall;
24508 bool alg_noalign = false;
24509 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24510 {
24511 /* We get here if the algorithms that were not libcall-based
24512 were rep-prefix based and we are unable to use rep prefixes
24513 based on global register usage. Break out of the loop and
24514 use the heuristic below. */
24515 if (algs->size[i].max == 0)
24516 break;
24517 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24518 {
24519 enum stringop_alg candidate = algs->size[i].alg;
24520
24521 if (candidate != libcall && alg_usable_p (candidate, memset))
24522 {
24523 alg = candidate;
24524 alg_noalign = algs->size[i].noalign;
24525 }
24526 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24527 last non-libcall inline algorithm. */
24528 if (TARGET_INLINE_ALL_STRINGOPS)
24529 {
24530 /* When the current size is best to be copied by a libcall,
24531 but we are still forced to inline, run the heuristic below
24532 that will pick code for medium sized blocks. */
24533 if (alg != libcall)
24534 {
24535 *noalign = alg_noalign;
24536 return alg;
24537 }
24538 else if (!any_alg_usable_p)
24539 break;
24540 }
24541 else if (alg_usable_p (candidate, memset))
24542 {
24543 *noalign = algs->size[i].noalign;
24544 return candidate;
24545 }
24546 }
24547 }
24548 }
24549 /* When asked to inline the call anyway, try to pick a meaningful choice.
24550 We look for the maximal size of block that is faster to copy by hand and
24551 take blocks of at most that size, guessing that the average size will
24552 be roughly half of the block.
24553
24554 If this turns out to be bad, we might simply specify the preferred
24555 choice in ix86_costs. */
24556 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24557 && (algs->unknown_size == libcall
24558 || !alg_usable_p (algs->unknown_size, memset)))
24559 {
24560 enum stringop_alg alg;
24561
24562 /* If there aren't any usable algorithms, then recursing on
24563 smaller sizes isn't going to find anything. Just return the
24564 simple byte-at-a-time copy loop. */
24565 if (!any_alg_usable_p)
24566 {
24567 /* Pick something reasonable. */
24568 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24569 *dynamic_check = 128;
24570 return loop_1_byte;
24571 }
24572 if (max <= 0)
24573 max = 4096;
24574 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24575 zero_memset, dynamic_check, noalign);
24576 gcc_assert (*dynamic_check == -1);
24577 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24578 *dynamic_check = max;
24579 else
24580 gcc_assert (alg != libcall);
24581 return alg;
24582 }
24583 return (alg_usable_p (algs->unknown_size, memset)
24584 ? algs->unknown_size : libcall);
24585 }
24586
24587 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24588 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24589 static int
24590 decide_alignment (int align,
24591 enum stringop_alg alg,
24592 int expected_size,
24593 machine_mode move_mode)
24594 {
24595 int desired_align = 0;
24596
24597 gcc_assert (alg != no_stringop);
24598
24599 if (alg == libcall)
24600 return 0;
24601 if (move_mode == VOIDmode)
24602 return 0;
24603
24604 desired_align = GET_MODE_SIZE (move_mode);
24605 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
24606 copying a whole cache line at once. */
24607 if (TARGET_PENTIUMPRO
24608 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24609 desired_align = 8;
24610
24611 if (optimize_size)
24612 desired_align = 1;
24613 if (desired_align < align)
24614 desired_align = align;
24615 if (expected_size != -1 && expected_size < 4)
24616 desired_align = align;
24617
24618 return desired_align;
24619 }
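/* For illustration: the vector_loop algorithm with V16QImode moves asks for
   16-byte alignment, while -Os or a tiny expected size keeps the existing
   ALIGN so that no alignment prologue is generated.  */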
24620
24621
24622 /* Helper function for memset. For a QImode value 0xXY produce
24623 0xXYXYXYXY of the width specified by MODE. This is essentially
24624 a * 0x01010101, but we can do slightly better than
24625 synth_mult by unwinding the sequence by hand on CPUs with
24626 slow multiply. */
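/* For illustration: promoting the constant 0xAB to SImode yields a register
   holding 0xABABABAB and to DImode 0xABABABABABABABAB; for a non-constant
   value the same effect is obtained either by multiplying by 0x01010101
   (0x0101010101010101 for DImode) or by the shift/or sequence below,
   whichever the cost model rates cheaper.  */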
24627 static rtx
24628 promote_duplicated_reg (machine_mode mode, rtx val)
24629 {
24630 machine_mode valmode = GET_MODE (val);
24631 rtx tmp;
24632 int nops = mode == DImode ? 3 : 2;
24633
24634 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24635 if (val == const0_rtx)
24636 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24637 if (CONST_INT_P (val))
24638 {
24639 HOST_WIDE_INT v = INTVAL (val) & 255;
24640
24641 v |= v << 8;
24642 v |= v << 16;
24643 if (mode == DImode)
24644 v |= (v << 16) << 16;
24645 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24646 }
24647
24648 if (valmode == VOIDmode)
24649 valmode = QImode;
24650 if (valmode != QImode)
24651 val = gen_lowpart (QImode, val);
24652 if (mode == QImode)
24653 return val;
24654 if (!TARGET_PARTIAL_REG_STALL)
24655 nops--;
24656 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24657 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24658 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24659 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24660 {
24661 rtx reg = convert_modes (mode, QImode, val, true);
24662 tmp = promote_duplicated_reg (mode, const1_rtx);
24663 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24664 OPTAB_DIRECT);
24665 }
24666 else
24667 {
24668 rtx reg = convert_modes (mode, QImode, val, true);
24669
24670 if (!TARGET_PARTIAL_REG_STALL)
24671 if (mode == SImode)
24672 emit_insn (gen_movsi_insv_1 (reg, reg));
24673 else
24674 emit_insn (gen_movdi_insv_1 (reg, reg));
24675 else
24676 {
24677 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24678 NULL, 1, OPTAB_DIRECT);
24679 reg =
24680 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24681 }
24682 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24683 NULL, 1, OPTAB_DIRECT);
24684 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24685 if (mode == SImode)
24686 return reg;
24687 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24688 NULL, 1, OPTAB_DIRECT);
24689 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24690 return reg;
24691 }
24692 }
24693
24694 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
24695 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
24696 getting alignment from ALIGN to DESIRED_ALIGN. */
24697 static rtx
24698 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24699 int align)
24700 {
24701 rtx promoted_val;
24702
24703 if (TARGET_64BIT
24704 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24705 promoted_val = promote_duplicated_reg (DImode, val);
24706 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24707 promoted_val = promote_duplicated_reg (SImode, val);
24708 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24709 promoted_val = promote_duplicated_reg (HImode, val);
24710 else
24711 promoted_val = val;
24712
24713 return promoted_val;
24714 }
24715
24716 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24717 operations when profitable. The code depends upon architecture, block size
24718 and alignment, but always has one of the following overall structures:
24719
24720 Aligned move sequence:
24721
24722 1) Prologue guard: Conditional that jumps up to the epilogue for small
24723 blocks that can be handled by the epilogue alone. This is faster
24724 but also needed for correctness, since the prologue assumes the block
24725 is larger than the desired alignment.
24726
24727 Optional dynamic check for size and libcall for large
24728 blocks is emitted here too, with -minline-stringops-dynamically.
24729
24730 2) Prologue: copy first few bytes in order to get destination
24731 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24732 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24733 copied. We emit either a jump tree on power of two sized
24734 blocks, or a byte loop.
24735
24736 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24737 with specified algorithm.
24738
24739 4) Epilogue: code copying tail of the block that is too small to be
24740 handled by main body (or up to size guarded by prologue guard).
24741
24742 Misaligned move sequence
24743
24744 1) Misaligned move prologue/epilogue containing:
24745 a) Prologue handling small memory blocks and jumping to done_label
24746 (skipped if blocks are known to be large enough)
24747 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24748 needed by single possibly misaligned move
24749 (skipped if alignment is not needed)
24750 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24751
24752 2) Zero size guard dispatching to done_label, if needed
24753
24754 3) Dispatch to a library call, if needed.
24755
24756 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24757 with specified algorithm. */
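/* This is expected to be invoked from the movmem/setmem expanders in i386.md;
   a false return value makes the expander FAIL, so the block operation falls
   back to a plain library call.  */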
24758 bool
24759 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24760 rtx align_exp, rtx expected_align_exp,
24761 rtx expected_size_exp, rtx min_size_exp,
24762 rtx max_size_exp, rtx probable_max_size_exp,
24763 bool issetmem)
24764 {
24765 rtx destreg;
24766 rtx srcreg = NULL;
24767 rtx_code_label *label = NULL;
24768 rtx tmp;
24769 rtx_code_label *jump_around_label = NULL;
24770 HOST_WIDE_INT align = 1;
24771 unsigned HOST_WIDE_INT count = 0;
24772 HOST_WIDE_INT expected_size = -1;
24773 int size_needed = 0, epilogue_size_needed;
24774 int desired_align = 0, align_bytes = 0;
24775 enum stringop_alg alg;
24776 rtx promoted_val = NULL;
24777 rtx vec_promoted_val = NULL;
24778 bool force_loopy_epilogue = false;
24779 int dynamic_check;
24780 bool need_zero_guard = false;
24781 bool noalign;
24782 machine_mode move_mode = VOIDmode;
24783 int unroll_factor = 1;
24784 /* TODO: Once value ranges are available, fill in proper data. */
24785 unsigned HOST_WIDE_INT min_size = 0;
24786 unsigned HOST_WIDE_INT max_size = -1;
24787 unsigned HOST_WIDE_INT probable_max_size = -1;
24788 bool misaligned_prologue_used = false;
24789
24790 if (CONST_INT_P (align_exp))
24791 align = INTVAL (align_exp);
24792 /* i386 can do misaligned access at a reasonably increased cost. */
24793 if (CONST_INT_P (expected_align_exp)
24794 && INTVAL (expected_align_exp) > align)
24795 align = INTVAL (expected_align_exp);
24796 /* ALIGN is the minimum of destination and source alignment, but we care here
24797 just about destination alignment. */
24798 else if (!issetmem
24799 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24800 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24801
24802 if (CONST_INT_P (count_exp))
24803 {
24804 min_size = max_size = probable_max_size = count = expected_size
24805 = INTVAL (count_exp);
24806 /* When COUNT is 0, there is nothing to do. */
24807 if (!count)
24808 return true;
24809 }
24810 else
24811 {
24812 if (min_size_exp)
24813 min_size = INTVAL (min_size_exp);
24814 if (max_size_exp)
24815 max_size = INTVAL (max_size_exp);
24816 if (probable_max_size_exp)
24817 probable_max_size = INTVAL (probable_max_size_exp);
24818 if (CONST_INT_P (expected_size_exp))
24819 expected_size = INTVAL (expected_size_exp);
24820 }
24821
24822 /* Make sure we don't need to care about overflow later on. */
24823 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24824 return false;
24825
24826 /* Step 0: Decide on preferred algorithm, desired alignment and
24827 size of chunks to be copied by main loop. */
24828 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24829 issetmem,
24830 issetmem && val_exp == const0_rtx,
24831 &dynamic_check, &noalign);
24832 if (alg == libcall)
24833 return false;
24834 gcc_assert (alg != no_stringop);
24835
24836 /* For now the vector version of memset is generated only for memory zeroing,
24837 as creating the promoted vector value is very cheap in this case. */
24838 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24839 alg = unrolled_loop;
24840
24841 if (!count)
24842 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24843 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24844 if (!issetmem)
24845 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24846
24847 unroll_factor = 1;
24848 move_mode = word_mode;
24849 switch (alg)
24850 {
24851 case libcall:
24852 case no_stringop:
24853 case last_alg:
24854 gcc_unreachable ();
24855 case loop_1_byte:
24856 need_zero_guard = true;
24857 move_mode = QImode;
24858 break;
24859 case loop:
24860 need_zero_guard = true;
24861 break;
24862 case unrolled_loop:
24863 need_zero_guard = true;
24864 unroll_factor = (TARGET_64BIT ? 4 : 2);
24865 break;
24866 case vector_loop:
24867 need_zero_guard = true;
24868 unroll_factor = 4;
24869 /* Find the widest supported mode. */
24870 move_mode = word_mode;
24871 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24872 != CODE_FOR_nothing)
24873 move_mode = GET_MODE_WIDER_MODE (move_mode);
24874
24875 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24876 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24877 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24878 {
24879 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24880 move_mode = mode_for_vector (word_mode, nunits);
24881 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24882 move_mode = word_mode;
24883 }
24884 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24885 break;
24886 case rep_prefix_8_byte:
24887 move_mode = DImode;
24888 break;
24889 case rep_prefix_4_byte:
24890 move_mode = SImode;
24891 break;
24892 case rep_prefix_1_byte:
24893 move_mode = QImode;
24894 break;
24895 }
24896 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24897 epilogue_size_needed = size_needed;
24898
24899 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24900 if (!TARGET_ALIGN_STRINGOPS || noalign)
24901 align = desired_align;
24902
24903 /* Step 1: Prologue guard. */
24904
24905 /* Alignment code needs count to be in register. */
24906 if (CONST_INT_P (count_exp) && desired_align > align)
24907 {
24908 if (INTVAL (count_exp) > desired_align
24909 && INTVAL (count_exp) > size_needed)
24910 {
24911 align_bytes
24912 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24913 if (align_bytes <= 0)
24914 align_bytes = 0;
24915 else
24916 align_bytes = desired_align - align_bytes;
24917 }
24918 if (align_bytes == 0)
24919 count_exp = force_reg (counter_mode (count_exp), count_exp);
24920 }
24921 gcc_assert (desired_align >= 1 && align >= 1);
24922
24923 /* Misaligned move sequences handle both prologue and epilogue at once.
24924 Default code generation results in smaller code for large alignments
24925 and also avoids redundant work when sizes are known precisely. */
24926 misaligned_prologue_used
24927 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24928 && MAX (desired_align, epilogue_size_needed) <= 32
24929 && desired_align <= epilogue_size_needed
24930 && ((desired_align > align && !align_bytes)
24931 || (!count && epilogue_size_needed > 1)));
24932
24933 /* Do the cheap promotion to allow better CSE across the
24934 main loop and epilogue (i.e. one load of the big constant in
24935 front of all the code).
24936 For now the misaligned move sequences do not have a fast path
24937 without broadcasting. */
24938 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24939 {
24940 if (alg == vector_loop)
24941 {
24942 gcc_assert (val_exp == const0_rtx);
24943 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24944 promoted_val = promote_duplicated_reg_to_size (val_exp,
24945 GET_MODE_SIZE (word_mode),
24946 desired_align, align);
24947 }
24948 else
24949 {
24950 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24951 desired_align, align);
24952 }
24953 }
24954 /* Misaligned move sequences handle both prologues and epilogues at once.
24955 Default code generation results in smaller code for large alignments and
24956 also avoids redundant work when sizes are known precisely. */
24957 if (misaligned_prologue_used)
24958 {
24959 /* The misaligned move prologue handles small blocks by itself. */
24960 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24961 (dst, src, &destreg, &srcreg,
24962 move_mode, promoted_val, vec_promoted_val,
24963 &count_exp,
24964 &jump_around_label,
24965 desired_align < align
24966 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24967 desired_align, align, &min_size, dynamic_check, issetmem);
24968 if (!issetmem)
24969 src = change_address (src, BLKmode, srcreg);
24970 dst = change_address (dst, BLKmode, destreg);
24971 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24972 epilogue_size_needed = 0;
24973 if (need_zero_guard && !min_size)
24974 {
24975 /* It is possible that we copied enough so the main loop will not
24976 execute. */
24977 gcc_assert (size_needed > 1);
24978 if (jump_around_label == NULL_RTX)
24979 jump_around_label = gen_label_rtx ();
24980 emit_cmp_and_jump_insns (count_exp,
24981 GEN_INT (size_needed),
24982 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24983 if (expected_size == -1
24984 || expected_size < (desired_align - align) / 2 + size_needed)
24985 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24986 else
24987 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24988 }
24989 }
24990 /* Ensure that alignment prologue won't copy past end of block. */
24991 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24992 {
24993 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24994 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
24995 Make sure it is a power of 2. */
24996 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
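/* For example, with move_mode == TImode and unroll_factor == 1, size_needed
   is 16; if the MAX above is 15, floor_log2 (15) is 3 and 1 << (3 + 1)
   rounds the bound back up to 16, keeping it a power of two that COUNT_EXP
   can be masked against.  */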
24997
24998 /* To improve performance of small blocks, we jump around the VAL
24999 promoting code. This means that if the promoted VAL is not constant,
25000 we might not use it in the epilogue and have to use the byte
25001 loop variant. */
25002 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
25003 force_loopy_epilogue = true;
25004 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25005 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25006 {
25007 /* If main algorithm works on QImode, no epilogue is needed.
25008 For small sizes just don't align anything. */
25009 if (size_needed == 1)
25010 desired_align = align;
25011 else
25012 goto epilogue;
25013 }
25014 else if (!count
25015 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25016 {
25017 label = gen_label_rtx ();
25018 emit_cmp_and_jump_insns (count_exp,
25019 GEN_INT (epilogue_size_needed),
25020 LTU, 0, counter_mode (count_exp), 1, label);
25021 if (expected_size == -1 || expected_size < epilogue_size_needed)
25022 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25023 else
25024 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25025 }
25026 }
25027
25028 /* Emit code to decide at runtime whether a library call or the inline
25029 expansion should be used. */
25030 if (dynamic_check != -1)
25031 {
25032 if (!issetmem && CONST_INT_P (count_exp))
25033 {
25034 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
25035 {
25036 emit_block_move_via_libcall (dst, src, count_exp, false);
25037 count_exp = const0_rtx;
25038 goto epilogue;
25039 }
25040 }
25041 else
25042 {
25043 rtx_code_label *hot_label = gen_label_rtx ();
25044 if (jump_around_label == NULL_RTX)
25045 jump_around_label = gen_label_rtx ();
25046 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
25047 LEU, 0, counter_mode (count_exp),
25048 1, hot_label);
25049 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25050 if (issetmem)
25051 set_storage_via_libcall (dst, count_exp, val_exp, false);
25052 else
25053 emit_block_move_via_libcall (dst, src, count_exp, false);
25054 emit_jump (jump_around_label);
25055 emit_label (hot_label);
25056 }
25057 }
25058
25059 /* Step 2: Alignment prologue. */
25060 /* Do the expensive promotion once we branched off the small blocks. */
25061 if (issetmem && !promoted_val)
25062 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25063 desired_align, align);
25064
25065 if (desired_align > align && !misaligned_prologue_used)
25066 {
25067 if (align_bytes == 0)
25068 {
25069 /* Except for the first move in the prologue, we no longer know
25070 the constant offset in the aliasing info. It doesn't seem worth
25071 the pain to maintain it for the first move, so throw away
25072 the info early. */
25073 dst = change_address (dst, BLKmode, destreg);
25074 if (!issetmem)
25075 src = change_address (src, BLKmode, srcreg);
25076 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25077 promoted_val, vec_promoted_val,
25078 count_exp, align, desired_align,
25079 issetmem);
25080 /* At most desired_align - align bytes are copied. */
25081 if (min_size < (unsigned)(desired_align - align))
25082 min_size = 0;
25083 else
25084 min_size -= desired_align - align;
25085 }
25086 else
25087 {
25088 /* If we know how many bytes need to be stored before dst is
25089 sufficiently aligned, maintain aliasing info accurately. */
25090 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25091 srcreg,
25092 promoted_val,
25093 vec_promoted_val,
25094 desired_align,
25095 align_bytes,
25096 issetmem);
25097
25098 count_exp = plus_constant (counter_mode (count_exp),
25099 count_exp, -align_bytes);
25100 count -= align_bytes;
25101 min_size -= align_bytes;
25102 max_size -= align_bytes;
25103 }
25104 if (need_zero_guard
25105 && !min_size
25106 && (count < (unsigned HOST_WIDE_INT) size_needed
25107 || (align_bytes == 0
25108 && count < ((unsigned HOST_WIDE_INT) size_needed
25109 + desired_align - align))))
25110 {
25111 /* It is possible that we copied enough so the main loop will not
25112 execute. */
25113 gcc_assert (size_needed > 1);
25114 if (label == NULL_RTX)
25115 label = gen_label_rtx ();
25116 emit_cmp_and_jump_insns (count_exp,
25117 GEN_INT (size_needed),
25118 LTU, 0, counter_mode (count_exp), 1, label);
25119 if (expected_size == -1
25120 || expected_size < (desired_align - align) / 2 + size_needed)
25121 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25122 else
25123 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25124 }
25125 }
25126 if (label && size_needed == 1)
25127 {
25128 emit_label (label);
25129 LABEL_NUSES (label) = 1;
25130 label = NULL;
25131 epilogue_size_needed = 1;
25132 if (issetmem)
25133 promoted_val = val_exp;
25134 }
25135 else if (label == NULL_RTX && !misaligned_prologue_used)
25136 epilogue_size_needed = size_needed;
25137
25138 /* Step 3: Main loop. */
25139
25140 switch (alg)
25141 {
25142 case libcall:
25143 case no_stringop:
25144 case last_alg:
25145 gcc_unreachable ();
25146 case loop_1_byte:
25147 case loop:
25148 case unrolled_loop:
25149 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25150 count_exp, move_mode, unroll_factor,
25151 expected_size, issetmem);
25152 break;
25153 case vector_loop:
25154 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25155 vec_promoted_val, count_exp, move_mode,
25156 unroll_factor, expected_size, issetmem);
25157 break;
25158 case rep_prefix_8_byte:
25159 case rep_prefix_4_byte:
25160 case rep_prefix_1_byte:
25161 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25162 val_exp, count_exp, move_mode, issetmem);
25163 break;
25164 }
25165 /* Properly adjust the offsets of the src and dest memory for aliasing. */
25166 if (CONST_INT_P (count_exp))
25167 {
25168 if (!issetmem)
25169 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25170 (count / size_needed) * size_needed);
25171 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25172 (count / size_needed) * size_needed);
25173 }
25174 else
25175 {
25176 if (!issetmem)
25177 src = change_address (src, BLKmode, srcreg);
25178 dst = change_address (dst, BLKmode, destreg);
25179 }
25180
25181 /* Step 4: Epilogue to copy the remaining bytes. */
25182 epilogue:
25183 if (label)
25184 {
25185 /* When the main loop is done, COUNT_EXP might hold the original count,
25186 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
25187 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
25188 bytes. Compensate if needed. */
25189
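/* For instance, if the original count is 37 and size_needed is 16, the main
   loop leaves 37 & 15 == 5 bytes, and the AND below masks COUNT_EXP down to
   that remainder before the epilogue runs.  */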
25190 if (size_needed < epilogue_size_needed)
25191 {
25192 tmp =
25193 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25194 GEN_INT (size_needed - 1), count_exp, 1,
25195 OPTAB_DIRECT);
25196 if (tmp != count_exp)
25197 emit_move_insn (count_exp, tmp);
25198 }
25199 emit_label (label);
25200 LABEL_NUSES (label) = 1;
25201 }
25202
25203 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25204 {
25205 if (force_loopy_epilogue)
25206 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25207 epilogue_size_needed);
25208 else
25209 {
25210 if (issetmem)
25211 expand_setmem_epilogue (dst, destreg, promoted_val,
25212 vec_promoted_val, count_exp,
25213 epilogue_size_needed);
25214 else
25215 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25216 epilogue_size_needed);
25217 }
25218 }
25219 if (jump_around_label)
25220 emit_label (jump_around_label);
25221 return true;
25222 }
25223
25224
25225 /* Expand the appropriate insns for doing strlen if not just doing
25226 repnz; scasb
25227
25228 out = result, initialized with the start address
25229 align_rtx = alignment of the address.
25230 scratch = scratch register, initialized with the start address when
25231 not aligned, otherwise undefined
25232
25233 This is just the body. It needs the initializations mentioned above and
25234 some address computation at the end. These things are done in i386.md. */
25235
25236 static void
25237 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25238 {
25239 int align;
25240 rtx tmp;
25241 rtx_code_label *align_2_label = NULL;
25242 rtx_code_label *align_3_label = NULL;
25243 rtx_code_label *align_4_label = gen_label_rtx ();
25244 rtx_code_label *end_0_label = gen_label_rtx ();
25245 rtx mem;
25246 rtx tmpreg = gen_reg_rtx (SImode);
25247 rtx scratch = gen_reg_rtx (SImode);
25248 rtx cmp;
25249
25250 align = 0;
25251 if (CONST_INT_P (align_rtx))
25252 align = INTVAL (align_rtx);
25253
25254 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25255
25256 /* Is there a known alignment and is it less than 4? */
25257 if (align < 4)
25258 {
25259 rtx scratch1 = gen_reg_rtx (Pmode);
25260 emit_move_insn (scratch1, out);
25261 /* Is there a known alignment and is it not 2? */
25262 if (align != 2)
25263 {
25264 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25265 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25266
25267 /* Leave just the 3 lower bits. */
25268 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25269 NULL_RTX, 0, OPTAB_WIDEN);
25270
25271 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25272 Pmode, 1, align_4_label);
25273 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25274 Pmode, 1, align_2_label);
25275 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25276 Pmode, 1, align_3_label);
25277 }
25278 else
25279 {
25280 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25281 check whether it is aligned to 4 bytes. */
25282
25283 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25284 NULL_RTX, 0, OPTAB_WIDEN);
25285
25286 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25287 Pmode, 1, align_4_label);
25288 }
25289
25290 mem = change_address (src, QImode, out);
25291
25292 /* Now compare the bytes. */
25293
25294 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
25295 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25296 QImode, 1, end_0_label);
25297
25298 /* Increment the address. */
25299 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25300
25301 /* Not needed with an alignment of 2 */
25302 if (align != 2)
25303 {
25304 emit_label (align_2_label);
25305
25306 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25307 end_0_label);
25308
25309 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25310
25311 emit_label (align_3_label);
25312 }
25313
25314 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25315 end_0_label);
25316
25317 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25318 }
25319
25320 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
25321 align this loop: doing so only enlarges the program and does not
25322 speed it up. */
25323 emit_label (align_4_label);
25324
25325 mem = change_address (src, SImode, out);
25326 emit_move_insn (scratch, mem);
25327 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25328
25329 /* This formula yields a nonzero result iff one of the bytes is zero.
25330 This saves three branches inside the loop and many cycles. */
25331
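/* For example, if scratch holds 0x61006261 (the byte in bits 16-23 is zero),
   then scratch - 0x01010101 == 0x5fff6160 and ~scratch == 0x9eff9d9e; ANDing
   them and masking with 0x80808080 leaves 0x00800000, the sign bit of the
   zero byte.  For 0x61626364, which has no zero byte, the result is 0.  */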
25332 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25333 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25334 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25335 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25336 gen_int_mode (0x80808080, SImode)));
25337 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25338 align_4_label);
25339
25340 if (TARGET_CMOVE)
25341 {
25342 rtx reg = gen_reg_rtx (SImode);
25343 rtx reg2 = gen_reg_rtx (Pmode);
25344 emit_move_insn (reg, tmpreg);
25345 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25346
25347 /* If zero is not in the first two bytes, move two bytes forward. */
25348 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25349 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25350 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25351 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
25352 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25353 reg,
25354 tmpreg)));
25355 /* Emit lea manually to avoid clobbering of flags. */
25356 emit_insn (gen_rtx_SET (SImode, reg2,
25357 gen_rtx_PLUS (Pmode, out, const2_rtx)));
25358
25359 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25360 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25361 emit_insn (gen_rtx_SET (VOIDmode, out,
25362 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25363 reg2,
25364 out)));
25365 }
25366 else
25367 {
25368 rtx_code_label *end_2_label = gen_label_rtx ();
25369 /* Is zero in the first two bytes? */
25370
25371 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25372 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25373 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25374 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25375 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25376 pc_rtx);
25377 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25378 JUMP_LABEL (tmp) = end_2_label;
25379
25380 /* Not in the first two. Move two bytes forward. */
25381 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25382 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25383
25384 emit_label (end_2_label);
25385
25386 }
25387
25388 /* Avoid branch in fixing the byte. */
25389 tmpreg = gen_lowpart (QImode, tmpreg);
25390 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25391 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25392 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25393 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25394
25395 emit_label (end_0_label);
25396 }
25397
25398 /* Expand strlen. */
25399
25400 bool
25401 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25402 {
25403 rtx addr, scratch1, scratch2, scratch3, scratch4;
25404
25405 /* The generic case of the strlen expander is long. Avoid
25406 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
25407
25408 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25409 && !TARGET_INLINE_ALL_STRINGOPS
25410 && !optimize_insn_for_size_p ()
25411 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25412 return false;
25413
25414 addr = force_reg (Pmode, XEXP (src, 0));
25415 scratch1 = gen_reg_rtx (Pmode);
25416
25417 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25418 && !optimize_insn_for_size_p ())
25419 {
25420 /* Well, it seems that some optimizer does not combine a call like
25421 foo(strlen(bar), strlen(bar));
25422 when the move and the subtraction are done here. It does calculate
25423 the length just once when these instructions are done inside of
25424 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
25425 often used and I use one fewer register for the lifetime of
25426 output_strlen_unroll(), this is better. */
25427
25428 emit_move_insn (out, addr);
25429
25430 ix86_expand_strlensi_unroll_1 (out, src, align);
25431
25432 /* strlensi_unroll_1 returns the address of the zero at the end of
25433 the string, like memchr(), so compute the length by subtracting
25434 the start address. */
25435 emit_insn (ix86_gen_sub3 (out, out, addr));
25436 }
25437 else
25438 {
25439 rtx unspec;
25440
25441 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25442 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25443 return false;
25444
25445 scratch2 = gen_reg_rtx (Pmode);
25446 scratch3 = gen_reg_rtx (Pmode);
25447 scratch4 = force_reg (Pmode, constm1_rtx);
25448
25449 emit_move_insn (scratch3, addr);
25450 eoschar = force_reg (QImode, eoschar);
25451
25452 src = replace_equiv_address_nv (src, scratch3);
25453
25454 /* If .md starts supporting :P, this can be done in .md. */
25455 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25456 scratch4), UNSPEC_SCAS);
25457 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25458 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25459 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25460 }
25461 return true;
25462 }
25463
25464 /* For a given symbol (function), construct code to compute the address of
25465 its PLT entry in the large x86-64 PIC model. */
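/* As a sketch, for a symbol FOO the RTL below amounts to something like
      movabs $FOO@PLTOFF, %<tmp>
      add    %<pic-reg>, %<tmp>
   with the actual registers left to the register allocator.  */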
25466 static rtx
25467 construct_plt_address (rtx symbol)
25468 {
25469 rtx tmp, unspec;
25470
25471 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25472 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25473 gcc_assert (Pmode == DImode);
25474
25475 tmp = gen_reg_rtx (Pmode);
25476 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25477
25478 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25479 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25480 return tmp;
25481 }
25482
25483 rtx
25484 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25485 rtx callarg2,
25486 rtx pop, bool sibcall)
25487 {
25488 rtx vec[3];
25489 rtx use = NULL, call;
25490 unsigned int vec_len = 0;
25491
25492 if (pop == const0_rtx)
25493 pop = NULL;
25494 gcc_assert (!TARGET_64BIT || !pop);
25495
25496 if (TARGET_MACHO && !TARGET_64BIT)
25497 {
25498 #if TARGET_MACHO
25499 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25500 fnaddr = machopic_indirect_call_target (fnaddr);
25501 #endif
25502 }
25503 else
25504 {
25505 /* Static functions and indirect calls don't need the pic register. */
25506 if (flag_pic
25507 && (!TARGET_64BIT
25508 || (ix86_cmodel == CM_LARGE_PIC
25509 && DEFAULT_ABI != MS_ABI))
25510 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25511 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25512 {
25513 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25514 if (ix86_use_pseudo_pic_reg ())
25515 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25516 pic_offset_table_rtx);
25517 }
25518 }
25519
25520 /* Skip setting up RAX register for -mskip-rax-setup when there are no
25521 parameters passed in vector registers. */
25522 if (TARGET_64BIT
25523 && (INTVAL (callarg2) > 0
25524 || (INTVAL (callarg2) == 0
25525 && (TARGET_SSE || !flag_skip_rax_setup))))
25526 {
25527 rtx al = gen_rtx_REG (QImode, AX_REG);
25528 emit_move_insn (al, callarg2);
25529 use_reg (&use, al);
25530 }
25531
25532 if (ix86_cmodel == CM_LARGE_PIC
25533 && !TARGET_PECOFF
25534 && MEM_P (fnaddr)
25535 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25536 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25537 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25538 else if (sibcall
25539 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25540 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25541 {
25542 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25543 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25544 }
25545
25546 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25547
25548 if (retval)
25549 {
25550 /* We should add the bound registers as destinations in case a
25551 pointer with bounds may be returned. */
25552 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25553 {
25554 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25555 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25556 retval = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, retval, b0, b1));
25557 chkp_put_regs_to_expr_list (retval);
25558 }
25559
25560 call = gen_rtx_SET (VOIDmode, retval, call);
25561 }
25562 vec[vec_len++] = call;
25563
25564 if (pop)
25565 {
25566 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25567 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25568 vec[vec_len++] = pop;
25569 }
25570
25571 if (TARGET_64BIT_MS_ABI
25572 && (!callarg2 || INTVAL (callarg2) != -2))
25573 {
25574 int const cregs_size
25575 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25576 int i;
25577
25578 for (i = 0; i < cregs_size; i++)
25579 {
25580 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25581 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25582
25583 clobber_reg (&use, gen_rtx_REG (mode, regno));
25584 }
25585 }
25586
25587 if (vec_len > 1)
25588 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25589 call = emit_call_insn (call);
25590 if (use)
25591 CALL_INSN_FUNCTION_USAGE (call) = use;
25592
25593 return call;
25594 }
25595
25596 /* Output the assembly for a call instruction. */
25597
25598 const char *
25599 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25600 {
25601 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25602 bool seh_nop_p = false;
25603 const char *xasm;
25604
25605 if (SIBLING_CALL_P (insn))
25606 {
25607 if (direct_p)
25608 xasm = "%!jmp\t%P0";
25609 /* SEH epilogue detection requires the indirect branch case
25610 to include REX.W. */
25611 else if (TARGET_SEH)
25612 xasm = "%!rex.W jmp %A0";
25613 else
25614 xasm = "%!jmp\t%A0";
25615
25616 output_asm_insn (xasm, &call_op);
25617 return "";
25618 }
25619
25620 /* SEH unwinding can require an extra nop to be emitted in several
25621 circumstances. Determine if we have one of those. */
25622 if (TARGET_SEH)
25623 {
25624 rtx_insn *i;
25625
25626 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25627 {
25628 /* If we get to another real insn, we don't need the nop. */
25629 if (INSN_P (i))
25630 break;
25631
25632 /* If we get to the epilogue note, prevent a catch region from
25633 being adjacent to the standard epilogue sequence. With non-call
25634 exceptions, we'll have done this during epilogue emission. */
25635 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25636 && !flag_non_call_exceptions
25637 && !can_throw_internal (insn))
25638 {
25639 seh_nop_p = true;
25640 break;
25641 }
25642 }
25643
25644 /* If we didn't find a real insn following the call, prevent the
25645 unwinder from looking into the next function. */
25646 if (i == NULL)
25647 seh_nop_p = true;
25648 }
25649
25650 if (direct_p)
25651 xasm = "%!call\t%P0";
25652 else
25653 xasm = "%!call\t%A0";
25654
25655 output_asm_insn (xasm, &call_op);
25656
25657 if (seh_nop_p)
25658 return "nop";
25659
25660 return "";
25661 }
25662 \f
25663 /* Clear stack slot assignments remembered from previous functions.
25664 This is called from INIT_EXPANDERS once before RTL is emitted for each
25665 function. */
25666
25667 static struct machine_function *
25668 ix86_init_machine_status (void)
25669 {
25670 struct machine_function *f;
25671
25672 f = ggc_cleared_alloc<machine_function> ();
25673 f->use_fast_prologue_epilogue_nregs = -1;
25674 f->call_abi = ix86_abi;
25675
25676 return f;
25677 }
25678
25679 /* Return a MEM corresponding to a stack slot with mode MODE.
25680 Allocate a new slot if necessary.
25681
25682 The RTL for a function can have several slots available: N is
25683 which slot to use. */
25684
25685 rtx
25686 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25687 {
25688 struct stack_local_entry *s;
25689
25690 gcc_assert (n < MAX_386_STACK_LOCALS);
25691
25692 for (s = ix86_stack_locals; s; s = s->next)
25693 if (s->mode == mode && s->n == n)
25694 return validize_mem (copy_rtx (s->rtl));
25695
25696 s = ggc_alloc<stack_local_entry> ();
25697 s->n = n;
25698 s->mode = mode;
25699 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25700
25701 s->next = ix86_stack_locals;
25702 ix86_stack_locals = s;
25703 return validize_mem (copy_rtx (s->rtl));
25704 }
25705
25706 static void
25707 ix86_instantiate_decls (void)
25708 {
25709 struct stack_local_entry *s;
25710
25711 for (s = ix86_stack_locals; s; s = s->next)
25712 if (s->rtl != NULL_RTX)
25713 instantiate_decl_rtl (s->rtl);
25714 }
25715 \f
25716 /* Check whether x86 address PARTS is a pc-relative address. */
25717
25718 static bool
25719 rip_relative_addr_p (struct ix86_address *parts)
25720 {
25721 rtx base, index, disp;
25722
25723 base = parts->base;
25724 index = parts->index;
25725 disp = parts->disp;
25726
25727 if (disp && !base && !index)
25728 {
25729 if (TARGET_64BIT)
25730 {
25731 rtx symbol = disp;
25732
25733 if (GET_CODE (disp) == CONST)
25734 symbol = XEXP (disp, 0);
25735 if (GET_CODE (symbol) == PLUS
25736 && CONST_INT_P (XEXP (symbol, 1)))
25737 symbol = XEXP (symbol, 0);
25738
25739 if (GET_CODE (symbol) == LABEL_REF
25740 || (GET_CODE (symbol) == SYMBOL_REF
25741 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25742 || (GET_CODE (symbol) == UNSPEC
25743 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25744 || XINT (symbol, 1) == UNSPEC_PCREL
25745 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25746 return true;
25747 }
25748 }
25749 return false;
25750 }
25751
25752 /* Calculate the length of the memory address in the instruction encoding.
25753 This includes the addr32 prefix but not the one-byte modrm, opcode,
25754 or other prefixes. We never generate an addr32 prefix for the LEA insn. */
25755
25756 int
25757 memory_address_length (rtx addr, bool lea)
25758 {
25759 struct ix86_address parts;
25760 rtx base, index, disp;
25761 int len;
25762 int ok;
25763
25764 if (GET_CODE (addr) == PRE_DEC
25765 || GET_CODE (addr) == POST_INC
25766 || GET_CODE (addr) == PRE_MODIFY
25767 || GET_CODE (addr) == POST_MODIFY)
25768 return 0;
25769
25770 ok = ix86_decompose_address (addr, &parts);
25771 gcc_assert (ok);
25772
25773 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25774
25775 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
25776 if (TARGET_64BIT && !lea
25777 && (SImode_address_operand (addr, VOIDmode)
25778 || (parts.base && GET_MODE (parts.base) == SImode)
25779 || (parts.index && GET_MODE (parts.index) == SImode)))
25780 len++;
25781
25782 base = parts.base;
25783 index = parts.index;
25784 disp = parts.disp;
25785
25786 if (base && GET_CODE (base) == SUBREG)
25787 base = SUBREG_REG (base);
25788 if (index && GET_CODE (index) == SUBREG)
25789 index = SUBREG_REG (index);
25790
25791 gcc_assert (base == NULL_RTX || REG_P (base));
25792 gcc_assert (index == NULL_RTX || REG_P (index));
25793
25794 /* Rule of thumb:
25795 - esp as the base always wants an index,
25796 - ebp as the base always wants a displacement,
25797 - r12 as the base always wants an index,
25798 - r13 as the base always wants a displacement. */
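/* For example, a bare (%esp) cannot be encoded without a SIB byte, and a
   bare (%ebp) is encoded as 0(%ebp) with a one-byte displacement, so each
   costs one byte more than, say, (%eax); r12 and r13 behave the same way
   in 64-bit code.  */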
25799
25800 /* Register Indirect. */
25801 if (base && !index && !disp)
25802 {
25803 /* esp (for its index) and ebp (for its displacement) need
25804 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25805 code. */
25806 if (base == arg_pointer_rtx
25807 || base == frame_pointer_rtx
25808 || REGNO (base) == SP_REG
25809 || REGNO (base) == BP_REG
25810 || REGNO (base) == R12_REG
25811 || REGNO (base) == R13_REG)
25812 len++;
25813 }
25814
25815 /* Direct Addressing. In 64-bit mode, mod 00 r/m 5
25816 is not disp32 but disp32(%rip), so for disp32 a
25817 SIB byte is needed, unless print_operand_address
25818 optimizes it into disp32(%rip) or (%rip) is implied
25819 by an UNSPEC. */
25820 else if (disp && !base && !index)
25821 {
25822 len += 4;
25823 if (rip_relative_addr_p (&parts))
25824 len++;
25825 }
25826 else
25827 {
25828 /* Find the length of the displacement constant. */
25829 if (disp)
25830 {
25831 if (base && satisfies_constraint_K (disp))
25832 len += 1;
25833 else
25834 len += 4;
25835 }
25836 /* ebp always wants a displacement. Similarly r13. */
25837 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25838 len++;
25839
25840 /* An index requires the two-byte modrm form.... */
25841 if (index
25842 /* ...like esp (or r12), which always wants an index. */
25843 || base == arg_pointer_rtx
25844 || base == frame_pointer_rtx
25845 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25846 len++;
25847 }
25848
25849 return len;
25850 }
25851
25852 /* Compute the default value for the "length_immediate" attribute. When
25853 SHORTFORM is set, expect the insn to have an 8-bit immediate alternative. */
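/* For example, "addl $3, %eax" in a pattern with a short-form alternative
   gets a one-byte immediate (length 1), while "addl $300, %eax" needs a
   full 32-bit immediate (length 4).  */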
25854 int
25855 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25856 {
25857 int len = 0;
25858 int i;
25859 extract_insn_cached (insn);
25860 for (i = recog_data.n_operands - 1; i >= 0; --i)
25861 if (CONSTANT_P (recog_data.operand[i]))
25862 {
25863 enum attr_mode mode = get_attr_mode (insn);
25864
25865 gcc_assert (!len);
25866 if (shortform && CONST_INT_P (recog_data.operand[i]))
25867 {
25868 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25869 switch (mode)
25870 {
25871 case MODE_QI:
25872 len = 1;
25873 continue;
25874 case MODE_HI:
25875 ival = trunc_int_for_mode (ival, HImode);
25876 break;
25877 case MODE_SI:
25878 ival = trunc_int_for_mode (ival, SImode);
25879 break;
25880 default:
25881 break;
25882 }
25883 if (IN_RANGE (ival, -128, 127))
25884 {
25885 len = 1;
25886 continue;
25887 }
25888 }
25889 switch (mode)
25890 {
25891 case MODE_QI:
25892 len = 1;
25893 break;
25894 case MODE_HI:
25895 len = 2;
25896 break;
25897 case MODE_SI:
25898 len = 4;
25899 break;
25900 /* Immediates for DImode instructions are encoded
25901 as 32-bit sign-extended values. */
25902 case MODE_DI:
25903 len = 4;
25904 break;
25905 default:
25906 fatal_insn ("unknown insn mode", insn);
25907 }
25908 }
25909 return len;
25910 }
25911
25912 /* Compute default value for "length_address" attribute. */
25913 int
25914 ix86_attr_length_address_default (rtx_insn *insn)
25915 {
25916 int i;
25917
25918 if (get_attr_type (insn) == TYPE_LEA)
25919 {
25920 rtx set = PATTERN (insn), addr;
25921
25922 if (GET_CODE (set) == PARALLEL)
25923 set = XVECEXP (set, 0, 0);
25924
25925 gcc_assert (GET_CODE (set) == SET);
25926
25927 addr = SET_SRC (set);
25928
25929 return memory_address_length (addr, true);
25930 }
25931
25932 extract_insn_cached (insn);
25933 for (i = recog_data.n_operands - 1; i >= 0; --i)
25934 if (MEM_P (recog_data.operand[i]))
25935 {
25936 constrain_operands_cached (insn, reload_completed);
25937 if (which_alternative != -1)
25938 {
25939 const char *constraints = recog_data.constraints[i];
25940 int alt = which_alternative;
25941
25942 while (*constraints == '=' || *constraints == '+')
25943 constraints++;
25944 while (alt-- > 0)
25945 while (*constraints++ != ',')
25946 ;
25947 /* Skip ignored operands. */
25948 if (*constraints == 'X')
25949 continue;
25950 }
25951 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25952 }
25953 return 0;
25954 }
25955
25956 /* Compute the default value for the "length_vex" attribute. It includes
25957 the 2- or 3-byte VEX prefix and 1 opcode byte. */
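/* For example, "vaddps %xmm2, %xmm1, %xmm0" fits in the 2-byte (C5) VEX
   prefix, giving length 3, while an insn that needs VEX.W or an extended
   register in a memory operand must use the 3-byte (C4) form, giving
   length 4.  */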
25958
25959 int
25960 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
25961 bool has_vex_w)
25962 {
25963 int i;
25964
25965 /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX.W
25966 bit requires the 3-byte VEX prefix. */
25967 if (!has_0f_opcode || has_vex_w)
25968 return 3 + 1;
25969
25970 /* We can always use 2 byte VEX prefix in 32bit. */
25971 if (!TARGET_64BIT)
25972 return 2 + 1;
25973
25974 extract_insn_cached (insn);
25975
25976 for (i = recog_data.n_operands - 1; i >= 0; --i)
25977 if (REG_P (recog_data.operand[i]))
25978 {
25979 /* REX.W bit uses 3 byte VEX prefix. */
25980 if (GET_MODE (recog_data.operand[i]) == DImode
25981 && GENERAL_REG_P (recog_data.operand[i]))
25982 return 3 + 1;
25983 }
25984 else
25985 {
25986 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25987 if (MEM_P (recog_data.operand[i])
25988 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25989 return 3 + 1;
25990 }
25991
25992 return 2 + 1;
25993 }
25994 \f
25995 /* Return the maximum number of instructions a cpu can issue. */
25996
25997 static int
25998 ix86_issue_rate (void)
25999 {
26000 switch (ix86_tune)
26001 {
26002 case PROCESSOR_PENTIUM:
26003 case PROCESSOR_BONNELL:
26004 case PROCESSOR_SILVERMONT:
26005 case PROCESSOR_KNL:
26006 case PROCESSOR_INTEL:
26007 case PROCESSOR_K6:
26008 case PROCESSOR_BTVER2:
26009 case PROCESSOR_PENTIUM4:
26010 case PROCESSOR_NOCONA:
26011 return 2;
26012
26013 case PROCESSOR_PENTIUMPRO:
26014 case PROCESSOR_ATHLON:
26015 case PROCESSOR_K8:
26016 case PROCESSOR_AMDFAM10:
26017 case PROCESSOR_GENERIC:
26018 case PROCESSOR_BTVER1:
26019 return 3;
26020
26021 case PROCESSOR_BDVER1:
26022 case PROCESSOR_BDVER2:
26023 case PROCESSOR_BDVER3:
26024 case PROCESSOR_BDVER4:
26025 case PROCESSOR_CORE2:
26026 case PROCESSOR_NEHALEM:
26027 case PROCESSOR_SANDYBRIDGE:
26028 case PROCESSOR_HASWELL:
26029 return 4;
26030
26031 default:
26032 return 1;
26033 }
26034 }
26035
26036 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
26037 by DEP_INSN and nothing else set by DEP_INSN. */
26038
26039 static bool
26040 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
26041 {
26042 rtx set, set2;
26043
26044 /* Simplify the test for uninteresting insns. */
26045 if (insn_type != TYPE_SETCC
26046 && insn_type != TYPE_ICMOV
26047 && insn_type != TYPE_FCMOV
26048 && insn_type != TYPE_IBR)
26049 return false;
26050
26051 if ((set = single_set (dep_insn)) != 0)
26052 {
26053 set = SET_DEST (set);
26054 set2 = NULL_RTX;
26055 }
26056 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26057 && XVECLEN (PATTERN (dep_insn), 0) == 2
26058 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26059 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26060 {
26061 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26062 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
26063 }
26064 else
26065 return false;
26066
26067 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26068 return false;
26069
26070 /* This test is true if the dependent insn reads the flags but
26071 not any other potentially set register. */
26072 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26073 return false;
26074
26075 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26076 return false;
26077
26078 return true;
26079 }
26080
26081 /* Return true iff USE_INSN has a memory address with operands set by
26082 SET_INSN. */
26083
26084 bool
26085 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26086 {
26087 int i;
26088 extract_insn_cached (use_insn);
26089 for (i = recog_data.n_operands - 1; i >= 0; --i)
26090 if (MEM_P (recog_data.operand[i]))
26091 {
26092 rtx addr = XEXP (recog_data.operand[i], 0);
26093 return modified_in_p (addr, set_insn) != 0;
26094 }
26095 return false;
26096 }
26097
26098 /* Helper function for exact_store_load_dependency.
26099 Return true if addr is found in insn. */
26100 static bool
26101 exact_dependency_1 (rtx addr, rtx insn)
26102 {
26103 enum rtx_code code;
26104 const char *format_ptr;
26105 int i, j;
26106
26107 code = GET_CODE (insn);
26108 switch (code)
26109 {
26110 case MEM:
26111 if (rtx_equal_p (addr, insn))
26112 return true;
26113 break;
26114 case REG:
26115 CASE_CONST_ANY:
26116 case SYMBOL_REF:
26117 case CODE_LABEL:
26118 case PC:
26119 case CC0:
26120 case EXPR_LIST:
26121 return false;
26122 default:
26123 break;
26124 }
26125
26126 format_ptr = GET_RTX_FORMAT (code);
26127 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26128 {
26129 switch (*format_ptr++)
26130 {
26131 case 'e':
26132 if (exact_dependency_1 (addr, XEXP (insn, i)))
26133 return true;
26134 break;
26135 case 'E':
26136 for (j = 0; j < XVECLEN (insn, i); j++)
26137 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26138 return true;
26139 break;
26140 }
26141 }
26142 return false;
26143 }
26144
26145 /* Return true if there exists an exact dependency between the store and
26146 the load, i.e. the same memory address is used in both. */
26147 static bool
26148 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26149 {
26150 rtx set1, set2;
26151
26152 set1 = single_set (store);
26153 if (!set1)
26154 return false;
26155 if (!MEM_P (SET_DEST (set1)))
26156 return false;
26157 set2 = single_set (load);
26158 if (!set2)
26159 return false;
26160 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26161 return true;
26162 return false;
26163 }
26164
26165 static int
26166 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26167 {
26168 enum attr_type insn_type, dep_insn_type;
26169 enum attr_memory memory;
26170 rtx set, set2;
26171 int dep_insn_code_number;
26172
26173 /* Anti and output dependencies have zero cost on all CPUs. */
26174 if (REG_NOTE_KIND (link) != 0)
26175 return 0;
26176
26177 dep_insn_code_number = recog_memoized (dep_insn);
26178
26179 /* If we can't recognize the insns, we can't really do anything. */
26180 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26181 return cost;
26182
26183 insn_type = get_attr_type (insn);
26184 dep_insn_type = get_attr_type (dep_insn);
26185
26186 switch (ix86_tune)
26187 {
26188 case PROCESSOR_PENTIUM:
26189 /* Address Generation Interlock adds a cycle of latency. */
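	 /* For example, "addl $4, %ebx" immediately followed by
	    "movl (%ebx), %eax" pays an extra cycle on Pentium because the
	    address register is produced by the preceding instruction.  */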
26190 if (insn_type == TYPE_LEA)
26191 {
26192 rtx addr = PATTERN (insn);
26193
26194 if (GET_CODE (addr) == PARALLEL)
26195 addr = XVECEXP (addr, 0, 0);
26196
26197 gcc_assert (GET_CODE (addr) == SET);
26198
26199 addr = SET_SRC (addr);
26200 if (modified_in_p (addr, dep_insn))
26201 cost += 1;
26202 }
26203 else if (ix86_agi_dependent (dep_insn, insn))
26204 cost += 1;
26205
26206 /* ??? Compares pair with jump/setcc. */
26207 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26208 cost = 0;
26209
26210 /* Floating point stores require value to be ready one cycle earlier. */
26211 if (insn_type == TYPE_FMOV
26212 && get_attr_memory (insn) == MEMORY_STORE
26213 && !ix86_agi_dependent (dep_insn, insn))
26214 cost += 1;
26215 break;
26216
26217 case PROCESSOR_PENTIUMPRO:
26218 /* INT->FP conversion is expensive. */
26219 if (get_attr_fp_int_src (dep_insn))
26220 cost += 5;
26221
26222 /* There is one cycle extra latency between an FP op and a store. */
26223 if (insn_type == TYPE_FMOV
26224 && (set = single_set (dep_insn)) != NULL_RTX
26225 && (set2 = single_set (insn)) != NULL_RTX
26226 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26227 && MEM_P (SET_DEST (set2)))
26228 cost += 1;
26229
26230 memory = get_attr_memory (insn);
26231
26232 /* Show the ability of the reorder buffer to hide the latency of a load
26233 by executing it in parallel with the previous instruction when the
26234 previous instruction is not needed to compute the address. */
26235 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26236 && !ix86_agi_dependent (dep_insn, insn))
26237 {
26238 /* Claim moves to take one cycle, as the core can issue one load
26239 at a time and the next load can start a cycle later. */
26240 if (dep_insn_type == TYPE_IMOV
26241 || dep_insn_type == TYPE_FMOV)
26242 cost = 1;
26243 else if (cost > 1)
26244 cost--;
26245 }
26246 break;
26247
26248 case PROCESSOR_K6:
26249 /* The esp dependency is resolved before
26250 the instruction is really finished. */
26251 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26252 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26253 return 1;
26254
26255 /* INT->FP conversion is expensive. */
26256 if (get_attr_fp_int_src (dep_insn))
26257 cost += 5;
26258
26259 memory = get_attr_memory (insn);
26260
26261 /* Show the ability of the reorder buffer to hide the latency of a load
26262 by executing it in parallel with the previous instruction when the
26263 previous instruction is not needed to compute the address. */
26264 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26265 && !ix86_agi_dependent (dep_insn, insn))
26266 {
26267 /* Claim moves to take one cycle, as the core can issue one load
26268 at a time and the next load can start a cycle later. */
26269 if (dep_insn_type == TYPE_IMOV
26270 || dep_insn_type == TYPE_FMOV)
26271 cost = 1;
26272 else if (cost > 2)
26273 cost -= 2;
26274 else
26275 cost = 1;
26276 }
26277 break;
26278
26279 case PROCESSOR_AMDFAM10:
26280 case PROCESSOR_BDVER1:
26281 case PROCESSOR_BDVER2:
26282 case PROCESSOR_BDVER3:
26283 case PROCESSOR_BDVER4:
26284 case PROCESSOR_BTVER1:
26285 case PROCESSOR_BTVER2:
26286 case PROCESSOR_GENERIC:
26287 /* The stack engine allows push&pop instructions to execute in parallel. */
26288 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26289 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26290 return 0;
26291 /* FALLTHRU */
26292
26293 case PROCESSOR_ATHLON:
26294 case PROCESSOR_K8:
26295 memory = get_attr_memory (insn);
26296
26297 /* Show the ability of the reorder buffer to hide the latency of a load
26298 by executing it in parallel with the previous instruction when the
26299 previous instruction is not needed to compute the address. */
26300 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26301 && !ix86_agi_dependent (dep_insn, insn))
26302 {
26303 enum attr_unit unit = get_attr_unit (insn);
26304 int loadcost = 3;
26305
26306 /* Because of the difference between the length of integer and
26307 floating unit pipeline preparation stages, the memory operands
26308 for floating point are cheaper.
26309
26310 ??? For Athlon the difference is most probably 2. */
26311 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26312 loadcost = 3;
26313 else
26314 loadcost = TARGET_ATHLON ? 2 : 0;
26315
26316 if (cost >= loadcost)
26317 cost -= loadcost;
26318 else
26319 cost = 0;
26320 }
26321 break;
26322
26323 case PROCESSOR_CORE2:
26324 case PROCESSOR_NEHALEM:
26325 case PROCESSOR_SANDYBRIDGE:
26326 case PROCESSOR_HASWELL:
26327 /* The stack engine allows push&pop instructions to execute in parallel. */
26328 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26329 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26330 return 0;
26331
26332 memory = get_attr_memory (insn);
26333
26334 /* Show the ability of the reorder buffer to hide the latency of a load
26335 by executing it in parallel with the previous instruction when the
26336 previous instruction is not needed to compute the address. */
26337 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26338 && !ix86_agi_dependent (dep_insn, insn))
26339 {
26340 if (cost >= 4)
26341 cost -= 4;
26342 else
26343 cost = 0;
26344 }
26345 break;
26346
26347 case PROCESSOR_SILVERMONT:
26348 case PROCESSOR_KNL:
26349 case PROCESSOR_INTEL:
26350 if (!reload_completed)
26351 return cost;
26352
26353 /* Increase cost of integer loads. */
26354 memory = get_attr_memory (dep_insn);
26355 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26356 {
26357 enum attr_unit unit = get_attr_unit (dep_insn);
26358 if (unit == UNIT_INTEGER && cost == 1)
26359 {
26360 if (memory == MEMORY_LOAD)
26361 cost = 3;
26362 else
26363 {
26364 /* Increase the cost of ld/st for short int types only
26365 because of the store forwarding issue. */
26366 rtx set = single_set (dep_insn);
26367 if (set && (GET_MODE (SET_DEST (set)) == QImode
26368 || GET_MODE (SET_DEST (set)) == HImode))
26369 {
26370 /* Increase cost of store/load insn if exact
26371 dependence exists and it is load insn. */
26372 enum attr_memory insn_memory = get_attr_memory (insn);
26373 if (insn_memory == MEMORY_LOAD
26374 && exact_store_load_dependency (dep_insn, insn))
26375 cost = 3;
26376 }
26377 }
26378 }
26379 }
26380
26381 default:
26382 break;
26383 }
26384
26385 return cost;
26386 }
26387
26388 /* How many alternative schedules to try. This should be as wide as the
26389 scheduling freedom in the DFA, but no wider. Making this value too
26390 large results in extra work for the scheduler. */
26391
26392 static int
26393 ia32_multipass_dfa_lookahead (void)
26394 {
26395 switch (ix86_tune)
26396 {
26397 case PROCESSOR_PENTIUM:
26398 return 2;
26399
26400 case PROCESSOR_PENTIUMPRO:
26401 case PROCESSOR_K6:
26402 return 1;
26403
26404 case PROCESSOR_BDVER1:
26405 case PROCESSOR_BDVER2:
26406 case PROCESSOR_BDVER3:
26407 case PROCESSOR_BDVER4:
26408 /* We use lookahead value 4 for BD both before and after reload
26409 schedules. The plan is to use value 8 for -O3. */
26410 return 4;
26411
26412 case PROCESSOR_CORE2:
26413 case PROCESSOR_NEHALEM:
26414 case PROCESSOR_SANDYBRIDGE:
26415 case PROCESSOR_HASWELL:
26416 case PROCESSOR_BONNELL:
26417 case PROCESSOR_SILVERMONT:
26418 case PROCESSOR_KNL:
26419 case PROCESSOR_INTEL:
26420 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26421 as the number of instructions that can be executed in a cycle, i.e.,
26422 issue_rate. I wonder why tuning for many CPUs does not do this. */
26423 if (reload_completed)
26424 return ix86_issue_rate ();
26425 /* Don't use lookahead for pre-reload schedule to save compile time. */
26426 return 0;
26427
26428 default:
26429 return 0;
26430 }
26431 }
26432
26433 /* Return true if target platform supports macro-fusion. */
26434
26435 static bool
26436 ix86_macro_fusion_p ()
26437 {
26438 return TARGET_FUSE_CMP_AND_BRANCH;
26439 }
26440
26441 /* Check whether the current microarchitecture supports macro fusion
26442 for the insn pair "CONDGEN + CONDJMP". Refer to the
26443 "Intel Architectures Optimization Reference Manual". */
26444
26445 static bool
26446 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26447 {
26448 rtx src, dest;
26449 enum rtx_code ccode;
26450 rtx compare_set = NULL_RTX, test_if, cond;
26451 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26452
26453 if (!any_condjump_p (condjmp))
26454 return false;
26455
26456 if (get_attr_type (condgen) != TYPE_TEST
26457 && get_attr_type (condgen) != TYPE_ICMP
26458 && get_attr_type (condgen) != TYPE_INCDEC
26459 && get_attr_type (condgen) != TYPE_ALU)
26460 return false;
26461
26462 compare_set = single_set (condgen);
26463 if (compare_set == NULL_RTX
26464 && !TARGET_FUSE_ALU_AND_BRANCH)
26465 return false;
26466
26467 if (compare_set == NULL_RTX)
26468 {
26469 int i;
26470 rtx pat = PATTERN (condgen);
26471 for (i = 0; i < XVECLEN (pat, 0); i++)
26472 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26473 {
26474 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26475 if (GET_CODE (set_src) == COMPARE)
26476 compare_set = XVECEXP (pat, 0, i);
26477 else
26478 alu_set = XVECEXP (pat, 0, i);
26479 }
26480 }
26481 if (compare_set == NULL_RTX)
26482 return false;
26483 src = SET_SRC (compare_set);
26484 if (GET_CODE (src) != COMPARE)
26485 return false;
26486
26487 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26488 supported. */
26489 if ((MEM_P (XEXP (src, 0))
26490 && CONST_INT_P (XEXP (src, 1)))
26491 || (MEM_P (XEXP (src, 1))
26492 && CONST_INT_P (XEXP (src, 0))))
26493 return false;
26494
26495 /* No fusion for RIP-relative address. */
26496 if (MEM_P (XEXP (src, 0)))
26497 addr = XEXP (XEXP (src, 0), 0);
26498 else if (MEM_P (XEXP (src, 1)))
26499 addr = XEXP (XEXP (src, 1), 0);
26500
26501 if (addr) {
26502 ix86_address parts;
26503 int ok = ix86_decompose_address (addr, &parts);
26504 gcc_assert (ok);
26505
26506 if (rip_relative_addr_p (&parts))
26507 return false;
26508 }
26509
26510 test_if = SET_SRC (pc_set (condjmp));
26511 cond = XEXP (test_if, 0);
26512 ccode = GET_CODE (cond);
26513 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26514 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26515 && (ccode == GE
26516 || ccode == GT
26517 || ccode == LE
26518 || ccode == LT))
26519 return false;
26520
26521 /* Return true for TYPE_TEST and TYPE_ICMP. */
26522 if (get_attr_type (condgen) == TYPE_TEST
26523 || get_attr_type (condgen) == TYPE_ICMP)
26524 return true;
26525
26526 /* The following handles the case of macro-fusion for alu + jmp. */
26527 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26528 return false;
26529
26530 /* No fusion for alu op with memory destination operand. */
26531 dest = SET_DEST (alu_set);
26532 if (MEM_P (dest))
26533 return false;
26534
26535 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26536 supported. */
26537 if (get_attr_type (condgen) == TYPE_INCDEC
26538 && (ccode == GEU
26539 || ccode == GTU
26540 || ccode == LEU
26541 || ccode == LTU))
26542 return false;
26543
26544 return true;
26545 }
26546
26547 /* Try to reorder the ready list to take advantage of Atom pipelined IMUL
26548 execution. It is applied if
26549 (1) an IMUL instruction is on the top of the list;
26550 (2) there exists exactly one producer of an independent IMUL instruction
26551 in the ready list.
26552 Return the index of the IMUL producer if it was found and -1 otherwise. */
26553 static int
26554 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26555 {
26556 rtx_insn *insn;
26557 rtx set, insn1, insn2;
26558 sd_iterator_def sd_it;
26559 dep_t dep;
26560 int index = -1;
26561 int i;
26562
26563 if (!TARGET_BONNELL)
26564 return index;
26565
26566 /* Check that IMUL instruction is on the top of ready list. */
26567 insn = ready[n_ready - 1];
26568 set = single_set (insn);
26569 if (!set)
26570 return index;
26571 if (!(GET_CODE (SET_SRC (set)) == MULT
26572 && GET_MODE (SET_SRC (set)) == SImode))
26573 return index;
26574
26575 /* Search for producer of independent IMUL instruction. */
26576 for (i = n_ready - 2; i >= 0; i--)
26577 {
26578 insn = ready[i];
26579 if (!NONDEBUG_INSN_P (insn))
26580 continue;
26581 /* Skip IMUL instruction. */
26582 insn2 = PATTERN (insn);
26583 if (GET_CODE (insn2) == PARALLEL)
26584 insn2 = XVECEXP (insn2, 0, 0);
26585 if (GET_CODE (insn2) == SET
26586 && GET_CODE (SET_SRC (insn2)) == MULT
26587 && GET_MODE (SET_SRC (insn2)) == SImode)
26588 continue;
26589
26590 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26591 {
26592 rtx con;
26593 con = DEP_CON (dep);
26594 if (!NONDEBUG_INSN_P (con))
26595 continue;
26596 insn1 = PATTERN (con);
26597 if (GET_CODE (insn1) == PARALLEL)
26598 insn1 = XVECEXP (insn1, 0, 0);
26599
26600 if (GET_CODE (insn1) == SET
26601 && GET_CODE (SET_SRC (insn1)) == MULT
26602 && GET_MODE (SET_SRC (insn1)) == SImode)
26603 {
26604 sd_iterator_def sd_it1;
26605 dep_t dep1;
26606 /* Check if there is no other dependee for IMUL. */
26607 index = i;
26608 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26609 {
26610 rtx pro;
26611 pro = DEP_PRO (dep1);
26612 if (!NONDEBUG_INSN_P (pro))
26613 continue;
26614 if (pro != insn)
26615 index = -1;
26616 }
26617 if (index >= 0)
26618 break;
26619 }
26620 }
26621 if (index >= 0)
26622 break;
26623 }
26624 return index;
26625 }
26626
26627 /* Try to find the best candidate for the top of the ready list if two insns
26628 have the same priority - the candidate is best if its dependees were
26629 scheduled earlier. Applied for Silvermont only.
26630 Return true if the top 2 insns must be interchanged. */
26631 static bool
26632 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26633 {
26634 rtx_insn *top = ready[n_ready - 1];
26635 rtx_insn *next = ready[n_ready - 2];
26636 rtx set;
26637 sd_iterator_def sd_it;
26638 dep_t dep;
26639 int clock1 = -1;
26640 int clock2 = -1;
26641 #define INSN_TICK(INSN) (HID (INSN)->tick)
26642
26643 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26644 return false;
26645
26646 if (!NONDEBUG_INSN_P (top))
26647 return false;
26648 if (!NONJUMP_INSN_P (top))
26649 return false;
26650 if (!NONDEBUG_INSN_P (next))
26651 return false;
26652 if (!NONJUMP_INSN_P (next))
26653 return false;
26654 set = single_set (top);
26655 if (!set)
26656 return false;
26657 set = single_set (next);
26658 if (!set)
26659 return false;
26660
26661 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26662 {
26663 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26664 return false;
26665 /* Determine the winner more precisely. */
26666 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26667 {
26668 rtx pro;
26669 pro = DEP_PRO (dep);
26670 if (!NONDEBUG_INSN_P (pro))
26671 continue;
26672 if (INSN_TICK (pro) > clock1)
26673 clock1 = INSN_TICK (pro);
26674 }
26675 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26676 {
26677 rtx pro;
26678 pro = DEP_PRO (dep);
26679 if (!NONDEBUG_INSN_P (pro))
26680 continue;
26681 if (INSN_TICK (pro) > clock2)
26682 clock2 = INSN_TICK (pro);
26683 }
26684
26685 if (clock1 == clock2)
26686 {
26687 /* Determine winner - load must win. */
26688 enum attr_memory memory1, memory2;
26689 memory1 = get_attr_memory (top);
26690 memory2 = get_attr_memory (next);
26691 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26692 return true;
26693 }
26694 return (bool) (clock2 < clock1);
26695 }
26696 return false;
26697 #undef INSN_TICK
26698 }
26699
26700 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26701 Return the issue rate. */
26702 static int
26703 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26704 int *pn_ready, int clock_var)
26705 {
26706 int issue_rate = -1;
26707 int n_ready = *pn_ready;
26708 int i;
26709 rtx_insn *insn;
26710 int index = -1;
26711
26712 /* Set up issue rate. */
26713 issue_rate = ix86_issue_rate ();
26714
26715 /* Do reordering for BONNELL/SILVERMONT only. */
26716 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26717 return issue_rate;
26718
26719 /* Nothing to do if ready list contains only 1 instruction. */
26720 if (n_ready <= 1)
26721 return issue_rate;
26722
26723 /* Do reordering for the post-reload scheduler only. */
26724 if (!reload_completed)
26725 return issue_rate;
26726
26727 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26728 {
26729 if (sched_verbose > 1)
26730 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26731 INSN_UID (ready[index]));
26732
26733 /* Put IMUL producer (ready[index]) at the top of ready list. */
26734 insn = ready[index];
26735 for (i = index; i < n_ready - 1; i++)
26736 ready[i] = ready[i + 1];
26737 ready[n_ready - 1] = insn;
26738 return issue_rate;
26739 }
26740 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26741 {
26742 if (sched_verbose > 1)
26743 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26744 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26745 /* Swap 2 top elements of ready list. */
26746 insn = ready[n_ready - 1];
26747 ready[n_ready - 1] = ready[n_ready - 2];
26748 ready[n_ready - 2] = insn;
26749 }
26750 return issue_rate;
26751 }
26752
26753 static bool
26754 ix86_class_likely_spilled_p (reg_class_t);
26755
26756 /* Return true if the lhs of INSN is a HW function argument register, and set
26757 IS_SPILLED to true if it is a likely-spilled HW register. */
26758 static bool
26759 insn_is_function_arg (rtx insn, bool* is_spilled)
26760 {
26761 rtx dst;
26762
26763 if (!NONDEBUG_INSN_P (insn))
26764 return false;
26765 /* Call instructions are not movable, so ignore them. */
26766 if (CALL_P (insn))
26767 return false;
26768 insn = PATTERN (insn);
26769 if (GET_CODE (insn) == PARALLEL)
26770 insn = XVECEXP (insn, 0, 0);
26771 if (GET_CODE (insn) != SET)
26772 return false;
26773 dst = SET_DEST (insn);
26774 if (REG_P (dst) && HARD_REGISTER_P (dst)
26775 && ix86_function_arg_regno_p (REGNO (dst)))
26776 {
26777 /* Is it likely spilled HW register? */
26778 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26779 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26780 *is_spilled = true;
26781 return true;
26782 }
26783 return false;
26784 }
26785
26786 /* Add output dependencies for a chain of adjacent function arguments when
26787 there is a move to a likely-spilled HW register. Return the first argument
26788 if at least one dependence was added, or NULL otherwise. */
26789 static rtx_insn *
26790 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26791 {
26792 rtx_insn *insn;
26793 rtx_insn *last = call;
26794 rtx_insn *first_arg = NULL;
26795 bool is_spilled = false;
26796
26797 head = PREV_INSN (head);
26798
26799 /* Find the argument-passing instruction nearest to the call. */
26800 while (true)
26801 {
26802 last = PREV_INSN (last);
26803 if (last == head)
26804 return NULL;
26805 if (!NONDEBUG_INSN_P (last))
26806 continue;
26807 if (insn_is_function_arg (last, &is_spilled))
26808 break;
26809 return NULL;
26810 }
26811
26812 first_arg = last;
26813 while (true)
26814 {
26815 insn = PREV_INSN (last);
26816 if (!INSN_P (insn))
26817 break;
26818 if (insn == head)
26819 break;
26820 if (!NONDEBUG_INSN_P (insn))
26821 {
26822 last = insn;
26823 continue;
26824 }
26825 if (insn_is_function_arg (insn, &is_spilled))
26826 {
26827 /* Add an output dependence between two function arguments if the chain
26828 of output arguments contains likely-spilled HW registers. */
26829 if (is_spilled)
26830 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26831 first_arg = last = insn;
26832 }
26833 else
26834 break;
26835 }
26836 if (!is_spilled)
26837 return NULL;
26838 return first_arg;
26839 }
26840
26841 /* Add output or anti dependency from insn to first_arg to restrict its code
26842 motion. */
26843 static void
26844 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26845 {
26846 rtx set;
26847 rtx tmp;
26848
26849 set = single_set (insn);
26850 if (!set)
26851 return;
26852 tmp = SET_DEST (set);
26853 if (REG_P (tmp))
26854 {
26855 /* Add output dependency to the first function argument. */
26856 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26857 return;
26858 }
26859 /* Add anti dependency. */
26860 add_dependence (first_arg, insn, REG_DEP_ANTI);
26861 }
26862
26863 /* Avoid cross-block motion of a function argument by adding a dependency
26864 from the first non-jump instruction in BB. */
26865 static void
26866 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26867 {
26868 rtx_insn *insn = BB_END (bb);
26869
26870 while (insn)
26871 {
26872 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26873 {
26874 rtx set = single_set (insn);
26875 if (set)
26876 {
26877 avoid_func_arg_motion (arg, insn);
26878 return;
26879 }
26880 }
26881 if (insn == BB_HEAD (bb))
26882 return;
26883 insn = PREV_INSN (insn);
26884 }
26885 }
26886
26887 /* Hook for pre-reload schedule - avoid motion of function arguments
26888 passed in likely spilled HW registers. */
26889 static void
26890 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26891 {
26892 rtx_insn *insn;
26893 rtx_insn *first_arg = NULL;
26894 if (reload_completed)
26895 return;
26896 while (head != tail && DEBUG_INSN_P (head))
26897 head = NEXT_INSN (head);
26898 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26899 if (INSN_P (insn) && CALL_P (insn))
26900 {
26901 first_arg = add_parameter_dependencies (insn, head);
26902 if (first_arg)
26903 {
26904 /* Add a dependee for the first argument to predecessors, but only
26905 if the region contains more than one block. */
26906 basic_block bb = BLOCK_FOR_INSN (insn);
26907 int rgn = CONTAINING_RGN (bb->index);
26908 int nr_blks = RGN_NR_BLOCKS (rgn);
26909 /* Skip trivial regions and region head blocks that can have
26910 predecessors outside of the region. */
26911 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26912 {
26913 edge e;
26914 edge_iterator ei;
26915
26916 /* Regions are SCCs with the exception of selective
26917 scheduling with pipelining of outer blocks enabled.
26918 So also check that immediate predecessors of a non-head
26919 block are in the same region. */
26920 FOR_EACH_EDGE (e, ei, bb->preds)
26921 {
26922 /* Avoid creating loop-carried dependencies by
26923 using the topological ordering in the region. */
26924 if (rgn == CONTAINING_RGN (e->src->index)
26925 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26926 add_dependee_for_func_arg (first_arg, e->src);
26927 }
26928 }
26929 insn = first_arg;
26930 if (insn == head)
26931 break;
26932 }
26933 }
26934 else if (first_arg)
26935 avoid_func_arg_motion (first_arg, insn);
26936 }
26937
26938 /* Hook for pre-reload schedule - set the priority of moves from likely-spilled
26939 HW registers to the maximum, to schedule them as soon as possible. These are
26940 moves from function argument registers at the top of the function entry
26941 and moves from function return value registers after a call. */
26942 static int
26943 ix86_adjust_priority (rtx_insn *insn, int priority)
26944 {
26945 rtx set;
26946
26947 if (reload_completed)
26948 return priority;
26949
26950 if (!NONDEBUG_INSN_P (insn))
26951 return priority;
26952
26953 set = single_set (insn);
26954 if (set)
26955 {
26956 rtx tmp = SET_SRC (set);
26957 if (REG_P (tmp)
26958 && HARD_REGISTER_P (tmp)
26959 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26960 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26961 return current_sched_info->sched_max_insns_priority;
26962 }
26963
26964 return priority;
26965 }
26966
26967 /* Model the decoder of Core 2/i7.
26968 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
26969 track the instruction fetch block boundaries and make sure that long
26970 (9+ bytes) instructions are assigned to D0. */
26971
26972 /* Maximum length of an insn that can be handled by
26973 a secondary decoder unit. '8' for Core 2/i7. */
26974 static int core2i7_secondary_decoder_max_insn_size;
26975
26976 /* Ifetch block size, i.e., the number of bytes the decoder reads per cycle.
26977 '16' for Core 2/i7. */
26978 static int core2i7_ifetch_block_size;
26979
26980 /* Maximum number of instructions the decoder can handle per cycle.
26981 '6' for Core 2/i7. */
26982 static int core2i7_ifetch_block_max_insns;
26983
26984 typedef struct ix86_first_cycle_multipass_data_ *
26985 ix86_first_cycle_multipass_data_t;
26986 typedef const struct ix86_first_cycle_multipass_data_ *
26987 const_ix86_first_cycle_multipass_data_t;
26988
26989 /* A variable to store target state across calls to max_issue within
26990 one cycle. */
26991 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26992 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26993
26994 /* Initialize DATA. */
26995 static void
26996 core2i7_first_cycle_multipass_init (void *_data)
26997 {
26998 ix86_first_cycle_multipass_data_t data
26999 = (ix86_first_cycle_multipass_data_t) _data;
27000
27001 data->ifetch_block_len = 0;
27002 data->ifetch_block_n_insns = 0;
27003 data->ready_try_change = NULL;
27004 data->ready_try_change_size = 0;
27005 }
27006
27007 /* Advancing the cycle; reset ifetch block counts. */
27008 static void
27009 core2i7_dfa_post_advance_cycle (void)
27010 {
27011 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
27012
27013 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27014
27015 data->ifetch_block_len = 0;
27016 data->ifetch_block_n_insns = 0;
27017 }
27018
27019 static int min_insn_size (rtx_insn *);
27020
27021 /* Filter out insns from ready_try that the core will not be able to issue
27022 on the current cycle due to decoder restrictions. */
27023 static void
27024 core2i7_first_cycle_multipass_filter_ready_try
27025 (const_ix86_first_cycle_multipass_data_t data,
27026 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
27027 {
27028 while (n_ready--)
27029 {
27030 rtx_insn *insn;
27031 int insn_size;
27032
27033 if (ready_try[n_ready])
27034 continue;
27035
27036 insn = get_ready_element (n_ready);
27037 insn_size = min_insn_size (insn);
27038
27039 if (/* If this is too long an insn for a secondary decoder ... */
27040 (!first_cycle_insn_p
27041 && insn_size > core2i7_secondary_decoder_max_insn_size)
27042 /* ... or it would not fit into the ifetch block ... */
27043 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
27044 /* ... or the decoder is full already ... */
27045 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
27046 /* ... mask the insn out. */
27047 {
27048 ready_try[n_ready] = 1;
27049
27050 if (data->ready_try_change)
27051 bitmap_set_bit (data->ready_try_change, n_ready);
27052 }
27053 }
27054 }
27055
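/* Worked example, not part of GCC: with the Core 2/i7 parameters installed
   in ix86_sched_init_global below (fetch block of 16 bytes, at most 6 insns,
   secondary decoders limited to 8 bytes), a hypothetical helper mirroring the
   checks above would accept or reject candidates as follows.  */
#if 0
#include <stdbool.h>

static bool
fits (int block_len, int block_n_insns, int insn_size, bool first_cycle_insn_p)
{
  if (!first_cycle_insn_p && insn_size > 8)	/* Secondary decoder limit.  */
    return false;
  if (block_len + insn_size > 16)		/* Ifetch block size.  */
    return false;
  if (block_n_insns + 1 > 6)			/* Decoder width.  */
    return false;
  return true;
}

/* fits (0, 0, 9, true) == true: a 9-byte insn is fine as the first insn.
   fits (7, 1, 9, false) == false: too long for a secondary decoder.
   fits (13, 2, 4, false) == false: would overflow the 16-byte fetch block.  */
#endif
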
27056 /* Prepare for a new round of multipass lookahead scheduling. */
27057 static void
27058 core2i7_first_cycle_multipass_begin (void *_data,
27059 signed char *ready_try, int n_ready,
27060 bool first_cycle_insn_p)
27061 {
27062 ix86_first_cycle_multipass_data_t data
27063 = (ix86_first_cycle_multipass_data_t) _data;
27064 const_ix86_first_cycle_multipass_data_t prev_data
27065 = ix86_first_cycle_multipass_data;
27066
27067 /* Restore the state from the end of the previous round. */
27068 data->ifetch_block_len = prev_data->ifetch_block_len;
27069 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27070
27071 /* Filter instructions that cannot be issued on current cycle due to
27072 decoder restrictions. */
27073 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27074 first_cycle_insn_p);
27075 }
27076
27077 /* INSN is being issued in current solution. Account for its impact on
27078 the decoder model. */
27079 static void
27080 core2i7_first_cycle_multipass_issue (void *_data,
27081 signed char *ready_try, int n_ready,
27082 rtx_insn *insn, const void *_prev_data)
27083 {
27084 ix86_first_cycle_multipass_data_t data
27085 = (ix86_first_cycle_multipass_data_t) _data;
27086 const_ix86_first_cycle_multipass_data_t prev_data
27087 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27088
27089 int insn_size = min_insn_size (insn);
27090
27091 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27092 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27093 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27094 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27095
27096 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27097 if (!data->ready_try_change)
27098 {
27099 data->ready_try_change = sbitmap_alloc (n_ready);
27100 data->ready_try_change_size = n_ready;
27101 }
27102 else if (data->ready_try_change_size < n_ready)
27103 {
27104 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27105 n_ready, 0);
27106 data->ready_try_change_size = n_ready;
27107 }
27108 bitmap_clear (data->ready_try_change);
27109
27110 /* Filter out insns from ready_try that the core will not be able to issue
27111 on the current cycle due to decoder restrictions. */
27112 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27113 false);
27114 }
27115
27116 /* Revert the effect on ready_try. */
27117 static void
27118 core2i7_first_cycle_multipass_backtrack (const void *_data,
27119 signed char *ready_try,
27120 int n_ready ATTRIBUTE_UNUSED)
27121 {
27122 const_ix86_first_cycle_multipass_data_t data
27123 = (const_ix86_first_cycle_multipass_data_t) _data;
27124 unsigned int i = 0;
27125 sbitmap_iterator sbi;
27126
27127 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27128 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27129 {
27130 ready_try[i] = 0;
27131 }
27132 }
27133
27134 /* Save the result of multipass lookahead scheduling for the next round. */
27135 static void
27136 core2i7_first_cycle_multipass_end (const void *_data)
27137 {
27138 const_ix86_first_cycle_multipass_data_t data
27139 = (const_ix86_first_cycle_multipass_data_t) _data;
27140 ix86_first_cycle_multipass_data_t next_data
27141 = ix86_first_cycle_multipass_data;
27142
27143 if (data != NULL)
27144 {
27145 next_data->ifetch_block_len = data->ifetch_block_len;
27146 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27147 }
27148 }
27149
27150 /* Deallocate target data. */
27151 static void
27152 core2i7_first_cycle_multipass_fini (void *_data)
27153 {
27154 ix86_first_cycle_multipass_data_t data
27155 = (ix86_first_cycle_multipass_data_t) _data;
27156
27157 if (data->ready_try_change)
27158 {
27159 sbitmap_free (data->ready_try_change);
27160 data->ready_try_change = NULL;
27161 data->ready_try_change_size = 0;
27162 }
27163 }
27164
27165 /* Prepare for scheduling pass. */
27166 static void
27167 ix86_sched_init_global (FILE *, int, int)
27168 {
27169 /* Install scheduling hooks for current CPU. Some of these hooks are used
27170 in time-critical parts of the scheduler, so we only set them up when
27171 they are actually used. */
27172 switch (ix86_tune)
27173 {
27174 case PROCESSOR_CORE2:
27175 case PROCESSOR_NEHALEM:
27176 case PROCESSOR_SANDYBRIDGE:
27177 case PROCESSOR_HASWELL:
27178 /* Do not perform multipass scheduling for pre-reload schedule
27179 to save compile time. */
27180 if (reload_completed)
27181 {
27182 targetm.sched.dfa_post_advance_cycle
27183 = core2i7_dfa_post_advance_cycle;
27184 targetm.sched.first_cycle_multipass_init
27185 = core2i7_first_cycle_multipass_init;
27186 targetm.sched.first_cycle_multipass_begin
27187 = core2i7_first_cycle_multipass_begin;
27188 targetm.sched.first_cycle_multipass_issue
27189 = core2i7_first_cycle_multipass_issue;
27190 targetm.sched.first_cycle_multipass_backtrack
27191 = core2i7_first_cycle_multipass_backtrack;
27192 targetm.sched.first_cycle_multipass_end
27193 = core2i7_first_cycle_multipass_end;
27194 targetm.sched.first_cycle_multipass_fini
27195 = core2i7_first_cycle_multipass_fini;
27196
27197 /* Set decoder parameters. */
27198 core2i7_secondary_decoder_max_insn_size = 8;
27199 core2i7_ifetch_block_size = 16;
27200 core2i7_ifetch_block_max_insns = 6;
27201 break;
27202 }
27203 /* ... Fall through ... */
27204 default:
27205 targetm.sched.dfa_post_advance_cycle = NULL;
27206 targetm.sched.first_cycle_multipass_init = NULL;
27207 targetm.sched.first_cycle_multipass_begin = NULL;
27208 targetm.sched.first_cycle_multipass_issue = NULL;
27209 targetm.sched.first_cycle_multipass_backtrack = NULL;
27210 targetm.sched.first_cycle_multipass_end = NULL;
27211 targetm.sched.first_cycle_multipass_fini = NULL;
27212 break;
27213 }
27214 }
27215
27216 \f
27217 /* Compute the alignment given to a constant that is being placed in memory.
27218 EXP is the constant and ALIGN is the alignment that the object would
27219 ordinarily have.
27220 The value of this function is used instead of that alignment to align
27221 the object. */
27222
27223 int
27224 ix86_constant_alignment (tree exp, int align)
27225 {
27226 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27227 || TREE_CODE (exp) == INTEGER_CST)
27228 {
27229 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27230 return 64;
27231 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27232 return 128;
27233 }
27234 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27235 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27236 return BITS_PER_WORD;
27237
27238 return align;
27239 }
27240
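/* Illustration, not part of GCC: both literals below typically end up in the
   constant pool, where the hook above raises their alignment - the double to
   64 bits, and the long string to BITS_PER_WORD when not optimizing for size.
   This is a sketch of the intended effect, not a guarantee for every target
   configuration.  */
#if 0
double
get_pi (void)
{
  return 3.141592653589793;	/* DFmode constant: at least 64-bit aligned.  */
}

const char *
get_msg (void)
{
  /* String of 31+ characters: raised to BITS_PER_WORD unless optimizing
     for size.  */
  return "a string literal that is at least thirty-one characters long";
}
#endif
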
27241 /* Compute the alignment for a static variable.
27242 TYPE is the data type, and ALIGN is the alignment that
27243 the object would ordinarily have. The value of this function is used
27244 instead of that alignment to align the object. */
27245
27246 int
27247 ix86_data_alignment (tree type, int align, bool opt)
27248 {
27249 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27250 for symbols from other compilation units or symbols that don't need
27251 to bind locally. In order to preserve some ABI compatibility with
27252 those compilers, ensure we don't decrease alignment from what we
27253 used to assume. */
27254
27255 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27256
27257 /* A data structure equal to or greater than the size of a cache line
27258 (64 bytes in the Pentium 4 and other recent Intel processors, including
27259 processors based on the Intel Core microarchitecture) should be aligned
27260 so that its base address is a multiple of the cache-line size. */
27261
27262 int max_align
27263 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27264
27265 if (max_align < BITS_PER_WORD)
27266 max_align = BITS_PER_WORD;
27267
27268 switch (ix86_align_data_type)
27269 {
27270 case ix86_align_data_type_abi: opt = false; break;
27271 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27272 case ix86_align_data_type_cacheline: break;
27273 }
27274
27275 if (opt
27276 && AGGREGATE_TYPE_P (type)
27277 && TYPE_SIZE (type)
27278 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27279 {
27280 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27281 && align < max_align_compat)
27282 align = max_align_compat;
27283 if (wi::geu_p (TYPE_SIZE (type), max_align)
27284 && align < max_align)
27285 align = max_align;
27286 }
27287
27288 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
27289 to a 16-byte boundary. */
27290 if (TARGET_64BIT)
27291 {
27292 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27293 && TYPE_SIZE (type)
27294 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27295 && wi::geu_p (TYPE_SIZE (type), 128)
27296 && align < 128)
27297 return 128;
27298 }
27299
27300 if (!opt)
27301 return align;
27302
27303 if (TREE_CODE (type) == ARRAY_TYPE)
27304 {
27305 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27306 return 64;
27307 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27308 return 128;
27309 }
27310 else if (TREE_CODE (type) == COMPLEX_TYPE)
27311 {
27312
27313 if (TYPE_MODE (type) == DCmode && align < 64)
27314 return 64;
27315 if ((TYPE_MODE (type) == XCmode
27316 || TYPE_MODE (type) == TCmode) && align < 128)
27317 return 128;
27318 }
27319 else if ((TREE_CODE (type) == RECORD_TYPE
27320 || TREE_CODE (type) == UNION_TYPE
27321 || TREE_CODE (type) == QUAL_UNION_TYPE)
27322 && TYPE_FIELDS (type))
27323 {
27324 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27325 return 64;
27326 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27327 return 128;
27328 }
27329 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27330 || TREE_CODE (type) == INTEGER_TYPE)
27331 {
27332 if (TYPE_MODE (type) == DFmode && align < 64)
27333 return 64;
27334 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27335 return 128;
27336 }
27337
27338 return align;
27339 }
27340
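/* Illustration, not part of GCC: static data on x86-64 under the rules
   above (a sketch; the cache-line-based maximum additionally depends on the
   selected tuning and on -malign-data).  */
#if 0
static char small[8];	/* Smaller than 16 bytes: keeps its natural alignment.  */
static char buf[32];	/* Array of 16+ bytes: aligned to a 16-byte boundary.  */
static double d;	/* DFmode: raised to 64-bit alignment when alignment
			   for performance is allowed.  */
#endif
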
27341 /* Compute the alignment for a local variable or a stack slot. EXP is
27342 the data type or decl itself, MODE is the widest mode available and
27343 ALIGN is the alignment that the object would ordinarily have. The
27344 value of this macro is used instead of that alignment to align the
27345 object. */
27346
27347 unsigned int
27348 ix86_local_alignment (tree exp, machine_mode mode,
27349 unsigned int align)
27350 {
27351 tree type, decl;
27352
27353 if (exp && DECL_P (exp))
27354 {
27355 type = TREE_TYPE (exp);
27356 decl = exp;
27357 }
27358 else
27359 {
27360 type = exp;
27361 decl = NULL;
27362 }
27363
27364 /* Don't do dynamic stack realignment for long long objects with
27365 -mpreferred-stack-boundary=2. */
27366 if (!TARGET_64BIT
27367 && align == 64
27368 && ix86_preferred_stack_boundary < 64
27369 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27370 && (!type || !TYPE_USER_ALIGN (type))
27371 && (!decl || !DECL_USER_ALIGN (decl)))
27372 align = 32;
27373
27374 /* If TYPE is NULL, we are allocating a stack slot for caller-save
27375 register in MODE. We will return the largest alignment of XF
27376 and DF. */
27377 if (!type)
27378 {
27379 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27380 align = GET_MODE_ALIGNMENT (DFmode);
27381 return align;
27382 }
27383
27384 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
27385 to a 16-byte boundary. The exact wording is:
27386
27387 An array uses the same alignment as its elements, except that a local or
27388 global array variable of length at least 16 bytes or
27389 a C99 variable-length array variable always has alignment of at least 16 bytes.
27390
27391 This was added to allow use of aligned SSE instructions on arrays. The
27392 rule is meant for static storage (where the compiler cannot do the analysis
27393 by itself). We follow it for automatic variables only when convenient:
27394 we fully control everything in the function being compiled, and functions
27395 from other units cannot rely on the alignment.
27396
27397 Exclude the va_list type. It is the common case of a local array where
27398 we cannot benefit from the alignment.
27399
27400 TODO: Probably one should optimize for size only when var is not escaping. */
27401 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27402 && TARGET_SSE)
27403 {
27404 if (AGGREGATE_TYPE_P (type)
27405 && (va_list_type_node == NULL_TREE
27406 || (TYPE_MAIN_VARIANT (type)
27407 != TYPE_MAIN_VARIANT (va_list_type_node)))
27408 && TYPE_SIZE (type)
27409 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27410 && wi::geu_p (TYPE_SIZE (type), 16)
27411 && align < 128)
27412 return 128;
27413 }
27414 if (TREE_CODE (type) == ARRAY_TYPE)
27415 {
27416 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27417 return 64;
27418 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27419 return 128;
27420 }
27421 else if (TREE_CODE (type) == COMPLEX_TYPE)
27422 {
27423 if (TYPE_MODE (type) == DCmode && align < 64)
27424 return 64;
27425 if ((TYPE_MODE (type) == XCmode
27426 || TYPE_MODE (type) == TCmode) && align < 128)
27427 return 128;
27428 }
27429 else if ((TREE_CODE (type) == RECORD_TYPE
27430 || TREE_CODE (type) == UNION_TYPE
27431 || TREE_CODE (type) == QUAL_UNION_TYPE)
27432 && TYPE_FIELDS (type))
27433 {
27434 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27435 return 64;
27436 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27437 return 128;
27438 }
27439 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27440 || TREE_CODE (type) == INTEGER_TYPE)
27441 {
27442
27443 if (TYPE_MODE (type) == DFmode && align < 64)
27444 return 64;
27445 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27446 return 128;
27447 }
27448 return align;
27449 }
27450
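/* Illustration, not part of GCC: automatic variables under the rules above
   (a sketch assuming x86-64, SSE enabled and optimization for speed, except
   where noted).  */
#if 0
void
example (void)
{
  char buf[64];		/* Local aggregate of 16+ bytes: aligned to 16 bytes so
			   SSE accesses to it can use aligned loads/stores.  */
  long long ll;		/* On ia32 with -mpreferred-stack-boundary=2 this would
			   be dropped back to 32-bit alignment instead.  */
  (void) buf;
  (void) ll;
}
#endif
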
27451 /* Compute the minimum required alignment for dynamic stack realignment
27452 purposes for a local variable, parameter or a stack slot. EXP is
27453 the data type or decl itself, MODE is its mode and ALIGN is the
27454 alignment that the object would ordinarily have. */
27455
27456 unsigned int
27457 ix86_minimum_alignment (tree exp, machine_mode mode,
27458 unsigned int align)
27459 {
27460 tree type, decl;
27461
27462 if (exp && DECL_P (exp))
27463 {
27464 type = TREE_TYPE (exp);
27465 decl = exp;
27466 }
27467 else
27468 {
27469 type = exp;
27470 decl = NULL;
27471 }
27472
27473 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27474 return align;
27475
27476 /* Don't do dynamic stack realignment for long long objects with
27477 -mpreferred-stack-boundary=2. */
27478 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27479 && (!type || !TYPE_USER_ALIGN (type))
27480 && (!decl || !DECL_USER_ALIGN (decl)))
27481 return 32;
27482
27483 return align;
27484 }
27485 \f
27486 /* Find a location for the static chain incoming to a nested function.
27487 This is a register, unless all free registers are used by arguments. */
27488
27489 static rtx
27490 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27491 {
27492 unsigned regno;
27493
27494 /* While this function won't be called by the middle-end when a static
27495 chain isn't needed, it's also used throughout the backend so it's
27496 easiest to keep this check centralized. */
27497 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27498 return NULL;
27499
27500 if (TARGET_64BIT)
27501 {
27502 /* We always use R10 in 64-bit mode. */
27503 regno = R10_REG;
27504 }
27505 else
27506 {
27507 const_tree fntype, fndecl;
27508 unsigned int ccvt;
27509
27510 /* By default in 32-bit mode we use ECX to pass the static chain. */
27511 regno = CX_REG;
27512
27513 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27514 {
27515 fntype = TREE_TYPE (fndecl_or_type);
27516 fndecl = fndecl_or_type;
27517 }
27518 else
27519 {
27520 fntype = fndecl_or_type;
27521 fndecl = NULL;
27522 }
27523
27524 ccvt = ix86_get_callcvt (fntype);
27525 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27526 {
27527 /* Fastcall functions use ecx/edx for arguments, which leaves
27528 us with EAX for the static chain.
27529 Thiscall functions use ecx for arguments, which also
27530 leaves us with EAX for the static chain. */
27531 regno = AX_REG;
27532 }
27533 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27534 {
27535 /* Thiscall functions use ecx for arguments, which leaves
27536 us with EAX and EDX for the static chain.
27537 We use EAX for ABI compatibility. */
27538 regno = AX_REG;
27539 }
27540 else if (ix86_function_regparm (fntype, fndecl) == 3)
27541 {
27542 /* For regparm 3, we have no free call-clobbered registers in
27543 which to store the static chain. In order to implement this,
27544 we have the trampoline push the static chain to the stack.
27545 However, we can't push a value below the return address when
27546 we call the nested function directly, so we have to use an
27547 alternate entry point. For this we use ESI, and have the
27548 alternate entry point push ESI, so that things appear the
27549 same once we're executing the nested function. */
27550 if (incoming_p)
27551 {
27552 if (fndecl == current_function_decl)
27553 ix86_static_chain_on_stack = true;
27554 return gen_frame_mem (SImode,
27555 plus_constant (Pmode,
27556 arg_pointer_rtx, -8));
27557 }
27558 regno = SI_REG;
27559 }
27560 }
27561
27562 return gen_rtx_REG (Pmode, regno);
27563 }
27564
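/* Illustration, not part of GCC (GNU C nested functions; a sketch only):
   where the static chain chosen above ends up for a few conventions.  */
#if 0
void
outer (void)
{
  int x = 42;
  void nested (void) { x++; }	/* 64-bit: static chain in %r10.
				   32-bit cdecl: static chain in %ecx.
				   32-bit fastcall/thiscall: %eax.
				   32-bit regparm(3): pushed on the stack via
				   the alternate entry point described above.  */
  nested ();
}
#endif
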
27565 /* Emit RTL insns to initialize the variable parts of a trampoline.
27566 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27567 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27568 to be passed to the target function. */
27569
27570 static void
27571 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27572 {
27573 rtx mem, fnaddr;
27574 int opcode;
27575 int offset = 0;
27576
27577 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27578
27579 if (TARGET_64BIT)
27580 {
27581 int size;
27582
27583 /* Load the function address into r11. Try to load the address using
27584 the shorter movl instead of movabs. We may want to support
27585 movq for kernel mode, but the kernel does not use trampolines at
27586 the moment. FNADDR is a 32-bit address and may not be in
27587 DImode when ptr_mode == SImode. Always use movl in this
27588 case. */
27589 if (ptr_mode == SImode
27590 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27591 {
27592 fnaddr = copy_addr_to_reg (fnaddr);
27593
27594 mem = adjust_address (m_tramp, HImode, offset);
27595 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27596
27597 mem = adjust_address (m_tramp, SImode, offset + 2);
27598 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27599 offset += 6;
27600 }
27601 else
27602 {
27603 mem = adjust_address (m_tramp, HImode, offset);
27604 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27605
27606 mem = adjust_address (m_tramp, DImode, offset + 2);
27607 emit_move_insn (mem, fnaddr);
27608 offset += 10;
27609 }
27610
27611 /* Load static chain using movabs to r10. Use the shorter movl
27612 instead of movabs when ptr_mode == SImode. */
27613 if (ptr_mode == SImode)
27614 {
27615 opcode = 0xba41;
27616 size = 6;
27617 }
27618 else
27619 {
27620 opcode = 0xba49;
27621 size = 10;
27622 }
27623
27624 mem = adjust_address (m_tramp, HImode, offset);
27625 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27626
27627 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27628 emit_move_insn (mem, chain_value);
27629 offset += size;
27630
27631 /* Jump to r11; the last (unused) byte is a nop, only there to
27632 pad the write out to a single 32-bit store. */
27633 mem = adjust_address (m_tramp, SImode, offset);
27634 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27635 offset += 4;
27636 }
27637 else
27638 {
27639 rtx disp, chain;
27640
27641 /* Depending on the static chain location, either load a register
27642 with a constant, or push the constant to the stack. All of the
27643 instructions are the same size. */
27644 chain = ix86_static_chain (fndecl, true);
27645 if (REG_P (chain))
27646 {
27647 switch (REGNO (chain))
27648 {
27649 case AX_REG:
27650 opcode = 0xb8; break;
27651 case CX_REG:
27652 opcode = 0xb9; break;
27653 default:
27654 gcc_unreachable ();
27655 }
27656 }
27657 else
27658 opcode = 0x68;
27659
27660 mem = adjust_address (m_tramp, QImode, offset);
27661 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27662
27663 mem = adjust_address (m_tramp, SImode, offset + 1);
27664 emit_move_insn (mem, chain_value);
27665 offset += 5;
27666
27667 mem = adjust_address (m_tramp, QImode, offset);
27668 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27669
27670 mem = adjust_address (m_tramp, SImode, offset + 1);
27671
27672 /* Compute offset from the end of the jmp to the target function.
27673 In the case in which the trampoline stores the static chain on
27674 the stack, we need to skip the first insn which pushes the
27675 (call-saved) register static chain; this push is 1 byte. */
27676 offset += 5;
27677 disp = expand_binop (SImode, sub_optab, fnaddr,
27678 plus_constant (Pmode, XEXP (m_tramp, 0),
27679 offset - (MEM_P (chain) ? 1 : 0)),
27680 NULL_RTX, 1, OPTAB_DIRECT);
27681 emit_move_insn (mem, disp);
27682 }
27683
27684 gcc_assert (offset <= TRAMPOLINE_SIZE);
27685
27686 #ifdef HAVE_ENABLE_EXECUTE_STACK
27687 #ifdef CHECK_EXECUTE_STACK_ENABLED
27688 if (CHECK_EXECUTE_STACK_ENABLED)
27689 #endif
27690 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27691 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27692 #endif
27693 }
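
/* Illustration, not part of GCC: the bytes the 64-bit path above writes into
   the trampoline when ptr_mode == DImode. FNADDR and CHAIN stand for the two
   64-bit immediates; the zeros below are placeholders. With ptr_mode == SImode
   the two moves become the shorter 41 bb <imm32> / 41 ba <imm32> forms.  */
#if 0
static const unsigned char tramp64_layout[24] = {
  0x49, 0xbb, 0,0,0,0,0,0,0,0,	/* movabs $FNADDR, %r11  (offset 0)  */
  0x49, 0xba, 0,0,0,0,0,0,0,0,	/* movabs $CHAIN,  %r10  (offset 10) */
  0x49, 0xff, 0xe3,		/* jmp    *%r11          (offset 20) */
  0x90				/* nop - pads the final 32-bit store  */
};
#endif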
27694 \f
27695 /* The following file contains several enumerations and data structures
27696 built from the definitions in i386-builtin-types.def. */
27697
27698 #include "i386-builtin-types.inc"
27699
27700 /* Table for the ix86 builtin non-function types. */
27701 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27702
27703 /* Retrieve an element from the above table, building some of
27704 the types lazily. */
27705
27706 static tree
27707 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27708 {
27709 unsigned int index;
27710 tree type, itype;
27711
27712 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27713
27714 type = ix86_builtin_type_tab[(int) tcode];
27715 if (type != NULL)
27716 return type;
27717
27718 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27719 if (tcode <= IX86_BT_LAST_VECT)
27720 {
27721 machine_mode mode;
27722
27723 index = tcode - IX86_BT_LAST_PRIM - 1;
27724 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27725 mode = ix86_builtin_type_vect_mode[index];
27726
27727 type = build_vector_type_for_mode (itype, mode);
27728 }
27729 else
27730 {
27731 int quals;
27732
27733 index = tcode - IX86_BT_LAST_VECT - 1;
27734 if (tcode <= IX86_BT_LAST_PTR)
27735 quals = TYPE_UNQUALIFIED;
27736 else
27737 quals = TYPE_QUAL_CONST;
27738
27739 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27740 if (quals != TYPE_UNQUALIFIED)
27741 itype = build_qualified_type (itype, quals);
27742
27743 type = build_pointer_type (itype);
27744 }
27745
27746 ix86_builtin_type_tab[(int) tcode] = type;
27747 return type;
27748 }
27749
27750 /* Table for the ix86 builtin function types. */
27751 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27752
27753 /* Retrieve an element from the above table, building some of
27754 the types lazily. */
27755
27756 static tree
27757 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27758 {
27759 tree type;
27760
27761 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27762
27763 type = ix86_builtin_func_type_tab[(int) tcode];
27764 if (type != NULL)
27765 return type;
27766
27767 if (tcode <= IX86_BT_LAST_FUNC)
27768 {
27769 unsigned start = ix86_builtin_func_start[(int) tcode];
27770 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27771 tree rtype, atype, args = void_list_node;
27772 unsigned i;
27773
27774 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27775 for (i = after - 1; i > start; --i)
27776 {
27777 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27778 args = tree_cons (NULL, atype, args);
27779 }
27780
27781 type = build_function_type (rtype, args);
27782 }
27783 else
27784 {
27785 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27786 enum ix86_builtin_func_type icode;
27787
27788 icode = ix86_builtin_func_alias_base[index];
27789 type = ix86_get_builtin_func_type (icode);
27790 }
27791
27792 ix86_builtin_func_type_tab[(int) tcode] = type;
27793 return type;
27794 }
27795
27796
27797 /* Codes for all the SSE/MMX builtins. */
27798 enum ix86_builtins
27799 {
27800 IX86_BUILTIN_ADDPS,
27801 IX86_BUILTIN_ADDSS,
27802 IX86_BUILTIN_DIVPS,
27803 IX86_BUILTIN_DIVSS,
27804 IX86_BUILTIN_MULPS,
27805 IX86_BUILTIN_MULSS,
27806 IX86_BUILTIN_SUBPS,
27807 IX86_BUILTIN_SUBSS,
27808
27809 IX86_BUILTIN_CMPEQPS,
27810 IX86_BUILTIN_CMPLTPS,
27811 IX86_BUILTIN_CMPLEPS,
27812 IX86_BUILTIN_CMPGTPS,
27813 IX86_BUILTIN_CMPGEPS,
27814 IX86_BUILTIN_CMPNEQPS,
27815 IX86_BUILTIN_CMPNLTPS,
27816 IX86_BUILTIN_CMPNLEPS,
27817 IX86_BUILTIN_CMPNGTPS,
27818 IX86_BUILTIN_CMPNGEPS,
27819 IX86_BUILTIN_CMPORDPS,
27820 IX86_BUILTIN_CMPUNORDPS,
27821 IX86_BUILTIN_CMPEQSS,
27822 IX86_BUILTIN_CMPLTSS,
27823 IX86_BUILTIN_CMPLESS,
27824 IX86_BUILTIN_CMPNEQSS,
27825 IX86_BUILTIN_CMPNLTSS,
27826 IX86_BUILTIN_CMPNLESS,
27827 IX86_BUILTIN_CMPORDSS,
27828 IX86_BUILTIN_CMPUNORDSS,
27829
27830 IX86_BUILTIN_COMIEQSS,
27831 IX86_BUILTIN_COMILTSS,
27832 IX86_BUILTIN_COMILESS,
27833 IX86_BUILTIN_COMIGTSS,
27834 IX86_BUILTIN_COMIGESS,
27835 IX86_BUILTIN_COMINEQSS,
27836 IX86_BUILTIN_UCOMIEQSS,
27837 IX86_BUILTIN_UCOMILTSS,
27838 IX86_BUILTIN_UCOMILESS,
27839 IX86_BUILTIN_UCOMIGTSS,
27840 IX86_BUILTIN_UCOMIGESS,
27841 IX86_BUILTIN_UCOMINEQSS,
27842
27843 IX86_BUILTIN_CVTPI2PS,
27844 IX86_BUILTIN_CVTPS2PI,
27845 IX86_BUILTIN_CVTSI2SS,
27846 IX86_BUILTIN_CVTSI642SS,
27847 IX86_BUILTIN_CVTSS2SI,
27848 IX86_BUILTIN_CVTSS2SI64,
27849 IX86_BUILTIN_CVTTPS2PI,
27850 IX86_BUILTIN_CVTTSS2SI,
27851 IX86_BUILTIN_CVTTSS2SI64,
27852
27853 IX86_BUILTIN_MAXPS,
27854 IX86_BUILTIN_MAXSS,
27855 IX86_BUILTIN_MINPS,
27856 IX86_BUILTIN_MINSS,
27857
27858 IX86_BUILTIN_LOADUPS,
27859 IX86_BUILTIN_STOREUPS,
27860 IX86_BUILTIN_MOVSS,
27861
27862 IX86_BUILTIN_MOVHLPS,
27863 IX86_BUILTIN_MOVLHPS,
27864 IX86_BUILTIN_LOADHPS,
27865 IX86_BUILTIN_LOADLPS,
27866 IX86_BUILTIN_STOREHPS,
27867 IX86_BUILTIN_STORELPS,
27868
27869 IX86_BUILTIN_MASKMOVQ,
27870 IX86_BUILTIN_MOVMSKPS,
27871 IX86_BUILTIN_PMOVMSKB,
27872
27873 IX86_BUILTIN_MOVNTPS,
27874 IX86_BUILTIN_MOVNTQ,
27875
27876 IX86_BUILTIN_LOADDQU,
27877 IX86_BUILTIN_STOREDQU,
27878
27879 IX86_BUILTIN_PACKSSWB,
27880 IX86_BUILTIN_PACKSSDW,
27881 IX86_BUILTIN_PACKUSWB,
27882
27883 IX86_BUILTIN_PADDB,
27884 IX86_BUILTIN_PADDW,
27885 IX86_BUILTIN_PADDD,
27886 IX86_BUILTIN_PADDQ,
27887 IX86_BUILTIN_PADDSB,
27888 IX86_BUILTIN_PADDSW,
27889 IX86_BUILTIN_PADDUSB,
27890 IX86_BUILTIN_PADDUSW,
27891 IX86_BUILTIN_PSUBB,
27892 IX86_BUILTIN_PSUBW,
27893 IX86_BUILTIN_PSUBD,
27894 IX86_BUILTIN_PSUBQ,
27895 IX86_BUILTIN_PSUBSB,
27896 IX86_BUILTIN_PSUBSW,
27897 IX86_BUILTIN_PSUBUSB,
27898 IX86_BUILTIN_PSUBUSW,
27899
27900 IX86_BUILTIN_PAND,
27901 IX86_BUILTIN_PANDN,
27902 IX86_BUILTIN_POR,
27903 IX86_BUILTIN_PXOR,
27904
27905 IX86_BUILTIN_PAVGB,
27906 IX86_BUILTIN_PAVGW,
27907
27908 IX86_BUILTIN_PCMPEQB,
27909 IX86_BUILTIN_PCMPEQW,
27910 IX86_BUILTIN_PCMPEQD,
27911 IX86_BUILTIN_PCMPGTB,
27912 IX86_BUILTIN_PCMPGTW,
27913 IX86_BUILTIN_PCMPGTD,
27914
27915 IX86_BUILTIN_PMADDWD,
27916
27917 IX86_BUILTIN_PMAXSW,
27918 IX86_BUILTIN_PMAXUB,
27919 IX86_BUILTIN_PMINSW,
27920 IX86_BUILTIN_PMINUB,
27921
27922 IX86_BUILTIN_PMULHUW,
27923 IX86_BUILTIN_PMULHW,
27924 IX86_BUILTIN_PMULLW,
27925
27926 IX86_BUILTIN_PSADBW,
27927 IX86_BUILTIN_PSHUFW,
27928
27929 IX86_BUILTIN_PSLLW,
27930 IX86_BUILTIN_PSLLD,
27931 IX86_BUILTIN_PSLLQ,
27932 IX86_BUILTIN_PSRAW,
27933 IX86_BUILTIN_PSRAD,
27934 IX86_BUILTIN_PSRLW,
27935 IX86_BUILTIN_PSRLD,
27936 IX86_BUILTIN_PSRLQ,
27937 IX86_BUILTIN_PSLLWI,
27938 IX86_BUILTIN_PSLLDI,
27939 IX86_BUILTIN_PSLLQI,
27940 IX86_BUILTIN_PSRAWI,
27941 IX86_BUILTIN_PSRADI,
27942 IX86_BUILTIN_PSRLWI,
27943 IX86_BUILTIN_PSRLDI,
27944 IX86_BUILTIN_PSRLQI,
27945
27946 IX86_BUILTIN_PUNPCKHBW,
27947 IX86_BUILTIN_PUNPCKHWD,
27948 IX86_BUILTIN_PUNPCKHDQ,
27949 IX86_BUILTIN_PUNPCKLBW,
27950 IX86_BUILTIN_PUNPCKLWD,
27951 IX86_BUILTIN_PUNPCKLDQ,
27952
27953 IX86_BUILTIN_SHUFPS,
27954
27955 IX86_BUILTIN_RCPPS,
27956 IX86_BUILTIN_RCPSS,
27957 IX86_BUILTIN_RSQRTPS,
27958 IX86_BUILTIN_RSQRTPS_NR,
27959 IX86_BUILTIN_RSQRTSS,
27960 IX86_BUILTIN_RSQRTF,
27961 IX86_BUILTIN_SQRTPS,
27962 IX86_BUILTIN_SQRTPS_NR,
27963 IX86_BUILTIN_SQRTSS,
27964
27965 IX86_BUILTIN_UNPCKHPS,
27966 IX86_BUILTIN_UNPCKLPS,
27967
27968 IX86_BUILTIN_ANDPS,
27969 IX86_BUILTIN_ANDNPS,
27970 IX86_BUILTIN_ORPS,
27971 IX86_BUILTIN_XORPS,
27972
27973 IX86_BUILTIN_EMMS,
27974 IX86_BUILTIN_LDMXCSR,
27975 IX86_BUILTIN_STMXCSR,
27976 IX86_BUILTIN_SFENCE,
27977
27978 IX86_BUILTIN_FXSAVE,
27979 IX86_BUILTIN_FXRSTOR,
27980 IX86_BUILTIN_FXSAVE64,
27981 IX86_BUILTIN_FXRSTOR64,
27982
27983 IX86_BUILTIN_XSAVE,
27984 IX86_BUILTIN_XRSTOR,
27985 IX86_BUILTIN_XSAVE64,
27986 IX86_BUILTIN_XRSTOR64,
27987
27988 IX86_BUILTIN_XSAVEOPT,
27989 IX86_BUILTIN_XSAVEOPT64,
27990
27991 IX86_BUILTIN_XSAVEC,
27992 IX86_BUILTIN_XSAVEC64,
27993
27994 IX86_BUILTIN_XSAVES,
27995 IX86_BUILTIN_XRSTORS,
27996 IX86_BUILTIN_XSAVES64,
27997 IX86_BUILTIN_XRSTORS64,
27998
27999 /* 3DNow! Original */
28000 IX86_BUILTIN_FEMMS,
28001 IX86_BUILTIN_PAVGUSB,
28002 IX86_BUILTIN_PF2ID,
28003 IX86_BUILTIN_PFACC,
28004 IX86_BUILTIN_PFADD,
28005 IX86_BUILTIN_PFCMPEQ,
28006 IX86_BUILTIN_PFCMPGE,
28007 IX86_BUILTIN_PFCMPGT,
28008 IX86_BUILTIN_PFMAX,
28009 IX86_BUILTIN_PFMIN,
28010 IX86_BUILTIN_PFMUL,
28011 IX86_BUILTIN_PFRCP,
28012 IX86_BUILTIN_PFRCPIT1,
28013 IX86_BUILTIN_PFRCPIT2,
28014 IX86_BUILTIN_PFRSQIT1,
28015 IX86_BUILTIN_PFRSQRT,
28016 IX86_BUILTIN_PFSUB,
28017 IX86_BUILTIN_PFSUBR,
28018 IX86_BUILTIN_PI2FD,
28019 IX86_BUILTIN_PMULHRW,
28020
28021 /* 3DNow! Athlon Extensions */
28022 IX86_BUILTIN_PF2IW,
28023 IX86_BUILTIN_PFNACC,
28024 IX86_BUILTIN_PFPNACC,
28025 IX86_BUILTIN_PI2FW,
28026 IX86_BUILTIN_PSWAPDSI,
28027 IX86_BUILTIN_PSWAPDSF,
28028
28029 /* SSE2 */
28030 IX86_BUILTIN_ADDPD,
28031 IX86_BUILTIN_ADDSD,
28032 IX86_BUILTIN_DIVPD,
28033 IX86_BUILTIN_DIVSD,
28034 IX86_BUILTIN_MULPD,
28035 IX86_BUILTIN_MULSD,
28036 IX86_BUILTIN_SUBPD,
28037 IX86_BUILTIN_SUBSD,
28038
28039 IX86_BUILTIN_CMPEQPD,
28040 IX86_BUILTIN_CMPLTPD,
28041 IX86_BUILTIN_CMPLEPD,
28042 IX86_BUILTIN_CMPGTPD,
28043 IX86_BUILTIN_CMPGEPD,
28044 IX86_BUILTIN_CMPNEQPD,
28045 IX86_BUILTIN_CMPNLTPD,
28046 IX86_BUILTIN_CMPNLEPD,
28047 IX86_BUILTIN_CMPNGTPD,
28048 IX86_BUILTIN_CMPNGEPD,
28049 IX86_BUILTIN_CMPORDPD,
28050 IX86_BUILTIN_CMPUNORDPD,
28051 IX86_BUILTIN_CMPEQSD,
28052 IX86_BUILTIN_CMPLTSD,
28053 IX86_BUILTIN_CMPLESD,
28054 IX86_BUILTIN_CMPNEQSD,
28055 IX86_BUILTIN_CMPNLTSD,
28056 IX86_BUILTIN_CMPNLESD,
28057 IX86_BUILTIN_CMPORDSD,
28058 IX86_BUILTIN_CMPUNORDSD,
28059
28060 IX86_BUILTIN_COMIEQSD,
28061 IX86_BUILTIN_COMILTSD,
28062 IX86_BUILTIN_COMILESD,
28063 IX86_BUILTIN_COMIGTSD,
28064 IX86_BUILTIN_COMIGESD,
28065 IX86_BUILTIN_COMINEQSD,
28066 IX86_BUILTIN_UCOMIEQSD,
28067 IX86_BUILTIN_UCOMILTSD,
28068 IX86_BUILTIN_UCOMILESD,
28069 IX86_BUILTIN_UCOMIGTSD,
28070 IX86_BUILTIN_UCOMIGESD,
28071 IX86_BUILTIN_UCOMINEQSD,
28072
28073 IX86_BUILTIN_MAXPD,
28074 IX86_BUILTIN_MAXSD,
28075 IX86_BUILTIN_MINPD,
28076 IX86_BUILTIN_MINSD,
28077
28078 IX86_BUILTIN_ANDPD,
28079 IX86_BUILTIN_ANDNPD,
28080 IX86_BUILTIN_ORPD,
28081 IX86_BUILTIN_XORPD,
28082
28083 IX86_BUILTIN_SQRTPD,
28084 IX86_BUILTIN_SQRTSD,
28085
28086 IX86_BUILTIN_UNPCKHPD,
28087 IX86_BUILTIN_UNPCKLPD,
28088
28089 IX86_BUILTIN_SHUFPD,
28090
28091 IX86_BUILTIN_LOADUPD,
28092 IX86_BUILTIN_STOREUPD,
28093 IX86_BUILTIN_MOVSD,
28094
28095 IX86_BUILTIN_LOADHPD,
28096 IX86_BUILTIN_LOADLPD,
28097
28098 IX86_BUILTIN_CVTDQ2PD,
28099 IX86_BUILTIN_CVTDQ2PS,
28100
28101 IX86_BUILTIN_CVTPD2DQ,
28102 IX86_BUILTIN_CVTPD2PI,
28103 IX86_BUILTIN_CVTPD2PS,
28104 IX86_BUILTIN_CVTTPD2DQ,
28105 IX86_BUILTIN_CVTTPD2PI,
28106
28107 IX86_BUILTIN_CVTPI2PD,
28108 IX86_BUILTIN_CVTSI2SD,
28109 IX86_BUILTIN_CVTSI642SD,
28110
28111 IX86_BUILTIN_CVTSD2SI,
28112 IX86_BUILTIN_CVTSD2SI64,
28113 IX86_BUILTIN_CVTSD2SS,
28114 IX86_BUILTIN_CVTSS2SD,
28115 IX86_BUILTIN_CVTTSD2SI,
28116 IX86_BUILTIN_CVTTSD2SI64,
28117
28118 IX86_BUILTIN_CVTPS2DQ,
28119 IX86_BUILTIN_CVTPS2PD,
28120 IX86_BUILTIN_CVTTPS2DQ,
28121
28122 IX86_BUILTIN_MOVNTI,
28123 IX86_BUILTIN_MOVNTI64,
28124 IX86_BUILTIN_MOVNTPD,
28125 IX86_BUILTIN_MOVNTDQ,
28126
28127 IX86_BUILTIN_MOVQ128,
28128
28129 /* SSE2 MMX */
28130 IX86_BUILTIN_MASKMOVDQU,
28131 IX86_BUILTIN_MOVMSKPD,
28132 IX86_BUILTIN_PMOVMSKB128,
28133
28134 IX86_BUILTIN_PACKSSWB128,
28135 IX86_BUILTIN_PACKSSDW128,
28136 IX86_BUILTIN_PACKUSWB128,
28137
28138 IX86_BUILTIN_PADDB128,
28139 IX86_BUILTIN_PADDW128,
28140 IX86_BUILTIN_PADDD128,
28141 IX86_BUILTIN_PADDQ128,
28142 IX86_BUILTIN_PADDSB128,
28143 IX86_BUILTIN_PADDSW128,
28144 IX86_BUILTIN_PADDUSB128,
28145 IX86_BUILTIN_PADDUSW128,
28146 IX86_BUILTIN_PSUBB128,
28147 IX86_BUILTIN_PSUBW128,
28148 IX86_BUILTIN_PSUBD128,
28149 IX86_BUILTIN_PSUBQ128,
28150 IX86_BUILTIN_PSUBSB128,
28151 IX86_BUILTIN_PSUBSW128,
28152 IX86_BUILTIN_PSUBUSB128,
28153 IX86_BUILTIN_PSUBUSW128,
28154
28155 IX86_BUILTIN_PAND128,
28156 IX86_BUILTIN_PANDN128,
28157 IX86_BUILTIN_POR128,
28158 IX86_BUILTIN_PXOR128,
28159
28160 IX86_BUILTIN_PAVGB128,
28161 IX86_BUILTIN_PAVGW128,
28162
28163 IX86_BUILTIN_PCMPEQB128,
28164 IX86_BUILTIN_PCMPEQW128,
28165 IX86_BUILTIN_PCMPEQD128,
28166 IX86_BUILTIN_PCMPGTB128,
28167 IX86_BUILTIN_PCMPGTW128,
28168 IX86_BUILTIN_PCMPGTD128,
28169
28170 IX86_BUILTIN_PMADDWD128,
28171
28172 IX86_BUILTIN_PMAXSW128,
28173 IX86_BUILTIN_PMAXUB128,
28174 IX86_BUILTIN_PMINSW128,
28175 IX86_BUILTIN_PMINUB128,
28176
28177 IX86_BUILTIN_PMULUDQ,
28178 IX86_BUILTIN_PMULUDQ128,
28179 IX86_BUILTIN_PMULHUW128,
28180 IX86_BUILTIN_PMULHW128,
28181 IX86_BUILTIN_PMULLW128,
28182
28183 IX86_BUILTIN_PSADBW128,
28184 IX86_BUILTIN_PSHUFHW,
28185 IX86_BUILTIN_PSHUFLW,
28186 IX86_BUILTIN_PSHUFD,
28187
28188 IX86_BUILTIN_PSLLDQI128,
28189 IX86_BUILTIN_PSLLWI128,
28190 IX86_BUILTIN_PSLLDI128,
28191 IX86_BUILTIN_PSLLQI128,
28192 IX86_BUILTIN_PSRAWI128,
28193 IX86_BUILTIN_PSRADI128,
28194 IX86_BUILTIN_PSRLDQI128,
28195 IX86_BUILTIN_PSRLWI128,
28196 IX86_BUILTIN_PSRLDI128,
28197 IX86_BUILTIN_PSRLQI128,
28198
28199 IX86_BUILTIN_PSLLDQ128,
28200 IX86_BUILTIN_PSLLW128,
28201 IX86_BUILTIN_PSLLD128,
28202 IX86_BUILTIN_PSLLQ128,
28203 IX86_BUILTIN_PSRAW128,
28204 IX86_BUILTIN_PSRAD128,
28205 IX86_BUILTIN_PSRLW128,
28206 IX86_BUILTIN_PSRLD128,
28207 IX86_BUILTIN_PSRLQ128,
28208
28209 IX86_BUILTIN_PUNPCKHBW128,
28210 IX86_BUILTIN_PUNPCKHWD128,
28211 IX86_BUILTIN_PUNPCKHDQ128,
28212 IX86_BUILTIN_PUNPCKHQDQ128,
28213 IX86_BUILTIN_PUNPCKLBW128,
28214 IX86_BUILTIN_PUNPCKLWD128,
28215 IX86_BUILTIN_PUNPCKLDQ128,
28216 IX86_BUILTIN_PUNPCKLQDQ128,
28217
28218 IX86_BUILTIN_CLFLUSH,
28219 IX86_BUILTIN_MFENCE,
28220 IX86_BUILTIN_LFENCE,
28221 IX86_BUILTIN_PAUSE,
28222
28223 IX86_BUILTIN_FNSTENV,
28224 IX86_BUILTIN_FLDENV,
28225 IX86_BUILTIN_FNSTSW,
28226 IX86_BUILTIN_FNCLEX,
28227
28228 IX86_BUILTIN_BSRSI,
28229 IX86_BUILTIN_BSRDI,
28230 IX86_BUILTIN_RDPMC,
28231 IX86_BUILTIN_RDTSC,
28232 IX86_BUILTIN_RDTSCP,
28233 IX86_BUILTIN_ROLQI,
28234 IX86_BUILTIN_ROLHI,
28235 IX86_BUILTIN_RORQI,
28236 IX86_BUILTIN_RORHI,
28237
28238 /* SSE3. */
28239 IX86_BUILTIN_ADDSUBPS,
28240 IX86_BUILTIN_HADDPS,
28241 IX86_BUILTIN_HSUBPS,
28242 IX86_BUILTIN_MOVSHDUP,
28243 IX86_BUILTIN_MOVSLDUP,
28244 IX86_BUILTIN_ADDSUBPD,
28245 IX86_BUILTIN_HADDPD,
28246 IX86_BUILTIN_HSUBPD,
28247 IX86_BUILTIN_LDDQU,
28248
28249 IX86_BUILTIN_MONITOR,
28250 IX86_BUILTIN_MWAIT,
28251
28252 /* SSSE3. */
28253 IX86_BUILTIN_PHADDW,
28254 IX86_BUILTIN_PHADDD,
28255 IX86_BUILTIN_PHADDSW,
28256 IX86_BUILTIN_PHSUBW,
28257 IX86_BUILTIN_PHSUBD,
28258 IX86_BUILTIN_PHSUBSW,
28259 IX86_BUILTIN_PMADDUBSW,
28260 IX86_BUILTIN_PMULHRSW,
28261 IX86_BUILTIN_PSHUFB,
28262 IX86_BUILTIN_PSIGNB,
28263 IX86_BUILTIN_PSIGNW,
28264 IX86_BUILTIN_PSIGND,
28265 IX86_BUILTIN_PALIGNR,
28266 IX86_BUILTIN_PABSB,
28267 IX86_BUILTIN_PABSW,
28268 IX86_BUILTIN_PABSD,
28269
28270 IX86_BUILTIN_PHADDW128,
28271 IX86_BUILTIN_PHADDD128,
28272 IX86_BUILTIN_PHADDSW128,
28273 IX86_BUILTIN_PHSUBW128,
28274 IX86_BUILTIN_PHSUBD128,
28275 IX86_BUILTIN_PHSUBSW128,
28276 IX86_BUILTIN_PMADDUBSW128,
28277 IX86_BUILTIN_PMULHRSW128,
28278 IX86_BUILTIN_PSHUFB128,
28279 IX86_BUILTIN_PSIGNB128,
28280 IX86_BUILTIN_PSIGNW128,
28281 IX86_BUILTIN_PSIGND128,
28282 IX86_BUILTIN_PALIGNR128,
28283 IX86_BUILTIN_PABSB128,
28284 IX86_BUILTIN_PABSW128,
28285 IX86_BUILTIN_PABSD128,
28286
28287 /* AMDFAM10 - SSE4A New Instructions. */
28288 IX86_BUILTIN_MOVNTSD,
28289 IX86_BUILTIN_MOVNTSS,
28290 IX86_BUILTIN_EXTRQI,
28291 IX86_BUILTIN_EXTRQ,
28292 IX86_BUILTIN_INSERTQI,
28293 IX86_BUILTIN_INSERTQ,
28294
28295 /* SSE4.1. */
28296 IX86_BUILTIN_BLENDPD,
28297 IX86_BUILTIN_BLENDPS,
28298 IX86_BUILTIN_BLENDVPD,
28299 IX86_BUILTIN_BLENDVPS,
28300 IX86_BUILTIN_PBLENDVB128,
28301 IX86_BUILTIN_PBLENDW128,
28302
28303 IX86_BUILTIN_DPPD,
28304 IX86_BUILTIN_DPPS,
28305
28306 IX86_BUILTIN_INSERTPS128,
28307
28308 IX86_BUILTIN_MOVNTDQA,
28309 IX86_BUILTIN_MPSADBW128,
28310 IX86_BUILTIN_PACKUSDW128,
28311 IX86_BUILTIN_PCMPEQQ,
28312 IX86_BUILTIN_PHMINPOSUW128,
28313
28314 IX86_BUILTIN_PMAXSB128,
28315 IX86_BUILTIN_PMAXSD128,
28316 IX86_BUILTIN_PMAXUD128,
28317 IX86_BUILTIN_PMAXUW128,
28318
28319 IX86_BUILTIN_PMINSB128,
28320 IX86_BUILTIN_PMINSD128,
28321 IX86_BUILTIN_PMINUD128,
28322 IX86_BUILTIN_PMINUW128,
28323
28324 IX86_BUILTIN_PMOVSXBW128,
28325 IX86_BUILTIN_PMOVSXBD128,
28326 IX86_BUILTIN_PMOVSXBQ128,
28327 IX86_BUILTIN_PMOVSXWD128,
28328 IX86_BUILTIN_PMOVSXWQ128,
28329 IX86_BUILTIN_PMOVSXDQ128,
28330
28331 IX86_BUILTIN_PMOVZXBW128,
28332 IX86_BUILTIN_PMOVZXBD128,
28333 IX86_BUILTIN_PMOVZXBQ128,
28334 IX86_BUILTIN_PMOVZXWD128,
28335 IX86_BUILTIN_PMOVZXWQ128,
28336 IX86_BUILTIN_PMOVZXDQ128,
28337
28338 IX86_BUILTIN_PMULDQ128,
28339 IX86_BUILTIN_PMULLD128,
28340
28341 IX86_BUILTIN_ROUNDSD,
28342 IX86_BUILTIN_ROUNDSS,
28343
28344 IX86_BUILTIN_ROUNDPD,
28345 IX86_BUILTIN_ROUNDPS,
28346
28347 IX86_BUILTIN_FLOORPD,
28348 IX86_BUILTIN_CEILPD,
28349 IX86_BUILTIN_TRUNCPD,
28350 IX86_BUILTIN_RINTPD,
28351 IX86_BUILTIN_ROUNDPD_AZ,
28352
28353 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28354 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28355 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28356
28357 IX86_BUILTIN_FLOORPS,
28358 IX86_BUILTIN_CEILPS,
28359 IX86_BUILTIN_TRUNCPS,
28360 IX86_BUILTIN_RINTPS,
28361 IX86_BUILTIN_ROUNDPS_AZ,
28362
28363 IX86_BUILTIN_FLOORPS_SFIX,
28364 IX86_BUILTIN_CEILPS_SFIX,
28365 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28366
28367 IX86_BUILTIN_PTESTZ,
28368 IX86_BUILTIN_PTESTC,
28369 IX86_BUILTIN_PTESTNZC,
28370
28371 IX86_BUILTIN_VEC_INIT_V2SI,
28372 IX86_BUILTIN_VEC_INIT_V4HI,
28373 IX86_BUILTIN_VEC_INIT_V8QI,
28374 IX86_BUILTIN_VEC_EXT_V2DF,
28375 IX86_BUILTIN_VEC_EXT_V2DI,
28376 IX86_BUILTIN_VEC_EXT_V4SF,
28377 IX86_BUILTIN_VEC_EXT_V4SI,
28378 IX86_BUILTIN_VEC_EXT_V8HI,
28379 IX86_BUILTIN_VEC_EXT_V2SI,
28380 IX86_BUILTIN_VEC_EXT_V4HI,
28381 IX86_BUILTIN_VEC_EXT_V16QI,
28382 IX86_BUILTIN_VEC_SET_V2DI,
28383 IX86_BUILTIN_VEC_SET_V4SF,
28384 IX86_BUILTIN_VEC_SET_V4SI,
28385 IX86_BUILTIN_VEC_SET_V8HI,
28386 IX86_BUILTIN_VEC_SET_V4HI,
28387 IX86_BUILTIN_VEC_SET_V16QI,
28388
28389 IX86_BUILTIN_VEC_PACK_SFIX,
28390 IX86_BUILTIN_VEC_PACK_SFIX256,
28391
28392 /* SSE4.2. */
28393 IX86_BUILTIN_CRC32QI,
28394 IX86_BUILTIN_CRC32HI,
28395 IX86_BUILTIN_CRC32SI,
28396 IX86_BUILTIN_CRC32DI,
28397
28398 IX86_BUILTIN_PCMPESTRI128,
28399 IX86_BUILTIN_PCMPESTRM128,
28400 IX86_BUILTIN_PCMPESTRA128,
28401 IX86_BUILTIN_PCMPESTRC128,
28402 IX86_BUILTIN_PCMPESTRO128,
28403 IX86_BUILTIN_PCMPESTRS128,
28404 IX86_BUILTIN_PCMPESTRZ128,
28405 IX86_BUILTIN_PCMPISTRI128,
28406 IX86_BUILTIN_PCMPISTRM128,
28407 IX86_BUILTIN_PCMPISTRA128,
28408 IX86_BUILTIN_PCMPISTRC128,
28409 IX86_BUILTIN_PCMPISTRO128,
28410 IX86_BUILTIN_PCMPISTRS128,
28411 IX86_BUILTIN_PCMPISTRZ128,
28412
28413 IX86_BUILTIN_PCMPGTQ,
28414
28415 /* AES instructions */
28416 IX86_BUILTIN_AESENC128,
28417 IX86_BUILTIN_AESENCLAST128,
28418 IX86_BUILTIN_AESDEC128,
28419 IX86_BUILTIN_AESDECLAST128,
28420 IX86_BUILTIN_AESIMC128,
28421 IX86_BUILTIN_AESKEYGENASSIST128,
28422
28423 /* PCLMUL instruction */
28424 IX86_BUILTIN_PCLMULQDQ128,
28425
28426 /* AVX */
28427 IX86_BUILTIN_ADDPD256,
28428 IX86_BUILTIN_ADDPS256,
28429 IX86_BUILTIN_ADDSUBPD256,
28430 IX86_BUILTIN_ADDSUBPS256,
28431 IX86_BUILTIN_ANDPD256,
28432 IX86_BUILTIN_ANDPS256,
28433 IX86_BUILTIN_ANDNPD256,
28434 IX86_BUILTIN_ANDNPS256,
28435 IX86_BUILTIN_BLENDPD256,
28436 IX86_BUILTIN_BLENDPS256,
28437 IX86_BUILTIN_BLENDVPD256,
28438 IX86_BUILTIN_BLENDVPS256,
28439 IX86_BUILTIN_DIVPD256,
28440 IX86_BUILTIN_DIVPS256,
28441 IX86_BUILTIN_DPPS256,
28442 IX86_BUILTIN_HADDPD256,
28443 IX86_BUILTIN_HADDPS256,
28444 IX86_BUILTIN_HSUBPD256,
28445 IX86_BUILTIN_HSUBPS256,
28446 IX86_BUILTIN_MAXPD256,
28447 IX86_BUILTIN_MAXPS256,
28448 IX86_BUILTIN_MINPD256,
28449 IX86_BUILTIN_MINPS256,
28450 IX86_BUILTIN_MULPD256,
28451 IX86_BUILTIN_MULPS256,
28452 IX86_BUILTIN_ORPD256,
28453 IX86_BUILTIN_ORPS256,
28454 IX86_BUILTIN_SHUFPD256,
28455 IX86_BUILTIN_SHUFPS256,
28456 IX86_BUILTIN_SUBPD256,
28457 IX86_BUILTIN_SUBPS256,
28458 IX86_BUILTIN_XORPD256,
28459 IX86_BUILTIN_XORPS256,
28460 IX86_BUILTIN_CMPSD,
28461 IX86_BUILTIN_CMPSS,
28462 IX86_BUILTIN_CMPPD,
28463 IX86_BUILTIN_CMPPS,
28464 IX86_BUILTIN_CMPPD256,
28465 IX86_BUILTIN_CMPPS256,
28466 IX86_BUILTIN_CVTDQ2PD256,
28467 IX86_BUILTIN_CVTDQ2PS256,
28468 IX86_BUILTIN_CVTPD2PS256,
28469 IX86_BUILTIN_CVTPS2DQ256,
28470 IX86_BUILTIN_CVTPS2PD256,
28471 IX86_BUILTIN_CVTTPD2DQ256,
28472 IX86_BUILTIN_CVTPD2DQ256,
28473 IX86_BUILTIN_CVTTPS2DQ256,
28474 IX86_BUILTIN_EXTRACTF128PD256,
28475 IX86_BUILTIN_EXTRACTF128PS256,
28476 IX86_BUILTIN_EXTRACTF128SI256,
28477 IX86_BUILTIN_VZEROALL,
28478 IX86_BUILTIN_VZEROUPPER,
28479 IX86_BUILTIN_VPERMILVARPD,
28480 IX86_BUILTIN_VPERMILVARPS,
28481 IX86_BUILTIN_VPERMILVARPD256,
28482 IX86_BUILTIN_VPERMILVARPS256,
28483 IX86_BUILTIN_VPERMILPD,
28484 IX86_BUILTIN_VPERMILPS,
28485 IX86_BUILTIN_VPERMILPD256,
28486 IX86_BUILTIN_VPERMILPS256,
28487 IX86_BUILTIN_VPERMIL2PD,
28488 IX86_BUILTIN_VPERMIL2PS,
28489 IX86_BUILTIN_VPERMIL2PD256,
28490 IX86_BUILTIN_VPERMIL2PS256,
28491 IX86_BUILTIN_VPERM2F128PD256,
28492 IX86_BUILTIN_VPERM2F128PS256,
28493 IX86_BUILTIN_VPERM2F128SI256,
28494 IX86_BUILTIN_VBROADCASTSS,
28495 IX86_BUILTIN_VBROADCASTSD256,
28496 IX86_BUILTIN_VBROADCASTSS256,
28497 IX86_BUILTIN_VBROADCASTPD256,
28498 IX86_BUILTIN_VBROADCASTPS256,
28499 IX86_BUILTIN_VINSERTF128PD256,
28500 IX86_BUILTIN_VINSERTF128PS256,
28501 IX86_BUILTIN_VINSERTF128SI256,
28502 IX86_BUILTIN_LOADUPD256,
28503 IX86_BUILTIN_LOADUPS256,
28504 IX86_BUILTIN_STOREUPD256,
28505 IX86_BUILTIN_STOREUPS256,
28506 IX86_BUILTIN_LDDQU256,
28507 IX86_BUILTIN_MOVNTDQ256,
28508 IX86_BUILTIN_MOVNTPD256,
28509 IX86_BUILTIN_MOVNTPS256,
28510 IX86_BUILTIN_LOADDQU256,
28511 IX86_BUILTIN_STOREDQU256,
28512 IX86_BUILTIN_MASKLOADPD,
28513 IX86_BUILTIN_MASKLOADPS,
28514 IX86_BUILTIN_MASKSTOREPD,
28515 IX86_BUILTIN_MASKSTOREPS,
28516 IX86_BUILTIN_MASKLOADPD256,
28517 IX86_BUILTIN_MASKLOADPS256,
28518 IX86_BUILTIN_MASKSTOREPD256,
28519 IX86_BUILTIN_MASKSTOREPS256,
28520 IX86_BUILTIN_MOVSHDUP256,
28521 IX86_BUILTIN_MOVSLDUP256,
28522 IX86_BUILTIN_MOVDDUP256,
28523
28524 IX86_BUILTIN_SQRTPD256,
28525 IX86_BUILTIN_SQRTPS256,
28526 IX86_BUILTIN_SQRTPS_NR256,
28527 IX86_BUILTIN_RSQRTPS256,
28528 IX86_BUILTIN_RSQRTPS_NR256,
28529
28530 IX86_BUILTIN_RCPPS256,
28531
28532 IX86_BUILTIN_ROUNDPD256,
28533 IX86_BUILTIN_ROUNDPS256,
28534
28535 IX86_BUILTIN_FLOORPD256,
28536 IX86_BUILTIN_CEILPD256,
28537 IX86_BUILTIN_TRUNCPD256,
28538 IX86_BUILTIN_RINTPD256,
28539 IX86_BUILTIN_ROUNDPD_AZ256,
28540
28541 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28542 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28543 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28544
28545 IX86_BUILTIN_FLOORPS256,
28546 IX86_BUILTIN_CEILPS256,
28547 IX86_BUILTIN_TRUNCPS256,
28548 IX86_BUILTIN_RINTPS256,
28549 IX86_BUILTIN_ROUNDPS_AZ256,
28550
28551 IX86_BUILTIN_FLOORPS_SFIX256,
28552 IX86_BUILTIN_CEILPS_SFIX256,
28553 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28554
28555 IX86_BUILTIN_UNPCKHPD256,
28556 IX86_BUILTIN_UNPCKLPD256,
28557 IX86_BUILTIN_UNPCKHPS256,
28558 IX86_BUILTIN_UNPCKLPS256,
28559
28560 IX86_BUILTIN_SI256_SI,
28561 IX86_BUILTIN_PS256_PS,
28562 IX86_BUILTIN_PD256_PD,
28563 IX86_BUILTIN_SI_SI256,
28564 IX86_BUILTIN_PS_PS256,
28565 IX86_BUILTIN_PD_PD256,
28566
28567 IX86_BUILTIN_VTESTZPD,
28568 IX86_BUILTIN_VTESTCPD,
28569 IX86_BUILTIN_VTESTNZCPD,
28570 IX86_BUILTIN_VTESTZPS,
28571 IX86_BUILTIN_VTESTCPS,
28572 IX86_BUILTIN_VTESTNZCPS,
28573 IX86_BUILTIN_VTESTZPD256,
28574 IX86_BUILTIN_VTESTCPD256,
28575 IX86_BUILTIN_VTESTNZCPD256,
28576 IX86_BUILTIN_VTESTZPS256,
28577 IX86_BUILTIN_VTESTCPS256,
28578 IX86_BUILTIN_VTESTNZCPS256,
28579 IX86_BUILTIN_PTESTZ256,
28580 IX86_BUILTIN_PTESTC256,
28581 IX86_BUILTIN_PTESTNZC256,
28582
28583 IX86_BUILTIN_MOVMSKPD256,
28584 IX86_BUILTIN_MOVMSKPS256,
28585
28586 /* AVX2 */
28587 IX86_BUILTIN_MPSADBW256,
28588 IX86_BUILTIN_PABSB256,
28589 IX86_BUILTIN_PABSW256,
28590 IX86_BUILTIN_PABSD256,
28591 IX86_BUILTIN_PACKSSDW256,
28592 IX86_BUILTIN_PACKSSWB256,
28593 IX86_BUILTIN_PACKUSDW256,
28594 IX86_BUILTIN_PACKUSWB256,
28595 IX86_BUILTIN_PADDB256,
28596 IX86_BUILTIN_PADDW256,
28597 IX86_BUILTIN_PADDD256,
28598 IX86_BUILTIN_PADDQ256,
28599 IX86_BUILTIN_PADDSB256,
28600 IX86_BUILTIN_PADDSW256,
28601 IX86_BUILTIN_PADDUSB256,
28602 IX86_BUILTIN_PADDUSW256,
28603 IX86_BUILTIN_PALIGNR256,
28604 IX86_BUILTIN_AND256I,
28605 IX86_BUILTIN_ANDNOT256I,
28606 IX86_BUILTIN_PAVGB256,
28607 IX86_BUILTIN_PAVGW256,
28608 IX86_BUILTIN_PBLENDVB256,
28609 IX86_BUILTIN_PBLENDVW256,
28610 IX86_BUILTIN_PCMPEQB256,
28611 IX86_BUILTIN_PCMPEQW256,
28612 IX86_BUILTIN_PCMPEQD256,
28613 IX86_BUILTIN_PCMPEQQ256,
28614 IX86_BUILTIN_PCMPGTB256,
28615 IX86_BUILTIN_PCMPGTW256,
28616 IX86_BUILTIN_PCMPGTD256,
28617 IX86_BUILTIN_PCMPGTQ256,
28618 IX86_BUILTIN_PHADDW256,
28619 IX86_BUILTIN_PHADDD256,
28620 IX86_BUILTIN_PHADDSW256,
28621 IX86_BUILTIN_PHSUBW256,
28622 IX86_BUILTIN_PHSUBD256,
28623 IX86_BUILTIN_PHSUBSW256,
28624 IX86_BUILTIN_PMADDUBSW256,
28625 IX86_BUILTIN_PMADDWD256,
28626 IX86_BUILTIN_PMAXSB256,
28627 IX86_BUILTIN_PMAXSW256,
28628 IX86_BUILTIN_PMAXSD256,
28629 IX86_BUILTIN_PMAXUB256,
28630 IX86_BUILTIN_PMAXUW256,
28631 IX86_BUILTIN_PMAXUD256,
28632 IX86_BUILTIN_PMINSB256,
28633 IX86_BUILTIN_PMINSW256,
28634 IX86_BUILTIN_PMINSD256,
28635 IX86_BUILTIN_PMINUB256,
28636 IX86_BUILTIN_PMINUW256,
28637 IX86_BUILTIN_PMINUD256,
28638 IX86_BUILTIN_PMOVMSKB256,
28639 IX86_BUILTIN_PMOVSXBW256,
28640 IX86_BUILTIN_PMOVSXBD256,
28641 IX86_BUILTIN_PMOVSXBQ256,
28642 IX86_BUILTIN_PMOVSXWD256,
28643 IX86_BUILTIN_PMOVSXWQ256,
28644 IX86_BUILTIN_PMOVSXDQ256,
28645 IX86_BUILTIN_PMOVZXBW256,
28646 IX86_BUILTIN_PMOVZXBD256,
28647 IX86_BUILTIN_PMOVZXBQ256,
28648 IX86_BUILTIN_PMOVZXWD256,
28649 IX86_BUILTIN_PMOVZXWQ256,
28650 IX86_BUILTIN_PMOVZXDQ256,
28651 IX86_BUILTIN_PMULDQ256,
28652 IX86_BUILTIN_PMULHRSW256,
28653 IX86_BUILTIN_PMULHUW256,
28654 IX86_BUILTIN_PMULHW256,
28655 IX86_BUILTIN_PMULLW256,
28656 IX86_BUILTIN_PMULLD256,
28657 IX86_BUILTIN_PMULUDQ256,
28658 IX86_BUILTIN_POR256,
28659 IX86_BUILTIN_PSADBW256,
28660 IX86_BUILTIN_PSHUFB256,
28661 IX86_BUILTIN_PSHUFD256,
28662 IX86_BUILTIN_PSHUFHW256,
28663 IX86_BUILTIN_PSHUFLW256,
28664 IX86_BUILTIN_PSIGNB256,
28665 IX86_BUILTIN_PSIGNW256,
28666 IX86_BUILTIN_PSIGND256,
28667 IX86_BUILTIN_PSLLDQI256,
28668 IX86_BUILTIN_PSLLWI256,
28669 IX86_BUILTIN_PSLLW256,
28670 IX86_BUILTIN_PSLLDI256,
28671 IX86_BUILTIN_PSLLD256,
28672 IX86_BUILTIN_PSLLQI256,
28673 IX86_BUILTIN_PSLLQ256,
28674 IX86_BUILTIN_PSRAWI256,
28675 IX86_BUILTIN_PSRAW256,
28676 IX86_BUILTIN_PSRADI256,
28677 IX86_BUILTIN_PSRAD256,
28678 IX86_BUILTIN_PSRLDQI256,
28679 IX86_BUILTIN_PSRLWI256,
28680 IX86_BUILTIN_PSRLW256,
28681 IX86_BUILTIN_PSRLDI256,
28682 IX86_BUILTIN_PSRLD256,
28683 IX86_BUILTIN_PSRLQI256,
28684 IX86_BUILTIN_PSRLQ256,
28685 IX86_BUILTIN_PSUBB256,
28686 IX86_BUILTIN_PSUBW256,
28687 IX86_BUILTIN_PSUBD256,
28688 IX86_BUILTIN_PSUBQ256,
28689 IX86_BUILTIN_PSUBSB256,
28690 IX86_BUILTIN_PSUBSW256,
28691 IX86_BUILTIN_PSUBUSB256,
28692 IX86_BUILTIN_PSUBUSW256,
28693 IX86_BUILTIN_PUNPCKHBW256,
28694 IX86_BUILTIN_PUNPCKHWD256,
28695 IX86_BUILTIN_PUNPCKHDQ256,
28696 IX86_BUILTIN_PUNPCKHQDQ256,
28697 IX86_BUILTIN_PUNPCKLBW256,
28698 IX86_BUILTIN_PUNPCKLWD256,
28699 IX86_BUILTIN_PUNPCKLDQ256,
28700 IX86_BUILTIN_PUNPCKLQDQ256,
28701 IX86_BUILTIN_PXOR256,
28702 IX86_BUILTIN_MOVNTDQA256,
28703 IX86_BUILTIN_VBROADCASTSS_PS,
28704 IX86_BUILTIN_VBROADCASTSS_PS256,
28705 IX86_BUILTIN_VBROADCASTSD_PD256,
28706 IX86_BUILTIN_VBROADCASTSI256,
28707 IX86_BUILTIN_PBLENDD256,
28708 IX86_BUILTIN_PBLENDD128,
28709 IX86_BUILTIN_PBROADCASTB256,
28710 IX86_BUILTIN_PBROADCASTW256,
28711 IX86_BUILTIN_PBROADCASTD256,
28712 IX86_BUILTIN_PBROADCASTQ256,
28713 IX86_BUILTIN_PBROADCASTB128,
28714 IX86_BUILTIN_PBROADCASTW128,
28715 IX86_BUILTIN_PBROADCASTD128,
28716 IX86_BUILTIN_PBROADCASTQ128,
28717 IX86_BUILTIN_VPERMVARSI256,
28718 IX86_BUILTIN_VPERMDF256,
28719 IX86_BUILTIN_VPERMVARSF256,
28720 IX86_BUILTIN_VPERMDI256,
28721 IX86_BUILTIN_VPERMTI256,
28722 IX86_BUILTIN_VEXTRACT128I256,
28723 IX86_BUILTIN_VINSERT128I256,
28724 IX86_BUILTIN_MASKLOADD,
28725 IX86_BUILTIN_MASKLOADQ,
28726 IX86_BUILTIN_MASKLOADD256,
28727 IX86_BUILTIN_MASKLOADQ256,
28728 IX86_BUILTIN_MASKSTORED,
28729 IX86_BUILTIN_MASKSTOREQ,
28730 IX86_BUILTIN_MASKSTORED256,
28731 IX86_BUILTIN_MASKSTOREQ256,
28732 IX86_BUILTIN_PSLLVV4DI,
28733 IX86_BUILTIN_PSLLVV2DI,
28734 IX86_BUILTIN_PSLLVV8SI,
28735 IX86_BUILTIN_PSLLVV4SI,
28736 IX86_BUILTIN_PSRAVV8SI,
28737 IX86_BUILTIN_PSRAVV4SI,
28738 IX86_BUILTIN_PSRLVV4DI,
28739 IX86_BUILTIN_PSRLVV2DI,
28740 IX86_BUILTIN_PSRLVV8SI,
28741 IX86_BUILTIN_PSRLVV4SI,
28742
28743 IX86_BUILTIN_GATHERSIV2DF,
28744 IX86_BUILTIN_GATHERSIV4DF,
28745 IX86_BUILTIN_GATHERDIV2DF,
28746 IX86_BUILTIN_GATHERDIV4DF,
28747 IX86_BUILTIN_GATHERSIV4SF,
28748 IX86_BUILTIN_GATHERSIV8SF,
28749 IX86_BUILTIN_GATHERDIV4SF,
28750 IX86_BUILTIN_GATHERDIV8SF,
28751 IX86_BUILTIN_GATHERSIV2DI,
28752 IX86_BUILTIN_GATHERSIV4DI,
28753 IX86_BUILTIN_GATHERDIV2DI,
28754 IX86_BUILTIN_GATHERDIV4DI,
28755 IX86_BUILTIN_GATHERSIV4SI,
28756 IX86_BUILTIN_GATHERSIV8SI,
28757 IX86_BUILTIN_GATHERDIV4SI,
28758 IX86_BUILTIN_GATHERDIV8SI,
28759
28760 /* AVX512F */
28761 IX86_BUILTIN_SI512_SI256,
28762 IX86_BUILTIN_PD512_PD256,
28763 IX86_BUILTIN_PS512_PS256,
28764 IX86_BUILTIN_SI512_SI,
28765 IX86_BUILTIN_PD512_PD,
28766 IX86_BUILTIN_PS512_PS,
28767 IX86_BUILTIN_ADDPD512,
28768 IX86_BUILTIN_ADDPS512,
28769 IX86_BUILTIN_ADDSD_ROUND,
28770 IX86_BUILTIN_ADDSS_ROUND,
28771 IX86_BUILTIN_ALIGND512,
28772 IX86_BUILTIN_ALIGNQ512,
28773 IX86_BUILTIN_BLENDMD512,
28774 IX86_BUILTIN_BLENDMPD512,
28775 IX86_BUILTIN_BLENDMPS512,
28776 IX86_BUILTIN_BLENDMQ512,
28777 IX86_BUILTIN_BROADCASTF32X4_512,
28778 IX86_BUILTIN_BROADCASTF64X4_512,
28779 IX86_BUILTIN_BROADCASTI32X4_512,
28780 IX86_BUILTIN_BROADCASTI64X4_512,
28781 IX86_BUILTIN_BROADCASTSD512,
28782 IX86_BUILTIN_BROADCASTSS512,
28783 IX86_BUILTIN_CMPD512,
28784 IX86_BUILTIN_CMPPD512,
28785 IX86_BUILTIN_CMPPS512,
28786 IX86_BUILTIN_CMPQ512,
28787 IX86_BUILTIN_CMPSD_MASK,
28788 IX86_BUILTIN_CMPSS_MASK,
28789 IX86_BUILTIN_COMIDF,
28790 IX86_BUILTIN_COMISF,
28791 IX86_BUILTIN_COMPRESSPD512,
28792 IX86_BUILTIN_COMPRESSPDSTORE512,
28793 IX86_BUILTIN_COMPRESSPS512,
28794 IX86_BUILTIN_COMPRESSPSSTORE512,
28795 IX86_BUILTIN_CVTDQ2PD512,
28796 IX86_BUILTIN_CVTDQ2PS512,
28797 IX86_BUILTIN_CVTPD2DQ512,
28798 IX86_BUILTIN_CVTPD2PS512,
28799 IX86_BUILTIN_CVTPD2UDQ512,
28800 IX86_BUILTIN_CVTPH2PS512,
28801 IX86_BUILTIN_CVTPS2DQ512,
28802 IX86_BUILTIN_CVTPS2PD512,
28803 IX86_BUILTIN_CVTPS2PH512,
28804 IX86_BUILTIN_CVTPS2UDQ512,
28805 IX86_BUILTIN_CVTSD2SS_ROUND,
28806 IX86_BUILTIN_CVTSI2SD64,
28807 IX86_BUILTIN_CVTSI2SS32,
28808 IX86_BUILTIN_CVTSI2SS64,
28809 IX86_BUILTIN_CVTSS2SD_ROUND,
28810 IX86_BUILTIN_CVTTPD2DQ512,
28811 IX86_BUILTIN_CVTTPD2UDQ512,
28812 IX86_BUILTIN_CVTTPS2DQ512,
28813 IX86_BUILTIN_CVTTPS2UDQ512,
28814 IX86_BUILTIN_CVTUDQ2PD512,
28815 IX86_BUILTIN_CVTUDQ2PS512,
28816 IX86_BUILTIN_CVTUSI2SD32,
28817 IX86_BUILTIN_CVTUSI2SD64,
28818 IX86_BUILTIN_CVTUSI2SS32,
28819 IX86_BUILTIN_CVTUSI2SS64,
28820 IX86_BUILTIN_DIVPD512,
28821 IX86_BUILTIN_DIVPS512,
28822 IX86_BUILTIN_DIVSD_ROUND,
28823 IX86_BUILTIN_DIVSS_ROUND,
28824 IX86_BUILTIN_EXPANDPD512,
28825 IX86_BUILTIN_EXPANDPD512Z,
28826 IX86_BUILTIN_EXPANDPDLOAD512,
28827 IX86_BUILTIN_EXPANDPDLOAD512Z,
28828 IX86_BUILTIN_EXPANDPS512,
28829 IX86_BUILTIN_EXPANDPS512Z,
28830 IX86_BUILTIN_EXPANDPSLOAD512,
28831 IX86_BUILTIN_EXPANDPSLOAD512Z,
28832 IX86_BUILTIN_EXTRACTF32X4,
28833 IX86_BUILTIN_EXTRACTF64X4,
28834 IX86_BUILTIN_EXTRACTI32X4,
28835 IX86_BUILTIN_EXTRACTI64X4,
28836 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28837 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28838 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28839 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28840 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28841 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28842 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28843 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28844 IX86_BUILTIN_GETEXPPD512,
28845 IX86_BUILTIN_GETEXPPS512,
28846 IX86_BUILTIN_GETEXPSD128,
28847 IX86_BUILTIN_GETEXPSS128,
28848 IX86_BUILTIN_GETMANTPD512,
28849 IX86_BUILTIN_GETMANTPS512,
28850 IX86_BUILTIN_GETMANTSD128,
28851 IX86_BUILTIN_GETMANTSS128,
28852 IX86_BUILTIN_INSERTF32X4,
28853 IX86_BUILTIN_INSERTF64X4,
28854 IX86_BUILTIN_INSERTI32X4,
28855 IX86_BUILTIN_INSERTI64X4,
28856 IX86_BUILTIN_LOADAPD512,
28857 IX86_BUILTIN_LOADAPS512,
28858 IX86_BUILTIN_LOADDQUDI512,
28859 IX86_BUILTIN_LOADDQUSI512,
28860 IX86_BUILTIN_LOADUPD512,
28861 IX86_BUILTIN_LOADUPS512,
28862 IX86_BUILTIN_MAXPD512,
28863 IX86_BUILTIN_MAXPS512,
28864 IX86_BUILTIN_MAXSD_ROUND,
28865 IX86_BUILTIN_MAXSS_ROUND,
28866 IX86_BUILTIN_MINPD512,
28867 IX86_BUILTIN_MINPS512,
28868 IX86_BUILTIN_MINSD_ROUND,
28869 IX86_BUILTIN_MINSS_ROUND,
28870 IX86_BUILTIN_MOVAPD512,
28871 IX86_BUILTIN_MOVAPS512,
28872 IX86_BUILTIN_MOVDDUP512,
28873 IX86_BUILTIN_MOVDQA32LOAD512,
28874 IX86_BUILTIN_MOVDQA32STORE512,
28875 IX86_BUILTIN_MOVDQA32_512,
28876 IX86_BUILTIN_MOVDQA64LOAD512,
28877 IX86_BUILTIN_MOVDQA64STORE512,
28878 IX86_BUILTIN_MOVDQA64_512,
28879 IX86_BUILTIN_MOVNTDQ512,
28880 IX86_BUILTIN_MOVNTDQA512,
28881 IX86_BUILTIN_MOVNTPD512,
28882 IX86_BUILTIN_MOVNTPS512,
28883 IX86_BUILTIN_MOVSHDUP512,
28884 IX86_BUILTIN_MOVSLDUP512,
28885 IX86_BUILTIN_MULPD512,
28886 IX86_BUILTIN_MULPS512,
28887 IX86_BUILTIN_MULSD_ROUND,
28888 IX86_BUILTIN_MULSS_ROUND,
28889 IX86_BUILTIN_PABSD512,
28890 IX86_BUILTIN_PABSQ512,
28891 IX86_BUILTIN_PADDD512,
28892 IX86_BUILTIN_PADDQ512,
28893 IX86_BUILTIN_PANDD512,
28894 IX86_BUILTIN_PANDND512,
28895 IX86_BUILTIN_PANDNQ512,
28896 IX86_BUILTIN_PANDQ512,
28897 IX86_BUILTIN_PBROADCASTD512,
28898 IX86_BUILTIN_PBROADCASTD512_GPR,
28899 IX86_BUILTIN_PBROADCASTMB512,
28900 IX86_BUILTIN_PBROADCASTMW512,
28901 IX86_BUILTIN_PBROADCASTQ512,
28902 IX86_BUILTIN_PBROADCASTQ512_GPR,
28903 IX86_BUILTIN_PCMPEQD512_MASK,
28904 IX86_BUILTIN_PCMPEQQ512_MASK,
28905 IX86_BUILTIN_PCMPGTD512_MASK,
28906 IX86_BUILTIN_PCMPGTQ512_MASK,
28907 IX86_BUILTIN_PCOMPRESSD512,
28908 IX86_BUILTIN_PCOMPRESSDSTORE512,
28909 IX86_BUILTIN_PCOMPRESSQ512,
28910 IX86_BUILTIN_PCOMPRESSQSTORE512,
28911 IX86_BUILTIN_PEXPANDD512,
28912 IX86_BUILTIN_PEXPANDD512Z,
28913 IX86_BUILTIN_PEXPANDDLOAD512,
28914 IX86_BUILTIN_PEXPANDDLOAD512Z,
28915 IX86_BUILTIN_PEXPANDQ512,
28916 IX86_BUILTIN_PEXPANDQ512Z,
28917 IX86_BUILTIN_PEXPANDQLOAD512,
28918 IX86_BUILTIN_PEXPANDQLOAD512Z,
28919 IX86_BUILTIN_PMAXSD512,
28920 IX86_BUILTIN_PMAXSQ512,
28921 IX86_BUILTIN_PMAXUD512,
28922 IX86_BUILTIN_PMAXUQ512,
28923 IX86_BUILTIN_PMINSD512,
28924 IX86_BUILTIN_PMINSQ512,
28925 IX86_BUILTIN_PMINUD512,
28926 IX86_BUILTIN_PMINUQ512,
28927 IX86_BUILTIN_PMOVDB512,
28928 IX86_BUILTIN_PMOVDB512_MEM,
28929 IX86_BUILTIN_PMOVDW512,
28930 IX86_BUILTIN_PMOVDW512_MEM,
28931 IX86_BUILTIN_PMOVQB512,
28932 IX86_BUILTIN_PMOVQB512_MEM,
28933 IX86_BUILTIN_PMOVQD512,
28934 IX86_BUILTIN_PMOVQD512_MEM,
28935 IX86_BUILTIN_PMOVQW512,
28936 IX86_BUILTIN_PMOVQW512_MEM,
28937 IX86_BUILTIN_PMOVSDB512,
28938 IX86_BUILTIN_PMOVSDB512_MEM,
28939 IX86_BUILTIN_PMOVSDW512,
28940 IX86_BUILTIN_PMOVSDW512_MEM,
28941 IX86_BUILTIN_PMOVSQB512,
28942 IX86_BUILTIN_PMOVSQB512_MEM,
28943 IX86_BUILTIN_PMOVSQD512,
28944 IX86_BUILTIN_PMOVSQD512_MEM,
28945 IX86_BUILTIN_PMOVSQW512,
28946 IX86_BUILTIN_PMOVSQW512_MEM,
28947 IX86_BUILTIN_PMOVSXBD512,
28948 IX86_BUILTIN_PMOVSXBQ512,
28949 IX86_BUILTIN_PMOVSXDQ512,
28950 IX86_BUILTIN_PMOVSXWD512,
28951 IX86_BUILTIN_PMOVSXWQ512,
28952 IX86_BUILTIN_PMOVUSDB512,
28953 IX86_BUILTIN_PMOVUSDB512_MEM,
28954 IX86_BUILTIN_PMOVUSDW512,
28955 IX86_BUILTIN_PMOVUSDW512_MEM,
28956 IX86_BUILTIN_PMOVUSQB512,
28957 IX86_BUILTIN_PMOVUSQB512_MEM,
28958 IX86_BUILTIN_PMOVUSQD512,
28959 IX86_BUILTIN_PMOVUSQD512_MEM,
28960 IX86_BUILTIN_PMOVUSQW512,
28961 IX86_BUILTIN_PMOVUSQW512_MEM,
28962 IX86_BUILTIN_PMOVZXBD512,
28963 IX86_BUILTIN_PMOVZXBQ512,
28964 IX86_BUILTIN_PMOVZXDQ512,
28965 IX86_BUILTIN_PMOVZXWD512,
28966 IX86_BUILTIN_PMOVZXWQ512,
28967 IX86_BUILTIN_PMULDQ512,
28968 IX86_BUILTIN_PMULLD512,
28969 IX86_BUILTIN_PMULUDQ512,
28970 IX86_BUILTIN_PORD512,
28971 IX86_BUILTIN_PORQ512,
28972 IX86_BUILTIN_PROLD512,
28973 IX86_BUILTIN_PROLQ512,
28974 IX86_BUILTIN_PROLVD512,
28975 IX86_BUILTIN_PROLVQ512,
28976 IX86_BUILTIN_PRORD512,
28977 IX86_BUILTIN_PRORQ512,
28978 IX86_BUILTIN_PRORVD512,
28979 IX86_BUILTIN_PRORVQ512,
28980 IX86_BUILTIN_PSHUFD512,
28981 IX86_BUILTIN_PSLLD512,
28982 IX86_BUILTIN_PSLLDI512,
28983 IX86_BUILTIN_PSLLQ512,
28984 IX86_BUILTIN_PSLLQI512,
28985 IX86_BUILTIN_PSLLVV16SI,
28986 IX86_BUILTIN_PSLLVV8DI,
28987 IX86_BUILTIN_PSRAD512,
28988 IX86_BUILTIN_PSRADI512,
28989 IX86_BUILTIN_PSRAQ512,
28990 IX86_BUILTIN_PSRAQI512,
28991 IX86_BUILTIN_PSRAVV16SI,
28992 IX86_BUILTIN_PSRAVV8DI,
28993 IX86_BUILTIN_PSRLD512,
28994 IX86_BUILTIN_PSRLDI512,
28995 IX86_BUILTIN_PSRLQ512,
28996 IX86_BUILTIN_PSRLQI512,
28997 IX86_BUILTIN_PSRLVV16SI,
28998 IX86_BUILTIN_PSRLVV8DI,
28999 IX86_BUILTIN_PSUBD512,
29000 IX86_BUILTIN_PSUBQ512,
29001 IX86_BUILTIN_PTESTMD512,
29002 IX86_BUILTIN_PTESTMQ512,
29003 IX86_BUILTIN_PTESTNMD512,
29004 IX86_BUILTIN_PTESTNMQ512,
29005 IX86_BUILTIN_PUNPCKHDQ512,
29006 IX86_BUILTIN_PUNPCKHQDQ512,
29007 IX86_BUILTIN_PUNPCKLDQ512,
29008 IX86_BUILTIN_PUNPCKLQDQ512,
29009 IX86_BUILTIN_PXORD512,
29010 IX86_BUILTIN_PXORQ512,
29011 IX86_BUILTIN_RCP14PD512,
29012 IX86_BUILTIN_RCP14PS512,
29013 IX86_BUILTIN_RCP14SD,
29014 IX86_BUILTIN_RCP14SS,
29015 IX86_BUILTIN_RNDSCALEPD,
29016 IX86_BUILTIN_RNDSCALEPS,
29017 IX86_BUILTIN_RNDSCALESD,
29018 IX86_BUILTIN_RNDSCALESS,
29019 IX86_BUILTIN_RSQRT14PD512,
29020 IX86_BUILTIN_RSQRT14PS512,
29021 IX86_BUILTIN_RSQRT14SD,
29022 IX86_BUILTIN_RSQRT14SS,
29023 IX86_BUILTIN_SCALEFPD512,
29024 IX86_BUILTIN_SCALEFPS512,
29025 IX86_BUILTIN_SCALEFSD,
29026 IX86_BUILTIN_SCALEFSS,
29027 IX86_BUILTIN_SHUFPD512,
29028 IX86_BUILTIN_SHUFPS512,
29029 IX86_BUILTIN_SHUF_F32x4,
29030 IX86_BUILTIN_SHUF_F64x2,
29031 IX86_BUILTIN_SHUF_I32x4,
29032 IX86_BUILTIN_SHUF_I64x2,
29033 IX86_BUILTIN_SQRTPD512,
29034 IX86_BUILTIN_SQRTPD512_MASK,
29035 IX86_BUILTIN_SQRTPS512_MASK,
29036 IX86_BUILTIN_SQRTPS_NR512,
29037 IX86_BUILTIN_SQRTSD_ROUND,
29038 IX86_BUILTIN_SQRTSS_ROUND,
29039 IX86_BUILTIN_STOREAPD512,
29040 IX86_BUILTIN_STOREAPS512,
29041 IX86_BUILTIN_STOREDQUDI512,
29042 IX86_BUILTIN_STOREDQUSI512,
29043 IX86_BUILTIN_STOREUPD512,
29044 IX86_BUILTIN_STOREUPS512,
29045 IX86_BUILTIN_SUBPD512,
29046 IX86_BUILTIN_SUBPS512,
29047 IX86_BUILTIN_SUBSD_ROUND,
29048 IX86_BUILTIN_SUBSS_ROUND,
29049 IX86_BUILTIN_UCMPD512,
29050 IX86_BUILTIN_UCMPQ512,
29051 IX86_BUILTIN_UNPCKHPD512,
29052 IX86_BUILTIN_UNPCKHPS512,
29053 IX86_BUILTIN_UNPCKLPD512,
29054 IX86_BUILTIN_UNPCKLPS512,
29055 IX86_BUILTIN_VCVTSD2SI32,
29056 IX86_BUILTIN_VCVTSD2SI64,
29057 IX86_BUILTIN_VCVTSD2USI32,
29058 IX86_BUILTIN_VCVTSD2USI64,
29059 IX86_BUILTIN_VCVTSS2SI32,
29060 IX86_BUILTIN_VCVTSS2SI64,
29061 IX86_BUILTIN_VCVTSS2USI32,
29062 IX86_BUILTIN_VCVTSS2USI64,
29063 IX86_BUILTIN_VCVTTSD2SI32,
29064 IX86_BUILTIN_VCVTTSD2SI64,
29065 IX86_BUILTIN_VCVTTSD2USI32,
29066 IX86_BUILTIN_VCVTTSD2USI64,
29067 IX86_BUILTIN_VCVTTSS2SI32,
29068 IX86_BUILTIN_VCVTTSS2SI64,
29069 IX86_BUILTIN_VCVTTSS2USI32,
29070 IX86_BUILTIN_VCVTTSS2USI64,
29071 IX86_BUILTIN_VFMADDPD512_MASK,
29072 IX86_BUILTIN_VFMADDPD512_MASK3,
29073 IX86_BUILTIN_VFMADDPD512_MASKZ,
29074 IX86_BUILTIN_VFMADDPS512_MASK,
29075 IX86_BUILTIN_VFMADDPS512_MASK3,
29076 IX86_BUILTIN_VFMADDPS512_MASKZ,
29077 IX86_BUILTIN_VFMADDSD3_ROUND,
29078 IX86_BUILTIN_VFMADDSS3_ROUND,
29079 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29080 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29081 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29082 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29083 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29084 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29085 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29086 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29087 IX86_BUILTIN_VFMSUBPD512_MASK3,
29088 IX86_BUILTIN_VFMSUBPS512_MASK3,
29089 IX86_BUILTIN_VFMSUBSD3_MASK3,
29090 IX86_BUILTIN_VFMSUBSS3_MASK3,
29091 IX86_BUILTIN_VFNMADDPD512_MASK,
29092 IX86_BUILTIN_VFNMADDPS512_MASK,
29093 IX86_BUILTIN_VFNMSUBPD512_MASK,
29094 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29095 IX86_BUILTIN_VFNMSUBPS512_MASK,
29096 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29097 IX86_BUILTIN_VPCLZCNTD512,
29098 IX86_BUILTIN_VPCLZCNTQ512,
29099 IX86_BUILTIN_VPCONFLICTD512,
29100 IX86_BUILTIN_VPCONFLICTQ512,
29101 IX86_BUILTIN_VPERMDF512,
29102 IX86_BUILTIN_VPERMDI512,
29103 IX86_BUILTIN_VPERMI2VARD512,
29104 IX86_BUILTIN_VPERMI2VARPD512,
29105 IX86_BUILTIN_VPERMI2VARPS512,
29106 IX86_BUILTIN_VPERMI2VARQ512,
29107 IX86_BUILTIN_VPERMILPD512,
29108 IX86_BUILTIN_VPERMILPS512,
29109 IX86_BUILTIN_VPERMILVARPD512,
29110 IX86_BUILTIN_VPERMILVARPS512,
29111 IX86_BUILTIN_VPERMT2VARD512,
29112 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29113 IX86_BUILTIN_VPERMT2VARPD512,
29114 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29115 IX86_BUILTIN_VPERMT2VARPS512,
29116 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29117 IX86_BUILTIN_VPERMT2VARQ512,
29118 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29119 IX86_BUILTIN_VPERMVARDF512,
29120 IX86_BUILTIN_VPERMVARDI512,
29121 IX86_BUILTIN_VPERMVARSF512,
29122 IX86_BUILTIN_VPERMVARSI512,
29123 IX86_BUILTIN_VTERNLOGD512_MASK,
29124 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29125 IX86_BUILTIN_VTERNLOGQ512_MASK,
29126 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29127
29128 /* Mask arithmetic operations */
29129 IX86_BUILTIN_KAND16,
29130 IX86_BUILTIN_KANDN16,
29131 IX86_BUILTIN_KNOT16,
29132 IX86_BUILTIN_KOR16,
29133 IX86_BUILTIN_KORTESTC16,
29134 IX86_BUILTIN_KORTESTZ16,
29135 IX86_BUILTIN_KUNPCKBW,
29136 IX86_BUILTIN_KXNOR16,
29137 IX86_BUILTIN_KXOR16,
29138 IX86_BUILTIN_KMOV16,
29139
29140 /* AVX512VL. */
29141 IX86_BUILTIN_PMOVUSQD256_MEM,
29142 IX86_BUILTIN_PMOVUSQD128_MEM,
29143 IX86_BUILTIN_PMOVSQD256_MEM,
29144 IX86_BUILTIN_PMOVSQD128_MEM,
29145 IX86_BUILTIN_PMOVQD256_MEM,
29146 IX86_BUILTIN_PMOVQD128_MEM,
29147 IX86_BUILTIN_PMOVUSQW256_MEM,
29148 IX86_BUILTIN_PMOVUSQW128_MEM,
29149 IX86_BUILTIN_PMOVSQW256_MEM,
29150 IX86_BUILTIN_PMOVSQW128_MEM,
29151 IX86_BUILTIN_PMOVQW256_MEM,
29152 IX86_BUILTIN_PMOVQW128_MEM,
29153 IX86_BUILTIN_PMOVUSQB256_MEM,
29154 IX86_BUILTIN_PMOVUSQB128_MEM,
29155 IX86_BUILTIN_PMOVSQB256_MEM,
29156 IX86_BUILTIN_PMOVSQB128_MEM,
29157 IX86_BUILTIN_PMOVQB256_MEM,
29158 IX86_BUILTIN_PMOVQB128_MEM,
29159 IX86_BUILTIN_PMOVUSDW256_MEM,
29160 IX86_BUILTIN_PMOVUSDW128_MEM,
29161 IX86_BUILTIN_PMOVSDW256_MEM,
29162 IX86_BUILTIN_PMOVSDW128_MEM,
29163 IX86_BUILTIN_PMOVDW256_MEM,
29164 IX86_BUILTIN_PMOVDW128_MEM,
29165 IX86_BUILTIN_PMOVUSDB256_MEM,
29166 IX86_BUILTIN_PMOVUSDB128_MEM,
29167 IX86_BUILTIN_PMOVSDB256_MEM,
29168 IX86_BUILTIN_PMOVSDB128_MEM,
29169 IX86_BUILTIN_PMOVDB256_MEM,
29170 IX86_BUILTIN_PMOVDB128_MEM,
29171 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29172 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29173 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29174 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29175 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29176 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29177 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29178 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29179 IX86_BUILTIN_LOADAPD256_MASK,
29180 IX86_BUILTIN_LOADAPD128_MASK,
29181 IX86_BUILTIN_LOADAPS256_MASK,
29182 IX86_BUILTIN_LOADAPS128_MASK,
29183 IX86_BUILTIN_STOREAPD256_MASK,
29184 IX86_BUILTIN_STOREAPD128_MASK,
29185 IX86_BUILTIN_STOREAPS256_MASK,
29186 IX86_BUILTIN_STOREAPS128_MASK,
29187 IX86_BUILTIN_LOADUPD256_MASK,
29188 IX86_BUILTIN_LOADUPD128_MASK,
29189 IX86_BUILTIN_LOADUPS256_MASK,
29190 IX86_BUILTIN_LOADUPS128_MASK,
29191 IX86_BUILTIN_STOREUPD256_MASK,
29192 IX86_BUILTIN_STOREUPD128_MASK,
29193 IX86_BUILTIN_STOREUPS256_MASK,
29194 IX86_BUILTIN_STOREUPS128_MASK,
29195 IX86_BUILTIN_LOADDQUDI256_MASK,
29196 IX86_BUILTIN_LOADDQUDI128_MASK,
29197 IX86_BUILTIN_LOADDQUSI256_MASK,
29198 IX86_BUILTIN_LOADDQUSI128_MASK,
29199 IX86_BUILTIN_LOADDQUHI256_MASK,
29200 IX86_BUILTIN_LOADDQUHI128_MASK,
29201 IX86_BUILTIN_LOADDQUQI256_MASK,
29202 IX86_BUILTIN_LOADDQUQI128_MASK,
29203 IX86_BUILTIN_STOREDQUDI256_MASK,
29204 IX86_BUILTIN_STOREDQUDI128_MASK,
29205 IX86_BUILTIN_STOREDQUSI256_MASK,
29206 IX86_BUILTIN_STOREDQUSI128_MASK,
29207 IX86_BUILTIN_STOREDQUHI256_MASK,
29208 IX86_BUILTIN_STOREDQUHI128_MASK,
29209 IX86_BUILTIN_STOREDQUQI256_MASK,
29210 IX86_BUILTIN_STOREDQUQI128_MASK,
29211 IX86_BUILTIN_COMPRESSPDSTORE256,
29212 IX86_BUILTIN_COMPRESSPDSTORE128,
29213 IX86_BUILTIN_COMPRESSPSSTORE256,
29214 IX86_BUILTIN_COMPRESSPSSTORE128,
29215 IX86_BUILTIN_PCOMPRESSQSTORE256,
29216 IX86_BUILTIN_PCOMPRESSQSTORE128,
29217 IX86_BUILTIN_PCOMPRESSDSTORE256,
29218 IX86_BUILTIN_PCOMPRESSDSTORE128,
29219 IX86_BUILTIN_EXPANDPDLOAD256,
29220 IX86_BUILTIN_EXPANDPDLOAD128,
29221 IX86_BUILTIN_EXPANDPSLOAD256,
29222 IX86_BUILTIN_EXPANDPSLOAD128,
29223 IX86_BUILTIN_PEXPANDQLOAD256,
29224 IX86_BUILTIN_PEXPANDQLOAD128,
29225 IX86_BUILTIN_PEXPANDDLOAD256,
29226 IX86_BUILTIN_PEXPANDDLOAD128,
29227 IX86_BUILTIN_EXPANDPDLOAD256Z,
29228 IX86_BUILTIN_EXPANDPDLOAD128Z,
29229 IX86_BUILTIN_EXPANDPSLOAD256Z,
29230 IX86_BUILTIN_EXPANDPSLOAD128Z,
29231 IX86_BUILTIN_PEXPANDQLOAD256Z,
29232 IX86_BUILTIN_PEXPANDQLOAD128Z,
29233 IX86_BUILTIN_PEXPANDDLOAD256Z,
29234 IX86_BUILTIN_PEXPANDDLOAD128Z,
29235 IX86_BUILTIN_PALIGNR256_MASK,
29236 IX86_BUILTIN_PALIGNR128_MASK,
29237 IX86_BUILTIN_MOVDQA64_256_MASK,
29238 IX86_BUILTIN_MOVDQA64_128_MASK,
29239 IX86_BUILTIN_MOVDQA32_256_MASK,
29240 IX86_BUILTIN_MOVDQA32_128_MASK,
29241 IX86_BUILTIN_MOVAPD256_MASK,
29242 IX86_BUILTIN_MOVAPD128_MASK,
29243 IX86_BUILTIN_MOVAPS256_MASK,
29244 IX86_BUILTIN_MOVAPS128_MASK,
29245 IX86_BUILTIN_MOVDQUHI256_MASK,
29246 IX86_BUILTIN_MOVDQUHI128_MASK,
29247 IX86_BUILTIN_MOVDQUQI256_MASK,
29248 IX86_BUILTIN_MOVDQUQI128_MASK,
29249 IX86_BUILTIN_MINPS128_MASK,
29250 IX86_BUILTIN_MAXPS128_MASK,
29251 IX86_BUILTIN_MINPD128_MASK,
29252 IX86_BUILTIN_MAXPD128_MASK,
29253 IX86_BUILTIN_MAXPD256_MASK,
29254 IX86_BUILTIN_MAXPS256_MASK,
29255 IX86_BUILTIN_MINPD256_MASK,
29256 IX86_BUILTIN_MINPS256_MASK,
29257 IX86_BUILTIN_MULPS128_MASK,
29258 IX86_BUILTIN_DIVPS128_MASK,
29259 IX86_BUILTIN_MULPD128_MASK,
29260 IX86_BUILTIN_DIVPD128_MASK,
29261 IX86_BUILTIN_DIVPD256_MASK,
29262 IX86_BUILTIN_DIVPS256_MASK,
29263 IX86_BUILTIN_MULPD256_MASK,
29264 IX86_BUILTIN_MULPS256_MASK,
29265 IX86_BUILTIN_ADDPD128_MASK,
29266 IX86_BUILTIN_ADDPD256_MASK,
29267 IX86_BUILTIN_ADDPS128_MASK,
29268 IX86_BUILTIN_ADDPS256_MASK,
29269 IX86_BUILTIN_SUBPD128_MASK,
29270 IX86_BUILTIN_SUBPD256_MASK,
29271 IX86_BUILTIN_SUBPS128_MASK,
29272 IX86_BUILTIN_SUBPS256_MASK,
29273 IX86_BUILTIN_XORPD256_MASK,
29274 IX86_BUILTIN_XORPD128_MASK,
29275 IX86_BUILTIN_XORPS256_MASK,
29276 IX86_BUILTIN_XORPS128_MASK,
29277 IX86_BUILTIN_ORPD256_MASK,
29278 IX86_BUILTIN_ORPD128_MASK,
29279 IX86_BUILTIN_ORPS256_MASK,
29280 IX86_BUILTIN_ORPS128_MASK,
29281 IX86_BUILTIN_BROADCASTF32x2_256,
29282 IX86_BUILTIN_BROADCASTI32x2_256,
29283 IX86_BUILTIN_BROADCASTI32x2_128,
29284 IX86_BUILTIN_BROADCASTF64X2_256,
29285 IX86_BUILTIN_BROADCASTI64X2_256,
29286 IX86_BUILTIN_BROADCASTF32X4_256,
29287 IX86_BUILTIN_BROADCASTI32X4_256,
29288 IX86_BUILTIN_EXTRACTF32X4_256,
29289 IX86_BUILTIN_EXTRACTI32X4_256,
29290 IX86_BUILTIN_DBPSADBW256,
29291 IX86_BUILTIN_DBPSADBW128,
29292 IX86_BUILTIN_CVTTPD2QQ256,
29293 IX86_BUILTIN_CVTTPD2QQ128,
29294 IX86_BUILTIN_CVTTPD2UQQ256,
29295 IX86_BUILTIN_CVTTPD2UQQ128,
29296 IX86_BUILTIN_CVTPD2QQ256,
29297 IX86_BUILTIN_CVTPD2QQ128,
29298 IX86_BUILTIN_CVTPD2UQQ256,
29299 IX86_BUILTIN_CVTPD2UQQ128,
29300 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29301 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29302 IX86_BUILTIN_CVTTPS2QQ256,
29303 IX86_BUILTIN_CVTTPS2QQ128,
29304 IX86_BUILTIN_CVTTPS2UQQ256,
29305 IX86_BUILTIN_CVTTPS2UQQ128,
29306 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29307 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29308 IX86_BUILTIN_CVTTPS2UDQ256,
29309 IX86_BUILTIN_CVTTPS2UDQ128,
29310 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29311 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29312 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29313 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29314 IX86_BUILTIN_CVTPD2DQ256_MASK,
29315 IX86_BUILTIN_CVTPD2DQ128_MASK,
29316 IX86_BUILTIN_CVTDQ2PD256_MASK,
29317 IX86_BUILTIN_CVTDQ2PD128_MASK,
29318 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29319 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29320 IX86_BUILTIN_CVTDQ2PS256_MASK,
29321 IX86_BUILTIN_CVTDQ2PS128_MASK,
29322 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29323 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29324 IX86_BUILTIN_CVTPS2PD256_MASK,
29325 IX86_BUILTIN_CVTPS2PD128_MASK,
29326 IX86_BUILTIN_PBROADCASTB256_MASK,
29327 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29328 IX86_BUILTIN_PBROADCASTB128_MASK,
29329 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29330 IX86_BUILTIN_PBROADCASTW256_MASK,
29331 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29332 IX86_BUILTIN_PBROADCASTW128_MASK,
29333 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29334 IX86_BUILTIN_PBROADCASTD256_MASK,
29335 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29336 IX86_BUILTIN_PBROADCASTD128_MASK,
29337 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29338 IX86_BUILTIN_PBROADCASTQ256_MASK,
29339 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29340 IX86_BUILTIN_PBROADCASTQ128_MASK,
29341 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29342 IX86_BUILTIN_BROADCASTSS256,
29343 IX86_BUILTIN_BROADCASTSS128,
29344 IX86_BUILTIN_BROADCASTSD256,
29345 IX86_BUILTIN_EXTRACTF64X2_256,
29346 IX86_BUILTIN_EXTRACTI64X2_256,
29347 IX86_BUILTIN_INSERTF32X4_256,
29348 IX86_BUILTIN_INSERTI32X4_256,
29349 IX86_BUILTIN_PMOVSXBW256_MASK,
29350 IX86_BUILTIN_PMOVSXBW128_MASK,
29351 IX86_BUILTIN_PMOVSXBD256_MASK,
29352 IX86_BUILTIN_PMOVSXBD128_MASK,
29353 IX86_BUILTIN_PMOVSXBQ256_MASK,
29354 IX86_BUILTIN_PMOVSXBQ128_MASK,
29355 IX86_BUILTIN_PMOVSXWD256_MASK,
29356 IX86_BUILTIN_PMOVSXWD128_MASK,
29357 IX86_BUILTIN_PMOVSXWQ256_MASK,
29358 IX86_BUILTIN_PMOVSXWQ128_MASK,
29359 IX86_BUILTIN_PMOVSXDQ256_MASK,
29360 IX86_BUILTIN_PMOVSXDQ128_MASK,
29361 IX86_BUILTIN_PMOVZXBW256_MASK,
29362 IX86_BUILTIN_PMOVZXBW128_MASK,
29363 IX86_BUILTIN_PMOVZXBD256_MASK,
29364 IX86_BUILTIN_PMOVZXBD128_MASK,
29365 IX86_BUILTIN_PMOVZXBQ256_MASK,
29366 IX86_BUILTIN_PMOVZXBQ128_MASK,
29367 IX86_BUILTIN_PMOVZXWD256_MASK,
29368 IX86_BUILTIN_PMOVZXWD128_MASK,
29369 IX86_BUILTIN_PMOVZXWQ256_MASK,
29370 IX86_BUILTIN_PMOVZXWQ128_MASK,
29371 IX86_BUILTIN_PMOVZXDQ256_MASK,
29372 IX86_BUILTIN_PMOVZXDQ128_MASK,
29373 IX86_BUILTIN_REDUCEPD256_MASK,
29374 IX86_BUILTIN_REDUCEPD128_MASK,
29375 IX86_BUILTIN_REDUCEPS256_MASK,
29376 IX86_BUILTIN_REDUCEPS128_MASK,
29377 IX86_BUILTIN_REDUCESD_MASK,
29378 IX86_BUILTIN_REDUCESS_MASK,
29379 IX86_BUILTIN_VPERMVARHI256_MASK,
29380 IX86_BUILTIN_VPERMVARHI128_MASK,
29381 IX86_BUILTIN_VPERMT2VARHI256,
29382 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29383 IX86_BUILTIN_VPERMT2VARHI128,
29384 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29385 IX86_BUILTIN_VPERMI2VARHI256,
29386 IX86_BUILTIN_VPERMI2VARHI128,
29387 IX86_BUILTIN_RCP14PD256,
29388 IX86_BUILTIN_RCP14PD128,
29389 IX86_BUILTIN_RCP14PS256,
29390 IX86_BUILTIN_RCP14PS128,
29391 IX86_BUILTIN_RSQRT14PD256_MASK,
29392 IX86_BUILTIN_RSQRT14PD128_MASK,
29393 IX86_BUILTIN_RSQRT14PS256_MASK,
29394 IX86_BUILTIN_RSQRT14PS128_MASK,
29395 IX86_BUILTIN_SQRTPD256_MASK,
29396 IX86_BUILTIN_SQRTPD128_MASK,
29397 IX86_BUILTIN_SQRTPS256_MASK,
29398 IX86_BUILTIN_SQRTPS128_MASK,
29399 IX86_BUILTIN_PADDB128_MASK,
29400 IX86_BUILTIN_PADDW128_MASK,
29401 IX86_BUILTIN_PADDD128_MASK,
29402 IX86_BUILTIN_PADDQ128_MASK,
29403 IX86_BUILTIN_PSUBB128_MASK,
29404 IX86_BUILTIN_PSUBW128_MASK,
29405 IX86_BUILTIN_PSUBD128_MASK,
29406 IX86_BUILTIN_PSUBQ128_MASK,
29407 IX86_BUILTIN_PADDSB128_MASK,
29408 IX86_BUILTIN_PADDSW128_MASK,
29409 IX86_BUILTIN_PSUBSB128_MASK,
29410 IX86_BUILTIN_PSUBSW128_MASK,
29411 IX86_BUILTIN_PADDUSB128_MASK,
29412 IX86_BUILTIN_PADDUSW128_MASK,
29413 IX86_BUILTIN_PSUBUSB128_MASK,
29414 IX86_BUILTIN_PSUBUSW128_MASK,
29415 IX86_BUILTIN_PADDB256_MASK,
29416 IX86_BUILTIN_PADDW256_MASK,
29417 IX86_BUILTIN_PADDD256_MASK,
29418 IX86_BUILTIN_PADDQ256_MASK,
29419 IX86_BUILTIN_PADDSB256_MASK,
29420 IX86_BUILTIN_PADDSW256_MASK,
29421 IX86_BUILTIN_PADDUSB256_MASK,
29422 IX86_BUILTIN_PADDUSW256_MASK,
29423 IX86_BUILTIN_PSUBB256_MASK,
29424 IX86_BUILTIN_PSUBW256_MASK,
29425 IX86_BUILTIN_PSUBD256_MASK,
29426 IX86_BUILTIN_PSUBQ256_MASK,
29427 IX86_BUILTIN_PSUBSB256_MASK,
29428 IX86_BUILTIN_PSUBSW256_MASK,
29429 IX86_BUILTIN_PSUBUSB256_MASK,
29430 IX86_BUILTIN_PSUBUSW256_MASK,
29431 IX86_BUILTIN_SHUF_F64x2_256,
29432 IX86_BUILTIN_SHUF_I64x2_256,
29433 IX86_BUILTIN_SHUF_I32x4_256,
29434 IX86_BUILTIN_SHUF_F32x4_256,
29435 IX86_BUILTIN_PMOVWB128,
29436 IX86_BUILTIN_PMOVWB256,
29437 IX86_BUILTIN_PMOVSWB128,
29438 IX86_BUILTIN_PMOVSWB256,
29439 IX86_BUILTIN_PMOVUSWB128,
29440 IX86_BUILTIN_PMOVUSWB256,
29441 IX86_BUILTIN_PMOVDB128,
29442 IX86_BUILTIN_PMOVDB256,
29443 IX86_BUILTIN_PMOVSDB128,
29444 IX86_BUILTIN_PMOVSDB256,
29445 IX86_BUILTIN_PMOVUSDB128,
29446 IX86_BUILTIN_PMOVUSDB256,
29447 IX86_BUILTIN_PMOVDW128,
29448 IX86_BUILTIN_PMOVDW256,
29449 IX86_BUILTIN_PMOVSDW128,
29450 IX86_BUILTIN_PMOVSDW256,
29451 IX86_BUILTIN_PMOVUSDW128,
29452 IX86_BUILTIN_PMOVUSDW256,
29453 IX86_BUILTIN_PMOVQB128,
29454 IX86_BUILTIN_PMOVQB256,
29455 IX86_BUILTIN_PMOVSQB128,
29456 IX86_BUILTIN_PMOVSQB256,
29457 IX86_BUILTIN_PMOVUSQB128,
29458 IX86_BUILTIN_PMOVUSQB256,
29459 IX86_BUILTIN_PMOVQW128,
29460 IX86_BUILTIN_PMOVQW256,
29461 IX86_BUILTIN_PMOVSQW128,
29462 IX86_BUILTIN_PMOVSQW256,
29463 IX86_BUILTIN_PMOVUSQW128,
29464 IX86_BUILTIN_PMOVUSQW256,
29465 IX86_BUILTIN_PMOVQD128,
29466 IX86_BUILTIN_PMOVQD256,
29467 IX86_BUILTIN_PMOVSQD128,
29468 IX86_BUILTIN_PMOVSQD256,
29469 IX86_BUILTIN_PMOVUSQD128,
29470 IX86_BUILTIN_PMOVUSQD256,
29471 IX86_BUILTIN_RANGEPD256,
29472 IX86_BUILTIN_RANGEPD128,
29473 IX86_BUILTIN_RANGEPS256,
29474 IX86_BUILTIN_RANGEPS128,
29475 IX86_BUILTIN_GETEXPPS256,
29476 IX86_BUILTIN_GETEXPPD256,
29477 IX86_BUILTIN_GETEXPPS128,
29478 IX86_BUILTIN_GETEXPPD128,
29479 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29480 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29481 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29482 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29483 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29484 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29485 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29486 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29487 IX86_BUILTIN_PABSQ256,
29488 IX86_BUILTIN_PABSQ128,
29489 IX86_BUILTIN_PABSD256_MASK,
29490 IX86_BUILTIN_PABSD128_MASK,
29491 IX86_BUILTIN_PMULHRSW256_MASK,
29492 IX86_BUILTIN_PMULHRSW128_MASK,
29493 IX86_BUILTIN_PMULHUW128_MASK,
29494 IX86_BUILTIN_PMULHUW256_MASK,
29495 IX86_BUILTIN_PMULHW256_MASK,
29496 IX86_BUILTIN_PMULHW128_MASK,
29497 IX86_BUILTIN_PMULLW256_MASK,
29498 IX86_BUILTIN_PMULLW128_MASK,
29499 IX86_BUILTIN_PMULLQ256,
29500 IX86_BUILTIN_PMULLQ128,
29501 IX86_BUILTIN_ANDPD256_MASK,
29502 IX86_BUILTIN_ANDPD128_MASK,
29503 IX86_BUILTIN_ANDPS256_MASK,
29504 IX86_BUILTIN_ANDPS128_MASK,
29505 IX86_BUILTIN_ANDNPD256_MASK,
29506 IX86_BUILTIN_ANDNPD128_MASK,
29507 IX86_BUILTIN_ANDNPS256_MASK,
29508 IX86_BUILTIN_ANDNPS128_MASK,
29509 IX86_BUILTIN_PSLLWI128_MASK,
29510 IX86_BUILTIN_PSLLDI128_MASK,
29511 IX86_BUILTIN_PSLLQI128_MASK,
29512 IX86_BUILTIN_PSLLW128_MASK,
29513 IX86_BUILTIN_PSLLD128_MASK,
29514 IX86_BUILTIN_PSLLQ128_MASK,
29515 IX86_BUILTIN_PSLLWI256_MASK,
29516 IX86_BUILTIN_PSLLW256_MASK,
29517 IX86_BUILTIN_PSLLDI256_MASK,
29518 IX86_BUILTIN_PSLLD256_MASK,
29519 IX86_BUILTIN_PSLLQI256_MASK,
29520 IX86_BUILTIN_PSLLQ256_MASK,
29521 IX86_BUILTIN_PSRADI128_MASK,
29522 IX86_BUILTIN_PSRAD128_MASK,
29523 IX86_BUILTIN_PSRADI256_MASK,
29524 IX86_BUILTIN_PSRAD256_MASK,
29525 IX86_BUILTIN_PSRAQI128_MASK,
29526 IX86_BUILTIN_PSRAQ128_MASK,
29527 IX86_BUILTIN_PSRAQI256_MASK,
29528 IX86_BUILTIN_PSRAQ256_MASK,
29529 IX86_BUILTIN_PANDD256,
29530 IX86_BUILTIN_PANDD128,
29531 IX86_BUILTIN_PSRLDI128_MASK,
29532 IX86_BUILTIN_PSRLD128_MASK,
29533 IX86_BUILTIN_PSRLDI256_MASK,
29534 IX86_BUILTIN_PSRLD256_MASK,
29535 IX86_BUILTIN_PSRLQI128_MASK,
29536 IX86_BUILTIN_PSRLQ128_MASK,
29537 IX86_BUILTIN_PSRLQI256_MASK,
29538 IX86_BUILTIN_PSRLQ256_MASK,
29539 IX86_BUILTIN_PANDQ256,
29540 IX86_BUILTIN_PANDQ128,
29541 IX86_BUILTIN_PANDND256,
29542 IX86_BUILTIN_PANDND128,
29543 IX86_BUILTIN_PANDNQ256,
29544 IX86_BUILTIN_PANDNQ128,
29545 IX86_BUILTIN_PORD256,
29546 IX86_BUILTIN_PORD128,
29547 IX86_BUILTIN_PORQ256,
29548 IX86_BUILTIN_PORQ128,
29549 IX86_BUILTIN_PXORD256,
29550 IX86_BUILTIN_PXORD128,
29551 IX86_BUILTIN_PXORQ256,
29552 IX86_BUILTIN_PXORQ128,
29553 IX86_BUILTIN_PACKSSWB256_MASK,
29554 IX86_BUILTIN_PACKSSWB128_MASK,
29555 IX86_BUILTIN_PACKUSWB256_MASK,
29556 IX86_BUILTIN_PACKUSWB128_MASK,
29557 IX86_BUILTIN_RNDSCALEPS256,
29558 IX86_BUILTIN_RNDSCALEPD256,
29559 IX86_BUILTIN_RNDSCALEPS128,
29560 IX86_BUILTIN_RNDSCALEPD128,
29561 IX86_BUILTIN_VTERNLOGQ256_MASK,
29562 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29563 IX86_BUILTIN_VTERNLOGD256_MASK,
29564 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29565 IX86_BUILTIN_VTERNLOGQ128_MASK,
29566 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29567 IX86_BUILTIN_VTERNLOGD128_MASK,
29568 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29569 IX86_BUILTIN_SCALEFPD256,
29570 IX86_BUILTIN_SCALEFPS256,
29571 IX86_BUILTIN_SCALEFPD128,
29572 IX86_BUILTIN_SCALEFPS128,
29573 IX86_BUILTIN_VFMADDPD256_MASK,
29574 IX86_BUILTIN_VFMADDPD256_MASK3,
29575 IX86_BUILTIN_VFMADDPD256_MASKZ,
29576 IX86_BUILTIN_VFMADDPD128_MASK,
29577 IX86_BUILTIN_VFMADDPD128_MASK3,
29578 IX86_BUILTIN_VFMADDPD128_MASKZ,
29579 IX86_BUILTIN_VFMADDPS256_MASK,
29580 IX86_BUILTIN_VFMADDPS256_MASK3,
29581 IX86_BUILTIN_VFMADDPS256_MASKZ,
29582 IX86_BUILTIN_VFMADDPS128_MASK,
29583 IX86_BUILTIN_VFMADDPS128_MASK3,
29584 IX86_BUILTIN_VFMADDPS128_MASKZ,
29585 IX86_BUILTIN_VFMSUBPD256_MASK3,
29586 IX86_BUILTIN_VFMSUBPD128_MASK3,
29587 IX86_BUILTIN_VFMSUBPS256_MASK3,
29588 IX86_BUILTIN_VFMSUBPS128_MASK3,
29589 IX86_BUILTIN_VFNMADDPD256_MASK,
29590 IX86_BUILTIN_VFNMADDPD128_MASK,
29591 IX86_BUILTIN_VFNMADDPS256_MASK,
29592 IX86_BUILTIN_VFNMADDPS128_MASK,
29593 IX86_BUILTIN_VFNMSUBPD256_MASK,
29594 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29595 IX86_BUILTIN_VFNMSUBPD128_MASK,
29596 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29597 IX86_BUILTIN_VFNMSUBPS256_MASK,
29598 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29599 IX86_BUILTIN_VFNMSUBPS128_MASK,
29600 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29601 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29602 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29603 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29604 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29605 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29606 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29607 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29608 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29609 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29610 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29611 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29612 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29613 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29614 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29615 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29616 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29617 IX86_BUILTIN_INSERTF64X2_256,
29618 IX86_BUILTIN_INSERTI64X2_256,
29619 IX86_BUILTIN_PSRAVV16HI,
29620 IX86_BUILTIN_PSRAVV8HI,
29621 IX86_BUILTIN_PMADDUBSW256_MASK,
29622 IX86_BUILTIN_PMADDUBSW128_MASK,
29623 IX86_BUILTIN_PMADDWD256_MASK,
29624 IX86_BUILTIN_PMADDWD128_MASK,
29625 IX86_BUILTIN_PSRLVV16HI,
29626 IX86_BUILTIN_PSRLVV8HI,
29627 IX86_BUILTIN_CVTPS2DQ256_MASK,
29628 IX86_BUILTIN_CVTPS2DQ128_MASK,
29629 IX86_BUILTIN_CVTPS2UDQ256,
29630 IX86_BUILTIN_CVTPS2UDQ128,
29631 IX86_BUILTIN_CVTPS2QQ256,
29632 IX86_BUILTIN_CVTPS2QQ128,
29633 IX86_BUILTIN_CVTPS2UQQ256,
29634 IX86_BUILTIN_CVTPS2UQQ128,
29635 IX86_BUILTIN_GETMANTPS256,
29636 IX86_BUILTIN_GETMANTPS128,
29637 IX86_BUILTIN_GETMANTPD256,
29638 IX86_BUILTIN_GETMANTPD128,
29639 IX86_BUILTIN_MOVDDUP256_MASK,
29640 IX86_BUILTIN_MOVDDUP128_MASK,
29641 IX86_BUILTIN_MOVSHDUP256_MASK,
29642 IX86_BUILTIN_MOVSHDUP128_MASK,
29643 IX86_BUILTIN_MOVSLDUP256_MASK,
29644 IX86_BUILTIN_MOVSLDUP128_MASK,
29645 IX86_BUILTIN_CVTQQ2PS256,
29646 IX86_BUILTIN_CVTQQ2PS128,
29647 IX86_BUILTIN_CVTUQQ2PS256,
29648 IX86_BUILTIN_CVTUQQ2PS128,
29649 IX86_BUILTIN_CVTQQ2PD256,
29650 IX86_BUILTIN_CVTQQ2PD128,
29651 IX86_BUILTIN_CVTUQQ2PD256,
29652 IX86_BUILTIN_CVTUQQ2PD128,
29653 IX86_BUILTIN_VPERMT2VARQ256,
29654 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29655 IX86_BUILTIN_VPERMT2VARD256,
29656 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29657 IX86_BUILTIN_VPERMI2VARQ256,
29658 IX86_BUILTIN_VPERMI2VARD256,
29659 IX86_BUILTIN_VPERMT2VARPD256,
29660 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29661 IX86_BUILTIN_VPERMT2VARPS256,
29662 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29663 IX86_BUILTIN_VPERMI2VARPD256,
29664 IX86_BUILTIN_VPERMI2VARPS256,
29665 IX86_BUILTIN_VPERMT2VARQ128,
29666 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29667 IX86_BUILTIN_VPERMT2VARD128,
29668 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29669 IX86_BUILTIN_VPERMI2VARQ128,
29670 IX86_BUILTIN_VPERMI2VARD128,
29671 IX86_BUILTIN_VPERMT2VARPD128,
29672 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29673 IX86_BUILTIN_VPERMT2VARPS128,
29674 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29675 IX86_BUILTIN_VPERMI2VARPD128,
29676 IX86_BUILTIN_VPERMI2VARPS128,
29677 IX86_BUILTIN_PSHUFB256_MASK,
29678 IX86_BUILTIN_PSHUFB128_MASK,
29679 IX86_BUILTIN_PSHUFHW256_MASK,
29680 IX86_BUILTIN_PSHUFHW128_MASK,
29681 IX86_BUILTIN_PSHUFLW256_MASK,
29682 IX86_BUILTIN_PSHUFLW128_MASK,
29683 IX86_BUILTIN_PSHUFD256_MASK,
29684 IX86_BUILTIN_PSHUFD128_MASK,
29685 IX86_BUILTIN_SHUFPD256_MASK,
29686 IX86_BUILTIN_SHUFPD128_MASK,
29687 IX86_BUILTIN_SHUFPS256_MASK,
29688 IX86_BUILTIN_SHUFPS128_MASK,
29689 IX86_BUILTIN_PROLVQ256,
29690 IX86_BUILTIN_PROLVQ128,
29691 IX86_BUILTIN_PROLQ256,
29692 IX86_BUILTIN_PROLQ128,
29693 IX86_BUILTIN_PRORVQ256,
29694 IX86_BUILTIN_PRORVQ128,
29695 IX86_BUILTIN_PRORQ256,
29696 IX86_BUILTIN_PRORQ128,
29697 IX86_BUILTIN_PSRAVQ128,
29698 IX86_BUILTIN_PSRAVQ256,
29699 IX86_BUILTIN_PSLLVV4DI_MASK,
29700 IX86_BUILTIN_PSLLVV2DI_MASK,
29701 IX86_BUILTIN_PSLLVV8SI_MASK,
29702 IX86_BUILTIN_PSLLVV4SI_MASK,
29703 IX86_BUILTIN_PSRAVV8SI_MASK,
29704 IX86_BUILTIN_PSRAVV4SI_MASK,
29705 IX86_BUILTIN_PSRLVV4DI_MASK,
29706 IX86_BUILTIN_PSRLVV2DI_MASK,
29707 IX86_BUILTIN_PSRLVV8SI_MASK,
29708 IX86_BUILTIN_PSRLVV4SI_MASK,
29709 IX86_BUILTIN_PSRAWI256_MASK,
29710 IX86_BUILTIN_PSRAW256_MASK,
29711 IX86_BUILTIN_PSRAWI128_MASK,
29712 IX86_BUILTIN_PSRAW128_MASK,
29713 IX86_BUILTIN_PSRLWI256_MASK,
29714 IX86_BUILTIN_PSRLW256_MASK,
29715 IX86_BUILTIN_PSRLWI128_MASK,
29716 IX86_BUILTIN_PSRLW128_MASK,
29717 IX86_BUILTIN_PRORVD256,
29718 IX86_BUILTIN_PROLVD256,
29719 IX86_BUILTIN_PRORD256,
29720 IX86_BUILTIN_PROLD256,
29721 IX86_BUILTIN_PRORVD128,
29722 IX86_BUILTIN_PROLVD128,
29723 IX86_BUILTIN_PRORD128,
29724 IX86_BUILTIN_PROLD128,
29725 IX86_BUILTIN_FPCLASSPD256,
29726 IX86_BUILTIN_FPCLASSPD128,
29727 IX86_BUILTIN_FPCLASSSD,
29728 IX86_BUILTIN_FPCLASSPS256,
29729 IX86_BUILTIN_FPCLASSPS128,
29730 IX86_BUILTIN_FPCLASSSS,
29731 IX86_BUILTIN_CVTB2MASK128,
29732 IX86_BUILTIN_CVTB2MASK256,
29733 IX86_BUILTIN_CVTW2MASK128,
29734 IX86_BUILTIN_CVTW2MASK256,
29735 IX86_BUILTIN_CVTD2MASK128,
29736 IX86_BUILTIN_CVTD2MASK256,
29737 IX86_BUILTIN_CVTQ2MASK128,
29738 IX86_BUILTIN_CVTQ2MASK256,
29739 IX86_BUILTIN_CVTMASK2B128,
29740 IX86_BUILTIN_CVTMASK2B256,
29741 IX86_BUILTIN_CVTMASK2W128,
29742 IX86_BUILTIN_CVTMASK2W256,
29743 IX86_BUILTIN_CVTMASK2D128,
29744 IX86_BUILTIN_CVTMASK2D256,
29745 IX86_BUILTIN_CVTMASK2Q128,
29746 IX86_BUILTIN_CVTMASK2Q256,
29747 IX86_BUILTIN_PCMPEQB128_MASK,
29748 IX86_BUILTIN_PCMPEQB256_MASK,
29749 IX86_BUILTIN_PCMPEQW128_MASK,
29750 IX86_BUILTIN_PCMPEQW256_MASK,
29751 IX86_BUILTIN_PCMPEQD128_MASK,
29752 IX86_BUILTIN_PCMPEQD256_MASK,
29753 IX86_BUILTIN_PCMPEQQ128_MASK,
29754 IX86_BUILTIN_PCMPEQQ256_MASK,
29755 IX86_BUILTIN_PCMPGTB128_MASK,
29756 IX86_BUILTIN_PCMPGTB256_MASK,
29757 IX86_BUILTIN_PCMPGTW128_MASK,
29758 IX86_BUILTIN_PCMPGTW256_MASK,
29759 IX86_BUILTIN_PCMPGTD128_MASK,
29760 IX86_BUILTIN_PCMPGTD256_MASK,
29761 IX86_BUILTIN_PCMPGTQ128_MASK,
29762 IX86_BUILTIN_PCMPGTQ256_MASK,
29763 IX86_BUILTIN_PTESTMB128,
29764 IX86_BUILTIN_PTESTMB256,
29765 IX86_BUILTIN_PTESTMW128,
29766 IX86_BUILTIN_PTESTMW256,
29767 IX86_BUILTIN_PTESTMD128,
29768 IX86_BUILTIN_PTESTMD256,
29769 IX86_BUILTIN_PTESTMQ128,
29770 IX86_BUILTIN_PTESTMQ256,
29771 IX86_BUILTIN_PTESTNMB128,
29772 IX86_BUILTIN_PTESTNMB256,
29773 IX86_BUILTIN_PTESTNMW128,
29774 IX86_BUILTIN_PTESTNMW256,
29775 IX86_BUILTIN_PTESTNMD128,
29776 IX86_BUILTIN_PTESTNMD256,
29777 IX86_BUILTIN_PTESTNMQ128,
29778 IX86_BUILTIN_PTESTNMQ256,
29779 IX86_BUILTIN_PBROADCASTMB128,
29780 IX86_BUILTIN_PBROADCASTMB256,
29781 IX86_BUILTIN_PBROADCASTMW128,
29782 IX86_BUILTIN_PBROADCASTMW256,
29783 IX86_BUILTIN_COMPRESSPD256,
29784 IX86_BUILTIN_COMPRESSPD128,
29785 IX86_BUILTIN_COMPRESSPS256,
29786 IX86_BUILTIN_COMPRESSPS128,
29787 IX86_BUILTIN_PCOMPRESSQ256,
29788 IX86_BUILTIN_PCOMPRESSQ128,
29789 IX86_BUILTIN_PCOMPRESSD256,
29790 IX86_BUILTIN_PCOMPRESSD128,
29791 IX86_BUILTIN_EXPANDPD256,
29792 IX86_BUILTIN_EXPANDPD128,
29793 IX86_BUILTIN_EXPANDPS256,
29794 IX86_BUILTIN_EXPANDPS128,
29795 IX86_BUILTIN_PEXPANDQ256,
29796 IX86_BUILTIN_PEXPANDQ128,
29797 IX86_BUILTIN_PEXPANDD256,
29798 IX86_BUILTIN_PEXPANDD128,
29799 IX86_BUILTIN_EXPANDPD256Z,
29800 IX86_BUILTIN_EXPANDPD128Z,
29801 IX86_BUILTIN_EXPANDPS256Z,
29802 IX86_BUILTIN_EXPANDPS128Z,
29803 IX86_BUILTIN_PEXPANDQ256Z,
29804 IX86_BUILTIN_PEXPANDQ128Z,
29805 IX86_BUILTIN_PEXPANDD256Z,
29806 IX86_BUILTIN_PEXPANDD128Z,
29807 IX86_BUILTIN_PMAXSD256_MASK,
29808 IX86_BUILTIN_PMINSD256_MASK,
29809 IX86_BUILTIN_PMAXUD256_MASK,
29810 IX86_BUILTIN_PMINUD256_MASK,
29811 IX86_BUILTIN_PMAXSD128_MASK,
29812 IX86_BUILTIN_PMINSD128_MASK,
29813 IX86_BUILTIN_PMAXUD128_MASK,
29814 IX86_BUILTIN_PMINUD128_MASK,
29815 IX86_BUILTIN_PMAXSQ256_MASK,
29816 IX86_BUILTIN_PMINSQ256_MASK,
29817 IX86_BUILTIN_PMAXUQ256_MASK,
29818 IX86_BUILTIN_PMINUQ256_MASK,
29819 IX86_BUILTIN_PMAXSQ128_MASK,
29820 IX86_BUILTIN_PMINSQ128_MASK,
29821 IX86_BUILTIN_PMAXUQ128_MASK,
29822 IX86_BUILTIN_PMINUQ128_MASK,
29823 IX86_BUILTIN_PMINSB256_MASK,
29824 IX86_BUILTIN_PMINUB256_MASK,
29825 IX86_BUILTIN_PMAXSB256_MASK,
29826 IX86_BUILTIN_PMAXUB256_MASK,
29827 IX86_BUILTIN_PMINSB128_MASK,
29828 IX86_BUILTIN_PMINUB128_MASK,
29829 IX86_BUILTIN_PMAXSB128_MASK,
29830 IX86_BUILTIN_PMAXUB128_MASK,
29831 IX86_BUILTIN_PMINSW256_MASK,
29832 IX86_BUILTIN_PMINUW256_MASK,
29833 IX86_BUILTIN_PMAXSW256_MASK,
29834 IX86_BUILTIN_PMAXUW256_MASK,
29835 IX86_BUILTIN_PMINSW128_MASK,
29836 IX86_BUILTIN_PMINUW128_MASK,
29837 IX86_BUILTIN_PMAXSW128_MASK,
29838 IX86_BUILTIN_PMAXUW128_MASK,
29839 IX86_BUILTIN_VPCONFLICTQ256,
29840 IX86_BUILTIN_VPCONFLICTD256,
29841 IX86_BUILTIN_VPCLZCNTQ256,
29842 IX86_BUILTIN_VPCLZCNTD256,
29843 IX86_BUILTIN_UNPCKHPD256_MASK,
29844 IX86_BUILTIN_UNPCKHPD128_MASK,
29845 IX86_BUILTIN_UNPCKHPS256_MASK,
29846 IX86_BUILTIN_UNPCKHPS128_MASK,
29847 IX86_BUILTIN_UNPCKLPD256_MASK,
29848 IX86_BUILTIN_UNPCKLPD128_MASK,
29849 IX86_BUILTIN_UNPCKLPS256_MASK,
29850 IX86_BUILTIN_VPCONFLICTQ128,
29851 IX86_BUILTIN_VPCONFLICTD128,
29852 IX86_BUILTIN_VPCLZCNTQ128,
29853 IX86_BUILTIN_VPCLZCNTD128,
29854 IX86_BUILTIN_UNPCKLPS128_MASK,
29855 IX86_BUILTIN_ALIGND256,
29856 IX86_BUILTIN_ALIGNQ256,
29857 IX86_BUILTIN_ALIGND128,
29858 IX86_BUILTIN_ALIGNQ128,
29859 IX86_BUILTIN_CVTPS2PH256_MASK,
29860 IX86_BUILTIN_CVTPS2PH_MASK,
29861 IX86_BUILTIN_CVTPH2PS_MASK,
29862 IX86_BUILTIN_CVTPH2PS256_MASK,
29863 IX86_BUILTIN_PUNPCKHDQ128_MASK,
29864 IX86_BUILTIN_PUNPCKHDQ256_MASK,
29865 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
29866 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
29867 IX86_BUILTIN_PUNPCKLDQ128_MASK,
29868 IX86_BUILTIN_PUNPCKLDQ256_MASK,
29869 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
29870 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
29871 IX86_BUILTIN_PUNPCKHBW128_MASK,
29872 IX86_BUILTIN_PUNPCKHBW256_MASK,
29873 IX86_BUILTIN_PUNPCKHWD128_MASK,
29874 IX86_BUILTIN_PUNPCKHWD256_MASK,
29875 IX86_BUILTIN_PUNPCKLBW128_MASK,
29876 IX86_BUILTIN_PUNPCKLBW256_MASK,
29877 IX86_BUILTIN_PUNPCKLWD128_MASK,
29878 IX86_BUILTIN_PUNPCKLWD256_MASK,
29879 IX86_BUILTIN_PSLLVV16HI,
29880 IX86_BUILTIN_PSLLVV8HI,
29881 IX86_BUILTIN_PACKSSDW256_MASK,
29882 IX86_BUILTIN_PACKSSDW128_MASK,
29883 IX86_BUILTIN_PACKUSDW256_MASK,
29884 IX86_BUILTIN_PACKUSDW128_MASK,
29885 IX86_BUILTIN_PAVGB256_MASK,
29886 IX86_BUILTIN_PAVGW256_MASK,
29887 IX86_BUILTIN_PAVGB128_MASK,
29888 IX86_BUILTIN_PAVGW128_MASK,
29889 IX86_BUILTIN_VPERMVARSF256_MASK,
29890 IX86_BUILTIN_VPERMVARDF256_MASK,
29891 IX86_BUILTIN_VPERMDF256_MASK,
29892 IX86_BUILTIN_PABSB256_MASK,
29893 IX86_BUILTIN_PABSB128_MASK,
29894 IX86_BUILTIN_PABSW256_MASK,
29895 IX86_BUILTIN_PABSW128_MASK,
29896 IX86_BUILTIN_VPERMILVARPD_MASK,
29897 IX86_BUILTIN_VPERMILVARPS_MASK,
29898 IX86_BUILTIN_VPERMILVARPD256_MASK,
29899 IX86_BUILTIN_VPERMILVARPS256_MASK,
29900 IX86_BUILTIN_VPERMILPD_MASK,
29901 IX86_BUILTIN_VPERMILPS_MASK,
29902 IX86_BUILTIN_VPERMILPD256_MASK,
29903 IX86_BUILTIN_VPERMILPS256_MASK,
29904 IX86_BUILTIN_BLENDMQ256,
29905 IX86_BUILTIN_BLENDMD256,
29906 IX86_BUILTIN_BLENDMPD256,
29907 IX86_BUILTIN_BLENDMPS256,
29908 IX86_BUILTIN_BLENDMQ128,
29909 IX86_BUILTIN_BLENDMD128,
29910 IX86_BUILTIN_BLENDMPD128,
29911 IX86_BUILTIN_BLENDMPS128,
29912 IX86_BUILTIN_BLENDMW256,
29913 IX86_BUILTIN_BLENDMB256,
29914 IX86_BUILTIN_BLENDMW128,
29915 IX86_BUILTIN_BLENDMB128,
29916 IX86_BUILTIN_PMULLD256_MASK,
29917 IX86_BUILTIN_PMULLD128_MASK,
29918 IX86_BUILTIN_PMULUDQ256_MASK,
29919 IX86_BUILTIN_PMULDQ256_MASK,
29920 IX86_BUILTIN_PMULDQ128_MASK,
29921 IX86_BUILTIN_PMULUDQ128_MASK,
29922 IX86_BUILTIN_CVTPD2PS256_MASK,
29923 IX86_BUILTIN_CVTPD2PS_MASK,
29924 IX86_BUILTIN_VPERMVARSI256_MASK,
29925 IX86_BUILTIN_VPERMVARDI256_MASK,
29926 IX86_BUILTIN_VPERMDI256_MASK,
29927 IX86_BUILTIN_CMPQ256,
29928 IX86_BUILTIN_CMPD256,
29929 IX86_BUILTIN_UCMPQ256,
29930 IX86_BUILTIN_UCMPD256,
29931 IX86_BUILTIN_CMPB256,
29932 IX86_BUILTIN_CMPW256,
29933 IX86_BUILTIN_UCMPB256,
29934 IX86_BUILTIN_UCMPW256,
29935 IX86_BUILTIN_CMPPD256_MASK,
29936 IX86_BUILTIN_CMPPS256_MASK,
29937 IX86_BUILTIN_CMPQ128,
29938 IX86_BUILTIN_CMPD128,
29939 IX86_BUILTIN_UCMPQ128,
29940 IX86_BUILTIN_UCMPD128,
29941 IX86_BUILTIN_CMPB128,
29942 IX86_BUILTIN_CMPW128,
29943 IX86_BUILTIN_UCMPB128,
29944 IX86_BUILTIN_UCMPW128,
29945 IX86_BUILTIN_CMPPD128_MASK,
29946 IX86_BUILTIN_CMPPS128_MASK,
29947
29948 IX86_BUILTIN_GATHER3SIV8SF,
29949 IX86_BUILTIN_GATHER3SIV4SF,
29950 IX86_BUILTIN_GATHER3SIV4DF,
29951 IX86_BUILTIN_GATHER3SIV2DF,
29952 IX86_BUILTIN_GATHER3DIV8SF,
29953 IX86_BUILTIN_GATHER3DIV4SF,
29954 IX86_BUILTIN_GATHER3DIV4DF,
29955 IX86_BUILTIN_GATHER3DIV2DF,
29956 IX86_BUILTIN_GATHER3SIV8SI,
29957 IX86_BUILTIN_GATHER3SIV4SI,
29958 IX86_BUILTIN_GATHER3SIV4DI,
29959 IX86_BUILTIN_GATHER3SIV2DI,
29960 IX86_BUILTIN_GATHER3DIV8SI,
29961 IX86_BUILTIN_GATHER3DIV4SI,
29962 IX86_BUILTIN_GATHER3DIV4DI,
29963 IX86_BUILTIN_GATHER3DIV2DI,
29964 IX86_BUILTIN_SCATTERSIV8SF,
29965 IX86_BUILTIN_SCATTERSIV4SF,
29966 IX86_BUILTIN_SCATTERSIV4DF,
29967 IX86_BUILTIN_SCATTERSIV2DF,
29968 IX86_BUILTIN_SCATTERDIV8SF,
29969 IX86_BUILTIN_SCATTERDIV4SF,
29970 IX86_BUILTIN_SCATTERDIV4DF,
29971 IX86_BUILTIN_SCATTERDIV2DF,
29972 IX86_BUILTIN_SCATTERSIV8SI,
29973 IX86_BUILTIN_SCATTERSIV4SI,
29974 IX86_BUILTIN_SCATTERSIV4DI,
29975 IX86_BUILTIN_SCATTERSIV2DI,
29976 IX86_BUILTIN_SCATTERDIV8SI,
29977 IX86_BUILTIN_SCATTERDIV4SI,
29978 IX86_BUILTIN_SCATTERDIV4DI,
29979 IX86_BUILTIN_SCATTERDIV2DI,
29980
29981 /* AVX512DQ. */
29982 IX86_BUILTIN_RANGESD128,
29983 IX86_BUILTIN_RANGESS128,
29984 IX86_BUILTIN_KUNPCKWD,
29985 IX86_BUILTIN_KUNPCKDQ,
29986 IX86_BUILTIN_BROADCASTF32x2_512,
29987 IX86_BUILTIN_BROADCASTI32x2_512,
29988 IX86_BUILTIN_BROADCASTF64X2_512,
29989 IX86_BUILTIN_BROADCASTI64X2_512,
29990 IX86_BUILTIN_BROADCASTF32X8_512,
29991 IX86_BUILTIN_BROADCASTI32X8_512,
29992 IX86_BUILTIN_EXTRACTF64X2_512,
29993 IX86_BUILTIN_EXTRACTF32X8,
29994 IX86_BUILTIN_EXTRACTI64X2_512,
29995 IX86_BUILTIN_EXTRACTI32X8,
29996 IX86_BUILTIN_REDUCEPD512_MASK,
29997 IX86_BUILTIN_REDUCEPS512_MASK,
29998 IX86_BUILTIN_PMULLQ512,
29999 IX86_BUILTIN_XORPD512,
30000 IX86_BUILTIN_XORPS512,
30001 IX86_BUILTIN_ORPD512,
30002 IX86_BUILTIN_ORPS512,
30003 IX86_BUILTIN_ANDPD512,
30004 IX86_BUILTIN_ANDPS512,
30005 IX86_BUILTIN_ANDNPD512,
30006 IX86_BUILTIN_ANDNPS512,
30007 IX86_BUILTIN_INSERTF32X8,
30008 IX86_BUILTIN_INSERTI32X8,
30009 IX86_BUILTIN_INSERTF64X2_512,
30010 IX86_BUILTIN_INSERTI64X2_512,
30011 IX86_BUILTIN_FPCLASSPD512,
30012 IX86_BUILTIN_FPCLASSPS512,
30013 IX86_BUILTIN_CVTD2MASK512,
30014 IX86_BUILTIN_CVTQ2MASK512,
30015 IX86_BUILTIN_CVTMASK2D512,
30016 IX86_BUILTIN_CVTMASK2Q512,
30017 IX86_BUILTIN_CVTPD2QQ512,
30018 IX86_BUILTIN_CVTPS2QQ512,
30019 IX86_BUILTIN_CVTPD2UQQ512,
30020 IX86_BUILTIN_CVTPS2UQQ512,
30021 IX86_BUILTIN_CVTQQ2PS512,
30022 IX86_BUILTIN_CVTUQQ2PS512,
30023 IX86_BUILTIN_CVTQQ2PD512,
30024 IX86_BUILTIN_CVTUQQ2PD512,
30025 IX86_BUILTIN_CVTTPS2QQ512,
30026 IX86_BUILTIN_CVTTPS2UQQ512,
30027 IX86_BUILTIN_CVTTPD2QQ512,
30028 IX86_BUILTIN_CVTTPD2UQQ512,
30029 IX86_BUILTIN_RANGEPS512,
30030 IX86_BUILTIN_RANGEPD512,
30031
30032 /* AVX512BW. */
30033 IX86_BUILTIN_PACKUSDW512,
30034 IX86_BUILTIN_PACKSSDW512,
30035 IX86_BUILTIN_LOADDQUHI512_MASK,
30036 IX86_BUILTIN_LOADDQUQI512_MASK,
30037 IX86_BUILTIN_PSLLDQ512,
30038 IX86_BUILTIN_PSRLDQ512,
30039 IX86_BUILTIN_STOREDQUHI512_MASK,
30040 IX86_BUILTIN_STOREDQUQI512_MASK,
30041 IX86_BUILTIN_PALIGNR512,
30042 IX86_BUILTIN_PALIGNR512_MASK,
30043 IX86_BUILTIN_MOVDQUHI512_MASK,
30044 IX86_BUILTIN_MOVDQUQI512_MASK,
30045 IX86_BUILTIN_PSADBW512,
30046 IX86_BUILTIN_DBPSADBW512,
30047 IX86_BUILTIN_PBROADCASTB512,
30048 IX86_BUILTIN_PBROADCASTB512_GPR,
30049 IX86_BUILTIN_PBROADCASTW512,
30050 IX86_BUILTIN_PBROADCASTW512_GPR,
30051 IX86_BUILTIN_PMOVSXBW512_MASK,
30052 IX86_BUILTIN_PMOVZXBW512_MASK,
30053 IX86_BUILTIN_VPERMVARHI512_MASK,
30054 IX86_BUILTIN_VPERMT2VARHI512,
30055 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30056 IX86_BUILTIN_VPERMI2VARHI512,
30057 IX86_BUILTIN_PAVGB512,
30058 IX86_BUILTIN_PAVGW512,
30059 IX86_BUILTIN_PADDB512,
30060 IX86_BUILTIN_PSUBB512,
30061 IX86_BUILTIN_PSUBSB512,
30062 IX86_BUILTIN_PADDSB512,
30063 IX86_BUILTIN_PSUBUSB512,
30064 IX86_BUILTIN_PADDUSB512,
30065 IX86_BUILTIN_PSUBW512,
30066 IX86_BUILTIN_PADDW512,
30067 IX86_BUILTIN_PSUBSW512,
30068 IX86_BUILTIN_PADDSW512,
30069 IX86_BUILTIN_PSUBUSW512,
30070 IX86_BUILTIN_PADDUSW512,
30071 IX86_BUILTIN_PMAXUW512,
30072 IX86_BUILTIN_PMAXSW512,
30073 IX86_BUILTIN_PMINUW512,
30074 IX86_BUILTIN_PMINSW512,
30075 IX86_BUILTIN_PMAXUB512,
30076 IX86_BUILTIN_PMAXSB512,
30077 IX86_BUILTIN_PMINUB512,
30078 IX86_BUILTIN_PMINSB512,
30079 IX86_BUILTIN_PMOVWB512,
30080 IX86_BUILTIN_PMOVSWB512,
30081 IX86_BUILTIN_PMOVUSWB512,
30082 IX86_BUILTIN_PMULHRSW512_MASK,
30083 IX86_BUILTIN_PMULHUW512_MASK,
30084 IX86_BUILTIN_PMULHW512_MASK,
30085 IX86_BUILTIN_PMULLW512_MASK,
30086 IX86_BUILTIN_PSLLWI512_MASK,
30087 IX86_BUILTIN_PSLLW512_MASK,
30088 IX86_BUILTIN_PACKSSWB512,
30089 IX86_BUILTIN_PACKUSWB512,
30090 IX86_BUILTIN_PSRAVV32HI,
30091 IX86_BUILTIN_PMADDUBSW512_MASK,
30092 IX86_BUILTIN_PMADDWD512_MASK,
30093 IX86_BUILTIN_PSRLVV32HI,
30094 IX86_BUILTIN_PUNPCKHBW512,
30095 IX86_BUILTIN_PUNPCKHWD512,
30096 IX86_BUILTIN_PUNPCKLBW512,
30097 IX86_BUILTIN_PUNPCKLWD512,
30098 IX86_BUILTIN_PSHUFB512,
30099 IX86_BUILTIN_PSHUFHW512,
30100 IX86_BUILTIN_PSHUFLW512,
30101 IX86_BUILTIN_PSRAWI512,
30102 IX86_BUILTIN_PSRAW512,
30103 IX86_BUILTIN_PSRLWI512,
30104 IX86_BUILTIN_PSRLW512,
30105 IX86_BUILTIN_CVTB2MASK512,
30106 IX86_BUILTIN_CVTW2MASK512,
30107 IX86_BUILTIN_CVTMASK2B512,
30108 IX86_BUILTIN_CVTMASK2W512,
30109 IX86_BUILTIN_PCMPEQB512_MASK,
30110 IX86_BUILTIN_PCMPEQW512_MASK,
30111 IX86_BUILTIN_PCMPGTB512_MASK,
30112 IX86_BUILTIN_PCMPGTW512_MASK,
30113 IX86_BUILTIN_PTESTMB512,
30114 IX86_BUILTIN_PTESTMW512,
30115 IX86_BUILTIN_PTESTNMB512,
30116 IX86_BUILTIN_PTESTNMW512,
30117 IX86_BUILTIN_PSLLVV32HI,
30118 IX86_BUILTIN_PABSB512,
30119 IX86_BUILTIN_PABSW512,
30120 IX86_BUILTIN_BLENDMW512,
30121 IX86_BUILTIN_BLENDMB512,
30122 IX86_BUILTIN_CMPB512,
30123 IX86_BUILTIN_CMPW512,
30124 IX86_BUILTIN_UCMPB512,
30125 IX86_BUILTIN_UCMPW512,
30126
30127 /* Alternate 4 and 8 element gather/scatter for the vectorizer
30128 where all operands are 32-byte or 64-byte wide respectively. */
30129 IX86_BUILTIN_GATHERALTSIV4DF,
30130 IX86_BUILTIN_GATHERALTDIV8SF,
30131 IX86_BUILTIN_GATHERALTSIV4DI,
30132 IX86_BUILTIN_GATHERALTDIV8SI,
30133 IX86_BUILTIN_GATHER3ALTDIV16SF,
30134 IX86_BUILTIN_GATHER3ALTDIV16SI,
30135 IX86_BUILTIN_GATHER3ALTSIV4DF,
30136 IX86_BUILTIN_GATHER3ALTDIV8SF,
30137 IX86_BUILTIN_GATHER3ALTSIV4DI,
30138 IX86_BUILTIN_GATHER3ALTDIV8SI,
30139 IX86_BUILTIN_GATHER3ALTSIV8DF,
30140 IX86_BUILTIN_GATHER3ALTSIV8DI,
30141 IX86_BUILTIN_GATHER3DIV16SF,
30142 IX86_BUILTIN_GATHER3DIV16SI,
30143 IX86_BUILTIN_GATHER3DIV8DF,
30144 IX86_BUILTIN_GATHER3DIV8DI,
30145 IX86_BUILTIN_GATHER3SIV16SF,
30146 IX86_BUILTIN_GATHER3SIV16SI,
30147 IX86_BUILTIN_GATHER3SIV8DF,
30148 IX86_BUILTIN_GATHER3SIV8DI,
30149 IX86_BUILTIN_SCATTERDIV16SF,
30150 IX86_BUILTIN_SCATTERDIV16SI,
30151 IX86_BUILTIN_SCATTERDIV8DF,
30152 IX86_BUILTIN_SCATTERDIV8DI,
30153 IX86_BUILTIN_SCATTERSIV16SF,
30154 IX86_BUILTIN_SCATTERSIV16SI,
30155 IX86_BUILTIN_SCATTERSIV8DF,
30156 IX86_BUILTIN_SCATTERSIV8DI,
30157
30158 /* AVX512PF */
30159 IX86_BUILTIN_GATHERPFQPD,
30160 IX86_BUILTIN_GATHERPFDPS,
30161 IX86_BUILTIN_GATHERPFDPD,
30162 IX86_BUILTIN_GATHERPFQPS,
30163 IX86_BUILTIN_SCATTERPFDPD,
30164 IX86_BUILTIN_SCATTERPFDPS,
30165 IX86_BUILTIN_SCATTERPFQPD,
30166 IX86_BUILTIN_SCATTERPFQPS,
30167
30168 /* AVX-512ER */
30169 IX86_BUILTIN_EXP2PD_MASK,
30170 IX86_BUILTIN_EXP2PS_MASK,
30171 IX86_BUILTIN_EXP2PS,
30172 IX86_BUILTIN_RCP28PD,
30173 IX86_BUILTIN_RCP28PS,
30174 IX86_BUILTIN_RCP28SD,
30175 IX86_BUILTIN_RCP28SS,
30176 IX86_BUILTIN_RSQRT28PD,
30177 IX86_BUILTIN_RSQRT28PS,
30178 IX86_BUILTIN_RSQRT28SD,
30179 IX86_BUILTIN_RSQRT28SS,
30180
30181 /* AVX-512IFMA */
30182 IX86_BUILTIN_VPMADD52LUQ512,
30183 IX86_BUILTIN_VPMADD52HUQ512,
30184 IX86_BUILTIN_VPMADD52LUQ256,
30185 IX86_BUILTIN_VPMADD52HUQ256,
30186 IX86_BUILTIN_VPMADD52LUQ128,
30187 IX86_BUILTIN_VPMADD52HUQ128,
30188 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30189 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30190 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30191 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30192 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30193 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30194
30195 /* AVX-512VBMI */
30196 IX86_BUILTIN_VPMULTISHIFTQB512,
30197 IX86_BUILTIN_VPMULTISHIFTQB256,
30198 IX86_BUILTIN_VPMULTISHIFTQB128,
30199 IX86_BUILTIN_VPERMVARQI512_MASK,
30200 IX86_BUILTIN_VPERMT2VARQI512,
30201 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30202 IX86_BUILTIN_VPERMI2VARQI512,
30203 IX86_BUILTIN_VPERMVARQI256_MASK,
30204 IX86_BUILTIN_VPERMVARQI128_MASK,
30205 IX86_BUILTIN_VPERMT2VARQI256,
30206 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30207 IX86_BUILTIN_VPERMT2VARQI128,
30208 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30209 IX86_BUILTIN_VPERMI2VARQI256,
30210 IX86_BUILTIN_VPERMI2VARQI128,
30211
30212 /* SHA builtins. */
30213 IX86_BUILTIN_SHA1MSG1,
30214 IX86_BUILTIN_SHA1MSG2,
30215 IX86_BUILTIN_SHA1NEXTE,
30216 IX86_BUILTIN_SHA1RNDS4,
30217 IX86_BUILTIN_SHA256MSG1,
30218 IX86_BUILTIN_SHA256MSG2,
30219 IX86_BUILTIN_SHA256RNDS2,
30220
30221 /* CLWB instructions. */
30222 IX86_BUILTIN_CLWB,
30223
30224 /* PCOMMIT instructions. */
30225 IX86_BUILTIN_PCOMMIT,
30226
30227 /* CLFLUSHOPT instructions. */
30228 IX86_BUILTIN_CLFLUSHOPT,
30229
30230 /* TFmode support builtins. */
30231 IX86_BUILTIN_INFQ,
30232 IX86_BUILTIN_HUGE_VALQ,
30233 IX86_BUILTIN_FABSQ,
30234 IX86_BUILTIN_COPYSIGNQ,
30235
30236 /* Vectorizer support builtins. */
30237 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30238 IX86_BUILTIN_CPYSGNPS,
30239 IX86_BUILTIN_CPYSGNPD,
30240 IX86_BUILTIN_CPYSGNPS256,
30241 IX86_BUILTIN_CPYSGNPS512,
30242 IX86_BUILTIN_CPYSGNPD256,
30243 IX86_BUILTIN_CPYSGNPD512,
30244 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30245 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30246
30247
30248 /* FMA4 instructions. */
30249 IX86_BUILTIN_VFMADDSS,
30250 IX86_BUILTIN_VFMADDSD,
30251 IX86_BUILTIN_VFMADDPS,
30252 IX86_BUILTIN_VFMADDPD,
30253 IX86_BUILTIN_VFMADDPS256,
30254 IX86_BUILTIN_VFMADDPD256,
30255 IX86_BUILTIN_VFMADDSUBPS,
30256 IX86_BUILTIN_VFMADDSUBPD,
30257 IX86_BUILTIN_VFMADDSUBPS256,
30258 IX86_BUILTIN_VFMADDSUBPD256,
30259
30260 /* FMA3 instructions. */
30261 IX86_BUILTIN_VFMADDSS3,
30262 IX86_BUILTIN_VFMADDSD3,
30263
30264 /* XOP instructions. */
30265 IX86_BUILTIN_VPCMOV,
30266 IX86_BUILTIN_VPCMOV_V2DI,
30267 IX86_BUILTIN_VPCMOV_V4SI,
30268 IX86_BUILTIN_VPCMOV_V8HI,
30269 IX86_BUILTIN_VPCMOV_V16QI,
30270 IX86_BUILTIN_VPCMOV_V4SF,
30271 IX86_BUILTIN_VPCMOV_V2DF,
30272 IX86_BUILTIN_VPCMOV256,
30273 IX86_BUILTIN_VPCMOV_V4DI256,
30274 IX86_BUILTIN_VPCMOV_V8SI256,
30275 IX86_BUILTIN_VPCMOV_V16HI256,
30276 IX86_BUILTIN_VPCMOV_V32QI256,
30277 IX86_BUILTIN_VPCMOV_V8SF256,
30278 IX86_BUILTIN_VPCMOV_V4DF256,
30279
30280 IX86_BUILTIN_VPPERM,
30281
30282 IX86_BUILTIN_VPMACSSWW,
30283 IX86_BUILTIN_VPMACSWW,
30284 IX86_BUILTIN_VPMACSSWD,
30285 IX86_BUILTIN_VPMACSWD,
30286 IX86_BUILTIN_VPMACSSDD,
30287 IX86_BUILTIN_VPMACSDD,
30288 IX86_BUILTIN_VPMACSSDQL,
30289 IX86_BUILTIN_VPMACSSDQH,
30290 IX86_BUILTIN_VPMACSDQL,
30291 IX86_BUILTIN_VPMACSDQH,
30292 IX86_BUILTIN_VPMADCSSWD,
30293 IX86_BUILTIN_VPMADCSWD,
30294
30295 IX86_BUILTIN_VPHADDBW,
30296 IX86_BUILTIN_VPHADDBD,
30297 IX86_BUILTIN_VPHADDBQ,
30298 IX86_BUILTIN_VPHADDWD,
30299 IX86_BUILTIN_VPHADDWQ,
30300 IX86_BUILTIN_VPHADDDQ,
30301 IX86_BUILTIN_VPHADDUBW,
30302 IX86_BUILTIN_VPHADDUBD,
30303 IX86_BUILTIN_VPHADDUBQ,
30304 IX86_BUILTIN_VPHADDUWD,
30305 IX86_BUILTIN_VPHADDUWQ,
30306 IX86_BUILTIN_VPHADDUDQ,
30307 IX86_BUILTIN_VPHSUBBW,
30308 IX86_BUILTIN_VPHSUBWD,
30309 IX86_BUILTIN_VPHSUBDQ,
30310
30311 IX86_BUILTIN_VPROTB,
30312 IX86_BUILTIN_VPROTW,
30313 IX86_BUILTIN_VPROTD,
30314 IX86_BUILTIN_VPROTQ,
30315 IX86_BUILTIN_VPROTB_IMM,
30316 IX86_BUILTIN_VPROTW_IMM,
30317 IX86_BUILTIN_VPROTD_IMM,
30318 IX86_BUILTIN_VPROTQ_IMM,
30319
30320 IX86_BUILTIN_VPSHLB,
30321 IX86_BUILTIN_VPSHLW,
30322 IX86_BUILTIN_VPSHLD,
30323 IX86_BUILTIN_VPSHLQ,
30324 IX86_BUILTIN_VPSHAB,
30325 IX86_BUILTIN_VPSHAW,
30326 IX86_BUILTIN_VPSHAD,
30327 IX86_BUILTIN_VPSHAQ,
30328
30329 IX86_BUILTIN_VFRCZSS,
30330 IX86_BUILTIN_VFRCZSD,
30331 IX86_BUILTIN_VFRCZPS,
30332 IX86_BUILTIN_VFRCZPD,
30333 IX86_BUILTIN_VFRCZPS256,
30334 IX86_BUILTIN_VFRCZPD256,
30335
30336 IX86_BUILTIN_VPCOMEQUB,
30337 IX86_BUILTIN_VPCOMNEUB,
30338 IX86_BUILTIN_VPCOMLTUB,
30339 IX86_BUILTIN_VPCOMLEUB,
30340 IX86_BUILTIN_VPCOMGTUB,
30341 IX86_BUILTIN_VPCOMGEUB,
30342 IX86_BUILTIN_VPCOMFALSEUB,
30343 IX86_BUILTIN_VPCOMTRUEUB,
30344
30345 IX86_BUILTIN_VPCOMEQUW,
30346 IX86_BUILTIN_VPCOMNEUW,
30347 IX86_BUILTIN_VPCOMLTUW,
30348 IX86_BUILTIN_VPCOMLEUW,
30349 IX86_BUILTIN_VPCOMGTUW,
30350 IX86_BUILTIN_VPCOMGEUW,
30351 IX86_BUILTIN_VPCOMFALSEUW,
30352 IX86_BUILTIN_VPCOMTRUEUW,
30353
30354 IX86_BUILTIN_VPCOMEQUD,
30355 IX86_BUILTIN_VPCOMNEUD,
30356 IX86_BUILTIN_VPCOMLTUD,
30357 IX86_BUILTIN_VPCOMLEUD,
30358 IX86_BUILTIN_VPCOMGTUD,
30359 IX86_BUILTIN_VPCOMGEUD,
30360 IX86_BUILTIN_VPCOMFALSEUD,
30361 IX86_BUILTIN_VPCOMTRUEUD,
30362
30363 IX86_BUILTIN_VPCOMEQUQ,
30364 IX86_BUILTIN_VPCOMNEUQ,
30365 IX86_BUILTIN_VPCOMLTUQ,
30366 IX86_BUILTIN_VPCOMLEUQ,
30367 IX86_BUILTIN_VPCOMGTUQ,
30368 IX86_BUILTIN_VPCOMGEUQ,
30369 IX86_BUILTIN_VPCOMFALSEUQ,
30370 IX86_BUILTIN_VPCOMTRUEUQ,
30371
30372 IX86_BUILTIN_VPCOMEQB,
30373 IX86_BUILTIN_VPCOMNEB,
30374 IX86_BUILTIN_VPCOMLTB,
30375 IX86_BUILTIN_VPCOMLEB,
30376 IX86_BUILTIN_VPCOMGTB,
30377 IX86_BUILTIN_VPCOMGEB,
30378 IX86_BUILTIN_VPCOMFALSEB,
30379 IX86_BUILTIN_VPCOMTRUEB,
30380
30381 IX86_BUILTIN_VPCOMEQW,
30382 IX86_BUILTIN_VPCOMNEW,
30383 IX86_BUILTIN_VPCOMLTW,
30384 IX86_BUILTIN_VPCOMLEW,
30385 IX86_BUILTIN_VPCOMGTW,
30386 IX86_BUILTIN_VPCOMGEW,
30387 IX86_BUILTIN_VPCOMFALSEW,
30388 IX86_BUILTIN_VPCOMTRUEW,
30389
30390 IX86_BUILTIN_VPCOMEQD,
30391 IX86_BUILTIN_VPCOMNED,
30392 IX86_BUILTIN_VPCOMLTD,
30393 IX86_BUILTIN_VPCOMLED,
30394 IX86_BUILTIN_VPCOMGTD,
30395 IX86_BUILTIN_VPCOMGED,
30396 IX86_BUILTIN_VPCOMFALSED,
30397 IX86_BUILTIN_VPCOMTRUED,
30398
30399 IX86_BUILTIN_VPCOMEQQ,
30400 IX86_BUILTIN_VPCOMNEQ,
30401 IX86_BUILTIN_VPCOMLTQ,
30402 IX86_BUILTIN_VPCOMLEQ,
30403 IX86_BUILTIN_VPCOMGTQ,
30404 IX86_BUILTIN_VPCOMGEQ,
30405 IX86_BUILTIN_VPCOMFALSEQ,
30406 IX86_BUILTIN_VPCOMTRUEQ,
30407
30408 /* LWP instructions. */
30409 IX86_BUILTIN_LLWPCB,
30410 IX86_BUILTIN_SLWPCB,
30411 IX86_BUILTIN_LWPVAL32,
30412 IX86_BUILTIN_LWPVAL64,
30413 IX86_BUILTIN_LWPINS32,
30414 IX86_BUILTIN_LWPINS64,
30415
30416 IX86_BUILTIN_CLZS,
30417
30418 /* RTM */
30419 IX86_BUILTIN_XBEGIN,
30420 IX86_BUILTIN_XEND,
30421 IX86_BUILTIN_XABORT,
30422 IX86_BUILTIN_XTEST,
30423
30424 /* MPX */
30425 IX86_BUILTIN_BNDMK,
30426 IX86_BUILTIN_BNDSTX,
30427 IX86_BUILTIN_BNDLDX,
30428 IX86_BUILTIN_BNDCL,
30429 IX86_BUILTIN_BNDCU,
30430 IX86_BUILTIN_BNDRET,
30431 IX86_BUILTIN_BNDNARROW,
30432 IX86_BUILTIN_BNDINT,
30433 IX86_BUILTIN_SIZEOF,
30434 IX86_BUILTIN_BNDLOWER,
30435 IX86_BUILTIN_BNDUPPER,
30436
30437 /* BMI instructions. */
30438 IX86_BUILTIN_BEXTR32,
30439 IX86_BUILTIN_BEXTR64,
30440 IX86_BUILTIN_CTZS,
30441
30442 /* TBM instructions. */
30443 IX86_BUILTIN_BEXTRI32,
30444 IX86_BUILTIN_BEXTRI64,
30445
30446 /* BMI2 instructions. */
30447 IX86_BUILTIN_BZHI32,
30448 IX86_BUILTIN_BZHI64,
30449 IX86_BUILTIN_PDEP32,
30450 IX86_BUILTIN_PDEP64,
30451 IX86_BUILTIN_PEXT32,
30452 IX86_BUILTIN_PEXT64,
30453
30454 /* ADX instructions. */
30455 IX86_BUILTIN_ADDCARRYX32,
30456 IX86_BUILTIN_ADDCARRYX64,
30457
30458 /* SBB instructions. */
30459 IX86_BUILTIN_SBB32,
30460 IX86_BUILTIN_SBB64,
30461
30462 /* FSGSBASE instructions. */
30463 IX86_BUILTIN_RDFSBASE32,
30464 IX86_BUILTIN_RDFSBASE64,
30465 IX86_BUILTIN_RDGSBASE32,
30466 IX86_BUILTIN_RDGSBASE64,
30467 IX86_BUILTIN_WRFSBASE32,
30468 IX86_BUILTIN_WRFSBASE64,
30469 IX86_BUILTIN_WRGSBASE32,
30470 IX86_BUILTIN_WRGSBASE64,
30471
30472 /* RDRND instructions. */
30473 IX86_BUILTIN_RDRAND16_STEP,
30474 IX86_BUILTIN_RDRAND32_STEP,
30475 IX86_BUILTIN_RDRAND64_STEP,
30476
30477 /* RDSEED instructions. */
30478 IX86_BUILTIN_RDSEED16_STEP,
30479 IX86_BUILTIN_RDSEED32_STEP,
30480 IX86_BUILTIN_RDSEED64_STEP,
30481
30482 /* F16C instructions. */
30483 IX86_BUILTIN_CVTPH2PS,
30484 IX86_BUILTIN_CVTPH2PS256,
30485 IX86_BUILTIN_CVTPS2PH,
30486 IX86_BUILTIN_CVTPS2PH256,
30487
30488 /* CFString built-in for Darwin. */
30489 IX86_BUILTIN_CFSTRING,
30490
30491 /* Builtins to get CPU type and supported features. */
30492 IX86_BUILTIN_CPU_INIT,
30493 IX86_BUILTIN_CPU_IS,
30494 IX86_BUILTIN_CPU_SUPPORTS,
30495
30496 /* Read/write FLAGS register built-ins. */
30497 IX86_BUILTIN_READ_FLAGS,
30498 IX86_BUILTIN_WRITE_FLAGS,
30499
30500 IX86_BUILTIN_MAX
30501 };
30502
30503 /* Table for the ix86 builtin decls. */
30504 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30505
30506 /* Table of all of the builtin functions that are possible with different ISAs
30507    but are waiting to be built until a function is declared to use that
30508    ISA. */
30509 struct builtin_isa {
30510 const char *name; /* function name */
30511 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30512 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30513 bool const_p; /* true if the declaration is constant */
30514 bool leaf_p; /* true if the declaration has leaf attribute */
30515 bool nothrow_p; /* true if the declaration has nothrow attribute */
30516 bool set_and_not_built_p; /* true if the builtin is recorded but its decl is not built yet */
30517 };
30518
30519 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30520
30521
30522 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the
30523    MASK of which isa_flags to use in the ix86_builtins_isa array. Store the
30524    function decl in the ix86_builtins array. Return the function decl, or
30525    NULL_TREE if the builtin was not added.
30526
30527    If the front end has a special hook for builtin functions, delay adding
30528    builtin functions that aren't in the current ISA until the ISA is changed
30529    with function specific optimization. Doing so can save about 300K for the
30530    default compiler. When the builtin is expanded, check at that time whether
30531    it is valid.
30532
30533    If the front end doesn't have a special hook, record all builtins, even
30534    those whose instruction set isn't in the current ISA, in case the user
30535    uses function specific options for a different ISA; this way we don't get
30536    scope errors if a builtin is added in the middle of a function scope. */
30537
30538 static inline tree
30539 def_builtin (HOST_WIDE_INT mask, const char *name,
30540 enum ix86_builtin_func_type tcode,
30541 enum ix86_builtins code)
30542 {
30543 tree decl = NULL_TREE;
30544
30545 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30546 {
30547 ix86_builtins_isa[(int) code].isa = mask;
30548
30549 mask &= ~OPTION_MASK_ISA_64BIT;
30550 if (mask == 0
30551 || (mask & ix86_isa_flags) != 0
30552 || (lang_hooks.builtin_function
30553 == lang_hooks.builtin_function_ext_scope))
30554
30555 {
30556 tree type = ix86_get_builtin_func_type (tcode);
30557 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30558 NULL, NULL_TREE);
30559 ix86_builtins[(int) code] = decl;
30560 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30561 }
30562 else
30563 {
30564 ix86_builtins[(int) code] = NULL_TREE;
30565 ix86_builtins_isa[(int) code].tcode = tcode;
30566 ix86_builtins_isa[(int) code].name = name;
30567 ix86_builtins_isa[(int) code].leaf_p = false;
30568 ix86_builtins_isa[(int) code].nothrow_p = false;
30569 ix86_builtins_isa[(int) code].const_p = false;
30570 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30571 }
30572 }
30573
30574 return decl;
30575 }
30576
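/* Illustrative sketch (not part of the build): a typical use of
   def_builtin, included only to make the delayed-registration behaviour
   described above concrete.  The builtin shown is borrowed from the
   tables later in this file; the real registration sites may differ.  */
#if 0
static void
def_builtin_example (void)
{
  /* If RTM is not enabled in ix86_isa_flags and the front end does not
     register builtins through the ext-scope hook, no decl is created
     here; the request is recorded in ix86_builtins_isa[] and built
     later by ix86_add_new_builtins.  */
  def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xbegin",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_XBEGIN);
}
#endif
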
30577 /* Like def_builtin, but also marks the function decl "const". */
30578
30579 static inline tree
30580 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30581 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30582 {
30583 tree decl = def_builtin (mask, name, tcode, code);
30584 if (decl)
30585 TREE_READONLY (decl) = 1;
30586 else
30587 ix86_builtins_isa[(int) code].const_p = true;
30588
30589 return decl;
30590 }
30591
30592 /* Add any new builtin functions for a given ISA that may not have been
30593    declared yet. This saves a bit of space compared to adding all of the
30594    declarations to the tree up front, whether or not they end up used. */
30595
30596 static void
30597 ix86_add_new_builtins (HOST_WIDE_INT isa)
30598 {
30599 int i;
30600 tree saved_current_target_pragma = current_target_pragma;
30601 current_target_pragma = NULL_TREE;
30602
30603 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30604 {
30605 if ((ix86_builtins_isa[i].isa & isa) != 0
30606 && ix86_builtins_isa[i].set_and_not_built_p)
30607 {
30608 tree decl, type;
30609
30610 /* Don't define the builtin again. */
30611 ix86_builtins_isa[i].set_and_not_built_p = false;
30612
30613 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30614 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30615 type, i, BUILT_IN_MD, NULL,
30616 NULL_TREE);
30617
30618 ix86_builtins[i] = decl;
30619 if (ix86_builtins_isa[i].const_p)
30620 TREE_READONLY (decl) = 1;
30621 if (ix86_builtins_isa[i].leaf_p)
30622 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30623 NULL_TREE);
30624 if (ix86_builtins_isa[i].nothrow_p)
30625 TREE_NOTHROW (decl) = 1;
30626 }
30627 }
30628
30629 current_target_pragma = saved_current_target_pragma;
30630 }
30631
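/* Illustrative sketch (not part of the build): how a caller that has
   just enabled additional ISA bits (for instance while processing a
   target("avx2") attribute) might pick up the deferred declarations.
   The surrounding option handling is hypothetical; only the final call
   corresponds to the function above.  */
#if 0
static void
ix86_add_new_builtins_example (void)
{
  HOST_WIDE_INT extra_isa = OPTION_MASK_ISA_AVX2;

  /* Assume extra_isa has already been merged into ix86_isa_flags by
     the option-override machinery; declare every builtin that
     def_builtin recorded for it but skipped because the ISA was off
     at the time.  */
  ix86_add_new_builtins (extra_isa);
}
#endif
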
30632 /* Bits for builtin_description.flag. */
30633
30634 /* Set when we don't support the comparison natively, and should
30635    swap the comparison operands in order to support it. */
30636 #define BUILTIN_DESC_SWAP_OPERANDS 1
30637
30638 struct builtin_description
30639 {
30640 const HOST_WIDE_INT mask;
30641 const enum insn_code icode;
30642 const char *const name;
30643 const enum ix86_builtins code;
30644 const enum rtx_code comparison;
30645 const int flag;
30646 };
30647
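/* Illustrative sketch (not part of the build): a hypothetical table
   entry carrying BUILTIN_DESC_SWAP_OPERANDS, shown only to make the
   flag's purpose concrete.  The builtin name is made up; the other
   identifiers are reused from this file.  */
#if 0
static const struct builtin_description bdesc_swap_example[] =
{
  /* Expand a "less than" builtin through the native "greater than"
     comparison by letting the expander swap the two operands.  */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_example_lt",
    IX86_BUILTIN_COMILTSS, GT, BUILTIN_DESC_SWAP_OPERANDS },
};
#endif
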
30648 static const struct builtin_description bdesc_comi[] =
30649 {
30650 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30651 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30652 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30653 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30654 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30655 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30656 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30657 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30658 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30659 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30660 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30661 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30662 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30663 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30664 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30665 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30666 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30667 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30668 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30669 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30670 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30671 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30672 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30673 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
30674 };
30675
30676 static const struct builtin_description bdesc_pcmpestr[] =
30677 {
30678 /* SSE4.2 */
30679 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30680 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30681 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30682 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30683 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30684 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30685 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30686 };
30687
30688 static const struct builtin_description bdesc_pcmpistr[] =
30689 {
30690 /* SSE4.2 */
30691 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30692 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30693 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30694 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30695 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30696 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30697 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30698 };
30699
30700 /* Special builtins with variable number of arguments. */
30701 static const struct builtin_description bdesc_special_args[] =
30702 {
30703 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30704 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30705 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30706
30707 /* 80387 (for use internally for atomic compound assignment). */
30708 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30709 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30710 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30711 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30712
30713 /* MMX */
30714 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30715
30716 /* 3DNow! */
30717 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30718
30719 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30720 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30721 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30722 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30723 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30724 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30725 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30726 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30727 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30728
30729 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30730 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30731 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30732 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30733 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30734 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30735 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30736 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30737
30738 /* SSE */
30739 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30740 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30741 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30742
30743 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30744 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30745 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30746 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30747
30748 /* SSE or 3DNow!A */
30749 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30750 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30751
30752 /* SSE2 */
30753 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30754 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30755 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30756 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30757 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30758 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30759 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30760 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30761 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30762 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30763
30764 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30765 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30766
30767 /* SSE3 */
30768 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30769
30770 /* SSE4.1 */
30771 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30772
30773 /* SSE4A */
30774 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30775 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30776
30777 /* AVX */
30778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30780
30781 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30782 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30783 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30784 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30785 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30786
30787 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30788 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30789 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30790 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30792 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30794
30795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30797 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30798
30799 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
30800 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
30801 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
30802 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
30803 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
30804 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
30805 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
30806 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
30807
30808 /* AVX2 */
30809 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
30810 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
30811 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
30812 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
30813 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
30814 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
30815 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
30816 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
30817 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
30818
30819 /* AVX512F */
30820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
30841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
30842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
30843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
30844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30867
30868 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
30869 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
30870 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
30871 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
30872 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
30873 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
30874
30875 /* FSGSBASE */
30876 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30877 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30878 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30879 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30880 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30881 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30882 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30883 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30884
30885 /* RTM */
30886 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30887 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
30888 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
30889
30890 /* AVX512BW */
30891 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
30892 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
30893 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
30894 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
30895
30896 /* AVX512VL */
30897 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
30898 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
30899 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
30900 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
30901 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30902 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30903 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30904 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30905 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30906 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30907 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30908 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30909 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30910 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30911 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30912 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30913 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30914 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30915 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30916 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30917 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30918 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30919 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30920 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30921 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30922 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30923 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30924 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30925 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30926 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30927 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30928 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30929 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30930 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30931 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30932 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30933 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
30934 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
30935 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
30936 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
30937 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30938 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30939 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30940 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30941 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30942 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30943 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30944 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30945 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30946 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30947 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30948 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30949 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30950 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30951 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30952 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30953 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30954 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30955 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30956 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30957 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30958 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30959 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30960 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30961 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30962 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30963 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30964 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30965 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30966 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30967 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30968 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30969 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30970 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30971 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30972 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30973 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30974 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30975 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30976 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30977 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30978 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30979 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30980 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30981 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30982 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30983 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30984 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30985 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30986 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30987 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30988 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30989 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30990 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30991
30992 /* PCOMMIT. */
30993 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
30994 };
30995
30996 /* Builtins with variable number of arguments. */
30997 static const struct builtin_description bdesc_args[] =
30998 {
30999 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
31000 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
31001 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
31002 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31003 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31004 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31005 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31006
31007 /* MMX */
31008 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31009 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31010 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31011 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31012 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31013 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31014
31015 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31016 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31017 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31018 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31019 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31020 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31021 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31022 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31023
31024 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31025 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31026
31027 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31028 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31029 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31030 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31031
31032 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31033 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31034 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31035 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31036 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31037 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31038
31039 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31040 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31041 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31042 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31043 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31044 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31045
31046 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31047 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
31048 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31049
31050 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
31051
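  /* Shift builtins come in pairs: the "...i" forms (e.g. psllwi) take the
     count as a plain integer (the ..._SI_COUNT prototypes), while the
     register forms (e.g. psllw) take the count in an MMX vector operand
     (the ..._V4HI_COUNT / _V2SI_COUNT / _V1DI_COUNT prototypes).  */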
31052 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31053 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31054 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31055 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31056 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31057 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31058
31059 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31060 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31061 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31062 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31063 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31064 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31065
31066 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31067 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31068 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31069 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31070
31071 /* 3DNow! */
31072 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31073 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31074 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31075 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31076
31077 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31078 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31079 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31080 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31081 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31082 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31083 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31084 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31085 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31086 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31087 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31088 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31089 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31090 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31091 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31092
31093 /* 3DNow!A */
31094 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31095 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31096 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31097 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31098 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31099 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31100
31101 /* SSE */
31102 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31103 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31104 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31105 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31106 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31107 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31108 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31109 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31110 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31111 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31112 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31113 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31114
31115 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31116
31117 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31118 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31119 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31120 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31121 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31122 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31123 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31124 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31125
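  /* Vector and scalar compares: the rtx code column selects the
     predicate.  The ..._SWAP prototypes mark the "greater" variants,
     which are expanded by swapping the two operands of the
     corresponding LT/LE (or UNGE/UNGT) comparison.  */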
31126 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31127 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31128 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31129 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31130 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31131 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31132 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31133 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31134 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31135 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31136 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31137 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31138 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31139 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31140 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31141 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31142 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31143 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31144 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31145 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31146
31147 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31148 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31149 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31150 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31151
31152 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31153 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31154 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31155 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31156
31157 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31158
31159 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31160 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31161 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31162 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31163 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31164
31165 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31166 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31167 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
31168
31169 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31170
31171 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31172 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31173 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31174
31175 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31176 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
31177
31178 /* SSE MMX or 3DNow!A */
31179 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31180 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31181 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31182
31183 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31184 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31185 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31186 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31187
31188 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31189 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31190
31191 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31192
31193 /* SSE2 */
31194 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31195
31196 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31197 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31198 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31199 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31200 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31201
31202 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31203 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31204 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31205 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31206 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31207
31208 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31209
31210 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31211 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31212 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31213 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31214
31215 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31216 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31217 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31218
31219 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31220 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31221 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31222 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31223 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31224 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31225 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31226 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31227
31228 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31229 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31231 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31232 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31233 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31234 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31235 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31236 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31237 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31238 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31239 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31240 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31241 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31242 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31243 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31244 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31245 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31246 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31247 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31248
31249 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31250 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31251 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31252 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31253
31254 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31255 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31256 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31257 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31258
31259 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31260
31261 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31262 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31263 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31264
31265 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31266
31267 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31268 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31269 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31270 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31271 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31272 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31273 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31274 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31275
31276 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31277 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31278 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31279 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31280 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31281 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31282 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31283 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31284
31285 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31286 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31287
31288 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31289 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31290 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31291 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31292
31293 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31294 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31295
31296 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31297 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31298 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31299 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31300 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31301 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31302
31303 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31304 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31305 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31306 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31307
31308 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31309 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31310 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31311 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31312 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31313 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31314 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31315 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31316
31317 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31320
31321 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31323
31324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31325 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31326
31327 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31328
31329 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31330 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31331 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31332 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
31333
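  /* The ..._INT_CONVERT prototypes flag builtins whose vector operands
     are re-interpreted in a wider mode (V1TImode here) before the insn
     pattern is applied, as for these whole-register byte shifts.  */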
31334 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31335 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31336 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31337 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31338 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31339 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31340 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31341
31342 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31343 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31344 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31345 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31346 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31347 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31348 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31349
31350 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31351 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31352 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31353 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31354
31355 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31356 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31357 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31358
31359 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31360
31361 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31362
31363 /* SSE2 MMX */
31364 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31365 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31366
31367 /* SSE3 */
31368 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31369 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31370
31371 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31372 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31373 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31374 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31375 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31376 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31377
31378 /* SSSE3 */
31379 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31380 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31381 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31382 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31383 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31384 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31385
31386 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31387 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31388 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31389 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31390 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31391 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31392 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31393 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31394 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31395 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31396 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31397 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31398 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31399 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31400 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31401 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31402 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31403 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31404 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31405 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31406 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31407 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31408 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31409 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31410
31411 /* SSSE3. */
31412 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31413 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
31414
31415 /* SSE4.1 */
31416 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31417 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31418 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31419 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31420 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31421 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31422 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31423 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31424 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31425 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31426
31427 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31428 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31429 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31430 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31431 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31432 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31433 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31434 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31435 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31436 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31437 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31438 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31439 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31440
31441 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31442 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31443 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31444 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31445 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31446 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31447 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31448 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31449 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31450 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31451 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31452 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31453
31454 /* SSE4.1 */
31455 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31456 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31457 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31458 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31459
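  /* For the floor/ceil/trunc/rint variants the comparison column is
     reused to carry the ROUND_* mode constant, hence the explicit
     rtx_code casts below.  */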
31460 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31461 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31462 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31463 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31464
31465 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31466 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31467
31468 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31469 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31470
31471 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31472 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31473 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31474 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31475
31476 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31477 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31478
31479 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31480 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31481
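  /* The ptest builtins return a condition derived from the PTEST flag
     results; the rtx code selects which one (EQ for ZF set, LTU for CF
     set, GTU for both clear).  */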
31482 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31483 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31484 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31485
31486 /* SSE4.2 */
31487 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31488 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31489 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31490 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31491 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31492
31493 /* SSE4A */
31494 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31495 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31496 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31497 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31498
31499 /* AES */
31500 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31501 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31502
31503 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31504 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31505 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31506 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31507
31508 /* PCLMUL */
31509 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
31510
31511 /* AVX */
31512 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31513 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31514 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31515 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31516 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31517 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31518 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31519 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31520 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31521 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31522 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31523 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31524 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31525 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31526 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31527 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31528 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31529 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31530 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31531 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31532 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31533 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31534 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31535 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31536 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31537 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31538
31539 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31540 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31541 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31542 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31543
31544 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31545 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31546 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31547 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31548 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31549 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31550 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31551 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31552 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31553 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31554 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31555 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31556 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31557 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31558 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31559 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31560 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31561 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31562 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31563 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31564 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31565 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31566 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31567 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31568 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31569 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31570 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31571 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31572 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31573 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31574 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31575 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31576 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31577 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31578
31579 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31580 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31581 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31582
31583 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31584 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31585 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31586 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31587 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31588
31589 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31590
31591 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31592 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31593
31594 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31595 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31596 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31597 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31598
31599 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31600 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31601
31602 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31603 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31604
31605 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31606 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31607 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31608 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31609
31610 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31611 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31612
31613 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31614 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31615
31616 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31617 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31618 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31619 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31620
31621 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31622 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31623 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31624 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31625 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31626 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31627
31628 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31629 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31630 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31631 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31632 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31633 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31634 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31635 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31636 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31637 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31638 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31639 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31640 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31641 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31642 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31643
31644 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31645 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31646
31647 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31648 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31649
31650 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31651
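 /* Usage sketch (editorial, illustrative only; "add256" is a made-up
    function name): each AVX entry above binds a __builtin_ia32_* name to
    a named insn pattern (CODE_FOR_*) and a prototype (the *_FTYPE_* flag).
    With -mavx, the addpd256 descriptor lets

      typedef double v4df __attribute__ ((vector_size (32)));
      v4df add256 (v4df a, v4df b) { return __builtin_ia32_addpd256 (a, b); }

    expand through CODE_FOR_addv4df3 to a single vaddpd; the
    V4DF_FTYPE_V4DF_V4DF flag is what gives the builtin that prototype
    when it is registered.  */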
31652 /* AVX2 */
31653 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31654 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31655 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31656 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31657 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31658 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31659 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31660 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31661 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31662 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31663 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31664 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31665 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31666 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31667 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31668 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31669 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31670 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31671 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31672 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31673 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31674 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31675 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31676 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31677 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31678 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31679 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31680 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31681 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31682 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31683 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31684 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31685 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31686 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31687 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31688 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31689 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31690 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31691 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31692 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31693 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31694 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31695 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31696 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31697 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31698 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31699 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31700 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31701 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31702 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31703 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31704 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31705 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31706 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31707 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31708 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31709 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31710 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31711 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31712 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31713 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31714 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31715 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31716 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31717 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31718 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31719 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31720 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31721 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31722 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31723 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31724 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31725 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31726 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31727 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31728 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31729 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31730 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31731 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31732 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31733 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31734 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31735 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31736 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31737 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31738 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31739 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31740 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31741 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31742 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31743 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31744 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31745 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31746 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31747 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31748 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31749 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31750 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31751 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31752 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31753 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31754 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31755 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31756 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31757 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31758 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31759 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31760 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31761 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31762 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31763 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31764 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31765 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31766 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31767 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31768 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31769 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31770 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31771 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31772 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31773 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31774 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31775 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31776 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31777 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31778 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31779 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31780 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31781 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31782 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31783 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31784 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31785 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31786 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31787 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31788 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31789 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31790 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31791 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31792 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31793 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31794 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31795 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31796 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31797 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31798 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31799
31800 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31801
31802 /* BMI */
31803 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31804 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31805 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31806
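 /* Usage sketch (editorial, illustrative only; "extract_byte_at_4" is a
    made-up function name): the BMI bextr descriptor above is callable
    directly once -mbmi is enabled.  Its control operand packs the start
    bit in bits 7:0 and the field length in bits 15:8, so

      unsigned int extract_byte_at_4 (unsigned int x)
      {
        return __builtin_ia32_bextr_u32 (x, (8 << 8) | 4);
      }

    extracts an 8-bit field starting at bit 4.  */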
31807 /* TBM */
31808 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31809 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31810
31811 /* F16C */
31812 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
31813 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
31814 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
31815 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
31816
31817 /* BMI2 */
31818 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31819 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31820 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31821 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31822 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31823 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31824
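 /* Editorial note: the AVX512F "_mask" builtins below follow the usual
    masking convention assumed here: besides the operation's own inputs,
    the argument list ends with a merge (pass-through) vector and a QI or
    HI write mask with one bit per element, which is what signatures such
    as V16SI_FTYPE_V16SI_V16SI_V16SI_HI encode.  The "_maskz" variants
    zero the masked-off elements instead of merging.  */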
31825 /* AVX512F */
31826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
31827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
31828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
31829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
31830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
31831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
31832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
31840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
31842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
31843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
31850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
31852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
31857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
31858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
31859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
31860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
31861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
31862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
31863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
31864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31868 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31869 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31870 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31871 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31880 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
31881 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
31882 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
31883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
31884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31913 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
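/* Down-conversions: the pmov{db,dw,qb,qd,qw} entries above truncate, the
   pmovs* forms saturate as signed values and the pmovus* forms saturate
   as unsigned values, while the pmovsx*/pmovzx* entries sign- and
   zero-extend in the widening direction.  */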
31923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31992 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31993 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31994 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31995 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32022
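/* As throughout this table, each entry records the ISA option mask that
   gates the builtin, the CODE_FOR_* insn code used to expand it, the
   __builtin_ia32_* name exposed to the front end, the IX86_BUILTIN_*
   enumerator, a comparison/rounding sub-code (UNKNOWN when unused), and
   the function prototype enumerator cast to int.  */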
32023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
32024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
32025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
32026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32027 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
32029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
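/* In the two floor/ceil pack entries above, the sub-code slot carries the
   ROUND_FLOOR or ROUND_CEIL selector rather than an rtx comparison, and
   the *_ROUND prototype lets the expander pass that selector through as
   the rounding immediate.  */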
32031
32032 /* Mask arithmetic operations */
32033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
32036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
32040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
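/* These mask-arithmetic builtins operate on the 16-bit (HImode) AVX-512
   mask registers; they are typically exposed via avx512fintrin.h as
   intrinsics such as _mm512_kand and _mm512_kor.  */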
32043
32044 /* SHA */
32045 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32046 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32047 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32048 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32049 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32050 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32051 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
32052
32053 /* AVX512VL. */
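/* Convention for the masked prototypes below: the last vector operand is
   the pass-through (merge) source and the trailing QI/HI/SI operand is
   the write-mask, one bit per destination element; the _maskz variants
   zero the unselected elements instead of merging.  */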
32054 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
32055 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
32056 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32057 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32060 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32061 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32063 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32064 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32065 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32066 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32067 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32068 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32069 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32070 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32071 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32072 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32073 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32074 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32075 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32076 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32077 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32078 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32079 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32080 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32081 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32082 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32083 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32084 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32085 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32086 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32087 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32088 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32089 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32090 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32091 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32092 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32093 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32094 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32095 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32096 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32097 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32098 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32099 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32100 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32101 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32102 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32103 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32104 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32105 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32106 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32107 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32108 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32109 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32110 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
32111 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32112 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32113 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32114 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32115 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32116 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32117 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32118 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32119 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32120 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32121 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32122 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32123 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32124 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32125 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32126 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32127 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32128 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32129 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32130 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32131 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32132 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32133 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32134 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32135 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32136 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32137 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32138 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32139 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32140 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32141 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32142 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32143 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32144 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
32145 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32146 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32147 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32148 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32149 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32150 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32151 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32152 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32153 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32154 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32155 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32156 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32158 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32159 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32160 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
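/* The *_gpr_mask broadcast entries above take their scalar straight from
   a general-purpose register (note the QI/HI/SI/DI source operand in the
   prototype), whereas the plain *_mask forms replicate the low element of
   a vector source.  */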
32161 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32162 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32163 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32164 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32165 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32167 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
32168 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32169 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32170 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32174 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32175 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32176 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32177 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32178 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32179 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32180 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32181 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32189 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32192 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32193 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32194 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32195 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32196 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32197 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32198 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32199 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32200 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32201 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32202 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32203 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32204 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32205 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32206 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32207 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32208 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32209 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32210 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32211 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32212 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32213 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32214 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32217 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32218 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32219 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32220 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32221 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32222 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32223 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32224 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32225 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32226 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32227 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32228 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32229 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32230 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32231 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32232 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32233 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32234 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32235 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32236 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32237 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32238 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32239 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32240 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32241 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32242 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32243 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32246 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32247 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32248 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32249 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32253 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
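  /* PMOV* down-converting truncations: plain, signed-saturating (ss) and unsigned-saturating (us) narrowing from word, dword and qword elements, for 128-bit and 256-bit sources.  */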
32254 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32255 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32256 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32257 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32258 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32259 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32264 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32269 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32275 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32276 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32277 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32278 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32279 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32281 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32285 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32288 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32289 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32290 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32291 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32292 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32293 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32294 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32295 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32296 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32299 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32300 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32301 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32302 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32303 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32304 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32305 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32306 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32307 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32308 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32309 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32310 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32311 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32312 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32313 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32314 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32315 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32316 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32317 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32318 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32319 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32320 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32321 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32322 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32323 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32324 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32325 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32326 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32327 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32328 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32331 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32334 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK , UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32335 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32339 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32342 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32343 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32346 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32347 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32348 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32349 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32352 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32353 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32354 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32355 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32358 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32359 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32360 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32361 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32362 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32363 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32364 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32365 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32366 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32370 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32371 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32372 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32373 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32374 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32375 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
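  /* 128/256-bit FMA-family builtins (fmadd, fmsub, fnmadd, fnmsub, fmaddsub, fmsubadd) in their masked _mask, _mask3 and _maskz variants.  */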
32392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32401 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32404 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32405 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32406 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32407 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32417 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32418 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32420 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32424 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32425 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32426 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32428 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32429 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32430 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32431 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32436 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32437 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32438 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32439 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32440 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32441 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32442 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32443 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32444 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32445 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32449 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32450 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32451 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32452 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32453 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32456 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32457 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32458 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32464 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32465 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32466 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32467 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32468 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32469 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32470 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32471 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32472 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32473 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32474 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32475 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32479 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32480 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32481 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32482 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32483 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32484 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32485 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32486 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32487 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32494 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32495 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32496 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32497 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32498 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32499 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32500 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32501 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32503 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32504 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32505 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32509 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32510 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32511 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32512 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32513 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32515 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32518 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32519 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32520 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32521 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32525 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32526 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32528 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32529 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32530 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32531 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32532 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32533 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32534 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32535 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32539 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32541 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32544 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32545 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32546 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32547 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32548 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32549 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32550 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32551 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32552 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32553 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32554 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32555 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32556 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32557 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32558 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32559 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32560 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32561 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32562 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32563 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32564 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32565 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32566 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32567 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32568 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32569 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32571 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32572 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32573 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32574 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32575 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32576 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32577 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32578 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32579 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32580 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32581 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32582 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32583 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32584 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32585 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32586 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32587 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32588 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32589 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32590 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32591 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32592 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32593 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32595 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32596 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32597 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32598 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32599 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32600 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32601 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32603 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32604 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32606 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32607 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32608 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32609 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32610 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32611 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32612 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32613 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32614 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32616 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32617 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32618 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32619 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32620 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32621 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32622 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32623 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32624 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32625 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32626 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32627 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32628 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32629 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32630 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32631 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32632 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32633 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32634 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32635 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32636 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32637 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32638 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32639 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32640 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32641 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32642 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32643 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32644 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32645 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32646 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32647 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32648 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32649 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32650 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32651 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32652 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32653 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32654 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32655 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32656 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32657 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32658 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32659 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32660 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32661 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32662 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32663 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32664 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32665 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32666 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32667 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32668 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32669 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32670 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32671 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32672 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32673 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32674 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32675 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32676 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32677 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32678 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32679 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32680 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32681 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32682 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32683 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32684 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32685 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32686 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32687 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32688 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32689 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32690 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32691 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32692 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32693 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32694 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32695 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32696 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32697 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32698 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32699 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32700 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32701 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32702 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32703 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32704 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32705 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32706 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32707 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32708 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32709 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32710 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32711 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32712 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32713 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32714 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32716 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32717 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32718 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32719 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32720 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32721 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32722 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32723 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32724 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32725 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32726 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32727 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32728 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32729 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32730 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32731 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32732 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32733 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32734 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32735 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32736 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32737 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32738 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32739 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32740 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32741 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32742 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32743 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32744 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32745 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32746 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32747 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32748 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32749 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32750 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32751 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32752 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32753 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32754 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
32755 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
32756 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32757 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32758 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32759 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32760 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32761 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32762 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32763 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32764 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
32765 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
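  /* Layout reminder, inferred from the builtin_description table format
     used throughout this file (the struct definition itself lives much
     earlier): each row pairs the ISA option mask(s) that guard the
     builtin, the CODE_FOR_* insn pattern it expands to, the user-visible
     "__builtin_ia32_*" name, its IX86_BUILTIN_* enumerator, an rtx
     comparison code (UNKNOWN for the non-comparison builtins here), and
     the prototype enumerator (ix86_builtin_func_type) cast to int.  */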
32766
32767 /* AVX512DQ. */
32768 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32769 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32770 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32771 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32772 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
32773 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
32774 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
32775 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
32776 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
32777 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
32778 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32779 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32780 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32781 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32782 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32783 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32784 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32785 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32786 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32787 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32788 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32789 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
32790 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
32791 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
32792 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
32793 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
32794 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
32795 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
32796 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
32797 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
32798 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
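  /* Reading aid for the *_mask rows above and below: the trailing
     QI/HI/SI/DI in a *_FTYPE_* prototype names the write-mask operand,
     sized to give at least one bit per vector element (QImode is the
     minimum, so the V2DI and V4SI forms also use QI masks).  The
     cvt*2mask* and cvtmask2* builtins in this group and in the AVX512VL
     group move whole predicates between vector registers and mask
     values of those same integer types.  */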
32799
32800 /* AVX512BW. */
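  /* In the 512-bit byte/word rows below the write-mask widens with the
     element count: V64QI forms take a DImode (64-bit) mask and V32HI
     forms an SImode (32-bit) mask.  Where one CODE_FOR_* pattern is
     listed twice (e.g. CODE_FOR_ashlv32hi3_mask for both
     __builtin_ia32_psllwi512_mask and __builtin_ia32_psllw512_mask),
     the rows are the immediate-count and XMM-count shift variants,
     distinguished only by their prototypes (..._INT_... vs
     ..._V8HI_...).  */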
32801 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
32802 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
32803 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32804 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32805 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32806 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32807 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
32808 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
32809 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32810 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32811 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
32812 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
32813 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
32814 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
32815 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
32816 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
32817 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32818 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32819 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32820 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32821 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32822 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32823 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32824 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32825 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32826 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32827 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32828 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32829 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32830 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32831 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32832 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32833 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32834 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32835 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32836 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32837 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32838 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32839 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32840 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32841 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32842 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32843 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32844 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32845 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32846 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32847 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32848 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32849 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32850 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32851 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32852 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32853 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32854 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32855 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32856 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32857 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
32858 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
32859 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32860 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32861 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32862 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32863 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32864 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32865 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32866 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32867 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32868 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32869 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32870 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32871 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
32872 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
32873 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
32874 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
32875 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32876 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32877 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32878 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32879 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32880 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32881 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32882 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32883 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32884 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32885 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32886 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32887 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32888 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32889 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32890 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32891 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32892
32893 /* AVX512IFMA */
32894 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32895 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32896 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32897 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32898 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32899 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32900 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32901 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32902 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32903 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32904 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32905 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32906
32907 /* AVX512VBMI */
32908 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32909 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32910 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32911 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32912 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32913 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32914 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32915 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32916 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32917 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32918 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32919 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32920 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32921 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32922 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32923 };
32924
32925 /* Builtins with rounding support. */
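/* In every entry below the trailing INT in the function-type signature is
   the rounding/SAE control immediate of the embedded-rounding intrinsics;
   e.g. _mm512_add_round_pd in avx512fintrin.h is expected to reach
   CODE_FOR_addv8df3_mask_round via __builtin_ia32_addpd512_mask.  */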
32926 static const struct builtin_description bdesc_round_args[] =
32927 {
32928 /* AVX512F */
32929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
32934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
32935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
32936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
32937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
32938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
32939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
32942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
32944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
32946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
32948 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
32949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
32950 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
32951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
32952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32957 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
32958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
32959 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
32960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33009 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33011 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33013 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33015 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33017 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33019 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33021 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33023 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
33031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
33032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33048
33049 /* AVX512ER */
33050 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33051 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33052 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33053 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33054 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33055 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33056 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33057 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33058 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33059 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33060
33061 /* AVX512DQ. */
33062 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33063 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33064 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33065 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33066 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33067 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33068 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33069 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33070 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33071 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33072 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33073 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33074 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33075 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33076 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33077 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
33078 };
33079
33080 /* Builtins for MPX. */
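/* The MPX entries deliberately carry (enum insn_code) 0: these builtins are
   not expanded through a named insn pattern but are handled specially when
   the builtin call itself is expanded.  */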
33081 static const struct builtin_description bdesc_mpx[] =
33082 {
33083 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33084 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33085 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33086 };
33087
33088 /* Const builtins for MPX. */
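/* As the name suggests, these are the MPX builtins whose result depends only
   on their operands, so they are registered as const builtins, in contrast
   to the side-effecting entries in bdesc_mpx above.  */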
33089 static const struct builtin_description bdesc_mpx_const[] =
33090 {
33091 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33092 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33093 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33094 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33095 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33096 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33097 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33098 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
33099 };
33100
33101 /* FMA4 and XOP. */
33102 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33103 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33104 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33105 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33106 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
33107 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
33108 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
33109 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
33110 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
33111 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
33112 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
33113 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
33114 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
33115 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
33116 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
33117 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
33118 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
33119 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
33120 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
33121 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
33122 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
33123 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
33124 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
33125 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
33126 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
33127 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
33128 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
33129 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
33130 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
33131 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
33132 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
33133 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
33134 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
33135 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
33136 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
33137 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
33138 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
33139 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
33140 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
33141 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
33142 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
33143 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
33144 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
33145 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
33146 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
33147 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
33148 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
33149 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
33150 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
33151 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
33152 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
33153 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
33154
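/* The MULTI_ARG_* names above are plain aliases for the generic V*_FTYPE_*
   signatures, so the FMA4/XOP table below reads in terms of operand count
   and element width rather than the full prototype spelling.  */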
33155 static const struct builtin_description bdesc_multi_arg[] =
33156 {
33157 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33158 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33159 UNKNOWN, (int)MULTI_ARG_3_SF },
33160 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33161 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33162 UNKNOWN, (int)MULTI_ARG_3_DF },
33163
33164 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33165 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33166 UNKNOWN, (int)MULTI_ARG_3_SF },
33167 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33168 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33169 UNKNOWN, (int)MULTI_ARG_3_DF },
33170
33171 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33172 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33173 UNKNOWN, (int)MULTI_ARG_3_SF },
33174 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33175 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33176 UNKNOWN, (int)MULTI_ARG_3_DF },
33177 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33178 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33179 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33180 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33181 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33182 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33183
33184 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33185 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33186 UNKNOWN, (int)MULTI_ARG_3_SF },
33187 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33188 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33189 UNKNOWN, (int)MULTI_ARG_3_DF },
33190 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33191 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33192 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33193 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33194 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33195 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33196
33197 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33198 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33199 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33200 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33201 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
33202 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33203 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33204
33205 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33206 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33207 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33208 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33209 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33210 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33211 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33212
33213 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33214
33215 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33216 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33217 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33218 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33219 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33220 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33221 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33222 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33223 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33224 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33225 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33226 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33227
33228 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33229 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33230 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33231 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33232 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33233 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33234 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33235 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33236 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33237 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33238 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33239 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33240 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33241 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33242 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33243 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33244
33245 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33246 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33247 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33248 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33249 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33250 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33251
33252 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33253 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33254 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33255 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33256 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33257 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33258 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33259 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33260 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33261 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33262 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33263 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33264 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33265 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33266 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33267
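/* For the vpcom* comparison builtins below, the fifth field (UNKNOWN
   elsewhere in these tables) supplies the RTX comparison code to use when
   the builtin is expanded.  The *neq* spellings are intentional aliases of
   the *ne* ones and share the same IX86_BUILTIN_VPCOMNE* codes.  */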
33268 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33269 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33270 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33271 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33272 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33273 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33274 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33275
33276 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33277 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33278 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33279 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33280 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33281 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33282 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33283
33284 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33285 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33286 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33287 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33288 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33289 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33290 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33291
33292 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33293 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33294 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33295 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33296 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33297 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33298 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33299
33300 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33301 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33302 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33303 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33304 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33305 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33306 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33307
33308 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33309 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33310 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33311 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33312 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33313 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33314 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33315
33316 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33317 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33318 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33319 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33320 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33321 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33322 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33323
33324 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33325 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33326 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33327 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33328 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33329 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33330 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33331
33332 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33333 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33334 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33335 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33336 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33337 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33338 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33339 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33340
33341 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33342 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33343 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33344 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33345 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33346 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33347 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33348 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33349
33350 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33351 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33352 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33353 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
33354
33355 };
33356 \f
33357 /* TM vector builtins. */
33358
33359 /* Reuse the existing x86-specific `struct builtin_description' because
33360    we're lazy.  Add casts to make them fit.  */
33361 static const struct builtin_description bdesc_tm[] =
33362 {
33363 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33364 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33365 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33366 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33367 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33368 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33369 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33370
33371 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33372 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33373 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33374 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33375 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33376 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33377 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33378
33379 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33380 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33381 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33382 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33383 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33384 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33385 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33386
33387 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33388 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33389 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
33390 };
33391
33392 /* TM callbacks. */
33393
33394 /* Return the builtin decl needed to load a vector of TYPE. */
33395
33396 static tree
33397 ix86_builtin_tm_load (tree type)
33398 {
33399 if (TREE_CODE (type) == VECTOR_TYPE)
33400 {
33401 switch (tree_to_uhwi (TYPE_SIZE (type)))
33402 {
33403 case 64:
33404 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33405 case 128:
33406 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33407 case 256:
33408 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33409 }
33410 }
33411 return NULL_TREE;
33412 }
33413
33414 /* Return the builtin decl needed to store a vector of TYPE. */
33415
33416 static tree
33417 ix86_builtin_tm_store (tree type)
33418 {
33419 if (TREE_CODE (type) == VECTOR_TYPE)
33420 {
33421 switch (tree_to_uhwi (TYPE_SIZE (type)))
33422 {
33423 case 64:
33424 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33425 case 128:
33426 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33427 case 256:
33428 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33429 }
33430 }
33431 return NULL_TREE;
33432 }
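/* A minimal illustrative sketch (not compiled): assuming the usual libitm
   entry points, the two hooks above let TM lowering instrument a whole
   128-bit vector access with _ITM_RM128/_ITM_WM128 instead of byte-wise
   barriers.  The variable and function names below are hypothetical.  */
#if 0
#include <x86intrin.h>

static __m128 a, b;

static void
copy_in_transaction (void)
{
  __transaction_atomic
  {
    /* For a VECTOR_TYPE whose TYPE_SIZE is 128, ix86_builtin_tm_load and
       ix86_builtin_tm_store pick BUILT_IN_TM_LOAD_M128 and
       BUILT_IN_TM_STORE_M128, so this copy is lowered to roughly
       _ITM_WM128 (&a, _ITM_RM128 (&b));  */
    a = b;
  }
}
#endif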
33433 \f
33434 /* Initialize the transactional memory vector load/store builtins. */
33435
33436 static void
33437 ix86_init_tm_builtins (void)
33438 {
33439 enum ix86_builtin_func_type ftype;
33440 const struct builtin_description *d;
33441 size_t i;
33442 tree decl;
33443 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33444 tree attrs_log, attrs_type_log;
33445
33446 if (!flag_tm)
33447 return;
33448
33449 /* If there are no builtins defined, we must be compiling in a
33450 language without trans-mem support. */
33451 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33452 return;
33453
33454 /* Use whatever attributes a normal TM load has. */
33455 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33456 attrs_load = DECL_ATTRIBUTES (decl);
33457 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33458 /* Use whatever attributes a normal TM store has. */
33459 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33460 attrs_store = DECL_ATTRIBUTES (decl);
33461 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33462 /* Use whatever attributes a normal TM log has. */
33463 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33464 attrs_log = DECL_ATTRIBUTES (decl);
33465 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33466
33467 for (i = 0, d = bdesc_tm;
33468 i < ARRAY_SIZE (bdesc_tm);
33469 i++, d++)
33470 {
33471 if ((d->mask & ix86_isa_flags) != 0
33472 || (lang_hooks.builtin_function
33473 == lang_hooks.builtin_function_ext_scope))
33474 {
33475 tree type, attrs, attrs_type;
33476 enum built_in_function code = (enum built_in_function) d->code;
33477
33478 ftype = (enum ix86_builtin_func_type) d->flag;
33479 type = ix86_get_builtin_func_type (ftype);
33480
33481 if (BUILTIN_TM_LOAD_P (code))
33482 {
33483 attrs = attrs_load;
33484 attrs_type = attrs_type_load;
33485 }
33486 else if (BUILTIN_TM_STORE_P (code))
33487 {
33488 attrs = attrs_store;
33489 attrs_type = attrs_type_store;
33490 }
33491 else
33492 {
33493 attrs = attrs_log;
33494 attrs_type = attrs_type_log;
33495 }
33496 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33497 /* The builtin without the prefix for
33498 calling it directly. */
33499 d->name + strlen ("__builtin_"),
33500 attrs);
33501 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33502 set the TYPE_ATTRIBUTES. */
33503 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33504
33505 set_builtin_decl (code, decl, false);
33506 }
33507 }
33508 }
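/* Worked example of the name handling above: with
   d->name == "__builtin__ITM_WM128" and strlen ("__builtin_") == 10,
   the second name passed to add_builtin_function is
   d->name + 10 == "_ITM_WM128", so the builtin is also reachable under
   the plain libitm symbol name.  */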
33509
33510 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
33511    in the current target ISA, so that the user can compile particular modules
33512    with target-specific options that differ from the command-line
33513    options.  */
33514 static void
33515 ix86_init_mmx_sse_builtins (void)
33516 {
33517 const struct builtin_description * d;
33518 enum ix86_builtin_func_type ftype;
33519 size_t i;
33520
33521 /* Add all special builtins with variable number of operands. */
33522 for (i = 0, d = bdesc_special_args;
33523 i < ARRAY_SIZE (bdesc_special_args);
33524 i++, d++)
33525 {
33526 if (d->name == 0)
33527 continue;
33528
33529 ftype = (enum ix86_builtin_func_type) d->flag;
33530 def_builtin (d->mask, d->name, ftype, d->code);
33531 }
33532
33533 /* Add all builtins with variable number of operands. */
33534 for (i = 0, d = bdesc_args;
33535 i < ARRAY_SIZE (bdesc_args);
33536 i++, d++)
33537 {
33538 if (d->name == 0)
33539 continue;
33540
33541 ftype = (enum ix86_builtin_func_type) d->flag;
33542 def_builtin_const (d->mask, d->name, ftype, d->code);
33543 }
33544
33545 /* Add all builtins with rounding. */
33546 for (i = 0, d = bdesc_round_args;
33547 i < ARRAY_SIZE (bdesc_round_args);
33548 i++, d++)
33549 {
33550 if (d->name == 0)
33551 continue;
33552
33553 ftype = (enum ix86_builtin_func_type) d->flag;
33554 def_builtin_const (d->mask, d->name, ftype, d->code);
33555 }
33556
33557 /* pcmpestr[im] insns. */
33558 for (i = 0, d = bdesc_pcmpestr;
33559 i < ARRAY_SIZE (bdesc_pcmpestr);
33560 i++, d++)
33561 {
33562 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33563 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33564 else
33565 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33566 def_builtin_const (d->mask, d->name, ftype, d->code);
33567 }
33568
33569 /* pcmpistr[im] insns. */
33570 for (i = 0, d = bdesc_pcmpistr;
33571 i < ARRAY_SIZE (bdesc_pcmpistr);
33572 i++, d++)
33573 {
33574 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33575 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33576 else
33577 ftype = INT_FTYPE_V16QI_V16QI_INT;
33578 def_builtin_const (d->mask, d->name, ftype, d->code);
33579 }
33580
33581 /* comi/ucomi insns. */
33582 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33583 {
33584 if (d->mask == OPTION_MASK_ISA_SSE2)
33585 ftype = INT_FTYPE_V2DF_V2DF;
33586 else
33587 ftype = INT_FTYPE_V4SF_V4SF;
33588 def_builtin_const (d->mask, d->name, ftype, d->code);
33589 }
33590
33591 /* SSE */
33592 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33593 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33594 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33595 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33596
33597 /* SSE or 3DNow!A */
33598 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33599 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33600 IX86_BUILTIN_MASKMOVQ);
33601
33602 /* SSE2 */
33603 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33604 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33605
33606 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33607 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33608 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33609 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33610
33611 /* SSE3. */
33612 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33613 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33614 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33615 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33616
33617 /* AES */
33618 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33619 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33620 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33621 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33622 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33623 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33624 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33625 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33626 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33627 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33628 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33629 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33630
33631 /* PCLMUL */
33632 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33633 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33634
33635 /* RDRND */
33636 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33637 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33638 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33639 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33640 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33641 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33642 IX86_BUILTIN_RDRAND64_STEP);
33643
33644 /* AVX2 */
33645 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33646 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33647 IX86_BUILTIN_GATHERSIV2DF);
33648
33649 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33650 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33651 IX86_BUILTIN_GATHERSIV4DF);
33652
33653 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33654 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33655 IX86_BUILTIN_GATHERDIV2DF);
33656
33657 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33658 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33659 IX86_BUILTIN_GATHERDIV4DF);
33660
33661 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33662 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33663 IX86_BUILTIN_GATHERSIV4SF);
33664
33665 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33666 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33667 IX86_BUILTIN_GATHERSIV8SF);
33668
33669 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33670 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33671 IX86_BUILTIN_GATHERDIV4SF);
33672
33673 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33674 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33675 IX86_BUILTIN_GATHERDIV8SF);
33676
33677 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33678 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33679 IX86_BUILTIN_GATHERSIV2DI);
33680
33681 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33682 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33683 IX86_BUILTIN_GATHERSIV4DI);
33684
33685 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33686 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33687 IX86_BUILTIN_GATHERDIV2DI);
33688
33689 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33690 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33691 IX86_BUILTIN_GATHERDIV4DI);
33692
33693 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33694 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33695 IX86_BUILTIN_GATHERSIV4SI);
33696
33697 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33698 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33699 IX86_BUILTIN_GATHERSIV8SI);
33700
33701 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33702 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33703 IX86_BUILTIN_GATHERDIV4SI);
33704
33705 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33706 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33707 IX86_BUILTIN_GATHERDIV8SI);
33708
33709 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33710 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33711 IX86_BUILTIN_GATHERALTSIV4DF);
33712
33713 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33714 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33715 IX86_BUILTIN_GATHERALTDIV8SF);
33716
33717 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33718 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33719 IX86_BUILTIN_GATHERALTSIV4DI);
33720
33721 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33722 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33723 IX86_BUILTIN_GATHERALTDIV8SI);
33724
33725 /* AVX512F */
33726 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33727 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33728 IX86_BUILTIN_GATHER3SIV16SF);
33729
33730 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33731 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33732 IX86_BUILTIN_GATHER3SIV8DF);
33733
33734 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33735 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33736 IX86_BUILTIN_GATHER3DIV16SF);
33737
33738 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33739 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33740 IX86_BUILTIN_GATHER3DIV8DF);
33741
33742 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33743 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33744 IX86_BUILTIN_GATHER3SIV16SI);
33745
33746 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33747 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33748 IX86_BUILTIN_GATHER3SIV8DI);
33749
33750 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33751 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33752 IX86_BUILTIN_GATHER3DIV16SI);
33753
33754 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33755 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33756 IX86_BUILTIN_GATHER3DIV8DI);
33757
33758 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
33759 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33760 IX86_BUILTIN_GATHER3ALTSIV8DF);
33761
33762 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
33763 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33764 IX86_BUILTIN_GATHER3ALTDIV16SF);
33765
33766 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
33767 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33768 IX86_BUILTIN_GATHER3ALTSIV8DI);
33769
33770 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
33771 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33772 IX86_BUILTIN_GATHER3ALTDIV16SI);
33773
33774 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33775 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33776 IX86_BUILTIN_SCATTERSIV16SF);
33777
33778 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33779 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33780 IX86_BUILTIN_SCATTERSIV8DF);
33781
33782 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33783 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33784 IX86_BUILTIN_SCATTERDIV16SF);
33785
33786 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33787 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33788 IX86_BUILTIN_SCATTERDIV8DF);
33789
33790 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33791 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33792 IX86_BUILTIN_SCATTERSIV16SI);
33793
33794 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33795 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33796 IX86_BUILTIN_SCATTERSIV8DI);
33797
33798 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
33799 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
33800 IX86_BUILTIN_SCATTERDIV16SI);
33801
33802 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
33803 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
33804 IX86_BUILTIN_SCATTERDIV8DI);
33805
33806 /* AVX512VL */
33807 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
33808 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
33809 IX86_BUILTIN_GATHER3SIV2DF);
33810
33811 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
33812 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
33813 IX86_BUILTIN_GATHER3SIV4DF);
33814
33815 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
33816 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
33817 IX86_BUILTIN_GATHER3DIV2DF);
33818
33819 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
33820 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
33821 IX86_BUILTIN_GATHER3DIV4DF);
33822
33823 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
33824 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
33825 IX86_BUILTIN_GATHER3SIV4SF);
33826
33827 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
33828 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
33829 IX86_BUILTIN_GATHER3SIV8SF);
33830
33831 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
33832 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
33833 IX86_BUILTIN_GATHER3DIV4SF);
33834
33835 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
33836 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
33837 IX86_BUILTIN_GATHER3DIV8SF);
33838
33839 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
33840 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
33841 IX86_BUILTIN_GATHER3SIV2DI);
33842
33843 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
33844 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
33845 IX86_BUILTIN_GATHER3SIV4DI);
33846
33847 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
33848 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
33849 IX86_BUILTIN_GATHER3DIV2DI);
33850
33851 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
33852 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
33853 IX86_BUILTIN_GATHER3DIV4DI);
33854
33855 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
33856 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
33857 IX86_BUILTIN_GATHER3SIV4SI);
33858
33859 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
33860 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
33861 IX86_BUILTIN_GATHER3SIV8SI);
33862
33863 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
33864 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
33865 IX86_BUILTIN_GATHER3DIV4SI);
33866
33867 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
33868 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
33869 IX86_BUILTIN_GATHER3DIV8SI);
33870
33871 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
33872 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
33873 IX86_BUILTIN_GATHER3ALTSIV4DF);
33874
33875 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
33876 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
33877 IX86_BUILTIN_GATHER3ALTDIV8SF);
33878
33879 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
33880 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
33881 IX86_BUILTIN_GATHER3ALTSIV4DI);
33882
33883 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
33884 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
33885 IX86_BUILTIN_GATHER3ALTDIV8SI);
33886
33887 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
33888 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
33889 IX86_BUILTIN_SCATTERSIV8SF);
33890
33891 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
33892 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
33893 IX86_BUILTIN_SCATTERSIV4SF);
33894
33895 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
33896 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
33897 IX86_BUILTIN_SCATTERSIV4DF);
33898
33899 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
33900 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
33901 IX86_BUILTIN_SCATTERSIV2DF);
33902
33903 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
33904 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
33905 IX86_BUILTIN_SCATTERDIV8SF);
33906
33907 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
33908 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
33909 IX86_BUILTIN_SCATTERDIV4SF);
33910
33911 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
33912 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
33913 IX86_BUILTIN_SCATTERDIV4DF);
33914
33915 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
33916 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
33917 IX86_BUILTIN_SCATTERDIV2DF);
33918
33919 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
33920 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
33921 IX86_BUILTIN_SCATTERSIV8SI);
33922
33923 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
33924 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
33925 IX86_BUILTIN_SCATTERSIV4SI);
33926
33927 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
33928 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
33929 IX86_BUILTIN_SCATTERSIV4DI);
33930
33931 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
33932 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
33933 IX86_BUILTIN_SCATTERSIV2DI);
33934
33935 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
33936 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
33937 IX86_BUILTIN_SCATTERDIV8SI);
33938
33939 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
33940 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
33941 IX86_BUILTIN_SCATTERDIV4SI);
33942
33943 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
33944 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
33945 IX86_BUILTIN_SCATTERDIV4DI);
33946
33947 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
33948 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
33949 IX86_BUILTIN_SCATTERDIV2DI);
33950
33951 /* AVX512PF */
33952 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
33953 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33954 IX86_BUILTIN_GATHERPFDPD);
33955 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
33956 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33957 IX86_BUILTIN_GATHERPFDPS);
33958 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
33959 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33960 IX86_BUILTIN_GATHERPFQPD);
33961 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
33962 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33963 IX86_BUILTIN_GATHERPFQPS);
33964 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
33965 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33966 IX86_BUILTIN_SCATTERPFDPD);
33967 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
33968 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33969 IX86_BUILTIN_SCATTERPFDPS);
33970 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
33971 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33972 IX86_BUILTIN_SCATTERPFQPD);
33973 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
33974 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33975 IX86_BUILTIN_SCATTERPFQPS);
33976
33977 /* SHA */
33978 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
33979 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
33980 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
33981 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
33982 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
33983 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
33984 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
33985 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
33986 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
33987 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
33988 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
33989 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
33990 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
33991 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
33992
33993 /* RTM. */
33994 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
33995 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
33996
33997 /* MMX access to the vec_init patterns. */
33998 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
33999 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
34000
34001 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
34002 V4HI_FTYPE_HI_HI_HI_HI,
34003 IX86_BUILTIN_VEC_INIT_V4HI);
34004
34005 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
34006 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
34007 IX86_BUILTIN_VEC_INIT_V8QI);
34008
34009 /* Access to the vec_extract patterns. */
34010 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
34011 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
34012 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
34013 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
34014 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
34015 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
34016 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
34017 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
34018 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
34019 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
34020
34021 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34022 "__builtin_ia32_vec_ext_v4hi",
34023 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
34024
34025 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
34026 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
34027
34028 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
34029 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
34030
34031 /* Access to the vec_set patterns. */
34032 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
34033 "__builtin_ia32_vec_set_v2di",
34034 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
34035
34036 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
34037 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
34038
34039 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
34040 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
34041
34042 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
34043 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
34044
34045 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34046 "__builtin_ia32_vec_set_v4hi",
34047 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
34048
34049 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
34050 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
34051
34052 /* RDSEED */
34053 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
34054 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
34055 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
34056 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
34057 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
34058 "__builtin_ia32_rdseed_di_step",
34059 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
34060
34061 /* ADCX */
34062 def_builtin (0, "__builtin_ia32_addcarryx_u32",
34063 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34064 def_builtin (OPTION_MASK_ISA_64BIT,
34065 "__builtin_ia32_addcarryx_u64",
34066 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34067 IX86_BUILTIN_ADDCARRYX64);
34068
34069 /* SBB */
34070 def_builtin (0, "__builtin_ia32_sbb_u32",
34071 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34072 def_builtin (OPTION_MASK_ISA_64BIT,
34073 "__builtin_ia32_sbb_u64",
34074 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34075 IX86_BUILTIN_SBB64);
34076
34077 /* Read/write FLAGS. */
34078 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34079 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34080 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34081 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34082 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34083 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34084 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34085 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
34086
34087 /* CLFLUSHOPT. */
34088 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34089 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34090
34091 /* CLWB. */
34092 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34093 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34094
34095 /* Add FMA4 and XOP multi-arg instructions.  */
34096 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34097 {
34098 if (d->name == 0)
34099 continue;
34100
34101 ftype = (enum ix86_builtin_func_type) d->flag;
34102 def_builtin_const (d->mask, d->name, ftype, d->code);
34103 }
34104 }
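/* A minimal illustrative sketch (not compiled): because the builtins are
   registered even when the corresponding ISA is not enabled on the command
   line, a module built with only -msse2 can still carry an AVX2 code path
   through the target attribute.  The intrinsic below expands to the
   __builtin_ia32_gathersiv8si builtin defined above; the function name is
   hypothetical.  */
#if 0
#include <immintrin.h>

__attribute__ ((target ("avx2")))
static __m256i
gather8 (const int *base, __m256i idx)
{
  /* VPGATHERDD: gather eight 32-bit ints addressed by BASE + 4 * IDX[i].  */
  return _mm256_i32gather_epi32 (base, idx, 4);
}
#endif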
34105
34106 static void
34107 ix86_init_mpx_builtins ()
34108 {
34109 const struct builtin_description * d;
34110 enum ix86_builtin_func_type ftype;
34111 tree decl;
34112 size_t i;
34113
34114 for (i = 0, d = bdesc_mpx;
34115 i < ARRAY_SIZE (bdesc_mpx);
34116 i++, d++)
34117 {
34118 if (d->name == 0)
34119 continue;
34120
34121 ftype = (enum ix86_builtin_func_type) d->flag;
34122 decl = def_builtin (d->mask, d->name, ftype, d->code);
34123
34124 /* Without the leaf and nothrow flags, calls to MPX builtins
34125    may be followed by abnormal edges when setjmp is present
34126    in the function.  Since there may be many MPX builtin
34127    calls, this creates lots of useless edges and enormous
34128    PHI nodes.  To avoid this, mark MPX builtins as leaf
34129    and nothrow.  */
34130 if (decl)
34131 {
34132 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34133 NULL_TREE);
34134 TREE_NOTHROW (decl) = 1;
34135 }
34136 else
34137 {
34138 ix86_builtins_isa[(int)d->code].leaf_p = true;
34139 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34140 }
34141 }
34142
34143 for (i = 0, d = bdesc_mpx_const;
34144 i < ARRAY_SIZE (bdesc_mpx_const);
34145 i++, d++)
34146 {
34147 if (d->name == 0)
34148 continue;
34149
34150 ftype = (enum ix86_builtin_func_type) d->flag;
34151 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34152
34153 if (decl)
34154 {
34155 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34156 NULL_TREE);
34157 TREE_NOTHROW (decl) = 1;
34158 }
34159 else
34160 {
34161 ix86_builtins_isa[(int)d->code].leaf_p = true;
34162 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34163 }
34164 }
34165 }
34166
34167 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34168 to return a pointer to VERSION_DECL if the outcome of the expression
34169 formed by PREDICATE_CHAIN is true. This function will be called during
34170 version dispatch to decide which function version to execute. It returns
34171 the basic block at the end, to which more conditions can be added. */
34172
34173 static basic_block
34174 add_condition_to_bb (tree function_decl, tree version_decl,
34175 tree predicate_chain, basic_block new_bb)
34176 {
34177 gimple return_stmt;
34178 tree convert_expr, result_var;
34179 gimple convert_stmt;
34180 gimple call_cond_stmt;
34181 gimple if_else_stmt;
34182
34183 basic_block bb1, bb2, bb3;
34184 edge e12, e23;
34185
34186 tree cond_var, and_expr_var = NULL_TREE;
34187 gimple_seq gseq;
34188
34189 tree predicate_decl, predicate_arg;
34190
34191 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34192
34193 gcc_assert (new_bb != NULL);
34194 gseq = bb_seq (new_bb);
34195
34196
34197 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34198 build_fold_addr_expr (version_decl));
34199 result_var = create_tmp_var (ptr_type_node);
34200 convert_stmt = gimple_build_assign (result_var, convert_expr);
34201 return_stmt = gimple_build_return (result_var);
34202
34203 if (predicate_chain == NULL_TREE)
34204 {
34205 gimple_seq_add_stmt (&gseq, convert_stmt);
34206 gimple_seq_add_stmt (&gseq, return_stmt);
34207 set_bb_seq (new_bb, gseq);
34208 gimple_set_bb (convert_stmt, new_bb);
34209 gimple_set_bb (return_stmt, new_bb);
34210 pop_cfun ();
34211 return new_bb;
34212 }
34213
34214 while (predicate_chain != NULL)
34215 {
34216 cond_var = create_tmp_var (integer_type_node);
34217 predicate_decl = TREE_PURPOSE (predicate_chain);
34218 predicate_arg = TREE_VALUE (predicate_chain);
34219 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34220 gimple_call_set_lhs (call_cond_stmt, cond_var);
34221
34222 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34223 gimple_set_bb (call_cond_stmt, new_bb);
34224 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34225
34226 predicate_chain = TREE_CHAIN (predicate_chain);
34227
34228 if (and_expr_var == NULL)
34229 and_expr_var = cond_var;
34230 else
34231 {
34232 gimple assign_stmt;
34233 /* Use MIN_EXPR to check whether any of the integers is zero:
34234    and_expr_var = min_expr <cond_var, and_expr_var>.  */
34235 assign_stmt = gimple_build_assign (and_expr_var,
34236 build2 (MIN_EXPR, integer_type_node,
34237 cond_var, and_expr_var));
34238
34239 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34240 gimple_set_bb (assign_stmt, new_bb);
34241 gimple_seq_add_stmt (&gseq, assign_stmt);
34242 }
34243 }
34244
34245 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34246 integer_zero_node,
34247 NULL_TREE, NULL_TREE);
34248 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34249 gimple_set_bb (if_else_stmt, new_bb);
34250 gimple_seq_add_stmt (&gseq, if_else_stmt);
34251
34252 gimple_seq_add_stmt (&gseq, convert_stmt);
34253 gimple_seq_add_stmt (&gseq, return_stmt);
34254 set_bb_seq (new_bb, gseq);
34255
34256 bb1 = new_bb;
34257 e12 = split_block (bb1, if_else_stmt);
34258 bb2 = e12->dest;
34259 e12->flags &= ~EDGE_FALLTHRU;
34260 e12->flags |= EDGE_TRUE_VALUE;
34261
34262 e23 = split_block (bb2, return_stmt);
34263
34264 gimple_set_bb (convert_stmt, bb2);
34265 gimple_set_bb (return_stmt, bb2);
34266
34267 bb3 = e23->dest;
34268 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34269
34270 remove_edge (e23);
34271 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34272
34273 pop_cfun ();
34274
34275 return bb3;
34276 }
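/* Rough sketch of the condition block built above, assuming a version whose
   predicate chain contains __builtin_cpu_is ("core2") followed by
   __builtin_cpu_supports ("ssse3") (names illustrative only):

     c1 = __builtin_cpu_is ("core2");
     c2 = __builtin_cpu_supports ("ssse3");
     c  = MIN (c1, c2);            // MIN_EXPR acts as a logical AND here
     if (c > 0)
       return &the_core2_version;  // converted to ptr_type_node
     // otherwise fall through to the next condition block (BB3).  */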
34277
34278 /* This parses the attribute arguments to target in DECL and determines
34279 the right builtin to use to match the platform specification.
34280 It returns the priority value for this version decl. If PREDICATE_LIST
34281 is not NULL, it stores the list of cpu features that need to be checked
34282 before dispatching this function. */
34283
34284 static unsigned int
34285 get_builtin_code_for_version (tree decl, tree *predicate_list)
34286 {
34287 tree attrs;
34288 struct cl_target_option cur_target;
34289 tree target_node;
34290 struct cl_target_option *new_target;
34291 const char *arg_str = NULL;
34292 const char *attrs_str = NULL;
34293 char *tok_str = NULL;
34294 char *token;
34295
34296 /* Priority of i386 features, greater value is higher priority. This is
34297 used to decide the order in which function dispatch must happen. For
34298 instance, a version specialized for SSE4.2 should be checked for dispatch
34299 before a version for SSE3, as SSE4.2 implies SSE3. */
34300 enum feature_priority
34301 {
34302 P_ZERO = 0,
34303 P_MMX,
34304 P_SSE,
34305 P_SSE2,
34306 P_SSE3,
34307 P_SSSE3,
34308 P_PROC_SSSE3,
34309 P_SSE4_A,
34310 P_PROC_SSE4_A,
34311 P_SSE4_1,
34312 P_SSE4_2,
34313 P_PROC_SSE4_2,
34314 P_POPCNT,
34315 P_AVX,
34316 P_PROC_AVX,
34317 P_BMI,
34318 P_PROC_BMI,
34319 P_FMA4,
34320 P_XOP,
34321 P_PROC_XOP,
34322 P_FMA,
34323 P_PROC_FMA,
34324 P_BMI2,
34325 P_AVX2,
34326 P_PROC_AVX2,
34327 P_AVX512F,
34328 P_PROC_AVX512F
34329 };
34330
34331 enum feature_priority priority = P_ZERO;
34332
34333 /* These are the target attribute strings for which a dispatcher is
34334 available, from fold_builtin_cpu. */
34335
34336 static struct _feature_list
34337 {
34338 const char *const name;
34339 const enum feature_priority priority;
34340 }
34341 const feature_list[] =
34342 {
34343 {"mmx", P_MMX},
34344 {"sse", P_SSE},
34345 {"sse2", P_SSE2},
34346 {"sse3", P_SSE3},
34347 {"sse4a", P_SSE4_A},
34348 {"ssse3", P_SSSE3},
34349 {"sse4.1", P_SSE4_1},
34350 {"sse4.2", P_SSE4_2},
34351 {"popcnt", P_POPCNT},
34352 {"avx", P_AVX},
34353 {"bmi", P_BMI},
34354 {"fma4", P_FMA4},
34355 {"xop", P_XOP},
34356 {"fma", P_FMA},
34357 {"bmi2", P_BMI2},
34358 {"avx2", P_AVX2},
34359 {"avx512f", P_AVX512F}
34360 };
34361
34362
34363 static unsigned int NUM_FEATURES
34364 = sizeof (feature_list) / sizeof (struct _feature_list);
34365
34366 unsigned int i;
34367
34368 tree predicate_chain = NULL_TREE;
34369 tree predicate_decl, predicate_arg;
34370
34371 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34372 gcc_assert (attrs != NULL);
34373
34374 attrs = TREE_VALUE (TREE_VALUE (attrs));
34375
34376 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34377 attrs_str = TREE_STRING_POINTER (attrs);
34378
34379 /* Return priority zero for default function. */
34380 if (strcmp (attrs_str, "default") == 0)
34381 return 0;
34382
34383 /* Handle arch= if specified. For priority, set it to be 1 more than
34384 the best instruction set the processor can handle. For instance, if
34385 there is a version for atom and a version for ssse3 (the highest ISA
34386 priority for atom), the atom version must be checked for dispatch
34387 before the ssse3 version. */
34388 if (strstr (attrs_str, "arch=") != NULL)
34389 {
34390 cl_target_option_save (&cur_target, &global_options);
34391 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34392 &global_options_set);
34393
34394 gcc_assert (target_node);
34395 new_target = TREE_TARGET_OPTION (target_node);
34396 gcc_assert (new_target);
34397
34398 if (new_target->arch_specified && new_target->arch > 0)
34399 {
34400 switch (new_target->arch)
34401 {
34402 case PROCESSOR_CORE2:
34403 arg_str = "core2";
34404 priority = P_PROC_SSSE3;
34405 break;
34406 case PROCESSOR_NEHALEM:
34407 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34408 arg_str = "westmere";
34409 else
34410 /* We translate "arch=corei7" and "arch=nehalem" to
34411 "corei7" so that it will be mapped to M_INTEL_COREI7
34412 as cpu type to cover all M_INTEL_COREI7_XXXs. */
34413 arg_str = "corei7";
34414 priority = P_PROC_SSE4_2;
34415 break;
34416 case PROCESSOR_SANDYBRIDGE:
34417 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34418 arg_str = "ivybridge";
34419 else
34420 arg_str = "sandybridge";
34421 priority = P_PROC_AVX;
34422 break;
34423 case PROCESSOR_HASWELL:
34424 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34425 arg_str = "broadwell";
34426 else
34427 arg_str = "haswell";
34428 priority = P_PROC_AVX2;
34429 break;
34430 case PROCESSOR_BONNELL:
34431 arg_str = "bonnell";
34432 priority = P_PROC_SSSE3;
34433 break;
34434 case PROCESSOR_KNL:
34435 arg_str = "knl";
34436 priority = P_PROC_AVX512F;
34437 break;
34438 case PROCESSOR_SILVERMONT:
34439 arg_str = "silvermont";
34440 priority = P_PROC_SSE4_2;
34441 break;
34442 case PROCESSOR_AMDFAM10:
34443 arg_str = "amdfam10h";
34444 priority = P_PROC_SSE4_A;
34445 break;
34446 case PROCESSOR_BTVER1:
34447 arg_str = "btver1";
34448 priority = P_PROC_SSE4_A;
34449 break;
34450 case PROCESSOR_BTVER2:
34451 arg_str = "btver2";
34452 priority = P_PROC_BMI;
34453 break;
34454 case PROCESSOR_BDVER1:
34455 arg_str = "bdver1";
34456 priority = P_PROC_XOP;
34457 break;
34458 case PROCESSOR_BDVER2:
34459 arg_str = "bdver2";
34460 priority = P_PROC_FMA;
34461 break;
34462 case PROCESSOR_BDVER3:
34463 arg_str = "bdver3";
34464 priority = P_PROC_FMA;
34465 break;
34466 case PROCESSOR_BDVER4:
34467 arg_str = "bdver4";
34468 priority = P_PROC_AVX2;
34469 break;
34470 }
34471 }
34472
34473 cl_target_option_restore (&global_options, &cur_target);
34474
34475 if (predicate_list && arg_str == NULL)
34476 {
34477 error_at (DECL_SOURCE_LOCATION (decl),
34478 "No dispatcher found for the versioning attributes");
34479 return 0;
34480 }
34481
34482 if (predicate_list)
34483 {
34484 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34485 /* For a C string literal the length includes the trailing NULL. */
34486 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34487 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34488 predicate_chain);
34489 }
34490 }
34491
34492 /* Process feature name. */
34493 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34494 strcpy (tok_str, attrs_str);
34495 token = strtok (tok_str, ",");
34496 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34497
34498 while (token != NULL)
34499 {
34500 /* Do not process "arch=" */
34501 if (strncmp (token, "arch=", 5) == 0)
34502 {
34503 token = strtok (NULL, ",");
34504 continue;
34505 }
34506 for (i = 0; i < NUM_FEATURES; ++i)
34507 {
34508 if (strcmp (token, feature_list[i].name) == 0)
34509 {
34510 if (predicate_list)
34511 {
34512 predicate_arg = build_string_literal (
34513 strlen (feature_list[i].name) + 1,
34514 feature_list[i].name);
34515 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34516 predicate_chain);
34517 }
34518 /* Find the maximum priority feature. */
34519 if (feature_list[i].priority > priority)
34520 priority = feature_list[i].priority;
34521
34522 break;
34523 }
34524 }
34525 if (predicate_list && i == NUM_FEATURES)
34526 {
34527 error_at (DECL_SOURCE_LOCATION (decl),
34528 "No dispatcher found for %s", token);
34529 return 0;
34530 }
34531 token = strtok (NULL, ",");
34532 }
34533 free (tok_str);
34534
34535 if (predicate_list && predicate_chain == NULL_TREE)
34536 {
34537 error_at (DECL_SOURCE_LOCATION (decl),
34538 "No dispatcher found for the versioning attributes : %s",
34539 attrs_str);
34540 return 0;
34541 }
34542 else if (predicate_list)
34543 {
34544 predicate_chain = nreverse (predicate_chain);
34545 *predicate_list = predicate_chain;
34546 }
34547
34548 return priority;
34549 }
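/* Worked example, based on the feature_priority table above: a version
   declared with __attribute__ ((target ("arch=core2"))) is assigned
   P_PROC_SSSE3, while one declared with target ("sse4.2") is assigned
   P_SSE4_2.  P_SSE4_2 is the larger enumerator, so the SSE4.2 version is
   checked for dispatch before the core2 version.  */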
34550
34551 /* This compares the priority of target features in function DECL1
34552 and DECL2. It returns positive value if DECL1 is higher priority,
34553 negative value if DECL2 is higher priority and 0 if they are the
34554 same. */
34555
34556 static int
34557 ix86_compare_version_priority (tree decl1, tree decl2)
34558 {
34559 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34560 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34561
34562 return (int)priority1 - (int)priority2;
34563 }
34564
34565 /* V1 and V2 point to function versions with different priorities
34566 based on the target ISA. This function compares their priorities. */
34567
34568 static int
34569 feature_compare (const void *v1, const void *v2)
34570 {
34571 typedef struct _function_version_info
34572 {
34573 tree version_decl;
34574 tree predicate_chain;
34575 unsigned int dispatch_priority;
34576 } function_version_info;
34577
34578 const function_version_info c1 = *(const function_version_info *)v1;
34579 const function_version_info c2 = *(const function_version_info *)v2;
34580 return (c2.dispatch_priority - c1.dispatch_priority);
34581 }
34582
34583 /* This function generates the dispatch function for
34584 multi-versioned functions. DISPATCH_DECL is the function which will
34585 contain the dispatch logic. FNDECLS are the function choices for
34586 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34587 in DISPATCH_DECL in which the dispatch code is generated. */
34588
34589 static int
34590 dispatch_function_versions (tree dispatch_decl,
34591 void *fndecls_p,
34592 basic_block *empty_bb)
34593 {
34594 tree default_decl;
34595 gimple ifunc_cpu_init_stmt;
34596 gimple_seq gseq;
34597 int ix;
34598 tree ele;
34599 vec<tree> *fndecls;
34600 unsigned int num_versions = 0;
34601 unsigned int actual_versions = 0;
34602 unsigned int i;
34603
34604 struct _function_version_info
34605 {
34606 tree version_decl;
34607 tree predicate_chain;
34608 unsigned int dispatch_priority;
34609 }*function_version_info;
34610
34611 gcc_assert (dispatch_decl != NULL
34612 && fndecls_p != NULL
34613 && empty_bb != NULL);
34614
34615 /* fndecls_p is actually a vector.  */
34616 fndecls = static_cast<vec<tree> *> (fndecls_p);
34617
34618 /* At least one more version other than the default. */
34619 num_versions = fndecls->length ();
34620 gcc_assert (num_versions >= 2);
34621
34622 function_version_info = (struct _function_version_info *)
34623 XNEWVEC (struct _function_version_info, (num_versions - 1));
34624
34625 /* The first version in the vector is the default decl. */
34626 default_decl = (*fndecls)[0];
34627
34628 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34629
34630 gseq = bb_seq (*empty_bb);
34631 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34632 constructors, so explicitly call __builtin_cpu_init here. */
34633 ifunc_cpu_init_stmt = gimple_build_call_vec (
34634 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34635 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34636 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34637 set_bb_seq (*empty_bb, gseq);
34638
34639 pop_cfun ();
34640
34641
34642 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34643 {
34644 tree version_decl = ele;
34645 tree predicate_chain = NULL_TREE;
34646 unsigned int priority;
34647 /* Get attribute string, parse it and find the right predicate decl.
34648 The predicate function could be a lengthy combination of many
34649 features, like arch-type and various isa-variants. */
34650 priority = get_builtin_code_for_version (version_decl,
34651 &predicate_chain);
34652
34653 if (predicate_chain == NULL_TREE)
34654 continue;
34655
34656 function_version_info [actual_versions].version_decl = version_decl;
34657 function_version_info [actual_versions].predicate_chain
34658 = predicate_chain;
34659 function_version_info [actual_versions].dispatch_priority = priority;
34660 actual_versions++;
34661 }
34662
34663 /* Sort the versions according to descending order of dispatch priority. The
34664 priority is based on the ISA. This is not a perfect solution. There
34665 could still be ambiguity. If more than one function version is suitable
34666 to execute, which one should be dispatched? In future, allow the user
34667 to specify a dispatch priority next to the version. */
34668 qsort (function_version_info, actual_versions,
34669 sizeof (struct _function_version_info), feature_compare);
34670
34671 for (i = 0; i < actual_versions; ++i)
34672 *empty_bb = add_condition_to_bb (dispatch_decl,
34673 function_version_info[i].version_decl,
34674 function_version_info[i].predicate_chain,
34675 *empty_bb);
34676
34677 /* Dispatch the default version at the end. */
34678 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34679 NULL, *empty_bb);
34680
34681 free (function_version_info);
34682 return 0;
34683 }
34684
34685 /* Comparator function to be used in the qsort routine to sort the
34686 attribute specification strings for the "target" attribute. */
34687
34688 static int
34689 attr_strcmp (const void *v1, const void *v2)
34690 {
34691 const char *c1 = *(char *const*)v1;
34692 const char *c2 = *(char *const*)v2;
34693 return strcmp (c1, c2);
34694 }
34695
34696 /* ARGLIST is the argument to the target attribute. This function tokenizes
34697 the comma-separated arguments, sorts them and returns a string which
34698 is a unique identifier for the comma-separated arguments. It also
34699 replaces the non-identifier characters "=,-" with "_". */
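/* Worked example: for the attribute arguments "arch=core2,sse4.2" the
   concatenated string becomes "arch_core2,sse4.2" after '=' and '-' are
   replaced with '_'; it is then split on ',', sorted, and re-joined
   with '_', yielding the identifier "arch_core2_sse4.2".  */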
34700
34701 static char *
34702 sorted_attr_string (tree arglist)
34703 {
34704 tree arg;
34705 size_t str_len_sum = 0;
34706 char **args = NULL;
34707 char *attr_str, *ret_str;
34708 char *attr = NULL;
34709 unsigned int argnum = 1;
34710 unsigned int i;
34711
34712 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34713 {
34714 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34715 size_t len = strlen (str);
34716 str_len_sum += len + 1;
34717 if (arg != arglist)
34718 argnum++;
34719 for (i = 0; i < strlen (str); i++)
34720 if (str[i] == ',')
34721 argnum++;
34722 }
34723
34724 attr_str = XNEWVEC (char, str_len_sum);
34725 str_len_sum = 0;
34726 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34727 {
34728 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34729 size_t len = strlen (str);
34730 memcpy (attr_str + str_len_sum, str, len);
34731 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34732 str_len_sum += len + 1;
34733 }
34734
34735 /* Replace "=,-" with "_". */
34736 for (i = 0; i < strlen (attr_str); i++)
34737 if (attr_str[i] == '=' || attr_str[i]== '-')
34738 attr_str[i] = '_';
34739
34740 if (argnum == 1)
34741 return attr_str;
34742
34743 args = XNEWVEC (char *, argnum);
34744
34745 i = 0;
34746 attr = strtok (attr_str, ",");
34747 while (attr != NULL)
34748 {
34749 args[i] = attr;
34750 i++;
34751 attr = strtok (NULL, ",");
34752 }
34753
34754 qsort (args, argnum, sizeof (char *), attr_strcmp);
34755
34756 ret_str = XNEWVEC (char, str_len_sum);
34757 str_len_sum = 0;
34758 for (i = 0; i < argnum; i++)
34759 {
34760 size_t len = strlen (args[i]);
34761 memcpy (ret_str + str_len_sum, args[i], len);
34762 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34763 str_len_sum += len + 1;
34764 }
34765
34766 XDELETEVEC (args);
34767 XDELETEVEC (attr_str);
34768 return ret_str;
34769 }
34770
34771 /* This function changes the assembler name for functions that are
34772 versions. If DECL is a function version and has a "target"
34773 attribute, it appends the attribute string to its assembler name. */
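/* Example (assuming a plain C function named foo): the version declared
   as

     __attribute__ ((target ("avx2"))) int foo (void);

   gets the assembler name "foo.avx2" (the sorted attribute string
   appended after a '.'), while the version whose target string is
   "default" keeps its original assembler name.  For C++ the suffix is
   appended to the mangled name.  */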
34774
34775 static tree
34776 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34777 {
34778 tree version_attr;
34779 const char *orig_name, *version_string;
34780 char *attr_str, *assembler_name;
34781
34782 if (DECL_DECLARED_INLINE_P (decl)
34783 && lookup_attribute ("gnu_inline",
34784 DECL_ATTRIBUTES (decl)))
34785 error_at (DECL_SOURCE_LOCATION (decl),
34786 "Function versions cannot be marked as gnu_inline,"
34787 " bodies have to be generated");
34788
34789 if (DECL_VIRTUAL_P (decl)
34790 || DECL_VINDEX (decl))
34791 sorry ("Virtual function multiversioning not supported");
34792
34793 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34794
34795 /* target attribute string cannot be NULL. */
34796 gcc_assert (version_attr != NULL_TREE);
34797
34798 orig_name = IDENTIFIER_POINTER (id);
34799 version_string
34800 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
34801
34802 if (strcmp (version_string, "default") == 0)
34803 return id;
34804
34805 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
34806 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
34807
34808 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
34809
34810 /* Allow assembler name to be modified if already set. */
34811 if (DECL_ASSEMBLER_NAME_SET_P (decl))
34812 SET_DECL_RTL (decl, NULL);
34813
34814 tree ret = get_identifier (assembler_name);
34815 XDELETEVEC (attr_str);
34816 XDELETEVEC (assembler_name);
34817 return ret;
34818 }
34819
34820 /* This function returns true if FN1 and FN2 are versions of the same function,
34821 that is, the target strings of the function decls are different. This assumes
34822 that FN1 and FN2 have the same signature. */
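/* Example: two declarations with the same signature, one carrying
   target("sse4.2") and the other target("avx2"), are distinct versions
   because their sorted attribute strings differ; two declarations whose
   target strings sort to the same identifier are not.  */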
34823
34824 static bool
34825 ix86_function_versions (tree fn1, tree fn2)
34826 {
34827 tree attr1, attr2;
34828 char *target1, *target2;
34829 bool result;
34830
34831 if (TREE_CODE (fn1) != FUNCTION_DECL
34832 || TREE_CODE (fn2) != FUNCTION_DECL)
34833 return false;
34834
34835 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
34836 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
34837
34838 /* At least one function decl should have the target attribute specified. */
34839 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
34840 return false;
34841
34842 /* Diagnose missing target attribute if one of the decls is already
34843 multi-versioned. */
34844 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
34845 {
34846 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
34847 {
34848 if (attr2 != NULL_TREE)
34849 {
34850 tree tem = fn1;
34851 fn1 = fn2;
34852 fn2 = tem;
34853 attr1 = attr2;
34854 }
34855 error_at (DECL_SOURCE_LOCATION (fn2),
34856 "missing %<target%> attribute for multi-versioned %D",
34857 fn2);
34858 inform (DECL_SOURCE_LOCATION (fn1),
34859 "previous declaration of %D", fn1);
34860 /* Prevent diagnosing of the same error multiple times. */
34861 DECL_ATTRIBUTES (fn2)
34862 = tree_cons (get_identifier ("target"),
34863 copy_node (TREE_VALUE (attr1)),
34864 DECL_ATTRIBUTES (fn2));
34865 }
34866 return false;
34867 }
34868
34869 target1 = sorted_attr_string (TREE_VALUE (attr1));
34870 target2 = sorted_attr_string (TREE_VALUE (attr2));
34871
34872 /* The sorted target strings must be different for fn1 and fn2
34873 to be versions. */
34874 if (strcmp (target1, target2) == 0)
34875 result = false;
34876 else
34877 result = true;
34878
34879 XDELETEVEC (target1);
34880 XDELETEVEC (target2);
34881
34882 return result;
34883 }
34884
34885 static tree
34886 ix86_mangle_decl_assembler_name (tree decl, tree id)
34887 {
34888 /* For function version, add the target suffix to the assembler name. */
34889 if (TREE_CODE (decl) == FUNCTION_DECL
34890 && DECL_FUNCTION_VERSIONED (decl))
34891 id = ix86_mangle_function_version_assembler_name (decl, id);
34892 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
34893 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
34894 #endif
34895
34896 return id;
34897 }
34898
34899 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
34900 is true, append the full path name of the source file. */
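/* Example: for a decl whose assembler name is "foo",
   make_name (decl, "resolver", false) returns "foo.resolver".  With
   MAKE_UNIQUE true, a file-specific string from get_file_function_name
   is spliced in between, so local (non-public) versions from different
   translation units cannot collide at link time.  */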
34901
34902 static char *
34903 make_name (tree decl, const char *suffix, bool make_unique)
34904 {
34905 char *global_var_name;
34906 int name_len;
34907 const char *name;
34908 const char *unique_name = NULL;
34909
34910 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
34911
34912 /* Get a unique name that can be used globally without any chances
34913 of collision at link time. */
34914 if (make_unique)
34915 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
34916
34917 name_len = strlen (name) + strlen (suffix) + 2;
34918
34919 if (make_unique)
34920 name_len += strlen (unique_name) + 1;
34921 global_var_name = XNEWVEC (char, name_len);
34922
34923 /* Use '.' to concatenate names as it is demangler friendly. */
34924 if (make_unique)
34925 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
34926 suffix);
34927 else
34928 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
34929
34930 return global_var_name;
34931 }
34932
34933 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34934
34935 /* Make a dispatcher declaration for the multi-versioned function DECL.
34936 Calls to DECL function will be replaced with calls to the dispatcher
34937 by the front-end. Return the decl created. */
34938
34939 static tree
34940 make_dispatcher_decl (const tree decl)
34941 {
34942 tree func_decl;
34943 char *func_name;
34944 tree fn_type, func_type;
34945 bool is_uniq = false;
34946
34947 if (TREE_PUBLIC (decl) == 0)
34948 is_uniq = true;
34949
34950 func_name = make_name (decl, "ifunc", is_uniq);
34951
34952 fn_type = TREE_TYPE (decl);
34953 func_type = build_function_type (TREE_TYPE (fn_type),
34954 TYPE_ARG_TYPES (fn_type));
34955
34956 func_decl = build_fn_decl (func_name, func_type);
34957 XDELETEVEC (func_name);
34958 TREE_USED (func_decl) = 1;
34959 DECL_CONTEXT (func_decl) = NULL_TREE;
34960 DECL_INITIAL (func_decl) = error_mark_node;
34961 DECL_ARTIFICIAL (func_decl) = 1;
34962 /* Mark this func as external, the resolver will flip it again if
34963 it gets generated. */
34964 DECL_EXTERNAL (func_decl) = 1;
34965 /* This will be an IFUNC; IFUNCs have to be externally visible. */
34966 TREE_PUBLIC (func_decl) = 1;
34967
34968 return func_decl;
34969 }
34970
34971 #endif
34972
34973 /* Returns true if DECL is multi-versioned and is the default version,
34974 that is, it is not tagged with a target-specific optimization. */
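/* Example: the default version is the one declared as

     __attribute__ ((target ("default"))) int foo (void);

   i.e. the value of its "target" attribute is the string "default".  */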
34975
34976 static bool
34977 is_function_default_version (const tree decl)
34978 {
34979 if (TREE_CODE (decl) != FUNCTION_DECL
34980 || !DECL_FUNCTION_VERSIONED (decl))
34981 return false;
34982 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34983 gcc_assert (attr);
34984 attr = TREE_VALUE (TREE_VALUE (attr));
34985 return (TREE_CODE (attr) == STRING_CST
34986 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
34987 }
34988
34989 /* Make a dispatcher declaration for the multi-versioned function DECL.
34990 Calls to DECL function will be replaced with calls to the dispatcher
34991 by the front-end. Returns the decl of the dispatcher function. */
34992
34993 static tree
34994 ix86_get_function_versions_dispatcher (void *decl)
34995 {
34996 tree fn = (tree) decl;
34997 struct cgraph_node *node = NULL;
34998 struct cgraph_node *default_node = NULL;
34999 struct cgraph_function_version_info *node_v = NULL;
35000 struct cgraph_function_version_info *first_v = NULL;
35001
35002 tree dispatch_decl = NULL;
35003
35004 struct cgraph_function_version_info *default_version_info = NULL;
35005
35006 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
35007
35008 node = cgraph_node::get (fn);
35009 gcc_assert (node != NULL);
35010
35011 node_v = node->function_version ();
35012 gcc_assert (node_v != NULL);
35013
35014 if (node_v->dispatcher_resolver != NULL)
35015 return node_v->dispatcher_resolver;
35016
35017 /* Find the default version and make it the first node. */
35018 first_v = node_v;
35019 /* Go to the beginning of the chain. */
35020 while (first_v->prev != NULL)
35021 first_v = first_v->prev;
35022 default_version_info = first_v;
35023 while (default_version_info != NULL)
35024 {
35025 if (is_function_default_version
35026 (default_version_info->this_node->decl))
35027 break;
35028 default_version_info = default_version_info->next;
35029 }
35030
35031 /* If there is no default node, just return NULL. */
35032 if (default_version_info == NULL)
35033 return NULL;
35034
35035 /* Make default info the first node. */
35036 if (first_v != default_version_info)
35037 {
35038 default_version_info->prev->next = default_version_info->next;
35039 if (default_version_info->next)
35040 default_version_info->next->prev = default_version_info->prev;
35041 first_v->prev = default_version_info;
35042 default_version_info->next = first_v;
35043 default_version_info->prev = NULL;
35044 }
35045
35046 default_node = default_version_info->this_node;
35047
35048 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35049 if (targetm.has_ifunc_p ())
35050 {
35051 struct cgraph_function_version_info *it_v = NULL;
35052 struct cgraph_node *dispatcher_node = NULL;
35053 struct cgraph_function_version_info *dispatcher_version_info = NULL;
35054
35055 /* Right now, the dispatching is done via ifunc. */
35056 dispatch_decl = make_dispatcher_decl (default_node->decl);
35057
35058 dispatcher_node = cgraph_node::get_create (dispatch_decl);
35059 gcc_assert (dispatcher_node != NULL);
35060 dispatcher_node->dispatcher_function = 1;
35061 dispatcher_version_info
35062 = dispatcher_node->insert_new_function_version ();
35063 dispatcher_version_info->next = default_version_info;
35064 dispatcher_node->definition = 1;
35065
35066 /* Set the dispatcher for all the versions. */
35067 it_v = default_version_info;
35068 while (it_v != NULL)
35069 {
35070 it_v->dispatcher_resolver = dispatch_decl;
35071 it_v = it_v->next;
35072 }
35073 }
35074 else
35075 #endif
35076 {
35077 error_at (DECL_SOURCE_LOCATION (default_node->decl),
35078 "multiversioning needs ifunc which is not supported "
35079 "on this target");
35080 }
35081
35082 return dispatch_decl;
35083 }
35084
35085 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
35086 it to CHAIN. */
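/* Example: make_attribute ("ifunc", "foo.resolver", NULL_TREE) builds a
   tree equivalent to the source-level attribute
   __attribute__ ((ifunc ("foo.resolver"))); this is how the dispatcher
   decl is tied to its resolver below.  */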
35087
35088 static tree
35089 make_attribute (const char *name, const char *arg_name, tree chain)
35090 {
35091 tree attr_name;
35092 tree attr_arg_name;
35093 tree attr_args;
35094 tree attr;
35095
35096 attr_name = get_identifier (name);
35097 attr_arg_name = build_string (strlen (arg_name), arg_name);
35098 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35099 attr = tree_cons (attr_name, attr_args, chain);
35100 return attr;
35101 }
35102
35103 /* Make the resolver function decl to dispatch the versions of
35104 a multi-versioned function, DEFAULT_DECL. Create an
35105 empty basic block in the resolver and store the pointer in
35106 EMPTY_BB. Return the decl of the resolver function. */
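/* Rough user-level picture of what is set up here (illustrative names
   only):

     void *foo.resolver (void);                        // body built later
     int foo (void) __attribute__ ((ifunc ("foo.resolver")));

   DISPATCH_DECL becomes an IFUNC whose resolver is the function created
   here; at load time the resolver returns a pointer to the selected
   version.  */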
35107
35108 static tree
35109 make_resolver_func (const tree default_decl,
35110 const tree dispatch_decl,
35111 basic_block *empty_bb)
35112 {
35113 char *resolver_name;
35114 tree decl, type, decl_name, t;
35115 bool is_uniq = false;
35116
35117 /* IFUNCs have to be globally visible. So, if the default_decl is
35118 not, then the name of the IFUNC should be made unique. */
35119 if (TREE_PUBLIC (default_decl) == 0)
35120 is_uniq = true;
35121
35122 /* Append the filename to the resolver function if the versions are
35123 not externally visible. This is because the resolver function has
35124 to be externally visible for the loader to find it. So, appending
35125 the filename will prevent conflicts with a resolver function from
35126 another module which is based on the same version name. */
35127 resolver_name = make_name (default_decl, "resolver", is_uniq);
35128
35129 /* The resolver function should return a (void *). */
35130 type = build_function_type_list (ptr_type_node, NULL_TREE);
35131
35132 decl = build_fn_decl (resolver_name, type);
35133 decl_name = get_identifier (resolver_name);
35134 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35135
35136 DECL_NAME (decl) = decl_name;
35137 TREE_USED (decl) = 1;
35138 DECL_ARTIFICIAL (decl) = 1;
35139 DECL_IGNORED_P (decl) = 0;
35140 /* IFUNC resolvers have to be externally visible. */
35141 TREE_PUBLIC (decl) = 1;
35142 DECL_UNINLINABLE (decl) = 1;
35143
35144 /* Resolver is not external, body is generated. */
35145 DECL_EXTERNAL (decl) = 0;
35146 DECL_EXTERNAL (dispatch_decl) = 0;
35147
35148 DECL_CONTEXT (decl) = NULL_TREE;
35149 DECL_INITIAL (decl) = make_node (BLOCK);
35150 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35151
35152 if (DECL_COMDAT_GROUP (default_decl)
35153 || TREE_PUBLIC (default_decl))
35154 {
35155 /* In this case, each translation unit with a call to this
35156 versioned function will put out a resolver. Ensure it
35157 is comdat to keep just one copy. */
35158 DECL_COMDAT (decl) = 1;
35159 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35160 }
35161 /* Build result decl and add to function_decl. */
35162 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35163 DECL_ARTIFICIAL (t) = 1;
35164 DECL_IGNORED_P (t) = 1;
35165 DECL_RESULT (decl) = t;
35166
35167 gimplify_function_tree (decl);
35168 push_cfun (DECL_STRUCT_FUNCTION (decl));
35169 *empty_bb = init_lowered_empty_function (decl, false, 0);
35170
35171 cgraph_node::add_new_function (decl, true);
35172 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35173
35174 pop_cfun ();
35175
35176 gcc_assert (dispatch_decl != NULL);
35177 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35178 DECL_ATTRIBUTES (dispatch_decl)
35179 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35180
35181 /* Create the alias for dispatch to resolver here. */
35182 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35183 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35184 XDELETEVEC (resolver_name);
35185 return decl;
35186 }
35187
35188 /* Generate the dispatching code body to dispatch multi-versioned function
35189 DECL. The target hook is called to process the "target" attributes and
35190 provide the code to dispatch the right function at run-time. NODE points
35191 to the dispatcher decl whose body will be created. */
35192
35193 static tree
35194 ix86_generate_version_dispatcher_body (void *node_p)
35195 {
35196 tree resolver_decl;
35197 basic_block empty_bb;
35198 tree default_ver_decl;
35199 struct cgraph_node *versn;
35200 struct cgraph_node *node;
35201
35202 struct cgraph_function_version_info *node_version_info = NULL;
35203 struct cgraph_function_version_info *versn_info = NULL;
35204
35205 node = (cgraph_node *)node_p;
35206
35207 node_version_info = node->function_version ();
35208 gcc_assert (node->dispatcher_function
35209 && node_version_info != NULL);
35210
35211 if (node_version_info->dispatcher_resolver)
35212 return node_version_info->dispatcher_resolver;
35213
35214 /* The first version in the chain corresponds to the default version. */
35215 default_ver_decl = node_version_info->next->this_node->decl;
35216
35217 /* node is going to be an alias, so remove the finalized bit. */
35218 node->definition = false;
35219
35220 resolver_decl = make_resolver_func (default_ver_decl,
35221 node->decl, &empty_bb);
35222
35223 node_version_info->dispatcher_resolver = resolver_decl;
35224
35225 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
35226
35227 auto_vec<tree, 2> fn_ver_vec;
35228
35229 for (versn_info = node_version_info->next; versn_info;
35230 versn_info = versn_info->next)
35231 {
35232 versn = versn_info->this_node;
35233 /* Check for virtual functions here again, as by this time it should
35234 have been determined if this function needs a vtable index or
35235 not. This happens for methods in derived classes that override
35236 virtual methods in base classes but are not explicitly marked as
35237 virtual. */
35238 if (DECL_VINDEX (versn->decl))
35239 sorry ("Virtual function multiversioning not supported");
35240
35241 fn_ver_vec.safe_push (versn->decl);
35242 }
35243
35244 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35245 cgraph_edge::rebuild_edges ();
35246 pop_cfun ();
35247 return resolver_decl;
35248 }
35249 /* This builds the processor_model struct type defined in
35250 libgcc/config/i386/cpuinfo.c */
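/* The record built here is meant to match, field for field, roughly this
   definition from libgcc/config/i386/cpuinfo.c (see that file for the
   authoritative layout):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */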
35251
35252 static tree
35253 build_processor_model_struct (void)
35254 {
35255 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35256 "__cpu_features"};
35257 tree field = NULL_TREE, field_chain = NULL_TREE;
35258 int i;
35259 tree type = make_node (RECORD_TYPE);
35260
35261 /* The first 3 fields are unsigned int. */
35262 for (i = 0; i < 3; ++i)
35263 {
35264 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35265 get_identifier (field_name[i]), unsigned_type_node);
35266 if (field_chain != NULL_TREE)
35267 DECL_CHAIN (field) = field_chain;
35268 field_chain = field;
35269 }
35270
35271 /* The last field is an array of unsigned integers of size one. */
35272 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35273 get_identifier (field_name[3]),
35274 build_array_type (unsigned_type_node,
35275 build_index_type (size_one_node)));
35276 if (field_chain != NULL_TREE)
35277 DECL_CHAIN (field) = field_chain;
35278 field_chain = field;
35279
35280 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35281 return type;
35282 }
35283
35284 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
35285
35286 static tree
35287 make_var_decl (tree type, const char *name)
35288 {
35289 tree new_decl;
35290
35291 new_decl = build_decl (UNKNOWN_LOCATION,
35292 VAR_DECL,
35293 get_identifier(name),
35294 type);
35295
35296 DECL_EXTERNAL (new_decl) = 1;
35297 TREE_STATIC (new_decl) = 1;
35298 TREE_PUBLIC (new_decl) = 1;
35299 DECL_INITIAL (new_decl) = 0;
35300 DECL_ARTIFICIAL (new_decl) = 0;
35301 DECL_PRESERVE_P (new_decl) = 1;
35302
35303 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35304 assemble_variable (new_decl, 0, 0, 0);
35305
35306 return new_decl;
35307 }
35308
35309 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35310 into a check of the __cpu_model variable defined in libgcc/config/i386/cpuinfo.c. */
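/* Examples of the folding performed below:

     __builtin_cpu_is ("amd")
       => (int) (__cpu_model.__cpu_vendor == M_AMD)

     __builtin_cpu_supports ("sse4.2")
       => (int) (__cpu_model.__cpu_features[0] & (1 << F_SSE4_2))

   CPU type and subtype names test the __cpu_type or __cpu_subtype field
   instead of __cpu_vendor.  */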
35311
35312 static tree
35313 fold_builtin_cpu (tree fndecl, tree *args)
35314 {
35315 unsigned int i;
35316 enum ix86_builtins fn_code = (enum ix86_builtins)
35317 DECL_FUNCTION_CODE (fndecl);
35318 tree param_string_cst = NULL;
35319
35320 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35321 enum processor_features
35322 {
35323 F_CMOV = 0,
35324 F_MMX,
35325 F_POPCNT,
35326 F_SSE,
35327 F_SSE2,
35328 F_SSE3,
35329 F_SSSE3,
35330 F_SSE4_1,
35331 F_SSE4_2,
35332 F_AVX,
35333 F_AVX2,
35334 F_SSE4_A,
35335 F_FMA4,
35336 F_XOP,
35337 F_FMA,
35338 F_AVX512F,
35339 F_BMI,
35340 F_BMI2,
35341 F_MAX
35342 };
35343
35344 /* These are the values for vendor types and cpu types and subtypes
35345 in cpuinfo.c. CPU types and subtypes must have the corresponding
35346 start value subtracted from them. */
35347 enum processor_model
35348 {
35349 M_INTEL = 1,
35350 M_AMD,
35351 M_CPU_TYPE_START,
35352 M_INTEL_BONNELL,
35353 M_INTEL_CORE2,
35354 M_INTEL_COREI7,
35355 M_AMDFAM10H,
35356 M_AMDFAM15H,
35357 M_INTEL_SILVERMONT,
35358 M_INTEL_KNL,
35359 M_AMD_BTVER1,
35360 M_AMD_BTVER2,
35361 M_CPU_SUBTYPE_START,
35362 M_INTEL_COREI7_NEHALEM,
35363 M_INTEL_COREI7_WESTMERE,
35364 M_INTEL_COREI7_SANDYBRIDGE,
35365 M_AMDFAM10H_BARCELONA,
35366 M_AMDFAM10H_SHANGHAI,
35367 M_AMDFAM10H_ISTANBUL,
35368 M_AMDFAM15H_BDVER1,
35369 M_AMDFAM15H_BDVER2,
35370 M_AMDFAM15H_BDVER3,
35371 M_AMDFAM15H_BDVER4,
35372 M_INTEL_COREI7_IVYBRIDGE,
35373 M_INTEL_COREI7_HASWELL,
35374 M_INTEL_COREI7_BROADWELL
35375 };
35376
35377 static struct _arch_names_table
35378 {
35379 const char *const name;
35380 const enum processor_model model;
35381 }
35382 const arch_names_table[] =
35383 {
35384 {"amd", M_AMD},
35385 {"intel", M_INTEL},
35386 {"atom", M_INTEL_BONNELL},
35387 {"slm", M_INTEL_SILVERMONT},
35388 {"core2", M_INTEL_CORE2},
35389 {"corei7", M_INTEL_COREI7},
35390 {"nehalem", M_INTEL_COREI7_NEHALEM},
35391 {"westmere", M_INTEL_COREI7_WESTMERE},
35392 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35393 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35394 {"haswell", M_INTEL_COREI7_HASWELL},
35395 {"broadwell", M_INTEL_COREI7_BROADWELL},
35396 {"bonnell", M_INTEL_BONNELL},
35397 {"silvermont", M_INTEL_SILVERMONT},
35398 {"knl", M_INTEL_KNL},
35399 {"amdfam10h", M_AMDFAM10H},
35400 {"barcelona", M_AMDFAM10H_BARCELONA},
35401 {"shanghai", M_AMDFAM10H_SHANGHAI},
35402 {"istanbul", M_AMDFAM10H_ISTANBUL},
35403 {"btver1", M_AMD_BTVER1},
35404 {"amdfam15h", M_AMDFAM15H},
35405 {"bdver1", M_AMDFAM15H_BDVER1},
35406 {"bdver2", M_AMDFAM15H_BDVER2},
35407 {"bdver3", M_AMDFAM15H_BDVER3},
35408 {"bdver4", M_AMDFAM15H_BDVER4},
35409 {"btver2", M_AMD_BTVER2},
35410 };
35411
35412 static struct _isa_names_table
35413 {
35414 const char *const name;
35415 const enum processor_features feature;
35416 }
35417 const isa_names_table[] =
35418 {
35419 {"cmov", F_CMOV},
35420 {"mmx", F_MMX},
35421 {"popcnt", F_POPCNT},
35422 {"sse", F_SSE},
35423 {"sse2", F_SSE2},
35424 {"sse3", F_SSE3},
35425 {"ssse3", F_SSSE3},
35426 {"sse4a", F_SSE4_A},
35427 {"sse4.1", F_SSE4_1},
35428 {"sse4.2", F_SSE4_2},
35429 {"avx", F_AVX},
35430 {"fma4", F_FMA4},
35431 {"xop", F_XOP},
35432 {"fma", F_FMA},
35433 {"avx2", F_AVX2},
35434 {"avx512f",F_AVX512F},
35435 {"bmi", F_BMI},
35436 {"bmi2", F_BMI2}
35437 };
35438
35439 tree __processor_model_type = build_processor_model_struct ();
35440 tree __cpu_model_var = make_var_decl (__processor_model_type,
35441 "__cpu_model");
35442
35443
35444 varpool_node::add (__cpu_model_var);
35445
35446 gcc_assert ((args != NULL) && (*args != NULL));
35447
35448 param_string_cst = *args;
35449 while (param_string_cst
35450 && TREE_CODE (param_string_cst) != STRING_CST)
35451 {
35452 /* *args must be an expr that can contain other EXPRs leading to a
35453 STRING_CST. */
35454 if (!EXPR_P (param_string_cst))
35455 {
35456 error ("Parameter to builtin must be a string constant or literal");
35457 return integer_zero_node;
35458 }
35459 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35460 }
35461
35462 gcc_assert (param_string_cst);
35463
35464 if (fn_code == IX86_BUILTIN_CPU_IS)
35465 {
35466 tree ref;
35467 tree field;
35468 tree final;
35469
35470 unsigned int field_val = 0;
35471 unsigned int NUM_ARCH_NAMES
35472 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35473
35474 for (i = 0; i < NUM_ARCH_NAMES; i++)
35475 if (strcmp (arch_names_table[i].name,
35476 TREE_STRING_POINTER (param_string_cst)) == 0)
35477 break;
35478
35479 if (i == NUM_ARCH_NAMES)
35480 {
35481 error ("Parameter to builtin not valid: %s",
35482 TREE_STRING_POINTER (param_string_cst));
35483 return integer_zero_node;
35484 }
35485
35486 field = TYPE_FIELDS (__processor_model_type);
35487 field_val = arch_names_table[i].model;
35488
35489 /* CPU types are stored in the next field. */
35490 if (field_val > M_CPU_TYPE_START
35491 && field_val < M_CPU_SUBTYPE_START)
35492 {
35493 field = DECL_CHAIN (field);
35494 field_val -= M_CPU_TYPE_START;
35495 }
35496
35497 /* CPU subtypes are stored in the next field. */
35498 if (field_val > M_CPU_SUBTYPE_START)
35499 {
35500 field = DECL_CHAIN ( DECL_CHAIN (field));
35501 field_val -= M_CPU_SUBTYPE_START;
35502 }
35503
35504 /* Get the appropriate field in __cpu_model. */
35505 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35506 field, NULL_TREE);
35507
35508 /* Check the value. */
35509 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35510 build_int_cstu (unsigned_type_node, field_val));
35511 return build1 (CONVERT_EXPR, integer_type_node, final);
35512 }
35513 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35514 {
35515 tree ref;
35516 tree array_elt;
35517 tree field;
35518 tree final;
35519
35520 unsigned int field_val = 0;
35521 unsigned int NUM_ISA_NAMES
35522 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35523
35524 for (i = 0; i < NUM_ISA_NAMES; i++)
35525 if (strcmp (isa_names_table[i].name,
35526 TREE_STRING_POINTER (param_string_cst)) == 0)
35527 break;
35528
35529 if (i == NUM_ISA_NAMES)
35530 {
35531 error ("Parameter to builtin not valid: %s",
35532 TREE_STRING_POINTER (param_string_cst));
35533 return integer_zero_node;
35534 }
35535
35536 field = TYPE_FIELDS (__processor_model_type);
35537 /* Get the last field, which is __cpu_features. */
35538 while (DECL_CHAIN (field))
35539 field = DECL_CHAIN (field);
35540
35541 /* Get the appropriate field: __cpu_model.__cpu_features */
35542 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35543 field, NULL_TREE);
35544
35545 /* Access the 0th element of __cpu_features array. */
35546 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35547 integer_zero_node, NULL_TREE, NULL_TREE);
35548
35549 field_val = (1 << isa_names_table[i].feature);
35550 /* Return __cpu_model.__cpu_features[0] & field_val */
35551 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35552 build_int_cstu (unsigned_type_node, field_val));
35553 return build1 (CONVERT_EXPR, integer_type_node, final);
35554 }
35555 gcc_unreachable ();
35556 }
35557
35558 static tree
35559 ix86_fold_builtin (tree fndecl, int n_args,
35560 tree *args, bool ignore ATTRIBUTE_UNUSED)
35561 {
35562 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35563 {
35564 enum ix86_builtins fn_code = (enum ix86_builtins)
35565 DECL_FUNCTION_CODE (fndecl);
35566 if (fn_code == IX86_BUILTIN_CPU_IS
35567 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35568 {
35569 gcc_assert (n_args == 1);
35570 return fold_builtin_cpu (fndecl, args);
35571 }
35572 }
35573
35574 #ifdef SUBTARGET_FOLD_BUILTIN
35575 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35576 #endif
35577
35578 return NULL_TREE;
35579 }
35580
35581 /* Make builtins to detect cpu type and features supported. NAME is
35582 the builtin name, CODE is the builtin code, and FTYPE is the function
35583 type of the builtin. IS_CONST says whether the builtin is marked TREE_READONLY. */
35584
35585 static void
35586 make_cpu_type_builtin (const char* name, int code,
35587 enum ix86_builtin_func_type ftype, bool is_const)
35588 {
35589 tree decl;
35590 tree type;
35591
35592 type = ix86_get_builtin_func_type (ftype);
35593 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35594 NULL, NULL_TREE);
35595 gcc_assert (decl != NULL_TREE);
35596 ix86_builtins[(int) code] = decl;
35597 TREE_READONLY (decl) = is_const;
35598 }
35599
35600 /* Make builtins to get CPU type and features supported. The created
35601 builtins are:
35602
35603 __builtin_cpu_init (), to detect cpu type and features,
35604 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35605 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
35606 */
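/* Typical user-level usage of these builtins (the function name below is
   just an example):

     int
     pick_impl (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_is ("intel") && __builtin_cpu_supports ("avx2"))
         return 2;
       if (__builtin_cpu_supports ("sse4.2"))
         return 1;
       return 0;
     }

   __builtin_cpu_init should be called before the other two when they are
   used very early, e.g. in constructors; IFUNC resolvers call it
   explicitly, as dispatch_function_versions does above.  */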
35607
35608 static void
35609 ix86_init_platform_type_builtins (void)
35610 {
35611 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35612 INT_FTYPE_VOID, false);
35613 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35614 INT_FTYPE_PCCHAR, true);
35615 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35616 INT_FTYPE_PCCHAR, true);
35617 }
35618
35619 /* Internal method for ix86_init_builtins. */
35620
35621 static void
35622 ix86_init_builtins_va_builtins_abi (void)
35623 {
35624 tree ms_va_ref, sysv_va_ref;
35625 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35626 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35627 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35628 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35629
35630 if (!TARGET_64BIT)
35631 return;
35632 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35633 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35634 ms_va_ref = build_reference_type (ms_va_list_type_node);
35635 sysv_va_ref =
35636 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35637
35638 fnvoid_va_end_ms =
35639 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35640 fnvoid_va_start_ms =
35641 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35642 fnvoid_va_end_sysv =
35643 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35644 fnvoid_va_start_sysv =
35645 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35646 NULL_TREE);
35647 fnvoid_va_copy_ms =
35648 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35649 NULL_TREE);
35650 fnvoid_va_copy_sysv =
35651 build_function_type_list (void_type_node, sysv_va_ref,
35652 sysv_va_ref, NULL_TREE);
35653
35654 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35655 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35656 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35657 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35658 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35659 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35660 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35661 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35662 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35663 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35664 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35665 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35666 }
35667
35668 static void
35669 ix86_init_builtin_types (void)
35670 {
35671 tree float128_type_node, float80_type_node;
35672
35673 /* The __float80 type. */
35674 float80_type_node = long_double_type_node;
35675 if (TYPE_MODE (float80_type_node) != XFmode)
35676 {
35677 /* The __float80 type. */
35678 float80_type_node = make_node (REAL_TYPE);
35679
35680 TYPE_PRECISION (float80_type_node) = 80;
35681 layout_type (float80_type_node);
35682 }
35683 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35684
35685 /* The __float128 type. */
35686 float128_type_node = make_node (REAL_TYPE);
35687 TYPE_PRECISION (float128_type_node) = 128;
35688 layout_type (float128_type_node);
35689 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35690
35691 /* This macro is built by i386-builtin-types.awk. */
35692 DEFINE_BUILTIN_PRIMITIVE_TYPES;
35693 }
35694
35695 static void
35696 ix86_init_builtins (void)
35697 {
35698 tree t;
35699
35700 ix86_init_builtin_types ();
35701
35702 /* Builtins to get CPU type and features. */
35703 ix86_init_platform_type_builtins ();
35704
35705 /* TFmode support builtins. */
35706 def_builtin_const (0, "__builtin_infq",
35707 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35708 def_builtin_const (0, "__builtin_huge_valq",
35709 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35710
35711 /* We will expand them to normal calls if SSE isn't available since
35712 they are used by libgcc. */
35713 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35714 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35715 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35716 TREE_READONLY (t) = 1;
35717 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35718
35719 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35720 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35721 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35722 TREE_READONLY (t) = 1;
35723 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35724
35725 ix86_init_tm_builtins ();
35726 ix86_init_mmx_sse_builtins ();
35727 ix86_init_mpx_builtins ();
35728
35729 if (TARGET_LP64)
35730 ix86_init_builtins_va_builtins_abi ();
35731
35732 #ifdef SUBTARGET_INIT_BUILTINS
35733 SUBTARGET_INIT_BUILTINS;
35734 #endif
35735 }
35736
35737 /* Return the ix86 builtin for CODE. */
35738
35739 static tree
35740 ix86_builtin_decl (unsigned code, bool)
35741 {
35742 if (code >= IX86_BUILTIN_MAX)
35743 return error_mark_node;
35744
35745 return ix86_builtins[code];
35746 }
35747
35748 /* Errors in the source file can cause expand_expr to return const0_rtx
35749 where we expect a vector. To avoid crashing, use one of the vector
35750 clear instructions. */
35751 static rtx
35752 safe_vector_operand (rtx x, machine_mode mode)
35753 {
35754 if (x == const0_rtx)
35755 x = CONST0_RTX (mode);
35756 return x;
35757 }
35758
35759 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
35760
35761 static rtx
35762 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35763 {
35764 rtx pat;
35765 tree arg0 = CALL_EXPR_ARG (exp, 0);
35766 tree arg1 = CALL_EXPR_ARG (exp, 1);
35767 rtx op0 = expand_normal (arg0);
35768 rtx op1 = expand_normal (arg1);
35769 machine_mode tmode = insn_data[icode].operand[0].mode;
35770 machine_mode mode0 = insn_data[icode].operand[1].mode;
35771 machine_mode mode1 = insn_data[icode].operand[2].mode;
35772
35773 if (VECTOR_MODE_P (mode0))
35774 op0 = safe_vector_operand (op0, mode0);
35775 if (VECTOR_MODE_P (mode1))
35776 op1 = safe_vector_operand (op1, mode1);
35777
35778 if (optimize || !target
35779 || GET_MODE (target) != tmode
35780 || !insn_data[icode].operand[0].predicate (target, tmode))
35781 target = gen_reg_rtx (tmode);
35782
35783 if (GET_MODE (op1) == SImode && mode1 == TImode)
35784 {
35785 rtx x = gen_reg_rtx (V4SImode);
35786 emit_insn (gen_sse2_loadd (x, op1));
35787 op1 = gen_lowpart (TImode, x);
35788 }
35789
35790 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35791 op0 = copy_to_mode_reg (mode0, op0);
35792 if (!insn_data[icode].operand[2].predicate (op1, mode1))
35793 op1 = copy_to_mode_reg (mode1, op1);
35794
35795 pat = GEN_FCN (icode) (target, op0, op1);
35796 if (! pat)
35797 return 0;
35798
35799 emit_insn (pat);
35800
35801 return target;
35802 }
35803
35804 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
35805
35806 static rtx
35807 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
35808 enum ix86_builtin_func_type m_type,
35809 enum rtx_code sub_code)
35810 {
35811 rtx pat;
35812 int i;
35813 int nargs;
35814 bool comparison_p = false;
35815 bool tf_p = false;
35816 bool last_arg_constant = false;
35817 int num_memory = 0;
35818 struct {
35819 rtx op;
35820 machine_mode mode;
35821 } args[4];
35822
35823 machine_mode tmode = insn_data[icode].operand[0].mode;
35824
35825 switch (m_type)
35826 {
35827 case MULTI_ARG_4_DF2_DI_I:
35828 case MULTI_ARG_4_DF2_DI_I1:
35829 case MULTI_ARG_4_SF2_SI_I:
35830 case MULTI_ARG_4_SF2_SI_I1:
35831 nargs = 4;
35832 last_arg_constant = true;
35833 break;
35834
35835 case MULTI_ARG_3_SF:
35836 case MULTI_ARG_3_DF:
35837 case MULTI_ARG_3_SF2:
35838 case MULTI_ARG_3_DF2:
35839 case MULTI_ARG_3_DI:
35840 case MULTI_ARG_3_SI:
35841 case MULTI_ARG_3_SI_DI:
35842 case MULTI_ARG_3_HI:
35843 case MULTI_ARG_3_HI_SI:
35844 case MULTI_ARG_3_QI:
35845 case MULTI_ARG_3_DI2:
35846 case MULTI_ARG_3_SI2:
35847 case MULTI_ARG_3_HI2:
35848 case MULTI_ARG_3_QI2:
35849 nargs = 3;
35850 break;
35851
35852 case MULTI_ARG_2_SF:
35853 case MULTI_ARG_2_DF:
35854 case MULTI_ARG_2_DI:
35855 case MULTI_ARG_2_SI:
35856 case MULTI_ARG_2_HI:
35857 case MULTI_ARG_2_QI:
35858 nargs = 2;
35859 break;
35860
35861 case MULTI_ARG_2_DI_IMM:
35862 case MULTI_ARG_2_SI_IMM:
35863 case MULTI_ARG_2_HI_IMM:
35864 case MULTI_ARG_2_QI_IMM:
35865 nargs = 2;
35866 last_arg_constant = true;
35867 break;
35868
35869 case MULTI_ARG_1_SF:
35870 case MULTI_ARG_1_DF:
35871 case MULTI_ARG_1_SF2:
35872 case MULTI_ARG_1_DF2:
35873 case MULTI_ARG_1_DI:
35874 case MULTI_ARG_1_SI:
35875 case MULTI_ARG_1_HI:
35876 case MULTI_ARG_1_QI:
35877 case MULTI_ARG_1_SI_DI:
35878 case MULTI_ARG_1_HI_DI:
35879 case MULTI_ARG_1_HI_SI:
35880 case MULTI_ARG_1_QI_DI:
35881 case MULTI_ARG_1_QI_SI:
35882 case MULTI_ARG_1_QI_HI:
35883 nargs = 1;
35884 break;
35885
35886 case MULTI_ARG_2_DI_CMP:
35887 case MULTI_ARG_2_SI_CMP:
35888 case MULTI_ARG_2_HI_CMP:
35889 case MULTI_ARG_2_QI_CMP:
35890 nargs = 2;
35891 comparison_p = true;
35892 break;
35893
35894 case MULTI_ARG_2_SF_TF:
35895 case MULTI_ARG_2_DF_TF:
35896 case MULTI_ARG_2_DI_TF:
35897 case MULTI_ARG_2_SI_TF:
35898 case MULTI_ARG_2_HI_TF:
35899 case MULTI_ARG_2_QI_TF:
35900 nargs = 2;
35901 tf_p = true;
35902 break;
35903
35904 default:
35905 gcc_unreachable ();
35906 }
35907
35908 if (optimize || !target
35909 || GET_MODE (target) != tmode
35910 || !insn_data[icode].operand[0].predicate (target, tmode))
35911 target = gen_reg_rtx (tmode);
35912
35913 gcc_assert (nargs <= 4);
35914
35915 for (i = 0; i < nargs; i++)
35916 {
35917 tree arg = CALL_EXPR_ARG (exp, i);
35918 rtx op = expand_normal (arg);
35919 int adjust = (comparison_p) ? 1 : 0;
35920 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
35921
35922 if (last_arg_constant && i == nargs - 1)
35923 {
35924 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
35925 {
35926 enum insn_code new_icode = icode;
35927 switch (icode)
35928 {
35929 case CODE_FOR_xop_vpermil2v2df3:
35930 case CODE_FOR_xop_vpermil2v4sf3:
35931 case CODE_FOR_xop_vpermil2v4df3:
35932 case CODE_FOR_xop_vpermil2v8sf3:
35933 error ("the last argument must be a 2-bit immediate");
35934 return gen_reg_rtx (tmode);
35935 case CODE_FOR_xop_rotlv2di3:
35936 new_icode = CODE_FOR_rotlv2di3;
35937 goto xop_rotl;
35938 case CODE_FOR_xop_rotlv4si3:
35939 new_icode = CODE_FOR_rotlv4si3;
35940 goto xop_rotl;
35941 case CODE_FOR_xop_rotlv8hi3:
35942 new_icode = CODE_FOR_rotlv8hi3;
35943 goto xop_rotl;
35944 case CODE_FOR_xop_rotlv16qi3:
35945 new_icode = CODE_FOR_rotlv16qi3;
35946 xop_rotl:
35947 if (CONST_INT_P (op))
35948 {
35949 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
35950 op = GEN_INT (INTVAL (op) & mask);
35951 gcc_checking_assert
35952 (insn_data[icode].operand[i + 1].predicate (op, mode));
35953 }
35954 else
35955 {
35956 gcc_checking_assert
35957 (nargs == 2
35958 && insn_data[new_icode].operand[0].mode == tmode
35959 && insn_data[new_icode].operand[1].mode == tmode
35960 && insn_data[new_icode].operand[2].mode == mode
35961 && insn_data[new_icode].operand[0].predicate
35962 == insn_data[icode].operand[0].predicate
35963 && insn_data[new_icode].operand[1].predicate
35964 == insn_data[icode].operand[1].predicate);
35965 icode = new_icode;
35966 goto non_constant;
35967 }
35968 break;
35969 default:
35970 gcc_unreachable ();
35971 }
35972 }
35973 }
35974 else
35975 {
35976 non_constant:
35977 if (VECTOR_MODE_P (mode))
35978 op = safe_vector_operand (op, mode);
35979
35980 /* If we aren't optimizing, only allow one memory operand to be
35981 generated. */
35982 if (memory_operand (op, mode))
35983 num_memory++;
35984
35985 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
35986
35987 if (optimize
35988 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
35989 || num_memory > 1)
35990 op = force_reg (mode, op);
35991 }
35992
35993 args[i].op = op;
35994 args[i].mode = mode;
35995 }
35996
35997 switch (nargs)
35998 {
35999 case 1:
36000 pat = GEN_FCN (icode) (target, args[0].op);
36001 break;
36002
36003 case 2:
36004 if (tf_p)
36005 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
36006 GEN_INT ((int)sub_code));
36007 else if (! comparison_p)
36008 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36009 else
36010 {
36011 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
36012 args[0].op,
36013 args[1].op);
36014
36015 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
36016 }
36017 break;
36018
36019 case 3:
36020 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36021 break;
36022
36023 case 4:
36024 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
36025 break;
36026
36027 default:
36028 gcc_unreachable ();
36029 }
36030
36031 if (! pat)
36032 return 0;
36033
36034 emit_insn (pat);
36035 return target;
36036 }
36037
36038 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
36039 insns with vec_merge. */
36040
36041 static rtx
36042 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
36043 rtx target)
36044 {
36045 rtx pat;
36046 tree arg0 = CALL_EXPR_ARG (exp, 0);
36047 rtx op1, op0 = expand_normal (arg0);
36048 machine_mode tmode = insn_data[icode].operand[0].mode;
36049 machine_mode mode0 = insn_data[icode].operand[1].mode;
36050
36051 if (optimize || !target
36052 || GET_MODE (target) != tmode
36053 || !insn_data[icode].operand[0].predicate (target, tmode))
36054 target = gen_reg_rtx (tmode);
36055
36056 if (VECTOR_MODE_P (mode0))
36057 op0 = safe_vector_operand (op0, mode0);
36058
36059 if ((optimize && !register_operand (op0, mode0))
36060 || !insn_data[icode].operand[1].predicate (op0, mode0))
36061 op0 = copy_to_mode_reg (mode0, op0);
36062
36063 op1 = op0;
36064 if (!insn_data[icode].operand[2].predicate (op1, mode0))
36065 op1 = copy_to_mode_reg (mode0, op1);
36066
36067 pat = GEN_FCN (icode) (target, op0, op1);
36068 if (! pat)
36069 return 0;
36070 emit_insn (pat);
36071 return target;
36072 }
36073
36074 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
36075
36076 static rtx
36077 ix86_expand_sse_compare (const struct builtin_description *d,
36078 tree exp, rtx target, bool swap)
36079 {
36080 rtx pat;
36081 tree arg0 = CALL_EXPR_ARG (exp, 0);
36082 tree arg1 = CALL_EXPR_ARG (exp, 1);
36083 rtx op0 = expand_normal (arg0);
36084 rtx op1 = expand_normal (arg1);
36085 rtx op2;
36086 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36087 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36088 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36089 enum rtx_code comparison = d->comparison;
36090
36091 if (VECTOR_MODE_P (mode0))
36092 op0 = safe_vector_operand (op0, mode0);
36093 if (VECTOR_MODE_P (mode1))
36094 op1 = safe_vector_operand (op1, mode1);
36095
36096 /* Swap operands if we have a comparison that isn't available in
36097 hardware. */
36098 if (swap)
36099 std::swap (op0, op1);
36100
36101 if (optimize || !target
36102 || GET_MODE (target) != tmode
36103 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36104 target = gen_reg_rtx (tmode);
36105
36106 if ((optimize && !register_operand (op0, mode0))
36107 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36108 op0 = copy_to_mode_reg (mode0, op0);
36109 if ((optimize && !register_operand (op1, mode1))
36110 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36111 op1 = copy_to_mode_reg (mode1, op1);
36112
36113 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36114 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36115 if (! pat)
36116 return 0;
36117 emit_insn (pat);
36118 return target;
36119 }
36120
36121 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
36122
36123 static rtx
36124 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36125 rtx target)
36126 {
36127 rtx pat;
36128 tree arg0 = CALL_EXPR_ARG (exp, 0);
36129 tree arg1 = CALL_EXPR_ARG (exp, 1);
36130 rtx op0 = expand_normal (arg0);
36131 rtx op1 = expand_normal (arg1);
36132 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36133 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36134 enum rtx_code comparison = d->comparison;
36135
36136 if (VECTOR_MODE_P (mode0))
36137 op0 = safe_vector_operand (op0, mode0);
36138 if (VECTOR_MODE_P (mode1))
36139 op1 = safe_vector_operand (op1, mode1);
36140
36141 /* Swap operands if we have a comparison that isn't available in
36142 hardware. */
36143 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36144 std::swap (op0, op1);
36145
36146 target = gen_reg_rtx (SImode);
36147 emit_move_insn (target, const0_rtx);
36148 target = gen_rtx_SUBREG (QImode, target, 0);
36149
36150 if ((optimize && !register_operand (op0, mode0))
36151 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36152 op0 = copy_to_mode_reg (mode0, op0);
36153 if ((optimize && !register_operand (op1, mode1))
36154 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36155 op1 = copy_to_mode_reg (mode1, op1);
36156
36157 pat = GEN_FCN (d->icode) (op0, op1);
36158 if (! pat)
36159 return 0;
36160 emit_insn (pat);
36161 emit_insn (gen_rtx_SET (VOIDmode,
36162 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36163 gen_rtx_fmt_ee (comparison, QImode,
36164 SET_DEST (pat),
36165 const0_rtx)));
36166
36167 return SUBREG_REG (target);
36168 }
36169
36170 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
36171
36172 static rtx
36173 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36174 rtx target)
36175 {
36176 rtx pat;
36177 tree arg0 = CALL_EXPR_ARG (exp, 0);
36178 rtx op1, op0 = expand_normal (arg0);
36179 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36180 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36181
36182 if (optimize || target == 0
36183 || GET_MODE (target) != tmode
36184 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36185 target = gen_reg_rtx (tmode);
36186
36187 if (VECTOR_MODE_P (mode0))
36188 op0 = safe_vector_operand (op0, mode0);
36189
36190 if ((optimize && !register_operand (op0, mode0))
36191 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36192 op0 = copy_to_mode_reg (mode0, op0);
36193
36194 op1 = GEN_INT (d->comparison);
36195
36196 pat = GEN_FCN (d->icode) (target, op0, op1);
36197 if (! pat)
36198 return 0;
36199 emit_insn (pat);
36200 return target;
36201 }
36202
36203 static rtx
36204 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36205 tree exp, rtx target)
36206 {
36207 rtx pat;
36208 tree arg0 = CALL_EXPR_ARG (exp, 0);
36209 tree arg1 = CALL_EXPR_ARG (exp, 1);
36210 rtx op0 = expand_normal (arg0);
36211 rtx op1 = expand_normal (arg1);
36212 rtx op2;
36213 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36214 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36215 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36216
36217 if (optimize || target == 0
36218 || GET_MODE (target) != tmode
36219 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36220 target = gen_reg_rtx (tmode);
36221
36222 op0 = safe_vector_operand (op0, mode0);
36223 op1 = safe_vector_operand (op1, mode1);
36224
36225 if ((optimize && !register_operand (op0, mode0))
36226 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36227 op0 = copy_to_mode_reg (mode0, op0);
36228 if ((optimize && !register_operand (op1, mode1))
36229 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36230 op1 = copy_to_mode_reg (mode1, op1);
36231
36232 op2 = GEN_INT (d->comparison);
36233
36234 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36235 if (! pat)
36236 return 0;
36237 emit_insn (pat);
36238 return target;
36239 }
36240
36241 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
36242
36243 static rtx
36244 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36245 rtx target)
36246 {
36247 rtx pat;
36248 tree arg0 = CALL_EXPR_ARG (exp, 0);
36249 tree arg1 = CALL_EXPR_ARG (exp, 1);
36250 rtx op0 = expand_normal (arg0);
36251 rtx op1 = expand_normal (arg1);
36252 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36253 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36254 enum rtx_code comparison = d->comparison;
36255
36256 if (VECTOR_MODE_P (mode0))
36257 op0 = safe_vector_operand (op0, mode0);
36258 if (VECTOR_MODE_P (mode1))
36259 op1 = safe_vector_operand (op1, mode1);
36260
36261 target = gen_reg_rtx (SImode);
36262 emit_move_insn (target, const0_rtx);
36263 target = gen_rtx_SUBREG (QImode, target, 0);
36264
36265 if ((optimize && !register_operand (op0, mode0))
36266 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36267 op0 = copy_to_mode_reg (mode0, op0);
36268 if ((optimize && !register_operand (op1, mode1))
36269 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36270 op1 = copy_to_mode_reg (mode1, op1);
36271
36272 pat = GEN_FCN (d->icode) (op0, op1);
36273 if (! pat)
36274 return 0;
36275 emit_insn (pat);
36276 emit_insn (gen_rtx_SET (VOIDmode,
36277 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36278 gen_rtx_fmt_ee (comparison, QImode,
36279 SET_DEST (pat),
36280 const0_rtx)));
36281
36282 return SUBREG_REG (target);
36283 }
36284
36285 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
36286
36287 static rtx
36288 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36289 tree exp, rtx target)
36290 {
36291 rtx pat;
36292 tree arg0 = CALL_EXPR_ARG (exp, 0);
36293 tree arg1 = CALL_EXPR_ARG (exp, 1);
36294 tree arg2 = CALL_EXPR_ARG (exp, 2);
36295 tree arg3 = CALL_EXPR_ARG (exp, 3);
36296 tree arg4 = CALL_EXPR_ARG (exp, 4);
36297 rtx scratch0, scratch1;
36298 rtx op0 = expand_normal (arg0);
36299 rtx op1 = expand_normal (arg1);
36300 rtx op2 = expand_normal (arg2);
36301 rtx op3 = expand_normal (arg3);
36302 rtx op4 = expand_normal (arg4);
36303 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36304
36305 tmode0 = insn_data[d->icode].operand[0].mode;
36306 tmode1 = insn_data[d->icode].operand[1].mode;
36307 modev2 = insn_data[d->icode].operand[2].mode;
36308 modei3 = insn_data[d->icode].operand[3].mode;
36309 modev4 = insn_data[d->icode].operand[4].mode;
36310 modei5 = insn_data[d->icode].operand[5].mode;
36311 modeimm = insn_data[d->icode].operand[6].mode;
36312
36313 if (VECTOR_MODE_P (modev2))
36314 op0 = safe_vector_operand (op0, modev2);
36315 if (VECTOR_MODE_P (modev4))
36316 op2 = safe_vector_operand (op2, modev4);
36317
36318 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36319 op0 = copy_to_mode_reg (modev2, op0);
36320 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36321 op1 = copy_to_mode_reg (modei3, op1);
36322 if ((optimize && !register_operand (op2, modev4))
36323 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36324 op2 = copy_to_mode_reg (modev4, op2);
36325 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36326 op3 = copy_to_mode_reg (modei5, op3);
36327
36328 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36329 {
36330 error ("the fifth argument must be an 8-bit immediate");
36331 return const0_rtx;
36332 }
36333
36334 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36335 {
36336 if (optimize || !target
36337 || GET_MODE (target) != tmode0
36338 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36339 target = gen_reg_rtx (tmode0);
36340
36341 scratch1 = gen_reg_rtx (tmode1);
36342
36343 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36344 }
36345 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36346 {
36347 if (optimize || !target
36348 || GET_MODE (target) != tmode1
36349 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36350 target = gen_reg_rtx (tmode1);
36351
36352 scratch0 = gen_reg_rtx (tmode0);
36353
36354 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36355 }
36356 else
36357 {
36358 gcc_assert (d->flag);
36359
36360 scratch0 = gen_reg_rtx (tmode0);
36361 scratch1 = gen_reg_rtx (tmode1);
36362
36363 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36364 }
36365
36366 if (! pat)
36367 return 0;
36368
36369 emit_insn (pat);
36370
36371 if (d->flag)
36372 {
36373 target = gen_reg_rtx (SImode);
36374 emit_move_insn (target, const0_rtx);
36375 target = gen_rtx_SUBREG (QImode, target, 0);
36376
36377 emit_insn
36378 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36379 gen_rtx_fmt_ee (EQ, QImode,
36380 gen_rtx_REG ((machine_mode) d->flag,
36381 FLAGS_REG),
36382 const0_rtx)));
36383 return SUBREG_REG (target);
36384 }
36385 else
36386 return target;
36387 }
36388
36389
36390 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
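
/* Example (user-level sketch, assuming the usual SSE4.2 wrappers; the
   helper name is illustrative): the flag-reading variants such as
   _mm_cmpistrc typically map to builtins whose d->flag is nonzero, so
   the expander below materializes the result from FLAGS_REG rather
   than from the index/mask output.

     #include <nmmintrin.h>

     int
     any_byte_matches (__m128i set, __m128i text)
     {
       return _mm_cmpistrc (set, text,
                            _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);
     }

   Compile with -msse4.2.  */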
36391
36392 static rtx
36393 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36394 tree exp, rtx target)
36395 {
36396 rtx pat;
36397 tree arg0 = CALL_EXPR_ARG (exp, 0);
36398 tree arg1 = CALL_EXPR_ARG (exp, 1);
36399 tree arg2 = CALL_EXPR_ARG (exp, 2);
36400 rtx scratch0, scratch1;
36401 rtx op0 = expand_normal (arg0);
36402 rtx op1 = expand_normal (arg1);
36403 rtx op2 = expand_normal (arg2);
36404 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36405
36406 tmode0 = insn_data[d->icode].operand[0].mode;
36407 tmode1 = insn_data[d->icode].operand[1].mode;
36408 modev2 = insn_data[d->icode].operand[2].mode;
36409 modev3 = insn_data[d->icode].operand[3].mode;
36410 modeimm = insn_data[d->icode].operand[4].mode;
36411
36412 if (VECTOR_MODE_P (modev2))
36413 op0 = safe_vector_operand (op0, modev2);
36414 if (VECTOR_MODE_P (modev3))
36415 op1 = safe_vector_operand (op1, modev3);
36416
36417 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36418 op0 = copy_to_mode_reg (modev2, op0);
36419 if ((optimize && !register_operand (op1, modev3))
36420 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36421 op1 = copy_to_mode_reg (modev3, op1);
36422
36423 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36424 {
36425 error ("the third argument must be an 8-bit immediate");
36426 return const0_rtx;
36427 }
36428
36429 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36430 {
36431 if (optimize || !target
36432 || GET_MODE (target) != tmode0
36433 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36434 target = gen_reg_rtx (tmode0);
36435
36436 scratch1 = gen_reg_rtx (tmode1);
36437
36438 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36439 }
36440 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36441 {
36442 if (optimize || !target
36443 || GET_MODE (target) != tmode1
36444 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36445 target = gen_reg_rtx (tmode1);
36446
36447 scratch0 = gen_reg_rtx (tmode0);
36448
36449 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36450 }
36451 else
36452 {
36453 gcc_assert (d->flag);
36454
36455 scratch0 = gen_reg_rtx (tmode0);
36456 scratch1 = gen_reg_rtx (tmode1);
36457
36458 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36459 }
36460
36461 if (! pat)
36462 return 0;
36463
36464 emit_insn (pat);
36465
36466 if (d->flag)
36467 {
36468 target = gen_reg_rtx (SImode);
36469 emit_move_insn (target, const0_rtx);
36470 target = gen_rtx_SUBREG (QImode, target, 0);
36471
36472 emit_insn
36473 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36474 gen_rtx_fmt_ee (EQ, QImode,
36475 gen_rtx_REG ((machine_mode) d->flag,
36476 FLAGS_REG),
36477 const0_rtx)));
36478 return SUBREG_REG (target);
36479 }
36480 else
36481 return target;
36482 }
36483
36484 /* Subroutine of ix86_expand_builtin to take care of insns with
36485    a variable number of operands.  */
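
/* Example (user-level sketch; the helper name is illustrative): a
   builtin classified below as V4SI_FTYPE_V4SI_INT (nargs = 2,
   nargs_constant = 1), such as the one behind _mm_shuffle_epi32 in
   <emmintrin.h>, requires its trailing argument to be an 8-bit
   immediate; 0x39 here is _MM_SHUFFLE (0, 3, 2, 1), rotating the four
   lanes.  A non-constant selector is rejected by the immediate checks
   further down.

     #include <emmintrin.h>

     __m128i
     rotate_lanes (__m128i v)
     {
       return _mm_shuffle_epi32 (v, 0x39);
     }  */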
36486
36487 static rtx
36488 ix86_expand_args_builtin (const struct builtin_description *d,
36489 tree exp, rtx target)
36490 {
36491 rtx pat, real_target;
36492 unsigned int i, nargs;
36493 unsigned int nargs_constant = 0;
36494 unsigned int mask_pos = 0;
36495 int num_memory = 0;
36496 struct
36497 {
36498 rtx op;
36499 machine_mode mode;
36500 } args[6];
36501 bool last_arg_count = false;
36502 enum insn_code icode = d->icode;
36503 const struct insn_data_d *insn_p = &insn_data[icode];
36504 machine_mode tmode = insn_p->operand[0].mode;
36505 machine_mode rmode = VOIDmode;
36506 bool swap = false;
36507 enum rtx_code comparison = d->comparison;
36508
36509 switch ((enum ix86_builtin_func_type) d->flag)
36510 {
36511 case V2DF_FTYPE_V2DF_ROUND:
36512 case V4DF_FTYPE_V4DF_ROUND:
36513 case V4SF_FTYPE_V4SF_ROUND:
36514 case V8SF_FTYPE_V8SF_ROUND:
36515 case V4SI_FTYPE_V4SF_ROUND:
36516 case V8SI_FTYPE_V8SF_ROUND:
36517 return ix86_expand_sse_round (d, exp, target);
36518 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36519 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36520 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36521 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36522 case INT_FTYPE_V8SF_V8SF_PTEST:
36523 case INT_FTYPE_V4DI_V4DI_PTEST:
36524 case INT_FTYPE_V4DF_V4DF_PTEST:
36525 case INT_FTYPE_V4SF_V4SF_PTEST:
36526 case INT_FTYPE_V2DI_V2DI_PTEST:
36527 case INT_FTYPE_V2DF_V2DF_PTEST:
36528 return ix86_expand_sse_ptest (d, exp, target);
36529 case FLOAT128_FTYPE_FLOAT128:
36530 case FLOAT_FTYPE_FLOAT:
36531 case INT_FTYPE_INT:
36532 case UINT64_FTYPE_INT:
36533 case UINT16_FTYPE_UINT16:
36534 case INT64_FTYPE_INT64:
36535 case INT64_FTYPE_V4SF:
36536 case INT64_FTYPE_V2DF:
36537 case INT_FTYPE_V16QI:
36538 case INT_FTYPE_V8QI:
36539 case INT_FTYPE_V8SF:
36540 case INT_FTYPE_V4DF:
36541 case INT_FTYPE_V4SF:
36542 case INT_FTYPE_V2DF:
36543 case INT_FTYPE_V32QI:
36544 case V16QI_FTYPE_V16QI:
36545 case V8SI_FTYPE_V8SF:
36546 case V8SI_FTYPE_V4SI:
36547 case V8HI_FTYPE_V8HI:
36548 case V8HI_FTYPE_V16QI:
36549 case V8QI_FTYPE_V8QI:
36550 case V8SF_FTYPE_V8SF:
36551 case V8SF_FTYPE_V8SI:
36552 case V8SF_FTYPE_V4SF:
36553 case V8SF_FTYPE_V8HI:
36554 case V4SI_FTYPE_V4SI:
36555 case V4SI_FTYPE_V16QI:
36556 case V4SI_FTYPE_V4SF:
36557 case V4SI_FTYPE_V8SI:
36558 case V4SI_FTYPE_V8HI:
36559 case V4SI_FTYPE_V4DF:
36560 case V4SI_FTYPE_V2DF:
36561 case V4HI_FTYPE_V4HI:
36562 case V4DF_FTYPE_V4DF:
36563 case V4DF_FTYPE_V4SI:
36564 case V4DF_FTYPE_V4SF:
36565 case V4DF_FTYPE_V2DF:
36566 case V4SF_FTYPE_V4SF:
36567 case V4SF_FTYPE_V4SI:
36568 case V4SF_FTYPE_V8SF:
36569 case V4SF_FTYPE_V4DF:
36570 case V4SF_FTYPE_V8HI:
36571 case V4SF_FTYPE_V2DF:
36572 case V2DI_FTYPE_V2DI:
36573 case V2DI_FTYPE_V16QI:
36574 case V2DI_FTYPE_V8HI:
36575 case V2DI_FTYPE_V4SI:
36576 case V2DF_FTYPE_V2DF:
36577 case V2DF_FTYPE_V4SI:
36578 case V2DF_FTYPE_V4DF:
36579 case V2DF_FTYPE_V4SF:
36580 case V2DF_FTYPE_V2SI:
36581 case V2SI_FTYPE_V2SI:
36582 case V2SI_FTYPE_V4SF:
36583 case V2SI_FTYPE_V2SF:
36584 case V2SI_FTYPE_V2DF:
36585 case V2SF_FTYPE_V2SF:
36586 case V2SF_FTYPE_V2SI:
36587 case V32QI_FTYPE_V32QI:
36588 case V32QI_FTYPE_V16QI:
36589 case V16HI_FTYPE_V16HI:
36590 case V16HI_FTYPE_V8HI:
36591 case V8SI_FTYPE_V8SI:
36592 case V16HI_FTYPE_V16QI:
36593 case V8SI_FTYPE_V16QI:
36594 case V4DI_FTYPE_V16QI:
36595 case V8SI_FTYPE_V8HI:
36596 case V4DI_FTYPE_V8HI:
36597 case V4DI_FTYPE_V4SI:
36598 case V4DI_FTYPE_V2DI:
36599 case HI_FTYPE_HI:
36600 case HI_FTYPE_V16QI:
36601 case SI_FTYPE_V32QI:
36602 case DI_FTYPE_V64QI:
36603 case V16QI_FTYPE_HI:
36604 case V32QI_FTYPE_SI:
36605 case V64QI_FTYPE_DI:
36606 case V8HI_FTYPE_QI:
36607 case V16HI_FTYPE_HI:
36608 case V32HI_FTYPE_SI:
36609 case V4SI_FTYPE_QI:
36610 case V8SI_FTYPE_QI:
36611 case V4SI_FTYPE_HI:
36612 case V8SI_FTYPE_HI:
36613 case QI_FTYPE_V8HI:
36614 case HI_FTYPE_V16HI:
36615 case SI_FTYPE_V32HI:
36616 case QI_FTYPE_V4SI:
36617 case QI_FTYPE_V8SI:
36618 case HI_FTYPE_V16SI:
36619 case QI_FTYPE_V2DI:
36620 case QI_FTYPE_V4DI:
36621 case QI_FTYPE_V8DI:
36622 case UINT_FTYPE_V2DF:
36623 case UINT_FTYPE_V4SF:
36624 case UINT64_FTYPE_V2DF:
36625 case UINT64_FTYPE_V4SF:
36626 case V16QI_FTYPE_V8DI:
36627 case V16HI_FTYPE_V16SI:
36628 case V16SI_FTYPE_HI:
36629 case V2DI_FTYPE_QI:
36630 case V4DI_FTYPE_QI:
36631 case V16SI_FTYPE_V16SI:
36632 case V16SI_FTYPE_INT:
36633 case V16SF_FTYPE_FLOAT:
36634 case V16SF_FTYPE_V8SF:
36635 case V16SI_FTYPE_V8SI:
36636 case V16SF_FTYPE_V4SF:
36637 case V16SI_FTYPE_V4SI:
36638 case V16SF_FTYPE_V16SF:
36639 case V8HI_FTYPE_V8DI:
36640 case V8UHI_FTYPE_V8UHI:
36641 case V8SI_FTYPE_V8DI:
36642 case V8SF_FTYPE_V8DF:
36643 case V8DI_FTYPE_QI:
36644 case V8DI_FTYPE_INT64:
36645 case V8DI_FTYPE_V4DI:
36646 case V8DI_FTYPE_V8DI:
36647 case V8DF_FTYPE_DOUBLE:
36648 case V8DF_FTYPE_V4DF:
36649 case V8DF_FTYPE_V2DF:
36650 case V8DF_FTYPE_V8DF:
36651 case V8DF_FTYPE_V8SI:
36652 nargs = 1;
36653 break;
36654 case V4SF_FTYPE_V4SF_VEC_MERGE:
36655 case V2DF_FTYPE_V2DF_VEC_MERGE:
36656 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36657 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36658 case V16QI_FTYPE_V16QI_V16QI:
36659 case V16QI_FTYPE_V8HI_V8HI:
36660 case V16SI_FTYPE_V16SI_V16SI:
36661 case V16SF_FTYPE_V16SF_V16SF:
36662 case V16SF_FTYPE_V16SF_V16SI:
36663 case V8QI_FTYPE_V8QI_V8QI:
36664 case V8QI_FTYPE_V4HI_V4HI:
36665 case V8HI_FTYPE_V8HI_V8HI:
36666 case V8HI_FTYPE_V16QI_V16QI:
36667 case V8HI_FTYPE_V4SI_V4SI:
36668 case V8SF_FTYPE_V8SF_V8SF:
36669 case V8SF_FTYPE_V8SF_V8SI:
36670 case V8DI_FTYPE_V8DI_V8DI:
36671 case V8DF_FTYPE_V8DF_V8DF:
36672 case V8DF_FTYPE_V8DF_V8DI:
36673 case V4SI_FTYPE_V4SI_V4SI:
36674 case V4SI_FTYPE_V8HI_V8HI:
36675 case V4SI_FTYPE_V4SF_V4SF:
36676 case V4SI_FTYPE_V2DF_V2DF:
36677 case V4HI_FTYPE_V4HI_V4HI:
36678 case V4HI_FTYPE_V8QI_V8QI:
36679 case V4HI_FTYPE_V2SI_V2SI:
36680 case V4DF_FTYPE_V4DF_V4DF:
36681 case V4DF_FTYPE_V4DF_V4DI:
36682 case V4SF_FTYPE_V4SF_V4SF:
36683 case V4SF_FTYPE_V4SF_V4SI:
36684 case V4SF_FTYPE_V4SF_V2SI:
36685 case V4SF_FTYPE_V4SF_V2DF:
36686 case V4SF_FTYPE_V4SF_UINT:
36687 case V4SF_FTYPE_V4SF_UINT64:
36688 case V4SF_FTYPE_V4SF_DI:
36689 case V4SF_FTYPE_V4SF_SI:
36690 case V2DI_FTYPE_V2DI_V2DI:
36691 case V2DI_FTYPE_V16QI_V16QI:
36692 case V2DI_FTYPE_V4SI_V4SI:
36693 case V2UDI_FTYPE_V4USI_V4USI:
36694 case V2DI_FTYPE_V2DI_V16QI:
36695 case V2DI_FTYPE_V2DF_V2DF:
36696 case V2SI_FTYPE_V2SI_V2SI:
36697 case V2SI_FTYPE_V4HI_V4HI:
36698 case V2SI_FTYPE_V2SF_V2SF:
36699 case V2DF_FTYPE_V2DF_V2DF:
36700 case V2DF_FTYPE_V2DF_V4SF:
36701 case V2DF_FTYPE_V2DF_V2DI:
36702 case V2DF_FTYPE_V2DF_DI:
36703 case V2DF_FTYPE_V2DF_SI:
36704 case V2DF_FTYPE_V2DF_UINT:
36705 case V2DF_FTYPE_V2DF_UINT64:
36706 case V2SF_FTYPE_V2SF_V2SF:
36707 case V1DI_FTYPE_V1DI_V1DI:
36708 case V1DI_FTYPE_V8QI_V8QI:
36709 case V1DI_FTYPE_V2SI_V2SI:
36710 case V32QI_FTYPE_V16HI_V16HI:
36711 case V16HI_FTYPE_V8SI_V8SI:
36712 case V32QI_FTYPE_V32QI_V32QI:
36713 case V16HI_FTYPE_V32QI_V32QI:
36714 case V16HI_FTYPE_V16HI_V16HI:
36715 case V8SI_FTYPE_V4DF_V4DF:
36716 case V8SI_FTYPE_V8SI_V8SI:
36717 case V8SI_FTYPE_V16HI_V16HI:
36718 case V4DI_FTYPE_V4DI_V4DI:
36719 case V4DI_FTYPE_V8SI_V8SI:
36720 case V4UDI_FTYPE_V8USI_V8USI:
36721 case QI_FTYPE_V8DI_V8DI:
36722 case V8DI_FTYPE_V64QI_V64QI:
36723 case HI_FTYPE_V16SI_V16SI:
36724 if (comparison == UNKNOWN)
36725 return ix86_expand_binop_builtin (icode, exp, target);
36726 nargs = 2;
36727 break;
36728 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36729 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36730 gcc_assert (comparison != UNKNOWN);
36731 nargs = 2;
36732 swap = true;
36733 break;
36734 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36735 case V16HI_FTYPE_V16HI_SI_COUNT:
36736 case V8SI_FTYPE_V8SI_V4SI_COUNT:
36737 case V8SI_FTYPE_V8SI_SI_COUNT:
36738 case V4DI_FTYPE_V4DI_V2DI_COUNT:
36739 case V4DI_FTYPE_V4DI_INT_COUNT:
36740 case V8HI_FTYPE_V8HI_V8HI_COUNT:
36741 case V8HI_FTYPE_V8HI_SI_COUNT:
36742 case V4SI_FTYPE_V4SI_V4SI_COUNT:
36743 case V4SI_FTYPE_V4SI_SI_COUNT:
36744 case V4HI_FTYPE_V4HI_V4HI_COUNT:
36745 case V4HI_FTYPE_V4HI_SI_COUNT:
36746 case V2DI_FTYPE_V2DI_V2DI_COUNT:
36747 case V2DI_FTYPE_V2DI_SI_COUNT:
36748 case V2SI_FTYPE_V2SI_V2SI_COUNT:
36749 case V2SI_FTYPE_V2SI_SI_COUNT:
36750 case V1DI_FTYPE_V1DI_V1DI_COUNT:
36751 case V1DI_FTYPE_V1DI_SI_COUNT:
36752 nargs = 2;
36753 last_arg_count = true;
36754 break;
36755 case UINT64_FTYPE_UINT64_UINT64:
36756 case UINT_FTYPE_UINT_UINT:
36757 case UINT_FTYPE_UINT_USHORT:
36758 case UINT_FTYPE_UINT_UCHAR:
36759 case UINT16_FTYPE_UINT16_INT:
36760 case UINT8_FTYPE_UINT8_INT:
36761 case HI_FTYPE_HI_HI:
36762 case SI_FTYPE_SI_SI:
36763 case DI_FTYPE_DI_DI:
36764 case V16SI_FTYPE_V8DF_V8DF:
36765 nargs = 2;
36766 break;
36767 case V2DI_FTYPE_V2DI_INT_CONVERT:
36768 nargs = 2;
36769 rmode = V1TImode;
36770 nargs_constant = 1;
36771 break;
36772 case V4DI_FTYPE_V4DI_INT_CONVERT:
36773 nargs = 2;
36774 rmode = V2TImode;
36775 nargs_constant = 1;
36776 break;
36777 case V8DI_FTYPE_V8DI_INT_CONVERT:
36778 nargs = 2;
36779 rmode = V4TImode;
36780 nargs_constant = 1;
36781 break;
36782 case V8HI_FTYPE_V8HI_INT:
36783 case V8HI_FTYPE_V8SF_INT:
36784 case V16HI_FTYPE_V16SF_INT:
36785 case V8HI_FTYPE_V4SF_INT:
36786 case V8SF_FTYPE_V8SF_INT:
36787 case V4SF_FTYPE_V16SF_INT:
36788 case V16SF_FTYPE_V16SF_INT:
36789 case V4SI_FTYPE_V4SI_INT:
36790 case V4SI_FTYPE_V8SI_INT:
36791 case V4HI_FTYPE_V4HI_INT:
36792 case V4DF_FTYPE_V4DF_INT:
36793 case V4DF_FTYPE_V8DF_INT:
36794 case V4SF_FTYPE_V4SF_INT:
36795 case V4SF_FTYPE_V8SF_INT:
36796 case V2DI_FTYPE_V2DI_INT:
36797 case V2DF_FTYPE_V2DF_INT:
36798 case V2DF_FTYPE_V4DF_INT:
36799 case V16HI_FTYPE_V16HI_INT:
36800 case V8SI_FTYPE_V8SI_INT:
36801 case V16SI_FTYPE_V16SI_INT:
36802 case V4SI_FTYPE_V16SI_INT:
36803 case V4DI_FTYPE_V4DI_INT:
36804 case V2DI_FTYPE_V4DI_INT:
36805 case V4DI_FTYPE_V8DI_INT:
36806 case HI_FTYPE_HI_INT:
36807 case QI_FTYPE_V4SF_INT:
36808 case QI_FTYPE_V2DF_INT:
36809 nargs = 2;
36810 nargs_constant = 1;
36811 break;
36812 case V16QI_FTYPE_V16QI_V16QI_V16QI:
36813 case V8SF_FTYPE_V8SF_V8SF_V8SF:
36814 case V4DF_FTYPE_V4DF_V4DF_V4DF:
36815 case V4SF_FTYPE_V4SF_V4SF_V4SF:
36816 case V2DF_FTYPE_V2DF_V2DF_V2DF:
36817 case V32QI_FTYPE_V32QI_V32QI_V32QI:
36818 case HI_FTYPE_V16SI_V16SI_HI:
36819 case QI_FTYPE_V8DI_V8DI_QI:
36820 case V16HI_FTYPE_V16SI_V16HI_HI:
36821 case V16QI_FTYPE_V16SI_V16QI_HI:
36822 case V16QI_FTYPE_V8DI_V16QI_QI:
36823 case V16SF_FTYPE_V16SF_V16SF_HI:
36824 case V16SF_FTYPE_V16SF_V16SF_V16SF:
36825 case V16SF_FTYPE_V16SF_V16SI_V16SF:
36826 case V16SF_FTYPE_V16SI_V16SF_HI:
36827 case V16SF_FTYPE_V16SI_V16SF_V16SF:
36828 case V16SF_FTYPE_V4SF_V16SF_HI:
36829 case V16SI_FTYPE_SI_V16SI_HI:
36830 case V16SI_FTYPE_V16HI_V16SI_HI:
36831 case V16SI_FTYPE_V16QI_V16SI_HI:
36832 case V16SI_FTYPE_V16SF_V16SI_HI:
36833 case V8SF_FTYPE_V4SF_V8SF_QI:
36834 case V4DF_FTYPE_V2DF_V4DF_QI:
36835 case V8SI_FTYPE_V4SI_V8SI_QI:
36836 case V8SI_FTYPE_SI_V8SI_QI:
36837 case V4SI_FTYPE_V4SI_V4SI_QI:
36838 case V4SI_FTYPE_SI_V4SI_QI:
36839 case V4DI_FTYPE_V2DI_V4DI_QI:
36840 case V4DI_FTYPE_DI_V4DI_QI:
36841 case V2DI_FTYPE_V2DI_V2DI_QI:
36842 case V2DI_FTYPE_DI_V2DI_QI:
36843 case V64QI_FTYPE_V64QI_V64QI_DI:
36844 case V64QI_FTYPE_V16QI_V64QI_DI:
36845 case V64QI_FTYPE_QI_V64QI_DI:
36846 case V32QI_FTYPE_V32QI_V32QI_SI:
36847 case V32QI_FTYPE_V16QI_V32QI_SI:
36848 case V32QI_FTYPE_QI_V32QI_SI:
36849 case V16QI_FTYPE_V16QI_V16QI_HI:
36850 case V16QI_FTYPE_QI_V16QI_HI:
36851 case V32HI_FTYPE_V8HI_V32HI_SI:
36852 case V32HI_FTYPE_HI_V32HI_SI:
36853 case V16HI_FTYPE_V8HI_V16HI_HI:
36854 case V16HI_FTYPE_HI_V16HI_HI:
36855 case V8HI_FTYPE_V8HI_V8HI_QI:
36856 case V8HI_FTYPE_HI_V8HI_QI:
36857 case V8SF_FTYPE_V8HI_V8SF_QI:
36858 case V4SF_FTYPE_V8HI_V4SF_QI:
36859 case V8SI_FTYPE_V8SF_V8SI_QI:
36860 case V4SI_FTYPE_V4SF_V4SI_QI:
36861 case V8DI_FTYPE_V8SF_V8DI_QI:
36862 case V4DI_FTYPE_V4SF_V4DI_QI:
36863 case V2DI_FTYPE_V4SF_V2DI_QI:
36864 case V8SF_FTYPE_V8DI_V8SF_QI:
36865 case V4SF_FTYPE_V4DI_V4SF_QI:
36866 case V4SF_FTYPE_V2DI_V4SF_QI:
36867 case V8DF_FTYPE_V8DI_V8DF_QI:
36868 case V4DF_FTYPE_V4DI_V4DF_QI:
36869 case V2DF_FTYPE_V2DI_V2DF_QI:
36870 case V16QI_FTYPE_V8HI_V16QI_QI:
36871 case V16QI_FTYPE_V16HI_V16QI_HI:
36872 case V16QI_FTYPE_V4SI_V16QI_QI:
36873 case V16QI_FTYPE_V8SI_V16QI_QI:
36874 case V8HI_FTYPE_V4SI_V8HI_QI:
36875 case V8HI_FTYPE_V8SI_V8HI_QI:
36876 case V16QI_FTYPE_V2DI_V16QI_QI:
36877 case V16QI_FTYPE_V4DI_V16QI_QI:
36878 case V8HI_FTYPE_V2DI_V8HI_QI:
36879 case V8HI_FTYPE_V4DI_V8HI_QI:
36880 case V4SI_FTYPE_V2DI_V4SI_QI:
36881 case V4SI_FTYPE_V4DI_V4SI_QI:
36882 case V32QI_FTYPE_V32HI_V32QI_SI:
36883 case HI_FTYPE_V16QI_V16QI_HI:
36884 case SI_FTYPE_V32QI_V32QI_SI:
36885 case DI_FTYPE_V64QI_V64QI_DI:
36886 case QI_FTYPE_V8HI_V8HI_QI:
36887 case HI_FTYPE_V16HI_V16HI_HI:
36888 case SI_FTYPE_V32HI_V32HI_SI:
36889 case QI_FTYPE_V4SI_V4SI_QI:
36890 case QI_FTYPE_V8SI_V8SI_QI:
36891 case QI_FTYPE_V2DI_V2DI_QI:
36892 case QI_FTYPE_V4DI_V4DI_QI:
36893 case V4SF_FTYPE_V2DF_V4SF_QI:
36894 case V4SF_FTYPE_V4DF_V4SF_QI:
36895 case V16SI_FTYPE_V16SI_V16SI_HI:
36896 case V16SI_FTYPE_V16SI_V16SI_V16SI:
36897 case V16SI_FTYPE_V4SI_V16SI_HI:
36898 case V2DI_FTYPE_V2DI_V2DI_V2DI:
36899 case V2DI_FTYPE_V4SI_V2DI_QI:
36900 case V2DI_FTYPE_V8HI_V2DI_QI:
36901 case V2DI_FTYPE_V16QI_V2DI_QI:
36902 case V4DI_FTYPE_V4DI_V4DI_QI:
36903 case V4DI_FTYPE_V4SI_V4DI_QI:
36904 case V4DI_FTYPE_V8HI_V4DI_QI:
36905 case V4DI_FTYPE_V16QI_V4DI_QI:
36906 case V8DI_FTYPE_V8DF_V8DI_QI:
36907 case V4DI_FTYPE_V4DF_V4DI_QI:
36908 case V2DI_FTYPE_V2DF_V2DI_QI:
36909 case V4SI_FTYPE_V4DF_V4SI_QI:
36910 case V4SI_FTYPE_V2DF_V4SI_QI:
36911 case V4SI_FTYPE_V8HI_V4SI_QI:
36912 case V4SI_FTYPE_V16QI_V4SI_QI:
36913 case V8SI_FTYPE_V8SI_V8SI_V8SI:
36914 case V4DI_FTYPE_V4DI_V4DI_V4DI:
36915 case V8DF_FTYPE_V2DF_V8DF_QI:
36916 case V8DF_FTYPE_V4DF_V8DF_QI:
36917 case V8DF_FTYPE_V8DF_V8DF_QI:
36918 case V8DF_FTYPE_V8DF_V8DF_V8DF:
36919 case V8SF_FTYPE_V8SF_V8SF_QI:
36920 case V8SF_FTYPE_V8SI_V8SF_QI:
36921 case V4DF_FTYPE_V4DF_V4DF_QI:
36922 case V4SF_FTYPE_V4SF_V4SF_QI:
36923 case V2DF_FTYPE_V2DF_V2DF_QI:
36924 case V2DF_FTYPE_V4SF_V2DF_QI:
36925 case V2DF_FTYPE_V4SI_V2DF_QI:
36926 case V4SF_FTYPE_V4SI_V4SF_QI:
36927 case V4DF_FTYPE_V4SF_V4DF_QI:
36928 case V4DF_FTYPE_V4SI_V4DF_QI:
36929 case V8SI_FTYPE_V8SI_V8SI_QI:
36930 case V8SI_FTYPE_V8HI_V8SI_QI:
36931 case V8SI_FTYPE_V16QI_V8SI_QI:
36932 case V8DF_FTYPE_V8DF_V8DI_V8DF:
36933 case V8DF_FTYPE_V8DI_V8DF_V8DF:
36934 case V8DF_FTYPE_V8SF_V8DF_QI:
36935 case V8DF_FTYPE_V8SI_V8DF_QI:
36936 case V8DI_FTYPE_DI_V8DI_QI:
36937 case V16SF_FTYPE_V8SF_V16SF_HI:
36938 case V16SI_FTYPE_V8SI_V16SI_HI:
36939 case V16HI_FTYPE_V16HI_V16HI_HI:
36940 case V8HI_FTYPE_V16QI_V8HI_QI:
36941 case V16HI_FTYPE_V16QI_V16HI_HI:
36942 case V32HI_FTYPE_V32HI_V32HI_SI:
36943 case V32HI_FTYPE_V32QI_V32HI_SI:
36944 case V8DI_FTYPE_V16QI_V8DI_QI:
36945 case V8DI_FTYPE_V2DI_V8DI_QI:
36946 case V8DI_FTYPE_V4DI_V8DI_QI:
36947 case V8DI_FTYPE_V8DI_V8DI_QI:
36948 case V8DI_FTYPE_V8DI_V8DI_V8DI:
36949 case V8DI_FTYPE_V8HI_V8DI_QI:
36950 case V8DI_FTYPE_V8SI_V8DI_QI:
36951 case V8HI_FTYPE_V8DI_V8HI_QI:
36952 case V8SF_FTYPE_V8DF_V8SF_QI:
36953 case V8SI_FTYPE_V8DF_V8SI_QI:
36954 case V8SI_FTYPE_V8DI_V8SI_QI:
36955 case V4SI_FTYPE_V4SI_V4SI_V4SI:
36956 nargs = 3;
36957 break;
36958 case V32QI_FTYPE_V32QI_V32QI_INT:
36959 case V16HI_FTYPE_V16HI_V16HI_INT:
36960 case V16QI_FTYPE_V16QI_V16QI_INT:
36961 case V4DI_FTYPE_V4DI_V4DI_INT:
36962 case V8HI_FTYPE_V8HI_V8HI_INT:
36963 case V8SI_FTYPE_V8SI_V8SI_INT:
36964 case V8SI_FTYPE_V8SI_V4SI_INT:
36965 case V8SF_FTYPE_V8SF_V8SF_INT:
36966 case V8SF_FTYPE_V8SF_V4SF_INT:
36967 case V4SI_FTYPE_V4SI_V4SI_INT:
36968 case V4DF_FTYPE_V4DF_V4DF_INT:
36969 case V16SF_FTYPE_V16SF_V16SF_INT:
36970 case V16SF_FTYPE_V16SF_V4SF_INT:
36971 case V16SI_FTYPE_V16SI_V4SI_INT:
36972 case V4DF_FTYPE_V4DF_V2DF_INT:
36973 case V4SF_FTYPE_V4SF_V4SF_INT:
36974 case V2DI_FTYPE_V2DI_V2DI_INT:
36975 case V4DI_FTYPE_V4DI_V2DI_INT:
36976 case V2DF_FTYPE_V2DF_V2DF_INT:
36977 case QI_FTYPE_V8DI_V8DI_INT:
36978 case QI_FTYPE_V8DF_V8DF_INT:
36979 case QI_FTYPE_V2DF_V2DF_INT:
36980 case QI_FTYPE_V4SF_V4SF_INT:
36981 case HI_FTYPE_V16SI_V16SI_INT:
36982 case HI_FTYPE_V16SF_V16SF_INT:
36983 nargs = 3;
36984 nargs_constant = 1;
36985 break;
36986 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
36987 nargs = 3;
36988 rmode = V4DImode;
36989 nargs_constant = 1;
36990 break;
36991 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
36992 nargs = 3;
36993 rmode = V2DImode;
36994 nargs_constant = 1;
36995 break;
36996 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
36997 nargs = 3;
36998 rmode = DImode;
36999 nargs_constant = 1;
37000 break;
37001 case V2DI_FTYPE_V2DI_UINT_UINT:
37002 nargs = 3;
37003 nargs_constant = 2;
37004 break;
37005 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
37006 nargs = 3;
37007 rmode = V8DImode;
37008 nargs_constant = 1;
37009 break;
37010 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
37011 nargs = 5;
37012 rmode = V8DImode;
37013 mask_pos = 2;
37014 nargs_constant = 1;
37015 break;
37016 case QI_FTYPE_V8DF_INT_QI:
37017 case QI_FTYPE_V4DF_INT_QI:
37018 case QI_FTYPE_V2DF_INT_QI:
37019 case HI_FTYPE_V16SF_INT_HI:
37020 case QI_FTYPE_V8SF_INT_QI:
37021 case QI_FTYPE_V4SF_INT_QI:
37022 nargs = 3;
37023 mask_pos = 1;
37024 nargs_constant = 1;
37025 break;
37026 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
37027 nargs = 5;
37028 rmode = V4DImode;
37029 mask_pos = 2;
37030 nargs_constant = 1;
37031 break;
37032 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
37033 nargs = 5;
37034 rmode = V2DImode;
37035 mask_pos = 2;
37036 nargs_constant = 1;
37037 break;
37038 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
37039 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
37040 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
37041 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
37042 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
37043 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
37044 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
37045 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
37046 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
37047 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
37048 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
37049 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
37050 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
37051 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
37052 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
37053 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
37054 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
37055 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
37056 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
37057 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
37058 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
37059 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
37060 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
37061 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
37062 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
37063 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
37064 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
37065 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
37066 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
37067 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
37068 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
37069 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
37070 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
37071 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
37072 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
37073 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
37074 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
37075 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
37076 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
37077 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
37078 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
37079 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
37080 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
37081 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
37082 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
37083 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
37084 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
37085 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
37086 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
37087 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
37088 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
37089 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
37090 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
37091 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
37092 nargs = 4;
37093 break;
37094 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37095 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37096 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37097 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37098 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37099 nargs = 4;
37100 nargs_constant = 1;
37101 break;
37102 case QI_FTYPE_V4DI_V4DI_INT_QI:
37103 case QI_FTYPE_V8SI_V8SI_INT_QI:
37104 case QI_FTYPE_V4DF_V4DF_INT_QI:
37105 case QI_FTYPE_V8SF_V8SF_INT_QI:
37106 case QI_FTYPE_V2DI_V2DI_INT_QI:
37107 case QI_FTYPE_V4SI_V4SI_INT_QI:
37108 case QI_FTYPE_V2DF_V2DF_INT_QI:
37109 case QI_FTYPE_V4SF_V4SF_INT_QI:
37110 case DI_FTYPE_V64QI_V64QI_INT_DI:
37111 case SI_FTYPE_V32QI_V32QI_INT_SI:
37112 case HI_FTYPE_V16QI_V16QI_INT_HI:
37113 case SI_FTYPE_V32HI_V32HI_INT_SI:
37114 case HI_FTYPE_V16HI_V16HI_INT_HI:
37115 case QI_FTYPE_V8HI_V8HI_INT_QI:
37116 nargs = 4;
37117 mask_pos = 1;
37118 nargs_constant = 1;
37119 break;
37120 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37121 nargs = 4;
37122 nargs_constant = 2;
37123 break;
37124 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37125 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37126 nargs = 4;
37127 break;
37128 case QI_FTYPE_V8DI_V8DI_INT_QI:
37129 case HI_FTYPE_V16SI_V16SI_INT_HI:
37130 case QI_FTYPE_V8DF_V8DF_INT_QI:
37131 case HI_FTYPE_V16SF_V16SF_INT_HI:
37132 mask_pos = 1;
37133 nargs = 4;
37134 nargs_constant = 1;
37135 break;
37136 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37137 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37138 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37139 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37140 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37141 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37142 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37143 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37144 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37145 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37146 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37147 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37148 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37149 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37150 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37151 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37152 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37153 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37154 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37155 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37156 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37157 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37158 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37159 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37160 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37161 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37162 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37163 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37164 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37165 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37166 nargs = 4;
37167 mask_pos = 2;
37168 nargs_constant = 1;
37169 break;
37170 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37171 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37172 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37173 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37174 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37175 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37176 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37177 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37178 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37179 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37180 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37181 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37182 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37183 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37184 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37185 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37186 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37187 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37188 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37189 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37190 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37191 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37192 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37193 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37194 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37195 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37196 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37197 nargs = 5;
37198 mask_pos = 2;
37199 nargs_constant = 1;
37200 break;
37201 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37202 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37203 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37204 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37205 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37206 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37207 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37208 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37209 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37210 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37211 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37212 	  nargs = 5;
37214 mask_pos = 1;
37215 nargs_constant = 1;
37216 break;
37217
37218 default:
37219 gcc_unreachable ();
37220 }
37221
37222 gcc_assert (nargs <= ARRAY_SIZE (args));
37223
37224 if (comparison != UNKNOWN)
37225 {
37226 gcc_assert (nargs == 2);
37227 return ix86_expand_sse_compare (d, exp, target, swap);
37228 }
37229
37230 if (rmode == VOIDmode || rmode == tmode)
37231 {
37232 if (optimize
37233 || target == 0
37234 || GET_MODE (target) != tmode
37235 || !insn_p->operand[0].predicate (target, tmode))
37236 target = gen_reg_rtx (tmode);
37237 real_target = target;
37238 }
37239 else
37240 {
37241 real_target = gen_reg_rtx (tmode);
37242 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
37243 }
37244
37245 for (i = 0; i < nargs; i++)
37246 {
37247 tree arg = CALL_EXPR_ARG (exp, i);
37248 rtx op = expand_normal (arg);
37249 machine_mode mode = insn_p->operand[i + 1].mode;
37250 bool match = insn_p->operand[i + 1].predicate (op, mode);
37251
37252 if (last_arg_count && (i + 1) == nargs)
37253 {
37254 /* SIMD shift insns take either an 8-bit immediate or
37255 register as count. But builtin functions take int as
37256 count. If count doesn't match, we put it in register. */
37257 if (!match)
37258 {
37259 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37260 if (!insn_p->operand[i + 1].predicate (op, mode))
37261 op = copy_to_reg (op);
37262 }
37263 }
37264 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37265 (!mask_pos && (nargs - i) <= nargs_constant))
37266 {
37267 if (!match)
37268 switch (icode)
37269 {
37270 case CODE_FOR_avx_vinsertf128v4di:
37271 case CODE_FOR_avx_vextractf128v4di:
37272 		  error ("the last argument must be a 1-bit immediate");
37273 return const0_rtx;
37274
37275 case CODE_FOR_avx512f_cmpv8di3_mask:
37276 case CODE_FOR_avx512f_cmpv16si3_mask:
37277 case CODE_FOR_avx512f_ucmpv8di3_mask:
37278 case CODE_FOR_avx512f_ucmpv16si3_mask:
37279 case CODE_FOR_avx512vl_cmpv4di3_mask:
37280 case CODE_FOR_avx512vl_cmpv8si3_mask:
37281 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37282 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37283 case CODE_FOR_avx512vl_cmpv2di3_mask:
37284 case CODE_FOR_avx512vl_cmpv4si3_mask:
37285 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37286 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37287 error ("the last argument must be a 3-bit immediate");
37288 return const0_rtx;
37289
37290 case CODE_FOR_sse4_1_roundsd:
37291 case CODE_FOR_sse4_1_roundss:
37292
37293 case CODE_FOR_sse4_1_roundpd:
37294 case CODE_FOR_sse4_1_roundps:
37295 case CODE_FOR_avx_roundpd256:
37296 case CODE_FOR_avx_roundps256:
37297
37298 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37299 case CODE_FOR_sse4_1_roundps_sfix:
37300 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37301 case CODE_FOR_avx_roundps_sfix256:
37302
37303 case CODE_FOR_sse4_1_blendps:
37304 case CODE_FOR_avx_blendpd256:
37305 case CODE_FOR_avx_vpermilv4df:
37306 case CODE_FOR_avx_vpermilv4df_mask:
37307 case CODE_FOR_avx512f_getmantv8df_mask:
37308 case CODE_FOR_avx512f_getmantv16sf_mask:
37309 case CODE_FOR_avx512vl_getmantv8sf_mask:
37310 case CODE_FOR_avx512vl_getmantv4df_mask:
37311 case CODE_FOR_avx512vl_getmantv4sf_mask:
37312 case CODE_FOR_avx512vl_getmantv2df_mask:
37313 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37314 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37315 case CODE_FOR_avx512dq_rangepv4df_mask:
37316 case CODE_FOR_avx512dq_rangepv8sf_mask:
37317 case CODE_FOR_avx512dq_rangepv2df_mask:
37318 case CODE_FOR_avx512dq_rangepv4sf_mask:
37319 case CODE_FOR_avx_shufpd256_mask:
37320 error ("the last argument must be a 4-bit immediate");
37321 return const0_rtx;
37322
37323 case CODE_FOR_sha1rnds4:
37324 case CODE_FOR_sse4_1_blendpd:
37325 case CODE_FOR_avx_vpermilv2df:
37326 case CODE_FOR_avx_vpermilv2df_mask:
37327 case CODE_FOR_xop_vpermil2v2df3:
37328 case CODE_FOR_xop_vpermil2v4sf3:
37329 case CODE_FOR_xop_vpermil2v4df3:
37330 case CODE_FOR_xop_vpermil2v8sf3:
37331 case CODE_FOR_avx512f_vinsertf32x4_mask:
37332 case CODE_FOR_avx512f_vinserti32x4_mask:
37333 case CODE_FOR_avx512f_vextractf32x4_mask:
37334 case CODE_FOR_avx512f_vextracti32x4_mask:
37335 case CODE_FOR_sse2_shufpd:
37336 case CODE_FOR_sse2_shufpd_mask:
37337 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37338 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37339 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37340 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37341 error ("the last argument must be a 2-bit immediate");
37342 return const0_rtx;
37343
37344 case CODE_FOR_avx_vextractf128v4df:
37345 case CODE_FOR_avx_vextractf128v8sf:
37346 case CODE_FOR_avx_vextractf128v8si:
37347 case CODE_FOR_avx_vinsertf128v4df:
37348 case CODE_FOR_avx_vinsertf128v8sf:
37349 case CODE_FOR_avx_vinsertf128v8si:
37350 case CODE_FOR_avx512f_vinsertf64x4_mask:
37351 case CODE_FOR_avx512f_vinserti64x4_mask:
37352 case CODE_FOR_avx512f_vextractf64x4_mask:
37353 case CODE_FOR_avx512f_vextracti64x4_mask:
37354 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37355 case CODE_FOR_avx512dq_vinserti32x8_mask:
37356 case CODE_FOR_avx512vl_vinsertv4df:
37357 case CODE_FOR_avx512vl_vinsertv4di:
37358 case CODE_FOR_avx512vl_vinsertv8sf:
37359 case CODE_FOR_avx512vl_vinsertv8si:
37360 error ("the last argument must be a 1-bit immediate");
37361 return const0_rtx;
37362
37363 case CODE_FOR_avx_vmcmpv2df3:
37364 case CODE_FOR_avx_vmcmpv4sf3:
37365 case CODE_FOR_avx_cmpv2df3:
37366 case CODE_FOR_avx_cmpv4sf3:
37367 case CODE_FOR_avx_cmpv4df3:
37368 case CODE_FOR_avx_cmpv8sf3:
37369 case CODE_FOR_avx512f_cmpv8df3_mask:
37370 case CODE_FOR_avx512f_cmpv16sf3_mask:
37371 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37372 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37373 error ("the last argument must be a 5-bit immediate");
37374 return const0_rtx;
37375
37376 default:
37377 switch (nargs_constant)
37378 {
37379 case 2:
37380 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37381 (!mask_pos && (nargs - i) == nargs_constant))
37382 {
37383 error ("the next to last argument must be an 8-bit immediate");
37384 break;
37385 }
37386 case 1:
37387 error ("the last argument must be an 8-bit immediate");
37388 break;
37389 default:
37390 gcc_unreachable ();
37391 }
37392 return const0_rtx;
37393 }
37394 }
37395 else
37396 {
37397 if (VECTOR_MODE_P (mode))
37398 op = safe_vector_operand (op, mode);
37399
37400 /* If we aren't optimizing, only allow one memory operand to
37401 be generated. */
37402 if (memory_operand (op, mode))
37403 num_memory++;
37404
37405 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37406 {
37407 if (optimize || !match || num_memory > 1)
37408 op = copy_to_mode_reg (mode, op);
37409 }
37410 else
37411 {
37412 op = copy_to_reg (op);
37413 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37414 }
37415 }
37416
37417 args[i].op = op;
37418 args[i].mode = mode;
37419 }
37420
37421 switch (nargs)
37422 {
37423 case 1:
37424 pat = GEN_FCN (icode) (real_target, args[0].op);
37425 break;
37426 case 2:
37427 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37428 break;
37429 case 3:
37430 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37431 args[2].op);
37432 break;
37433 case 4:
37434 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37435 args[2].op, args[3].op);
37436 break;
37437 case 5:
37438 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37439 				 args[2].op, args[3].op, args[4].op);
	  break;
37440 case 6:
37441 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37442 args[2].op, args[3].op, args[4].op,
37443 args[5].op);
37444 break;
37445 default:
37446 gcc_unreachable ();
37447 }
37448
37449 if (! pat)
37450 return 0;
37451
37452 emit_insn (pat);
37453 return target;
37454 }
37455
37456 /* Transform a pattern of the following layout:
37457 	 (parallel [
37458 	   (set (A B))
37459 	   (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37461    into:
37462 	 (set (A B))
37463 
37464    Or:
37465 	 (parallel [ A B
37466 	 ...
37467 	 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
37468 	 ...
37469 	 ])
37470    into:
37471 	 (parallel [ A B ... ])  */
37472
37473 static rtx
37474 ix86_erase_embedded_rounding (rtx pat)
37475 {
37476 if (GET_CODE (pat) == INSN)
37477 pat = PATTERN (pat);
37478
37479 gcc_assert (GET_CODE (pat) == PARALLEL);
37480
37481 if (XVECLEN (pat, 0) == 2)
37482 {
37483 rtx p0 = XVECEXP (pat, 0, 0);
37484 rtx p1 = XVECEXP (pat, 0, 1);
37485
37486 gcc_assert (GET_CODE (p0) == SET
37487 && GET_CODE (p1) == UNSPEC
37488 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37489
37490 return p0;
37491 }
37492 else
37493 {
37494 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37495 int i = 0;
37496 int j = 0;
37497
37498 for (; i < XVECLEN (pat, 0); ++i)
37499 {
37500 rtx elem = XVECEXP (pat, 0, i);
37501 if (GET_CODE (elem) != UNSPEC
37502 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37503 res [j++] = elem;
37504 }
37505
37506 	  /* No more than one occurrence was removed.  */
37507 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37508
37509 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37510 }
37511 }
37512
37513 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37514 with rounding. */
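
/* Example (user-level sketch, assuming the AVX-512F wrappers in
   <immintrin.h>; the helper name is illustrative): intrinsics of the
   _mm_comi_round_ss style reach this expander; the third argument
   selects one of the 32 predicates from avxintrin.h (e.g. _CMP_GE_OS)
   and the fourth argument is the SAE control (e.g. _MM_FROUND_NO_EXC).

     #include <immintrin.h>

     int
     ge_no_exceptions (__m128 a, __m128 b)
     {
       return _mm_comi_round_ss (a, b, _CMP_GE_OS, _MM_FROUND_NO_EXC);
     }

   Compile with -mavx512f.  */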
37515 static rtx
37516 ix86_expand_sse_comi_round (const struct builtin_description *d,
37517 tree exp, rtx target)
37518 {
37519 rtx pat, set_dst;
37520 tree arg0 = CALL_EXPR_ARG (exp, 0);
37521 tree arg1 = CALL_EXPR_ARG (exp, 1);
37522 tree arg2 = CALL_EXPR_ARG (exp, 2);
37523 tree arg3 = CALL_EXPR_ARG (exp, 3);
37524 rtx op0 = expand_normal (arg0);
37525 rtx op1 = expand_normal (arg1);
37526 rtx op2 = expand_normal (arg2);
37527 rtx op3 = expand_normal (arg3);
37528 enum insn_code icode = d->icode;
37529 const struct insn_data_d *insn_p = &insn_data[icode];
37530 machine_mode mode0 = insn_p->operand[0].mode;
37531 machine_mode mode1 = insn_p->operand[1].mode;
37532 enum rtx_code comparison = UNEQ;
37533 bool need_ucomi = false;
37534
37535 /* See avxintrin.h for values. */
37536 enum rtx_code comi_comparisons[32] =
37537 {
37538 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37539 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37540 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37541 };
37542 bool need_ucomi_values[32] =
37543 {
37544 true, false, false, true, true, false, false, true,
37545 true, false, false, true, true, false, false, true,
37546 false, true, true, false, false, true, true, false,
37547 false, true, true, false, false, true, true, false
37548 };
37549
37550 if (!CONST_INT_P (op2))
37551 {
37552       error ("the third argument must be a comparison constant");
37553 return const0_rtx;
37554 }
37555 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37556 {
37557       error ("incorrect comparison mode");
37558 return const0_rtx;
37559 }
37560
37561 if (!insn_p->operand[2].predicate (op3, SImode))
37562 {
37563 error ("incorrect rounding operand");
37564 return const0_rtx;
37565 }
37566
37567 comparison = comi_comparisons[INTVAL (op2)];
37568 need_ucomi = need_ucomi_values[INTVAL (op2)];
37569
37570 if (VECTOR_MODE_P (mode0))
37571 op0 = safe_vector_operand (op0, mode0);
37572 if (VECTOR_MODE_P (mode1))
37573 op1 = safe_vector_operand (op1, mode1);
37574
37575 target = gen_reg_rtx (SImode);
37576 emit_move_insn (target, const0_rtx);
37577 target = gen_rtx_SUBREG (QImode, target, 0);
37578
37579 if ((optimize && !register_operand (op0, mode0))
37580 || !insn_p->operand[0].predicate (op0, mode0))
37581 op0 = copy_to_mode_reg (mode0, op0);
37582 if ((optimize && !register_operand (op1, mode1))
37583 || !insn_p->operand[1].predicate (op1, mode1))
37584 op1 = copy_to_mode_reg (mode1, op1);
37585
37586 if (need_ucomi)
37587 icode = icode == CODE_FOR_sse_comi_round
37588 ? CODE_FOR_sse_ucomi_round
37589 : CODE_FOR_sse2_ucomi_round;
37590
37591 pat = GEN_FCN (icode) (op0, op1, op3);
37592 if (! pat)
37593 return 0;
37594
37595 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37596 if (INTVAL (op3) == NO_ROUND)
37597 {
37598 pat = ix86_erase_embedded_rounding (pat);
37599 if (! pat)
37600 return 0;
37601
37602 set_dst = SET_DEST (pat);
37603 }
37604 else
37605 {
37606 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37607 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37608 }
37609
37610 emit_insn (pat);
37611 emit_insn (gen_rtx_SET (VOIDmode,
37612 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37613 gen_rtx_fmt_ee (comparison, QImode,
37614 set_dst,
37615 const0_rtx)));
37616
37617 return SUBREG_REG (target);
37618 }
37619
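/* Subroutine of ix86_expand_builtin to take care of insns that carry an
   embedded rounding/SAE operand as their last argument.  Example
   (user-level sketch, assuming the AVX-512F wrappers in <immintrin.h>;
   the helper name is illustrative): a rounding intrinsic such as
   _mm512_add_round_pd passes the rounding mode last; when that operand
   requests no explicit rounding (NO_ROUND), the embedded-rounding
   unspec is erased again below.

     #include <immintrin.h>

     __m512d
     add_to_nearest (__m512d a, __m512d b)
     {
       return _mm512_add_round_pd (a, b,
                                   _MM_FROUND_TO_NEAREST_INT
                                   | _MM_FROUND_NO_EXC);
     }

   Compile with -mavx512f.  */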
37620 static rtx
37621 ix86_expand_round_builtin (const struct builtin_description *d,
37622 tree exp, rtx target)
37623 {
37624 rtx pat;
37625 unsigned int i, nargs;
37626 struct
37627 {
37628 rtx op;
37629 machine_mode mode;
37630 } args[6];
37631 enum insn_code icode = d->icode;
37632 const struct insn_data_d *insn_p = &insn_data[icode];
37633 machine_mode tmode = insn_p->operand[0].mode;
37634 unsigned int nargs_constant = 0;
37635 unsigned int redundant_embed_rnd = 0;
37636
37637 switch ((enum ix86_builtin_func_type) d->flag)
37638 {
37639 case UINT64_FTYPE_V2DF_INT:
37640 case UINT64_FTYPE_V4SF_INT:
37641 case UINT_FTYPE_V2DF_INT:
37642 case UINT_FTYPE_V4SF_INT:
37643 case INT64_FTYPE_V2DF_INT:
37644 case INT64_FTYPE_V4SF_INT:
37645 case INT_FTYPE_V2DF_INT:
37646 case INT_FTYPE_V4SF_INT:
37647 nargs = 2;
37648 break;
37649 case V4SF_FTYPE_V4SF_UINT_INT:
37650 case V4SF_FTYPE_V4SF_UINT64_INT:
37651 case V2DF_FTYPE_V2DF_UINT64_INT:
37652 case V4SF_FTYPE_V4SF_INT_INT:
37653 case V4SF_FTYPE_V4SF_INT64_INT:
37654 case V2DF_FTYPE_V2DF_INT64_INT:
37655 case V4SF_FTYPE_V4SF_V4SF_INT:
37656 case V2DF_FTYPE_V2DF_V2DF_INT:
37657 case V4SF_FTYPE_V4SF_V2DF_INT:
37658 case V2DF_FTYPE_V2DF_V4SF_INT:
37659 nargs = 3;
37660 break;
37661 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37662 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37663 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37664 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37665 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37666 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37667 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37668 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37669 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37670 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37671 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37672 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37673 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37674 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37675 nargs = 4;
37676 break;
37677 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37678 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37679 nargs_constant = 2;
37680 nargs = 4;
37681 break;
37682 case INT_FTYPE_V4SF_V4SF_INT_INT:
37683 case INT_FTYPE_V2DF_V2DF_INT_INT:
37684 return ix86_expand_sse_comi_round (d, exp, target);
37685 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37686 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37687 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37688 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37689 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37690 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37691 nargs = 5;
37692 break;
37693 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37694 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37695 nargs_constant = 4;
37696 nargs = 5;
37697 break;
37698 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37699 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37700 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37701 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37702 nargs_constant = 3;
37703 nargs = 5;
37704 break;
37705 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37706 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37707 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37708 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37709 nargs = 6;
37710 nargs_constant = 4;
37711 break;
37712 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37713 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37714 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37715 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37716 nargs = 6;
37717 nargs_constant = 3;
37718 break;
37719 default:
37720 gcc_unreachable ();
37721 }
37722 gcc_assert (nargs <= ARRAY_SIZE (args));
37723
37724 if (optimize
37725 || target == 0
37726 || GET_MODE (target) != tmode
37727 || !insn_p->operand[0].predicate (target, tmode))
37728 target = gen_reg_rtx (tmode);
37729
37730 for (i = 0; i < nargs; i++)
37731 {
37732 tree arg = CALL_EXPR_ARG (exp, i);
37733 rtx op = expand_normal (arg);
37734 machine_mode mode = insn_p->operand[i + 1].mode;
37735 bool match = insn_p->operand[i + 1].predicate (op, mode);
37736
37737 if (i == nargs - nargs_constant)
37738 {
37739 if (!match)
37740 {
37741 switch (icode)
37742 {
37743 case CODE_FOR_avx512f_getmantv8df_mask_round:
37744 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37745 case CODE_FOR_avx512f_vgetmantv2df_round:
37746 case CODE_FOR_avx512f_vgetmantv4sf_round:
37747 error ("the immediate argument must be a 4-bit immediate");
37748 return const0_rtx;
37749 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37750 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37751 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37752 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37753 error ("the immediate argument must be a 5-bit immediate");
37754 return const0_rtx;
37755 default:
37756 error ("the immediate argument must be an 8-bit immediate");
37757 return const0_rtx;
37758 }
37759 }
37760 }
37761       else if (i == nargs - 1)
37762 {
37763 if (!insn_p->operand[nargs].predicate (op, SImode))
37764 {
37765 error ("incorrect rounding operand");
37766 return const0_rtx;
37767 }
37768
37769 	  /* If there is no rounding, use the normal version of the pattern.  */
37770 if (INTVAL (op) == NO_ROUND)
37771 redundant_embed_rnd = 1;
37772 }
37773 else
37774 {
37775 if (VECTOR_MODE_P (mode))
37776 op = safe_vector_operand (op, mode);
37777
37778 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37779 {
37780 if (optimize || !match)
37781 op = copy_to_mode_reg (mode, op);
37782 }
37783 else
37784 {
37785 op = copy_to_reg (op);
37786 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37787 }
37788 }
37789
37790 args[i].op = op;
37791 args[i].mode = mode;
37792 }
37793
37794 switch (nargs)
37795 {
37796 case 1:
37797 pat = GEN_FCN (icode) (target, args[0].op);
37798 break;
37799 case 2:
37800 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37801 break;
37802 case 3:
37803 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37804 args[2].op);
37805 break;
37806 case 4:
37807 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37808 args[2].op, args[3].op);
37809 break;
37810 case 5:
37811 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37812 				 args[2].op, args[3].op, args[4].op);
	  break;
37813 case 6:
37814 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37815 args[2].op, args[3].op, args[4].op,
37816 args[5].op);
37817 break;
37818 default:
37819 gcc_unreachable ();
37820 }
37821
37822 if (!pat)
37823 return 0;
37824
37825 if (redundant_embed_rnd)
37826 pat = ix86_erase_embedded_rounding (pat);
37827
37828 emit_insn (pat);
37829 return target;
37830 }
37831
37832 /* Subroutine of ix86_expand_builtin to take care of special insns
37833    with a variable number of operands.  */
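
/* Example (user-level sketch; the helper name is illustrative): a
   store-class builtin such as the one behind _mm_stream_ps in
   <xmmintrin.h> (VOID_FTYPE_PFLOAT_V4SF below) takes the memory
   operand as its first argument; non-temporal stores are marked
   aligned_mem here so the constructed MEM carries the full mode
   alignment (the destination must be 16-byte aligned).

     #include <xmmintrin.h>

     void
     store_streaming (float *dst, __m128 v)
     {
       _mm_stream_ps (dst, v);
     }  */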
37834
37835 static rtx
37836 ix86_expand_special_args_builtin (const struct builtin_description *d,
37837 tree exp, rtx target)
37838 {
37839 tree arg;
37840 rtx pat, op;
37841 unsigned int i, nargs, arg_adjust, memory;
37842 bool aligned_mem = false;
37843 struct
37844 {
37845 rtx op;
37846 machine_mode mode;
37847 } args[3];
37848 enum insn_code icode = d->icode;
37849 bool last_arg_constant = false;
37850 const struct insn_data_d *insn_p = &insn_data[icode];
37851 machine_mode tmode = insn_p->operand[0].mode;
37852 enum { load, store } klass;
37853
37854 switch ((enum ix86_builtin_func_type) d->flag)
37855 {
37856 case VOID_FTYPE_VOID:
37857 emit_insn (GEN_FCN (icode) (target));
37858 return 0;
37859 case VOID_FTYPE_UINT64:
37860 case VOID_FTYPE_UNSIGNED:
37861 nargs = 0;
37862 klass = store;
37863 memory = 0;
37864 break;
37865
37866 case INT_FTYPE_VOID:
37867 case USHORT_FTYPE_VOID:
37868 case UINT64_FTYPE_VOID:
37869 case UNSIGNED_FTYPE_VOID:
37870 nargs = 0;
37871 klass = load;
37872 memory = 0;
37873 break;
37874 case UINT64_FTYPE_PUNSIGNED:
37875 case V2DI_FTYPE_PV2DI:
37876 case V4DI_FTYPE_PV4DI:
37877 case V32QI_FTYPE_PCCHAR:
37878 case V16QI_FTYPE_PCCHAR:
37879 case V8SF_FTYPE_PCV4SF:
37880 case V8SF_FTYPE_PCFLOAT:
37881 case V4SF_FTYPE_PCFLOAT:
37882 case V4DF_FTYPE_PCV2DF:
37883 case V4DF_FTYPE_PCDOUBLE:
37884 case V2DF_FTYPE_PCDOUBLE:
37885 case VOID_FTYPE_PVOID:
37886 case V16SI_FTYPE_PV4SI:
37887 case V16SF_FTYPE_PV4SF:
37888 case V8DI_FTYPE_PV4DI:
37889 case V8DI_FTYPE_PV8DI:
37890 case V8DF_FTYPE_PV4DF:
37891 nargs = 1;
37892 klass = load;
37893 memory = 0;
37894 switch (icode)
37895 {
37896 case CODE_FOR_sse4_1_movntdqa:
37897 case CODE_FOR_avx2_movntdqa:
37898 case CODE_FOR_avx512f_movntdqa:
37899 aligned_mem = true;
37900 break;
37901 default:
37902 break;
37903 }
37904 break;
37905 case VOID_FTYPE_PV2SF_V4SF:
37906 case VOID_FTYPE_PV8DI_V8DI:
37907 case VOID_FTYPE_PV4DI_V4DI:
37908 case VOID_FTYPE_PV2DI_V2DI:
37909 case VOID_FTYPE_PCHAR_V32QI:
37910 case VOID_FTYPE_PCHAR_V16QI:
37911 case VOID_FTYPE_PFLOAT_V16SF:
37912 case VOID_FTYPE_PFLOAT_V8SF:
37913 case VOID_FTYPE_PFLOAT_V4SF:
37914 case VOID_FTYPE_PDOUBLE_V8DF:
37915 case VOID_FTYPE_PDOUBLE_V4DF:
37916 case VOID_FTYPE_PDOUBLE_V2DF:
37917 case VOID_FTYPE_PLONGLONG_LONGLONG:
37918 case VOID_FTYPE_PULONGLONG_ULONGLONG:
37919 case VOID_FTYPE_PINT_INT:
37920 nargs = 1;
37921 klass = store;
37922 /* Reserve memory operand for target. */
37923 memory = ARRAY_SIZE (args);
37924 switch (icode)
37925 {
37926 /* These builtins and instructions require the memory
37927 to be properly aligned. */
37928 case CODE_FOR_avx_movntv4di:
37929 case CODE_FOR_sse2_movntv2di:
37930 case CODE_FOR_avx_movntv8sf:
37931 case CODE_FOR_sse_movntv4sf:
37932 case CODE_FOR_sse4a_vmmovntv4sf:
37933 case CODE_FOR_avx_movntv4df:
37934 case CODE_FOR_sse2_movntv2df:
37935 case CODE_FOR_sse4a_vmmovntv2df:
37936 case CODE_FOR_sse2_movntidi:
37937 case CODE_FOR_sse_movntq:
37938 case CODE_FOR_sse2_movntisi:
37939 case CODE_FOR_avx512f_movntv16sf:
37940 case CODE_FOR_avx512f_movntv8df:
37941 case CODE_FOR_avx512f_movntv8di:
37942 aligned_mem = true;
37943 break;
37944 default:
37945 break;
37946 }
37947 break;
37948 case V4SF_FTYPE_V4SF_PCV2SF:
37949 case V2DF_FTYPE_V2DF_PCDOUBLE:
37950 nargs = 2;
37951 klass = load;
37952 memory = 1;
37953 break;
37954 case V8SF_FTYPE_PCV8SF_V8SI:
37955 case V4DF_FTYPE_PCV4DF_V4DI:
37956 case V4SF_FTYPE_PCV4SF_V4SI:
37957 case V2DF_FTYPE_PCV2DF_V2DI:
37958 case V8SI_FTYPE_PCV8SI_V8SI:
37959 case V4DI_FTYPE_PCV4DI_V4DI:
37960 case V4SI_FTYPE_PCV4SI_V4SI:
37961 case V2DI_FTYPE_PCV2DI_V2DI:
37962 nargs = 2;
37963 klass = load;
37964 memory = 0;
37965 break;
37966 case VOID_FTYPE_PV8DF_V8DF_QI:
37967 case VOID_FTYPE_PV16SF_V16SF_HI:
37968 case VOID_FTYPE_PV8DI_V8DI_QI:
37969 case VOID_FTYPE_PV4DI_V4DI_QI:
37970 case VOID_FTYPE_PV2DI_V2DI_QI:
37971 case VOID_FTYPE_PV16SI_V16SI_HI:
37972 case VOID_FTYPE_PV8SI_V8SI_QI:
37973 case VOID_FTYPE_PV4SI_V4SI_QI:
37974 switch (icode)
37975 {
37976 /* These builtins and instructions require the memory
37977 to be properly aligned. */
37978 case CODE_FOR_avx512f_storev16sf_mask:
37979 case CODE_FOR_avx512f_storev16si_mask:
37980 case CODE_FOR_avx512f_storev8df_mask:
37981 case CODE_FOR_avx512f_storev8di_mask:
37982 case CODE_FOR_avx512vl_storev8sf_mask:
37983 case CODE_FOR_avx512vl_storev8si_mask:
37984 case CODE_FOR_avx512vl_storev4df_mask:
37985 case CODE_FOR_avx512vl_storev4di_mask:
37986 case CODE_FOR_avx512vl_storev4sf_mask:
37987 case CODE_FOR_avx512vl_storev4si_mask:
37988 case CODE_FOR_avx512vl_storev2df_mask:
37989 case CODE_FOR_avx512vl_storev2di_mask:
37990 aligned_mem = true;
37991 break;
37992 default:
37993 break;
37994 }
37995 /* FALLTHRU */
37996 case VOID_FTYPE_PV8SF_V8SI_V8SF:
37997 case VOID_FTYPE_PV4DF_V4DI_V4DF:
37998 case VOID_FTYPE_PV4SF_V4SI_V4SF:
37999 case VOID_FTYPE_PV2DF_V2DI_V2DF:
38000 case VOID_FTYPE_PV8SI_V8SI_V8SI:
38001 case VOID_FTYPE_PV4DI_V4DI_V4DI:
38002 case VOID_FTYPE_PV4SI_V4SI_V4SI:
38003 case VOID_FTYPE_PV2DI_V2DI_V2DI:
38004 case VOID_FTYPE_PDOUBLE_V2DF_QI:
38005 case VOID_FTYPE_PFLOAT_V4SF_QI:
38006 case VOID_FTYPE_PV8SI_V8DI_QI:
38007 case VOID_FTYPE_PV8HI_V8DI_QI:
38008 case VOID_FTYPE_PV16HI_V16SI_HI:
38009 case VOID_FTYPE_PV16QI_V8DI_QI:
38010 case VOID_FTYPE_PV16QI_V16SI_HI:
38011 case VOID_FTYPE_PV4SI_V4DI_QI:
38012 case VOID_FTYPE_PV4SI_V2DI_QI:
38013 case VOID_FTYPE_PV8HI_V4DI_QI:
38014 case VOID_FTYPE_PV8HI_V2DI_QI:
38015 case VOID_FTYPE_PV8HI_V8SI_QI:
38016 case VOID_FTYPE_PV8HI_V4SI_QI:
38017 case VOID_FTYPE_PV16QI_V4DI_QI:
38018 case VOID_FTYPE_PV16QI_V2DI_QI:
38019 case VOID_FTYPE_PV16QI_V8SI_QI:
38020 case VOID_FTYPE_PV16QI_V4SI_QI:
38021 case VOID_FTYPE_PV8HI_V8HI_QI:
38022 case VOID_FTYPE_PV16HI_V16HI_HI:
38023 case VOID_FTYPE_PV32HI_V32HI_SI:
38024 case VOID_FTYPE_PV16QI_V16QI_HI:
38025 case VOID_FTYPE_PV32QI_V32QI_SI:
38026 case VOID_FTYPE_PV64QI_V64QI_DI:
38027 case VOID_FTYPE_PV4DF_V4DF_QI:
38028 case VOID_FTYPE_PV2DF_V2DF_QI:
38029 case VOID_FTYPE_PV8SF_V8SF_QI:
38030 case VOID_FTYPE_PV4SF_V4SF_QI:
38031 nargs = 2;
38032 klass = store;
38033 /* Reserve memory operand for target. */
38034 memory = ARRAY_SIZE (args);
38035 break;
38036 case V4SF_FTYPE_PCV4SF_V4SF_QI:
38037 case V8SF_FTYPE_PCV8SF_V8SF_QI:
38038 case V16SF_FTYPE_PCV16SF_V16SF_HI:
38039 case V4SI_FTYPE_PCV4SI_V4SI_QI:
38040 case V8SI_FTYPE_PCV8SI_V8SI_QI:
38041 case V16SI_FTYPE_PCV16SI_V16SI_HI:
38042 case V2DF_FTYPE_PCV2DF_V2DF_QI:
38043 case V4DF_FTYPE_PCV4DF_V4DF_QI:
38044 case V8DF_FTYPE_PCV8DF_V8DF_QI:
38045 case V2DI_FTYPE_PCV2DI_V2DI_QI:
38046 case V4DI_FTYPE_PCV4DI_V4DI_QI:
38047 case V8DI_FTYPE_PCV8DI_V8DI_QI:
38048 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
38049 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
38050 case V8HI_FTYPE_PCV8HI_V8HI_QI:
38051 case V16HI_FTYPE_PCV16HI_V16HI_HI:
38052 case V32HI_FTYPE_PCV32HI_V32HI_SI:
38053 case V16QI_FTYPE_PCV16QI_V16QI_HI:
38054 case V32QI_FTYPE_PCV32QI_V32QI_SI:
38055 case V64QI_FTYPE_PCV64QI_V64QI_DI:
38056 nargs = 3;
38057 klass = load;
38058 memory = 0;
38059 switch (icode)
38060 {
38061 /* These builtins and instructions require the memory
38062 to be properly aligned. */
38063 case CODE_FOR_avx512f_loadv16sf_mask:
38064 case CODE_FOR_avx512f_loadv16si_mask:
38065 case CODE_FOR_avx512f_loadv8df_mask:
38066 case CODE_FOR_avx512f_loadv8di_mask:
38067 case CODE_FOR_avx512vl_loadv8sf_mask:
38068 case CODE_FOR_avx512vl_loadv8si_mask:
38069 case CODE_FOR_avx512vl_loadv4df_mask:
38070 case CODE_FOR_avx512vl_loadv4di_mask:
38071 case CODE_FOR_avx512vl_loadv4sf_mask:
38072 case CODE_FOR_avx512vl_loadv4si_mask:
38073 case CODE_FOR_avx512vl_loadv2df_mask:
38074 case CODE_FOR_avx512vl_loadv2di_mask:
38075 case CODE_FOR_avx512bw_loadv64qi_mask:
38076 case CODE_FOR_avx512vl_loadv32qi_mask:
38077 case CODE_FOR_avx512vl_loadv16qi_mask:
38078 case CODE_FOR_avx512bw_loadv32hi_mask:
38079 case CODE_FOR_avx512vl_loadv16hi_mask:
38080 case CODE_FOR_avx512vl_loadv8hi_mask:
38081 aligned_mem = true;
38082 break;
38083 default:
38084 break;
38085 }
38086 break;
38087 case VOID_FTYPE_UINT_UINT_UINT:
38088 case VOID_FTYPE_UINT64_UINT_UINT:
38089 case UCHAR_FTYPE_UINT_UINT_UINT:
38090 case UCHAR_FTYPE_UINT64_UINT_UINT:
38091 nargs = 3;
38092 klass = load;
38093 memory = ARRAY_SIZE (args);
38094 last_arg_constant = true;
38095 break;
38096 default:
38097 gcc_unreachable ();
38098 }
38099
38100 gcc_assert (nargs <= ARRAY_SIZE (args));
38101
38102 if (klass == store)
38103 {
38104 arg = CALL_EXPR_ARG (exp, 0);
38105 op = expand_normal (arg);
38106 gcc_assert (target == 0);
38107 if (memory)
38108 {
38109 op = ix86_zero_extend_to_Pmode (op);
38110 target = gen_rtx_MEM (tmode, op);
38111 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38112 on it. Try to improve it using get_pointer_alignment,
38113 and if the special builtin is one that requires strict
38114 	     mode alignment, also from its GET_MODE_ALIGNMENT.
38115 Failure to do so could lead to ix86_legitimate_combined_insn
38116 rejecting all changes to such insns. */
38117 unsigned int align = get_pointer_alignment (arg);
38118 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38119 align = GET_MODE_ALIGNMENT (tmode);
38120 if (MEM_ALIGN (target) < align)
38121 set_mem_align (target, align);
38122 }
38123 else
38124 target = force_reg (tmode, op);
38125 arg_adjust = 1;
38126 }
38127 else
38128 {
38129 arg_adjust = 0;
38130 if (optimize
38131 || target == 0
38132 || !register_operand (target, tmode)
38133 || GET_MODE (target) != tmode)
38134 target = gen_reg_rtx (tmode);
38135 }
38136
38137 for (i = 0; i < nargs; i++)
38138 {
38139 machine_mode mode = insn_p->operand[i + 1].mode;
38140 bool match;
38141
38142 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38143 op = expand_normal (arg);
38144 match = insn_p->operand[i + 1].predicate (op, mode);
38145
38146 if (last_arg_constant && (i + 1) == nargs)
38147 {
38148 if (!match)
38149 {
38150 if (icode == CODE_FOR_lwp_lwpvalsi3
38151 || icode == CODE_FOR_lwp_lwpinssi3
38152 || icode == CODE_FOR_lwp_lwpvaldi3
38153 || icode == CODE_FOR_lwp_lwpinsdi3)
38154 error ("the last argument must be a 32-bit immediate");
38155 else
38156 error ("the last argument must be an 8-bit immediate");
38157 return const0_rtx;
38158 }
38159 }
38160 else
38161 {
38162 if (i == memory)
38163 {
38164 /* This must be the memory operand. */
38165 op = ix86_zero_extend_to_Pmode (op);
38166 op = gen_rtx_MEM (mode, op);
38167 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38168 on it. Try to improve it using get_pointer_alignment,
38169 and if the special builtin is one that requires strict
38170 		     mode alignment, also from its GET_MODE_ALIGNMENT.
38171 Failure to do so could lead to ix86_legitimate_combined_insn
38172 rejecting all changes to such insns. */
38173 unsigned int align = get_pointer_alignment (arg);
38174 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38175 align = GET_MODE_ALIGNMENT (mode);
38176 if (MEM_ALIGN (op) < align)
38177 set_mem_align (op, align);
38178 }
38179 else
38180 {
38181 	      /* This must be a register operand.  */
38182 if (VECTOR_MODE_P (mode))
38183 op = safe_vector_operand (op, mode);
38184
38185 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38186 op = copy_to_mode_reg (mode, op);
38187 else
38188 {
38189 op = copy_to_reg (op);
38190 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38191 }
38192 }
38193 }
38194
38195 args[i].op = op;
38196 args[i].mode = mode;
38197 }
38198
38199 switch (nargs)
38200 {
38201 case 0:
38202 pat = GEN_FCN (icode) (target);
38203 break;
38204 case 1:
38205 pat = GEN_FCN (icode) (target, args[0].op);
38206 break;
38207 case 2:
38208 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38209 break;
38210 case 3:
38211 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38212 break;
38213 default:
38214 gcc_unreachable ();
38215 }
38216
38217 if (! pat)
38218 return 0;
38219 emit_insn (pat);
38220 return klass == store ? 0 : target;
38221 }
38222
38223 /* Return the integer constant in ARG. Constrain it to be in the range
38224 of the subparts of VEC_TYPE; issue an error if not. */
38225
38226 static int
38227 get_element_number (tree vec_type, tree arg)
38228 {
38229 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
38230
38231 if (!tree_fits_uhwi_p (arg)
38232 || (elt = tree_to_uhwi (arg), elt > max))
38233 {
38234 error ("selector must be an integer constant in the range 0..%wi", max);
38235 return 0;
38236 }
38237
38238 return elt;
38239 }
38240
38241 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38242 ix86_expand_vector_init. We DO have language-level syntax for this, in
38243 the form of (type){ init-list }. Except that since we can't place emms
38244 instructions from inside the compiler, we can't allow the use of MMX
38245 registers unless the user explicitly asks for it. So we do *not* define
38246 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38247    we have builtins invoked by mmintrin.h that give us license to emit
38248 these sorts of instructions. */
38249
38250 static rtx
38251 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38252 {
38253 machine_mode tmode = TYPE_MODE (type);
38254 machine_mode inner_mode = GET_MODE_INNER (tmode);
38255 int i, n_elt = GET_MODE_NUNITS (tmode);
38256 rtvec v = rtvec_alloc (n_elt);
38257
38258 gcc_assert (VECTOR_MODE_P (tmode));
38259 gcc_assert (call_expr_nargs (exp) == n_elt);
38260
38261 for (i = 0; i < n_elt; ++i)
38262 {
38263 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38264 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38265 }
38266
38267 if (!target || !register_operand (target, tmode))
38268 target = gen_reg_rtx (tmode);
38269
38270 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38271 return target;
38272 }
38273
38274 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38275 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38276 had a language-level syntax for referencing vector elements. */
38277
38278 static rtx
38279 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38280 {
38281 machine_mode tmode, mode0;
38282 tree arg0, arg1;
38283 int elt;
38284 rtx op0;
38285
38286 arg0 = CALL_EXPR_ARG (exp, 0);
38287 arg1 = CALL_EXPR_ARG (exp, 1);
38288
38289 op0 = expand_normal (arg0);
38290 elt = get_element_number (TREE_TYPE (arg0), arg1);
38291
38292 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38293 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38294 gcc_assert (VECTOR_MODE_P (mode0));
38295
38296 op0 = force_reg (mode0, op0);
38297
38298 if (optimize || !target || !register_operand (target, tmode))
38299 target = gen_reg_rtx (tmode);
38300
38301 ix86_expand_vector_extract (true, target, op0, elt);
38302
38303 return target;
38304 }
38305
38306 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38307 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38308 a language-level syntax for referencing vector elements. */
38309
38310 static rtx
38311 ix86_expand_vec_set_builtin (tree exp)
38312 {
38313 machine_mode tmode, mode1;
38314 tree arg0, arg1, arg2;
38315 int elt;
38316 rtx op0, op1, target;
38317
38318 arg0 = CALL_EXPR_ARG (exp, 0);
38319 arg1 = CALL_EXPR_ARG (exp, 1);
38320 arg2 = CALL_EXPR_ARG (exp, 2);
38321
38322 tmode = TYPE_MODE (TREE_TYPE (arg0));
38323 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38324 gcc_assert (VECTOR_MODE_P (tmode));
38325
38326 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38327 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38328 elt = get_element_number (TREE_TYPE (arg0), arg2);
38329
38330 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38331 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38332
38333 op0 = force_reg (tmode, op0);
38334 op1 = force_reg (mode1, op1);
38335
38336 /* OP0 is the source of these builtin functions and shouldn't be
38337 modified. Create a copy, use it and return it as target. */
38338 target = gen_reg_rtx (tmode);
38339 emit_move_insn (target, op0);
38340 ix86_expand_vector_set (true, target, op1, elt);
38341
38342 return target;
38343 }
38344
38345 /* Emit conditional move of SRC to DST with condition
38346 OP1 CODE OP2. */
38347 static void
38348 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38349 {
38350 rtx t;
38351
38352 if (TARGET_CMOVE)
38353 {
38354 t = ix86_expand_compare (code, op1, op2);
38355 emit_insn (gen_rtx_SET (VOIDmode, dst,
38356 gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38357 src, dst)));
38358 }
38359 else
38360 {
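      /* No CMOV available: branch around the move instead.  Jump to NOMOVE
	 when the reversed condition holds, so the move is performed only
	 when OP1 CODE OP2 is true.  */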
38361 rtx nomove = gen_label_rtx ();
38362 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38363 const0_rtx, GET_MODE (op1), 1, nomove);
38364 emit_move_insn (dst, src);
38365 emit_label (nomove);
38366 }
38367 }
38368
38369 /* Choose the maximum of DST and SRC and store it in DST.  */
38370 static void
38371 ix86_emit_move_max (rtx dst, rtx src)
38372 {
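  /* dst = (dst < src) ? src : dst, using an unsigned comparison.  */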
38373 ix86_emit_cmove (dst, src, LTU, dst, src);
38374 }
38375
38376 /* Expand an expression EXP that calls a built-in function,
38377 with result going to TARGET if that's convenient
38378 (and in mode MODE if that's convenient).
38379 SUBTARGET may be used as the target for computing one of EXP's operands.
38380 IGNORE is nonzero if the value is to be ignored. */
38381
38382 static rtx
38383 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38384 machine_mode mode, int ignore)
38385 {
38386 const struct builtin_description *d;
38387 size_t i;
38388 enum insn_code icode;
38389 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38390 tree arg0, arg1, arg2, arg3, arg4;
38391 rtx op0, op1, op2, op3, op4, pat, insn;
38392 machine_mode mode0, mode1, mode2, mode3, mode4;
38393 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38394
38395 /* For CPU builtins that can be folded, fold first and expand the fold. */
38396 switch (fcode)
38397 {
38398 case IX86_BUILTIN_CPU_INIT:
38399 {
38400 /* Make it call __cpu_indicator_init in libgcc. */
38401 tree call_expr, fndecl, type;
38402 type = build_function_type_list (integer_type_node, NULL_TREE);
38403 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38404 call_expr = build_call_expr (fndecl, 0);
38405 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38406 }
38407 case IX86_BUILTIN_CPU_IS:
38408 case IX86_BUILTIN_CPU_SUPPORTS:
38409 {
38410 tree arg0 = CALL_EXPR_ARG (exp, 0);
38411 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38412 gcc_assert (fold_expr != NULL_TREE);
38413 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38414 }
38415 }
38416
38417 /* Determine whether the builtin function is available under the current ISA.
38418 Originally the builtin was not created if it wasn't applicable to the
38419 current ISA based on the command line switches. With function specific
38420 options, we need to check in the context of the function making the call
38421 whether it is supported. */
38422 if (ix86_builtins_isa[fcode].isa
38423 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38424 {
38425 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38426 NULL, (enum fpmath_unit) 0, false);
38427
38428 if (!opts)
38429 error ("%qE needs unknown isa option", fndecl);
38430 else
38431 {
38432 gcc_assert (opts != NULL);
38433 error ("%qE needs isa option %s", fndecl, opts);
38434 free (opts);
38435 }
38436 return const0_rtx;
38437 }
38438
38439 switch (fcode)
38440 {
38441 case IX86_BUILTIN_BNDMK:
38442 if (!target
38443 || GET_MODE (target) != BNDmode
38444 || !register_operand (target, BNDmode))
38445 target = gen_reg_rtx (BNDmode);
38446
38447 arg0 = CALL_EXPR_ARG (exp, 0);
38448 arg1 = CALL_EXPR_ARG (exp, 1);
38449
38450 op0 = expand_normal (arg0);
38451 op1 = expand_normal (arg1);
38452
38453 if (!register_operand (op0, Pmode))
38454 op0 = ix86_zero_extend_to_Pmode (op0);
38455 if (!register_operand (op1, Pmode))
38456 op1 = ix86_zero_extend_to_Pmode (op1);
38457
38458       /* Builtin arg1 is the size of the block, but the instruction's op1
38459 	 should be (size - 1).  */
38460 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38461 NULL_RTX, 1, OPTAB_DIRECT);
38462
38463 emit_insn (BNDmode == BND64mode
38464 ? gen_bnd64_mk (target, op0, op1)
38465 : gen_bnd32_mk (target, op0, op1));
38466 return target;
38467
38468 case IX86_BUILTIN_BNDSTX:
38469 arg0 = CALL_EXPR_ARG (exp, 0);
38470 arg1 = CALL_EXPR_ARG (exp, 1);
38471 arg2 = CALL_EXPR_ARG (exp, 2);
38472
38473 op0 = expand_normal (arg0);
38474 op1 = expand_normal (arg1);
38475 op2 = expand_normal (arg2);
38476
38477 if (!register_operand (op0, Pmode))
38478 op0 = ix86_zero_extend_to_Pmode (op0);
38479 if (!register_operand (op1, BNDmode))
38480 op1 = copy_to_mode_reg (BNDmode, op1);
38481 if (!register_operand (op2, Pmode))
38482 op2 = ix86_zero_extend_to_Pmode (op2);
38483
38484 emit_insn (BNDmode == BND64mode
38485 ? gen_bnd64_stx (op2, op0, op1)
38486 : gen_bnd32_stx (op2, op0, op1));
38487 return 0;
38488
38489 case IX86_BUILTIN_BNDLDX:
38490 if (!target
38491 || GET_MODE (target) != BNDmode
38492 || !register_operand (target, BNDmode))
38493 target = gen_reg_rtx (BNDmode);
38494
38495 arg0 = CALL_EXPR_ARG (exp, 0);
38496 arg1 = CALL_EXPR_ARG (exp, 1);
38497
38498 op0 = expand_normal (arg0);
38499 op1 = expand_normal (arg1);
38500
38501 if (!register_operand (op0, Pmode))
38502 op0 = ix86_zero_extend_to_Pmode (op0);
38503 if (!register_operand (op1, Pmode))
38504 op1 = ix86_zero_extend_to_Pmode (op1);
38505
38506 emit_insn (BNDmode == BND64mode
38507 ? gen_bnd64_ldx (target, op0, op1)
38508 : gen_bnd32_ldx (target, op0, op1));
38509 return target;
38510
38511 case IX86_BUILTIN_BNDCL:
38512 arg0 = CALL_EXPR_ARG (exp, 0);
38513 arg1 = CALL_EXPR_ARG (exp, 1);
38514
38515 op0 = expand_normal (arg0);
38516 op1 = expand_normal (arg1);
38517
38518 if (!register_operand (op0, Pmode))
38519 op0 = ix86_zero_extend_to_Pmode (op0);
38520 if (!register_operand (op1, BNDmode))
38521 op1 = copy_to_mode_reg (BNDmode, op1);
38522
38523 emit_insn (BNDmode == BND64mode
38524 ? gen_bnd64_cl (op1, op0)
38525 : gen_bnd32_cl (op1, op0));
38526 return 0;
38527
38528 case IX86_BUILTIN_BNDCU:
38529 arg0 = CALL_EXPR_ARG (exp, 0);
38530 arg1 = CALL_EXPR_ARG (exp, 1);
38531
38532 op0 = expand_normal (arg0);
38533 op1 = expand_normal (arg1);
38534
38535 if (!register_operand (op0, Pmode))
38536 op0 = ix86_zero_extend_to_Pmode (op0);
38537 if (!register_operand (op1, BNDmode))
38538 op1 = copy_to_mode_reg (BNDmode, op1);
38539
38540 emit_insn (BNDmode == BND64mode
38541 ? gen_bnd64_cu (op1, op0)
38542 : gen_bnd32_cu (op1, op0));
38543 return 0;
38544
38545 case IX86_BUILTIN_BNDRET:
38546 arg0 = CALL_EXPR_ARG (exp, 0);
38547 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38548 target = chkp_get_rtl_bounds (arg0);
38549
38550       /* If no bounds were specified for the returned value,
38551 	 then use INIT bounds.  This usually happens when
38552 	 some built-in function is expanded.  */
38553 if (!target)
38554 {
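	  /* INIT bounds are [0, ~0], i.e. they allow access to the whole
	     address space.  */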
38555 rtx t1 = gen_reg_rtx (Pmode);
38556 rtx t2 = gen_reg_rtx (Pmode);
38557 target = gen_reg_rtx (BNDmode);
38558 emit_move_insn (t1, const0_rtx);
38559 emit_move_insn (t2, constm1_rtx);
38560 emit_insn (BNDmode == BND64mode
38561 ? gen_bnd64_mk (target, t1, t2)
38562 : gen_bnd32_mk (target, t1, t2));
38563 }
38564
38565 gcc_assert (target && REG_P (target));
38566 return target;
38567
38568 case IX86_BUILTIN_BNDNARROW:
38569 {
38570 rtx m1, m1h1, m1h2, lb, ub, t1;
38571
38572 /* Return value and lb. */
38573 arg0 = CALL_EXPR_ARG (exp, 0);
38574 /* Bounds. */
38575 arg1 = CALL_EXPR_ARG (exp, 1);
38576 /* Size. */
38577 arg2 = CALL_EXPR_ARG (exp, 2);
38578
38579 lb = expand_normal (arg0);
38580 op1 = expand_normal (arg1);
38581 op2 = expand_normal (arg2);
38582
38583 /* Size was passed but we need to use (size - 1) as for bndmk. */
38584 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38585 NULL_RTX, 1, OPTAB_DIRECT);
38586
38587 	/* Add LB to the size and invert the result to get UB.  */
38588 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38589 op2, 1, OPTAB_DIRECT);
38590 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
38591
38592 if (!register_operand (lb, Pmode))
38593 lb = ix86_zero_extend_to_Pmode (lb);
38594 if (!register_operand (ub, Pmode))
38595 ub = ix86_zero_extend_to_Pmode (ub);
38596
38597 /* We need to move bounds to memory before any computations. */
38598 if (MEM_P (op1))
38599 m1 = op1;
38600 else
38601 {
38602 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38603 emit_move_insn (m1, op1);
38604 }
38605
38606 /* Generate mem expression to be used for access to LB and UB. */
38607 m1h1 = adjust_address (m1, Pmode, 0);
38608 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38609
38610 t1 = gen_reg_rtx (Pmode);
38611
38612 /* Compute LB. */
38613 emit_move_insn (t1, m1h1);
38614 ix86_emit_move_max (t1, lb);
38615 emit_move_insn (m1h1, t1);
38616
38617 /* Compute UB. UB is stored in 1's complement form. Therefore
38618 we also use max here. */
38619 emit_move_insn (t1, m1h2);
38620 ix86_emit_move_max (t1, ub);
38621 emit_move_insn (m1h2, t1);
38622
38623 op2 = gen_reg_rtx (BNDmode);
38624 emit_move_insn (op2, m1);
38625
38626 return chkp_join_splitted_slot (lb, op2);
38627 }
38628
38629 case IX86_BUILTIN_BNDINT:
38630 {
38631 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38632
38633 if (!target
38634 || GET_MODE (target) != BNDmode
38635 || !register_operand (target, BNDmode))
38636 target = gen_reg_rtx (BNDmode);
38637
38638 arg0 = CALL_EXPR_ARG (exp, 0);
38639 arg1 = CALL_EXPR_ARG (exp, 1);
38640
38641 op0 = expand_normal (arg0);
38642 op1 = expand_normal (arg1);
38643
38644 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38645 rh1 = adjust_address (res, Pmode, 0);
38646 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38647
38648 /* Put first bounds to temporaries. */
38649 lb1 = gen_reg_rtx (Pmode);
38650 ub1 = gen_reg_rtx (Pmode);
38651 if (MEM_P (op0))
38652 {
38653 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38654 emit_move_insn (ub1, adjust_address (op0, Pmode,
38655 GET_MODE_SIZE (Pmode)));
38656 }
38657 else
38658 {
38659 emit_move_insn (res, op0);
38660 emit_move_insn (lb1, rh1);
38661 emit_move_insn (ub1, rh2);
38662 }
38663
38664 /* Put second bounds to temporaries. */
38665 lb2 = gen_reg_rtx (Pmode);
38666 ub2 = gen_reg_rtx (Pmode);
38667 if (MEM_P (op1))
38668 {
38669 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38670 emit_move_insn (ub2, adjust_address (op1, Pmode,
38671 GET_MODE_SIZE (Pmode)));
38672 }
38673 else
38674 {
38675 emit_move_insn (res, op1);
38676 emit_move_insn (lb2, rh1);
38677 emit_move_insn (ub2, rh2);
38678 }
38679
38680 /* Compute LB. */
38681 ix86_emit_move_max (lb1, lb2);
38682 emit_move_insn (rh1, lb1);
38683
38684 /* Compute UB. UB is stored in 1's complement form. Therefore
38685 we also use max here. */
38686 ix86_emit_move_max (ub1, ub2);
38687 emit_move_insn (rh2, ub1);
38688
38689 emit_move_insn (target, res);
38690
38691 return target;
38692 }
38693
38694 case IX86_BUILTIN_SIZEOF:
38695 {
38696 tree name;
38697 rtx symbol;
38698
38699 if (!target
38700 || GET_MODE (target) != Pmode
38701 || !register_operand (target, Pmode))
38702 target = gen_reg_rtx (Pmode);
38703
38704 arg0 = CALL_EXPR_ARG (exp, 0);
38705 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38706
38707 name = DECL_ASSEMBLER_NAME (arg0);
38708 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38709
38710 emit_insn (Pmode == SImode
38711 ? gen_move_size_reloc_si (target, symbol)
38712 : gen_move_size_reloc_di (target, symbol));
38713
38714 return target;
38715 }
38716
38717 case IX86_BUILTIN_BNDLOWER:
38718 {
38719 rtx mem, hmem;
38720
38721 if (!target
38722 || GET_MODE (target) != Pmode
38723 || !register_operand (target, Pmode))
38724 target = gen_reg_rtx (Pmode);
38725
38726 arg0 = CALL_EXPR_ARG (exp, 0);
38727 op0 = expand_normal (arg0);
38728
38729 /* We need to move bounds to memory first. */
38730 if (MEM_P (op0))
38731 mem = op0;
38732 else
38733 {
38734 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38735 emit_move_insn (mem, op0);
38736 }
38737
38738 /* Generate mem expression to access LB and load it. */
38739 hmem = adjust_address (mem, Pmode, 0);
38740 emit_move_insn (target, hmem);
38741
38742 return target;
38743 }
38744
38745 case IX86_BUILTIN_BNDUPPER:
38746 {
38747 rtx mem, hmem, res;
38748
38749 if (!target
38750 || GET_MODE (target) != Pmode
38751 || !register_operand (target, Pmode))
38752 target = gen_reg_rtx (Pmode);
38753
38754 arg0 = CALL_EXPR_ARG (exp, 0);
38755 op0 = expand_normal (arg0);
38756
38757 /* We need to move bounds to memory first. */
38758 if (MEM_P (op0))
38759 mem = op0;
38760 else
38761 {
38762 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38763 emit_move_insn (mem, op0);
38764 }
38765
38766 /* Generate mem expression to access UB. */
38767 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
38768
38769 /* We need to inverse all bits of UB. */
38770 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
38771
38772 if (res != target)
38773 emit_move_insn (target, res);
38774
38775 return target;
38776 }
38777
38778 case IX86_BUILTIN_MASKMOVQ:
38779 case IX86_BUILTIN_MASKMOVDQU:
38780 icode = (fcode == IX86_BUILTIN_MASKMOVQ
38781 ? CODE_FOR_mmx_maskmovq
38782 : CODE_FOR_sse2_maskmovdqu);
38783 /* Note the arg order is different from the operand order. */
38784 arg1 = CALL_EXPR_ARG (exp, 0);
38785 arg2 = CALL_EXPR_ARG (exp, 1);
38786 arg0 = CALL_EXPR_ARG (exp, 2);
38787 op0 = expand_normal (arg0);
38788 op1 = expand_normal (arg1);
38789 op2 = expand_normal (arg2);
38790 mode0 = insn_data[icode].operand[0].mode;
38791 mode1 = insn_data[icode].operand[1].mode;
38792 mode2 = insn_data[icode].operand[2].mode;
38793
38794 op0 = ix86_zero_extend_to_Pmode (op0);
38795 op0 = gen_rtx_MEM (mode1, op0);
38796
38797 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38798 op0 = copy_to_mode_reg (mode0, op0);
38799 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38800 op1 = copy_to_mode_reg (mode1, op1);
38801 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38802 op2 = copy_to_mode_reg (mode2, op2);
38803 pat = GEN_FCN (icode) (op0, op1, op2);
38804 if (! pat)
38805 return 0;
38806 emit_insn (pat);
38807 return 0;
38808
38809 case IX86_BUILTIN_LDMXCSR:
38810 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
38811 target = assign_386_stack_local (SImode, SLOT_TEMP);
38812 emit_move_insn (target, op0);
38813 emit_insn (gen_sse_ldmxcsr (target));
38814 return 0;
38815
38816 case IX86_BUILTIN_STMXCSR:
38817 target = assign_386_stack_local (SImode, SLOT_TEMP);
38818 emit_insn (gen_sse_stmxcsr (target));
38819 return copy_to_mode_reg (SImode, target);
38820
38821 case IX86_BUILTIN_CLFLUSH:
38822 arg0 = CALL_EXPR_ARG (exp, 0);
38823 op0 = expand_normal (arg0);
38824 icode = CODE_FOR_sse2_clflush;
38825 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38826 op0 = ix86_zero_extend_to_Pmode (op0);
38827
38828 emit_insn (gen_sse2_clflush (op0));
38829 return 0;
38830
38831 case IX86_BUILTIN_CLWB:
38832 arg0 = CALL_EXPR_ARG (exp, 0);
38833 op0 = expand_normal (arg0);
38834 icode = CODE_FOR_clwb;
38835 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38836 op0 = ix86_zero_extend_to_Pmode (op0);
38837
38838 emit_insn (gen_clwb (op0));
38839 return 0;
38840
38841 case IX86_BUILTIN_CLFLUSHOPT:
38842 arg0 = CALL_EXPR_ARG (exp, 0);
38843 op0 = expand_normal (arg0);
38844 icode = CODE_FOR_clflushopt;
38845 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38846 op0 = ix86_zero_extend_to_Pmode (op0);
38847
38848 emit_insn (gen_clflushopt (op0));
38849 return 0;
38850
38851 case IX86_BUILTIN_MONITOR:
38852 arg0 = CALL_EXPR_ARG (exp, 0);
38853 arg1 = CALL_EXPR_ARG (exp, 1);
38854 arg2 = CALL_EXPR_ARG (exp, 2);
38855 op0 = expand_normal (arg0);
38856 op1 = expand_normal (arg1);
38857 op2 = expand_normal (arg2);
38858 if (!REG_P (op0))
38859 op0 = ix86_zero_extend_to_Pmode (op0);
38860 if (!REG_P (op1))
38861 op1 = copy_to_mode_reg (SImode, op1);
38862 if (!REG_P (op2))
38863 op2 = copy_to_mode_reg (SImode, op2);
38864 emit_insn (ix86_gen_monitor (op0, op1, op2));
38865 return 0;
38866
38867 case IX86_BUILTIN_MWAIT:
38868 arg0 = CALL_EXPR_ARG (exp, 0);
38869 arg1 = CALL_EXPR_ARG (exp, 1);
38870 op0 = expand_normal (arg0);
38871 op1 = expand_normal (arg1);
38872 if (!REG_P (op0))
38873 op0 = copy_to_mode_reg (SImode, op0);
38874 if (!REG_P (op1))
38875 op1 = copy_to_mode_reg (SImode, op1);
38876 emit_insn (gen_sse3_mwait (op0, op1));
38877 return 0;
38878
38879 case IX86_BUILTIN_VEC_INIT_V2SI:
38880 case IX86_BUILTIN_VEC_INIT_V4HI:
38881 case IX86_BUILTIN_VEC_INIT_V8QI:
38882 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
38883
38884 case IX86_BUILTIN_VEC_EXT_V2DF:
38885 case IX86_BUILTIN_VEC_EXT_V2DI:
38886 case IX86_BUILTIN_VEC_EXT_V4SF:
38887 case IX86_BUILTIN_VEC_EXT_V4SI:
38888 case IX86_BUILTIN_VEC_EXT_V8HI:
38889 case IX86_BUILTIN_VEC_EXT_V2SI:
38890 case IX86_BUILTIN_VEC_EXT_V4HI:
38891 case IX86_BUILTIN_VEC_EXT_V16QI:
38892 return ix86_expand_vec_ext_builtin (exp, target);
38893
38894 case IX86_BUILTIN_VEC_SET_V2DI:
38895 case IX86_BUILTIN_VEC_SET_V4SF:
38896 case IX86_BUILTIN_VEC_SET_V4SI:
38897 case IX86_BUILTIN_VEC_SET_V8HI:
38898 case IX86_BUILTIN_VEC_SET_V4HI:
38899 case IX86_BUILTIN_VEC_SET_V16QI:
38900 return ix86_expand_vec_set_builtin (exp);
38901
38902 case IX86_BUILTIN_INFQ:
38903 case IX86_BUILTIN_HUGE_VALQ:
38904 {
38905 REAL_VALUE_TYPE inf;
38906 rtx tmp;
38907
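	/* Both builtins return +Inf in MODE; materialize it as a load from
	   the constant pool.  */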
38908 real_inf (&inf);
38909 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
38910
38911 tmp = validize_mem (force_const_mem (mode, tmp));
38912
38913 if (target == 0)
38914 target = gen_reg_rtx (mode);
38915
38916 emit_move_insn (target, tmp);
38917 return target;
38918 }
38919
38920 case IX86_BUILTIN_RDPMC:
38921 case IX86_BUILTIN_RDTSC:
38922 case IX86_BUILTIN_RDTSCP:
38923
38924 op0 = gen_reg_rtx (DImode);
38925 op1 = gen_reg_rtx (DImode);
38926
38927 if (fcode == IX86_BUILTIN_RDPMC)
38928 {
38929 arg0 = CALL_EXPR_ARG (exp, 0);
38930 op2 = expand_normal (arg0);
38931 if (!register_operand (op2, SImode))
38932 op2 = copy_to_mode_reg (SImode, op2);
38933
38934 insn = (TARGET_64BIT
38935 ? gen_rdpmc_rex64 (op0, op1, op2)
38936 : gen_rdpmc (op0, op2));
38937 emit_insn (insn);
38938 }
38939 else if (fcode == IX86_BUILTIN_RDTSC)
38940 {
38941 insn = (TARGET_64BIT
38942 ? gen_rdtsc_rex64 (op0, op1)
38943 : gen_rdtsc (op0));
38944 emit_insn (insn);
38945 }
38946 else
38947 {
38948 op2 = gen_reg_rtx (SImode);
38949
38950 insn = (TARGET_64BIT
38951 ? gen_rdtscp_rex64 (op0, op1, op2)
38952 : gen_rdtscp (op0, op2));
38953 emit_insn (insn);
38954
38955 arg0 = CALL_EXPR_ARG (exp, 0);
38956 op4 = expand_normal (arg0);
38957 if (!address_operand (op4, VOIDmode))
38958 {
38959 op4 = convert_memory_address (Pmode, op4);
38960 op4 = copy_addr_to_reg (op4);
38961 }
38962 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
38963 }
38964
38965 if (target == 0)
38966 {
38967 /* mode is VOIDmode if __builtin_rd* has been called
38968 without lhs. */
38969 if (mode == VOIDmode)
38970 return target;
38971 target = gen_reg_rtx (mode);
38972 }
38973
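      /* On 64-bit targets the counter value is returned in two 32-bit halves
	 (op0 = low, op1 = high); splice them into a single DImode value.  */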
38974 if (TARGET_64BIT)
38975 {
38976 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
38977 op1, 1, OPTAB_DIRECT);
38978 op0 = expand_simple_binop (DImode, IOR, op0, op1,
38979 op0, 1, OPTAB_DIRECT);
38980 }
38981
38982 emit_move_insn (target, op0);
38983 return target;
38984
38985 case IX86_BUILTIN_FXSAVE:
38986 case IX86_BUILTIN_FXRSTOR:
38987 case IX86_BUILTIN_FXSAVE64:
38988 case IX86_BUILTIN_FXRSTOR64:
38989 case IX86_BUILTIN_FNSTENV:
38990 case IX86_BUILTIN_FLDENV:
38991 mode0 = BLKmode;
38992 switch (fcode)
38993 {
38994 case IX86_BUILTIN_FXSAVE:
38995 icode = CODE_FOR_fxsave;
38996 break;
38997 case IX86_BUILTIN_FXRSTOR:
38998 icode = CODE_FOR_fxrstor;
38999 break;
39000 case IX86_BUILTIN_FXSAVE64:
39001 icode = CODE_FOR_fxsave64;
39002 break;
39003 case IX86_BUILTIN_FXRSTOR64:
39004 icode = CODE_FOR_fxrstor64;
39005 break;
39006 case IX86_BUILTIN_FNSTENV:
39007 icode = CODE_FOR_fnstenv;
39008 break;
39009 case IX86_BUILTIN_FLDENV:
39010 icode = CODE_FOR_fldenv;
39011 break;
39012 default:
39013 gcc_unreachable ();
39014 }
39015
39016 arg0 = CALL_EXPR_ARG (exp, 0);
39017 op0 = expand_normal (arg0);
39018
39019 if (!address_operand (op0, VOIDmode))
39020 {
39021 op0 = convert_memory_address (Pmode, op0);
39022 op0 = copy_addr_to_reg (op0);
39023 }
39024 op0 = gen_rtx_MEM (mode0, op0);
39025
39026 pat = GEN_FCN (icode) (op0);
39027 if (pat)
39028 emit_insn (pat);
39029 return 0;
39030
39031 case IX86_BUILTIN_XSAVE:
39032 case IX86_BUILTIN_XRSTOR:
39033 case IX86_BUILTIN_XSAVE64:
39034 case IX86_BUILTIN_XRSTOR64:
39035 case IX86_BUILTIN_XSAVEOPT:
39036 case IX86_BUILTIN_XSAVEOPT64:
39037 case IX86_BUILTIN_XSAVES:
39038 case IX86_BUILTIN_XRSTORS:
39039 case IX86_BUILTIN_XSAVES64:
39040 case IX86_BUILTIN_XRSTORS64:
39041 case IX86_BUILTIN_XSAVEC:
39042 case IX86_BUILTIN_XSAVEC64:
39043 arg0 = CALL_EXPR_ARG (exp, 0);
39044 arg1 = CALL_EXPR_ARG (exp, 1);
39045 op0 = expand_normal (arg0);
39046 op1 = expand_normal (arg1);
39047
39048 if (!address_operand (op0, VOIDmode))
39049 {
39050 op0 = convert_memory_address (Pmode, op0);
39051 op0 = copy_addr_to_reg (op0);
39052 }
39053 op0 = gen_rtx_MEM (BLKmode, op0);
39054
39055 op1 = force_reg (DImode, op1);
39056
39057 if (TARGET_64BIT)
39058 {
39059 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
39060 NULL, 1, OPTAB_DIRECT);
39061 switch (fcode)
39062 {
39063 case IX86_BUILTIN_XSAVE:
39064 icode = CODE_FOR_xsave_rex64;
39065 break;
39066 case IX86_BUILTIN_XRSTOR:
39067 icode = CODE_FOR_xrstor_rex64;
39068 break;
39069 case IX86_BUILTIN_XSAVE64:
39070 icode = CODE_FOR_xsave64;
39071 break;
39072 case IX86_BUILTIN_XRSTOR64:
39073 icode = CODE_FOR_xrstor64;
39074 break;
39075 case IX86_BUILTIN_XSAVEOPT:
39076 icode = CODE_FOR_xsaveopt_rex64;
39077 break;
39078 case IX86_BUILTIN_XSAVEOPT64:
39079 icode = CODE_FOR_xsaveopt64;
39080 break;
39081 case IX86_BUILTIN_XSAVES:
39082 icode = CODE_FOR_xsaves_rex64;
39083 break;
39084 case IX86_BUILTIN_XRSTORS:
39085 icode = CODE_FOR_xrstors_rex64;
39086 break;
39087 case IX86_BUILTIN_XSAVES64:
39088 icode = CODE_FOR_xsaves64;
39089 break;
39090 case IX86_BUILTIN_XRSTORS64:
39091 icode = CODE_FOR_xrstors64;
39092 break;
39093 case IX86_BUILTIN_XSAVEC:
39094 icode = CODE_FOR_xsavec_rex64;
39095 break;
39096 case IX86_BUILTIN_XSAVEC64:
39097 icode = CODE_FOR_xsavec64;
39098 break;
39099 default:
39100 gcc_unreachable ();
39101 }
39102
39103 op2 = gen_lowpart (SImode, op2);
39104 op1 = gen_lowpart (SImode, op1);
39105 pat = GEN_FCN (icode) (op0, op1, op2);
39106 }
39107 else
39108 {
39109 switch (fcode)
39110 {
39111 case IX86_BUILTIN_XSAVE:
39112 icode = CODE_FOR_xsave;
39113 break;
39114 case IX86_BUILTIN_XRSTOR:
39115 icode = CODE_FOR_xrstor;
39116 break;
39117 case IX86_BUILTIN_XSAVEOPT:
39118 icode = CODE_FOR_xsaveopt;
39119 break;
39120 case IX86_BUILTIN_XSAVES:
39121 icode = CODE_FOR_xsaves;
39122 break;
39123 case IX86_BUILTIN_XRSTORS:
39124 icode = CODE_FOR_xrstors;
39125 break;
39126 case IX86_BUILTIN_XSAVEC:
39127 icode = CODE_FOR_xsavec;
39128 break;
39129 default:
39130 gcc_unreachable ();
39131 }
39132 pat = GEN_FCN (icode) (op0, op1);
39133 }
39134
39135 if (pat)
39136 emit_insn (pat);
39137 return 0;
39138
39139 case IX86_BUILTIN_LLWPCB:
39140 arg0 = CALL_EXPR_ARG (exp, 0);
39141 op0 = expand_normal (arg0);
39142 icode = CODE_FOR_lwp_llwpcb;
39143 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39144 op0 = ix86_zero_extend_to_Pmode (op0);
39145 emit_insn (gen_lwp_llwpcb (op0));
39146 return 0;
39147
39148 case IX86_BUILTIN_SLWPCB:
39149 icode = CODE_FOR_lwp_slwpcb;
39150 if (!target
39151 || !insn_data[icode].operand[0].predicate (target, Pmode))
39152 target = gen_reg_rtx (Pmode);
39153 emit_insn (gen_lwp_slwpcb (target));
39154 return target;
39155
39156 case IX86_BUILTIN_BEXTRI32:
39157 case IX86_BUILTIN_BEXTRI64:
39158 arg0 = CALL_EXPR_ARG (exp, 0);
39159 arg1 = CALL_EXPR_ARG (exp, 1);
39160 op0 = expand_normal (arg0);
39161 op1 = expand_normal (arg1);
39162 icode = (fcode == IX86_BUILTIN_BEXTRI32
39163 ? CODE_FOR_tbm_bextri_si
39164 : CODE_FOR_tbm_bextri_di);
39165 if (!CONST_INT_P (op1))
39166 {
39167 error ("last argument must be an immediate");
39168 return const0_rtx;
39169 }
39170 else
39171 {
39172 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39173 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39174 op1 = GEN_INT (length);
39175 op2 = GEN_INT (lsb_index);
39176 pat = GEN_FCN (icode) (target, op0, op1, op2);
39177 if (pat)
39178 emit_insn (pat);
39179 return target;
39180 }
39181
39182 case IX86_BUILTIN_RDRAND16_STEP:
39183 icode = CODE_FOR_rdrandhi_1;
39184 mode0 = HImode;
39185 goto rdrand_step;
39186
39187 case IX86_BUILTIN_RDRAND32_STEP:
39188 icode = CODE_FOR_rdrandsi_1;
39189 mode0 = SImode;
39190 goto rdrand_step;
39191
39192 case IX86_BUILTIN_RDRAND64_STEP:
39193 icode = CODE_FOR_rdranddi_1;
39194 mode0 = DImode;
39195
39196 rdrand_step:
39197 op0 = gen_reg_rtx (mode0);
39198 emit_insn (GEN_FCN (icode) (op0));
39199
39200 arg0 = CALL_EXPR_ARG (exp, 0);
39201 op1 = expand_normal (arg0);
39202 if (!address_operand (op1, VOIDmode))
39203 {
39204 op1 = convert_memory_address (Pmode, op1);
39205 op1 = copy_addr_to_reg (op1);
39206 }
39207 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39208
39209 op1 = gen_reg_rtx (SImode);
39210 emit_move_insn (op1, CONST1_RTX (SImode));
39211
39212 /* Emit SImode conditional move. */
39213 if (mode0 == HImode)
39214 {
39215 op2 = gen_reg_rtx (SImode);
39216 emit_insn (gen_zero_extendhisi2 (op2, op0));
39217 }
39218 else if (mode0 == SImode)
39219 op2 = op0;
39220 else
39221 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39222
39223 if (target == 0
39224 || !register_operand (target, SImode))
39225 target = gen_reg_rtx (SImode);
39226
39227 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39228 const0_rtx);
39229 emit_insn (gen_rtx_SET (VOIDmode, target,
39230 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39231 return target;
39232
39233 case IX86_BUILTIN_RDSEED16_STEP:
39234 icode = CODE_FOR_rdseedhi_1;
39235 mode0 = HImode;
39236 goto rdseed_step;
39237
39238 case IX86_BUILTIN_RDSEED32_STEP:
39239 icode = CODE_FOR_rdseedsi_1;
39240 mode0 = SImode;
39241 goto rdseed_step;
39242
39243 case IX86_BUILTIN_RDSEED64_STEP:
39244 icode = CODE_FOR_rdseeddi_1;
39245 mode0 = DImode;
39246
39247 rdseed_step:
39248 op0 = gen_reg_rtx (mode0);
39249 emit_insn (GEN_FCN (icode) (op0));
39250
39251 arg0 = CALL_EXPR_ARG (exp, 0);
39252 op1 = expand_normal (arg0);
39253 if (!address_operand (op1, VOIDmode))
39254 {
39255 op1 = convert_memory_address (Pmode, op1);
39256 op1 = copy_addr_to_reg (op1);
39257 }
39258 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39259
39260 op2 = gen_reg_rtx (QImode);
39261
39262 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39263 const0_rtx);
39264 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
39265
39266 if (target == 0
39267 || !register_operand (target, SImode))
39268 target = gen_reg_rtx (SImode);
39269
39270 emit_insn (gen_zero_extendqisi2 (target, op2));
39271 return target;
39272
39273 case IX86_BUILTIN_SBB32:
39274 icode = CODE_FOR_subsi3_carry;
39275 mode0 = SImode;
39276 goto addcarryx;
39277
39278 case IX86_BUILTIN_SBB64:
39279 icode = CODE_FOR_subdi3_carry;
39280 mode0 = DImode;
39281 goto addcarryx;
39282
39283 case IX86_BUILTIN_ADDCARRYX32:
39284 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39285 mode0 = SImode;
39286 goto addcarryx;
39287
39288 case IX86_BUILTIN_ADDCARRYX64:
39289 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39290 mode0 = DImode;
39291
39292 addcarryx:
39293 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39294 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39295 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39296 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39297
39298 op0 = gen_reg_rtx (QImode);
39299
39300 /* Generate CF from input operand. */
39301 op1 = expand_normal (arg0);
39302 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
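      /* Adding -1 (0xff) to C_IN sets the carry flag iff C_IN is nonzero,
	 which loads the incoming carry into the flags register for the
	 add-with-carry below.  */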
39303 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
39304
39305       /* Generate the add-with-carry instruction to compute X + Y + CF.  */
39306 op2 = expand_normal (arg1);
39307 op3 = expand_normal (arg2);
39308
39309 if (!REG_P (op2))
39310 op2 = copy_to_mode_reg (mode0, op2);
39311 if (!REG_P (op3))
39312 op3 = copy_to_mode_reg (mode0, op3);
39313
39314 op0 = gen_reg_rtx (mode0);
39315
39316 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39317 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39318 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39319
39320 /* Store the result. */
39321 op4 = expand_normal (arg3);
39322 if (!address_operand (op4, VOIDmode))
39323 {
39324 op4 = convert_memory_address (Pmode, op4);
39325 op4 = copy_addr_to_reg (op4);
39326 }
39327 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39328
39329 /* Return current CF value. */
39330 if (target == 0)
39331 target = gen_reg_rtx (QImode);
39332
39333 PUT_MODE (pat, QImode);
39334 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
39335 return target;
39336
39337 case IX86_BUILTIN_READ_FLAGS:
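      /* Read the flags register by pushing it onto the stack and popping
	 the value into TARGET.  */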
39338 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39339
39340 if (optimize
39341 || target == NULL_RTX
39342 || !nonimmediate_operand (target, word_mode)
39343 || GET_MODE (target) != word_mode)
39344 target = gen_reg_rtx (word_mode);
39345
39346 emit_insn (gen_pop (target));
39347 return target;
39348
39349 case IX86_BUILTIN_WRITE_FLAGS:
39350
39351 arg0 = CALL_EXPR_ARG (exp, 0);
39352 op0 = expand_normal (arg0);
39353 if (!general_no_elim_operand (op0, word_mode))
39354 op0 = copy_to_mode_reg (word_mode, op0);
39355
39356 emit_insn (gen_push (op0));
39357 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39358 return 0;
39359
39360 case IX86_BUILTIN_KORTESTC16:
39361 icode = CODE_FOR_kortestchi;
39362 mode0 = HImode;
39363 mode1 = CCCmode;
39364 goto kortest;
39365
39366 case IX86_BUILTIN_KORTESTZ16:
39367 icode = CODE_FOR_kortestzhi;
39368 mode0 = HImode;
39369 mode1 = CCZmode;
39370
39371 kortest:
39372 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39373 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39374 op0 = expand_normal (arg0);
39375 op1 = expand_normal (arg1);
39376
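      /* Copy the mask arguments into fresh registers and reinterpret them
	 in the HImode mask mode that the kortest pattern expects.  */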
39377 op0 = copy_to_reg (op0);
39378 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39379 op1 = copy_to_reg (op1);
39380 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39381
39382 target = gen_reg_rtx (QImode);
39383 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
39384
39385 /* Emit kortest. */
39386 emit_insn (GEN_FCN (icode) (op0, op1));
39387 /* And use setcc to return result from flags. */
39388 ix86_expand_setcc (target, EQ,
39389 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39390 return target;
39391
39392 case IX86_BUILTIN_GATHERSIV2DF:
39393 icode = CODE_FOR_avx2_gathersiv2df;
39394 goto gather_gen;
39395 case IX86_BUILTIN_GATHERSIV4DF:
39396 icode = CODE_FOR_avx2_gathersiv4df;
39397 goto gather_gen;
39398 case IX86_BUILTIN_GATHERDIV2DF:
39399 icode = CODE_FOR_avx2_gatherdiv2df;
39400 goto gather_gen;
39401 case IX86_BUILTIN_GATHERDIV4DF:
39402 icode = CODE_FOR_avx2_gatherdiv4df;
39403 goto gather_gen;
39404 case IX86_BUILTIN_GATHERSIV4SF:
39405 icode = CODE_FOR_avx2_gathersiv4sf;
39406 goto gather_gen;
39407 case IX86_BUILTIN_GATHERSIV8SF:
39408 icode = CODE_FOR_avx2_gathersiv8sf;
39409 goto gather_gen;
39410 case IX86_BUILTIN_GATHERDIV4SF:
39411 icode = CODE_FOR_avx2_gatherdiv4sf;
39412 goto gather_gen;
39413 case IX86_BUILTIN_GATHERDIV8SF:
39414 icode = CODE_FOR_avx2_gatherdiv8sf;
39415 goto gather_gen;
39416 case IX86_BUILTIN_GATHERSIV2DI:
39417 icode = CODE_FOR_avx2_gathersiv2di;
39418 goto gather_gen;
39419 case IX86_BUILTIN_GATHERSIV4DI:
39420 icode = CODE_FOR_avx2_gathersiv4di;
39421 goto gather_gen;
39422 case IX86_BUILTIN_GATHERDIV2DI:
39423 icode = CODE_FOR_avx2_gatherdiv2di;
39424 goto gather_gen;
39425 case IX86_BUILTIN_GATHERDIV4DI:
39426 icode = CODE_FOR_avx2_gatherdiv4di;
39427 goto gather_gen;
39428 case IX86_BUILTIN_GATHERSIV4SI:
39429 icode = CODE_FOR_avx2_gathersiv4si;
39430 goto gather_gen;
39431 case IX86_BUILTIN_GATHERSIV8SI:
39432 icode = CODE_FOR_avx2_gathersiv8si;
39433 goto gather_gen;
39434 case IX86_BUILTIN_GATHERDIV4SI:
39435 icode = CODE_FOR_avx2_gatherdiv4si;
39436 goto gather_gen;
39437 case IX86_BUILTIN_GATHERDIV8SI:
39438 icode = CODE_FOR_avx2_gatherdiv8si;
39439 goto gather_gen;
39440 case IX86_BUILTIN_GATHERALTSIV4DF:
39441 icode = CODE_FOR_avx2_gathersiv4df;
39442 goto gather_gen;
39443 case IX86_BUILTIN_GATHERALTDIV8SF:
39444 icode = CODE_FOR_avx2_gatherdiv8sf;
39445 goto gather_gen;
39446 case IX86_BUILTIN_GATHERALTSIV4DI:
39447 icode = CODE_FOR_avx2_gathersiv4di;
39448 goto gather_gen;
39449 case IX86_BUILTIN_GATHERALTDIV8SI:
39450 icode = CODE_FOR_avx2_gatherdiv8si;
39451 goto gather_gen;
39452 case IX86_BUILTIN_GATHER3SIV16SF:
39453 icode = CODE_FOR_avx512f_gathersiv16sf;
39454 goto gather_gen;
39455 case IX86_BUILTIN_GATHER3SIV8DF:
39456 icode = CODE_FOR_avx512f_gathersiv8df;
39457 goto gather_gen;
39458 case IX86_BUILTIN_GATHER3DIV16SF:
39459 icode = CODE_FOR_avx512f_gatherdiv16sf;
39460 goto gather_gen;
39461 case IX86_BUILTIN_GATHER3DIV8DF:
39462 icode = CODE_FOR_avx512f_gatherdiv8df;
39463 goto gather_gen;
39464 case IX86_BUILTIN_GATHER3SIV16SI:
39465 icode = CODE_FOR_avx512f_gathersiv16si;
39466 goto gather_gen;
39467 case IX86_BUILTIN_GATHER3SIV8DI:
39468 icode = CODE_FOR_avx512f_gathersiv8di;
39469 goto gather_gen;
39470 case IX86_BUILTIN_GATHER3DIV16SI:
39471 icode = CODE_FOR_avx512f_gatherdiv16si;
39472 goto gather_gen;
39473 case IX86_BUILTIN_GATHER3DIV8DI:
39474 icode = CODE_FOR_avx512f_gatherdiv8di;
39475 goto gather_gen;
39476 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39477 icode = CODE_FOR_avx512f_gathersiv8df;
39478 goto gather_gen;
39479 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39480 icode = CODE_FOR_avx512f_gatherdiv16sf;
39481 goto gather_gen;
39482 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39483 icode = CODE_FOR_avx512f_gathersiv8di;
39484 goto gather_gen;
39485 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39486 icode = CODE_FOR_avx512f_gatherdiv16si;
39487 goto gather_gen;
39488 case IX86_BUILTIN_GATHER3SIV2DF:
39489 icode = CODE_FOR_avx512vl_gathersiv2df;
39490 goto gather_gen;
39491 case IX86_BUILTIN_GATHER3SIV4DF:
39492 icode = CODE_FOR_avx512vl_gathersiv4df;
39493 goto gather_gen;
39494 case IX86_BUILTIN_GATHER3DIV2DF:
39495 icode = CODE_FOR_avx512vl_gatherdiv2df;
39496 goto gather_gen;
39497 case IX86_BUILTIN_GATHER3DIV4DF:
39498 icode = CODE_FOR_avx512vl_gatherdiv4df;
39499 goto gather_gen;
39500 case IX86_BUILTIN_GATHER3SIV4SF:
39501 icode = CODE_FOR_avx512vl_gathersiv4sf;
39502 goto gather_gen;
39503 case IX86_BUILTIN_GATHER3SIV8SF:
39504 icode = CODE_FOR_avx512vl_gathersiv8sf;
39505 goto gather_gen;
39506 case IX86_BUILTIN_GATHER3DIV4SF:
39507 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39508 goto gather_gen;
39509 case IX86_BUILTIN_GATHER3DIV8SF:
39510 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39511 goto gather_gen;
39512 case IX86_BUILTIN_GATHER3SIV2DI:
39513 icode = CODE_FOR_avx512vl_gathersiv2di;
39514 goto gather_gen;
39515 case IX86_BUILTIN_GATHER3SIV4DI:
39516 icode = CODE_FOR_avx512vl_gathersiv4di;
39517 goto gather_gen;
39518 case IX86_BUILTIN_GATHER3DIV2DI:
39519 icode = CODE_FOR_avx512vl_gatherdiv2di;
39520 goto gather_gen;
39521 case IX86_BUILTIN_GATHER3DIV4DI:
39522 icode = CODE_FOR_avx512vl_gatherdiv4di;
39523 goto gather_gen;
39524 case IX86_BUILTIN_GATHER3SIV4SI:
39525 icode = CODE_FOR_avx512vl_gathersiv4si;
39526 goto gather_gen;
39527 case IX86_BUILTIN_GATHER3SIV8SI:
39528 icode = CODE_FOR_avx512vl_gathersiv8si;
39529 goto gather_gen;
39530 case IX86_BUILTIN_GATHER3DIV4SI:
39531 icode = CODE_FOR_avx512vl_gatherdiv4si;
39532 goto gather_gen;
39533 case IX86_BUILTIN_GATHER3DIV8SI:
39534 icode = CODE_FOR_avx512vl_gatherdiv8si;
39535 goto gather_gen;
39536 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39537 icode = CODE_FOR_avx512vl_gathersiv4df;
39538 goto gather_gen;
39539 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39540 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39541 goto gather_gen;
39542 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39543 icode = CODE_FOR_avx512vl_gathersiv4di;
39544 goto gather_gen;
39545 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39546 icode = CODE_FOR_avx512vl_gatherdiv8si;
39547 goto gather_gen;
39548 case IX86_BUILTIN_SCATTERSIV16SF:
39549 icode = CODE_FOR_avx512f_scattersiv16sf;
39550 goto scatter_gen;
39551 case IX86_BUILTIN_SCATTERSIV8DF:
39552 icode = CODE_FOR_avx512f_scattersiv8df;
39553 goto scatter_gen;
39554 case IX86_BUILTIN_SCATTERDIV16SF:
39555 icode = CODE_FOR_avx512f_scatterdiv16sf;
39556 goto scatter_gen;
39557 case IX86_BUILTIN_SCATTERDIV8DF:
39558 icode = CODE_FOR_avx512f_scatterdiv8df;
39559 goto scatter_gen;
39560 case IX86_BUILTIN_SCATTERSIV16SI:
39561 icode = CODE_FOR_avx512f_scattersiv16si;
39562 goto scatter_gen;
39563 case IX86_BUILTIN_SCATTERSIV8DI:
39564 icode = CODE_FOR_avx512f_scattersiv8di;
39565 goto scatter_gen;
39566 case IX86_BUILTIN_SCATTERDIV16SI:
39567 icode = CODE_FOR_avx512f_scatterdiv16si;
39568 goto scatter_gen;
39569 case IX86_BUILTIN_SCATTERDIV8DI:
39570 icode = CODE_FOR_avx512f_scatterdiv8di;
39571 goto scatter_gen;
39572 case IX86_BUILTIN_SCATTERSIV8SF:
39573 icode = CODE_FOR_avx512vl_scattersiv8sf;
39574 goto scatter_gen;
39575 case IX86_BUILTIN_SCATTERSIV4SF:
39576 icode = CODE_FOR_avx512vl_scattersiv4sf;
39577 goto scatter_gen;
39578 case IX86_BUILTIN_SCATTERSIV4DF:
39579 icode = CODE_FOR_avx512vl_scattersiv4df;
39580 goto scatter_gen;
39581 case IX86_BUILTIN_SCATTERSIV2DF:
39582 icode = CODE_FOR_avx512vl_scattersiv2df;
39583 goto scatter_gen;
39584 case IX86_BUILTIN_SCATTERDIV8SF:
39585 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39586 goto scatter_gen;
39587 case IX86_BUILTIN_SCATTERDIV4SF:
39588 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39589 goto scatter_gen;
39590 case IX86_BUILTIN_SCATTERDIV4DF:
39591 icode = CODE_FOR_avx512vl_scatterdiv4df;
39592 goto scatter_gen;
39593 case IX86_BUILTIN_SCATTERDIV2DF:
39594 icode = CODE_FOR_avx512vl_scatterdiv2df;
39595 goto scatter_gen;
39596 case IX86_BUILTIN_SCATTERSIV8SI:
39597 icode = CODE_FOR_avx512vl_scattersiv8si;
39598 goto scatter_gen;
39599 case IX86_BUILTIN_SCATTERSIV4SI:
39600 icode = CODE_FOR_avx512vl_scattersiv4si;
39601 goto scatter_gen;
39602 case IX86_BUILTIN_SCATTERSIV4DI:
39603 icode = CODE_FOR_avx512vl_scattersiv4di;
39604 goto scatter_gen;
39605 case IX86_BUILTIN_SCATTERSIV2DI:
39606 icode = CODE_FOR_avx512vl_scattersiv2di;
39607 goto scatter_gen;
39608 case IX86_BUILTIN_SCATTERDIV8SI:
39609 icode = CODE_FOR_avx512vl_scatterdiv8si;
39610 goto scatter_gen;
39611 case IX86_BUILTIN_SCATTERDIV4SI:
39612 icode = CODE_FOR_avx512vl_scatterdiv4si;
39613 goto scatter_gen;
39614 case IX86_BUILTIN_SCATTERDIV4DI:
39615 icode = CODE_FOR_avx512vl_scatterdiv4di;
39616 goto scatter_gen;
39617 case IX86_BUILTIN_SCATTERDIV2DI:
39618 icode = CODE_FOR_avx512vl_scatterdiv2di;
39619 goto scatter_gen;
39620 case IX86_BUILTIN_GATHERPFDPD:
39621 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39622 goto vec_prefetch_gen;
39623 case IX86_BUILTIN_GATHERPFDPS:
39624 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39625 goto vec_prefetch_gen;
39626 case IX86_BUILTIN_GATHERPFQPD:
39627 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39628 goto vec_prefetch_gen;
39629 case IX86_BUILTIN_GATHERPFQPS:
39630 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39631 goto vec_prefetch_gen;
39632 case IX86_BUILTIN_SCATTERPFDPD:
39633 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39634 goto vec_prefetch_gen;
39635 case IX86_BUILTIN_SCATTERPFDPS:
39636 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39637 goto vec_prefetch_gen;
39638 case IX86_BUILTIN_SCATTERPFQPD:
39639 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39640 goto vec_prefetch_gen;
39641 case IX86_BUILTIN_SCATTERPFQPS:
39642 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39643 goto vec_prefetch_gen;
39644
39645 gather_gen:
39646 rtx half;
39647 rtx (*gen) (rtx, rtx);
39648
39649 arg0 = CALL_EXPR_ARG (exp, 0);
39650 arg1 = CALL_EXPR_ARG (exp, 1);
39651 arg2 = CALL_EXPR_ARG (exp, 2);
39652 arg3 = CALL_EXPR_ARG (exp, 3);
39653 arg4 = CALL_EXPR_ARG (exp, 4);
39654 op0 = expand_normal (arg0);
39655 op1 = expand_normal (arg1);
39656 op2 = expand_normal (arg2);
39657 op3 = expand_normal (arg3);
39658 op4 = expand_normal (arg4);
39659 /* Note the arg order is different from the operand order. */
39660 mode0 = insn_data[icode].operand[1].mode;
39661 mode2 = insn_data[icode].operand[3].mode;
39662 mode3 = insn_data[icode].operand[4].mode;
39663 mode4 = insn_data[icode].operand[5].mode;
39664
39665 if (target == NULL_RTX
39666 || GET_MODE (target) != insn_data[icode].operand[0].mode
39667 || !insn_data[icode].operand[0].predicate (target,
39668 GET_MODE (target)))
39669 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39670 else
39671 subtarget = target;
39672
39673 switch (fcode)
39674 {
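	/* The *ALT* gather variants mix vectors of different element counts:
	   the SIV forms take an index vector with twice as many elements as
	   needed and use only its low half, while the DIV forms halve the
	   source and mask vectors instead.  */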
39675 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39676 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39677 half = gen_reg_rtx (V8SImode);
39678 if (!nonimmediate_operand (op2, V16SImode))
39679 op2 = copy_to_mode_reg (V16SImode, op2);
39680 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39681 op2 = half;
39682 break;
39683 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39684 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39685 case IX86_BUILTIN_GATHERALTSIV4DF:
39686 case IX86_BUILTIN_GATHERALTSIV4DI:
39687 half = gen_reg_rtx (V4SImode);
39688 if (!nonimmediate_operand (op2, V8SImode))
39689 op2 = copy_to_mode_reg (V8SImode, op2);
39690 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39691 op2 = half;
39692 break;
39693 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39694 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39695 half = gen_reg_rtx (mode0);
39696 if (mode0 == V8SFmode)
39697 gen = gen_vec_extract_lo_v16sf;
39698 else
39699 gen = gen_vec_extract_lo_v16si;
39700 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39701 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39702 emit_insn (gen (half, op0));
39703 op0 = half;
39704 if (GET_MODE (op3) != VOIDmode)
39705 {
39706 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39707 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39708 emit_insn (gen (half, op3));
39709 op3 = half;
39710 }
39711 break;
39712 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39713 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39714 case IX86_BUILTIN_GATHERALTDIV8SF:
39715 case IX86_BUILTIN_GATHERALTDIV8SI:
39716 half = gen_reg_rtx (mode0);
39717 if (mode0 == V4SFmode)
39718 gen = gen_vec_extract_lo_v8sf;
39719 else
39720 gen = gen_vec_extract_lo_v8si;
39721 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39722 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39723 emit_insn (gen (half, op0));
39724 op0 = half;
39725 if (GET_MODE (op3) != VOIDmode)
39726 {
39727 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39728 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39729 emit_insn (gen (half, op3));
39730 op3 = half;
39731 }
39732 break;
39733 default:
39734 break;
39735 }
39736
39737       /* Force the memory operand to be addressed with just a base register
39738 	 here.  We don't want to do this for the memory operands of other
39739 	 builtin functions.  */
39740 op1 = ix86_zero_extend_to_Pmode (op1);
39741
39742 if (!insn_data[icode].operand[1].predicate (op0, mode0))
39743 op0 = copy_to_mode_reg (mode0, op0);
39744 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
39745 op1 = copy_to_mode_reg (Pmode, op1);
39746 if (!insn_data[icode].operand[3].predicate (op2, mode2))
39747 op2 = copy_to_mode_reg (mode2, op2);
39748 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
39749 {
39750 if (!insn_data[icode].operand[4].predicate (op3, mode3))
39751 op3 = copy_to_mode_reg (mode3, op3);
39752 }
39753 else
39754 {
39755 op3 = copy_to_reg (op3);
39756 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
39757 }
39758 if (!insn_data[icode].operand[5].predicate (op4, mode4))
39759 {
39760 error ("the last argument must be scale 1, 2, 4, 8");
39761 return const0_rtx;
39762 }
39763
39764 /* Optimize. If mask is known to have all high bits set,
39765 replace op0 with pc_rtx to signal that the instruction
39766 overwrites the whole destination and doesn't use its
39767 previous contents. */
39768 if (optimize)
39769 {
39770 if (TREE_CODE (arg3) == INTEGER_CST)
39771 {
39772 if (integer_all_onesp (arg3))
39773 op0 = pc_rtx;
39774 }
39775 else if (TREE_CODE (arg3) == VECTOR_CST)
39776 {
39777 unsigned int negative = 0;
39778 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
39779 {
39780 tree cst = VECTOR_CST_ELT (arg3, i);
39781 if (TREE_CODE (cst) == INTEGER_CST
39782 && tree_int_cst_sign_bit (cst))
39783 negative++;
39784 else if (TREE_CODE (cst) == REAL_CST
39785 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
39786 negative++;
39787 }
39788 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
39789 op0 = pc_rtx;
39790 }
39791 else if (TREE_CODE (arg3) == SSA_NAME
39792 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
39793 {
39794 /* Recognize also when mask is like:
39795 __v2df src = _mm_setzero_pd ();
39796 __v2df mask = _mm_cmpeq_pd (src, src);
39797 or
39798 __v8sf src = _mm256_setzero_ps ();
39799 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
39800 as that is a cheaper way to load all ones into
39801 a register than having to load a constant from
39802 memory. */
39803 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
39804 if (is_gimple_call (def_stmt))
39805 {
39806 tree fndecl = gimple_call_fndecl (def_stmt);
39807 if (fndecl
39808 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
39809 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
39810 {
39811 case IX86_BUILTIN_CMPPD:
39812 case IX86_BUILTIN_CMPPS:
39813 case IX86_BUILTIN_CMPPD256:
39814 case IX86_BUILTIN_CMPPS256:
39815 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
39816 break;
39817 /* FALLTHRU */
39818 case IX86_BUILTIN_CMPEQPD:
39819 case IX86_BUILTIN_CMPEQPS:
39820 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
39821 && initializer_zerop (gimple_call_arg (def_stmt,
39822 1)))
39823 op0 = pc_rtx;
39824 break;
39825 default:
39826 break;
39827 }
39828 }
39829 }
39830 }
39831
39832 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
39833 if (! pat)
39834 return const0_rtx;
39835 emit_insn (pat);
39836
39837 switch (fcode)
39838 {
39839 case IX86_BUILTIN_GATHER3DIV16SF:
39840 if (target == NULL_RTX)
39841 target = gen_reg_rtx (V8SFmode);
39842 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
39843 break;
39844 case IX86_BUILTIN_GATHER3DIV16SI:
39845 if (target == NULL_RTX)
39846 target = gen_reg_rtx (V8SImode);
39847 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
39848 break;
39849 case IX86_BUILTIN_GATHER3DIV8SF:
39850 case IX86_BUILTIN_GATHERDIV8SF:
39851 if (target == NULL_RTX)
39852 target = gen_reg_rtx (V4SFmode);
39853 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
39854 break;
39855 case IX86_BUILTIN_GATHER3DIV8SI:
39856 case IX86_BUILTIN_GATHERDIV8SI:
39857 if (target == NULL_RTX)
39858 target = gen_reg_rtx (V4SImode);
39859 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
39860 break;
39861 default:
39862 target = subtarget;
39863 break;
39864 }
39865 return target;
39866
39867 scatter_gen:
39868 arg0 = CALL_EXPR_ARG (exp, 0);
39869 arg1 = CALL_EXPR_ARG (exp, 1);
39870 arg2 = CALL_EXPR_ARG (exp, 2);
39871 arg3 = CALL_EXPR_ARG (exp, 3);
39872 arg4 = CALL_EXPR_ARG (exp, 4);
39873 op0 = expand_normal (arg0);
39874 op1 = expand_normal (arg1);
39875 op2 = expand_normal (arg2);
39876 op3 = expand_normal (arg3);
39877 op4 = expand_normal (arg4);
39878 mode1 = insn_data[icode].operand[1].mode;
39879 mode2 = insn_data[icode].operand[2].mode;
39880 mode3 = insn_data[icode].operand[3].mode;
39881 mode4 = insn_data[icode].operand[4].mode;
39882
39883       /* Force the memory operand to be addressed with just a base register
39884 	 here.  We don't want to do this for the memory operands of other
39885 	 builtin functions.  */
39886 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
39887
39888 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39889 op0 = copy_to_mode_reg (Pmode, op0);
39890
39891 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
39892 {
39893 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39894 op1 = copy_to_mode_reg (mode1, op1);
39895 }
39896 else
39897 {
39898 op1 = copy_to_reg (op1);
39899 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
39900 }
39901
39902 if (!insn_data[icode].operand[2].predicate (op2, mode2))
39903 op2 = copy_to_mode_reg (mode2, op2);
39904
39905 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39906 op3 = copy_to_mode_reg (mode3, op3);
39907
39908 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39909 {
39910 error ("the last argument must be scale 1, 2, 4, 8");
39911 return const0_rtx;
39912 }
39913
39914 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39915 if (! pat)
39916 return const0_rtx;
39917
39918 emit_insn (pat);
39919 return 0;
39920
39921 vec_prefetch_gen:
39922 arg0 = CALL_EXPR_ARG (exp, 0);
39923 arg1 = CALL_EXPR_ARG (exp, 1);
39924 arg2 = CALL_EXPR_ARG (exp, 2);
39925 arg3 = CALL_EXPR_ARG (exp, 3);
39926 arg4 = CALL_EXPR_ARG (exp, 4);
39927 op0 = expand_normal (arg0);
39928 op1 = expand_normal (arg1);
39929 op2 = expand_normal (arg2);
39930 op3 = expand_normal (arg3);
39931 op4 = expand_normal (arg4);
39932 mode0 = insn_data[icode].operand[0].mode;
39933 mode1 = insn_data[icode].operand[1].mode;
39934 mode3 = insn_data[icode].operand[3].mode;
39935 mode4 = insn_data[icode].operand[4].mode;
39936
39937 if (GET_MODE (op0) == mode0
39938 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
39939 {
39940 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39941 op0 = copy_to_mode_reg (mode0, op0);
39942 }
39943 else if (op0 != constm1_rtx)
39944 {
39945 op0 = copy_to_reg (op0);
39946 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39947 }
39948
39949 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39950 op1 = copy_to_mode_reg (mode1, op1);
39951
39952       /* Force the memory operand to be addressed with just a base register
39953 	 here.  We don't want to do this for the memory operands of other
39954 	 builtin functions.  */
39955 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
39956
39957 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
39958 op2 = copy_to_mode_reg (Pmode, op2);
39959
39960 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39961 {
39962 error ("the forth argument must be scale 1, 2, 4, 8");
39963 return const0_rtx;
39964 }
39965
39966 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39967 {
39968 error ("incorrect hint operand");
39969 return const0_rtx;
39970 }
39971
39972 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39973 if (! pat)
39974 return const0_rtx;
39975
39976 emit_insn (pat);
39977
39978 return 0;
39979
39980 case IX86_BUILTIN_XABORT:
39981 icode = CODE_FOR_xabort;
39982 arg0 = CALL_EXPR_ARG (exp, 0);
39983 op0 = expand_normal (arg0);
39984 mode0 = insn_data[icode].operand[0].mode;
39985 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39986 {
39987 error ("the xabort's argument must be an 8-bit immediate");
39988 return const0_rtx;
39989 }
39990 emit_insn (gen_xabort (op0));
39991 return 0;
39992
39993 default:
39994 break;
39995 }
39996
39997 for (i = 0, d = bdesc_special_args;
39998 i < ARRAY_SIZE (bdesc_special_args);
39999 i++, d++)
40000 if (d->code == fcode)
40001 return ix86_expand_special_args_builtin (d, exp, target);
40002
40003 for (i = 0, d = bdesc_args;
40004 i < ARRAY_SIZE (bdesc_args);
40005 i++, d++)
40006 if (d->code == fcode)
40007 switch (fcode)
40008 {
40009 case IX86_BUILTIN_FABSQ:
40010 case IX86_BUILTIN_COPYSIGNQ:
40011 if (!TARGET_SSE)
40012 /* Emit a normal call if SSE isn't available. */
40013 return expand_call (exp, target, ignore);
40014 default:
40015 return ix86_expand_args_builtin (d, exp, target);
40016 }
40017
40018 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
40019 if (d->code == fcode)
40020 return ix86_expand_sse_comi (d, exp, target);
40021
40022 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
40023 if (d->code == fcode)
40024 return ix86_expand_round_builtin (d, exp, target);
40025
40026 for (i = 0, d = bdesc_pcmpestr;
40027 i < ARRAY_SIZE (bdesc_pcmpestr);
40028 i++, d++)
40029 if (d->code == fcode)
40030 return ix86_expand_sse_pcmpestr (d, exp, target);
40031
40032 for (i = 0, d = bdesc_pcmpistr;
40033 i < ARRAY_SIZE (bdesc_pcmpistr);
40034 i++, d++)
40035 if (d->code == fcode)
40036 return ix86_expand_sse_pcmpistr (d, exp, target);
40037
40038 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
40039 if (d->code == fcode)
40040 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
40041 (enum ix86_builtin_func_type)
40042 d->flag, d->comparison);
40043
40044 gcc_unreachable ();
40045 }
40046
40047 /* This returns the target-specific builtin with code CODE if
40048 current_function_decl has visibility on this builtin, which is checked
40049 using isa flags. Returns NULL_TREE otherwise. */
40050
40051 static tree ix86_get_builtin (enum ix86_builtins code)
40052 {
40053 struct cl_target_option *opts;
40054 tree target_tree = NULL_TREE;
40055
40056 /* Determine the isa flags of current_function_decl. */
40057
40058 if (current_function_decl)
40059 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
40060
40061 if (target_tree == NULL)
40062 target_tree = target_option_default_node;
40063
40064 opts = TREE_TARGET_OPTION (target_tree);
40065
40066 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40067 return ix86_builtin_decl (code, true);
40068 else
40069 return NULL_TREE;
40070 }
40071
40072 /* Return the function decl for the target-specific builtin
40073 corresponding to the MPX builtin passed in FCODE. */
40074 static tree
40075 ix86_builtin_mpx_function (unsigned fcode)
40076 {
40077 switch (fcode)
40078 {
40079 case BUILT_IN_CHKP_BNDMK:
40080 return ix86_builtins[IX86_BUILTIN_BNDMK];
40081
40082 case BUILT_IN_CHKP_BNDSTX:
40083 return ix86_builtins[IX86_BUILTIN_BNDSTX];
40084
40085 case BUILT_IN_CHKP_BNDLDX:
40086 return ix86_builtins[IX86_BUILTIN_BNDLDX];
40087
40088 case BUILT_IN_CHKP_BNDCL:
40089 return ix86_builtins[IX86_BUILTIN_BNDCL];
40090
40091 case BUILT_IN_CHKP_BNDCU:
40092 return ix86_builtins[IX86_BUILTIN_BNDCU];
40093
40094 case BUILT_IN_CHKP_BNDRET:
40095 return ix86_builtins[IX86_BUILTIN_BNDRET];
40096
40097 case BUILT_IN_CHKP_INTERSECT:
40098 return ix86_builtins[IX86_BUILTIN_BNDINT];
40099
40100 case BUILT_IN_CHKP_NARROW:
40101 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40102
40103 case BUILT_IN_CHKP_SIZEOF:
40104 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40105
40106 case BUILT_IN_CHKP_EXTRACT_LOWER:
40107 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40108
40109 case BUILT_IN_CHKP_EXTRACT_UPPER:
40110 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40111
40112 default:
40113 return NULL_TREE;
40114 }
40115
40116 gcc_unreachable ();
40117 }
40118
40119 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40120
40121 Return an address to be used to load/store bounds for pointer
40122 passed in SLOT.
40123
40124 SLOT_NO is an integer constant holding number of a target
40125 dependent special slot to be used in case SLOT is not a memory.
40126
40127 SPECIAL_BASE is a pointer to be used as a base of fake address
40128 to access special slots in Bounds Table. SPECIAL_BASE[-1],
40129 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
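/* For example, with 64-bit pointers special slot 0 is addressed as
   (special_base - 8) and special slot 1 as (special_base - 16),
   following the -(slot_no + 1) * GET_MODE_SIZE (Pmode) computation
   below.  */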
40130
40131 static rtx
40132 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40133 {
40134 rtx addr = NULL;
40135
40136 /* A NULL slot means we pass bounds for a pointer not passed to the
40137 function at all.  A register slot means we pass the pointer in a
40138 register.  In both these cases bounds are passed via the Bounds
40139 Table.  Since we do not have the actual pointer stored in memory,
40140 we have to use fake addresses to access the Bounds Table.  We
40141 start with (special_base - sizeof (void*)) and decrease this
40142 address by the pointer size to get addresses for other slots. */
40143 if (!slot || REG_P (slot))
40144 {
40145 gcc_assert (CONST_INT_P (slot_no));
40146 addr = plus_constant (Pmode, special_base,
40147 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40148 }
40149 /* If the pointer is passed in memory then its address is used to
40150 access the Bounds Table. */
40151 else if (MEM_P (slot))
40152 {
40153 addr = XEXP (slot, 0);
40154 if (!register_operand (addr, Pmode))
40155 addr = copy_addr_to_reg (addr);
40156 }
40157 else
40158 gcc_unreachable ();
40159
40160 return addr;
40161 }
40162
40163 /* Expand pass uses this hook to load bounds for function parameter
40164 PTR passed in SLOT in case its bounds are not passed in a register.
40165
40166 If SLOT is a memory, then bounds are loaded as for a regular pointer
40167 loaded from memory.  PTR may be NULL in case SLOT is a memory;
40168 in such a case the value of PTR (if required) may be loaded from SLOT.
40169
40170 If SLOT is NULL or a register then SLOT_NO is an integer constant
40171 holding number of the target dependent special slot which should be
40172 used to obtain bounds.
40173
40174 Return loaded bounds. */
40175
40176 static rtx
40177 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40178 {
40179 rtx reg = gen_reg_rtx (BNDmode);
40180 rtx addr;
40181
40182 /* Get address to be used to access Bounds Table. Special slots start
40183 at the location of return address of the current function. */
40184 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40185
40186 /* Load the pointer value from memory if we don't have it. */
40187 if (!ptr)
40188 {
40189 gcc_assert (MEM_P (slot));
40190 ptr = copy_addr_to_reg (slot);
40191 }
40192
40193 emit_insn (BNDmode == BND64mode
40194 ? gen_bnd64_ldx (reg, addr, ptr)
40195 : gen_bnd32_ldx (reg, addr, ptr));
40196
40197 return reg;
40198 }
40199
40200 /* Expand pass uses this hook to store BOUNDS for call argument PTR
40201 passed in SLOT in case BOUNDS are not passed in a register.
40202
40203 If SLOT is a memory, then BOUNDS are stored as for a regular pointer
40204 stored in memory.  PTR may be NULL in case SLOT is a memory;
40205 in such a case the value of PTR (if required) may be loaded from SLOT.
40206
40207 If SLOT is NULL or a register then SLOT_NO is an integer constant
40208 holding number of the target dependent special slot which should be
40209 used to store BOUNDS. */
40210
40211 static void
40212 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40213 {
40214 rtx addr;
40215
40216 /* Get address to be used to access Bounds Table. Special slots start
40217 at the location of return address of a called function. */
40218 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40219
40220 /* Load the pointer value from memory if we don't have it. */
40221 if (!ptr)
40222 {
40223 gcc_assert (MEM_P (slot));
40224 ptr = copy_addr_to_reg (slot);
40225 }
40226
40227 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40228 if (!register_operand (bounds, BNDmode))
40229 bounds = copy_to_mode_reg (BNDmode, bounds);
40230
40231 emit_insn (BNDmode == BND64mode
40232 ? gen_bnd64_stx (addr, ptr, bounds)
40233 : gen_bnd32_stx (addr, ptr, bounds));
40234 }
40235
40236 /* Load and return bounds returned by function in SLOT. */
40237
40238 static rtx
40239 ix86_load_returned_bounds (rtx slot)
40240 {
40241 rtx res;
40242
40243 gcc_assert (REG_P (slot));
40244 res = gen_reg_rtx (BNDmode);
40245 emit_move_insn (res, slot);
40246
40247 return res;
40248 }
40249
40250 /* Store BOUNDS returned by function into SLOT. */
40251
40252 static void
40253 ix86_store_returned_bounds (rtx slot, rtx bounds)
40254 {
40255 gcc_assert (REG_P (slot));
40256 emit_move_insn (slot, bounds);
40257 }
40258
40259 /* Returns a function decl for a vectorized version of the builtin FNDECL
40260 with result vector type TYPE_OUT and argument vector type TYPE_IN, or
40261 NULL_TREE if it is not available. */
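/* For example, a call to the scalar __builtin_sqrt vectorized with
   two-element double vectors (V2DFmode in and out) maps to
   IX86_BUILTIN_SQRTPD, provided ix86_get_builtin above confirms that
   the required ISA (SSE2 in this case) is enabled for the current
   function.  */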
40262
40263 static tree
40264 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40265 tree type_in)
40266 {
40267 machine_mode in_mode, out_mode;
40268 int in_n, out_n;
40269 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40270
40271 if (TREE_CODE (type_out) != VECTOR_TYPE
40272 || TREE_CODE (type_in) != VECTOR_TYPE
40273 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40274 return NULL_TREE;
40275
40276 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40277 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40278 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40279 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40280
40281 switch (fn)
40282 {
40283 case BUILT_IN_SQRT:
40284 if (out_mode == DFmode && in_mode == DFmode)
40285 {
40286 if (out_n == 2 && in_n == 2)
40287 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40288 else if (out_n == 4 && in_n == 4)
40289 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40290 else if (out_n == 8 && in_n == 8)
40291 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40292 }
40293 break;
40294
40295 case BUILT_IN_EXP2F:
40296 if (out_mode == SFmode && in_mode == SFmode)
40297 {
40298 if (out_n == 16 && in_n == 16)
40299 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40300 }
40301 break;
40302
40303 case BUILT_IN_SQRTF:
40304 if (out_mode == SFmode && in_mode == SFmode)
40305 {
40306 if (out_n == 4 && in_n == 4)
40307 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40308 else if (out_n == 8 && in_n == 8)
40309 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40310 else if (out_n == 16 && in_n == 16)
40311 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40312 }
40313 break;
40314
40315 case BUILT_IN_IFLOOR:
40316 case BUILT_IN_LFLOOR:
40317 case BUILT_IN_LLFLOOR:
40318 /* The round insn does not trap on denormals. */
40319 if (flag_trapping_math || !TARGET_ROUND)
40320 break;
40321
40322 if (out_mode == SImode && in_mode == DFmode)
40323 {
40324 if (out_n == 4 && in_n == 2)
40325 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40326 else if (out_n == 8 && in_n == 4)
40327 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40328 else if (out_n == 16 && in_n == 8)
40329 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40330 }
40331 break;
40332
40333 case BUILT_IN_IFLOORF:
40334 case BUILT_IN_LFLOORF:
40335 case BUILT_IN_LLFLOORF:
40336 /* The round insn does not trap on denormals. */
40337 if (flag_trapping_math || !TARGET_ROUND)
40338 break;
40339
40340 if (out_mode == SImode && in_mode == SFmode)
40341 {
40342 if (out_n == 4 && in_n == 4)
40343 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40344 else if (out_n == 8 && in_n == 8)
40345 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40346 }
40347 break;
40348
40349 case BUILT_IN_ICEIL:
40350 case BUILT_IN_LCEIL:
40351 case BUILT_IN_LLCEIL:
40352 /* The round insn does not trap on denormals. */
40353 if (flag_trapping_math || !TARGET_ROUND)
40354 break;
40355
40356 if (out_mode == SImode && in_mode == DFmode)
40357 {
40358 if (out_n == 4 && in_n == 2)
40359 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40360 else if (out_n == 8 && in_n == 4)
40361 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40362 else if (out_n == 16 && in_n == 8)
40363 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40364 }
40365 break;
40366
40367 case BUILT_IN_ICEILF:
40368 case BUILT_IN_LCEILF:
40369 case BUILT_IN_LLCEILF:
40370 /* The round insn does not trap on denormals. */
40371 if (flag_trapping_math || !TARGET_ROUND)
40372 break;
40373
40374 if (out_mode == SImode && in_mode == SFmode)
40375 {
40376 if (out_n == 4 && in_n == 4)
40377 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40378 else if (out_n == 8 && in_n == 8)
40379 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40380 }
40381 break;
40382
40383 case BUILT_IN_IRINT:
40384 case BUILT_IN_LRINT:
40385 case BUILT_IN_LLRINT:
40386 if (out_mode == SImode && in_mode == DFmode)
40387 {
40388 if (out_n == 4 && in_n == 2)
40389 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40390 else if (out_n == 8 && in_n == 4)
40391 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40392 }
40393 break;
40394
40395 case BUILT_IN_IRINTF:
40396 case BUILT_IN_LRINTF:
40397 case BUILT_IN_LLRINTF:
40398 if (out_mode == SImode && in_mode == SFmode)
40399 {
40400 if (out_n == 4 && in_n == 4)
40401 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40402 else if (out_n == 8 && in_n == 8)
40403 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40404 }
40405 break;
40406
40407 case BUILT_IN_IROUND:
40408 case BUILT_IN_LROUND:
40409 case BUILT_IN_LLROUND:
40410 /* The round insn does not trap on denormals. */
40411 if (flag_trapping_math || !TARGET_ROUND)
40412 break;
40413
40414 if (out_mode == SImode && in_mode == DFmode)
40415 {
40416 if (out_n == 4 && in_n == 2)
40417 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40418 else if (out_n == 8 && in_n == 4)
40419 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40420 else if (out_n == 16 && in_n == 8)
40421 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40422 }
40423 break;
40424
40425 case BUILT_IN_IROUNDF:
40426 case BUILT_IN_LROUNDF:
40427 case BUILT_IN_LLROUNDF:
40428 /* The round insn does not trap on denormals. */
40429 if (flag_trapping_math || !TARGET_ROUND)
40430 break;
40431
40432 if (out_mode == SImode && in_mode == SFmode)
40433 {
40434 if (out_n == 4 && in_n == 4)
40435 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40436 else if (out_n == 8 && in_n == 8)
40437 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40438 }
40439 break;
40440
40441 case BUILT_IN_COPYSIGN:
40442 if (out_mode == DFmode && in_mode == DFmode)
40443 {
40444 if (out_n == 2 && in_n == 2)
40445 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40446 else if (out_n == 4 && in_n == 4)
40447 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40448 else if (out_n == 8 && in_n == 8)
40449 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40450 }
40451 break;
40452
40453 case BUILT_IN_COPYSIGNF:
40454 if (out_mode == SFmode && in_mode == SFmode)
40455 {
40456 if (out_n == 4 && in_n == 4)
40457 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40458 else if (out_n == 8 && in_n == 8)
40459 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40460 else if (out_n == 16 && in_n == 16)
40461 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40462 }
40463 break;
40464
40465 case BUILT_IN_FLOOR:
40466 /* The round insn does not trap on denormals. */
40467 if (flag_trapping_math || !TARGET_ROUND)
40468 break;
40469
40470 if (out_mode == DFmode && in_mode == DFmode)
40471 {
40472 if (out_n == 2 && in_n == 2)
40473 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40474 else if (out_n == 4 && in_n == 4)
40475 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40476 }
40477 break;
40478
40479 case BUILT_IN_FLOORF:
40480 /* The round insn does not trap on denormals. */
40481 if (flag_trapping_math || !TARGET_ROUND)
40482 break;
40483
40484 if (out_mode == SFmode && in_mode == SFmode)
40485 {
40486 if (out_n == 4 && in_n == 4)
40487 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40488 else if (out_n == 8 && in_n == 8)
40489 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40490 }
40491 break;
40492
40493 case BUILT_IN_CEIL:
40494 /* The round insn does not trap on denormals. */
40495 if (flag_trapping_math || !TARGET_ROUND)
40496 break;
40497
40498 if (out_mode == DFmode && in_mode == DFmode)
40499 {
40500 if (out_n == 2 && in_n == 2)
40501 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40502 else if (out_n == 4 && in_n == 4)
40503 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40504 }
40505 break;
40506
40507 case BUILT_IN_CEILF:
40508 /* The round insn does not trap on denormals. */
40509 if (flag_trapping_math || !TARGET_ROUND)
40510 break;
40511
40512 if (out_mode == SFmode && in_mode == SFmode)
40513 {
40514 if (out_n == 4 && in_n == 4)
40515 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40516 else if (out_n == 8 && in_n == 8)
40517 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40518 }
40519 break;
40520
40521 case BUILT_IN_TRUNC:
40522 /* The round insn does not trap on denormals. */
40523 if (flag_trapping_math || !TARGET_ROUND)
40524 break;
40525
40526 if (out_mode == DFmode && in_mode == DFmode)
40527 {
40528 if (out_n == 2 && in_n == 2)
40529 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40530 else if (out_n == 4 && in_n == 4)
40531 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40532 }
40533 break;
40534
40535 case BUILT_IN_TRUNCF:
40536 /* The round insn does not trap on denormals. */
40537 if (flag_trapping_math || !TARGET_ROUND)
40538 break;
40539
40540 if (out_mode == SFmode && in_mode == SFmode)
40541 {
40542 if (out_n == 4 && in_n == 4)
40543 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40544 else if (out_n == 8 && in_n == 8)
40545 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40546 }
40547 break;
40548
40549 case BUILT_IN_RINT:
40550 /* The round insn does not trap on denormals. */
40551 if (flag_trapping_math || !TARGET_ROUND)
40552 break;
40553
40554 if (out_mode == DFmode && in_mode == DFmode)
40555 {
40556 if (out_n == 2 && in_n == 2)
40557 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40558 else if (out_n == 4 && in_n == 4)
40559 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40560 }
40561 break;
40562
40563 case BUILT_IN_RINTF:
40564 /* The round insn does not trap on denormals. */
40565 if (flag_trapping_math || !TARGET_ROUND)
40566 break;
40567
40568 if (out_mode == SFmode && in_mode == SFmode)
40569 {
40570 if (out_n == 4 && in_n == 4)
40571 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40572 else if (out_n == 8 && in_n == 8)
40573 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40574 }
40575 break;
40576
40577 case BUILT_IN_ROUND:
40578 /* The round insn does not trap on denormals. */
40579 if (flag_trapping_math || !TARGET_ROUND)
40580 break;
40581
40582 if (out_mode == DFmode && in_mode == DFmode)
40583 {
40584 if (out_n == 2 && in_n == 2)
40585 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40586 else if (out_n == 4 && in_n == 4)
40587 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40588 }
40589 break;
40590
40591 case BUILT_IN_ROUNDF:
40592 /* The round insn does not trap on denormals. */
40593 if (flag_trapping_math || !TARGET_ROUND)
40594 break;
40595
40596 if (out_mode == SFmode && in_mode == SFmode)
40597 {
40598 if (out_n == 4 && in_n == 4)
40599 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40600 else if (out_n == 8 && in_n == 8)
40601 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40602 }
40603 break;
40604
40605 case BUILT_IN_FMA:
40606 if (out_mode == DFmode && in_mode == DFmode)
40607 {
40608 if (out_n == 2 && in_n == 2)
40609 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40610 if (out_n == 4 && in_n == 4)
40611 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40612 }
40613 break;
40614
40615 case BUILT_IN_FMAF:
40616 if (out_mode == SFmode && in_mode == SFmode)
40617 {
40618 if (out_n == 4 && in_n == 4)
40619 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40620 if (out_n == 8 && in_n == 8)
40621 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40622 }
40623 break;
40624
40625 default:
40626 break;
40627 }
40628
40629 /* Dispatch to a handler for a vectorization library. */
40630 if (ix86_veclib_handler)
40631 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40632 type_in);
40633
40634 return NULL_TREE;
40635 }
40636
40637 /* Handler for an SVML-style interface to
40638 a library with vectorized intrinsics. */
40639
40640 static tree
40641 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40642 {
40643 char name[20];
40644 tree fntype, new_fndecl, args;
40645 unsigned arity;
40646 const char *bname;
40647 machine_mode el_mode, in_mode;
40648 int n, in_n;
40649
40650 /* The SVML is suitable for unsafe math only. */
40651 if (!flag_unsafe_math_optimizations)
40652 return NULL_TREE;
40653
40654 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40655 n = TYPE_VECTOR_SUBPARTS (type_out);
40656 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40657 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40658 if (el_mode != in_mode
40659 || n != in_n)
40660 return NULL_TREE;
40661
40662 switch (fn)
40663 {
40664 case BUILT_IN_EXP:
40665 case BUILT_IN_LOG:
40666 case BUILT_IN_LOG10:
40667 case BUILT_IN_POW:
40668 case BUILT_IN_TANH:
40669 case BUILT_IN_TAN:
40670 case BUILT_IN_ATAN:
40671 case BUILT_IN_ATAN2:
40672 case BUILT_IN_ATANH:
40673 case BUILT_IN_CBRT:
40674 case BUILT_IN_SINH:
40675 case BUILT_IN_SIN:
40676 case BUILT_IN_ASINH:
40677 case BUILT_IN_ASIN:
40678 case BUILT_IN_COSH:
40679 case BUILT_IN_COS:
40680 case BUILT_IN_ACOSH:
40681 case BUILT_IN_ACOS:
40682 if (el_mode != DFmode || n != 2)
40683 return NULL_TREE;
40684 break;
40685
40686 case BUILT_IN_EXPF:
40687 case BUILT_IN_LOGF:
40688 case BUILT_IN_LOG10F:
40689 case BUILT_IN_POWF:
40690 case BUILT_IN_TANHF:
40691 case BUILT_IN_TANF:
40692 case BUILT_IN_ATANF:
40693 case BUILT_IN_ATAN2F:
40694 case BUILT_IN_ATANHF:
40695 case BUILT_IN_CBRTF:
40696 case BUILT_IN_SINHF:
40697 case BUILT_IN_SINF:
40698 case BUILT_IN_ASINHF:
40699 case BUILT_IN_ASINF:
40700 case BUILT_IN_COSHF:
40701 case BUILT_IN_COSF:
40702 case BUILT_IN_ACOSHF:
40703 case BUILT_IN_ACOSF:
40704 if (el_mode != SFmode || n != 4)
40705 return NULL_TREE;
40706 break;
40707
40708 default:
40709 return NULL_TREE;
40710 }
40711
40712 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40713
40714 if (fn == BUILT_IN_LOGF)
40715 strcpy (name, "vmlsLn4");
40716 else if (fn == BUILT_IN_LOG)
40717 strcpy (name, "vmldLn2");
40718 else if (n == 4)
40719 {
40720 sprintf (name, "vmls%s", bname+10);
40721 name[strlen (name)-1] = '4';
40722 }
40723 else
40724 sprintf (name, "vmld%s2", bname+10);
40725
40726 /* Convert to uppercase. */
40727 name[4] &= ~0x20;
40728
40729 arity = 0;
40730 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40731 args;
40732 args = TREE_CHAIN (args))
40733 arity++;
40734
40735 if (arity == 1)
40736 fntype = build_function_type_list (type_out, type_in, NULL);
40737 else
40738 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40739
40740 /* Build a function declaration for the vectorized function. */
40741 new_fndecl = build_decl (BUILTINS_LOCATION,
40742 FUNCTION_DECL, get_identifier (name), fntype);
40743 TREE_PUBLIC (new_fndecl) = 1;
40744 DECL_EXTERNAL (new_fndecl) = 1;
40745 DECL_IS_NOVOPS (new_fndecl) = 1;
40746 TREE_READONLY (new_fndecl) = 1;
40747
40748 return new_fndecl;
40749 }
40750
40751 /* Handler for an ACML-style interface to
40752 a library with vectorized intrinsics. */
40753
40754 static tree
40755 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
40756 {
40757 char name[20] = "__vr.._";
40758 tree fntype, new_fndecl, args;
40759 unsigned arity;
40760 const char *bname;
40761 machine_mode el_mode, in_mode;
40762 int n, in_n;
40763
40764 /* The ACML is 64-bit only and suitable for unsafe math only, as
40765 it does not correctly support parts of IEEE (such as denormals)
40766 with the required precision. */
40767 if (!TARGET_64BIT
40768 || !flag_unsafe_math_optimizations)
40769 return NULL_TREE;
40770
40771 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40772 n = TYPE_VECTOR_SUBPARTS (type_out);
40773 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40774 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40775 if (el_mode != in_mode
40776 || n != in_n)
40777 return NULL_TREE;
40778
40779 switch (fn)
40780 {
40781 case BUILT_IN_SIN:
40782 case BUILT_IN_COS:
40783 case BUILT_IN_EXP:
40784 case BUILT_IN_LOG:
40785 case BUILT_IN_LOG2:
40786 case BUILT_IN_LOG10:
40787 name[4] = 'd';
40788 name[5] = '2';
40789 if (el_mode != DFmode
40790 || n != 2)
40791 return NULL_TREE;
40792 break;
40793
40794 case BUILT_IN_SINF:
40795 case BUILT_IN_COSF:
40796 case BUILT_IN_EXPF:
40797 case BUILT_IN_POWF:
40798 case BUILT_IN_LOGF:
40799 case BUILT_IN_LOG2F:
40800 case BUILT_IN_LOG10F:
40801 name[4] = 's';
40802 name[5] = '4';
40803 if (el_mode != SFmode
40804 || n != 4)
40805 return NULL_TREE;
40806 break;
40807
40808 default:
40809 return NULL_TREE;
40810 }
40811
40812 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40813 sprintf (name + 7, "%s", bname+10);
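/* For example, BUILT_IN_SIN yields "__vrd2_sin" and BUILT_IN_LOG10F
   yields "__vrs4_log10f".  */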
40814
40815 arity = 0;
40816 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40817 args;
40818 args = TREE_CHAIN (args))
40819 arity++;
40820
40821 if (arity == 1)
40822 fntype = build_function_type_list (type_out, type_in, NULL);
40823 else
40824 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40825
40826 /* Build a function declaration for the vectorized function. */
40827 new_fndecl = build_decl (BUILTINS_LOCATION,
40828 FUNCTION_DECL, get_identifier (name), fntype);
40829 TREE_PUBLIC (new_fndecl) = 1;
40830 DECL_EXTERNAL (new_fndecl) = 1;
40831 DECL_IS_NOVOPS (new_fndecl) = 1;
40832 TREE_READONLY (new_fndecl) = 1;
40833
40834 return new_fndecl;
40835 }
40836
40837 /* Returns a decl of a function that implements a gather load with
40838 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
40839 Return NULL_TREE if it is not available. */
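/* For example, with AVX2 (and without AVX512VL) a V4SFmode gather
   using a 32-bit (SImode) index type maps to IX86_BUILTIN_GATHERSIV4SF.
   SCALE must be 1, 2, 4 or 8; anything else yields NULL_TREE.  */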
40840
40841 static tree
40842 ix86_vectorize_builtin_gather (const_tree mem_vectype,
40843 const_tree index_type, int scale)
40844 {
40845 bool si;
40846 enum ix86_builtins code;
40847
40848 if (! TARGET_AVX2)
40849 return NULL_TREE;
40850
40851 if ((TREE_CODE (index_type) != INTEGER_TYPE
40852 && !POINTER_TYPE_P (index_type))
40853 || (TYPE_MODE (index_type) != SImode
40854 && TYPE_MODE (index_type) != DImode))
40855 return NULL_TREE;
40856
40857 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
40858 return NULL_TREE;
40859
40860 /* v*gather* insn sign extends index to pointer mode. */
40861 if (TYPE_PRECISION (index_type) < POINTER_SIZE
40862 && TYPE_UNSIGNED (index_type))
40863 return NULL_TREE;
40864
40865 if (scale <= 0
40866 || scale > 8
40867 || (scale & (scale - 1)) != 0)
40868 return NULL_TREE;
40869
40870 si = TYPE_MODE (index_type) == SImode;
40871 switch (TYPE_MODE (mem_vectype))
40872 {
40873 case V2DFmode:
40874 if (TARGET_AVX512VL)
40875 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
40876 else
40877 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
40878 break;
40879 case V4DFmode:
40880 if (TARGET_AVX512VL)
40881 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
40882 else
40883 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
40884 break;
40885 case V2DImode:
40886 if (TARGET_AVX512VL)
40887 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
40888 else
40889 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
40890 break;
40891 case V4DImode:
40892 if (TARGET_AVX512VL)
40893 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
40894 else
40895 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
40896 break;
40897 case V4SFmode:
40898 if (TARGET_AVX512VL)
40899 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
40900 else
40901 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
40902 break;
40903 case V8SFmode:
40904 if (TARGET_AVX512VL)
40905 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
40906 else
40907 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
40908 break;
40909 case V4SImode:
40910 if (TARGET_AVX512VL)
40911 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
40912 else
40913 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
40914 break;
40915 case V8SImode:
40916 if (TARGET_AVX512VL)
40917 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
40918 else
40919 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
40920 break;
40921 case V8DFmode:
40922 if (TARGET_AVX512F)
40923 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
40924 else
40925 return NULL_TREE;
40926 break;
40927 case V8DImode:
40928 if (TARGET_AVX512F)
40929 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
40930 else
40931 return NULL_TREE;
40932 break;
40933 case V16SFmode:
40934 if (TARGET_AVX512F)
40935 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
40936 else
40937 return NULL_TREE;
40938 break;
40939 case V16SImode:
40940 if (TARGET_AVX512F)
40941 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
40942 else
40943 return NULL_TREE;
40944 break;
40945 default:
40946 return NULL_TREE;
40947 }
40948
40949 return ix86_get_builtin (code);
40950 }
40951
40952 /* Returns a decl for a target-specific builtin that implements the
40953 reciprocal of the function FN, or NULL_TREE if not available. */
40954
40955 static tree
40956 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
40957 {
40958 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
40959 && flag_finite_math_only && !flag_trapping_math
40960 && flag_unsafe_math_optimizations))
40961 return NULL_TREE;
40962
40963 if (md_fn)
40964 /* Machine dependent builtins. */
40965 switch (fn)
40966 {
40967 /* Vectorized version of sqrt to rsqrt conversion. */
40968 case IX86_BUILTIN_SQRTPS_NR:
40969 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
40970
40971 case IX86_BUILTIN_SQRTPS_NR256:
40972 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
40973
40974 default:
40975 return NULL_TREE;
40976 }
40977 else
40978 /* Normal builtins. */
40979 switch (fn)
40980 {
40981 /* Sqrt to rsqrt conversion. */
40982 case BUILT_IN_SQRTF:
40983 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
40984
40985 default:
40986 return NULL_TREE;
40987 }
40988 }
40989 \f
40990 /* Helper for avx_vpermilps256_operand et al. This is also used by
40991 the expansion functions to turn the parallel back into a mask.
40992 The return value is 0 for no match and the imm8+1 for a match. */
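/* For example, for V4SFmode the selector (parallel [2 3 0 1]) encodes
   as mask = 2 | 3<<2 | 0<<4 | 1<<6 = 0x4e, so the return value is
   0x4e + 1 = 0x4f.  */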
40993
40994 int
40995 avx_vpermilp_parallel (rtx par, machine_mode mode)
40996 {
40997 unsigned i, nelt = GET_MODE_NUNITS (mode);
40998 unsigned mask = 0;
40999 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
41000
41001 if (XVECLEN (par, 0) != (int) nelt)
41002 return 0;
41003
41004 /* Validate that all of the elements are constants, and not totally
41005 out of range. Copy the data into an integral array to make the
41006 subsequent checks easier. */
41007 for (i = 0; i < nelt; ++i)
41008 {
41009 rtx er = XVECEXP (par, 0, i);
41010 unsigned HOST_WIDE_INT ei;
41011
41012 if (!CONST_INT_P (er))
41013 return 0;
41014 ei = INTVAL (er);
41015 if (ei >= nelt)
41016 return 0;
41017 ipar[i] = ei;
41018 }
41019
41020 switch (mode)
41021 {
41022 case V8DFmode:
41023 /* In the 512-bit DFmode case, we can only move elements within
41024 a 128-bit lane. First fill the second part of the mask,
41025 then fallthru. */
41026 for (i = 4; i < 6; ++i)
41027 {
41028 if (ipar[i] < 4 || ipar[i] >= 6)
41029 return 0;
41030 mask |= (ipar[i] - 4) << i;
41031 }
41032 for (i = 6; i < 8; ++i)
41033 {
41034 if (ipar[i] < 6)
41035 return 0;
41036 mask |= (ipar[i] - 6) << i;
41037 }
41038 /* FALLTHRU */
41039
41040 case V4DFmode:
41041 /* In the 256-bit DFmode case, we can only move elements within
41042 a 128-bit lane. */
41043 for (i = 0; i < 2; ++i)
41044 {
41045 if (ipar[i] >= 2)
41046 return 0;
41047 mask |= ipar[i] << i;
41048 }
41049 for (i = 2; i < 4; ++i)
41050 {
41051 if (ipar[i] < 2)
41052 return 0;
41053 mask |= (ipar[i] - 2) << i;
41054 }
41055 break;
41056
41057 case V16SFmode:
41058 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
41059 must mirror the permutation in the lower 256 bits. */
41060 for (i = 0; i < 8; ++i)
41061 if (ipar[i] + 8 != ipar[i + 8])
41062 return 0;
41063 /* FALLTHRU */
41064
41065 case V8SFmode:
41066 /* In the 256-bit SFmode case, we have full freedom of
41067 movement within the low 128-bit lane, but the high 128-bit
41068 lane must mirror the exact same pattern. */
41069 for (i = 0; i < 4; ++i)
41070 if (ipar[i] + 4 != ipar[i + 4])
41071 return 0;
41072 nelt = 4;
41073 /* FALLTHRU */
41074
41075 case V2DFmode:
41076 case V4SFmode:
41077 /* In the 128-bit case, we have full freedom in the placement of
41078 the elements from the source operand. */
41079 for (i = 0; i < nelt; ++i)
41080 mask |= ipar[i] << (i * (nelt / 2));
41081 break;
41082
41083 default:
41084 gcc_unreachable ();
41085 }
41086
41087 /* Make sure success has a non-zero value by adding one. */
41088 return mask + 1;
41089 }
41090
41091 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
41092 the expansion functions to turn the parallel back into a mask.
41093 The return value is 0 for no match and the imm8+1 for a match. */
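/* For example, for V4DFmode the selector (parallel [2 3 4 5]) selects
   the high half of the first operand and the low half of the second,
   giving mask = 1 | 2<<4 = 0x21 and a return value of 0x22.  */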
41094
41095 int
41096 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41097 {
41098 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41099 unsigned mask = 0;
41100 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41101
41102 if (XVECLEN (par, 0) != (int) nelt)
41103 return 0;
41104
41105 /* Validate that all of the elements are constants, and not totally
41106 out of range. Copy the data into an integral array to make the
41107 subsequent checks easier. */
41108 for (i = 0; i < nelt; ++i)
41109 {
41110 rtx er = XVECEXP (par, 0, i);
41111 unsigned HOST_WIDE_INT ei;
41112
41113 if (!CONST_INT_P (er))
41114 return 0;
41115 ei = INTVAL (er);
41116 if (ei >= 2 * nelt)
41117 return 0;
41118 ipar[i] = ei;
41119 }
41120
41121 /* Validate that each half of the permute selects consecutive elements. */
41122 for (i = 0; i < nelt2 - 1; ++i)
41123 if (ipar[i] + 1 != ipar[i + 1])
41124 return 0;
41125 for (i = nelt2; i < nelt - 1; ++i)
41126 if (ipar[i] + 1 != ipar[i + 1])
41127 return 0;
41128
41129 /* Reconstruct the mask. */
41130 for (i = 0; i < 2; ++i)
41131 {
41132 unsigned e = ipar[i * nelt2];
41133 if (e % nelt2)
41134 return 0;
41135 e /= nelt2;
41136 mask |= e << (i * 4);
41137 }
41138
41139 /* Make sure success has a non-zero value by adding one. */
41140 return mask + 1;
41141 }
41142 \f
41143 /* Return a register priority for hard reg REGNO. */
41144 static int
41145 ix86_register_priority (int hard_regno)
41146 {
41147 /* ebp and r13 as the base always want a displacement, and r12 as the
41148 base always wants an index.  So discourage their use in an
41149 address. */
41150 if (hard_regno == R12_REG || hard_regno == R13_REG)
41151 return 0;
41152 if (hard_regno == BP_REG)
41153 return 1;
41154 /* New x86-64 int registers result in bigger code size. Discourage
41155 them. */
41156 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41157 return 2;
41158 /* New x86-64 SSE registers result in bigger code size. Discourage
41159 them. */
41160 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41161 return 2;
41162 /* Usage of AX register results in smaller code. Prefer it. */
41163 if (hard_regno == 0)
41164 return 4;
41165 return 3;
41166 }
41167
41168 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41169
41170 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41171 QImode must go into class Q_REGS.
41172 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
41173 movdf to do mem-to-mem moves through integer regs. */
41174
41175 static reg_class_t
41176 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41177 {
41178 machine_mode mode = GET_MODE (x);
41179
41180 /* We're only allowed to return a subclass of CLASS. Many of the
41181 following checks fail for NO_REGS, so eliminate that early. */
41182 if (regclass == NO_REGS)
41183 return NO_REGS;
41184
41185 /* All classes can load zeros. */
41186 if (x == CONST0_RTX (mode))
41187 return regclass;
41188
41189 /* Force constants into memory if we are loading a (nonzero) constant into
41190 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41191 instructions to load from a constant. */
41192 if (CONSTANT_P (x)
41193 && (MAYBE_MMX_CLASS_P (regclass)
41194 || MAYBE_SSE_CLASS_P (regclass)
41195 || MAYBE_MASK_CLASS_P (regclass)))
41196 return NO_REGS;
41197
41198 /* Prefer SSE regs only, if we can use them for math. */
41199 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41200 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41201
41202 /* Floating-point constants need more complex checks. */
41203 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
41204 {
41205 /* General regs can load everything. */
41206 if (reg_class_subset_p (regclass, GENERAL_REGS))
41207 return regclass;
41208
41209 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41210 zero above. We only want to wind up preferring 80387 registers if
41211 we plan on doing computation with them. */
41212 if (TARGET_80387
41213 && standard_80387_constant_p (x) > 0)
41214 {
41215 /* Limit class to non-sse. */
41216 if (regclass == FLOAT_SSE_REGS)
41217 return FLOAT_REGS;
41218 if (regclass == FP_TOP_SSE_REGS)
41219 return FP_TOP_REG;
41220 if (regclass == FP_SECOND_SSE_REGS)
41221 return FP_SECOND_REG;
41222 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41223 return regclass;
41224 }
41225
41226 return NO_REGS;
41227 }
41228
41229 /* Generally when we see PLUS here, it's the function invariant
41230 (plus soft-fp const_int), which can only be computed into general
41231 regs. */
41232 if (GET_CODE (x) == PLUS)
41233 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41234
41235 /* QImode constants are easy to load, but non-constant QImode data
41236 must go into Q_REGS. */
41237 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41238 {
41239 if (reg_class_subset_p (regclass, Q_REGS))
41240 return regclass;
41241 if (reg_class_subset_p (Q_REGS, regclass))
41242 return Q_REGS;
41243 return NO_REGS;
41244 }
41245
41246 return regclass;
41247 }
41248
41249 /* Discourage putting floating-point values in SSE registers unless
41250 SSE math is being used, and likewise for the 387 registers. */
41251 static reg_class_t
41252 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41253 {
41254 machine_mode mode = GET_MODE (x);
41255
41256 /* Restrict the output reload class to the register bank that we are doing
41257 math on. If we would like not to return a subset of CLASS, reject this
41258 alternative: if reload cannot do this, it will still use its choice. */
41259 mode = GET_MODE (x);
41260 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41261 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41262
41263 if (X87_FLOAT_MODE_P (mode))
41264 {
41265 if (regclass == FP_TOP_SSE_REGS)
41266 return FP_TOP_REG;
41267 else if (regclass == FP_SECOND_SSE_REGS)
41268 return FP_SECOND_REG;
41269 else
41270 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41271 }
41272
41273 return regclass;
41274 }
41275
41276 static reg_class_t
41277 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41278 machine_mode mode, secondary_reload_info *sri)
41279 {
41280 /* Double-word spills from general registers to non-offsettable memory
41281 references (zero-extended addresses) require special handling. */
41282 if (TARGET_64BIT
41283 && MEM_P (x)
41284 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41285 && INTEGER_CLASS_P (rclass)
41286 && !offsettable_memref_p (x))
41287 {
41288 sri->icode = (in_p
41289 ? CODE_FOR_reload_noff_load
41290 : CODE_FOR_reload_noff_store);
41291 /* Add the cost of moving address to a temporary. */
41292 sri->extra_cost = 1;
41293
41294 return NO_REGS;
41295 }
41296
41297 /* QImode spills from non-QI registers require an
41298 intermediate register on 32-bit targets. */
41299 if (mode == QImode
41300 && (MAYBE_MASK_CLASS_P (rclass)
41301 || (!TARGET_64BIT && !in_p
41302 && INTEGER_CLASS_P (rclass)
41303 && MAYBE_NON_Q_CLASS_P (rclass))))
41304 {
41305 int regno;
41306
41307 if (REG_P (x))
41308 regno = REGNO (x);
41309 else
41310 regno = -1;
41311
41312 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41313 regno = true_regnum (x);
41314
41315 /* Return Q_REGS if the operand is in memory. */
41316 if (regno == -1)
41317 return Q_REGS;
41318 }
41319
41320 /* This condition handles corner case where an expression involving
41321 pointers gets vectorized. We're trying to use the address of a
41322 stack slot as a vector initializer.
41323
41324 (set (reg:V2DI 74 [ vect_cst_.2 ])
41325 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41326
41327 Eventually frame gets turned into sp+offset like this:
41328
41329 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41330 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41331 (const_int 392 [0x188]))))
41332
41333 That later gets turned into:
41334
41335 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41336 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41337 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41338
41339 We'll have the following reload recorded:
41340
41341 Reload 0: reload_in (DI) =
41342 (plus:DI (reg/f:DI 7 sp)
41343 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41344 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41345 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41346 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41347 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41348 reload_reg_rtx: (reg:V2DI 22 xmm1)
41349
41350 Which isn't going to work since SSE instructions can't handle scalar
41351 additions.  Returning GENERAL_REGS forces the addition into an integer
41352 register and reload can handle subsequent reloads without problems. */
41353
41354 if (in_p && GET_CODE (x) == PLUS
41355 && SSE_CLASS_P (rclass)
41356 && SCALAR_INT_MODE_P (mode))
41357 return GENERAL_REGS;
41358
41359 return NO_REGS;
41360 }
41361
41362 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41363
41364 static bool
41365 ix86_class_likely_spilled_p (reg_class_t rclass)
41366 {
41367 switch (rclass)
41368 {
41369 case AREG:
41370 case DREG:
41371 case CREG:
41372 case BREG:
41373 case AD_REGS:
41374 case SIREG:
41375 case DIREG:
41376 case SSE_FIRST_REG:
41377 case FP_TOP_REG:
41378 case FP_SECOND_REG:
41379 case BND_REGS:
41380 return true;
41381
41382 default:
41383 break;
41384 }
41385
41386 return false;
41387 }
41388
41389 /* If we are copying between general and FP registers, we need a memory
41390 location. The same is true for SSE and MMX registers.
41391
41392 To optimize register_move_cost performance, allow inline variant.
41393
41394 The macro can't work reliably when one of the CLASSES is a class containing
41395 registers from multiple units (SSE, MMX, integer).  We avoid this by never
41396 combining those units in a single alternative in the machine description.
41397 Ensure that this constraint holds to avoid unexpected surprises.
41398
41399 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41400 enforce these sanity checks. */
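/* For example, on a 32-bit target a DImode move between SSE_REGS and
   GENERAL_REGS needs secondary memory: the classes differ and
   GET_MODE_SIZE (DImode) exceeds UNITS_PER_WORD.  */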
41401
41402 static inline bool
41403 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41404 machine_mode mode, int strict)
41405 {
41406 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41407 return false;
41408 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41409 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41410 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41411 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41412 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41413 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41414 {
41415 gcc_assert (!strict || lra_in_progress);
41416 return true;
41417 }
41418
41419 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41420 return true;
41421
41422 /* Between mask and general, we have moves no larger than word size. */
41423 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41424 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41425 return true;
41426
41427 /* ??? This is a lie.  We do have moves between mmx/general, and between
41428 mmx/sse2.  But by saying we need secondary memory we discourage the
41429 register allocator from using the mmx registers unless needed. */
41430 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41431 return true;
41432
41433 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41434 {
41435 /* SSE1 doesn't have any direct moves from other classes. */
41436 if (!TARGET_SSE2)
41437 return true;
41438
41439 /* If the target says that inter-unit moves are more expensive
41440 than moving through memory, then don't generate them. */
41441 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41442 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41443 return true;
41444
41445 /* Between SSE and general, we have moves no larger than word size. */
41446 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41447 return true;
41448 }
41449
41450 return false;
41451 }
41452
41453 bool
41454 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41455 machine_mode mode, int strict)
41456 {
41457 return inline_secondary_memory_needed (class1, class2, mode, strict);
41458 }
41459
41460 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41461
41462 On the 80386, this is the size of MODE in words,
41463 except in the FP regs, where a single reg is always enough. */
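/* For example, on a 32-bit target XFmode needs 3 general registers but
   only 1 x87 register, while a complex mode in a non-integer class,
   such as XCmode in FLOAT_REGS, needs 2 registers.  */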
41464
41465 static unsigned char
41466 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41467 {
41468 if (MAYBE_INTEGER_CLASS_P (rclass))
41469 {
41470 if (mode == XFmode)
41471 return (TARGET_64BIT ? 2 : 3);
41472 else if (mode == XCmode)
41473 return (TARGET_64BIT ? 4 : 6);
41474 else
41475 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41476 }
41477 else
41478 {
41479 if (COMPLEX_MODE_P (mode))
41480 return 2;
41481 else
41482 return 1;
41483 }
41484 }
41485
41486 /* Return true if the registers in REGCLASS cannot represent a change from
41487 mode FROM to mode TO. */
41488
41489 bool
41490 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41491 enum reg_class regclass)
41492 {
41493 if (from == to)
41494 return false;
41495
41496 /* x87 registers can't do subreg at all, as all values are reformatted
41497 to extended precision. */
41498 if (MAYBE_FLOAT_CLASS_P (regclass))
41499 return true;
41500
41501 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41502 {
41503 /* Vector registers do not support QI or HImode loads. If we don't
41504 disallow a change to these modes, reload will assume it's ok to
41505 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41506 the vec_dupv4hi pattern. */
41507 if (GET_MODE_SIZE (from) < 4)
41508 return true;
41509 }
41510
41511 return false;
41512 }
41513
41514 /* Return the cost of moving data of mode MODE between a
41515 register and memory. A value of 2 is the default; this cost is
41516 relative to those in `REGISTER_MOVE_COST'.
41517
41518 This function is used extensively by register_move_cost, which is used to
41519 build tables at startup.  Make it inline in this case.
41520 When IN is 2, return the maximum of the in and out move costs.
41521
41522 If moving between registers and memory is more expensive than
41523 between two registers, you should define this macro to express the
41524 relative cost.
41525
41526 Also model the increased cost of moving QImode registers in
41527 non-Q_REGS classes.
41528 */
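/* For example, a DFmode value in an SSE class uses the 8-byte entry of
   the cost tables: ix86_cost->sse_load[1] for a load,
   ix86_cost->sse_store[1] for a store, or the larger of the two when
   IN is 2.  */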
41529 static inline int
41530 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41531 int in)
41532 {
41533 int cost;
41534 if (FLOAT_CLASS_P (regclass))
41535 {
41536 int index;
41537 switch (mode)
41538 {
41539 case SFmode:
41540 index = 0;
41541 break;
41542 case DFmode:
41543 index = 1;
41544 break;
41545 case XFmode:
41546 index = 2;
41547 break;
41548 default:
41549 return 100;
41550 }
41551 if (in == 2)
41552 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41553 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41554 }
41555 if (SSE_CLASS_P (regclass))
41556 {
41557 int index;
41558 switch (GET_MODE_SIZE (mode))
41559 {
41560 case 4:
41561 index = 0;
41562 break;
41563 case 8:
41564 index = 1;
41565 break;
41566 case 16:
41567 index = 2;
41568 break;
41569 default:
41570 return 100;
41571 }
41572 if (in == 2)
41573 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41574 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41575 }
41576 if (MMX_CLASS_P (regclass))
41577 {
41578 int index;
41579 switch (GET_MODE_SIZE (mode))
41580 {
41581 case 4:
41582 index = 0;
41583 break;
41584 case 8:
41585 index = 1;
41586 break;
41587 default:
41588 return 100;
41589 }
41590 if (in == 2)
41591 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41592 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41593 }
41594 switch (GET_MODE_SIZE (mode))
41595 {
41596 case 1:
41597 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41598 {
41599 if (!in)
41600 return ix86_cost->int_store[0];
41601 if (TARGET_PARTIAL_REG_DEPENDENCY
41602 && optimize_function_for_speed_p (cfun))
41603 cost = ix86_cost->movzbl_load;
41604 else
41605 cost = ix86_cost->int_load[0];
41606 if (in == 2)
41607 return MAX (cost, ix86_cost->int_store[0]);
41608 return cost;
41609 }
41610 else
41611 {
41612 if (in == 2)
41613 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41614 if (in)
41615 return ix86_cost->movzbl_load;
41616 else
41617 return ix86_cost->int_store[0] + 4;
41618 }
41619 break;
41620 case 2:
41621 if (in == 2)
41622 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41623 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41624 default:
41625 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41626 if (mode == TFmode)
41627 mode = XFmode;
41628 if (in == 2)
41629 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41630 else if (in)
41631 cost = ix86_cost->int_load[2];
41632 else
41633 cost = ix86_cost->int_store[2];
41634 return (cost * (((int) GET_MODE_SIZE (mode)
41635 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
41636 }
41637 }
41638
41639 static int
41640 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41641 bool in)
41642 {
41643 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41644 }
41645
41646
41647 /* Return the cost of moving data from a register in class CLASS1 to
41648 one in class CLASS2.
41649
41650 It is not required that the cost always equal 2 when FROM is the same as TO;
41651 on some machines it is expensive to move between registers if they are not
41652 general registers. */
41653
41654 static int
41655 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41656 reg_class_t class2_i)
41657 {
41658 enum reg_class class1 = (enum reg_class) class1_i;
41659 enum reg_class class2 = (enum reg_class) class2_i;
41660
41661 /* In case we require secondary memory, compute the cost of the store
41662 followed by a load.  To avoid bad register allocation choices, we need
41663 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
41664
41665 if (inline_secondary_memory_needed (class1, class2, mode, 0))
41666 {
41667 int cost = 1;
41668
41669 cost += inline_memory_move_cost (mode, class1, 2);
41670 cost += inline_memory_move_cost (mode, class2, 2);
41671
41672 /* When copying from a general purpose register we may emit multiple
41673 stores followed by a single load, causing a memory size mismatch stall.
41674 Count this as an arbitrarily high cost of 20. */
41675 if (targetm.class_max_nregs (class1, mode)
41676 > targetm.class_max_nregs (class2, mode))
41677 cost += 20;
41678
41679 /* In the case of FP/MMX moves, the registers actually overlap, and we
41680 have to switch modes in order to treat them differently. */
41681 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41682 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41683 cost += 20;
41684
41685 return cost;
41686 }
41687
41688 /* Moves between SSE/MMX and integer unit are expensive. */
41689 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41690 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41691
41692 /* ??? By keeping the returned value relatively high, we limit the number
41693 of moves between integer and MMX/SSE registers for all targets.
41694 Additionally, a high value prevents a problem with x86_modes_tieable_p (),
41695 where integer modes in MMX/SSE registers are not tieable
41696 because of missing QImode and HImode moves to, from or between
41697 MMX/SSE registers. */
41698 return MAX (8, ix86_cost->mmxsse_to_integer);
41699
41700 if (MAYBE_FLOAT_CLASS_P (class1))
41701 return ix86_cost->fp_move;
41702 if (MAYBE_SSE_CLASS_P (class1))
41703 return ix86_cost->sse_move;
41704 if (MAYBE_MMX_CLASS_P (class1))
41705 return ix86_cost->mmx_move;
41706 return 2;
41707 }
41708
41709 /* Return TRUE if hard register REGNO can hold a value of machine-mode
41710 MODE. */
41711
41712 bool
41713 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
41714 {
41715 /* Flags, and only flags, can hold CCmode values. */
41716 if (CC_REGNO_P (regno))
41717 return GET_MODE_CLASS (mode) == MODE_CC;
41718 if (GET_MODE_CLASS (mode) == MODE_CC
41719 || GET_MODE_CLASS (mode) == MODE_RANDOM
41720 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
41721 return false;
41722 if (STACK_REGNO_P (regno))
41723 return VALID_FP_MODE_P (mode);
41724 if (MASK_REGNO_P (regno))
41725 return (VALID_MASK_REG_MODE (mode)
41726 || (TARGET_AVX512BW
41727 && VALID_MASK_AVX512BW_MODE (mode)));
41728 if (BND_REGNO_P (regno))
41729 return VALID_BND_REG_MODE (mode);
41730 if (SSE_REGNO_P (regno))
41731 {
41732 /* We implement the move patterns for all vector modes into and
41733 out of SSE registers, even when no operation instructions
41734 are available. */
41735
41736 /* For AVX-512 we allow, regardless of regno:
41737 - XI mode
41738 - any 512-bit wide vector mode
41739 - any scalar mode. */
41740 if (TARGET_AVX512F
41741 && (mode == XImode
41742 || VALID_AVX512F_REG_MODE (mode)
41743 || VALID_AVX512F_SCALAR_MODE (mode)))
41744 return true;
41745
41746 /* TODO check for QI/HI scalars. */
41747 /* AVX512VL allows SSE registers 16+ for 128/256-bit modes. */
41748 if (TARGET_AVX512VL
41749 && (mode == OImode
41750 || mode == TImode
41751 || VALID_AVX256_REG_MODE (mode)
41752 || VALID_AVX512VL_128_REG_MODE (mode)))
41753 return true;
41754
41755 /* xmm16-xmm31 are only available for AVX-512. */
41756 if (EXT_REX_SSE_REGNO_P (regno))
41757 return false;
41758
41759 /* OImode and AVX modes are available only when AVX is enabled. */
41760 return ((TARGET_AVX
41761 && VALID_AVX256_REG_OR_OI_MODE (mode))
41762 || VALID_SSE_REG_MODE (mode)
41763 || VALID_SSE2_REG_MODE (mode)
41764 || VALID_MMX_REG_MODE (mode)
41765 || VALID_MMX_REG_MODE_3DNOW (mode));
41766 }
41767 if (MMX_REGNO_P (regno))
41768 {
41769 /* We implement the move patterns for 3DNOW modes even in MMX mode,
41770 so if the register is available at all, then we can move data of
41771 the given mode into or out of it. */
41772 return (VALID_MMX_REG_MODE (mode)
41773 || VALID_MMX_REG_MODE_3DNOW (mode));
41774 }
41775
41776 if (mode == QImode)
41777 {
41778 /* Take care for QImode values - they can be in non-QI regs,
41779 but then they do cause partial register stalls. */
41780 if (ANY_QI_REGNO_P (regno))
41781 return true;
41782 if (!TARGET_PARTIAL_REG_STALL)
41783 return true;
41784 /* LRA checks if the hard register is OK for the given mode.
41785 QImode values can live in non-QI regs, so we allow all
41786 registers here. */
41787 if (lra_in_progress)
41788 return true;
41789 return !can_create_pseudo_p ();
41790 }
41791 /* We handle both integer and floats in the general purpose registers. */
41792 else if (VALID_INT_MODE_P (mode))
41793 return true;
41794 else if (VALID_FP_MODE_P (mode))
41795 return true;
41796 else if (VALID_DFP_MODE_P (mode))
41797 return true;
41798 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
41799 on to use that value in smaller contexts, this can easily force a
41800 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
41801 supporting DImode, allow it. */
41802 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
41803 return true;
41804
41805 return false;
41806 }
41807
41808 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
41809 tieable integer mode. */
41810
41811 static bool
41812 ix86_tieable_integer_mode_p (machine_mode mode)
41813 {
41814 switch (mode)
41815 {
41816 case HImode:
41817 case SImode:
41818 return true;
41819
41820 case QImode:
41821 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
41822
41823 case DImode:
41824 return TARGET_64BIT;
41825
41826 default:
41827 return false;
41828 }
41829 }
41830
41831 /* Return true if MODE1 is accessible in a register that can hold MODE2
41832 without copying. That is, all register classes that can hold MODE2
41833 can also hold MODE1. */
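/* For example, HImode and SImode tie unconditionally (every class that can
   hold SImode can also hold HImode), while DImode ties with them only for
   TARGET_64BIT, where the general registers are 64 bits wide -- an
   illustrative reading of ix86_tieable_integer_mode_p above.  */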
41834
41835 bool
41836 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
41837 {
41838 if (mode1 == mode2)
41839 return true;
41840
41841 if (ix86_tieable_integer_mode_p (mode1)
41842 && ix86_tieable_integer_mode_p (mode2))
41843 return true;
41844
41845 /* MODE2 being XFmode implies fp stack or general regs, which means we
41846 can tie any smaller floating point modes to it. Note that we do not
41847 tie this with TFmode. */
41848 if (mode2 == XFmode)
41849 return mode1 == SFmode || mode1 == DFmode;
41850
41851 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
41852 that we can tie it with SFmode. */
41853 if (mode2 == DFmode)
41854 return mode1 == SFmode;
41855
41856 /* If MODE2 is only appropriate for an SSE register, then tie with
41857 any other mode acceptable to SSE registers. */
41858 if (GET_MODE_SIZE (mode2) == 32
41859 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41860 return (GET_MODE_SIZE (mode1) == 32
41861 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41862 if (GET_MODE_SIZE (mode2) == 16
41863 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41864 return (GET_MODE_SIZE (mode1) == 16
41865 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41866
41867 /* If MODE2 is appropriate for an MMX register, then tie
41868 with any other mode acceptable to MMX registers. */
41869 if (GET_MODE_SIZE (mode2) == 8
41870 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
41871 return (GET_MODE_SIZE (mode1) == 8
41872 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
41873
41874 return false;
41875 }
41876
41877 /* Return the cost of moving between two registers of mode MODE. */
41878
41879 static int
41880 ix86_set_reg_reg_cost (machine_mode mode)
41881 {
41882 unsigned int units = UNITS_PER_WORD;
41883
41884 switch (GET_MODE_CLASS (mode))
41885 {
41886 default:
41887 break;
41888
41889 case MODE_CC:
41890 units = GET_MODE_SIZE (CCmode);
41891 break;
41892
41893 case MODE_FLOAT:
41894 if ((TARGET_SSE && mode == TFmode)
41895 || (TARGET_80387 && mode == XFmode)
41896 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
41897 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
41898 units = GET_MODE_SIZE (mode);
41899 break;
41900
41901 case MODE_COMPLEX_FLOAT:
41902 if ((TARGET_SSE && mode == TCmode)
41903 || (TARGET_80387 && mode == XCmode)
41904 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
41905 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
41906 units = GET_MODE_SIZE (mode);
41907 break;
41908
41909 case MODE_VECTOR_INT:
41910 case MODE_VECTOR_FLOAT:
41911 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41912 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41913 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41914 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41915 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
41916 units = GET_MODE_SIZE (mode);
41917 }
41918
41919 /* Return the cost of moving between two registers of mode MODE,
41920 assuming that the move will be in pieces of at most UNITS bytes. */
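/* Illustration (64-bit target, hypothetical figures): for a 32-byte vector
   mode with AVX disabled, units stays at UNITS_PER_WORD == 8 and the move is
   costed as COSTS_N_INSNS (4); with AVX enabled, units == 32 and the cost
   collapses to COSTS_N_INSNS (1).  */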
41921 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
41922 }
41923
41924 /* Compute a (partial) cost for rtx X. Return true if the complete
41925 cost has been computed, and false if subexpressions should be
41926 scanned. In either case, *TOTAL contains the cost result. */
41927
41928 static bool
41929 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
41930 bool speed)
41931 {
41932 rtx mask;
41933 enum rtx_code code = (enum rtx_code) code_i;
41934 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
41935 machine_mode mode = GET_MODE (x);
41936 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
41937
41938 switch (code)
41939 {
41940 case SET:
41941 if (register_operand (SET_DEST (x), VOIDmode)
41942 && reg_or_0_operand (SET_SRC (x), VOIDmode))
41943 {
41944 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
41945 return true;
41946 }
41947 return false;
41948
41949 case CONST_INT:
41950 case CONST:
41951 case LABEL_REF:
41952 case SYMBOL_REF:
41953 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
41954 *total = 3;
41955 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
41956 *total = 2;
41957 else if (flag_pic && SYMBOLIC_CONST (x)
41958 && !(TARGET_64BIT
41959 && (GET_CODE (x) == LABEL_REF
41960 || (GET_CODE (x) == SYMBOL_REF
41961 && SYMBOL_REF_LOCAL_P (x)))))
41962 *total = 1;
41963 else
41964 *total = 0;
41965 return true;
41966
41967 case CONST_DOUBLE:
41968 if (mode == VOIDmode)
41969 {
41970 *total = 0;
41971 return true;
41972 }
41973 switch (standard_80387_constant_p (x))
41974 {
41975 case 1: /* 0.0 */
41976 *total = 1;
41977 return true;
41978 default: /* Other constants */
41979 *total = 2;
41980 return true;
41981 case 0:
41982 case -1:
41983 break;
41984 }
41985 if (SSE_FLOAT_MODE_P (mode))
41986 {
41987 case CONST_VECTOR:
41988 switch (standard_sse_constant_p (x))
41989 {
41990 case 0:
41991 break;
41992 case 1: /* 0: xor eliminates false dependency */
41993 *total = 0;
41994 return true;
41995 default: /* -1: cmp contains false dependency */
41996 *total = 1;
41997 return true;
41998 }
41999 }
42000 /* Fall back to (MEM (SYMBOL_REF)), since that's where
42001 it'll probably end up. Add a penalty for size. */
42002 *total = (COSTS_N_INSNS (1)
42003 + (flag_pic != 0 && !TARGET_64BIT)
42004 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
42005 return true;
42006
42007 case ZERO_EXTEND:
42008 /* The zero extension is often completely free on x86_64, so make
42009 it as cheap as possible. */
42010 if (TARGET_64BIT && mode == DImode
42011 && GET_MODE (XEXP (x, 0)) == SImode)
42012 *total = 1;
42013 else if (TARGET_ZERO_EXTEND_WITH_AND)
42014 *total = cost->add;
42015 else
42016 *total = cost->movzx;
42017 return false;
42018
42019 case SIGN_EXTEND:
42020 *total = cost->movsx;
42021 return false;
42022
42023 case ASHIFT:
42024 if (SCALAR_INT_MODE_P (mode)
42025 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
42026 && CONST_INT_P (XEXP (x, 1)))
42027 {
42028 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42029 if (value == 1)
42030 {
42031 *total = cost->add;
42032 return false;
42033 }
42034 if ((value == 2 || value == 3)
42035 && cost->lea <= cost->shift_const)
42036 {
42037 *total = cost->lea;
42038 return false;
42039 }
42040 }
42041 /* FALLTHRU */
42042
42043 case ROTATE:
42044 case ASHIFTRT:
42045 case LSHIFTRT:
42046 case ROTATERT:
42047 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42048 {
42049 /* ??? Should be SSE vector operation cost. */
42050 /* At least for published AMD latencies, this really is the same
42051 as the latency for a simple fpu operation like fabs. */
42052 /* V*QImode is emulated with 1-11 insns. */
42053 if (mode == V16QImode || mode == V32QImode)
42054 {
42055 int count = 11;
42056 if (TARGET_XOP && mode == V16QImode)
42057 {
42058 /* For XOP we use vpshab, which requires a broadcast of the
42059 value to the variable shift insn. For constants this
42060 means a V16QI const in memory; even when we can perform the
42061 shift with one insn, set the cost to prefer paddb. */
42062 if (CONSTANT_P (XEXP (x, 1)))
42063 {
42064 *total = (cost->fabs
42065 + rtx_cost (XEXP (x, 0), code, 0, speed)
42066 + (speed ? 2 : COSTS_N_BYTES (16)));
42067 return true;
42068 }
42069 count = 3;
42070 }
42071 else if (TARGET_SSSE3)
42072 count = 7;
42073 *total = cost->fabs * count;
42074 }
42075 else
42076 *total = cost->fabs;
42077 }
42078 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42079 {
42080 if (CONST_INT_P (XEXP (x, 1)))
42081 {
42082 if (INTVAL (XEXP (x, 1)) > 32)
42083 *total = cost->shift_const + COSTS_N_INSNS (2);
42084 else
42085 *total = cost->shift_const * 2;
42086 }
42087 else
42088 {
42089 if (GET_CODE (XEXP (x, 1)) == AND)
42090 *total = cost->shift_var * 2;
42091 else
42092 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42093 }
42094 }
42095 else
42096 {
42097 if (CONST_INT_P (XEXP (x, 1)))
42098 *total = cost->shift_const;
42099 else if (GET_CODE (XEXP (x, 1)) == SUBREG
42100 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42101 {
42102 /* Return the cost after shift-and truncation. */
42103 *total = cost->shift_var;
42104 return true;
42105 }
42106 else
42107 *total = cost->shift_var;
42108 }
42109 return false;
42110
42111 case FMA:
42112 {
42113 rtx sub;
42114
42115 gcc_assert (FLOAT_MODE_P (mode));
42116 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42117
42118 /* ??? SSE scalar/vector cost should be used here. */
42119 /* ??? Bald assumption that fma has the same cost as fmul. */
42120 *total = cost->fmul;
42121 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42122
42123 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
42124 sub = XEXP (x, 0);
42125 if (GET_CODE (sub) == NEG)
42126 sub = XEXP (sub, 0);
42127 *total += rtx_cost (sub, FMA, 0, speed);
42128
42129 sub = XEXP (x, 2);
42130 if (GET_CODE (sub) == NEG)
42131 sub = XEXP (sub, 0);
42132 *total += rtx_cost (sub, FMA, 2, speed);
42133 return true;
42134 }
42135
42136 case MULT:
42137 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42138 {
42139 /* ??? SSE scalar cost should be used here. */
42140 *total = cost->fmul;
42141 return false;
42142 }
42143 else if (X87_FLOAT_MODE_P (mode))
42144 {
42145 *total = cost->fmul;
42146 return false;
42147 }
42148 else if (FLOAT_MODE_P (mode))
42149 {
42150 /* ??? SSE vector cost should be used here. */
42151 *total = cost->fmul;
42152 return false;
42153 }
42154 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42155 {
42156 /* V*QImode is emulated with 7-13 insns. */
42157 if (mode == V16QImode || mode == V32QImode)
42158 {
42159 int extra = 11;
42160 if (TARGET_XOP && mode == V16QImode)
42161 extra = 5;
42162 else if (TARGET_SSSE3)
42163 extra = 6;
42164 *total = cost->fmul * 2 + cost->fabs * extra;
42165 }
42166 /* V*DImode is emulated with 5-8 insns. */
42167 else if (mode == V2DImode || mode == V4DImode)
42168 {
42169 if (TARGET_XOP && mode == V2DImode)
42170 *total = cost->fmul * 2 + cost->fabs * 3;
42171 else
42172 *total = cost->fmul * 3 + cost->fabs * 5;
42173 }
42174 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42175 insns, including two PMULUDQ. */
42176 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42177 *total = cost->fmul * 2 + cost->fabs * 5;
42178 else
42179 *total = cost->fmul;
42180 return false;
42181 }
42182 else
42183 {
42184 rtx op0 = XEXP (x, 0);
42185 rtx op1 = XEXP (x, 1);
42186 int nbits;
42187 if (CONST_INT_P (XEXP (x, 1)))
42188 {
42189 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42190 for (nbits = 0; value != 0; value &= value - 1)
42191 nbits++;
42192 }
42193 else
42194 /* This is arbitrary. */
42195 nbits = 7;
42196
42197 /* Compute costs correctly for widening multiplication. */
42198 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42199 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42200 == GET_MODE_SIZE (mode))
42201 {
42202 int is_mulwiden = 0;
42203 machine_mode inner_mode = GET_MODE (op0);
42204
42205 if (GET_CODE (op0) == GET_CODE (op1))
42206 is_mulwiden = 1, op1 = XEXP (op1, 0);
42207 else if (CONST_INT_P (op1))
42208 {
42209 if (GET_CODE (op0) == SIGN_EXTEND)
42210 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42211 == INTVAL (op1);
42212 else
42213 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42214 }
42215
42216 if (is_mulwiden)
42217 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42218 }
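/* E.g. (mult:DI (sign_extend:DI (reg:SI)) (sign_extend:DI (reg:SI)))
   is costed below as a SImode multiply, since a widening imul produces
   the DImode result directly (illustrative RTL only).  */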
42219
42220 *total = (cost->mult_init[MODE_INDEX (mode)]
42221 + nbits * cost->mult_bit
42222 + rtx_cost (op0, outer_code, opno, speed)
42223 + rtx_cost (op1, outer_code, opno, speed));
42224
42225 return true;
42226 }
42227
42228 case DIV:
42229 case UDIV:
42230 case MOD:
42231 case UMOD:
42232 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42233 /* ??? SSE cost should be used here. */
42234 *total = cost->fdiv;
42235 else if (X87_FLOAT_MODE_P (mode))
42236 *total = cost->fdiv;
42237 else if (FLOAT_MODE_P (mode))
42238 /* ??? SSE vector cost should be used here. */
42239 *total = cost->fdiv;
42240 else
42241 *total = cost->divide[MODE_INDEX (mode)];
42242 return false;
42243
42244 case PLUS:
42245 if (GET_MODE_CLASS (mode) == MODE_INT
42246 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42247 {
42248 if (GET_CODE (XEXP (x, 0)) == PLUS
42249 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42250 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42251 && CONSTANT_P (XEXP (x, 1)))
42252 {
42253 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42254 if (val == 2 || val == 4 || val == 8)
42255 {
42256 *total = cost->lea;
42257 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42258 outer_code, opno, speed);
42259 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42260 outer_code, opno, speed);
42261 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42262 return true;
42263 }
42264 }
42265 else if (GET_CODE (XEXP (x, 0)) == MULT
42266 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42267 {
42268 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42269 if (val == 2 || val == 4 || val == 8)
42270 {
42271 *total = cost->lea;
42272 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42273 outer_code, opno, speed);
42274 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42275 return true;
42276 }
42277 }
42278 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42279 {
42280 *total = cost->lea;
42281 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42282 outer_code, opno, speed);
42283 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42284 outer_code, opno, speed);
42285 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42286 return true;
42287 }
42288 }
42289 /* FALLTHRU */
42290
42291 case MINUS:
42292 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42293 {
42294 /* ??? SSE cost should be used here. */
42295 *total = cost->fadd;
42296 return false;
42297 }
42298 else if (X87_FLOAT_MODE_P (mode))
42299 {
42300 *total = cost->fadd;
42301 return false;
42302 }
42303 else if (FLOAT_MODE_P (mode))
42304 {
42305 /* ??? SSE vector cost should be used here. */
42306 *total = cost->fadd;
42307 return false;
42308 }
42309 /* FALLTHRU */
42310
42311 case AND:
42312 case IOR:
42313 case XOR:
42314 if (GET_MODE_CLASS (mode) == MODE_INT
42315 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42316 {
42317 *total = (cost->add * 2
42318 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42319 << (GET_MODE (XEXP (x, 0)) != DImode))
42320 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42321 << (GET_MODE (XEXP (x, 1)) != DImode)));
42322 return true;
42323 }
42324 /* FALLTHRU */
42325
42326 case NEG:
42327 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42328 {
42329 /* ??? SSE cost should be used here. */
42330 *total = cost->fchs;
42331 return false;
42332 }
42333 else if (X87_FLOAT_MODE_P (mode))
42334 {
42335 *total = cost->fchs;
42336 return false;
42337 }
42338 else if (FLOAT_MODE_P (mode))
42339 {
42340 /* ??? SSE vector cost should be used here. */
42341 *total = cost->fchs;
42342 return false;
42343 }
42344 /* FALLTHRU */
42345
42346 case NOT:
42347 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42348 {
42349 /* ??? Should be SSE vector operation cost. */
42350 /* At least for published AMD latencies, this really is the same
42351 as the latency for a simple fpu operation like fabs. */
42352 *total = cost->fabs;
42353 }
42354 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42355 *total = cost->add * 2;
42356 else
42357 *total = cost->add;
42358 return false;
42359
42360 case COMPARE:
42361 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42362 && XEXP (XEXP (x, 0), 1) == const1_rtx
42363 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42364 && XEXP (x, 1) == const0_rtx)
42365 {
42366 /* This kind of construct is implemented using test[bwl].
42367 Treat it as if we had an AND. */
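/* E.g. (compare (zero_extract (reg:SI x) (const_int 1) (const_int 3))
   (const_int 0)) corresponds roughly to "testl $8, %x".  */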
42368 *total = (cost->add
42369 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42370 + rtx_cost (const1_rtx, outer_code, opno, speed));
42371 return true;
42372 }
42373 return false;
42374
42375 case FLOAT_EXTEND:
42376 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42377 *total = 0;
42378 return false;
42379
42380 case ABS:
42381 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42382 /* ??? SSE cost should be used here. */
42383 *total = cost->fabs;
42384 else if (X87_FLOAT_MODE_P (mode))
42385 *total = cost->fabs;
42386 else if (FLOAT_MODE_P (mode))
42387 /* ??? SSE vector cost should be used here. */
42388 *total = cost->fabs;
42389 return false;
42390
42391 case SQRT:
42392 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42393 /* ??? SSE cost should be used here. */
42394 *total = cost->fsqrt;
42395 else if (X87_FLOAT_MODE_P (mode))
42396 *total = cost->fsqrt;
42397 else if (FLOAT_MODE_P (mode))
42398 /* ??? SSE vector cost should be used here. */
42399 *total = cost->fsqrt;
42400 return false;
42401
42402 case UNSPEC:
42403 if (XINT (x, 1) == UNSPEC_TP)
42404 *total = 0;
42405 return false;
42406
42407 case VEC_SELECT:
42408 case VEC_CONCAT:
42409 case VEC_DUPLICATE:
42410 /* ??? Assume all of these vector manipulation patterns are
42411 recognizable, in which case they all pretty much have the
42412 same cost. */
42413 *total = cost->fabs;
42414 return true;
42415 case VEC_MERGE:
42416 mask = XEXP (x, 2);
42417 /* This is a masked instruction; assume the same cost
42418 as the nonmasked variant. */
42419 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42420 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42421 else
42422 *total = cost->fabs;
42423 return true;
42424
42425 default:
42426 return false;
42427 }
42428 }
42429
42430 #if TARGET_MACHO
42431
42432 static int current_machopic_label_num;
42433
42434 /* Given a symbol name and its associated stub, write out the
42435 definition of the stub. */
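/* For the MACHOPIC_PURE case the emitted text is roughly the following
   (a sketch pieced together from the fprintf calls below; names are
   illustrative):

       STUB:
               .indirect_symbol SYMB
               call    ___<cpu>.get_pc_thunk.cx     # emitted by output_set_got
       LPC$N:  movl    L<N>$lz-LPC$N(%ecx),%ecx
               jmp     *%ecx

   followed by the binder and the lazy symbol pointer further down.  */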
42436
42437 void
42438 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42439 {
42440 unsigned int length;
42441 char *binder_name, *symbol_name, lazy_ptr_name[32];
42442 int label = ++current_machopic_label_num;
42443
42444 /* For 64-bit we shouldn't get here. */
42445 gcc_assert (!TARGET_64BIT);
42446
42447 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42448 symb = targetm.strip_name_encoding (symb);
42449
42450 length = strlen (stub);
42451 binder_name = XALLOCAVEC (char, length + 32);
42452 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42453
42454 length = strlen (symb);
42455 symbol_name = XALLOCAVEC (char, length + 32);
42456 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42457
42458 sprintf (lazy_ptr_name, "L%d$lz", label);
42459
42460 if (MACHOPIC_ATT_STUB)
42461 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42462 else if (MACHOPIC_PURE)
42463 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42464 else
42465 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42466
42467 fprintf (file, "%s:\n", stub);
42468 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42469
42470 if (MACHOPIC_ATT_STUB)
42471 {
42472 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42473 }
42474 else if (MACHOPIC_PURE)
42475 {
42476 /* PIC stub. */
42477 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42478 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42479 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42480 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42481 label, lazy_ptr_name, label);
42482 fprintf (file, "\tjmp\t*%%ecx\n");
42483 }
42484 else
42485 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42486
42487 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42488 it needs no stub-binding-helper. */
42489 if (MACHOPIC_ATT_STUB)
42490 return;
42491
42492 fprintf (file, "%s:\n", binder_name);
42493
42494 if (MACHOPIC_PURE)
42495 {
42496 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42497 fprintf (file, "\tpushl\t%%ecx\n");
42498 }
42499 else
42500 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42501
42502 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42503
42504 /* N.B. Keep the correspondence of these
42505 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42506 old-pic/new-pic/non-pic stubs; altering this will break
42507 compatibility with existing dylibs. */
42508 if (MACHOPIC_PURE)
42509 {
42510 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42511 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42512 }
42513 else
42514 /* 16-byte -mdynamic-no-pic stub. */
42515 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
42516
42517 fprintf (file, "%s:\n", lazy_ptr_name);
42518 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42519 fprintf (file, ASM_LONG "%s\n", binder_name);
42520 }
42521 #endif /* TARGET_MACHO */
42522
42523 /* Order the registers for register allocator. */
42524
42525 void
42526 x86_order_regs_for_local_alloc (void)
42527 {
42528 int pos = 0;
42529 int i;
42530
42531 /* First allocate the local general purpose registers. */
42532 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42533 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42534 reg_alloc_order [pos++] = i;
42535
42536 /* Global general purpose registers. */
42537 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42538 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42539 reg_alloc_order [pos++] = i;
42540
42541 /* x87 registers come first in case we are doing FP math
42542 using them. */
42543 if (!TARGET_SSE_MATH)
42544 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42545 reg_alloc_order [pos++] = i;
42546
42547 /* SSE registers. */
42548 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42549 reg_alloc_order [pos++] = i;
42550 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42551 reg_alloc_order [pos++] = i;
42552
42553 /* Extended REX SSE registers. */
42554 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42555 reg_alloc_order [pos++] = i;
42556
42557 /* Mask register. */
42558 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42559 reg_alloc_order [pos++] = i;
42560
42561 /* MPX bound registers. */
42562 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42563 reg_alloc_order [pos++] = i;
42564
42565 /* x87 registers. */
42566 if (TARGET_SSE_MATH)
42567 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42568 reg_alloc_order [pos++] = i;
42569
42570 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42571 reg_alloc_order [pos++] = i;
42572
42573 /* Initialize the rest of the array, as we do not allocate some
42574 registers at all. */
42575 while (pos < FIRST_PSEUDO_REGISTER)
42576 reg_alloc_order [pos++] = 0;
42577 }
42578
42579 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42580 in struct attribute_spec handler. */
42581 static tree
42582 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42583 tree args,
42584 int,
42585 bool *no_add_attrs)
42586 {
42587 if (TREE_CODE (*node) != FUNCTION_TYPE
42588 && TREE_CODE (*node) != METHOD_TYPE
42589 && TREE_CODE (*node) != FIELD_DECL
42590 && TREE_CODE (*node) != TYPE_DECL)
42591 {
42592 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42593 name);
42594 *no_add_attrs = true;
42595 return NULL_TREE;
42596 }
42597 if (TARGET_64BIT)
42598 {
42599 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42600 name);
42601 *no_add_attrs = true;
42602 return NULL_TREE;
42603 }
42604 if (is_attribute_p ("callee_pop_aggregate_return", name))
42605 {
42606 tree cst;
42607
42608 cst = TREE_VALUE (args);
42609 if (TREE_CODE (cst) != INTEGER_CST)
42610 {
42611 warning (OPT_Wattributes,
42612 "%qE attribute requires an integer constant argument",
42613 name);
42614 *no_add_attrs = true;
42615 }
42616 else if (compare_tree_int (cst, 0) != 0
42617 && compare_tree_int (cst, 1) != 0)
42618 {
42619 warning (OPT_Wattributes,
42620 "argument to %qE attribute is neither zero, nor one",
42621 name);
42622 *no_add_attrs = true;
42623 }
42624
42625 return NULL_TREE;
42626 }
42627
42628 return NULL_TREE;
42629 }
42630
42631 /* Handle an "ms_abi" or "sysv_abi" attribute; arguments as in
42632 struct attribute_spec.handler. */
42633 static tree
42634 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42635 bool *no_add_attrs)
42636 {
42637 if (TREE_CODE (*node) != FUNCTION_TYPE
42638 && TREE_CODE (*node) != METHOD_TYPE
42639 && TREE_CODE (*node) != FIELD_DECL
42640 && TREE_CODE (*node) != TYPE_DECL)
42641 {
42642 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42643 name);
42644 *no_add_attrs = true;
42645 return NULL_TREE;
42646 }
42647
42648 /* Can combine regparm with all attributes but fastcall. */
42649 if (is_attribute_p ("ms_abi", name))
42650 {
42651 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42652 {
42653 error ("ms_abi and sysv_abi attributes are not compatible");
42654 }
42655
42656 return NULL_TREE;
42657 }
42658 else if (is_attribute_p ("sysv_abi", name))
42659 {
42660 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42661 {
42662 error ("ms_abi and sysv_abi attributes are not compatible");
42663 }
42664
42665 return NULL_TREE;
42666 }
42667
42668 return NULL_TREE;
42669 }
42670
42671 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42672 struct attribute_spec.handler. */
42673 static tree
42674 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
42675 bool *no_add_attrs)
42676 {
42677 tree *type = NULL;
42678 if (DECL_P (*node))
42679 {
42680 if (TREE_CODE (*node) == TYPE_DECL)
42681 type = &TREE_TYPE (*node);
42682 }
42683 else
42684 type = node;
42685
42686 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
42687 {
42688 warning (OPT_Wattributes, "%qE attribute ignored",
42689 name);
42690 *no_add_attrs = true;
42691 }
42692
42693 else if ((is_attribute_p ("ms_struct", name)
42694 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
42695 || ((is_attribute_p ("gcc_struct", name)
42696 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
42697 {
42698 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
42699 name);
42700 *no_add_attrs = true;
42701 }
42702
42703 return NULL_TREE;
42704 }
42705
42706 static tree
42707 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
42708 bool *no_add_attrs)
42709 {
42710 if (TREE_CODE (*node) != FUNCTION_DECL)
42711 {
42712 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42713 name);
42714 *no_add_attrs = true;
42715 }
42716 return NULL_TREE;
42717 }
42718
42719 static bool
42720 ix86_ms_bitfield_layout_p (const_tree record_type)
42721 {
42722 return ((TARGET_MS_BITFIELD_LAYOUT
42723 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
42724 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
42725 }
42726
42727 /* Returns an expression indicating where the this parameter is
42728 located on entry to the FUNCTION. */
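/* For instance, in 32-bit code with the fastcall convention the "this"
   pointer arrives in %ecx, or in %edx when a hidden aggregate-return
   pointer already occupies %ecx; with no register parameters it is found
   on the stack at 4(%esp) (8(%esp) for aggregate returns).  This is an
   illustrative reading of the regno/stack-slot selection below.  */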
42729
42730 static rtx
42731 x86_this_parameter (tree function)
42732 {
42733 tree type = TREE_TYPE (function);
42734 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
42735 int nregs;
42736
42737 if (TARGET_64BIT)
42738 {
42739 const int *parm_regs;
42740
42741 if (ix86_function_type_abi (type) == MS_ABI)
42742 parm_regs = x86_64_ms_abi_int_parameter_registers;
42743 else
42744 parm_regs = x86_64_int_parameter_registers;
42745 return gen_rtx_REG (Pmode, parm_regs[aggr]);
42746 }
42747
42748 nregs = ix86_function_regparm (type, function);
42749
42750 if (nregs > 0 && !stdarg_p (type))
42751 {
42752 int regno;
42753 unsigned int ccvt = ix86_get_callcvt (type);
42754
42755 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42756 regno = aggr ? DX_REG : CX_REG;
42757 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42758 {
42759 regno = CX_REG;
42760 if (aggr)
42761 return gen_rtx_MEM (SImode,
42762 plus_constant (Pmode, stack_pointer_rtx, 4));
42763 }
42764 else
42765 {
42766 regno = AX_REG;
42767 if (aggr)
42768 {
42769 regno = DX_REG;
42770 if (nregs == 1)
42771 return gen_rtx_MEM (SImode,
42772 plus_constant (Pmode,
42773 stack_pointer_rtx, 4));
42774 }
42775 }
42776 return gen_rtx_REG (SImode, regno);
42777 }
42778
42779 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
42780 aggr ? 8 : 4));
42781 }
42782
42783 /* Determine whether x86_output_mi_thunk can succeed. */
42784
42785 static bool
42786 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
42787 const_tree function)
42788 {
42789 /* 64-bit can handle anything. */
42790 if (TARGET_64BIT)
42791 return true;
42792
42793 /* For 32-bit, everything's fine if we have one free register. */
42794 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
42795 return true;
42796
42797 /* Need a free register for vcall_offset. */
42798 if (vcall_offset)
42799 return false;
42800
42801 /* Need a free register for GOT references. */
42802 if (flag_pic && !targetm.binds_local_p (function))
42803 return false;
42804
42805 /* Otherwise ok. */
42806 return true;
42807 }
42808
42809 /* Output the assembler code for a thunk function. THUNK_DECL is the
42810 declaration for the thunk function itself, FUNCTION is the decl for
42811 the target function. DELTA is an immediate constant offset to be
42812 added to THIS. If VCALL_OFFSET is nonzero, the word at
42813 *(*this + vcall_offset) should be added to THIS. */
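/* A C-level sketch of what the emitted thunk does (illustrative only):

       this = (char *) this + DELTA;
       if (VCALL_OFFSET)
         this = (char *) this + *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
       goto FUNCTION;   -- i.e. a tail call with the adjusted "this"  */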
42814
42815 static void
42816 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
42817 HOST_WIDE_INT vcall_offset, tree function)
42818 {
42819 rtx this_param = x86_this_parameter (function);
42820 rtx this_reg, tmp, fnaddr;
42821 unsigned int tmp_regno;
42822 rtx_insn *insn;
42823
42824 if (TARGET_64BIT)
42825 tmp_regno = R10_REG;
42826 else
42827 {
42828 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
42829 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42830 tmp_regno = AX_REG;
42831 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42832 tmp_regno = DX_REG;
42833 else
42834 tmp_regno = CX_REG;
42835 }
42836
42837 emit_note (NOTE_INSN_PROLOGUE_END);
42838
42839 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
42840 pull it in now and let DELTA benefit. */
42841 if (REG_P (this_param))
42842 this_reg = this_param;
42843 else if (vcall_offset)
42844 {
42845 /* Put the this parameter into %eax. */
42846 this_reg = gen_rtx_REG (Pmode, AX_REG);
42847 emit_move_insn (this_reg, this_param);
42848 }
42849 else
42850 this_reg = NULL_RTX;
42851
42852 /* Adjust the this parameter by a fixed constant. */
42853 if (delta)
42854 {
42855 rtx delta_rtx = GEN_INT (delta);
42856 rtx delta_dst = this_reg ? this_reg : this_param;
42857
42858 if (TARGET_64BIT)
42859 {
42860 if (!x86_64_general_operand (delta_rtx, Pmode))
42861 {
42862 tmp = gen_rtx_REG (Pmode, tmp_regno);
42863 emit_move_insn (tmp, delta_rtx);
42864 delta_rtx = tmp;
42865 }
42866 }
42867
42868 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
42869 }
42870
42871 /* Adjust the this parameter by a value stored in the vtable. */
42872 if (vcall_offset)
42873 {
42874 rtx vcall_addr, vcall_mem, this_mem;
42875
42876 tmp = gen_rtx_REG (Pmode, tmp_regno);
42877
42878 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
42879 if (Pmode != ptr_mode)
42880 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
42881 emit_move_insn (tmp, this_mem);
42882
42883 /* Adjust the this parameter. */
42884 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
42885 if (TARGET_64BIT
42886 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
42887 {
42888 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
42889 emit_move_insn (tmp2, GEN_INT (vcall_offset));
42890 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
42891 }
42892
42893 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
42894 if (Pmode != ptr_mode)
42895 emit_insn (gen_addsi_1_zext (this_reg,
42896 gen_rtx_REG (ptr_mode,
42897 REGNO (this_reg)),
42898 vcall_mem));
42899 else
42900 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
42901 }
42902
42903 /* If necessary, drop THIS back to its stack slot. */
42904 if (this_reg && this_reg != this_param)
42905 emit_move_insn (this_param, this_reg);
42906
42907 fnaddr = XEXP (DECL_RTL (function), 0);
42908 if (TARGET_64BIT)
42909 {
42910 if (!flag_pic || targetm.binds_local_p (function)
42911 || TARGET_PECOFF)
42912 ;
42913 else
42914 {
42915 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
42916 tmp = gen_rtx_CONST (Pmode, tmp);
42917 fnaddr = gen_const_mem (Pmode, tmp);
42918 }
42919 }
42920 else
42921 {
42922 if (!flag_pic || targetm.binds_local_p (function))
42923 ;
42924 #if TARGET_MACHO
42925 else if (TARGET_MACHO)
42926 {
42927 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
42928 fnaddr = XEXP (fnaddr, 0);
42929 }
42930 #endif /* TARGET_MACHO */
42931 else
42932 {
42933 tmp = gen_rtx_REG (Pmode, CX_REG);
42934 output_set_got (tmp, NULL_RTX);
42935
42936 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
42937 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
42938 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
42939 fnaddr = gen_const_mem (Pmode, fnaddr);
42940 }
42941 }
42942
42943 /* Our sibling call patterns do not allow memories, because we have no
42944 predicate that can distinguish between frame and non-frame memory.
42945 For our purposes here, we can get away with (ab)using a jump pattern,
42946 because we're going to do no optimization. */
42947 if (MEM_P (fnaddr))
42948 {
42949 if (sibcall_insn_operand (fnaddr, word_mode))
42950 {
42951 fnaddr = XEXP (DECL_RTL (function), 0);
42952 tmp = gen_rtx_MEM (QImode, fnaddr);
42953 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42954 tmp = emit_call_insn (tmp);
42955 SIBLING_CALL_P (tmp) = 1;
42956 }
42957 else
42958 emit_jump_insn (gen_indirect_jump (fnaddr));
42959 }
42960 else
42961 {
42962 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
42963 {
42964 // CM_LARGE_PIC always uses pseudo PIC register which is
42965 // uninitialized. Since FUNCTION is local and calling it
42966 // doesn't go through PLT, we use scratch register %r11 as
42967 // PIC register and initialize it here.
42968 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
42969 ix86_init_large_pic_reg (tmp_regno);
42970 fnaddr = legitimize_pic_address (fnaddr,
42971 gen_rtx_REG (Pmode, tmp_regno));
42972 }
42973
42974 if (!sibcall_insn_operand (fnaddr, word_mode))
42975 {
42976 tmp = gen_rtx_REG (word_mode, tmp_regno);
42977 if (GET_MODE (fnaddr) != word_mode)
42978 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
42979 emit_move_insn (tmp, fnaddr);
42980 fnaddr = tmp;
42981 }
42982
42983 tmp = gen_rtx_MEM (QImode, fnaddr);
42984 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42985 tmp = emit_call_insn (tmp);
42986 SIBLING_CALL_P (tmp) = 1;
42987 }
42988 emit_barrier ();
42989
42990 /* Emit just enough of rest_of_compilation to get the insns emitted.
42991 Note that use_thunk calls assemble_start_function et al. */
42992 insn = get_insns ();
42993 shorten_branches (insn);
42994 final_start_function (insn, file, 1);
42995 final (insn, file, 1);
42996 final_end_function ();
42997 }
42998
42999 static void
43000 x86_file_start (void)
43001 {
43002 default_file_start ();
43003 if (TARGET_16BIT)
43004 fputs ("\t.code16gcc\n", asm_out_file);
43005 #if TARGET_MACHO
43006 darwin_file_start ();
43007 #endif
43008 if (X86_FILE_START_VERSION_DIRECTIVE)
43009 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
43010 if (X86_FILE_START_FLTUSED)
43011 fputs ("\t.global\t__fltused\n", asm_out_file);
43012 if (ix86_asm_dialect == ASM_INTEL)
43013 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
43014 }
43015
43016 int
43017 x86_field_alignment (tree field, int computed)
43018 {
43019 machine_mode mode;
43020 tree type = TREE_TYPE (field);
43021
43022 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
43023 return computed;
43024 mode = TYPE_MODE (strip_array_types (type));
43025 if (mode == DFmode || mode == DCmode
43026 || GET_MODE_CLASS (mode) == MODE_INT
43027 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
43028 return MIN (32, computed);
43029 return computed;
43030 }
43031
43032 /* Print call to TARGET to FILE. */
43033
43034 static void
43035 x86_print_call_or_nop (FILE *file, const char *target)
43036 {
43037 if (flag_nop_mcount)
43038 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
43039 else
43040 fprintf (file, "1:\tcall\t%s\n", target);
43041 }
43042
43043 /* Output assembler code to FILE to increment profiler label # LABELNO
43044 for profiling a function entry. */
43045 void
43046 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
43047 {
43048 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
43049 : MCOUNT_NAME);
43050 if (TARGET_64BIT)
43051 {
43052 #ifndef NO_PROFILE_COUNTERS
43053 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
43054 #endif
43055
43056 if (!TARGET_PECOFF && flag_pic)
43057 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
43058 else
43059 x86_print_call_or_nop (file, mcount_name);
43060 }
43061 else if (flag_pic)
43062 {
43063 #ifndef NO_PROFILE_COUNTERS
43064 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43065 LPREFIX, labelno);
43066 #endif
43067 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43068 }
43069 else
43070 {
43071 #ifndef NO_PROFILE_COUNTERS
43072 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43073 LPREFIX, labelno);
43074 #endif
43075 x86_print_call_or_nop (file, mcount_name);
43076 }
43077
43078 if (flag_record_mcount)
43079 {
43080 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43081 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43082 fprintf (file, "\t.previous\n");
43083 }
43084 }
43085
43086 /* We don't have exact information about the insn sizes, but we may assume
43087 quite safely that we are informed about all 1 byte insns and memory
43088 address sizes. This is enough to eliminate unnecessary padding in
43089 99% of cases. */
43090
43091 static int
43092 min_insn_size (rtx_insn *insn)
43093 {
43094 int l = 0, len;
43095
43096 if (!INSN_P (insn) || !active_insn_p (insn))
43097 return 0;
43098
43099 /* Discard alignments we've emitted and jump instructions. */
43100 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43101 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43102 return 0;
43103
43104 /* Important case - calls are always 5 bytes.
43105 It is common to have many calls in a row. */
43106 if (CALL_P (insn)
43107 && symbolic_reference_mentioned_p (PATTERN (insn))
43108 && !SIBLING_CALL_P (insn))
43109 return 5;
43110 len = get_attr_length (insn);
43111 if (len <= 1)
43112 return 1;
43113
43114 /* For normal instructions we rely on get_attr_length being exact,
43115 with a few exceptions. */
43116 if (!JUMP_P (insn))
43117 {
43118 enum attr_type type = get_attr_type (insn);
43119
43120 switch (type)
43121 {
43122 case TYPE_MULTI:
43123 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43124 || asm_noperands (PATTERN (insn)) >= 0)
43125 return 0;
43126 break;
43127 case TYPE_OTHER:
43128 case TYPE_FCMP:
43129 break;
43130 default:
43131 /* Otherwise trust get_attr_length. */
43132 return len;
43133 }
43134
43135 l = get_attr_length_address (insn);
43136 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43137 l = 4;
43138 }
43139 if (l)
43140 return 1+l;
43141 else
43142 return 2;
43143 }
43144
43145 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43146
43147 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
43148 16-byte window. */
43149
43150 static void
43151 ix86_avoid_jump_mispredicts (void)
43152 {
43153 rtx_insn *insn, *start = get_insns ();
43154 int nbytes = 0, njumps = 0;
43155 bool isjump = false;
43156
43157 /* Look for all minimal intervals of instructions containing 4 jumps.
43158 The intervals are bounded by START and INSN. NBYTES is the total
43159 size of instructions in the interval including INSN and not including
43160 START. When NBYTES is smaller than 16 bytes, it is possible
43161 that the ends of START and INSN land in the same 16-byte page.
43162
43163 The smallest offset in the page at which INSN can start is the case where
43164 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
43165 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
43166
43167 Don't consider an asm goto to be a jump: while it can contain a jump, it
43168 doesn't have to, since control transfer to its label(s) can happen through
43169 other means; also, we estimate the minimum length of all asm stmts as 0. */
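/* Rough illustration of the padding arithmetic below (hypothetical sizes):
   with three jumps already in the window, nbytes == 10 and a 2-byte jump
   as INSN, padsize == 15 - 10 + 2 == 7, so the p2align emitted before
   INSN can push it into the next 16-byte window.  */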
43170 for (insn = start; insn; insn = NEXT_INSN (insn))
43171 {
43172 int min_size;
43173
43174 if (LABEL_P (insn))
43175 {
43176 int align = label_to_alignment (insn);
43177 int max_skip = label_to_max_skip (insn);
43178
43179 if (max_skip > 15)
43180 max_skip = 15;
43181 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43182 already in the current 16 byte page, because otherwise
43183 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43184 bytes to reach 16 byte boundary. */
43185 if (align <= 0
43186 || (align <= 3 && max_skip != (1 << align) - 1))
43187 max_skip = 0;
43188 if (dump_file)
43189 fprintf (dump_file, "Label %i with max_skip %i\n",
43190 INSN_UID (insn), max_skip);
43191 if (max_skip)
43192 {
43193 while (nbytes + max_skip >= 16)
43194 {
43195 start = NEXT_INSN (start);
43196 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43197 || CALL_P (start))
43198 njumps--, isjump = true;
43199 else
43200 isjump = false;
43201 nbytes -= min_insn_size (start);
43202 }
43203 }
43204 continue;
43205 }
43206
43207 min_size = min_insn_size (insn);
43208 nbytes += min_size;
43209 if (dump_file)
43210 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43211 INSN_UID (insn), min_size);
43212 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43213 || CALL_P (insn))
43214 njumps++;
43215 else
43216 continue;
43217
43218 while (njumps > 3)
43219 {
43220 start = NEXT_INSN (start);
43221 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43222 || CALL_P (start))
43223 njumps--, isjump = true;
43224 else
43225 isjump = false;
43226 nbytes -= min_insn_size (start);
43227 }
43228 gcc_assert (njumps >= 0);
43229 if (dump_file)
43230 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43231 INSN_UID (start), INSN_UID (insn), nbytes);
43232
43233 if (njumps == 3 && isjump && nbytes < 16)
43234 {
43235 int padsize = 15 - nbytes + min_insn_size (insn);
43236
43237 if (dump_file)
43238 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43239 INSN_UID (insn), padsize);
43240 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43241 }
43242 }
43243 }
43244 #endif
43245
43246 /* The AMD Athlon works faster
43247 when RET is not the destination of a conditional jump or directly preceded
43248 by another jump instruction. We avoid the penalty by inserting a NOP just
43249 before the RET instructions in such cases. */
43250 static void
43251 ix86_pad_returns (void)
43252 {
43253 edge e;
43254 edge_iterator ei;
43255
43256 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43257 {
43258 basic_block bb = e->src;
43259 rtx_insn *ret = BB_END (bb);
43260 rtx_insn *prev;
43261 bool replace = false;
43262
43263 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43264 || optimize_bb_for_size_p (bb))
43265 continue;
43266 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43267 if (active_insn_p (prev) || LABEL_P (prev))
43268 break;
43269 if (prev && LABEL_P (prev))
43270 {
43271 edge e;
43272 edge_iterator ei;
43273
43274 FOR_EACH_EDGE (e, ei, bb->preds)
43275 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43276 && !(e->flags & EDGE_FALLTHRU))
43277 {
43278 replace = true;
43279 break;
43280 }
43281 }
43282 if (!replace)
43283 {
43284 prev = prev_active_insn (ret);
43285 if (prev
43286 && ((JUMP_P (prev) && any_condjump_p (prev))
43287 || CALL_P (prev)))
43288 replace = true;
43289 /* Empty functions get branch mispredict even when
43290 the jump destination is not visible to us. */
43291 if (!prev && !optimize_function_for_size_p (cfun))
43292 replace = true;
43293 }
43294 if (replace)
43295 {
43296 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43297 delete_insn (ret);
43298 }
43299 }
43300 }
43301
43302 /* Count the minimum number of instructions in BB. Return 4 if the
43303 number of instructions >= 4. */
43304
43305 static int
43306 ix86_count_insn_bb (basic_block bb)
43307 {
43308 rtx_insn *insn;
43309 int insn_count = 0;
43310
43311 /* Count number of instructions in this block. Return 4 if the number
43312 of instructions >= 4. */
43313 FOR_BB_INSNS (bb, insn)
43314 {
43315 /* This only happens in exit blocks. */
43316 if (JUMP_P (insn)
43317 && ANY_RETURN_P (PATTERN (insn)))
43318 break;
43319
43320 if (NONDEBUG_INSN_P (insn)
43321 && GET_CODE (PATTERN (insn)) != USE
43322 && GET_CODE (PATTERN (insn)) != CLOBBER)
43323 {
43324 insn_count++;
43325 if (insn_count >= 4)
43326 return insn_count;
43327 }
43328 }
43329
43330 return insn_count;
43331 }
43332
43333
43334 /* Count the minimum number of instructions in code path in BB.
43335 Return 4 if the number of instructions >= 4. */
43336
43337 static int
43338 ix86_count_insn (basic_block bb)
43339 {
43340 edge e;
43341 edge_iterator ei;
43342 int min_prev_count;
43343
43344 /* Only bother counting instructions along paths with no
43345 more than 2 basic blocks between entry and exit. Given
43346 that BB has an edge to exit, determine if a predecessor
43347 of BB has an edge from entry. If so, compute the number
43348 of instructions in the predecessor block. If there
43349 happen to be multiple such blocks, compute the minimum. */
43350 min_prev_count = 4;
43351 FOR_EACH_EDGE (e, ei, bb->preds)
43352 {
43353 edge prev_e;
43354 edge_iterator prev_ei;
43355
43356 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43357 {
43358 min_prev_count = 0;
43359 break;
43360 }
43361 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43362 {
43363 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43364 {
43365 int count = ix86_count_insn_bb (e->src);
43366 if (count < min_prev_count)
43367 min_prev_count = count;
43368 break;
43369 }
43370 }
43371 }
43372
43373 if (min_prev_count < 4)
43374 min_prev_count += ix86_count_insn_bb (bb);
43375
43376 return min_prev_count;
43377 }
43378
43379 /* Pad short function to 4 instructions. */
43380
43381 static void
43382 ix86_pad_short_function (void)
43383 {
43384 edge e;
43385 edge_iterator ei;
43386
43387 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43388 {
43389 rtx_insn *ret = BB_END (e->src);
43390 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43391 {
43392 int insn_count = ix86_count_insn (e->src);
43393
43394 /* Pad short function. */
43395 if (insn_count < 4)
43396 {
43397 rtx_insn *insn = ret;
43398
43399 /* Find epilogue. */
43400 while (insn
43401 && (!NOTE_P (insn)
43402 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43403 insn = PREV_INSN (insn);
43404
43405 if (!insn)
43406 insn = ret;
43407
43408 /* Two NOPs count as one instruction. */
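/* E.g. a function with a single insn needs three more, so
   2 * (4 - 1) == 6 NOPs are emitted (illustrative).  */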
43409 insn_count = 2 * (4 - insn_count);
43410 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43411 }
43412 }
43413 }
43414 }
43415
43416 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43417 the epilogue, the Windows system unwinder will apply epilogue logic and
43418 produce incorrect offsets. This can be avoided by adding a nop between
43419 the last insn that can throw and the first insn of the epilogue. */
43420
43421 static void
43422 ix86_seh_fixup_eh_fallthru (void)
43423 {
43424 edge e;
43425 edge_iterator ei;
43426
43427 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43428 {
43429 rtx_insn *insn, *next;
43430
43431 /* Find the beginning of the epilogue. */
43432 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43433 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43434 break;
43435 if (insn == NULL)
43436 continue;
43437
43438 /* We only care about preceding insns that can throw. */
43439 insn = prev_active_insn (insn);
43440 if (insn == NULL || !can_throw_internal (insn))
43441 continue;
43442
43443 /* Do not separate calls from their debug information. */
43444 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43445 if (NOTE_P (next)
43446 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43447 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43448 insn = next;
43449 else
43450 break;
43451
43452 emit_insn_after (gen_nops (const1_rtx), insn);
43453 }
43454 }
43455
43456 /* Implement machine specific optimizations. We implement padding of returns
43457 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
43458 static void
43459 ix86_reorg (void)
43460 {
43461 /* We are freeing block_for_insn in the toplev to keep compatibility
43462 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43463 compute_bb_for_insn ();
43464
43465 if (TARGET_SEH && current_function_has_exception_handlers ())
43466 ix86_seh_fixup_eh_fallthru ();
43467
43468 if (optimize && optimize_function_for_speed_p (cfun))
43469 {
43470 if (TARGET_PAD_SHORT_FUNCTION)
43471 ix86_pad_short_function ();
43472 else if (TARGET_PAD_RETURNS)
43473 ix86_pad_returns ();
43474 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43475 if (TARGET_FOUR_JUMP_LIMIT)
43476 ix86_avoid_jump_mispredicts ();
43477 #endif
43478 }
43479 }
43480
43481 /* Return nonzero when QImode register that must be represented via REX prefix
43482 is used. */
43483 bool
43484 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43485 {
43486 int i;
43487 extract_insn_cached (insn);
43488 for (i = 0; i < recog_data.n_operands; i++)
43489 if (GENERAL_REG_P (recog_data.operand[i])
43490 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43491 return true;
43492 return false;
43493 }
43494
43495 /* Return true when INSN mentions register that must be encoded using REX
43496 prefix. */
43497 bool
43498 x86_extended_reg_mentioned_p (rtx insn)
43499 {
43500 subrtx_iterator::array_type array;
43501 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43502 {
43503 const_rtx x = *iter;
43504 if (REG_P (x)
43505 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43506 return true;
43507 }
43508 return false;
43509 }
43510
43511 /* If profitable, negate (without causing overflow) integer constant
43512 of mode MODE at location LOC. Return true in this case. */
43513 bool
43514 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43515 {
43516 HOST_WIDE_INT val;
43517
43518 if (!CONST_INT_P (*loc))
43519 return false;
43520
43521 switch (mode)
43522 {
43523 case DImode:
43524 /* DImode x86_64 constants must fit in 32 bits. */
43525 gcc_assert (x86_64_immediate_operand (*loc, mode));
43526
43527 mode = SImode;
43528 break;
43529
43530 case SImode:
43531 case HImode:
43532 case QImode:
43533 break;
43534
43535 default:
43536 gcc_unreachable ();
43537 }
43538
43539 /* Avoid overflows. */
43540 if (mode_signbit_p (mode, *loc))
43541 return false;
43542
43543 val = INTVAL (*loc);
43544
43545 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
43546 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
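/* E.g. a PLUS with constant -4 is negated here so the caller emits
   "subl $4, %eax"; -128 is left alone because it still fits a
   sign-extended 8-bit immediate while +128 would not, and +128 is
   negated for the same reason (illustrative).  */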
43547 if ((val < 0 && val != -128)
43548 || val == 128)
43549 {
43550 *loc = GEN_INT (-val);
43551 return true;
43552 }
43553
43554 return false;
43555 }
43556
43557 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43558 optabs would emit if we didn't have TFmode patterns. */
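/* A C-level paraphrase of the sequence emitted below (sketch; "FP" stands
   for the mode of operands[0]):

       if (in >= 0)
         out = (FP) in;                  -- plain signed conversion
       else
         {
           i0 = (in >> 1) | (in & 1);    -- halve, keeping a sticky bit
           out = (FP) i0;
           out = out + out;              -- undo the halving
         }
*/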
43559
43560 void
43561 x86_emit_floatuns (rtx operands[2])
43562 {
43563 rtx_code_label *neglab, *donelab;
43564 rtx i0, i1, f0, in, out;
43565 machine_mode mode, inmode;
43566
43567 inmode = GET_MODE (operands[1]);
43568 gcc_assert (inmode == SImode || inmode == DImode);
43569
43570 out = operands[0];
43571 in = force_reg (inmode, operands[1]);
43572 mode = GET_MODE (out);
43573 neglab = gen_label_rtx ();
43574 donelab = gen_label_rtx ();
43575 f0 = gen_reg_rtx (mode);
43576
43577 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43578
43579 expand_float (out, in, 0);
43580
43581 emit_jump_insn (gen_jump (donelab));
43582 emit_barrier ();
43583
43584 emit_label (neglab);
43585
43586 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43587 1, OPTAB_DIRECT);
43588 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43589 1, OPTAB_DIRECT);
43590 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43591
43592 expand_float (f0, i0, 0);
43593
43594 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
43595
43596 emit_label (donelab);
43597 }
43598 \f
43599 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43600 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43601 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43602 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43603
43604 /* Get a vector mode of the same size as the original but with elements
43605 twice as wide. This is only guaranteed to apply to integral vectors. */
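/* E.g. V16QImode maps to V8HImode and V8HImode to V4SImode: same total
   size, half as many elements, each element twice as wide.  */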
43606
43607 static inline machine_mode
43608 get_mode_wider_vector (machine_mode o)
43609 {
43610 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43611 machine_mode n = GET_MODE_WIDER_MODE (o);
43612 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43613 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43614 return n;
43615 }
43616
43617 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43618 fill target with val via vec_duplicate. */
43619
43620 static bool
43621 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43622 {
43623 bool ok;
43624 rtx_insn *insn;
43625 rtx dup;
43626
43627 /* First attempt to recognize VAL as-is. */
43628 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43629 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
43630 if (recog_memoized (insn) < 0)
43631 {
43632 rtx_insn *seq;
43633 /* If that fails, force VAL into a register. */
43634
43635 start_sequence ();
43636 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43637 seq = get_insns ();
43638 end_sequence ();
43639 if (seq)
43640 emit_insn_before (seq, insn);
43641
43642 ok = recog_memoized (insn) >= 0;
43643 gcc_assert (ok);
43644 }
43645 return true;
43646 }
43647
43648 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43649 with all elements equal to VAR. Return true if successful. */
43650
43651 static bool
43652 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43653 rtx target, rtx val)
43654 {
43655 bool ok;
43656
43657 switch (mode)
43658 {
43659 case V2SImode:
43660 case V2SFmode:
43661 if (!mmx_ok)
43662 return false;
43663 /* FALLTHRU */
43664
43665 case V4DFmode:
43666 case V4DImode:
43667 case V8SFmode:
43668 case V8SImode:
43669 case V2DFmode:
43670 case V2DImode:
43671 case V4SFmode:
43672 case V4SImode:
43673 case V16SImode:
43674 case V8DImode:
43675 case V16SFmode:
43676 case V8DFmode:
43677 return ix86_vector_duplicate_value (mode, target, val);
43678
43679 case V4HImode:
43680 if (!mmx_ok)
43681 return false;
43682 if (TARGET_SSE || TARGET_3DNOW_A)
43683 {
43684 rtx x;
43685
43686 val = gen_lowpart (SImode, val);
43687 x = gen_rtx_TRUNCATE (HImode, val);
43688 x = gen_rtx_VEC_DUPLICATE (mode, x);
43689 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43690 return true;
43691 }
43692 goto widen;
43693
43694 case V8QImode:
43695 if (!mmx_ok)
43696 return false;
43697 goto widen;
43698
43699 case V8HImode:
43700 if (TARGET_AVX2)
43701 return ix86_vector_duplicate_value (mode, target, val);
43702
43703 if (TARGET_SSE2)
43704 {
43705 struct expand_vec_perm_d dperm;
43706 rtx tmp1, tmp2;
43707
43708 permute:
43709 memset (&dperm, 0, sizeof (dperm));
43710 dperm.target = target;
43711 dperm.vmode = mode;
43712 dperm.nelt = GET_MODE_NUNITS (mode);
43713 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
43714 dperm.one_operand_p = true;
43715
43716 /* Extend to SImode using a paradoxical SUBREG. */
43717 tmp1 = gen_reg_rtx (SImode);
43718 emit_move_insn (tmp1, gen_lowpart (SImode, val));
43719
43720 /* Insert the SImode value as low element of a V4SImode vector. */
43721 tmp2 = gen_reg_rtx (V4SImode);
43722 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
43723 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
43724
43725 ok = (expand_vec_perm_1 (&dperm)
43726 || expand_vec_perm_broadcast_1 (&dperm));
43727 gcc_assert (ok);
43728 return ok;
43729 }
43730 goto widen;
43731
43732 case V16QImode:
43733 if (TARGET_AVX2)
43734 return ix86_vector_duplicate_value (mode, target, val);
43735
43736 if (TARGET_SSE2)
43737 goto permute;
43738 goto widen;
43739
43740 widen:
43741 /* Replicate the value once into the next wider mode and recurse. */
43742 {
43743 machine_mode smode, wsmode, wvmode;
43744 rtx x;
43745
43746 smode = GET_MODE_INNER (mode);
43747 wvmode = get_mode_wider_vector (mode);
43748 wsmode = GET_MODE_INNER (wvmode);
43749
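      /* Form the wider scalar by packing two copies of VAL side by side,
	 e.g. for QImode elements this builds the HImode value
	 (VAL << 8) | VAL, and then recurse with the wider vector mode.  */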
43750 val = convert_modes (wsmode, smode, val, true);
43751 x = expand_simple_binop (wsmode, ASHIFT, val,
43752 GEN_INT (GET_MODE_BITSIZE (smode)),
43753 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43754 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
43755
43756 x = gen_reg_rtx (wvmode);
43757 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
43758 gcc_assert (ok);
43759 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
43760 return ok;
43761 }
43762
43763 case V16HImode:
43764 case V32QImode:
43765 if (TARGET_AVX2)
43766 return ix86_vector_duplicate_value (mode, target, val);
43767 else
43768 {
43769 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
43770 rtx x = gen_reg_rtx (hvmode);
43771
43772 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43773 gcc_assert (ok);
43774
43775 x = gen_rtx_VEC_CONCAT (mode, x, x);
43776 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43777 }
43778 return true;
43779
43780 case V64QImode:
43781 case V32HImode:
43782 if (TARGET_AVX512BW)
43783 return ix86_vector_duplicate_value (mode, target, val);
43784 else
43785 {
43786 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
43787 rtx x = gen_reg_rtx (hvmode);
43788
43789 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43790 gcc_assert (ok);
43791
43792 x = gen_rtx_VEC_CONCAT (mode, x, x);
43793 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43794 }
43795 return true;
43796
43797 default:
43798 return false;
43799 }
43800 }
43801
43802 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43803 whose ONE_VAR element is VAR, and other elements are zero. Return true
43804 if successful. */
43805
43806 static bool
43807 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
43808 rtx target, rtx var, int one_var)
43809 {
43810 machine_mode vsimode;
43811 rtx new_target;
43812 rtx x, tmp;
43813 bool use_vector_set = false;
43814
43815 switch (mode)
43816 {
43817 case V2DImode:
43818 /* For SSE4.1, we normally use vector set. But if the second
43819 element is zero and inter-unit moves are OK, we use movq
43820 instead. */
43821 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
43822 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
43823 && one_var == 0));
43824 break;
43825 case V16QImode:
43826 case V4SImode:
43827 case V4SFmode:
43828 use_vector_set = TARGET_SSE4_1;
43829 break;
43830 case V8HImode:
43831 use_vector_set = TARGET_SSE2;
43832 break;
43833 case V4HImode:
43834 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
43835 break;
43836 case V32QImode:
43837 case V16HImode:
43838 case V8SImode:
43839 case V8SFmode:
43840 case V4DFmode:
43841 use_vector_set = TARGET_AVX;
43842 break;
43843 case V4DImode:
43844 /* Use ix86_expand_vector_set in 64bit mode only. */
43845 use_vector_set = TARGET_AVX && TARGET_64BIT;
43846 break;
43847 default:
43848 break;
43849 }
43850
43851 if (use_vector_set)
43852 {
43853 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
43854 var = force_reg (GET_MODE_INNER (mode), var);
43855 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43856 return true;
43857 }
43858
43859 switch (mode)
43860 {
43861 case V2SFmode:
43862 case V2SImode:
43863 if (!mmx_ok)
43864 return false;
43865 /* FALLTHRU */
43866
43867 case V2DFmode:
43868 case V2DImode:
43869 if (one_var != 0)
43870 return false;
43871 var = force_reg (GET_MODE_INNER (mode), var);
43872 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
43873 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43874 return true;
43875
43876 case V4SFmode:
43877 case V4SImode:
43878 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
43879 new_target = gen_reg_rtx (mode);
43880 else
43881 new_target = target;
43882 var = force_reg (GET_MODE_INNER (mode), var);
43883 x = gen_rtx_VEC_DUPLICATE (mode, var);
43884 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
43885 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
43886 if (one_var != 0)
43887 {
43888 /* We need to shuffle the value to the correct position, so
43889 create a new pseudo to store the intermediate result. */
43890
43891 /* With SSE2, we can use the integer shuffle insns. */
43892 if (mode != V4SFmode && TARGET_SSE2)
43893 {
43894 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
43895 const1_rtx,
43896 GEN_INT (one_var == 1 ? 0 : 1),
43897 GEN_INT (one_var == 2 ? 0 : 1),
43898 GEN_INT (one_var == 3 ? 0 : 1)));
43899 if (target != new_target)
43900 emit_move_insn (target, new_target);
43901 return true;
43902 }
43903
43904 /* Otherwise convert the intermediate result to V4SFmode and
43905 use the SSE1 shuffle instructions. */
43906 if (mode != V4SFmode)
43907 {
43908 tmp = gen_reg_rtx (V4SFmode);
43909 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
43910 }
43911 else
43912 tmp = new_target;
43913
43914 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
43915 const1_rtx,
43916 GEN_INT (one_var == 1 ? 0 : 1),
43917 GEN_INT (one_var == 2 ? 0+4 : 1+4),
43918 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
43919
43920 if (mode != V4SFmode)
43921 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
43922 else if (tmp != target)
43923 emit_move_insn (target, tmp);
43924 }
43925 else if (target != new_target)
43926 emit_move_insn (target, new_target);
43927 return true;
43928
43929 case V8HImode:
43930 case V16QImode:
43931 vsimode = V4SImode;
43932 goto widen;
43933 case V4HImode:
43934 case V8QImode:
43935 if (!mmx_ok)
43936 return false;
43937 vsimode = V2SImode;
43938 goto widen;
43939 widen:
43940 if (one_var != 0)
43941 return false;
43942
43943 /* Zero extend the variable element to SImode and recurse. */
43944 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
43945
43946 x = gen_reg_rtx (vsimode);
43947 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
43948 var, one_var))
43949 gcc_unreachable ();
43950
43951 emit_move_insn (target, gen_lowpart (mode, x));
43952 return true;
43953
43954 default:
43955 return false;
43956 }
43957 }
43958
43959 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43960 consisting of the values in VALS. It is known that all elements
43961 except ONE_VAR are constants. Return true if successful. */
43962
43963 static bool
43964 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
43965 rtx target, rtx vals, int one_var)
43966 {
43967 rtx var = XVECEXP (vals, 0, one_var);
43968 machine_mode wmode;
43969 rtx const_vec, x;
43970
43971 const_vec = copy_rtx (vals);
43972 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
43973 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
43974
43975 switch (mode)
43976 {
43977 case V2DFmode:
43978 case V2DImode:
43979 case V2SFmode:
43980 case V2SImode:
43981 /* For the two element vectors, it's just as easy to use
43982 the general case. */
43983 return false;
43984
43985 case V4DImode:
43986 /* Use ix86_expand_vector_set in 64bit mode only. */
43987 if (!TARGET_64BIT)
43988 return false;
43989 case V4DFmode:
43990 case V8SFmode:
43991 case V8SImode:
43992 case V16HImode:
43993 case V32QImode:
43994 case V4SFmode:
43995 case V4SImode:
43996 case V8HImode:
43997 case V4HImode:
43998 break;
43999
44000 case V16QImode:
44001 if (TARGET_SSE4_1)
44002 break;
44003 wmode = V8HImode;
44004 goto widen;
44005 case V8QImode:
44006 wmode = V4HImode;
44007 goto widen;
44008 widen:
44009 /* There's no way to set one QImode entry easily. Combine
44010 the variable value with its adjacent constant value, and
44011 promote to an HImode set. */
44012 x = XVECEXP (vals, 0, one_var ^ 1);
44013 if (one_var & 1)
44014 {
44015 var = convert_modes (HImode, QImode, var, true);
44016 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
44017 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44018 x = GEN_INT (INTVAL (x) & 0xff);
44019 }
44020 else
44021 {
44022 var = convert_modes (HImode, QImode, var, true);
44023 x = gen_int_mode (INTVAL (x) << 8, HImode);
44024 }
44025 if (x != const0_rtx)
44026 var = expand_simple_binop (HImode, IOR, var, x, var,
44027 1, OPTAB_LIB_WIDEN);
44028
44029 x = gen_reg_rtx (wmode);
44030 emit_move_insn (x, gen_lowpart (wmode, const_vec));
44031 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
44032
44033 emit_move_insn (target, gen_lowpart (mode, x));
44034 return true;
44035
44036 default:
44037 return false;
44038 }
44039
44040 emit_move_insn (target, const_vec);
44041 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44042 return true;
44043 }
44044
44045 /* A subroutine of ix86_expand_vector_init_general. Use vector
44046 concatenate to handle the most general case: all values variable,
44047 and none identical. */
44048
44049 static void
44050 ix86_expand_vector_init_concat (machine_mode mode,
44051 rtx target, rtx *ops, int n)
44052 {
44053 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
44054 rtx first[16], second[8], third[4];
44055 rtvec v;
44056 int i, j;
44057
44058 switch (n)
44059 {
44060 case 2:
44061 switch (mode)
44062 {
44063 case V16SImode:
44064 cmode = V8SImode;
44065 break;
44066 case V16SFmode:
44067 cmode = V8SFmode;
44068 break;
44069 case V8DImode:
44070 cmode = V4DImode;
44071 break;
44072 case V8DFmode:
44073 cmode = V4DFmode;
44074 break;
44075 case V8SImode:
44076 cmode = V4SImode;
44077 break;
44078 case V8SFmode:
44079 cmode = V4SFmode;
44080 break;
44081 case V4DImode:
44082 cmode = V2DImode;
44083 break;
44084 case V4DFmode:
44085 cmode = V2DFmode;
44086 break;
44087 case V4SImode:
44088 cmode = V2SImode;
44089 break;
44090 case V4SFmode:
44091 cmode = V2SFmode;
44092 break;
44093 case V2DImode:
44094 cmode = DImode;
44095 break;
44096 case V2SImode:
44097 cmode = SImode;
44098 break;
44099 case V2DFmode:
44100 cmode = DFmode;
44101 break;
44102 case V2SFmode:
44103 cmode = SFmode;
44104 break;
44105 default:
44106 gcc_unreachable ();
44107 }
44108
44109 if (!register_operand (ops[1], cmode))
44110 ops[1] = force_reg (cmode, ops[1]);
44111 if (!register_operand (ops[0], cmode))
44112 ops[0] = force_reg (cmode, ops[0]);
44113 emit_insn (gen_rtx_SET (VOIDmode, target,
44114 gen_rtx_VEC_CONCAT (mode, ops[0],
44115 ops[1])));
44116 break;
44117
44118 case 4:
44119 switch (mode)
44120 {
44121 case V4DImode:
44122 cmode = V2DImode;
44123 break;
44124 case V4DFmode:
44125 cmode = V2DFmode;
44126 break;
44127 case V4SImode:
44128 cmode = V2SImode;
44129 break;
44130 case V4SFmode:
44131 cmode = V2SFmode;
44132 break;
44133 default:
44134 gcc_unreachable ();
44135 }
44136 goto half;
44137
44138 case 8:
44139 switch (mode)
44140 {
44141 case V8DImode:
44142 cmode = V2DImode;
44143 hmode = V4DImode;
44144 break;
44145 case V8DFmode:
44146 cmode = V2DFmode;
44147 hmode = V4DFmode;
44148 break;
44149 case V8SImode:
44150 cmode = V2SImode;
44151 hmode = V4SImode;
44152 break;
44153 case V8SFmode:
44154 cmode = V2SFmode;
44155 hmode = V4SFmode;
44156 break;
44157 default:
44158 gcc_unreachable ();
44159 }
44160 goto half;
44161
44162 case 16:
44163 switch (mode)
44164 {
44165 case V16SImode:
44166 cmode = V2SImode;
44167 hmode = V4SImode;
44168 gmode = V8SImode;
44169 break;
44170 case V16SFmode:
44171 cmode = V2SFmode;
44172 hmode = V4SFmode;
44173 gmode = V8SFmode;
44174 break;
44175 default:
44176 gcc_unreachable ();
44177 }
44178 goto half;
44179
44180 half:
44181 /* FIXME: We process inputs backward to help RA. PR 36222. */
44182 i = n - 1;
44183 j = (n >> 1) - 1;
44184 for (; i > 0; i -= 2, j--)
44185 {
44186 first[j] = gen_reg_rtx (cmode);
44187 v = gen_rtvec (2, ops[i - 1], ops[i]);
44188 ix86_expand_vector_init (false, first[j],
44189 gen_rtx_PARALLEL (cmode, v));
44190 }
44191
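      /* FIRST now holds n/2 two-element vectors; keep pairing and
	 concatenating halves until a single full-width vector remains.  */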
44192 n >>= 1;
44193 if (n > 4)
44194 {
44195 gcc_assert (hmode != VOIDmode);
44196 gcc_assert (gmode != VOIDmode);
44197 for (i = j = 0; i < n; i += 2, j++)
44198 {
44199 second[j] = gen_reg_rtx (hmode);
44200 ix86_expand_vector_init_concat (hmode, second [j],
44201 &first [i], 2);
44202 }
44203 n >>= 1;
44204 for (i = j = 0; i < n; i += 2, j++)
44205 {
44206 third[j] = gen_reg_rtx (gmode);
44207 ix86_expand_vector_init_concat (gmode, third[j],
44208 &second[i], 2);
44209 }
44210 n >>= 1;
44211 ix86_expand_vector_init_concat (mode, target, third, n);
44212 }
44213 else if (n > 2)
44214 {
44215 gcc_assert (hmode != VOIDmode);
44216 for (i = j = 0; i < n; i += 2, j++)
44217 {
44218 second[j] = gen_reg_rtx (hmode);
44219 ix86_expand_vector_init_concat (hmode, second [j],
44220 &first [i], 2);
44221 }
44222 n >>= 1;
44223 ix86_expand_vector_init_concat (mode, target, second, n);
44224 }
44225 else
44226 ix86_expand_vector_init_concat (mode, target, first, n);
44227 break;
44228
44229 default:
44230 gcc_unreachable ();
44231 }
44232 }
44233
44234 /* A subroutine of ix86_expand_vector_init_general. Use vector
44235 interleave to handle the most general case: all values variable,
44236 and none identical. */
44237
44238 static void
44239 ix86_expand_vector_init_interleave (machine_mode mode,
44240 rtx target, rtx *ops, int n)
44241 {
44242 machine_mode first_imode, second_imode, third_imode, inner_mode;
44243 int i, j;
44244 rtx op0, op1;
44245 rtx (*gen_load_even) (rtx, rtx, rtx);
44246 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44247 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44248
44249 switch (mode)
44250 {
44251 case V8HImode:
44252 gen_load_even = gen_vec_setv8hi;
44253 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44254 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44255 inner_mode = HImode;
44256 first_imode = V4SImode;
44257 second_imode = V2DImode;
44258 third_imode = VOIDmode;
44259 break;
44260 case V16QImode:
44261 gen_load_even = gen_vec_setv16qi;
44262 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44263 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44264 inner_mode = QImode;
44265 first_imode = V8HImode;
44266 second_imode = V4SImode;
44267 third_imode = V2DImode;
44268 break;
44269 default:
44270 gcc_unreachable ();
44271 }
44272
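  /* For each i, build a vector whose element 0 is ops[2*i] and element 1 is
     ops[2*i + 1]; the interleave steps below merge these vectors into the
     final result.  */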
44273 for (i = 0; i < n; i++)
44274 {
44275 /* Extend the odd element to SImode using a paradoxical SUBREG. */
44276 op0 = gen_reg_rtx (SImode);
44277 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44278
44279 /* Insert the SImode value as the low element of a V4SImode vector. */
44280 op1 = gen_reg_rtx (V4SImode);
44281 op0 = gen_rtx_VEC_MERGE (V4SImode,
44282 gen_rtx_VEC_DUPLICATE (V4SImode,
44283 op0),
44284 CONST0_RTX (V4SImode),
44285 const1_rtx);
44286 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
44287
44288 /* Cast the V4SImode vector back to a vector in the original mode. */
44289 op0 = gen_reg_rtx (mode);
44290 emit_move_insn (op0, gen_lowpart (mode, op1));
44291
44292 /* Load even elements into the second position. */
44293 emit_insn (gen_load_even (op0,
44294 force_reg (inner_mode,
44295 ops [i + i + 1]),
44296 const1_rtx));
44297
44298 /* Cast vector to FIRST_IMODE vector. */
44299 ops[i] = gen_reg_rtx (first_imode);
44300 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44301 }
44302
44303 /* Interleave low FIRST_IMODE vectors. */
44304 for (i = j = 0; i < n; i += 2, j++)
44305 {
44306 op0 = gen_reg_rtx (first_imode);
44307 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44308
44309 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44310 ops[j] = gen_reg_rtx (second_imode);
44311 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44312 }
44313
44314 /* Interleave low SECOND_IMODE vectors. */
44315 switch (second_imode)
44316 {
44317 case V4SImode:
44318 for (i = j = 0; i < n / 2; i += 2, j++)
44319 {
44320 op0 = gen_reg_rtx (second_imode);
44321 emit_insn (gen_interleave_second_low (op0, ops[i],
44322 ops[i + 1]));
44323
44324 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44325 vector. */
44326 ops[j] = gen_reg_rtx (third_imode);
44327 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44328 }
44329 second_imode = V2DImode;
44330 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44331 /* FALLTHRU */
44332
44333 case V2DImode:
44334 op0 = gen_reg_rtx (second_imode);
44335 emit_insn (gen_interleave_second_low (op0, ops[0],
44336 ops[1]));
44337
44338 /* Cast the SECOND_IMODE vector back to a vector in the original
44339 mode. */
44340 emit_insn (gen_rtx_SET (VOIDmode, target,
44341 gen_lowpart (mode, op0)));
44342 break;
44343
44344 default:
44345 gcc_unreachable ();
44346 }
44347 }
44348
44349 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44350 all values variable, and none identical. */
44351
44352 static void
44353 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44354 rtx target, rtx vals)
44355 {
44356 rtx ops[64], op0, op1, op2, op3, op4, op5;
44357 machine_mode half_mode = VOIDmode;
44358 machine_mode quarter_mode = VOIDmode;
44359 int n, i;
44360
44361 switch (mode)
44362 {
44363 case V2SFmode:
44364 case V2SImode:
44365 if (!mmx_ok && !TARGET_SSE)
44366 break;
44367 /* FALLTHRU */
44368
44369 case V16SImode:
44370 case V16SFmode:
44371 case V8DFmode:
44372 case V8DImode:
44373 case V8SFmode:
44374 case V8SImode:
44375 case V4DFmode:
44376 case V4DImode:
44377 case V4SFmode:
44378 case V4SImode:
44379 case V2DFmode:
44380 case V2DImode:
44381 n = GET_MODE_NUNITS (mode);
44382 for (i = 0; i < n; i++)
44383 ops[i] = XVECEXP (vals, 0, i);
44384 ix86_expand_vector_init_concat (mode, target, ops, n);
44385 return;
44386
44387 case V32QImode:
44388 half_mode = V16QImode;
44389 goto half;
44390
44391 case V16HImode:
44392 half_mode = V8HImode;
44393 goto half;
44394
44395 half:
44396 n = GET_MODE_NUNITS (mode);
44397 for (i = 0; i < n; i++)
44398 ops[i] = XVECEXP (vals, 0, i);
44399 op0 = gen_reg_rtx (half_mode);
44400 op1 = gen_reg_rtx (half_mode);
44401 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44402 n >> 2);
44403 ix86_expand_vector_init_interleave (half_mode, op1,
44404 &ops [n >> 1], n >> 2);
44405 emit_insn (gen_rtx_SET (VOIDmode, target,
44406 gen_rtx_VEC_CONCAT (mode, op0, op1)));
44407 return;
44408
44409 case V64QImode:
44410 quarter_mode = V16QImode;
44411 half_mode = V32QImode;
44412 goto quarter;
44413
44414 case V32HImode:
44415 quarter_mode = V8HImode;
44416 half_mode = V16HImode;
44417 goto quarter;
44418
44419 quarter:
44420 n = GET_MODE_NUNITS (mode);
44421 for (i = 0; i < n; i++)
44422 ops[i] = XVECEXP (vals, 0, i);
44423 op0 = gen_reg_rtx (quarter_mode);
44424 op1 = gen_reg_rtx (quarter_mode);
44425 op2 = gen_reg_rtx (quarter_mode);
44426 op3 = gen_reg_rtx (quarter_mode);
44427 op4 = gen_reg_rtx (half_mode);
44428 op5 = gen_reg_rtx (half_mode);
44429 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44430 n >> 3);
44431 ix86_expand_vector_init_interleave (quarter_mode, op1,
44432 &ops [n >> 2], n >> 3);
44433 ix86_expand_vector_init_interleave (quarter_mode, op2,
44434 &ops [n >> 1], n >> 3);
44435 ix86_expand_vector_init_interleave (quarter_mode, op3,
44436 &ops [(n >> 1) | (n >> 2)], n >> 3);
44437 emit_insn (gen_rtx_SET (VOIDmode, op4,
44438 gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44439 emit_insn (gen_rtx_SET (VOIDmode, op5,
44440 gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44441 emit_insn (gen_rtx_SET (VOIDmode, target,
44442 gen_rtx_VEC_CONCAT (mode, op4, op5)));
44443 return;
44444
44445 case V16QImode:
44446 if (!TARGET_SSE4_1)
44447 break;
44448 /* FALLTHRU */
44449
44450 case V8HImode:
44451 if (!TARGET_SSE2)
44452 break;
44453
44454 /* Don't use ix86_expand_vector_init_interleave if we can't
44455 move from GPR to SSE register directly. */
44456 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44457 break;
44458
44459 n = GET_MODE_NUNITS (mode);
44460 for (i = 0; i < n; i++)
44461 ops[i] = XVECEXP (vals, 0, i);
44462 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44463 return;
44464
44465 case V4HImode:
44466 case V8QImode:
44467 break;
44468
44469 default:
44470 gcc_unreachable ();
44471 }
44472
44473 {
44474 int i, j, n_elts, n_words, n_elt_per_word;
44475 machine_mode inner_mode;
44476 rtx words[4], shift;
44477
44478 inner_mode = GET_MODE_INNER (mode);
44479 n_elts = GET_MODE_NUNITS (mode);
44480 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44481 n_elt_per_word = n_elts / n_words;
44482 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44483
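    /* Pack N_ELT_PER_WORD elements into each word-sized integer, placing the
       lowest-numbered element in the least significant bits, then assemble
       the vector from those words.  */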
44484 for (i = 0; i < n_words; ++i)
44485 {
44486 rtx word = NULL_RTX;
44487
44488 for (j = 0; j < n_elt_per_word; ++j)
44489 {
44490 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44491 elt = convert_modes (word_mode, inner_mode, elt, true);
44492
44493 if (j == 0)
44494 word = elt;
44495 else
44496 {
44497 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44498 word, 1, OPTAB_LIB_WIDEN);
44499 word = expand_simple_binop (word_mode, IOR, word, elt,
44500 word, 1, OPTAB_LIB_WIDEN);
44501 }
44502 }
44503
44504 words[i] = word;
44505 }
44506
44507 if (n_words == 1)
44508 emit_move_insn (target, gen_lowpart (mode, words[0]));
44509 else if (n_words == 2)
44510 {
44511 rtx tmp = gen_reg_rtx (mode);
44512 emit_clobber (tmp);
44513 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44514 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44515 emit_move_insn (target, tmp);
44516 }
44517 else if (n_words == 4)
44518 {
44519 rtx tmp = gen_reg_rtx (V4SImode);
44520 gcc_assert (word_mode == SImode);
44521 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44522 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44523 emit_move_insn (target, gen_lowpart (mode, tmp));
44524 }
44525 else
44526 gcc_unreachable ();
44527 }
44528 }
44529
44530 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44531 instructions unless MMX_OK is true. */
44532
44533 void
44534 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44535 {
44536 machine_mode mode = GET_MODE (target);
44537 machine_mode inner_mode = GET_MODE_INNER (mode);
44538 int n_elts = GET_MODE_NUNITS (mode);
44539 int n_var = 0, one_var = -1;
44540 bool all_same = true, all_const_zero = true;
44541 int i;
44542 rtx x;
44543
44544 for (i = 0; i < n_elts; ++i)
44545 {
44546 x = XVECEXP (vals, 0, i);
44547 if (!(CONST_INT_P (x)
44548 || GET_CODE (x) == CONST_DOUBLE
44549 || GET_CODE (x) == CONST_FIXED))
44550 n_var++, one_var = i;
44551 else if (x != CONST0_RTX (inner_mode))
44552 all_const_zero = false;
44553 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44554 all_same = false;
44555 }
44556
44557 /* Constants are best loaded from the constant pool. */
44558 if (n_var == 0)
44559 {
44560 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44561 return;
44562 }
44563
44564 /* If all values are identical, broadcast the value. */
44565 if (all_same
44566 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44567 XVECEXP (vals, 0, 0)))
44568 return;
44569
44570 /* Values where only one field is non-constant are best loaded from
44571 the pool and overwritten via move later. */
44572 if (n_var == 1)
44573 {
44574 if (all_const_zero
44575 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44576 XVECEXP (vals, 0, one_var),
44577 one_var))
44578 return;
44579
44580 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44581 return;
44582 }
44583
44584 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
44585 }
44586
44587 void
44588 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44589 {
44590 machine_mode mode = GET_MODE (target);
44591 machine_mode inner_mode = GET_MODE_INNER (mode);
44592 machine_mode half_mode;
44593 bool use_vec_merge = false;
44594 rtx tmp;
44595 static rtx (*gen_extract[6][2]) (rtx, rtx)
44596 = {
44597 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44598 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44599 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44600 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44601 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44602 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44603 };
44604 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44605 = {
44606 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44607 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44608 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44609 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44610 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44611 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44612 };
44613 int i, j, n;
44614
44615 switch (mode)
44616 {
44617 case V2SFmode:
44618 case V2SImode:
44619 if (mmx_ok)
44620 {
44621 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44622 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44623 if (elt == 0)
44624 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44625 else
44626 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44627 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44628 return;
44629 }
44630 break;
44631
44632 case V2DImode:
44633 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44634 if (use_vec_merge)
44635 break;
44636
44637 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44638 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44639 if (elt == 0)
44640 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44641 else
44642 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44643 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44644 return;
44645
44646 case V2DFmode:
44647 {
44648 rtx op0, op1;
44649
44650 /* For the two element vectors, we implement a VEC_CONCAT with
44651 the extraction of the other element. */
44652
44653 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44654 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44655
44656 if (elt == 0)
44657 op0 = val, op1 = tmp;
44658 else
44659 op0 = tmp, op1 = val;
44660
44661 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44662 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44663 }
44664 return;
44665
44666 case V4SFmode:
44667 use_vec_merge = TARGET_SSE4_1;
44668 if (use_vec_merge)
44669 break;
44670
44671 switch (elt)
44672 {
44673 case 0:
44674 use_vec_merge = true;
44675 break;
44676
44677 case 1:
44678 /* tmp = target = A B C D */
44679 tmp = copy_to_reg (target);
44680 /* target = A A B B */
44681 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44682 /* target = X A B B */
44683 ix86_expand_vector_set (false, target, val, 0);
44684 /* target = A X C D */
44685 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44686 const1_rtx, const0_rtx,
44687 GEN_INT (2+4), GEN_INT (3+4)));
44688 return;
44689
44690 case 2:
44691 /* tmp = target = A B C D */
44692 tmp = copy_to_reg (target);
44693 /* tmp = X B C D */
44694 ix86_expand_vector_set (false, tmp, val, 0);
44695 /* target = A B X D */
44696 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44697 const0_rtx, const1_rtx,
44698 GEN_INT (0+4), GEN_INT (3+4)));
44699 return;
44700
44701 case 3:
44702 /* tmp = target = A B C D */
44703 tmp = copy_to_reg (target);
44704 /* tmp = X B C D */
44705 ix86_expand_vector_set (false, tmp, val, 0);
44706 /* target = A B C X */
44707 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44708 const0_rtx, const1_rtx,
44709 GEN_INT (2+4), GEN_INT (0+4)));
44710 return;
44711
44712 default:
44713 gcc_unreachable ();
44714 }
44715 break;
44716
44717 case V4SImode:
44718 use_vec_merge = TARGET_SSE4_1;
44719 if (use_vec_merge)
44720 break;
44721
44722 /* Element 0 handled by vec_merge below. */
44723 if (elt == 0)
44724 {
44725 use_vec_merge = true;
44726 break;
44727 }
44728
44729 if (TARGET_SSE2)
44730 {
44731 /* With SSE2, use integer shuffles to swap element 0 and ELT,
44732 store into element 0, then shuffle them back. */
44733
44734 rtx order[4];
44735
44736 order[0] = GEN_INT (elt);
44737 order[1] = const1_rtx;
44738 order[2] = const2_rtx;
44739 order[3] = GEN_INT (3);
44740 order[elt] = const0_rtx;
44741
44742 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44743 order[1], order[2], order[3]));
44744
44745 ix86_expand_vector_set (false, target, val, 0);
44746
44747 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44748 order[1], order[2], order[3]));
44749 }
44750 else
44751 {
44752 /* For SSE1, we have to reuse the V4SF code. */
44753 rtx t = gen_reg_rtx (V4SFmode);
44754 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
44755 emit_move_insn (target, gen_lowpart (mode, t));
44756 }
44757 return;
44758
44759 case V8HImode:
44760 use_vec_merge = TARGET_SSE2;
44761 break;
44762 case V4HImode:
44763 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44764 break;
44765
44766 case V16QImode:
44767 use_vec_merge = TARGET_SSE4_1;
44768 break;
44769
44770 case V8QImode:
44771 break;
44772
44773 case V32QImode:
44774 half_mode = V16QImode;
44775 j = 0;
44776 n = 16;
44777 goto half;
44778
44779 case V16HImode:
44780 half_mode = V8HImode;
44781 j = 1;
44782 n = 8;
44783 goto half;
44784
44785 case V8SImode:
44786 half_mode = V4SImode;
44787 j = 2;
44788 n = 4;
44789 goto half;
44790
44791 case V4DImode:
44792 half_mode = V2DImode;
44793 j = 3;
44794 n = 2;
44795 goto half;
44796
44797 case V8SFmode:
44798 half_mode = V4SFmode;
44799 j = 4;
44800 n = 4;
44801 goto half;
44802
44803 case V4DFmode:
44804 half_mode = V2DFmode;
44805 j = 5;
44806 n = 2;
44807 goto half;
44808
44809 half:
44810 /* Compute offset. */
44811 i = elt / n;
44812 elt %= n;
44813
44814 gcc_assert (i <= 1);
44815
44816 /* Extract the half. */
44817 tmp = gen_reg_rtx (half_mode);
44818 emit_insn (gen_extract[j][i] (tmp, target));
44819
44820 /* Put val in tmp at elt. */
44821 ix86_expand_vector_set (false, tmp, val, elt);
44822
44823 /* Put it back. */
44824 emit_insn (gen_insert[j][i] (target, target, tmp));
44825 return;
44826
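    /* For the 512-bit modes below, broadcast VAL into a scratch vector and
       use a masked blend with mask 1 << ELT to merge it into the selected
       element of TARGET.  */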
44827 case V8DFmode:
44828 if (TARGET_AVX512F)
44829 {
44830 tmp = gen_reg_rtx (mode);
44831 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44832 gen_rtx_VEC_DUPLICATE (mode, val)));
44833 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
44834 force_reg (QImode, GEN_INT (1 << elt))));
44835 return;
44836 }
44837 else
44838 break;
44839 case V8DImode:
44840 if (TARGET_AVX512F)
44841 {
44842 tmp = gen_reg_rtx (mode);
44843 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44844 gen_rtx_VEC_DUPLICATE (mode, val)));
44845 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
44846 force_reg (QImode, GEN_INT (1 << elt))));
44847 return;
44848 }
44849 else
44850 break;
44851 case V16SFmode:
44852 if (TARGET_AVX512F)
44853 {
44854 tmp = gen_reg_rtx (mode);
44855 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44856 gen_rtx_VEC_DUPLICATE (mode, val)));
44857 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
44858 force_reg (HImode, GEN_INT (1 << elt))));
44859 return;
44860 }
44861 else
44862 break;
44863 case V16SImode:
44864 if (TARGET_AVX512F)
44865 {
44866 tmp = gen_reg_rtx (mode);
44867 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44868 gen_rtx_VEC_DUPLICATE (mode, val)));
44869 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
44870 force_reg (HImode, GEN_INT (1 << elt))));
44871 return;
44872 }
44873 else
44874 break;
44875 case V32HImode:
44876 if (TARGET_AVX512F && TARGET_AVX512BW)
44877 {
44878 tmp = gen_reg_rtx (mode);
44879 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44880 gen_rtx_VEC_DUPLICATE (mode, val)));
44881 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
44882 force_reg (SImode, GEN_INT (1 << elt))));
44883 return;
44884 }
44885 else
44886 break;
44887 case V64QImode:
44888 if (TARGET_AVX512F && TARGET_AVX512BW)
44889 {
44890 tmp = gen_reg_rtx (mode);
44891 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44892 gen_rtx_VEC_DUPLICATE (mode, val)));
44893 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
44894 force_reg (DImode, GEN_INT (1 << elt))));
44895 return;
44896 }
44897 else
44898 break;
44899
44900 default:
44901 break;
44902 }
44903
44904 if (use_vec_merge)
44905 {
44906 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
44907 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
44908 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44909 }
44910 else
44911 {
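      /* Fall back through memory: spill the vector to a stack slot,
	 overwrite the selected element there, and reload the vector.  */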
44912 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44913
44914 emit_move_insn (mem, target);
44915
44916 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
44917 emit_move_insn (tmp, val);
44918
44919 emit_move_insn (target, mem);
44920 }
44921 }
44922
44923 void
44924 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
44925 {
44926 machine_mode mode = GET_MODE (vec);
44927 machine_mode inner_mode = GET_MODE_INNER (mode);
44928 bool use_vec_extr = false;
44929 rtx tmp;
44930
44931 switch (mode)
44932 {
44933 case V2SImode:
44934 case V2SFmode:
44935 if (!mmx_ok)
44936 break;
44937 /* FALLTHRU */
44938
44939 case V2DFmode:
44940 case V2DImode:
44941 use_vec_extr = true;
44942 break;
44943
44944 case V4SFmode:
44945 use_vec_extr = TARGET_SSE4_1;
44946 if (use_vec_extr)
44947 break;
44948
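      /* Without SSE4.1, shuffle the requested element into position 0 and
	 extract it from there.  */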
44949 switch (elt)
44950 {
44951 case 0:
44952 tmp = vec;
44953 break;
44954
44955 case 1:
44956 case 3:
44957 tmp = gen_reg_rtx (mode);
44958 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
44959 GEN_INT (elt), GEN_INT (elt),
44960 GEN_INT (elt+4), GEN_INT (elt+4)));
44961 break;
44962
44963 case 2:
44964 tmp = gen_reg_rtx (mode);
44965 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
44966 break;
44967
44968 default:
44969 gcc_unreachable ();
44970 }
44971 vec = tmp;
44972 use_vec_extr = true;
44973 elt = 0;
44974 break;
44975
44976 case V4SImode:
44977 use_vec_extr = TARGET_SSE4_1;
44978 if (use_vec_extr)
44979 break;
44980
44981 if (TARGET_SSE2)
44982 {
44983 switch (elt)
44984 {
44985 case 0:
44986 tmp = vec;
44987 break;
44988
44989 case 1:
44990 case 3:
44991 tmp = gen_reg_rtx (mode);
44992 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
44993 GEN_INT (elt), GEN_INT (elt),
44994 GEN_INT (elt), GEN_INT (elt)));
44995 break;
44996
44997 case 2:
44998 tmp = gen_reg_rtx (mode);
44999 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
45000 break;
45001
45002 default:
45003 gcc_unreachable ();
45004 }
45005 vec = tmp;
45006 use_vec_extr = true;
45007 elt = 0;
45008 }
45009 else
45010 {
45011 /* For SSE1, we have to reuse the V4SF code. */
45012 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
45013 gen_lowpart (V4SFmode, vec), elt);
45014 return;
45015 }
45016 break;
45017
45018 case V8HImode:
45019 use_vec_extr = TARGET_SSE2;
45020 break;
45021 case V4HImode:
45022 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45023 break;
45024
45025 case V16QImode:
45026 use_vec_extr = TARGET_SSE4_1;
45027 break;
45028
45029 case V8SFmode:
45030 if (TARGET_AVX)
45031 {
45032 tmp = gen_reg_rtx (V4SFmode);
45033 if (elt < 4)
45034 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
45035 else
45036 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
45037 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45038 return;
45039 }
45040 break;
45041
45042 case V4DFmode:
45043 if (TARGET_AVX)
45044 {
45045 tmp = gen_reg_rtx (V2DFmode);
45046 if (elt < 2)
45047 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
45048 else
45049 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
45050 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45051 return;
45052 }
45053 break;
45054
45055 case V32QImode:
45056 if (TARGET_AVX)
45057 {
45058 tmp = gen_reg_rtx (V16QImode);
45059 if (elt < 16)
45060 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45061 else
45062 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45063 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45064 return;
45065 }
45066 break;
45067
45068 case V16HImode:
45069 if (TARGET_AVX)
45070 {
45071 tmp = gen_reg_rtx (V8HImode);
45072 if (elt < 8)
45073 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45074 else
45075 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45076 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45077 return;
45078 }
45079 break;
45080
45081 case V8SImode:
45082 if (TARGET_AVX)
45083 {
45084 tmp = gen_reg_rtx (V4SImode);
45085 if (elt < 4)
45086 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45087 else
45088 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45089 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45090 return;
45091 }
45092 break;
45093
45094 case V4DImode:
45095 if (TARGET_AVX)
45096 {
45097 tmp = gen_reg_rtx (V2DImode);
45098 if (elt < 2)
45099 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45100 else
45101 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45102 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45103 return;
45104 }
45105 break;
45106
45107 case V32HImode:
45108 if (TARGET_AVX512BW)
45109 {
45110 tmp = gen_reg_rtx (V16HImode);
45111 if (elt < 16)
45112 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45113 else
45114 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45115 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45116 return;
45117 }
45118 break;
45119
45120 case V64QImode:
45121 if (TARGET_AVX512BW)
45122 {
45123 tmp = gen_reg_rtx (V32QImode);
45124 if (elt < 32)
45125 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45126 else
45127 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45128 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45129 return;
45130 }
45131 break;
45132
45133 case V16SFmode:
45134 tmp = gen_reg_rtx (V8SFmode);
45135 if (elt < 8)
45136 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45137 else
45138 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45139 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45140 return;
45141
45142 case V8DFmode:
45143 tmp = gen_reg_rtx (V4DFmode);
45144 if (elt < 4)
45145 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45146 else
45147 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45148 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45149 return;
45150
45151 case V16SImode:
45152 tmp = gen_reg_rtx (V8SImode);
45153 if (elt < 8)
45154 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45155 else
45156 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45157 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45158 return;
45159
45160 case V8DImode:
45161 tmp = gen_reg_rtx (V4DImode);
45162 if (elt < 4)
45163 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45164 else
45165 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45166 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45167 return;
45168
45169 case V8QImode:
45170 /* ??? Could extract the appropriate HImode element and shift. */
45171 default:
45172 break;
45173 }
45174
45175 if (use_vec_extr)
45176 {
45177 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45178 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45179
45180 /* Let the rtl optimizers know about the zero extension performed. */
45181 if (inner_mode == QImode || inner_mode == HImode)
45182 {
45183 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45184 target = gen_lowpart (SImode, target);
45185 }
45186
45187 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45188 }
45189 else
45190 {
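      /* Fall back through memory: spill the vector to a stack slot and load
	 the selected element from it.  */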
45191 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45192
45193 emit_move_insn (mem, vec);
45194
45195 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45196 emit_move_insn (target, tmp);
45197 }
45198 }
45199
45200 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45201 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45202 The upper bits of DEST are undefined, though they shouldn't cause
45203 exceptions (some bits from src or all zeros are ok). */
45204
45205 static void
45206 emit_reduc_half (rtx dest, rtx src, int i)
45207 {
45208 rtx tem, d = dest;
45209 switch (GET_MODE (src))
45210 {
45211 case V4SFmode:
45212 if (i == 128)
45213 tem = gen_sse_movhlps (dest, src, src);
45214 else
45215 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45216 GEN_INT (1 + 4), GEN_INT (1 + 4));
45217 break;
45218 case V2DFmode:
45219 tem = gen_vec_interleave_highv2df (dest, src, src);
45220 break;
45221 case V16QImode:
45222 case V8HImode:
45223 case V4SImode:
45224 case V2DImode:
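      /* Shift the whole 128-bit register right by I/2 bits.  */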
45225 d = gen_reg_rtx (V1TImode);
45226 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45227 GEN_INT (i / 2));
45228 break;
45229 case V8SFmode:
45230 if (i == 256)
45231 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45232 else
45233 tem = gen_avx_shufps256 (dest, src, src,
45234 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45235 break;
45236 case V4DFmode:
45237 if (i == 256)
45238 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45239 else
45240 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45241 break;
45242 case V32QImode:
45243 case V16HImode:
45244 case V8SImode:
45245 case V4DImode:
45246 if (i == 256)
45247 {
45248 if (GET_MODE (dest) != V4DImode)
45249 d = gen_reg_rtx (V4DImode);
45250 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45251 gen_lowpart (V4DImode, src),
45252 const1_rtx);
45253 }
45254 else
45255 {
45256 d = gen_reg_rtx (V2TImode);
45257 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
45258 GEN_INT (i / 2));
45259 }
45260 break;
45261 case V64QImode:
45262 case V32HImode:
45263 case V16SImode:
45264 case V16SFmode:
45265 case V8DImode:
45266 case V8DFmode:
45267 if (i > 128)
45268 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45269 gen_lowpart (V16SImode, src),
45270 gen_lowpart (V16SImode, src),
45271 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45272 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45273 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45274 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45275 GEN_INT (0xC), GEN_INT (0xD),
45276 GEN_INT (0xE), GEN_INT (0xF),
45277 GEN_INT (0x10), GEN_INT (0x11),
45278 GEN_INT (0x12), GEN_INT (0x13),
45279 GEN_INT (0x14), GEN_INT (0x15),
45280 GEN_INT (0x16), GEN_INT (0x17));
45281 else
45282 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45283 gen_lowpart (V16SImode, src),
45284 GEN_INT (i == 128 ? 0x2 : 0x1),
45285 GEN_INT (0x3),
45286 GEN_INT (0x3),
45287 GEN_INT (0x3),
45288 GEN_INT (i == 128 ? 0x6 : 0x5),
45289 GEN_INT (0x7),
45290 GEN_INT (0x7),
45291 GEN_INT (0x7),
45292 GEN_INT (i == 128 ? 0xA : 0x9),
45293 GEN_INT (0xB),
45294 GEN_INT (0xB),
45295 GEN_INT (0xB),
45296 GEN_INT (i == 128 ? 0xE : 0xD),
45297 GEN_INT (0xF),
45298 GEN_INT (0xF),
45299 GEN_INT (0xF));
45300 break;
45301 default:
45302 gcc_unreachable ();
45303 }
45304 emit_insn (tem);
45305 if (d != dest)
45306 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45307 }
45308
45309 /* Expand a vector reduction. FN is the binary pattern to reduce;
45310 DEST is the destination; IN is the input vector. */
45311
45312 void
45313 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45314 {
45315 rtx half, dst, vec = in;
45316 machine_mode mode = GET_MODE (in);
45317 int i;
45318
45319 /* SSE4.1 has a special instruction for V8HImode UMIN reduction. */
45320 if (TARGET_SSE4_1
45321 && mode == V8HImode
45322 && fn == gen_uminv8hi3)
45323 {
45324 emit_insn (gen_sse4_1_phminposuw (dest, in));
45325 return;
45326 }
45327
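  /* Repeatedly fold the upper half of the remaining elements onto the lower
     half and combine with FN; the fully reduced value ends up in the low
     element of DEST.  */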
45328 for (i = GET_MODE_BITSIZE (mode);
45329 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45330 i >>= 1)
45331 {
45332 half = gen_reg_rtx (mode);
45333 emit_reduc_half (half, vec, i);
45334 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45335 dst = dest;
45336 else
45337 dst = gen_reg_rtx (mode);
45338 emit_insn (fn (dst, half, vec));
45339 vec = dst;
45340 }
45341 }
45342 \f
45343 /* Target hook for scalar_mode_supported_p. */
45344 static bool
45345 ix86_scalar_mode_supported_p (machine_mode mode)
45346 {
45347 if (DECIMAL_FLOAT_MODE_P (mode))
45348 return default_decimal_float_supported_p ();
45349 else if (mode == TFmode)
45350 return true;
45351 else
45352 return default_scalar_mode_supported_p (mode);
45353 }
45354
45355 /* Implements target hook vector_mode_supported_p. */
45356 static bool
45357 ix86_vector_mode_supported_p (machine_mode mode)
45358 {
45359 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45360 return true;
45361 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45362 return true;
45363 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45364 return true;
45365 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45366 return true;
45367 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45368 return true;
45369 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45370 return true;
45371 return false;
45372 }
45373
45374 /* Implement target hook libgcc_floating_mode_supported_p. */
45375 static bool
45376 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45377 {
45378 switch (mode)
45379 {
45380 case SFmode:
45381 case DFmode:
45382 case XFmode:
45383 return true;
45384
45385 case TFmode:
45386 #ifdef IX86_NO_LIBGCC_TFMODE
45387 return false;
45388 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45389 return TARGET_LONG_DOUBLE_128;
45390 #else
45391 return true;
45392 #endif
45393
45394 default:
45395 return false;
45396 }
45397 }
45398
45399 /* Target hook for c_mode_for_suffix. */
45400 static machine_mode
45401 ix86_c_mode_for_suffix (char suffix)
45402 {
45403 if (suffix == 'q')
45404 return TFmode;
45405 if (suffix == 'w')
45406 return XFmode;
45407
45408 return VOIDmode;
45409 }
45410
45411 /* Worker function for TARGET_MD_ASM_CLOBBERS.
45412
45413 We do this in the new i386 backend to maintain source compatibility
45414 with the old cc0-based compiler. */
45415
45416 static tree
45417 ix86_md_asm_clobbers (tree, tree, tree clobbers)
45418 {
45419 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
45420 clobbers);
45421 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
45422 clobbers);
45423 return clobbers;
45424 }
45425
45426 /* Implements the target hook targetm.asm.encode_section_info. */
45427
45428 static void ATTRIBUTE_UNUSED
45429 ix86_encode_section_info (tree decl, rtx rtl, int first)
45430 {
45431 default_encode_section_info (decl, rtl, first);
45432
45433 if (ix86_in_large_data_p (decl))
45434 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45435 }
45436
45437 /* Worker function for REVERSE_CONDITION. */
45438
45439 enum rtx_code
45440 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45441 {
45442 return (mode != CCFPmode && mode != CCFPUmode
45443 ? reverse_condition (code)
45444 : reverse_condition_maybe_unordered (code));
45445 }
45446
45447 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45448 to OPERANDS[0]. */
45449
45450 const char *
45451 output_387_reg_move (rtx insn, rtx *operands)
45452 {
45453 if (REG_P (operands[0]))
45454 {
45455 if (REG_P (operands[1])
45456 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45457 {
45458 if (REGNO (operands[0]) == FIRST_STACK_REG)
45459 return output_387_ffreep (operands, 0);
45460 return "fstp\t%y0";
45461 }
45462 if (STACK_TOP_P (operands[0]))
45463 return "fld%Z1\t%y1";
45464 return "fst\t%y0";
45465 }
45466 else if (MEM_P (operands[0]))
45467 {
45468 gcc_assert (REG_P (operands[1]));
45469 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45470 return "fstp%Z0\t%y0";
45471 else
45472 {
45473 /* There is no non-popping store to memory for XFmode.
45474 So if we need one, follow the store with a load. */
45475 if (GET_MODE (operands[0]) == XFmode)
45476 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45477 else
45478 return "fst%Z0\t%y0";
45479 }
45480 }
45481 else
45482 gcc_unreachable();
45483 }
45484
45485 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
45486 the FP status register is set. */
45487
45488 void
45489 ix86_emit_fp_unordered_jump (rtx label)
45490 {
45491 rtx reg = gen_reg_rtx (HImode);
45492 rtx temp;
45493
45494 emit_insn (gen_x86_fnstsw_1 (reg));
45495
45496 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45497 {
45498 emit_insn (gen_x86_sahf_1 (reg));
45499
45500 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45501 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45502 }
45503 else
45504 {
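      /* Without SAHF, test the C2 bit (0x04 in the high byte of the FP
	 status word) directly.  */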
45505 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45506
45507 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45508 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45509 }
45510
45511 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45512 gen_rtx_LABEL_REF (VOIDmode, label),
45513 pc_rtx);
45514 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
45515
45516 emit_jump_insn (temp);
45517 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45518 }
45519
45520 /* Output code to perform a log1p XFmode calculation. */
45521
45522 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45523 {
45524 rtx_code_label *label1 = gen_label_rtx ();
45525 rtx_code_label *label2 = gen_label_rtx ();
45526
45527 rtx tmp = gen_reg_rtx (XFmode);
45528 rtx tmp2 = gen_reg_rtx (XFmode);
45529 rtx test;
45530
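  /* fyl2xp1 is only valid for |op1| < 1 - sqrt(2)/2 (about 0.2929); for
     larger magnitudes compute log(1 + op1) via fyl2x instead.  */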
45531 emit_insn (gen_absxf2 (tmp, op1));
45532 test = gen_rtx_GE (VOIDmode, tmp,
45533 CONST_DOUBLE_FROM_REAL_VALUE (
45534 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45535 XFmode));
45536 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
45537
45538 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45539 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45540 emit_jump (label2);
45541
45542 emit_label (label1);
45543 emit_move_insn (tmp, CONST1_RTX (XFmode));
45544 emit_insn (gen_addxf3 (tmp, op1, tmp));
45545 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45546 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45547
45548 emit_label (label2);
45549 }
45550
45551 /* Output code to compute OP0 = round (OP1), rounding halfway cases away from zero. */
45552 void ix86_emit_i387_round (rtx op0, rtx op1)
45553 {
45554 machine_mode inmode = GET_MODE (op1);
45555 machine_mode outmode = GET_MODE (op0);
45556 rtx e1, e2, res, tmp, tmp1, half;
45557 rtx scratch = gen_reg_rtx (HImode);
45558 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45559 rtx_code_label *jump_label = gen_label_rtx ();
45560 rtx insn;
45561 rtx (*gen_abs) (rtx, rtx);
45562 rtx (*gen_neg) (rtx, rtx);
45563
45564 switch (inmode)
45565 {
45566 case SFmode:
45567 gen_abs = gen_abssf2;
45568 break;
45569 case DFmode:
45570 gen_abs = gen_absdf2;
45571 break;
45572 case XFmode:
45573 gen_abs = gen_absxf2;
45574 break;
45575 default:
45576 gcc_unreachable ();
45577 }
45578
45579 switch (outmode)
45580 {
45581 case SFmode:
45582 gen_neg = gen_negsf2;
45583 break;
45584 case DFmode:
45585 gen_neg = gen_negdf2;
45586 break;
45587 case XFmode:
45588 gen_neg = gen_negxf2;
45589 break;
45590 case HImode:
45591 gen_neg = gen_neghi2;
45592 break;
45593 case SImode:
45594 gen_neg = gen_negsi2;
45595 break;
45596 case DImode:
45597 gen_neg = gen_negdi2;
45598 break;
45599 default:
45600 gcc_unreachable ();
45601 }
45602
45603 e1 = gen_reg_rtx (inmode);
45604 e2 = gen_reg_rtx (inmode);
45605 res = gen_reg_rtx (outmode);
45606
45607 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45608
45609 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
45610
45611 /* scratch = fxam(op1) */
45612 emit_insn (gen_rtx_SET (VOIDmode, scratch,
45613 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45614 UNSPEC_FXAM)));
45615 /* e1 = fabs(op1) */
45616 emit_insn (gen_abs (e1, op1));
45617
45618 /* e2 = e1 + 0.5 */
45619 half = force_reg (inmode, half);
45620 emit_insn (gen_rtx_SET (VOIDmode, e2,
45621 gen_rtx_PLUS (inmode, e1, half)));
45622
45623 /* res = floor(e2) */
45624 if (inmode != XFmode)
45625 {
45626 tmp1 = gen_reg_rtx (XFmode);
45627
45628 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
45629 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
45630 }
45631 else
45632 tmp1 = e2;
45633
45634 switch (outmode)
45635 {
45636 case SFmode:
45637 case DFmode:
45638 {
45639 rtx tmp0 = gen_reg_rtx (XFmode);
45640
45641 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
45642
45643 emit_insn (gen_rtx_SET (VOIDmode, res,
45644 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
45645 UNSPEC_TRUNC_NOOP)));
45646 }
45647 break;
45648 case XFmode:
45649 emit_insn (gen_frndintxf2_floor (res, tmp1));
45650 break;
45651 case HImode:
45652 emit_insn (gen_lfloorxfhi2 (res, tmp1));
45653 break;
45654 case SImode:
45655 emit_insn (gen_lfloorxfsi2 (res, tmp1));
45656 break;
45657 case DImode:
45658 emit_insn (gen_lfloorxfdi2 (res, tmp1));
45659 break;
45660 default:
45661 gcc_unreachable ();
45662 }
45663
45664 /* flags = signbit(a) */
45665 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
45666
45667 /* if (flags) then res = -res */
45668 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
45669 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
45670 gen_rtx_LABEL_REF (VOIDmode, jump_label),
45671 pc_rtx);
45672 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45673 predict_jump (REG_BR_PROB_BASE * 50 / 100);
45674 JUMP_LABEL (insn) = jump_label;
45675
45676 emit_insn (gen_neg (res, res));
45677
45678 emit_label (jump_label);
45679 LABEL_NUSES (jump_label) = 1;
45680
45681 emit_move_insn (op0, res);
45682 }
45683
45684 /* Output code to perform a Newton-Raphson approximation of a single precision
45685 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
45686
45687 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
45688 {
45689 rtx x0, x1, e0, e1;
45690
45691 x0 = gen_reg_rtx (mode);
45692 e0 = gen_reg_rtx (mode);
45693 e1 = gen_reg_rtx (mode);
45694 x1 = gen_reg_rtx (mode);
45695
45696 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
45697
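  /* This is one Newton-Raphson iteration refining the hardware reciprocal
     estimate x0: x1 = 2*x0 - b*x0*x0 = x0 * (2 - b*x0).  */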
45698 b = force_reg (mode, b);
45699
45700 /* x0 = rcp(b) estimate */
45701 if (mode == V16SFmode || mode == V8DFmode)
45702 emit_insn (gen_rtx_SET (VOIDmode, x0,
45703 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45704 UNSPEC_RCP14)));
45705 else
45706 emit_insn (gen_rtx_SET (VOIDmode, x0,
45707 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45708 UNSPEC_RCP)));
45709
45710 /* e0 = x0 * b */
45711 emit_insn (gen_rtx_SET (VOIDmode, e0,
45712 gen_rtx_MULT (mode, x0, b)));
45713
45714 /* e0 = x0 * e0 */
45715 emit_insn (gen_rtx_SET (VOIDmode, e0,
45716 gen_rtx_MULT (mode, x0, e0)));
45717
45718 /* e1 = x0 + x0 */
45719 emit_insn (gen_rtx_SET (VOIDmode, e1,
45720 gen_rtx_PLUS (mode, x0, x0)));
45721
45722 /* x1 = e1 - e0 */
45723 emit_insn (gen_rtx_SET (VOIDmode, x1,
45724 gen_rtx_MINUS (mode, e1, e0)));
45725
45726 /* res = a * x1 */
45727 emit_insn (gen_rtx_SET (VOIDmode, res,
45728 gen_rtx_MULT (mode, a, x1)));
45729 }
45730
45731 /* Output code to perform a Newton-Raphson approximation of a
45732 single precision floating point [reciprocal] square root. */
45733
45734 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
45735 bool recip)
45736 {
45737 rtx x0, e0, e1, e2, e3, mthree, mhalf;
45738 REAL_VALUE_TYPE r;
45739 int unspec;
45740
45741 x0 = gen_reg_rtx (mode);
45742 e0 = gen_reg_rtx (mode);
45743 e1 = gen_reg_rtx (mode);
45744 e2 = gen_reg_rtx (mode);
45745 e3 = gen_reg_rtx (mode);
45746
45747 real_from_integer (&r, VOIDmode, -3, SIGNED);
45748 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45749
45750 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45751 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45752 unspec = UNSPEC_RSQRT;
45753
45754 if (VECTOR_MODE_P (mode))
45755 {
45756 mthree = ix86_build_const_vector (mode, true, mthree);
45757 mhalf = ix86_build_const_vector (mode, true, mhalf);
45758 /* There is no 512-bit rsqrt. There is, however, rsqrt14. */
45759 if (GET_MODE_SIZE (mode) == 64)
45760 unspec = UNSPEC_RSQRT14;
45761 }
45762
45763 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45764 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
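/* Both forms are one Newton-Raphson step for 1/sqrt(a): with x0 = rsqrtss(a),
   x1 = -0.5 * x0 * (a * x0 * x0 - 3.0); for sqrt the extra factor a is
   folded into e0 = a * x0 below.  */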
45765
45766 a = force_reg (mode, a);
45767
45768 /* x0 = rsqrt(a) estimate */
45769 emit_insn (gen_rtx_SET (VOIDmode, x0,
45770 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45771 unspec)));
45772
45773 /* If a == 0.0, filter out the infinite rsqrt estimate to avoid a NaN (0 * inf) for sqrt(0.0). */
45774 if (!recip)
45775 {
45776 rtx zero, mask;
45777
45778 zero = gen_reg_rtx (mode);
45779 mask = gen_reg_rtx (mode);
45780
45781 zero = force_reg (mode, CONST0_RTX(mode));
45782
45783 /* Handle masked compare. */
45784 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
45785 {
45786 mask = gen_reg_rtx (HImode);
45787 /* Imm value 0x4 corresponds to not-equal comparison. */
45788 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45789 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
45790 }
45791 else
45792 {
45793 emit_insn (gen_rtx_SET (VOIDmode, mask,
45794 gen_rtx_NE (mode, zero, a)));
45795
45796 emit_insn (gen_rtx_SET (VOIDmode, x0,
45797 gen_rtx_AND (mode, x0, mask)));
45798 }
45799 }
45800
45801 /* e0 = x0 * a */
45802 emit_insn (gen_rtx_SET (VOIDmode, e0,
45803 gen_rtx_MULT (mode, x0, a)));
45804 /* e1 = e0 * x0 */
45805 emit_insn (gen_rtx_SET (VOIDmode, e1,
45806 gen_rtx_MULT (mode, e0, x0)));
45807
45808 /* e2 = e1 - 3. */
45809 mthree = force_reg (mode, mthree);
45810 emit_insn (gen_rtx_SET (VOIDmode, e2,
45811 gen_rtx_PLUS (mode, e1, mthree)));
45812
45813 mhalf = force_reg (mode, mhalf);
45814 if (recip)
45815 /* e3 = -.5 * x0 */
45816 emit_insn (gen_rtx_SET (VOIDmode, e3,
45817 gen_rtx_MULT (mode, x0, mhalf)));
45818 else
45819 /* e3 = -.5 * e0 */
45820 emit_insn (gen_rtx_SET (VOIDmode, e3,
45821 gen_rtx_MULT (mode, e0, mhalf)));
45822 /* ret = e2 * e3 */
45823 emit_insn (gen_rtx_SET (VOIDmode, res,
45824 gen_rtx_MULT (mode, e2, e3)));
45825 }
45826
45827 #ifdef TARGET_SOLARIS
45828 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
45829
45830 static void
45831 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45832 tree decl)
45833 {
45834 /* With Binutils 2.15, the "@unwind" marker must be specified on
45835 every occurrence of the ".eh_frame" section, not just the first
45836 one. */
45837 if (TARGET_64BIT
45838 && strcmp (name, ".eh_frame") == 0)
45839 {
45840 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45841 flags & SECTION_WRITE ? "aw" : "a");
45842 return;
45843 }
45844
45845 #ifndef USE_GAS
45846 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45847 {
45848 solaris_elf_asm_comdat_section (name, flags, decl);
45849 return;
45850 }
45851 #endif
45852
45853 default_elf_asm_named_section (name, flags, decl);
45854 }
45855 #endif /* TARGET_SOLARIS */
45856
45857 /* Return the mangling of TYPE if it is an extended fundamental type. */
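/* For example, under the Itanium C++ ABI "void f (__float128)" mangles
   as "_Z1fg" and "void f (long double)" (XFmode) as "_Z1fe".  */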
45858
45859 static const char *
45860 ix86_mangle_type (const_tree type)
45861 {
45862 type = TYPE_MAIN_VARIANT (type);
45863
45864 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45865 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45866 return NULL;
45867
45868 switch (TYPE_MODE (type))
45869 {
45870 case TFmode:
45871 /* __float128 is "g". */
45872 return "g";
45873 case XFmode:
45874 /* "long double" or __float80 is "e". */
45875 return "e";
45876 default:
45877 return NULL;
45878 }
45879 }
45880
45881 /* For 32-bit code we can save the PIC register setup by using the
45882 hidden function __stack_chk_fail_local instead of calling
45883 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
45884 register, so it is better to call __stack_chk_fail directly. */
45885
45886 static tree ATTRIBUTE_UNUSED
45887 ix86_stack_protect_fail (void)
45888 {
45889 return TARGET_64BIT
45890 ? default_external_stack_protect_fail ()
45891 : default_hidden_stack_protect_fail ();
45892 }
45893
45894 /* Select a format to encode pointers in exception handling data. CODE
45895 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
45896 true if the symbol may be affected by dynamic relocations.
45897
45898 ??? All x86 object file formats are capable of representing this.
45899 After all, the relocation needed is the same as for the call insn.
45900 Whether or not a particular assembler allows us to enter such, I
45901 guess we'll have to see. */
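/* For example, for 64-bit small-model PIC code this returns
   DW_EH_PE_pcrel | DW_EH_PE_sdata4, with DW_EH_PE_indirect added for
   symbols that may be affected by dynamic relocations.  */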
45902 int
45903 asm_preferred_eh_data_format (int code, int global)
45904 {
45905 if (flag_pic)
45906 {
45907 int type = DW_EH_PE_sdata8;
45908 if (!TARGET_64BIT
45909 || ix86_cmodel == CM_SMALL_PIC
45910 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
45911 type = DW_EH_PE_sdata4;
45912 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
45913 }
45914 if (ix86_cmodel == CM_SMALL
45915 || (ix86_cmodel == CM_MEDIUM && code))
45916 return DW_EH_PE_udata4;
45917 return DW_EH_PE_absptr;
45918 }
45919 \f
45920 /* Expand copysign: copy the sign bit of SIGN onto the nonnegative value
45921 ABS_VALUE, storing the result in RESULT. If MASK is non-null, it is the
45922 mask used to clear the sign bit; its complement selects the sign bit. */
45923 static void
45924 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
45925 {
45926 machine_mode mode = GET_MODE (sign);
45927 rtx sgn = gen_reg_rtx (mode);
45928 if (mask == NULL_RTX)
45929 {
45930 machine_mode vmode;
45931
45932 if (mode == SFmode)
45933 vmode = V4SFmode;
45934 else if (mode == DFmode)
45935 vmode = V2DFmode;
45936 else
45937 vmode = mode;
45938
45939 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
45940 if (!VECTOR_MODE_P (mode))
45941 {
45942 /* We need to generate a scalar mode mask in this case. */
45943 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45944 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45945 mask = gen_reg_rtx (mode);
45946 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
45947 }
45948 }
45949 else
45950 mask = gen_rtx_NOT (mode, mask);
45951 emit_insn (gen_rtx_SET (VOIDmode, sgn,
45952 gen_rtx_AND (mode, mask, sign)));
45953 emit_insn (gen_rtx_SET (VOIDmode, result,
45954 gen_rtx_IOR (mode, abs_value, sgn)));
45955 }
45956
45957 /* Expand fabs (OP0) and return a new rtx that holds the result. The
45958 mask for masking out the sign-bit is stored in *SMASK, if that is
45959 non-null. */
45960 static rtx
45961 ix86_expand_sse_fabs (rtx op0, rtx *smask)
45962 {
45963 machine_mode vmode, mode = GET_MODE (op0);
45964 rtx xa, mask;
45965
45966 xa = gen_reg_rtx (mode);
45967 if (mode == SFmode)
45968 vmode = V4SFmode;
45969 else if (mode == DFmode)
45970 vmode = V2DFmode;
45971 else
45972 vmode = mode;
45973 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
45974 if (!VECTOR_MODE_P (mode))
45975 {
45976 /* We need to generate a scalar mode mask in this case. */
45977 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45978 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45979 mask = gen_reg_rtx (mode);
45980 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
45981 }
45982 emit_insn (gen_rtx_SET (VOIDmode, xa,
45983 gen_rtx_AND (mode, op0, mask)));
45984
45985 if (smask)
45986 *smask = mask;
45987
45988 return xa;
45989 }
45990
45991 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
45992 swapping the operands if SWAP_OPERANDS is true. The expanded
45993 code is a forward jump to a newly created label in case the
45994 comparison is true. The generated label rtx is returned. */
45995 static rtx_code_label *
45996 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
45997 bool swap_operands)
45998 {
45999 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
46000 rtx_code_label *label;
46001 rtx tmp;
46002
46003 if (swap_operands)
46004 std::swap (op0, op1);
46005
46006 label = gen_label_rtx ();
46007 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
46008 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46009 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
46010 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
46011 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
46012 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
46013 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
46014 JUMP_LABEL (tmp) = label;
46015
46016 return label;
46017 }
46018
46019 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
46020 using comparison code CODE. Operands are swapped for the comparison if
46021 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
46022 static rtx
46023 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
46024 bool swap_operands)
46025 {
46026 rtx (*insn)(rtx, rtx, rtx, rtx);
46027 machine_mode mode = GET_MODE (op0);
46028 rtx mask = gen_reg_rtx (mode);
46029
46030 if (swap_operands)
46031 std::swap (op0, op1);
46032
46033 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
46034
46035 emit_insn (insn (mask, op0, op1,
46036 gen_rtx_fmt_ee (code, mode, op0, op1)));
46037 return mask;
46038 }
46039
46040 /* Generate and return an rtx of mode MODE for 2**n, where n is the number of
46041 mantissa bits of MODE, which must be DFmode (n == 52) or SFmode (n == 23). */
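/* Adding and then subtracting this constant rounds a nonnegative value
   smaller than 2**n to an integer in the current rounding mode.  */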
46042 static rtx
46043 ix86_gen_TWO52 (machine_mode mode)
46044 {
46045 REAL_VALUE_TYPE TWO52r;
46046 rtx TWO52;
46047
46048 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
46049 TWO52 = const_double_from_real_value (TWO52r, mode);
46050 TWO52 = force_reg (mode, TWO52);
46051
46052 return TWO52;
46053 }
46054
46055 /* Expand SSE sequence for computing lround from OP1 storing
46056 into OP0. */
46057 void
46058 ix86_expand_lround (rtx op0, rtx op1)
46059 {
46060 /* C code for the stuff we're doing below:
46061 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
46062 return (long)tmp;
46063 */
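/* nextafter (0.5, 0.0) is used instead of 0.5 to avoid a spurious round-up:
   for the largest double just below 0.5, adding 0.5 rounds the sum up to 1.0
   and makes the result one too large, while adding the predecessor of 0.5
   keeps the sum below 1.0.  */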
46064 machine_mode mode = GET_MODE (op1);
46065 const struct real_format *fmt;
46066 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46067 rtx adj;
46068
46069 /* load nextafter (0.5, 0.0) */
46070 fmt = REAL_MODE_FORMAT (mode);
46071 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46072 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46073
46074 /* adj = copysign (0.5, op1) */
46075 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46076 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46077
46078 /* adj = op1 + adj */
46079 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
46080
46081 /* op0 = (imode)adj */
46082 expand_fix (op0, adj, 0);
46083 }
46084
46085 /* Expand SSE2 sequence for computing lfloor or lceil (depending on
46086 DO_FLOOR) from OPERAND1, storing into OPERAND0. */
46087 void
46088 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46089 {
46090 /* C code for the stuff we're doing below (for do_floor):
46091 xi = (long)op1;
46092 xi -= (double)xi > op1 ? 1 : 0;
46093 return xi;
46094 */
46095 machine_mode fmode = GET_MODE (op1);
46096 machine_mode imode = GET_MODE (op0);
46097 rtx ireg, freg, tmp;
46098 rtx_code_label *label;
46099
46100 /* reg = (long)op1 */
46101 ireg = gen_reg_rtx (imode);
46102 expand_fix (ireg, op1, 0);
46103
46104 /* freg = (double)reg */
46105 freg = gen_reg_rtx (fmode);
46106 expand_float (freg, ireg, 0);
46107
46108 /* ireg = (freg > op1) ? ireg - 1 : ireg */
46109 label = ix86_expand_sse_compare_and_jump (UNLE,
46110 freg, op1, !do_floor);
46111 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46112 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46113 emit_move_insn (ireg, tmp);
46114
46115 emit_label (label);
46116 LABEL_NUSES (label) = 1;
46117
46118 emit_move_insn (op0, ireg);
46119 }
46120
46121 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46122 result in OPERAND0. */
46123 void
46124 ix86_expand_rint (rtx operand0, rtx operand1)
46125 {
46126 /* C code for the stuff we're doing below:
46127 xa = fabs (operand1);
46128 if (!isless (xa, 2**52))
46129 return operand1;
46130 xa = xa + 2**52 - 2**52;
46131 return copysign (xa, operand1);
46132 */
46133 machine_mode mode = GET_MODE (operand0);
46134 rtx res, xa, TWO52, mask;
46135 rtx_code_label *label;
46136
46137 res = gen_reg_rtx (mode);
46138 emit_move_insn (res, operand1);
46139
46140 /* xa = abs (operand1) */
46141 xa = ix86_expand_sse_fabs (res, &mask);
46142
46143 /* if (!isless (xa, TWO52)) goto label; */
46144 TWO52 = ix86_gen_TWO52 (mode);
46145 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46146
46147 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46148 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46149
46150 ix86_sse_copysign_to_positive (res, xa, res, mask);
46151
46152 emit_label (label);
46153 LABEL_NUSES (label) = 1;
46154
46155 emit_move_insn (operand0, res);
46156 }
46157
46158 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into
46159 OPERAND0 without relying on DImode truncation via cvttsd2siq (64-bit only). */
46160 void
46161 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46162 {
46163 /* C code for the stuff we expand below.
46164 double xa = fabs (x), x2;
46165 if (!isless (xa, TWO52))
46166 return x;
46167 xa = xa + TWO52 - TWO52;
46168 x2 = copysign (xa, x);
46169 Compensate. Floor:
46170 if (x2 > x)
46171 x2 -= 1;
46172 Compensate. Ceil:
46173 if (x2 < x)
46174 x2 -= -1;
46175 return x2;
46176 */
46177 machine_mode mode = GET_MODE (operand0);
46178 rtx xa, TWO52, tmp, one, res, mask;
46179 rtx_code_label *label;
46180
46181 TWO52 = ix86_gen_TWO52 (mode);
46182
46183 /* Temporary for holding the result, initialized to the input
46184 operand to ease control flow. */
46185 res = gen_reg_rtx (mode);
46186 emit_move_insn (res, operand1);
46187
46188 /* xa = abs (operand1) */
46189 xa = ix86_expand_sse_fabs (res, &mask);
46190
46191 /* if (!isless (xa, TWO52)) goto label; */
46192 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46193
46194 /* xa = xa + TWO52 - TWO52; */
46195 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46196 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46197
46198 /* xa = copysign (xa, operand1) */
46199 ix86_sse_copysign_to_positive (xa, xa, res, mask);
46200
46201 /* generate 1.0 or -1.0 */
46202 one = force_reg (mode,
46203 const_double_from_real_value (do_floor
46204 ? dconst1 : dconstm1, mode));
46205
46206 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46207 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46208 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46209 gen_rtx_AND (mode, one, tmp)));
46210 /* We always need to subtract here to preserve signed zero. */
46211 tmp = expand_simple_binop (mode, MINUS,
46212 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46213 emit_move_insn (res, tmp);
46214
46215 emit_label (label);
46216 LABEL_NUSES (label) = 1;
46217
46218 emit_move_insn (operand0, res);
46219 }
46220
46221 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46222 into OPERAND0. */
46223 void
46224 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46225 {
46226 /* C code for the stuff we expand below.
46227 double xa = fabs (x), x2;
46228 if (!isless (xa, TWO52))
46229 return x;
46230 x2 = (double)(long)x;
46231 Compensate. Floor:
46232 if (x2 > x)
46233 x2 -= 1;
46234 Compensate. Ceil:
46235 if (x2 < x)
46236 x2 += 1;
46237 if (HONOR_SIGNED_ZEROS (mode))
46238 return copysign (x2, x);
46239 return x2;
46240 */
46241 machine_mode mode = GET_MODE (operand0);
46242 rtx xa, xi, TWO52, tmp, one, res, mask;
46243 rtx_code_label *label;
46244
46245 TWO52 = ix86_gen_TWO52 (mode);
46246
46247 /* Temporary for holding the result, initialized to the input
46248 operand to ease control flow. */
46249 res = gen_reg_rtx (mode);
46250 emit_move_insn (res, operand1);
46251
46252 /* xa = abs (operand1) */
46253 xa = ix86_expand_sse_fabs (res, &mask);
46254
46255 /* if (!isless (xa, TWO52)) goto label; */
46256 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46257
46258 /* xa = (double)(long)x */
46259 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46260 expand_fix (xi, res, 0);
46261 expand_float (xa, xi, 0);
46262
46263 /* generate 1.0 */
46264 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46265
46266 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46267 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46268 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46269 gen_rtx_AND (mode, one, tmp)));
46270 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46271 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46272 emit_move_insn (res, tmp);
46273
46274 if (HONOR_SIGNED_ZEROS (mode))
46275 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46276
46277 emit_label (label);
46278 LABEL_NUSES (label) = 1;
46279
46280 emit_move_insn (operand0, res);
46281 }
46282
46283 /* Expand SSE sequence for computing round from OPERAND1 storing
46284 into OPERAND0. Sequence that works without relying on DImode truncation
46285 via cvttsd2siq that is only available on 64bit targets. */
46286 void
46287 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46288 {
46289 /* C code for the stuff we expand below.
46290 double xa = fabs (x), xa2, x2;
46291 if (!isless (xa, TWO52))
46292 return x;
46293 Using the absolute value and copying back sign makes
46294 -0.0 -> -0.0 correct.
46295 xa2 = xa + TWO52 - TWO52;
46296 Compensate.
46297 dxa = xa2 - xa;
46298 if (dxa <= -0.5)
46299 xa2 += 1;
46300 else if (dxa > 0.5)
46301 xa2 -= 1;
46302 x2 = copysign (xa2, x);
46303 return x2;
46304 */
46305 machine_mode mode = GET_MODE (operand0);
46306 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46307 rtx_code_label *label;
46308
46309 TWO52 = ix86_gen_TWO52 (mode);
46310
46311 /* Temporary for holding the result, initialized to the input
46312 operand to ease control flow. */
46313 res = gen_reg_rtx (mode);
46314 emit_move_insn (res, operand1);
46315
46316 /* xa = abs (operand1) */
46317 xa = ix86_expand_sse_fabs (res, &mask);
46318
46319 /* if (!isless (xa, TWO52)) goto label; */
46320 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46321
46322 /* xa2 = xa + TWO52 - TWO52; */
46323 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46324 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46325
46326 /* dxa = xa2 - xa; */
46327 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46328
46329 /* generate 0.5, 1.0 and -0.5 */
46330 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46331 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46332 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46333 0, OPTAB_DIRECT);
46334
46335 /* Compensate. */
46336 tmp = gen_reg_rtx (mode);
46337 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46338 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46339 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46340 gen_rtx_AND (mode, one, tmp)));
46341 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46342 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46343 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46344 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46345 gen_rtx_AND (mode, one, tmp)));
46346 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46347
46348 /* res = copysign (xa2, operand1) */
46349 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46350
46351 emit_label (label);
46352 LABEL_NUSES (label) = 1;
46353
46354 emit_move_insn (operand0, res);
46355 }
46356
46357 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46358 into OPERAND0. */
46359 void
46360 ix86_expand_trunc (rtx operand0, rtx operand1)
46361 {
46362 /* C code for SSE variant we expand below.
46363 double xa = fabs (x), x2;
46364 if (!isless (xa, TWO52))
46365 return x;
46366 x2 = (double)(long)x;
46367 if (HONOR_SIGNED_ZEROS (mode))
46368 return copysign (x2, x);
46369 return x2;
46370 */
46371 machine_mode mode = GET_MODE (operand0);
46372 rtx xa, xi, TWO52, res, mask;
46373 rtx_code_label *label;
46374
46375 TWO52 = ix86_gen_TWO52 (mode);
46376
46377 /* Temporary for holding the result, initialized to the input
46378 operand to ease control flow. */
46379 res = gen_reg_rtx (mode);
46380 emit_move_insn (res, operand1);
46381
46382 /* xa = abs (operand1) */
46383 xa = ix86_expand_sse_fabs (res, &mask);
46384
46385 /* if (!isless (xa, TWO52)) goto label; */
46386 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46387
46388 /* x = (double)(long)x */
46389 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46390 expand_fix (xi, res, 0);
46391 expand_float (res, xi, 0);
46392
46393 if (HONOR_SIGNED_ZEROS (mode))
46394 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46395
46396 emit_label (label);
46397 LABEL_NUSES (label) = 1;
46398
46399 emit_move_insn (operand0, res);
46400 }
46401
46402 /* Expand SSE sequence for computing trunc from OPERAND1 storing into
46403 OPERAND0 without relying on DImode truncation via cvttsd2siq (64-bit only). */
46404 void
46405 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46406 {
46407 machine_mode mode = GET_MODE (operand0);
46408 rtx xa, mask, TWO52, one, res, smask, tmp;
46409 rtx_code_label *label;
46410
46411 /* C code for SSE variant we expand below.
46412 double xa = fabs (x), xa2, x2;
46413 if (!isless (xa, TWO52))
46414 return x;
46415 xa2 = xa + TWO52 - TWO52;
46416 Compensate:
46417 if (xa2 > xa)
46418 xa2 -= 1.0;
46419 x2 = copysign (xa2, x);
46420 return x2;
46421 */
46422
46423 TWO52 = ix86_gen_TWO52 (mode);
46424
46425 /* Temporary for holding the result, initialized to the input
46426 operand to ease control flow. */
46427 res = gen_reg_rtx (mode);
46428 emit_move_insn (res, operand1);
46429
46430 /* xa = abs (operand1) */
46431 xa = ix86_expand_sse_fabs (res, &smask);
46432
46433 /* if (!isless (xa, TWO52)) goto label; */
46434 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46435
46436 /* res = xa + TWO52 - TWO52; */
46437 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46438 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46439 emit_move_insn (res, tmp);
46440
46441 /* generate 1.0 */
46442 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46443
46444 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46445 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46446 emit_insn (gen_rtx_SET (VOIDmode, mask,
46447 gen_rtx_AND (mode, mask, one)));
46448 tmp = expand_simple_binop (mode, MINUS,
46449 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46450 emit_move_insn (res, tmp);
46451
46452 /* res = copysign (res, operand1) */
46453 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46454
46455 emit_label (label);
46456 LABEL_NUSES (label) = 1;
46457
46458 emit_move_insn (operand0, res);
46459 }
46460
46461 /* Expand SSE sequence for computing round from OPERAND1 storing
46462 into OPERAND0. */
46463 void
46464 ix86_expand_round (rtx operand0, rtx operand1)
46465 {
46466 /* C code for the stuff we're doing below:
46467 double xa = fabs (x);
46468 if (!isless (xa, TWO52))
46469 return x;
46470 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46471 return copysign (xa, x);
46472 */
46473 machine_mode mode = GET_MODE (operand0);
46474 rtx res, TWO52, xa, xi, half, mask;
46475 rtx_code_label *label;
46476 const struct real_format *fmt;
46477 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46478
46479 /* Temporary for holding the result, initialized to the input
46480 operand to ease control flow. */
46481 res = gen_reg_rtx (mode);
46482 emit_move_insn (res, operand1);
46483
46484 TWO52 = ix86_gen_TWO52 (mode);
46485 xa = ix86_expand_sse_fabs (res, &mask);
46486 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46487
46488 /* load nextafter (0.5, 0.0) */
46489 fmt = REAL_MODE_FORMAT (mode);
46490 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46491 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46492
46493 /* xa = xa + 0.5 */
46494 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46495 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46496
46497 /* xa = (double)(int64_t)xa */
46498 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46499 expand_fix (xi, xa, 0);
46500 expand_float (xa, xi, 0);
46501
46502 /* res = copysign (xa, operand1) */
46503 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46504
46505 emit_label (label);
46506 LABEL_NUSES (label) = 1;
46507
46508 emit_move_insn (operand0, res);
46509 }
46510
46511 /* Expand SSE sequence for computing round
46512 from OP1 storing into OP0 using sse4 round insn. */
46513 void
46514 ix86_expand_round_sse4 (rtx op0, rtx op1)
46515 {
46516 machine_mode mode = GET_MODE (op0);
46517 rtx e1, e2, res, half;
46518 const struct real_format *fmt;
46519 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46520 rtx (*gen_copysign) (rtx, rtx, rtx);
46521 rtx (*gen_round) (rtx, rtx, rtx);
46522
46523 switch (mode)
46524 {
46525 case SFmode:
46526 gen_copysign = gen_copysignsf3;
46527 gen_round = gen_sse4_1_roundsf2;
46528 break;
46529 case DFmode:
46530 gen_copysign = gen_copysigndf3;
46531 gen_round = gen_sse4_1_rounddf2;
46532 break;
46533 default:
46534 gcc_unreachable ();
46535 }
46536
46537 /* round (a) = trunc (a + copysign (0.5, a)) */
46538
46539 /* load nextafter (0.5, 0.0) */
46540 fmt = REAL_MODE_FORMAT (mode);
46541 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46542 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46543 half = const_double_from_real_value (pred_half, mode);
46544
46545 /* e1 = copysign (0.5, op1) */
46546 e1 = gen_reg_rtx (mode);
46547 emit_insn (gen_copysign (e1, half, op1));
46548
46549 /* e2 = op1 + e1 */
46550 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46551
46552 /* res = trunc (e2) */
46553 res = gen_reg_rtx (mode);
46554 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46555
46556 emit_move_insn (op0, res);
46557 }
46558 \f
46559
46560 /* Table of valid machine attributes. */
46561 static const struct attribute_spec ix86_attribute_table[] =
46562 {
46563 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46564 affects_type_identity } */
46565 /* Stdcall attribute says callee is responsible for popping arguments
46566 if they are not variable. */
46567 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46568 true },
46569 /* Fastcall attribute says callee is responsible for popping arguments
46570 if they are not variable. */
46571 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46572 true },
46573 /* Thiscall attribute says callee is responsible for popping arguments
46574 if they are not variable. */
46575 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46576 true },
46577 /* Cdecl attribute says the callee is a normal C declaration */
46578 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46579 true },
46580 /* Regparm attribute specifies how many integer arguments are to be
46581 passed in registers. */
46582 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
46583 true },
46584 /* Sseregparm attribute says we are using x86_64 calling conventions
46585 for FP arguments. */
46586 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46587 true },
46588 /* The transactional memory builtins are implicitly regparm or fastcall
46589 depending on the ABI. Override the generic do-nothing attribute that
46590 these builtins were declared with. */
46591 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46592 true },
46593 /* force_align_arg_pointer says this function realigns the stack at entry. */
46594 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46595 false, true, true, ix86_handle_cconv_attribute, false },
46596 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46597 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46598 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46599 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
46600 false },
46601 #endif
46602 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46603 false },
46604 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46605 false },
46606 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46607 SUBTARGET_ATTRIBUTE_TABLE,
46608 #endif
46609 /* ms_abi and sysv_abi calling convention function attributes. */
46610 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46611 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46612 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46613 false },
46614 { "callee_pop_aggregate_return", 1, 1, false, true, true,
46615 ix86_handle_callee_pop_aggregate_return, true },
46616 /* End element. */
46617 { NULL, 0, 0, false, false, false, NULL, false }
46618 };
46619
46620 /* Implement targetm.vectorize.builtin_vectorization_cost. */
46621 static int
46622 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46623 tree vectype, int)
46624 {
46625 unsigned elements;
46626
46627 switch (type_of_cost)
46628 {
46629 case scalar_stmt:
46630 return ix86_cost->scalar_stmt_cost;
46631
46632 case scalar_load:
46633 return ix86_cost->scalar_load_cost;
46634
46635 case scalar_store:
46636 return ix86_cost->scalar_store_cost;
46637
46638 case vector_stmt:
46639 return ix86_cost->vec_stmt_cost;
46640
46641 case vector_load:
46642 return ix86_cost->vec_align_load_cost;
46643
46644 case vector_store:
46645 return ix86_cost->vec_store_cost;
46646
46647 case vec_to_scalar:
46648 return ix86_cost->vec_to_scalar_cost;
46649
46650 case scalar_to_vec:
46651 return ix86_cost->scalar_to_vec_cost;
46652
46653 case unaligned_load:
46654 case unaligned_store:
46655 return ix86_cost->vec_unalign_load_cost;
46656
46657 case cond_branch_taken:
46658 return ix86_cost->cond_taken_branch_cost;
46659
46660 case cond_branch_not_taken:
46661 return ix86_cost->cond_not_taken_branch_cost;
46662
46663 case vec_perm:
46664 case vec_promote_demote:
46665 return ix86_cost->vec_stmt_cost;
46666
46667 case vec_construct:
46668 elements = TYPE_VECTOR_SUBPARTS (vectype);
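/* Cost a vector construction as roughly one insn per pair of elements
   plus one; a heuristic rather than a value from the cost tables.  */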
46669 return elements / 2 + 1;
46670
46671 default:
46672 gcc_unreachable ();
46673 }
46674 }
46675
46676 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46677 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46678 insn every time. */
46679
46680 static GTY(()) rtx_insn *vselect_insn;
46681
46682 /* Initialize vselect_insn. */
46683
46684 static void
46685 init_vselect_insn (void)
46686 {
46687 unsigned i;
46688 rtx x;
46689
46690 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46691 for (i = 0; i < MAX_VECT_LEN; ++i)
46692 XVECEXP (x, 0, i) = const0_rtx;
46693 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46694 const0_rtx), x);
46695 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
46696 start_sequence ();
46697 vselect_insn = emit_insn (x);
46698 end_sequence ();
46699 }
46700
46701 /* Construct (set target (vec_select op0 (parallel perm))) and
46702 return true if that's a valid instruction in the active ISA. */
46703
46704 static bool
46705 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46706 unsigned nelt, bool testing_p)
46707 {
46708 unsigned int i;
46709 rtx x, save_vconcat;
46710 int icode;
46711
46712 if (vselect_insn == NULL_RTX)
46713 init_vselect_insn ();
46714
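/* Splice the requested selector and operands into the cached insn,
   let recog_memoized decide whether it matches a real instruction,
   and then restore the insn to its neutral state below.  */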
46715 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46716 PUT_NUM_ELEM (XVEC (x, 0), nelt);
46717 for (i = 0; i < nelt; ++i)
46718 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46719 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46720 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46721 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46722 SET_DEST (PATTERN (vselect_insn)) = target;
46723 icode = recog_memoized (vselect_insn);
46724
46725 if (icode >= 0 && !testing_p)
46726 emit_insn (copy_rtx (PATTERN (vselect_insn)));
46727
46728 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46729 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46730 INSN_CODE (vselect_insn) = -1;
46731
46732 return icode >= 0;
46733 }
46734
46735 /* Similar, but generate a vec_concat from op0 and op1 as well. */
46736
46737 static bool
46738 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46739 const unsigned char *perm, unsigned nelt,
46740 bool testing_p)
46741 {
46742 machine_mode v2mode;
46743 rtx x;
46744 bool ok;
46745
46746 if (vselect_insn == NULL_RTX)
46747 init_vselect_insn ();
46748
46749 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
46750 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46751 PUT_MODE (x, v2mode);
46752 XEXP (x, 0) = op0;
46753 XEXP (x, 1) = op1;
46754 ok = expand_vselect (target, x, perm, nelt, testing_p);
46755 XEXP (x, 0) = const0_rtx;
46756 XEXP (x, 1) = const0_rtx;
46757 return ok;
46758 }
46759
46760 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46761 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
46762
46763 static bool
46764 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46765 {
46766 machine_mode vmode = d->vmode;
46767 unsigned i, mask, nelt = d->nelt;
46768 rtx target, op0, op1, x;
46769 rtx rperm[32], vperm;
46770
46771 if (d->one_operand_p)
46772 return false;
46773 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46774 && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
46775 ;
46776 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46777 ;
46778 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46779 ;
46780 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46781 ;
46782 else
46783 return false;
46784
46785 /* This is a blend, not a permute. Elements must stay in their
46786 respective lanes. */
46787 for (i = 0; i < nelt; ++i)
46788 {
46789 unsigned e = d->perm[i];
46790 if (!(e == i || e == i + nelt))
46791 return false;
46792 }
46793
46794 if (d->testing_p)
46795 return true;
46796
46797 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
46798 decision should be extracted elsewhere, so that we only try that
46799 sequence once all budget==3 options have been tried. */
46800 target = d->target;
46801 op0 = d->op0;
46802 op1 = d->op1;
46803 mask = 0;
46804
46805 switch (vmode)
46806 {
46807 case V8DFmode:
46808 case V16SFmode:
46809 case V4DFmode:
46810 case V8SFmode:
46811 case V2DFmode:
46812 case V4SFmode:
46813 case V8HImode:
46814 case V8SImode:
46815 case V32HImode:
46816 case V64QImode:
46817 case V16SImode:
46818 case V8DImode:
46819 for (i = 0; i < nelt; ++i)
46820 mask |= (d->perm[i] >= nelt) << i;
46821 break;
46822
46823 case V2DImode:
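/* Do the blend as a pblendw on the V8HImode view; each V2DImode
   element corresponds to four bits of the pblendw immediate.  */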
46824 for (i = 0; i < 2; ++i)
46825 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
46826 vmode = V8HImode;
46827 goto do_subreg;
46828
46829 case V4SImode:
46830 for (i = 0; i < 4; ++i)
46831 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46832 vmode = V8HImode;
46833 goto do_subreg;
46834
46835 case V16QImode:
46836 /* See if bytes move in pairs so we can use pblendw with
46837 an immediate argument, rather than pblendvb with a vector
46838 argument. */
46839 for (i = 0; i < 16; i += 2)
46840 if (d->perm[i] + 1 != d->perm[i + 1])
46841 {
46842 use_pblendvb:
46843 for (i = 0; i < nelt; ++i)
46844 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46845
46846 finish_pblendvb:
46847 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46848 vperm = force_reg (vmode, vperm);
46849
46850 if (GET_MODE_SIZE (vmode) == 16)
46851 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46852 else
46853 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
46854 if (target != d->target)
46855 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46856 return true;
46857 }
46858
46859 for (i = 0; i < 8; ++i)
46860 mask |= (d->perm[i * 2] >= 16) << i;
46861 vmode = V8HImode;
46862 /* FALLTHRU */
46863
46864 do_subreg:
46865 target = gen_reg_rtx (vmode);
46866 op0 = gen_lowpart (vmode, op0);
46867 op1 = gen_lowpart (vmode, op1);
46868 break;
46869
46870 case V32QImode:
46871 /* See if bytes move in pairs. If not, vpblendvb must be used. */
46872 for (i = 0; i < 32; i += 2)
46873 if (d->perm[i] + 1 != d->perm[i + 1])
46874 goto use_pblendvb;
46875 /* See if bytes move in quadruplets. If yes, vpblendd
46876 with immediate can be used. */
46877 for (i = 0; i < 32; i += 4)
46878 if (d->perm[i] + 2 != d->perm[i + 2])
46879 break;
46880 if (i < 32)
46881 {
46882 /* See if bytes move the same in both lanes. If yes,
46883 vpblendw with immediate can be used. */
46884 for (i = 0; i < 16; i += 2)
46885 if (d->perm[i] + 16 != d->perm[i + 16])
46886 goto use_pblendvb;
46887
46888 /* Use vpblendw. */
46889 for (i = 0; i < 16; ++i)
46890 mask |= (d->perm[i * 2] >= 32) << i;
46891 vmode = V16HImode;
46892 goto do_subreg;
46893 }
46894
46895 /* Use vpblendd. */
46896 for (i = 0; i < 8; ++i)
46897 mask |= (d->perm[i * 4] >= 32) << i;
46898 vmode = V8SImode;
46899 goto do_subreg;
46900
46901 case V16HImode:
46902 /* See if words move in pairs. If yes, vpblendd can be used. */
46903 for (i = 0; i < 16; i += 2)
46904 if (d->perm[i] + 1 != d->perm[i + 1])
46905 break;
46906 if (i < 16)
46907 {
46908 /* See if words move the same in both lanes. If not,
46909 vpblendvb must be used. */
46910 for (i = 0; i < 8; i++)
46911 if (d->perm[i] + 8 != d->perm[i + 8])
46912 {
46913 /* Use vpblendvb. */
46914 for (i = 0; i < 32; ++i)
46915 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
46916
46917 vmode = V32QImode;
46918 nelt = 32;
46919 target = gen_reg_rtx (vmode);
46920 op0 = gen_lowpart (vmode, op0);
46921 op1 = gen_lowpart (vmode, op1);
46922 goto finish_pblendvb;
46923 }
46924
46925 /* Use vpblendw. */
46926 for (i = 0; i < 16; ++i)
46927 mask |= (d->perm[i] >= 16) << i;
46928 break;
46929 }
46930
46931 /* Use vpblendd. */
46932 for (i = 0; i < 8; ++i)
46933 mask |= (d->perm[i * 2] >= 16) << i;
46934 vmode = V8SImode;
46935 goto do_subreg;
46936
46937 case V4DImode:
46938 /* Use vpblendd. */
46939 for (i = 0; i < 4; ++i)
46940 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46941 vmode = V8SImode;
46942 goto do_subreg;
46943
46944 default:
46945 gcc_unreachable ();
46946 }
46947
46948 /* This matches five different patterns with the different modes. */
46949 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
46950 x = gen_rtx_SET (VOIDmode, target, x);
46951 emit_insn (x);
46952 if (target != d->target)
46953 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46954
46955 return true;
46956 }
46957
46958 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46959 in terms of the variable form of vpermilps.
46960
46961 Note that we will have already failed the immediate input vpermilps,
46962 which requires that the high and low part shuffle be identical; the
46963 variable form doesn't require that. */
46964
46965 static bool
46966 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
46967 {
46968 rtx rperm[8], vperm;
46969 unsigned i;
46970
46971 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
46972 return false;
46973
46974 /* We can only permute within the 128-bit lane. */
46975 for (i = 0; i < 8; ++i)
46976 {
46977 unsigned e = d->perm[i];
46978 if (i < 4 ? e >= 4 : e < 4)
46979 return false;
46980 }
46981
46982 if (d->testing_p)
46983 return true;
46984
46985 for (i = 0; i < 8; ++i)
46986 {
46987 unsigned e = d->perm[i];
46988
46989 /* Within each 128-bit lane, the elements of op0 are numbered
46990 from 0 and the elements of op1 are numbered from 4. */
46991 if (e >= 8 + 4)
46992 e -= 8;
46993 else if (e >= 4)
46994 e -= 4;
46995
46996 rperm[i] = GEN_INT (e);
46997 }
46998
46999 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
47000 vperm = force_reg (V8SImode, vperm);
47001 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
47002
47003 return true;
47004 }
47005
47006 /* Return true if permutation D can be performed as VMODE permutation
47007 instead. */
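/* For example, a V16QImode permutation whose indices move in aligned groups
   of four consecutive bytes, such as { 4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11 },
   can be performed as the V4SImode permutation { 1 0 3 2 }.  */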
47008
47009 static bool
47010 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
47011 {
47012 unsigned int i, j, chunk;
47013
47014 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
47015 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
47016 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
47017 return false;
47018
47019 if (GET_MODE_NUNITS (vmode) >= d->nelt)
47020 return true;
47021
47022 chunk = d->nelt / GET_MODE_NUNITS (vmode);
47023 for (i = 0; i < d->nelt; i += chunk)
47024 if (d->perm[i] & (chunk - 1))
47025 return false;
47026 else
47027 for (j = 1; j < chunk; ++j)
47028 if (d->perm[i] + j != d->perm[i + j])
47029 return false;
47030
47031 return true;
47032 }
47033
47034 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47035 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
47036
47037 static bool
47038 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
47039 {
47040 unsigned i, nelt, eltsz, mask;
47041 unsigned char perm[64];
47042 machine_mode vmode = V16QImode;
47043 rtx rperm[64], vperm, target, op0, op1;
47044
47045 nelt = d->nelt;
47046
47047 if (!d->one_operand_p)
47048 {
47049 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
47050 {
47051 if (TARGET_AVX2
47052 && valid_perm_using_mode_p (V2TImode, d))
47053 {
47054 if (d->testing_p)
47055 return true;
47056
47057 /* Use vperm2i128 insn. The pattern uses
47058 V4DImode instead of V2TImode. */
47059 target = d->target;
47060 if (d->vmode != V4DImode)
47061 target = gen_reg_rtx (V4DImode);
47062 op0 = gen_lowpart (V4DImode, d->op0);
47063 op1 = gen_lowpart (V4DImode, d->op1);
47064 rperm[0]
47065 = GEN_INT ((d->perm[0] / (nelt / 2))
47066 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
47067 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47068 if (target != d->target)
47069 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47070 return true;
47071 }
47072 return false;
47073 }
47074 }
47075 else
47076 {
47077 if (GET_MODE_SIZE (d->vmode) == 16)
47078 {
47079 if (!TARGET_SSSE3)
47080 return false;
47081 }
47082 else if (GET_MODE_SIZE (d->vmode) == 32)
47083 {
47084 if (!TARGET_AVX2)
47085 return false;
47086
47087 /* V4DImode should already have been handled through
47088 expand_vselect by the vpermq instruction. */
47089 gcc_assert (d->vmode != V4DImode);
47090
47091 vmode = V32QImode;
47092 if (d->vmode == V8SImode
47093 || d->vmode == V16HImode
47094 || d->vmode == V32QImode)
47095 {
47096 /* First see if vpermq can be used for
47097 V8SImode/V16HImode/V32QImode. */
47098 if (valid_perm_using_mode_p (V4DImode, d))
47099 {
47100 for (i = 0; i < 4; i++)
47101 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47102 if (d->testing_p)
47103 return true;
47104 target = gen_reg_rtx (V4DImode);
47105 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47106 perm, 4, false))
47107 {
47108 emit_move_insn (d->target,
47109 gen_lowpart (d->vmode, target));
47110 return true;
47111 }
47112 return false;
47113 }
47114
47115 /* Next see if vpermd can be used. */
47116 if (valid_perm_using_mode_p (V8SImode, d))
47117 vmode = V8SImode;
47118 }
47119 /* Or if vpermps can be used. */
47120 else if (d->vmode == V8SFmode)
47121 vmode = V8SImode;
47122
47123 if (vmode == V32QImode)
47124 {
47125 /* vpshufb only works within 128-bit lanes; it is not
47126 possible to shuffle bytes between the lanes. */
47127 for (i = 0; i < nelt; ++i)
47128 if ((d->perm[i] ^ i) & (nelt / 2))
47129 return false;
47130 }
47131 }
47132 else if (GET_MODE_SIZE (d->vmode) == 64)
47133 {
47134 if (!TARGET_AVX512BW)
47135 return false;
47136
47137 /* If vpermq didn't work, vpshufb won't work either. */
47138 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47139 return false;
47140
47141 vmode = V64QImode;
47142 if (d->vmode == V16SImode
47143 || d->vmode == V32HImode
47144 || d->vmode == V64QImode)
47145 {
47146 /* First see if vpermq can be used for
47147 V16SImode/V32HImode/V64QImode. */
47148 if (valid_perm_using_mode_p (V8DImode, d))
47149 {
47150 for (i = 0; i < 8; i++)
47151 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47152 if (d->testing_p)
47153 return true;
47154 target = gen_reg_rtx (V8DImode);
47155 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47156 perm, 8, false))
47157 {
47158 emit_move_insn (d->target,
47159 gen_lowpart (d->vmode, target));
47160 return true;
47161 }
47162 return false;
47163 }
47164
47165 /* Next see if vpermd can be used. */
47166 if (valid_perm_using_mode_p (V16SImode, d))
47167 vmode = V16SImode;
47168 }
47169 /* Or if vpermps can be used. */
47170 else if (d->vmode == V16SFmode)
47171 vmode = V16SImode;
47172 if (vmode == V64QImode)
47173 {
47174 /* vpshufb only works within 128-bit lanes; it is not
47175 possible to shuffle bytes between the lanes. */
47176 for (i = 0; i < nelt; ++i)
47177 if ((d->perm[i] ^ i) & (nelt / 4))
47178 return false;
47179 }
47180 }
47181 else
47182 return false;
47183 }
47184
47185 if (d->testing_p)
47186 return true;
47187
47188 if (vmode == V8SImode)
47189 for (i = 0; i < 8; ++i)
47190 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47191 else if (vmode == V16SImode)
47192 for (i = 0; i < 16; ++i)
47193 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47194 else
47195 {
47196 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47197 if (!d->one_operand_p)
47198 mask = 2 * nelt - 1;
47199 else if (vmode == V16QImode)
47200 mask = nelt - 1;
47201 else if (vmode == V64QImode)
47202 mask = nelt / 4 - 1;
47203 else
47204 mask = nelt / 2 - 1;
47205
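/* Expand the element permutation into a byte permutation: output
   byte i * eltsz + j selects source byte e * eltsz + j.  */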
47206 for (i = 0; i < nelt; ++i)
47207 {
47208 unsigned j, e = d->perm[i] & mask;
47209 for (j = 0; j < eltsz; ++j)
47210 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47211 }
47212 }
47213
47214 vperm = gen_rtx_CONST_VECTOR (vmode,
47215 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47216 vperm = force_reg (vmode, vperm);
47217
47218 target = d->target;
47219 if (d->vmode != vmode)
47220 target = gen_reg_rtx (vmode);
47221 op0 = gen_lowpart (vmode, d->op0);
47222 if (d->one_operand_p)
47223 {
47224 if (vmode == V16QImode)
47225 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47226 else if (vmode == V32QImode)
47227 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47228 else if (vmode == V64QImode)
47229 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47230 else if (vmode == V8SFmode)
47231 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47232 else if (vmode == V8SImode)
47233 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47234 else if (vmode == V16SFmode)
47235 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47236 else if (vmode == V16SImode)
47237 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47238 else
47239 gcc_unreachable ();
47240 }
47241 else
47242 {
47243 op1 = gen_lowpart (vmode, d->op1);
47244 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47245 }
47246 if (target != d->target)
47247 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47248
47249 return true;
47250 }
47251
47252 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47253 in a single instruction. */
47254
47255 static bool
47256 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47257 {
47258 unsigned i, nelt = d->nelt;
47259 unsigned char perm2[MAX_VECT_LEN];
47260
47261 /* Check plain VEC_SELECT first, because AVX has instructions that could
47262 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47263 input where SEL+CONCAT may not. */
47264 if (d->one_operand_p)
47265 {
47266 int mask = nelt - 1;
47267 bool identity_perm = true;
47268 bool broadcast_perm = true;
47269
47270 for (i = 0; i < nelt; i++)
47271 {
47272 perm2[i] = d->perm[i] & mask;
47273 if (perm2[i] != i)
47274 identity_perm = false;
47275 if (perm2[i])
47276 broadcast_perm = false;
47277 }
47278
47279 if (identity_perm)
47280 {
47281 if (!d->testing_p)
47282 emit_move_insn (d->target, d->op0);
47283 return true;
47284 }
47285 else if (broadcast_perm && TARGET_AVX2)
47286 {
47287 /* Use vpbroadcast{b,w,d}. */
47288 rtx (*gen) (rtx, rtx) = NULL;
47289 switch (d->vmode)
47290 {
47291 case V64QImode:
47292 if (TARGET_AVX512BW)
47293 gen = gen_avx512bw_vec_dupv64qi_1;
47294 break;
47295 case V32QImode:
47296 gen = gen_avx2_pbroadcastv32qi_1;
47297 break;
47298 case V32HImode:
47299 if (TARGET_AVX512BW)
47300 gen = gen_avx512bw_vec_dupv32hi_1;
47301 break;
47302 case V16HImode:
47303 gen = gen_avx2_pbroadcastv16hi_1;
47304 break;
47305 case V16SImode:
47306 if (TARGET_AVX512F)
47307 gen = gen_avx512f_vec_dupv16si_1;
47308 break;
47309 case V8SImode:
47310 gen = gen_avx2_pbroadcastv8si_1;
47311 break;
47312 case V16QImode:
47313 gen = gen_avx2_pbroadcastv16qi;
47314 break;
47315 case V8HImode:
47316 gen = gen_avx2_pbroadcastv8hi;
47317 break;
47318 case V16SFmode:
47319 if (TARGET_AVX512F)
47320 gen = gen_avx512f_vec_dupv16sf_1;
47321 break;
47322 case V8SFmode:
47323 gen = gen_avx2_vec_dupv8sf_1;
47324 break;
47325 case V8DFmode:
47326 if (TARGET_AVX512F)
47327 gen = gen_avx512f_vec_dupv8df_1;
47328 break;
47329 case V8DImode:
47330 if (TARGET_AVX512F)
47331 gen = gen_avx512f_vec_dupv8di_1;
47332 break;
47333 /* For other modes prefer other shuffles this function creates. */
47334 default: break;
47335 }
47336 if (gen != NULL)
47337 {
47338 if (!d->testing_p)
47339 emit_insn (gen (d->target, d->op0));
47340 return true;
47341 }
47342 }
47343
47344 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47345 return true;
47346
47347 /* There are plenty of patterns in sse.md that are written for
47348 SEL+CONCAT and are not replicated for a single op. Perhaps
47349 that should be changed, to avoid the nastiness here. */
47350
47351 /* Recognize interleave style patterns, which means incrementing
47352 every other permutation operand. */
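/* For example, the V4SImode duplicate-interleave { 0 0 1 1 } becomes
   the selection { 0 4 1 5 } on (vec_concat op0 op0), i.e. punpckldq.  */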
47353 for (i = 0; i < nelt; i += 2)
47354 {
47355 perm2[i] = d->perm[i] & mask;
47356 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47357 }
47358 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47359 d->testing_p))
47360 return true;
47361
47362 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47363 if (nelt >= 4)
47364 {
47365 for (i = 0; i < nelt; i += 4)
47366 {
47367 perm2[i + 0] = d->perm[i + 0] & mask;
47368 perm2[i + 1] = d->perm[i + 1] & mask;
47369 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47370 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47371 }
47372
47373 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47374 d->testing_p))
47375 return true;
47376 }
47377 }
47378
47379 /* Finally, try the fully general two operand permute. */
47380 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47381 d->testing_p))
47382 return true;
47383
47384 /* Recognize interleave style patterns with reversed operands. */
47385 if (!d->one_operand_p)
47386 {
47387 for (i = 0; i < nelt; ++i)
47388 {
47389 unsigned e = d->perm[i];
47390 if (e >= nelt)
47391 e -= nelt;
47392 else
47393 e += nelt;
47394 perm2[i] = e;
47395 }
47396
47397 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47398 d->testing_p))
47399 return true;
47400 }
47401
47402 /* Try the SSE4.1 blend variable merge instructions. */
47403 if (expand_vec_perm_blend (d))
47404 return true;
47405
47406 /* Try one of the AVX vpermil variable permutations. */
47407 if (expand_vec_perm_vpermil (d))
47408 return true;
47409
47410 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47411 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47412 if (expand_vec_perm_pshufb (d))
47413 return true;
47414
47415 /* Try the AVX2 vpalignr instruction. */
47416 if (expand_vec_perm_palignr (d, true))
47417 return true;
47418
47419 /* Try the AVX512F vpermi2 instructions. */
47420 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47421 return true;
47422
47423 return false;
47424 }
47425
47426 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47427 in terms of a pair of pshuflw + pshufhw instructions. */
47428
47429 static bool
47430 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47431 {
47432 unsigned char perm2[MAX_VECT_LEN];
47433 unsigned i;
47434 bool ok;
47435
47436 if (d->vmode != V8HImode || !d->one_operand_p)
47437 return false;
47438
47439 /* The two permutations only operate in 64-bit lanes. */
47440 for (i = 0; i < 4; ++i)
47441 if (d->perm[i] >= 4)
47442 return false;
47443 for (i = 4; i < 8; ++i)
47444 if (d->perm[i] < 4)
47445 return false;
47446
47447 if (d->testing_p)
47448 return true;
47449
47450 /* Emit the pshuflw. */
47451 memcpy (perm2, d->perm, 4);
47452 for (i = 4; i < 8; ++i)
47453 perm2[i] = i;
47454 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47455 gcc_assert (ok);
47456
47457 /* Emit the pshufhw. */
47458 memcpy (perm2 + 4, d->perm + 4, 4);
47459 for (i = 0; i < 4; ++i)
47460 perm2[i] = i;
47461 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47462 gcc_assert (ok);
47463
47464 return true;
47465 }
47466
47467 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47468 the permutation using the SSSE3 palignr instruction. This succeeds
47469 when all of the elements in PERM fit within one vector and we merely
47470 need to shift them down so that a single vector permutation has a
47471 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
47472 the vpalignr instruction itself can perform the requested permutation. */
47473
47474 static bool
47475 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47476 {
47477 unsigned i, nelt = d->nelt;
47478 unsigned min, max, minswap, maxswap;
47479 bool in_order, ok, swap = false;
47480 rtx shift, target;
47481 struct expand_vec_perm_d dcopy;
47482
47483 /* Even with AVX, palignr only operates on 128-bit vectors;
47484 with AVX2, palignr operates on the two 128-bit lanes separately. */
47485 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47486 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47487 return false;
47488
47489 min = 2 * nelt;
47490 max = 0;
47491 minswap = 2 * nelt;
47492 maxswap = 0;
47493 for (i = 0; i < nelt; ++i)
47494 {
47495 unsigned e = d->perm[i];
47496 unsigned eswap = d->perm[i] ^ nelt;
47497 if (GET_MODE_SIZE (d->vmode) == 32)
47498 {
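/* AVX2 palignr works within each 128-bit lane separately; fold the
   index down to a within-lane position plus a bit that records which
   operand the element comes from.  */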
47499 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47500 eswap = e ^ (nelt / 2);
47501 }
47502 if (e < min)
47503 min = e;
47504 if (e > max)
47505 max = e;
47506 if (eswap < minswap)
47507 minswap = eswap;
47508 if (eswap > maxswap)
47509 maxswap = eswap;
47510 }
47511 if (min == 0
47512 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47513 {
47514 if (d->one_operand_p
47515 || minswap == 0
47516 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47517 ? nelt / 2 : nelt))
47518 return false;
47519 swap = true;
47520 min = minswap;
47521 max = maxswap;
47522 }
47523
47524 /* Given that we have SSSE3, we know we'll be able to implement the
47525 single operand permutation after the palignr with pshufb for
47526 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47527 first. */
47528 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47529 return true;
47530
47531 dcopy = *d;
47532 if (swap)
47533 {
47534 dcopy.op0 = d->op1;
47535 dcopy.op1 = d->op0;
47536 for (i = 0; i < nelt; ++i)
47537 dcopy.perm[i] ^= nelt;
47538 }
47539
47540 in_order = true;
47541 for (i = 0; i < nelt; ++i)
47542 {
47543 unsigned e = dcopy.perm[i];
47544 if (GET_MODE_SIZE (d->vmode) == 32
47545 && e >= nelt
47546 && (e & (nelt / 2 - 1)) < min)
47547 e = e - min - (nelt / 2);
47548 else
47549 e = e - min;
47550 if (e != i)
47551 in_order = false;
47552 dcopy.perm[i] = e;
47553 }
47554 dcopy.one_operand_p = true;
47555
47556 if (single_insn_only_p && !in_order)
47557 return false;
47558
47559 /* For AVX2, test whether we can permute the result in one instruction. */
47560 if (d->testing_p)
47561 {
47562 if (in_order)
47563 return true;
47564 dcopy.op1 = dcopy.op0;
47565 return expand_vec_perm_1 (&dcopy);
47566 }
47567
47568 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47569 if (GET_MODE_SIZE (d->vmode) == 16)
47570 {
47571 target = gen_reg_rtx (TImode);
47572 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47573 gen_lowpart (TImode, dcopy.op0), shift));
47574 }
47575 else
47576 {
47577 target = gen_reg_rtx (V2TImode);
47578 emit_insn (gen_avx2_palignrv2ti (target,
47579 gen_lowpart (V2TImode, dcopy.op1),
47580 gen_lowpart (V2TImode, dcopy.op0),
47581 shift));
47582 }
47583
47584 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47585
47586 /* Test for the degenerate case where the alignment by itself
47587 produces the desired permutation. */
47588 if (in_order)
47589 {
47590 emit_move_insn (d->target, dcopy.op0);
47591 return true;
47592 }
47593
47594 ok = expand_vec_perm_1 (&dcopy);
47595 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47596
47597 return ok;
47598 }
47599
47600 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47601 the permutation using the SSE4_1 pblendv instruction. Potentially
47602 reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */
47603
47604 static bool
47605 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47606 {
47607 unsigned i, which, nelt = d->nelt;
47608 struct expand_vec_perm_d dcopy, dcopy1;
47609 machine_mode vmode = d->vmode;
47610 bool ok;
47611
47612 /* Use the same checks as in expand_vec_perm_blend. */
47613 if (d->one_operand_p)
47614 return false;
47615 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47616 ;
47617 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47618 ;
47619 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47620 ;
47621 else
47622 return false;
47623
47624 /* Figure out which permutation elements do not stay in their
47625 respective lanes. */
47626 for (i = 0, which = 0; i < nelt; ++i)
47627 {
47628 unsigned e = d->perm[i];
47629 if (e != i)
47630 which |= (e < nelt ? 1 : 2);
47631 }
47632 /* We can pblend the part where elements do not stay in their
47633 respective lanes only when these elements all come from one
47634 half of the permutation.
47635 {0 1 8 3 4 5 9 7} is ok, as 8 and 9 are not in their respective
47636 lanes but both 8 and 9 >= 8;
47637 {0 1 8 3 4 5 2 7} is not ok, as 2 and 8 are not in their
47638 respective lanes and 8 >= 8, but 2 is not. */
47639 if (which != 1 && which != 2)
47640 return false;
47641 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47642 return true;
47643
47644 /* First we apply a one-operand permutation to the elements that
47645 do not stay in their respective lanes. */
47646 dcopy = *d;
47647 if (which == 2)
47648 dcopy.op0 = dcopy.op1 = d->op1;
47649 else
47650 dcopy.op0 = dcopy.op1 = d->op0;
47651 if (!d->testing_p)
47652 dcopy.target = gen_reg_rtx (vmode);
47653 dcopy.one_operand_p = true;
47654
47655 for (i = 0; i < nelt; ++i)
47656 dcopy.perm[i] = d->perm[i] & (nelt - 1);
47657
47658 ok = expand_vec_perm_1 (&dcopy);
47659 if (GET_MODE_SIZE (vmode) != 16 && !ok)
47660 return false;
47661 else
47662 gcc_assert (ok);
47663 if (d->testing_p)
47664 return true;
47665
47666 /* Next we put permuted elements into their positions. */
47667 dcopy1 = *d;
47668 if (which == 2)
47669 dcopy1.op1 = dcopy.target;
47670 else
47671 dcopy1.op0 = dcopy.target;
47672
47673 for (i = 0; i < nelt; ++i)
47674 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47675
47676 ok = expand_vec_perm_blend (&dcopy1);
47677 gcc_assert (ok);
47678
47679 return true;
47680 }
47681
47682 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47683
47684 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47685 a two vector permutation into a single vector permutation by using
47686 an interleave operation to merge the vectors. */
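/* For instance, the V8HImode selector { 0 1 2 3 8 9 10 11 } draws only
   on the low halves of both operands, so an interleave-low first forms
   { 0 8 1 9 2 10 3 11 } and the remaining work is the single-operand
   shuffle { 0 2 4 6 1 3 5 7 }.  */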
47687
47688 static bool
47689 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47690 {
47691 struct expand_vec_perm_d dremap, dfinal;
47692 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47693 unsigned HOST_WIDE_INT contents;
47694 unsigned char remap[2 * MAX_VECT_LEN];
47695 rtx_insn *seq;
47696 bool ok, same_halves = false;
47697
47698 if (GET_MODE_SIZE (d->vmode) == 16)
47699 {
47700 if (d->one_operand_p)
47701 return false;
47702 }
47703 else if (GET_MODE_SIZE (d->vmode) == 32)
47704 {
47705 if (!TARGET_AVX)
47706 return false;
47707 /* For 32-byte modes allow even d->one_operand_p.
47708 The lack of cross-lane shuffling in some instructions
47709 might prevent a single insn shuffle. */
47710 dfinal = *d;
47711 dfinal.testing_p = true;
47712 /* If expand_vec_perm_interleave3 can expand this into
47713 a 3-insn sequence, give up and let it be expanded as
47714 a 3-insn sequence. While that is one insn longer,
47715 it doesn't need a memory operand, and in the common
47716 case where both the interleave-low and interleave-high
47717 permutations with the same operands are adjacent, it
47718 needs only 4 insns for both after CSE. */
47719 if (expand_vec_perm_interleave3 (&dfinal))
47720 return false;
47721 }
47722 else
47723 return false;
47724
47725 /* Examine from whence the elements come. */
47726 contents = 0;
47727 for (i = 0; i < nelt; ++i)
47728 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
47729
47730 memset (remap, 0xff, sizeof (remap));
47731 dremap = *d;
47732
47733 if (GET_MODE_SIZE (d->vmode) == 16)
47734 {
47735 unsigned HOST_WIDE_INT h1, h2, h3, h4;
47736
47737 /* Split the two input vectors into 4 halves. */
47738 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
47739 h2 = h1 << nelt2;
47740 h3 = h2 << nelt2;
47741 h4 = h3 << nelt2;
47742
47743 /* If the elements are all from the low halves, use interleave low, and
47744 similarly for interleave high. If the elements are from mismatched
47745 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
47746 if ((contents & (h1 | h3)) == contents)
47747 {
47748 /* punpckl* */
47749 for (i = 0; i < nelt2; ++i)
47750 {
47751 remap[i] = i * 2;
47752 remap[i + nelt] = i * 2 + 1;
47753 dremap.perm[i * 2] = i;
47754 dremap.perm[i * 2 + 1] = i + nelt;
47755 }
47756 if (!TARGET_SSE2 && d->vmode == V4SImode)
47757 dremap.vmode = V4SFmode;
47758 }
47759 else if ((contents & (h2 | h4)) == contents)
47760 {
47761 /* punpckh* */
47762 for (i = 0; i < nelt2; ++i)
47763 {
47764 remap[i + nelt2] = i * 2;
47765 remap[i + nelt + nelt2] = i * 2 + 1;
47766 dremap.perm[i * 2] = i + nelt2;
47767 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47768 }
47769 if (!TARGET_SSE2 && d->vmode == V4SImode)
47770 dremap.vmode = V4SFmode;
47771 }
47772 else if ((contents & (h1 | h4)) == contents)
47773 {
47774 /* shufps */
47775 for (i = 0; i < nelt2; ++i)
47776 {
47777 remap[i] = i;
47778 remap[i + nelt + nelt2] = i + nelt2;
47779 dremap.perm[i] = i;
47780 dremap.perm[i + nelt2] = i + nelt + nelt2;
47781 }
47782 if (nelt != 4)
47783 {
47784 /* shufpd */
47785 dremap.vmode = V2DImode;
47786 dremap.nelt = 2;
47787 dremap.perm[0] = 0;
47788 dremap.perm[1] = 3;
47789 }
47790 }
47791 else if ((contents & (h2 | h3)) == contents)
47792 {
47793 /* shufps */
47794 for (i = 0; i < nelt2; ++i)
47795 {
47796 remap[i + nelt2] = i;
47797 remap[i + nelt] = i + nelt2;
47798 dremap.perm[i] = i + nelt2;
47799 dremap.perm[i + nelt2] = i + nelt;
47800 }
47801 if (nelt != 4)
47802 {
47803 /* shufpd */
47804 dremap.vmode = V2DImode;
47805 dremap.nelt = 2;
47806 dremap.perm[0] = 1;
47807 dremap.perm[1] = 2;
47808 }
47809 }
47810 else
47811 return false;
47812 }
47813 else
47814 {
47815 unsigned int nelt4 = nelt / 4, nzcnt = 0;
47816 unsigned HOST_WIDE_INT q[8];
47817 unsigned int nonzero_halves[4];
47818
47819 /* Split the two input vectors into 8 quarters. */
47820 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
47821 for (i = 1; i < 8; ++i)
47822 q[i] = q[0] << (nelt4 * i);
47823 for (i = 0; i < 4; ++i)
47824 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47825 {
47826 nonzero_halves[nzcnt] = i;
47827 ++nzcnt;
47828 }
47829
47830 if (nzcnt == 1)
47831 {
47832 gcc_assert (d->one_operand_p);
47833 nonzero_halves[1] = nonzero_halves[0];
47834 same_halves = true;
47835 }
47836 else if (d->one_operand_p)
47837 {
47838 gcc_assert (nonzero_halves[0] == 0);
47839 gcc_assert (nonzero_halves[1] == 1);
47840 }
47841
47842 if (nzcnt <= 2)
47843 {
47844 if (d->perm[0] / nelt2 == nonzero_halves[1])
47845 {
47846 /* Attempt to increase the likelihood that dfinal
47847 shuffle will be intra-lane. */
47848 char tmph = nonzero_halves[0];
47849 nonzero_halves[0] = nonzero_halves[1];
47850 nonzero_halves[1] = tmph;
47851 }
47852
47853 /* vperm2f128 or vperm2i128. */
47854 for (i = 0; i < nelt2; ++i)
47855 {
47856 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47857 remap[i + nonzero_halves[0] * nelt2] = i;
47858 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
47859 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
47860 }
47861
47862 if (d->vmode != V8SFmode
47863 && d->vmode != V4DFmode
47864 && d->vmode != V8SImode)
47865 {
47866 dremap.vmode = V8SImode;
47867 dremap.nelt = 8;
47868 for (i = 0; i < 4; ++i)
47869 {
47870 dremap.perm[i] = i + nonzero_halves[0] * 4;
47871 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
47872 }
47873 }
47874 }
47875 else if (d->one_operand_p)
47876 return false;
47877 else if (TARGET_AVX2
47878 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
47879 {
47880 /* vpunpckl* */
47881 for (i = 0; i < nelt4; ++i)
47882 {
47883 remap[i] = i * 2;
47884 remap[i + nelt] = i * 2 + 1;
47885 remap[i + nelt2] = i * 2 + nelt2;
47886 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
47887 dremap.perm[i * 2] = i;
47888 dremap.perm[i * 2 + 1] = i + nelt;
47889 dremap.perm[i * 2 + nelt2] = i + nelt2;
47890 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
47891 }
47892 }
47893 else if (TARGET_AVX2
47894 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
47895 {
47896 /* vpunpckh* */
47897 for (i = 0; i < nelt4; ++i)
47898 {
47899 remap[i + nelt4] = i * 2;
47900 remap[i + nelt + nelt4] = i * 2 + 1;
47901 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
47902 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
47903 dremap.perm[i * 2] = i + nelt4;
47904 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
47905 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
47906 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
47907 }
47908 }
47909 else
47910 return false;
47911 }
47912
47913 /* Use the remapping array set up above to move the elements from their
47914 swizzled locations into their final destinations. */
47915 dfinal = *d;
47916 for (i = 0; i < nelt; ++i)
47917 {
47918 unsigned e = remap[d->perm[i]];
47919 gcc_assert (e < nelt);
47920 /* If same_halves is true, both halves of the remapped vector are the
47921 same. Avoid cross-lane accesses if possible. */
47922 if (same_halves && i >= nelt2)
47923 {
47924 gcc_assert (e < nelt2);
47925 dfinal.perm[i] = e + nelt2;
47926 }
47927 else
47928 dfinal.perm[i] = e;
47929 }
47930 if (!d->testing_p)
47931 {
47932 dremap.target = gen_reg_rtx (dremap.vmode);
47933 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47934 }
47935 dfinal.op1 = dfinal.op0;
47936 dfinal.one_operand_p = true;
47937
47938 /* Test if the final remap can be done with a single insn. For V4SFmode or
47939 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
47940 start_sequence ();
47941 ok = expand_vec_perm_1 (&dfinal);
47942 seq = get_insns ();
47943 end_sequence ();
47944
47945 if (!ok)
47946 return false;
47947
47948 if (d->testing_p)
47949 return true;
47950
47951 if (dremap.vmode != dfinal.vmode)
47952 {
47953 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
47954 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
47955 }
47956
47957 ok = expand_vec_perm_1 (&dremap);
47958 gcc_assert (ok);
47959
47960 emit_insn (seq);
47961 return true;
47962 }
47963
47964 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47965 a single vector cross-lane permutation into vpermq followed
47966 by any of the single insn permutations. */
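/* This applies when each half of the result uses at most two of the
   four 64-bit quarters of the input: vpermq first moves those quarters
   into the corresponding 128-bit lane, and an in-lane shuffle such as
   vpshufb finishes the permutation.  */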
47967
47968 static bool
47969 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
47970 {
47971 struct expand_vec_perm_d dremap, dfinal;
47972 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
47973 unsigned contents[2];
47974 bool ok;
47975
47976 if (!(TARGET_AVX2
47977 && (d->vmode == V32QImode || d->vmode == V16HImode)
47978 && d->one_operand_p))
47979 return false;
47980
47981 contents[0] = 0;
47982 contents[1] = 0;
47983 for (i = 0; i < nelt2; ++i)
47984 {
47985 contents[0] |= 1u << (d->perm[i] / nelt4);
47986 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
47987 }
47988
47989 for (i = 0; i < 2; ++i)
47990 {
47991 unsigned int cnt = 0;
47992 for (j = 0; j < 4; ++j)
47993 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
47994 return false;
47995 }
47996
47997 if (d->testing_p)
47998 return true;
47999
48000 dremap = *d;
48001 dremap.vmode = V4DImode;
48002 dremap.nelt = 4;
48003 dremap.target = gen_reg_rtx (V4DImode);
48004 dremap.op0 = gen_lowpart (V4DImode, d->op0);
48005 dremap.op1 = dremap.op0;
48006 dremap.one_operand_p = true;
48007 for (i = 0; i < 2; ++i)
48008 {
48009 unsigned int cnt = 0;
48010 for (j = 0; j < 4; ++j)
48011 if ((contents[i] & (1u << j)) != 0)
48012 dremap.perm[2 * i + cnt++] = j;
48013 for (; cnt < 2; ++cnt)
48014 dremap.perm[2 * i + cnt] = 0;
48015 }
48016
48017 dfinal = *d;
48018 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48019 dfinal.op1 = dfinal.op0;
48020 dfinal.one_operand_p = true;
48021 for (i = 0, j = 0; i < nelt; ++i)
48022 {
48023 if (i == nelt2)
48024 j = 2;
48025 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
48026 if ((d->perm[i] / nelt4) == dremap.perm[j])
48027 ;
48028 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
48029 dfinal.perm[i] |= nelt4;
48030 else
48031 gcc_unreachable ();
48032 }
48033
48034 ok = expand_vec_perm_1 (&dremap);
48035 gcc_assert (ok);
48036
48037 ok = expand_vec_perm_1 (&dfinal);
48038 gcc_assert (ok);
48039
48040 return true;
48041 }
48042
48043 /* A subroutine of ix86_expand_vec_perm_const_1. Try to expand
48044 a vector permutation using two instructions, vperm2f128 or
48045 vperm2i128 followed by any single in-lane permutation. */
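/* For example, the single-operand V4DFmode selector { 2 1 0 3 } can be
   handled by a vperm2f128 lane swap producing { 2 3 0 1 } followed by a
   blend with the original operand; the loop below tries each useful
   lane-pair immediate and keeps the first one whose remaining shuffle
   is a single insn.  */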
48046
48047 static bool
48048 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
48049 {
48050 struct expand_vec_perm_d dfirst, dsecond;
48051 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
48052 bool ok;
48053
48054 if (!TARGET_AVX
48055 || GET_MODE_SIZE (d->vmode) != 32
48056 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
48057 return false;
48058
48059 dsecond = *d;
48060 dsecond.one_operand_p = false;
48061 dsecond.testing_p = true;
48062
48063 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
48064 immediate. For perm < 16 the second permutation uses
48065 d->op0 as first operand, for perm >= 16 it uses d->op1
48066 as first operand. The second operand is the result of
48067 vperm2[fi]128. */
48068 for (perm = 0; perm < 32; perm++)
48069 {
48070 /* Ignore permutations which do not move anything cross-lane. */
48071 if (perm < 16)
48072 {
48073 /* The second shuffle for e.g. V4DFmode has
48074 0123 and ABCD operands.
48075 Ignore AB23, as 23 is already in the second lane
48076 of the first operand. */
48077 if ((perm & 0xc) == (1 << 2)) continue;
48078 /* And 01CD, as 01 is in the first lane of the first
48079 operand. */
48080 if ((perm & 3) == 0) continue;
48081 /* And 4567, as then the vperm2[fi]128 doesn't change
48082 anything on the original 4567 second operand. */
48083 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48084 }
48085 else
48086 {
48087 /* The second shuffle for e.g. V4DFmode has
48088 4567 and ABCD operands.
48089 Ignore AB67, as 67 is already in the second lane
48090 of the first operand. */
48091 if ((perm & 0xc) == (3 << 2)) continue;
48092 /* And 45CD, as 45 is in the first lane of the first
48093 operand. */
48094 if ((perm & 3) == 2) continue;
48095 /* And 0123, as then the vperm2[fi]128 doesn't change
48096 anything on the original 0123 first operand. */
48097 if ((perm & 0xf) == (1 << 2)) continue;
48098 }
48099
48100 for (i = 0; i < nelt; i++)
48101 {
48102 j = d->perm[i] / nelt2;
48103 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48104 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48105 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48106 dsecond.perm[i] = d->perm[i] & (nelt - 1);
48107 else
48108 break;
48109 }
48110
48111 if (i == nelt)
48112 {
48113 start_sequence ();
48114 ok = expand_vec_perm_1 (&dsecond);
48115 end_sequence ();
48116 }
48117 else
48118 ok = false;
48119
48120 if (ok)
48121 {
48122 if (d->testing_p)
48123 return true;
48124
48125 /* Found a usable second shuffle. dfirst will be
48126 vperm2f128 on d->op0 and d->op1. */
48127 dsecond.testing_p = false;
48128 dfirst = *d;
48129 dfirst.target = gen_reg_rtx (d->vmode);
48130 for (i = 0; i < nelt; i++)
48131 dfirst.perm[i] = (i & (nelt2 - 1))
48132 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48133
48134 canonicalize_perm (&dfirst);
48135 ok = expand_vec_perm_1 (&dfirst);
48136 gcc_assert (ok);
48137
48138 /* And dsecond is some single insn shuffle, taking
48139 d->op0 and result of vperm2f128 (if perm < 16) or
48140 d->op1 and result of vperm2f128 (otherwise). */
48141 if (perm >= 16)
48142 dsecond.op0 = dsecond.op1;
48143 dsecond.op1 = dfirst.target;
48144
48145 ok = expand_vec_perm_1 (&dsecond);
48146 gcc_assert (ok);
48147
48148 return true;
48149 }
48150
48151 /* For one operand, the only useful vperm2f128 permutation is 0x01
48152 aka lanes swap. */
48153 if (d->one_operand_p)
48154 return false;
48155 }
48156
48157 return false;
48158 }
48159
48160 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
48161 a two vector permutation using 2 intra-lane interleave insns
48162 and cross-lane shuffle for 32-byte vectors. */
48163
48164 static bool
48165 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48166 {
48167 unsigned i, nelt;
48168 rtx (*gen) (rtx, rtx, rtx);
48169
48170 if (d->one_operand_p)
48171 return false;
48172 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48173 ;
48174 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
48175 ;
48176 else
48177 return false;
48178
48179 nelt = d->nelt;
48180 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48181 return false;
48182 for (i = 0; i < nelt; i += 2)
48183 if (d->perm[i] != d->perm[0] + i / 2
48184 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
48185 return false;
48186
48187 if (d->testing_p)
48188 return true;
48189
48190 switch (d->vmode)
48191 {
48192 case V32QImode:
48193 if (d->perm[0])
48194 gen = gen_vec_interleave_highv32qi;
48195 else
48196 gen = gen_vec_interleave_lowv32qi;
48197 break;
48198 case V16HImode:
48199 if (d->perm[0])
48200 gen = gen_vec_interleave_highv16hi;
48201 else
48202 gen = gen_vec_interleave_lowv16hi;
48203 break;
48204 case V8SImode:
48205 if (d->perm[0])
48206 gen = gen_vec_interleave_highv8si;
48207 else
48208 gen = gen_vec_interleave_lowv8si;
48209 break;
48210 case V4DImode:
48211 if (d->perm[0])
48212 gen = gen_vec_interleave_highv4di;
48213 else
48214 gen = gen_vec_interleave_lowv4di;
48215 break;
48216 case V8SFmode:
48217 if (d->perm[0])
48218 gen = gen_vec_interleave_highv8sf;
48219 else
48220 gen = gen_vec_interleave_lowv8sf;
48221 break;
48222 case V4DFmode:
48223 if (d->perm[0])
48224 gen = gen_vec_interleave_highv4df;
48225 else
48226 gen = gen_vec_interleave_lowv4df;
48227 break;
48228 default:
48229 gcc_unreachable ();
48230 }
48231
48232 emit_insn (gen (d->target, d->op0, d->op1));
48233 return true;
48234 }
48235
48236 /* A subroutine of ix86_expand_vec_perm_const_1. Try to implement
48237 a single vector permutation using a single intra-lane vector
48238 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48239 the non-swapped and swapped vectors together. */
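/* This targets AVX without AVX2, where cross-lane element moves for
   V8SFmode/V4DFmode are otherwise awkward: the intra-lane shuffle places
   each wanted element either at its final position or at the mirror
   position in the other lane, MSK records which positions must come
   from the lane-swapped copy, and the final vblend* selects between
   the two.  */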
48240
48241 static bool
48242 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48243 {
48244 struct expand_vec_perm_d dfirst, dsecond;
48245 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48246 rtx_insn *seq;
48247 bool ok;
48248 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48249
48250 if (!TARGET_AVX
48251 || TARGET_AVX2
48252 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48253 || !d->one_operand_p)
48254 return false;
48255
48256 dfirst = *d;
48257 for (i = 0; i < nelt; i++)
48258 dfirst.perm[i] = 0xff;
48259 for (i = 0, msk = 0; i < nelt; i++)
48260 {
48261 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48262 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48263 return false;
48264 dfirst.perm[j] = d->perm[i];
48265 if (j != i)
48266 msk |= (1 << i);
48267 }
48268 for (i = 0; i < nelt; i++)
48269 if (dfirst.perm[i] == 0xff)
48270 dfirst.perm[i] = i;
48271
48272 if (!d->testing_p)
48273 dfirst.target = gen_reg_rtx (dfirst.vmode);
48274
48275 start_sequence ();
48276 ok = expand_vec_perm_1 (&dfirst);
48277 seq = get_insns ();
48278 end_sequence ();
48279
48280 if (!ok)
48281 return false;
48282
48283 if (d->testing_p)
48284 return true;
48285
48286 emit_insn (seq);
48287
48288 dsecond = *d;
48289 dsecond.op0 = dfirst.target;
48290 dsecond.op1 = dfirst.target;
48291 dsecond.one_operand_p = true;
48292 dsecond.target = gen_reg_rtx (dsecond.vmode);
48293 for (i = 0; i < nelt; i++)
48294 dsecond.perm[i] = i ^ nelt2;
48295
48296 ok = expand_vec_perm_1 (&dsecond);
48297 gcc_assert (ok);
48298
48299 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48300 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48301 return true;
48302 }
48303
48304 /* A subroutine of ix86_expand_vec_perm_const_1. Implement a V4DF
48305 permutation using two vperm2f128, followed by a vshufpd insn blending
48306 the two vectors together. */
48307
48308 static bool
48309 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48310 {
48311 struct expand_vec_perm_d dfirst, dsecond, dthird;
48312 bool ok;
48313
48314 if (!TARGET_AVX || (d->vmode != V4DFmode))
48315 return false;
48316
48317 if (d->testing_p)
48318 return true;
48319
48320 dfirst = *d;
48321 dsecond = *d;
48322 dthird = *d;
48323
48324 dfirst.perm[0] = (d->perm[0] & ~1);
48325 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48326 dfirst.perm[2] = (d->perm[2] & ~1);
48327 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48328 dsecond.perm[0] = (d->perm[1] & ~1);
48329 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48330 dsecond.perm[2] = (d->perm[3] & ~1);
48331 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48332 dthird.perm[0] = (d->perm[0] % 2);
48333 dthird.perm[1] = (d->perm[1] % 2) + 4;
48334 dthird.perm[2] = (d->perm[2] % 2) + 2;
48335 dthird.perm[3] = (d->perm[3] % 2) + 6;
48336
48337 dfirst.target = gen_reg_rtx (dfirst.vmode);
48338 dsecond.target = gen_reg_rtx (dsecond.vmode);
48339 dthird.op0 = dfirst.target;
48340 dthird.op1 = dsecond.target;
48341 dthird.one_operand_p = false;
48342
48343 canonicalize_perm (&dfirst);
48344 canonicalize_perm (&dsecond);
48345
48346 ok = expand_vec_perm_1 (&dfirst)
48347 && expand_vec_perm_1 (&dsecond)
48348 && expand_vec_perm_1 (&dthird);
48349
48350 gcc_assert (ok);
48351
48352 return true;
48353 }
48354
48355 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48356 permutation with two pshufb insns and an ior. We should have already
48357 failed all two instruction sequences. */
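/* For example, if d->perm[0] is 5 (byte 5 of op0) and d->perm[1] is 20
   (byte 4 of op1), the first mask starts { 5, -128, ... } and the
   second starts { -128, 4, ... }; the ior of the two pshufb results
   then assembles the permuted vector.  */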
48358
48359 static bool
48360 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48361 {
48362 rtx rperm[2][16], vperm, l, h, op, m128;
48363 unsigned int i, nelt, eltsz;
48364
48365 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48366 return false;
48367 gcc_assert (!d->one_operand_p);
48368
48369 if (d->testing_p)
48370 return true;
48371
48372 nelt = d->nelt;
48373 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48374
48375 /* Generate two permutation masks. If the required element is within
48376 the given vector it is shuffled into the proper lane. If the required
48377 element is in the other vector, force a zero into the lane by setting
48378 bit 7 in the permutation mask. */
48379 m128 = GEN_INT (-128);
48380 for (i = 0; i < nelt; ++i)
48381 {
48382 unsigned j, e = d->perm[i];
48383 unsigned which = (e >= nelt);
48384 if (e >= nelt)
48385 e -= nelt;
48386
48387 for (j = 0; j < eltsz; ++j)
48388 {
48389 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48390 rperm[1-which][i*eltsz + j] = m128;
48391 }
48392 }
48393
48394 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48395 vperm = force_reg (V16QImode, vperm);
48396
48397 l = gen_reg_rtx (V16QImode);
48398 op = gen_lowpart (V16QImode, d->op0);
48399 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48400
48401 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48402 vperm = force_reg (V16QImode, vperm);
48403
48404 h = gen_reg_rtx (V16QImode);
48405 op = gen_lowpart (V16QImode, d->op1);
48406 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48407
48408 op = d->target;
48409 if (d->vmode != V16QImode)
48410 op = gen_reg_rtx (V16QImode);
48411 emit_insn (gen_iorv16qi3 (op, l, h));
48412 if (op != d->target)
48413 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48414
48415 return true;
48416 }
48417
48418 /* Implement an arbitrary permutation of one V32QImode or V16HImode operand
48419 with two vpshufb insns, vpermq and vpor. We should have already failed
48420 all two or three instruction sequences. */
48421
48422 static bool
48423 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48424 {
48425 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48426 unsigned int i, nelt, eltsz;
48427
48428 if (!TARGET_AVX2
48429 || !d->one_operand_p
48430 || (d->vmode != V32QImode && d->vmode != V16HImode))
48431 return false;
48432
48433 if (d->testing_p)
48434 return true;
48435
48436 nelt = d->nelt;
48437 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48438
48439 /* Generate two permutation masks. If the required element is within
48440 the same lane, it is shuffled in. If the required element is from the
48441 other lane, force a zero by setting bit 7 in the permutation mask.
48442 The other mask has a non-negative element whenever an element is
48443 requested from the other lane; that element is also moved to the other
48444 lane, so that the result of vpshufb can have the two V2TImode halves
48445 swapped. */
48446 m128 = GEN_INT (-128);
48447 for (i = 0; i < nelt; ++i)
48448 {
48449 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48450 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48451
48452 for (j = 0; j < eltsz; ++j)
48453 {
48454 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48455 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48456 }
48457 }
48458
48459 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48460 vperm = force_reg (V32QImode, vperm);
48461
48462 h = gen_reg_rtx (V32QImode);
48463 op = gen_lowpart (V32QImode, d->op0);
48464 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48465
48466 /* Swap the 128-bit lanes of h into hp. */
48467 hp = gen_reg_rtx (V4DImode);
48468 op = gen_lowpart (V4DImode, h);
48469 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48470 const1_rtx));
48471
48472 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48473 vperm = force_reg (V32QImode, vperm);
48474
48475 l = gen_reg_rtx (V32QImode);
48476 op = gen_lowpart (V32QImode, d->op0);
48477 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48478
48479 op = d->target;
48480 if (d->vmode != V32QImode)
48481 op = gen_reg_rtx (V32QImode);
48482 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48483 if (op != d->target)
48484 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48485
48486 return true;
48487 }
48488
48489 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48490 and extract-odd permutations of two V32QImode or V16HImode operands
48491 with two vpshufb insns, vpor and vpermq. We should have already
48492 failed all two or three instruction sequences. */
48493
48494 static bool
48495 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48496 {
48497 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48498 unsigned int i, nelt, eltsz;
48499
48500 if (!TARGET_AVX2
48501 || d->one_operand_p
48502 || (d->vmode != V32QImode && d->vmode != V16HImode))
48503 return false;
48504
48505 for (i = 0; i < d->nelt; ++i)
48506 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48507 return false;
48508
48509 if (d->testing_p)
48510 return true;
48511
48512 nelt = d->nelt;
48513 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48514
48515 /* Generate two permutation masks. In the first permutation mask
48516 the first quarter will contain indexes for the first half
48517 of the op0, the second quarter will contain bit 7 set, third quarter
48518 will contain indexes for the second half of the op0 and the
48519 last quarter bit 7 set. In the second permutation mask
48520 the first quarter will contain bit 7 set, the second quarter
48521 indexes for the first half of the op1, the third quarter bit 7 set
48522 and last quarter indexes for the second half of the op1.
48523 I.e. the first mask e.g. for V32QImode extract even will be:
48524 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48525 (all values masked with 0xf except for -128) and second mask
48526 for extract even will be
48527 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48528 m128 = GEN_INT (-128);
48529 for (i = 0; i < nelt; ++i)
48530 {
48531 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48532 unsigned which = d->perm[i] >= nelt;
48533 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48534
48535 for (j = 0; j < eltsz; ++j)
48536 {
48537 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48538 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48539 }
48540 }
48541
48542 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48543 vperm = force_reg (V32QImode, vperm);
48544
48545 l = gen_reg_rtx (V32QImode);
48546 op = gen_lowpart (V32QImode, d->op0);
48547 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48548
48549 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48550 vperm = force_reg (V32QImode, vperm);
48551
48552 h = gen_reg_rtx (V32QImode);
48553 op = gen_lowpart (V32QImode, d->op1);
48554 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48555
48556 ior = gen_reg_rtx (V32QImode);
48557 emit_insn (gen_iorv32qi3 (ior, l, h));
48558
48559 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48560 op = gen_reg_rtx (V4DImode);
48561 ior = gen_lowpart (V4DImode, ior);
48562 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48563 const1_rtx, GEN_INT (3)));
48564 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48565
48566 return true;
48567 }
48568
48569 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48570 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48571 with two "and" and "pack" or two "shift" and "pack" insns. We should
48572 have already failed all two instruction sequences. */
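/* For V16QImode extract-even, for instance, both operands are viewed as
   V8HImode, each word is masked with 0x00ff to clear the odd bytes and
   packuswb concatenates the surviving even bytes; extract-odd instead
   shifts each word right by 8 before the pack.  */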
48573
48574 static bool
48575 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48576 {
48577 rtx op, dop0, dop1, t, rperm[16];
48578 unsigned i, odd, c, s, nelt = d->nelt;
48579 bool end_perm = false;
48580 machine_mode half_mode;
48581 rtx (*gen_and) (rtx, rtx, rtx);
48582 rtx (*gen_pack) (rtx, rtx, rtx);
48583 rtx (*gen_shift) (rtx, rtx, rtx);
48584
48585 if (d->one_operand_p)
48586 return false;
48587
48588 switch (d->vmode)
48589 {
48590 case V8HImode:
48591 /* Required for "pack". */
48592 if (!TARGET_SSE4_1)
48593 return false;
48594 c = 0xffff;
48595 s = 16;
48596 half_mode = V4SImode;
48597 gen_and = gen_andv4si3;
48598 gen_pack = gen_sse4_1_packusdw;
48599 gen_shift = gen_lshrv4si3;
48600 break;
48601 case V16QImode:
48602 /* No check as all instructions are SSE2. */
48603 c = 0xff;
48604 s = 8;
48605 half_mode = V8HImode;
48606 gen_and = gen_andv8hi3;
48607 gen_pack = gen_sse2_packuswb;
48608 gen_shift = gen_lshrv8hi3;
48609 break;
48610 case V16HImode:
48611 if (!TARGET_AVX2)
48612 return false;
48613 c = 0xffff;
48614 s = 16;
48615 half_mode = V8SImode;
48616 gen_and = gen_andv8si3;
48617 gen_pack = gen_avx2_packusdw;
48618 gen_shift = gen_lshrv8si3;
48619 end_perm = true;
48620 break;
48621 case V32QImode:
48622 if (!TARGET_AVX2)
48623 return false;
48624 c = 0xff;
48625 s = 8;
48626 half_mode = V16HImode;
48627 gen_and = gen_andv16hi3;
48628 gen_pack = gen_avx2_packuswb;
48629 gen_shift = gen_lshrv16hi3;
48630 end_perm = true;
48631 break;
48632 default:
48633 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
48634 general shuffles. */
48635 return false;
48636 }
48637
48638 /* Check that permutation is even or odd. */
48639 odd = d->perm[0];
48640 if (odd > 1)
48641 return false;
48642
48643 for (i = 1; i < nelt; ++i)
48644 if (d->perm[i] != 2 * i + odd)
48645 return false;
48646
48647 if (d->testing_p)
48648 return true;
48649
48650 dop0 = gen_reg_rtx (half_mode);
48651 dop1 = gen_reg_rtx (half_mode);
48652 if (odd == 0)
48653 {
48654 for (i = 0; i < nelt / 2; i++)
48655 rperm[i] = GEN_INT (c);
48656 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
48657 t = force_reg (half_mode, t);
48658 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
48659 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
48660 }
48661 else
48662 {
48663 emit_insn (gen_shift (dop0,
48664 gen_lowpart (half_mode, d->op0),
48665 GEN_INT (s)));
48666 emit_insn (gen_shift (dop1,
48667 gen_lowpart (half_mode, d->op1),
48668 GEN_INT (s)));
48669 }
48670 /* For the AVX2 256-bit case we need to permute the pack result. */
48671 if (TARGET_AVX2 && end_perm)
48672 {
48673 op = gen_reg_rtx (d->vmode);
48674 t = gen_reg_rtx (V4DImode);
48675 emit_insn (gen_pack (op, dop0, dop1));
48676 emit_insn (gen_avx2_permv4di_1 (t,
48677 gen_lowpart (V4DImode, op),
48678 const0_rtx,
48679 const2_rtx,
48680 const1_rtx,
48681 GEN_INT (3)));
48682 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
48683 }
48684 else
48685 emit_insn (gen_pack (d->target, dop0, dop1));
48686
48687 return true;
48688 }
48689
48690 /* A subroutine of ix86_expand_vec_perm_const_1. Implement extract-even
48691 and extract-odd permutations. */
48692
48693 static bool
48694 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48695 {
48696 rtx t1, t2, t3, t4, t5;
48697
48698 switch (d->vmode)
48699 {
48700 case V4DFmode:
48701 if (d->testing_p)
48702 break;
48703 t1 = gen_reg_rtx (V4DFmode);
48704 t2 = gen_reg_rtx (V4DFmode);
48705
48706 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48707 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48708 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48709
48710 /* Now an unpck[lh]pd will produce the result required. */
48711 if (odd)
48712 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48713 else
48714 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
48715 emit_insn (t3);
48716 break;
48717
48718 case V8SFmode:
48719 {
48720 int mask = odd ? 0xdd : 0x88;
48721
48722 if (d->testing_p)
48723 break;
48724 t1 = gen_reg_rtx (V8SFmode);
48725 t2 = gen_reg_rtx (V8SFmode);
48726 t3 = gen_reg_rtx (V8SFmode);
48727
48728 /* Shuffle within the 128-bit lanes to produce:
48729 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
48730 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48731 GEN_INT (mask)));
48732
48733 /* Shuffle the lanes around to produce:
48734 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
48735 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48736 GEN_INT (0x3)));
48737
48738 /* Shuffle within the 128-bit lanes to produce:
48739 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
48740 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48741
48742 /* Shuffle within the 128-bit lanes to produce:
48743 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
48744 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48745
48746 /* Shuffle the lanes around to produce:
48747 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
48748 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
48749 GEN_INT (0x20)));
48750 }
48751 break;
48752
48753 case V2DFmode:
48754 case V4SFmode:
48755 case V2DImode:
48756 case V4SImode:
48757 /* These are always directly implementable by expand_vec_perm_1. */
48758 gcc_unreachable ();
48759
48760 case V8HImode:
48761 if (TARGET_SSE4_1)
48762 return expand_vec_perm_even_odd_pack (d);
48763 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48764 return expand_vec_perm_pshufb2 (d);
48765 else
48766 {
48767 if (d->testing_p)
48768 break;
48769 /* We need 2*log2(N)-1 operations to achieve odd/even
48770 with interleave. */
48771 t1 = gen_reg_rtx (V8HImode);
48772 t2 = gen_reg_rtx (V8HImode);
48773 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48774 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48775 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48776 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48777 if (odd)
48778 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48779 else
48780 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
48781 emit_insn (t3);
48782 }
48783 break;
48784
48785 case V16QImode:
48786 return expand_vec_perm_even_odd_pack (d);
48787
48788 case V16HImode:
48789 case V32QImode:
48790 return expand_vec_perm_even_odd_pack (d);
48791
48792 case V4DImode:
48793 if (!TARGET_AVX2)
48794 {
48795 struct expand_vec_perm_d d_copy = *d;
48796 d_copy.vmode = V4DFmode;
48797 if (d->testing_p)
48798 d_copy.target = gen_lowpart (V4DFmode, d->target);
48799 else
48800 d_copy.target = gen_reg_rtx (V4DFmode);
48801 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48802 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48803 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48804 {
48805 if (!d->testing_p)
48806 emit_move_insn (d->target,
48807 gen_lowpart (V4DImode, d_copy.target));
48808 return true;
48809 }
48810 return false;
48811 }
48812
48813 if (d->testing_p)
48814 break;
48815
48816 t1 = gen_reg_rtx (V4DImode);
48817 t2 = gen_reg_rtx (V4DImode);
48818
48819 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48820 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48821 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48822
48823 /* Now a vpunpck[lh]qdq will produce the result required. */
48824 if (odd)
48825 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48826 else
48827 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
48828 emit_insn (t3);
48829 break;
48830
48831 case V8SImode:
48832 if (!TARGET_AVX2)
48833 {
48834 struct expand_vec_perm_d d_copy = *d;
48835 d_copy.vmode = V8SFmode;
48836 if (d->testing_p)
48837 d_copy.target = gen_lowpart (V8SFmode, d->target);
48838 else
48839 d_copy.target = gen_reg_rtx (V8SFmode);
48840 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48841 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48842 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48843 {
48844 if (!d->testing_p)
48845 emit_move_insn (d->target,
48846 gen_lowpart (V8SImode, d_copy.target));
48847 return true;
48848 }
48849 return false;
48850 }
48851
48852 if (d->testing_p)
48853 break;
48854
48855 t1 = gen_reg_rtx (V8SImode);
48856 t2 = gen_reg_rtx (V8SImode);
48857 t3 = gen_reg_rtx (V4DImode);
48858 t4 = gen_reg_rtx (V4DImode);
48859 t5 = gen_reg_rtx (V4DImode);
48860
48861 /* Shuffle the lanes around into
48862 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
48863 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
48864 gen_lowpart (V4DImode, d->op1),
48865 GEN_INT (0x20)));
48866 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
48867 gen_lowpart (V4DImode, d->op1),
48868 GEN_INT (0x31)));
48869
48870 /* Swap the 2nd and 3rd position in each lane into
48871 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
48872 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
48873 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48874 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
48875 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48876
48877 /* Now a vpunpck[lh]qdq will produce
48878 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
48879 if (odd)
48880 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
48881 gen_lowpart (V4DImode, t2));
48882 else
48883 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
48884 gen_lowpart (V4DImode, t2));
48885 emit_insn (t3);
48886 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
48887 break;
48888
48889 default:
48890 gcc_unreachable ();
48891 }
48892
48893 return true;
48894 }
48895
48896 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
48897 extract-even and extract-odd permutations. */
48898
48899 static bool
48900 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
48901 {
48902 unsigned i, odd, nelt = d->nelt;
48903
48904 odd = d->perm[0];
48905 if (odd != 0 && odd != 1)
48906 return false;
48907
48908 for (i = 1; i < nelt; ++i)
48909 if (d->perm[i] != 2 * i + odd)
48910 return false;
48911
48912 return expand_vec_perm_even_odd_1 (d, odd);
48913 }
48914
48915 /* A subroutine of ix86_expand_vec_perm_const_1. Implement broadcast
48916 permutations. We assume that expand_vec_perm_1 has already failed. */
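/* E.g. broadcasting element 5 of a V16QImode vector interleaves the
   vector with itself until the wanted byte fills a whole V4SImode
   element (here ending up in element 1), after which a single pshufd
   replicates it across the vector.  */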
48917
48918 static bool
48919 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
48920 {
48921 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
48922 machine_mode vmode = d->vmode;
48923 unsigned char perm2[4];
48924 rtx op0 = d->op0, dest;
48925 bool ok;
48926
48927 switch (vmode)
48928 {
48929 case V4DFmode:
48930 case V8SFmode:
48931 /* These are special-cased in sse.md so that we can optionally
48932 use the vbroadcast instruction. They expand to two insns
48933 if the input happens to be in a register. */
48934 gcc_unreachable ();
48935
48936 case V2DFmode:
48937 case V2DImode:
48938 case V4SFmode:
48939 case V4SImode:
48940 /* These are always implementable using standard shuffle patterns. */
48941 gcc_unreachable ();
48942
48943 case V8HImode:
48944 case V16QImode:
48945 /* These can be implemented via interleave. We save one insn by
48946 stopping once we have promoted to V4SImode and then use pshufd. */
48947 if (d->testing_p)
48948 return true;
48949 do
48950 {
48951 rtx dest;
48952 rtx (*gen) (rtx, rtx, rtx)
48953 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
48954 : gen_vec_interleave_lowv8hi;
48955
48956 if (elt >= nelt2)
48957 {
48958 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
48959 : gen_vec_interleave_highv8hi;
48960 elt -= nelt2;
48961 }
48962 nelt2 /= 2;
48963
48964 dest = gen_reg_rtx (vmode);
48965 emit_insn (gen (dest, op0, op0));
48966 vmode = get_mode_wider_vector (vmode);
48967 op0 = gen_lowpart (vmode, dest);
48968 }
48969 while (vmode != V4SImode);
48970
48971 memset (perm2, elt, 4);
48972 dest = gen_reg_rtx (V4SImode);
48973 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
48974 gcc_assert (ok);
48975 if (!d->testing_p)
48976 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
48977 return true;
48978
48979 case V64QImode:
48980 case V32QImode:
48981 case V16HImode:
48982 case V8SImode:
48983 case V4DImode:
48984 /* For AVX2 broadcasts of the first element vpbroadcast* or
48985 vpermq should be used by expand_vec_perm_1. */
48986 gcc_assert (!TARGET_AVX2 || d->perm[0]);
48987 return false;
48988
48989 default:
48990 gcc_unreachable ();
48991 }
48992 }
48993
48994 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
48995 broadcast permutations. */
48996
48997 static bool
48998 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
48999 {
49000 unsigned i, elt, nelt = d->nelt;
49001
49002 if (!d->one_operand_p)
49003 return false;
49004
49005 elt = d->perm[0];
49006 for (i = 1; i < nelt; ++i)
49007 if (d->perm[i] != elt)
49008 return false;
49009
49010 return expand_vec_perm_broadcast_1 (d);
49011 }
49012
49013 /* Implement arbitrary permutations of two V64QImode operands
49014 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
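/* Each vpermi2w brings, for every other destination byte, the source
   word containing the wanted byte into the destination word; the two
   vpshufb masks then select the correct byte of each such word for the
   even and odd destination positions respectively, and vpor merges the
   two halves of the result.  */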
49015 static bool
49016 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
49017 {
49018 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
49019 return false;
49020
49021 if (d->testing_p)
49022 return true;
49023
49024 struct expand_vec_perm_d ds[2];
49025 rtx rperm[128], vperm, target0, target1;
49026 unsigned int i, nelt;
49027 machine_mode vmode;
49028
49029 nelt = d->nelt;
49030 vmode = V64QImode;
49031
49032 for (i = 0; i < 2; i++)
49033 {
49034 ds[i] = *d;
49035 ds[i].vmode = V32HImode;
49036 ds[i].nelt = 32;
49037 ds[i].target = gen_reg_rtx (V32HImode);
49038 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
49039 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
49040 }
49041
49042 /* Prepare permutations such that the first one takes care of
49043 putting the even bytes into the right positions or one position
49044 higher (ds[0]) and the second one takes care of putting the odd
49045 bytes into the right positions or one position lower
49046 (ds[1]). */
49047
49048 for (i = 0; i < nelt; i++)
49049 {
49050 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
49051 if (i & 1)
49052 {
49053 rperm[i] = constm1_rtx;
49054 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49055 }
49056 else
49057 {
49058 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49059 rperm[i + 64] = constm1_rtx;
49060 }
49061 }
49062
49063 bool ok = expand_vec_perm_1 (&ds[0]);
49064 gcc_assert (ok);
49065 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49066
49067 ok = expand_vec_perm_1 (&ds[1]);
49068 gcc_assert (ok);
49069 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
49070
49071 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49072 vperm = force_reg (vmode, vperm);
49073 target0 = gen_reg_rtx (V64QImode);
49074 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49075
49076 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49077 vperm = force_reg (vmode, vperm);
49078 target1 = gen_reg_rtx (V64QImode);
49079 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
49080
49081 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
49082 return true;
49083 }
49084
49085 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
49086 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
49087 all the shorter instruction sequences. */
49088
49089 static bool
49090 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49091 {
49092 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49093 unsigned int i, nelt, eltsz;
49094 bool used[4];
49095
49096 if (!TARGET_AVX2
49097 || d->one_operand_p
49098 || (d->vmode != V32QImode && d->vmode != V16HImode))
49099 return false;
49100
49101 if (d->testing_p)
49102 return true;
49103
49104 nelt = d->nelt;
49105 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
49106
49107 /* Generate 4 permutation masks. If the required element is within
49108 the same lane, it is shuffled in. If the required element is from the
49109 other lane, force a zero by setting bit 7 in the permutation mask.
49110 The other mask has a non-negative element whenever an element is
49111 requested from the other lane; that element is also moved to the other
49112 lane, so that the result of vpshufb can have the two V2TImode halves
49113 swapped. */
49114 m128 = GEN_INT (-128);
49115 for (i = 0; i < 32; ++i)
49116 {
49117 rperm[0][i] = m128;
49118 rperm[1][i] = m128;
49119 rperm[2][i] = m128;
49120 rperm[3][i] = m128;
49121 }
49122 used[0] = false;
49123 used[1] = false;
49124 used[2] = false;
49125 used[3] = false;
49126 for (i = 0; i < nelt; ++i)
49127 {
49128 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49129 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49130 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49131
49132 for (j = 0; j < eltsz; ++j)
49133 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49134 used[which] = true;
49135 }
49136
49137 for (i = 0; i < 2; ++i)
49138 {
49139 if (!used[2 * i + 1])
49140 {
49141 h[i] = NULL_RTX;
49142 continue;
49143 }
49144 vperm = gen_rtx_CONST_VECTOR (V32QImode,
49145 gen_rtvec_v (32, rperm[2 * i + 1]));
49146 vperm = force_reg (V32QImode, vperm);
49147 h[i] = gen_reg_rtx (V32QImode);
49148 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49149 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
49150 }
49151
49152 /* Swap the 128-bit lanes of h[X]. */
49153 for (i = 0; i < 2; ++i)
49154 {
49155 if (h[i] == NULL_RTX)
49156 continue;
49157 op = gen_reg_rtx (V4DImode);
49158 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49159 const2_rtx, GEN_INT (3), const0_rtx,
49160 const1_rtx));
49161 h[i] = gen_lowpart (V32QImode, op);
49162 }
49163
49164 for (i = 0; i < 2; ++i)
49165 {
49166 if (!used[2 * i])
49167 {
49168 l[i] = NULL_RTX;
49169 continue;
49170 }
49171 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49172 vperm = force_reg (V32QImode, vperm);
49173 l[i] = gen_reg_rtx (V32QImode);
49174 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49175 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
49176 }
49177
49178 for (i = 0; i < 2; ++i)
49179 {
49180 if (h[i] && l[i])
49181 {
49182 op = gen_reg_rtx (V32QImode);
49183 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49184 l[i] = op;
49185 }
49186 else if (h[i])
49187 l[i] = h[i];
49188 }
49189
49190 gcc_assert (l[0] && l[1]);
49191 op = d->target;
49192 if (d->vmode != V32QImode)
49193 op = gen_reg_rtx (V32QImode);
49194 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49195 if (op != d->target)
49196 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49197 return true;
49198 }
49199
49200 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49201 With all of the interface bits taken care of, perform the expansion
49202 in D and return true on success. */
49203
49204 static bool
49205 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49206 {
49207 /* Try a single instruction expansion. */
49208 if (expand_vec_perm_1 (d))
49209 return true;
49210
49211 /* Try sequences of two instructions. */
49212
49213 if (expand_vec_perm_pshuflw_pshufhw (d))
49214 return true;
49215
49216 if (expand_vec_perm_palignr (d, false))
49217 return true;
49218
49219 if (expand_vec_perm_interleave2 (d))
49220 return true;
49221
49222 if (expand_vec_perm_broadcast (d))
49223 return true;
49224
49225 if (expand_vec_perm_vpermq_perm_1 (d))
49226 return true;
49227
49228 if (expand_vec_perm_vperm2f128 (d))
49229 return true;
49230
49231 if (expand_vec_perm_pblendv (d))
49232 return true;
49233
49234 /* Try sequences of three instructions. */
49235
49236 if (expand_vec_perm_even_odd_pack (d))
49237 return true;
49238
49239 if (expand_vec_perm_2vperm2f128_vshuf (d))
49240 return true;
49241
49242 if (expand_vec_perm_pshufb2 (d))
49243 return true;
49244
49245 if (expand_vec_perm_interleave3 (d))
49246 return true;
49247
49248 if (expand_vec_perm_vperm2f128_vblend (d))
49249 return true;
49250
49251 /* Try sequences of four instructions. */
49252
49253 if (expand_vec_perm_vpshufb2_vpermq (d))
49254 return true;
49255
49256 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49257 return true;
49258
49259 if (expand_vec_perm_vpermi2_vpshub2 (d))
49260 return true;
49261
49262 /* ??? Look for narrow permutations whose element orderings would
49263 allow the promotion to a wider mode. */
49264
49265 /* ??? Look for sequences of interleave or a wider permute that place
49266 the data into the correct lanes for a half-vector shuffle like
49267 pshuf[lh]w or vpermilps. */
49268
49269 /* ??? Look for sequences of interleave that produce the desired results.
49270 The combinatorics of punpck[lh] get pretty ugly... */
49271
49272 if (expand_vec_perm_even_odd (d))
49273 return true;
49274
49275 /* Even longer sequences. */
49276 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49277 return true;
49278
49279 return false;
49280 }
49281
49282 /* If a permutation only uses one operand, make it clear. Returns true
49283 if the permutation references both operands. */
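/* For example, the V4SImode selector { 4 5 6 7 } only references op1,
   so it is folded to { 0 1 2 3 } with op0 replaced by op1 and
   one_operand_p left set; { 0 5 2 7 } references both operands and
   makes the function return true.  */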
49284
49285 static bool
49286 canonicalize_perm (struct expand_vec_perm_d *d)
49287 {
49288 int i, which, nelt = d->nelt;
49289
49290 for (i = which = 0; i < nelt; ++i)
49291 which |= (d->perm[i] < nelt ? 1 : 2);
49292
49293 d->one_operand_p = true;
49294 switch (which)
49295 {
49296 default:
49297 gcc_unreachable();
49298
49299 case 3:
49300 if (!rtx_equal_p (d->op0, d->op1))
49301 {
49302 d->one_operand_p = false;
49303 break;
49304 }
49305 /* The elements of PERM do not suggest that only the first operand
49306 is used, but both operands are identical. Allow easier matching
49307 of the permutation by folding the permutation into the single
49308 input vector. */
49309 /* FALLTHRU */
49310
49311 case 2:
49312 for (i = 0; i < nelt; ++i)
49313 d->perm[i] &= nelt - 1;
49314 d->op0 = d->op1;
49315 break;
49316
49317 case 1:
49318 d->op1 = d->op0;
49319 break;
49320 }
49321
49322 return (which == 3);
49323 }
49324
49325 bool
49326 ix86_expand_vec_perm_const (rtx operands[4])
49327 {
49328 struct expand_vec_perm_d d;
49329 unsigned char perm[MAX_VECT_LEN];
49330 int i, nelt;
49331 bool two_args;
49332 rtx sel;
49333
49334 d.target = operands[0];
49335 d.op0 = operands[1];
49336 d.op1 = operands[2];
49337 sel = operands[3];
49338
49339 d.vmode = GET_MODE (d.target);
49340 gcc_assert (VECTOR_MODE_P (d.vmode));
49341 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49342 d.testing_p = false;
49343
49344 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49345 gcc_assert (XVECLEN (sel, 0) == nelt);
49346 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
49347
49348 for (i = 0; i < nelt; ++i)
49349 {
49350 rtx e = XVECEXP (sel, 0, i);
49351 int ei = INTVAL (e) & (2 * nelt - 1);
49352 d.perm[i] = ei;
49353 perm[i] = ei;
49354 }
49355
49356 two_args = canonicalize_perm (&d);
49357
49358 if (ix86_expand_vec_perm_const_1 (&d))
49359 return true;
49360
49361 /* If the selector says both arguments are needed, but the operands are the
49362 same, the above tried to expand with one_operand_p and flattened selector.
49363 If that didn't work, retry without one_operand_p; we succeeded with that
49364 during testing. */
49365 if (two_args && d.one_operand_p)
49366 {
49367 d.one_operand_p = false;
49368 memcpy (d.perm, perm, sizeof (perm));
49369 return ix86_expand_vec_perm_const_1 (&d);
49370 }
49371
49372 return false;
49373 }
49374
49375 /* Implement targetm.vectorize.vec_perm_const_ok. */
49376
49377 static bool
49378 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49379 const unsigned char *sel)
49380 {
49381 struct expand_vec_perm_d d;
49382 unsigned int i, nelt, which;
49383 bool ret;
49384
49385 d.vmode = vmode;
49386 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49387 d.testing_p = true;
49388
49389 /* Given sufficient ISA support we can just return true here
49390 for selected vector modes. */
49391 switch (d.vmode)
49392 {
49393 case V16SFmode:
49394 case V16SImode:
49395 case V8DImode:
49396 case V8DFmode:
49397 if (TARGET_AVX512F)
49398 /* All implementable with a single vpermi2 insn. */
49399 return true;
49400 break;
49401 case V32HImode:
49402 if (TARGET_AVX512BW)
49403 /* All implementable with a single vpermi2 insn. */
49404 return true;
49405 break;
49406 case V64QImode:
49407 if (TARGET_AVX512BW)
49408 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
49409 return true;
49410 break;
49411 case V8SImode:
49412 case V8SFmode:
49413 case V4DFmode:
49414 case V4DImode:
49415 if (TARGET_AVX512VL)
49416 /* All implementable with a single vpermi2 insn. */
49417 return true;
49418 break;
49419 case V16HImode:
49420 if (TARGET_AVX2)
49421 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49422 return true;
49423 break;
49424 case V32QImode:
49425 if (TARGET_AVX2)
49426 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49427 return true;
49428 break;
49429 case V4SImode:
49430 case V4SFmode:
49431 case V8HImode:
49432 case V16QImode:
49433 /* All implementable with a single vpperm insn. */
49434 if (TARGET_XOP)
49435 return true;
49436 /* All implementable with 2 pshufb + 1 ior. */
49437 if (TARGET_SSSE3)
49438 return true;
49439 break;
49440 case V2DImode:
49441 case V2DFmode:
49442 /* All implementable with shufpd or unpck[lh]pd. */
49443 return true;
49444 default:
49445 return false;
49446 }
49447
49448 /* Extract the values from the vector CST into the permutation
49449 array in D. */
49450 memcpy (d.perm, sel, nelt);
49451 for (i = which = 0; i < nelt; ++i)
49452 {
49453 unsigned char e = d.perm[i];
49454 gcc_assert (e < 2 * nelt);
49455 which |= (e < nelt ? 1 : 2);
49456 }
49457
49458 /* For all elements from the second vector, fold them to the first. */
49459 if (which == 2)
49460 for (i = 0; i < nelt; ++i)
49461 d.perm[i] -= nelt;
49462
49463 /* Check whether the mask can be applied to the vector type. */
49464 d.one_operand_p = (which != 3);
49465
49466 /* Implementable with shufps or pshufd. */
49467 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49468 return true;
49469
49470 /* Otherwise we have to go through the motions and see if we can
49471 figure out how to generate the requested permutation. */
49472 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49473 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49474 if (!d.one_operand_p)
49475 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49476
49477 start_sequence ();
49478 ret = ix86_expand_vec_perm_const_1 (&d);
49479 end_sequence ();
49480
49481 return ret;
49482 }
49483
49484 void
49485 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49486 {
49487 struct expand_vec_perm_d d;
49488 unsigned i, nelt;
49489
49490 d.target = targ;
49491 d.op0 = op0;
49492 d.op1 = op1;
49493 d.vmode = GET_MODE (targ);
49494 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49495 d.one_operand_p = false;
49496 d.testing_p = false;
49497
49498 for (i = 0; i < nelt; ++i)
49499 d.perm[i] = i * 2 + odd;
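  /* E.g. with nelt == 4 and ODD == 1 this builds the selector
     { 1, 3, 5, 7 }: the odd elements of the op0/op1 concatenation.  */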
49500
49501 /* We'll either be able to implement the permutation directly... */
49502 if (expand_vec_perm_1 (&d))
49503 return;
49504
49505 /* ... or we use the special-case patterns. */
49506 expand_vec_perm_even_odd_1 (&d, odd);
49507 }
49508
49509 static void
49510 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49511 {
49512 struct expand_vec_perm_d d;
49513 unsigned i, nelt, base;
49514 bool ok;
49515
49516 d.target = targ;
49517 d.op0 = op0;
49518 d.op1 = op1;
49519 d.vmode = GET_MODE (targ);
49520 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49521 d.one_operand_p = false;
49522 d.testing_p = false;
49523
49524 base = high_p ? nelt / 2 : 0;
49525 for (i = 0; i < nelt / 2; ++i)
49526 {
49527 d.perm[i * 2] = i + base;
49528 d.perm[i * 2 + 1] = i + base + nelt;
49529 }
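  /* E.g. with nelt == 4 and HIGH_P the selector is { 2, 6, 3, 7 }
     (indices >= nelt refer to op1), i.e. the high halves of the two
     operands interleaved.  */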
49530
49531 /* Note that for AVX this isn't one instruction. */
49532 ok = ix86_expand_vec_perm_const_1 (&d);
49533 gcc_assert (ok);
49534 }
49535
49536
49537 /* Expand a vector operation CODE for a V*QImode in terms of the
49538 same operation on V*HImode. */
49539
49540 void
49541 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49542 {
49543 machine_mode qimode = GET_MODE (dest);
49544 machine_mode himode;
49545 rtx (*gen_il) (rtx, rtx, rtx);
49546 rtx (*gen_ih) (rtx, rtx, rtx);
49547 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49548 struct expand_vec_perm_d d;
49549 bool ok, full_interleave;
49550 bool uns_p = false;
49551 int i;
49552
49553 switch (qimode)
49554 {
49555 case V16QImode:
49556 himode = V8HImode;
49557 gen_il = gen_vec_interleave_lowv16qi;
49558 gen_ih = gen_vec_interleave_highv16qi;
49559 break;
49560 case V32QImode:
49561 himode = V16HImode;
49562 gen_il = gen_avx2_interleave_lowv32qi;
49563 gen_ih = gen_avx2_interleave_highv32qi;
49564 break;
49565 case V64QImode:
49566 himode = V32HImode;
49567 gen_il = gen_avx512bw_interleave_lowv64qi;
49568 gen_ih = gen_avx512bw_interleave_highv64qi;
49569 break;
49570 default:
49571 gcc_unreachable ();
49572 }
49573
49574 op2_l = op2_h = op2;
49575 switch (code)
49576 {
49577 case MULT:
49578 /* Unpack data such that we've got a source byte in each low byte of
49579 each word. We don't care what goes into the high byte of each word.
49580 Rather than trying to get zero in there, most convenient is to let
49581 it be a copy of the low byte. */
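      /* E.g. interleaving the low half of op2 with itself turns bytes
	 { b0, b1, ..., b7, ... } into words whose low and high bytes are
	 both b_i, which satisfies the requirement above.  */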
49582 op2_l = gen_reg_rtx (qimode);
49583 op2_h = gen_reg_rtx (qimode);
49584 emit_insn (gen_il (op2_l, op2, op2));
49585 emit_insn (gen_ih (op2_h, op2, op2));
49586 /* FALLTHRU */
49587
49588 op1_l = gen_reg_rtx (qimode);
49589 op1_h = gen_reg_rtx (qimode);
49590 emit_insn (gen_il (op1_l, op1, op1));
49591 emit_insn (gen_ih (op1_h, op1, op1));
49592 full_interleave = qimode == V16QImode;
49593 break;
49594
49595 case ASHIFT:
49596 case LSHIFTRT:
49597 uns_p = true;
49598 /* FALLTHRU */
49599 case ASHIFTRT:
49600 op1_l = gen_reg_rtx (himode);
49601 op1_h = gen_reg_rtx (himode);
49602 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49603 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49604 full_interleave = true;
49605 break;
49606 default:
49607 gcc_unreachable ();
49608 }
49609
49610 /* Perform the operation. */
49611 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49612 1, OPTAB_DIRECT);
49613 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49614 1, OPTAB_DIRECT);
49615 gcc_assert (res_l && res_h);
49616
49617 /* Merge the data back into the right place. */
49618 d.target = dest;
49619 d.op0 = gen_lowpart (qimode, res_l);
49620 d.op1 = gen_lowpart (qimode, res_h);
49621 d.vmode = qimode;
49622 d.nelt = GET_MODE_NUNITS (qimode);
49623 d.one_operand_p = false;
49624 d.testing_p = false;
49625
49626 if (full_interleave)
49627 {
49628 /* For SSE2, we used a full interleave, so the desired
49629 results are in the even elements. */
49630 for (i = 0; i < 64; ++i)
49631 d.perm[i] = i * 2;
49632 }
49633 else
49634 {
49635 /* For AVX, the interleave used above was not cross-lane. So the
49636 extraction is evens but with the second and third quarter swapped.
49637 Happily, that is even one insn shorter than even extraction. */
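      /* Concretely, for V32QImode the formula below maps i = 0..7 to
	 elements 0,2,...,14 of res_l, i = 8..15 to 32,34,...,46 (the low
	 lane of res_h), i = 16..23 to 16,18,...,30 of res_l, and
	 i = 24..31 to 48,...,62 of res_h.  */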
49638 for (i = 0; i < 64; ++i)
49639 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
49640 }
49641
49642 ok = ix86_expand_vec_perm_const_1 (&d);
49643 gcc_assert (ok);
49644
49645 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49646 gen_rtx_fmt_ee (code, qimode, op1, op2));
49647 }
49648
49649 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49650 if op is CONST_VECTOR with all odd elements equal to their
49651 preceding element. */
49652
49653 static bool
49654 const_vector_equal_evenodd_p (rtx op)
49655 {
49656 machine_mode mode = GET_MODE (op);
49657 int i, nunits = GET_MODE_NUNITS (mode);
49658 if (GET_CODE (op) != CONST_VECTOR
49659 || nunits != CONST_VECTOR_NUNITS (op))
49660 return false;
49661 for (i = 0; i < nunits; i += 2)
49662 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
49663 return false;
49664 return true;
49665 }
49666
49667 void
49668 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49669 bool uns_p, bool odd_p)
49670 {
49671 machine_mode mode = GET_MODE (op1);
49672 machine_mode wmode = GET_MODE (dest);
49673 rtx x;
49674 rtx orig_op1 = op1, orig_op2 = op2;
49675
49676 if (!nonimmediate_operand (op1, mode))
49677 op1 = force_reg (mode, op1);
49678 if (!nonimmediate_operand (op2, mode))
49679 op2 = force_reg (mode, op2);
49680
49681 /* We only play even/odd games with vectors of SImode. */
49682 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49683
49684 /* If we're looking for the odd results, shift those members down to
49685 the even slots. For some cpus this is faster than a PSHUFD. */
49686 if (odd_p)
49687 {
49688 /* For XOP use vpmacsdqh, but only for smult, as it is only
49689 signed. */
49690 if (TARGET_XOP && mode == V4SImode && !uns_p)
49691 {
49692 x = force_reg (wmode, CONST0_RTX (wmode));
49693 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
49694 return;
49695 }
49696
49697 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49698 if (!const_vector_equal_evenodd_p (orig_op1))
49699 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49700 x, NULL, 1, OPTAB_DIRECT);
49701 if (!const_vector_equal_evenodd_p (orig_op2))
49702 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49703 x, NULL, 1, OPTAB_DIRECT);
49704 op1 = gen_lowpart (mode, op1);
49705 op2 = gen_lowpart (mode, op2);
49706 }
49707
49708 if (mode == V16SImode)
49709 {
49710 if (uns_p)
49711 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49712 else
49713 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49714 }
49715 else if (mode == V8SImode)
49716 {
49717 if (uns_p)
49718 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49719 else
49720 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49721 }
49722 else if (uns_p)
49723 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49724 else if (TARGET_SSE4_1)
49725 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49726 else
49727 {
49728 rtx s1, s2, t0, t1, t2;
49729
49730 /* The easiest way to implement this without PMULDQ is to go through
49731 the motions as if we are performing a full 64-bit multiply. With
49732 the exception that we need to do less shuffling of the elements. */
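      /* Note: modulo 2^64, a * b == a_u * b_u - 2^32 * ((a < 0 ? b_u : 0)
	 + (b < 0 ? a_u : 0)).  The masks s1/s2 computed below are all-ones
	 exactly in the negative lanes, so t1 + t2, once shifted left by 32,
	 supplies that correction (only the low 32 bits of each lane survive
	 the shift).  */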
49733
49734 /* Compute the sign-extension, aka highparts, of the two operands. */
49735 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49736 op1, pc_rtx, pc_rtx);
49737 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49738 op2, pc_rtx, pc_rtx);
49739
49740 /* Multiply LO(A) * HI(B), and vice-versa. */
49741 t1 = gen_reg_rtx (wmode);
49742 t2 = gen_reg_rtx (wmode);
49743 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49744 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49745
49746 /* Multiply LO(A) * LO(B). */
49747 t0 = gen_reg_rtx (wmode);
49748 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49749
49750 /* Combine and shift the highparts into place. */
49751 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49752 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49753 1, OPTAB_DIRECT);
49754
49755 /* Combine high and low parts. */
49756 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
49757 return;
49758 }
49759 emit_insn (x);
49760 }
49761
49762 void
49763 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49764 bool uns_p, bool high_p)
49765 {
49766 machine_mode wmode = GET_MODE (dest);
49767 machine_mode mode = GET_MODE (op1);
49768 rtx t1, t2, t3, t4, mask;
49769
49770 switch (mode)
49771 {
49772 case V4SImode:
49773 t1 = gen_reg_rtx (mode);
49774 t2 = gen_reg_rtx (mode);
49775 if (TARGET_XOP && !uns_p)
49776 {
49777 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49778 shuffle the elements once so that all elements are in the right
49779 place for immediate use: { A C B D }. */
49780 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49781 const1_rtx, GEN_INT (3)));
49782 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49783 const1_rtx, GEN_INT (3)));
49784 }
49785 else
49786 {
49787 /* Put the elements into place for the multiply. */
49788 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49789 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49790 high_p = false;
49791 }
49792 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49793 break;
49794
49795 case V8SImode:
49796 /* Shuffle the elements between the lanes. After this we
49797 have { A B E F | C D G H } for each operand. */
49798 t1 = gen_reg_rtx (V4DImode);
49799 t2 = gen_reg_rtx (V4DImode);
49800 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49801 const0_rtx, const2_rtx,
49802 const1_rtx, GEN_INT (3)));
49803 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49804 const0_rtx, const2_rtx,
49805 const1_rtx, GEN_INT (3)));
49806
49807 /* Shuffle the elements within the lanes. After this we
49808 have { A A B B | C C D D } or { E E F F | G G H H }. */
49809 t3 = gen_reg_rtx (V8SImode);
49810 t4 = gen_reg_rtx (V8SImode);
49811 mask = GEN_INT (high_p
49812 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49813 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
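      /* The pshufd immediate packs four 2-bit element selectors, lowest
	 element first: 0xfa selects { 2, 2, 3, 3 } for the high-part case
	 and 0x50 selects { 0, 0, 1, 1 } for the low-part case.  */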
49814 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49815 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49816
49817 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
49818 break;
49819
49820 case V8HImode:
49821 case V16HImode:
49822 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49823 uns_p, OPTAB_DIRECT);
49824 t2 = expand_binop (mode,
49825 uns_p ? umul_highpart_optab : smul_highpart_optab,
49826 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49827 gcc_assert (t1 && t2);
49828
49829 t3 = gen_reg_rtx (mode);
49830 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49831 emit_move_insn (dest, gen_lowpart (wmode, t3));
49832 break;
49833
49834 case V16QImode:
49835 case V32QImode:
49836 case V32HImode:
49837 case V16SImode:
49838 case V64QImode:
49839 t1 = gen_reg_rtx (wmode);
49840 t2 = gen_reg_rtx (wmode);
49841 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49842 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49843
49844 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
49845 break;
49846
49847 default:
49848 gcc_unreachable ();
49849 }
49850 }
49851
49852 void
49853 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49854 {
49855 rtx res_1, res_2, res_3, res_4;
49856
49857 res_1 = gen_reg_rtx (V4SImode);
49858 res_2 = gen_reg_rtx (V4SImode);
49859 res_3 = gen_reg_rtx (V2DImode);
49860 res_4 = gen_reg_rtx (V2DImode);
49861 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
49862 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
49863
49864 /* Move the results in element 2 down to element 1; we don't care
49865 what goes in elements 2 and 3. Then we can merge the parts
49866 back together with an interleave.
49867
49868 Note that two other sequences were tried:
49869 (1) Use interleaves at the start instead of psrldq, which allows
49870 us to use a single shufps to merge things back at the end.
49871 (2) Use shufps here to combine the two vectors, then pshufd to
49872 put the elements in the correct order.
49873 In both cases the cost of the reformatting stall was too high
49874 and the overall sequence slower. */
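  /* Concretely: viewed as V4SImode, res_3 is { lo(A0*B0), hi(A0*B0),
     lo(A2*B2), hi(A2*B2) }, so the { 0, 2, x, x } shuffle below leaves
     the low halves of the even products in elements 0 and 1; res_4 gets
     the same treatment for the odd products, and the final interleave
     produces { A0*B0, A1*B1, A2*B2, A3*B3 } truncated to 32 bits.  */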
49875
49876 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
49877 const0_rtx, const2_rtx,
49878 const0_rtx, const0_rtx));
49879 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
49880 const0_rtx, const2_rtx,
49881 const0_rtx, const0_rtx));
49882 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
49883
49884 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
49885 }
49886
49887 void
49888 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
49889 {
49890 machine_mode mode = GET_MODE (op0);
49891 rtx t1, t2, t3, t4, t5, t6;
49892
49893 if (TARGET_AVX512DQ && mode == V8DImode)
49894 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
49895 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
49896 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
49897 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
49898 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
49899 else if (TARGET_XOP && mode == V2DImode)
49900 {
49901 /* op1: A,B,C,D, op2: E,F,G,H */
49902 op1 = gen_lowpart (V4SImode, op1);
49903 op2 = gen_lowpart (V4SImode, op2);
49904
49905 t1 = gen_reg_rtx (V4SImode);
49906 t2 = gen_reg_rtx (V4SImode);
49907 t3 = gen_reg_rtx (V2DImode);
49908 t4 = gen_reg_rtx (V2DImode);
49909
49910 /* t1: B,A,D,C */
49911 emit_insn (gen_sse2_pshufd_1 (t1, op1,
49912 GEN_INT (1),
49913 GEN_INT (0),
49914 GEN_INT (3),
49915 GEN_INT (2)));
49916
49917 /* t2: (B*E),(A*F),(D*G),(C*H) */
49918 emit_insn (gen_mulv4si3 (t2, t1, op2));
49919
49920 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
49921 emit_insn (gen_xop_phadddq (t3, t2));
49922
49923 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
49924 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
49925
49926 /* Multiply lower parts and add all */
49927 t5 = gen_reg_rtx (V2DImode);
49928 emit_insn (gen_vec_widen_umult_even_v4si (t5,
49929 gen_lowpart (V4SImode, op1),
49930 gen_lowpart (V4SImode, op2)));
49931 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
49932
49933 }
49934 else
49935 {
49936 machine_mode nmode;
49937 rtx (*umul) (rtx, rtx, rtx);
49938
49939 if (mode == V2DImode)
49940 {
49941 umul = gen_vec_widen_umult_even_v4si;
49942 nmode = V4SImode;
49943 }
49944 else if (mode == V4DImode)
49945 {
49946 umul = gen_vec_widen_umult_even_v8si;
49947 nmode = V8SImode;
49948 }
49949 else if (mode == V8DImode)
49950 {
49951 umul = gen_vec_widen_umult_even_v16si;
49952 nmode = V16SImode;
49953 }
49954 else
49955 gcc_unreachable ();
49956
49957
49958 /* Multiply low parts. */
49959 t1 = gen_reg_rtx (mode);
49960 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
49961
49962 /* Shift input vectors right 32 bits so we can multiply high parts. */
49963 t6 = GEN_INT (32);
49964 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
49965 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
49966
49967 /* Multiply high parts by low parts. */
49968 t4 = gen_reg_rtx (mode);
49969 t5 = gen_reg_rtx (mode);
49970 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
49971 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
49972
49973 /* Combine and shift the highparts back. */
49974 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
49975 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
49976
49977 /* Combine high and low parts. */
49978 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
49979 }
49980
49981 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49982 gen_rtx_MULT (mode, op1, op2));
49983 }
49984
49985 /* Return 1 if control transfer instruction INSN
49986 should be encoded with the bnd prefix.
49987 If INSN is NULL then return 1 when control
49988 transfer instructions should be prefixed with
49989 bnd by default for the current function. */
49990
49991 bool
49992 ix86_bnd_prefixed_insn_p (rtx insn)
49993 {
49994 /* For call insns check special flag. */
49995 if (insn && CALL_P (insn))
49996 {
49997 rtx call = get_call_rtx_from (insn);
49998 if (call)
49999 return CALL_EXPR_WITH_BOUNDS_P (call);
50000 }
50001
50002 /* All other insns are prefixed only if the function is instrumented. */
50003 return chkp_function_instrumented_p (current_function_decl);
50004 }
50005
50006 /* Calculate integer abs() using only SSE2 instructions. */
50007
50008 void
50009 ix86_expand_sse2_abs (rtx target, rtx input)
50010 {
50011 machine_mode mode = GET_MODE (target);
50012 rtx tmp0, tmp1, x;
50013
50014 switch (mode)
50015 {
50016 /* For 32-bit signed integer X, the best way to calculate the absolute
50017 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
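    /* For example, X = -5: X >> 31 == -1 (all ones), (-1 ^ -5) == 4,
       and 4 - (-1) == 5.  */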
50018 case V4SImode:
50019 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
50020 GEN_INT (GET_MODE_BITSIZE
50021 (GET_MODE_INNER (mode)) - 1),
50022 NULL, 0, OPTAB_DIRECT);
50023 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
50024 NULL, 0, OPTAB_DIRECT);
50025 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
50026 target, 0, OPTAB_DIRECT);
50027 break;
50028
50029 /* For 16-bit signed integer X, the best way to calculate the absolute
50030 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
50031 case V8HImode:
50032 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50033
50034 x = expand_simple_binop (mode, SMAX, tmp0, input,
50035 target, 0, OPTAB_DIRECT);
50036 break;
50037
50038 /* For 8-bit signed integer X, the best way to calculate the absolute
50039 value of X is min ((unsigned char) X, (unsigned char) (-X)),
50040 as SSE2 provides the PMINUB insn. */
50041 case V16QImode:
50042 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50043
50044 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
50045 target, 0, OPTAB_DIRECT);
50046 break;
50047
50048 default:
50049 gcc_unreachable ();
50050 }
50051
50052 if (x != target)
50053 emit_move_insn (target, x);
50054 }
50055
50056 /* Expand an insert into a vector register through pinsr insn.
50057 Return true if successful. */
50058
50059 bool
50060 ix86_expand_pinsr (rtx *operands)
50061 {
50062 rtx dst = operands[0];
50063 rtx src = operands[3];
50064
50065 unsigned int size = INTVAL (operands[1]);
50066 unsigned int pos = INTVAL (operands[2]);
50067
50068 if (GET_CODE (dst) == SUBREG)
50069 {
50070 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50071 dst = SUBREG_REG (dst);
50072 }
50073
50074 if (GET_CODE (src) == SUBREG)
50075 src = SUBREG_REG (src);
50076
50077 switch (GET_MODE (dst))
50078 {
50079 case V16QImode:
50080 case V8HImode:
50081 case V4SImode:
50082 case V2DImode:
50083 {
50084 machine_mode srcmode, dstmode;
50085 rtx (*pinsr)(rtx, rtx, rtx, rtx);
50086
50087 srcmode = mode_for_size (size, MODE_INT, 0);
50088
50089 switch (srcmode)
50090 {
50091 case QImode:
50092 if (!TARGET_SSE4_1)
50093 return false;
50094 dstmode = V16QImode;
50095 pinsr = gen_sse4_1_pinsrb;
50096 break;
50097
50098 case HImode:
50099 if (!TARGET_SSE2)
50100 return false;
50101 dstmode = V8HImode;
50102 pinsr = gen_sse2_pinsrw;
50103 break;
50104
50105 case SImode:
50106 if (!TARGET_SSE4_1)
50107 return false;
50108 dstmode = V4SImode;
50109 pinsr = gen_sse4_1_pinsrd;
50110 break;
50111
50112 case DImode:
50113 gcc_assert (TARGET_64BIT);
50114 if (!TARGET_SSE4_1)
50115 return false;
50116 dstmode = V2DImode;
50117 pinsr = gen_sse4_1_pinsrq;
50118 break;
50119
50120 default:
50121 return false;
50122 }
50123
50124 rtx d = dst;
50125 if (GET_MODE (dst) != dstmode)
50126 d = gen_reg_rtx (dstmode);
50127 src = gen_lowpart (srcmode, src);
50128
50129 pos /= size;
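	  /* E.g. inserting a 16-bit value at bit offset 48 of a V8HImode
	     destination: size == 16, so pos becomes 3 and the single set
	     bit in the immediate (1 << 3) selects lane 3 for pinsrw.  */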
50130
50131 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
50132 GEN_INT (1 << pos)));
50133 if (d != dst)
50134 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50135 return true;
50136 }
50137
50138 default:
50139 return false;
50140 }
50141 }
50142 \f
50143 /* This function returns the calling-ABI-specific va_list type node.
50144 It returns the va_list type appropriate for FNDECL's ABI. */
50145
50146 static tree
50147 ix86_fn_abi_va_list (tree fndecl)
50148 {
50149 if (!TARGET_64BIT)
50150 return va_list_type_node;
50151 gcc_assert (fndecl != NULL_TREE);
50152
50153 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50154 return ms_va_list_type_node;
50155 else
50156 return sysv_va_list_type_node;
50157 }
50158
50159 /* Returns the canonical va_list type specified by TYPE. If there
50160 is no valid TYPE provided, it returns NULL_TREE. */
50161
50162 static tree
50163 ix86_canonical_va_list_type (tree type)
50164 {
50165 tree wtype, htype;
50166
50167 /* Resolve references and pointers to va_list type. */
50168 if (TREE_CODE (type) == MEM_REF)
50169 type = TREE_TYPE (type);
50170 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
50171 type = TREE_TYPE (type);
50172 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50173 type = TREE_TYPE (type);
50174
50175 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50176 {
50177 wtype = va_list_type_node;
50178 gcc_assert (wtype != NULL_TREE);
50179 htype = type;
50180 if (TREE_CODE (wtype) == ARRAY_TYPE)
50181 {
50182 /* If va_list is an array type, the argument may have decayed
50183 to a pointer type, e.g. by being passed to another function.
50184 In that case, unwrap both types so that we can compare the
50185 underlying records. */
50186 if (TREE_CODE (htype) == ARRAY_TYPE
50187 || POINTER_TYPE_P (htype))
50188 {
50189 wtype = TREE_TYPE (wtype);
50190 htype = TREE_TYPE (htype);
50191 }
50192 }
50193 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50194 return va_list_type_node;
50195 wtype = sysv_va_list_type_node;
50196 gcc_assert (wtype != NULL_TREE);
50197 htype = type;
50198 if (TREE_CODE (wtype) == ARRAY_TYPE)
50199 {
50200 /* If va_list is an array type, the argument may have decayed
50201 to a pointer type, e.g. by being passed to another function.
50202 In that case, unwrap both types so that we can compare the
50203 underlying records. */
50204 if (TREE_CODE (htype) == ARRAY_TYPE
50205 || POINTER_TYPE_P (htype))
50206 {
50207 wtype = TREE_TYPE (wtype);
50208 htype = TREE_TYPE (htype);
50209 }
50210 }
50211 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50212 return sysv_va_list_type_node;
50213 wtype = ms_va_list_type_node;
50214 gcc_assert (wtype != NULL_TREE);
50215 htype = type;
50216 if (TREE_CODE (wtype) == ARRAY_TYPE)
50217 {
50218 /* If va_list is an array type, the argument may have decayed
50219 to a pointer type, e.g. by being passed to another function.
50220 In that case, unwrap both types so that we can compare the
50221 underlying records. */
50222 if (TREE_CODE (htype) == ARRAY_TYPE
50223 || POINTER_TYPE_P (htype))
50224 {
50225 wtype = TREE_TYPE (wtype);
50226 htype = TREE_TYPE (htype);
50227 }
50228 }
50229 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50230 return ms_va_list_type_node;
50231 return NULL_TREE;
50232 }
50233 return std_canonical_va_list_type (type);
50234 }
50235
50236 /* Iterate through the target-specific builtin types for va_list.
50237 IDX denotes the iterator, *PTREE is set to the result type of
50238 the va_list builtin, and *PNAME to its internal type.
50239 Returns zero if there is no element for this index, otherwise
50240 IDX should be increased upon the next call.
50241 Note, do not iterate a base builtin's name like __builtin_va_list.
50242 Used from c_common_nodes_and_builtins. */
50243
50244 static int
50245 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50246 {
50247 if (TARGET_64BIT)
50248 {
50249 switch (idx)
50250 {
50251 default:
50252 break;
50253
50254 case 0:
50255 *ptree = ms_va_list_type_node;
50256 *pname = "__builtin_ms_va_list";
50257 return 1;
50258
50259 case 1:
50260 *ptree = sysv_va_list_type_node;
50261 *pname = "__builtin_sysv_va_list";
50262 return 1;
50263 }
50264 }
50265
50266 return 0;
50267 }
50268
50269 #undef TARGET_SCHED_DISPATCH
50270 #define TARGET_SCHED_DISPATCH has_dispatch
50271 #undef TARGET_SCHED_DISPATCH_DO
50272 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50273 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50274 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50275 #undef TARGET_SCHED_REORDER
50276 #define TARGET_SCHED_REORDER ix86_sched_reorder
50277 #undef TARGET_SCHED_ADJUST_PRIORITY
50278 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50279 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50280 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50281 ix86_dependencies_evaluation_hook
50282
50283 /* The size of the dispatch window is the total number of bytes of
50284 object code allowed in a window. */
50285 #define DISPATCH_WINDOW_SIZE 16
50286
50287 /* Number of dispatch windows considered for scheduling. */
50288 #define MAX_DISPATCH_WINDOWS 3
50289
50290 /* Maximum number of instructions in a window. */
50291 #define MAX_INSN 4
50292
50293 /* Maximum number of immediate operands in a window. */
50294 #define MAX_IMM 4
50295
50296 /* Maximum number of immediate bits allowed in a window. */
50297 #define MAX_IMM_SIZE 128
50298
50299 /* Maximum number of 32 bit immediates allowed in a window. */
50300 #define MAX_IMM_32 4
50301
50302 /* Maximum number of 64 bit immediates allowed in a window. */
50303 #define MAX_IMM_64 2
50304
50305 /* Maximum total of loads or prefetches allowed in a window. */
50306 #define MAX_LOAD 2
50307
50308 /* Maximum total of stores allowed in a window. */
50309 #define MAX_STORE 1
50310
50311 #undef BIG
50312 #define BIG 100
50313
50314
50315 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50316 enum dispatch_group {
50317 disp_no_group = 0,
50318 disp_load,
50319 disp_store,
50320 disp_load_store,
50321 disp_prefetch,
50322 disp_imm,
50323 disp_imm_32,
50324 disp_imm_64,
50325 disp_branch,
50326 disp_cmp,
50327 disp_jcc,
50328 disp_last
50329 };
50330
50331 /* Number of allowable groups in a dispatch window. It is an array
50332 indexed by the dispatch_group enum. 100 is used as a big number
50333 because the number of these kinds of operations does not have any
50334 effect on the dispatch window, but we still need entries for them
50335 in the table. */
50336 static unsigned int num_allowable_groups[disp_last] = {
50337 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
50338 };
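/* In enum order that is: 0 for disp_no_group, 2 loads (MAX_LOAD), 1 store
   (MAX_STORE), 1 load+store, 2 prefetches, 4 immediate operands (MAX_IMM),
   4 32-bit immediates (MAX_IMM_32), 2 64-bit immediates (MAX_IMM_64) and
   1 branch per window; disp_cmp and disp_jcc use BIG because they never
   limit a window by count.  */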
50339
50340 char group_name[disp_last + 1][16] = {
50341 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50342 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50343 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50344 };
50345
50346 /* Instruction path. */
50347 enum insn_path {
50348 no_path = 0,
50349 path_single, /* Single micro op. */
50350 path_double, /* Double micro op. */
50351 path_multi, /* Instructions with more than 2 micro ops. */
50352 last_path
50353 };
50354
50355 /* sched_insn_info defines a window to the instructions scheduled in
50356 the basic block. It contains a pointer to the insn_info table and
50357 the instruction scheduled.
50358
50359 Windows are allocated for each basic block and are linked
50360 together. */
50361 typedef struct sched_insn_info_s {
50362 rtx insn;
50363 enum dispatch_group group;
50364 enum insn_path path;
50365 int byte_len;
50366 int imm_bytes;
50367 } sched_insn_info;
50368
50369 /* Linked list of dispatch windows. This is a two way list of
50370 dispatch windows of a basic block. It contains information about
50371 the number of uops in the window and the total number of
50372 instructions and of bytes in the object code for this dispatch
50373 window. */
50374 typedef struct dispatch_windows_s {
50375 int num_insn; /* Number of insn in the window. */
50376 int num_uops; /* Number of uops in the window. */
50377 int window_size; /* Number of bytes in the window. */
50378 int window_num; /* Window number, either 0 or 1. */
50379 int num_imm; /* Number of immediates in an insn. */
50380 int num_imm_32; /* Number of 32 bit immediates in an insn. */
50381 int num_imm_64; /* Number of 64 bit immediates in an insn. */
50382 int imm_size; /* Total immediates in the window. */
50383 int num_loads; /* Total memory loads in the window. */
50384 int num_stores; /* Total memory stores in the window. */
50385 int violation; /* Violation exists in window. */
50386 sched_insn_info *window; /* Pointer to the window. */
50387 struct dispatch_windows_s *next;
50388 struct dispatch_windows_s *prev;
50389 } dispatch_windows;
50390
50391 /* Immediate values used in an insn. */
50392 typedef struct imm_info_s
50393 {
50394 int imm;
50395 int imm32;
50396 int imm64;
50397 } imm_info;
50398
50399 static dispatch_windows *dispatch_window_list;
50400 static dispatch_windows *dispatch_window_list1;
50401
50402 /* Get dispatch group of insn. */
50403
50404 static enum dispatch_group
50405 get_mem_group (rtx_insn *insn)
50406 {
50407 enum attr_memory memory;
50408
50409 if (INSN_CODE (insn) < 0)
50410 return disp_no_group;
50411 memory = get_attr_memory (insn);
50412 if (memory == MEMORY_STORE)
50413 return disp_store;
50414
50415 if (memory == MEMORY_LOAD)
50416 return disp_load;
50417
50418 if (memory == MEMORY_BOTH)
50419 return disp_load_store;
50420
50421 return disp_no_group;
50422 }
50423
50424 /* Return true if insn is a compare instruction. */
50425
50426 static bool
50427 is_cmp (rtx_insn *insn)
50428 {
50429 enum attr_type type;
50430
50431 type = get_attr_type (insn);
50432 return (type == TYPE_TEST
50433 || type == TYPE_ICMP
50434 || type == TYPE_FCMP
50435 || GET_CODE (PATTERN (insn)) == COMPARE);
50436 }
50437
50438 /* Return true if a dispatch violation was encountered. */
50439
50440 static bool
50441 dispatch_violation (void)
50442 {
50443 if (dispatch_window_list->next)
50444 return dispatch_window_list->next->violation;
50445 return dispatch_window_list->violation;
50446 }
50447
50448 /* Return true if insn is a branch instruction. */
50449
50450 static bool
50451 is_branch (rtx insn)
50452 {
50453 return (CALL_P (insn) || JUMP_P (insn));
50454 }
50455
50456 /* Return true if insn is a prefetch instruction. */
50457
50458 static bool
50459 is_prefetch (rtx insn)
50460 {
50461 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50462 }
50463
50464 /* This function initializes a dispatch window and the list container holding a
50465 pointer to the window. */
50466
50467 static void
50468 init_window (int window_num)
50469 {
50470 int i;
50471 dispatch_windows *new_list;
50472
50473 if (window_num == 0)
50474 new_list = dispatch_window_list;
50475 else
50476 new_list = dispatch_window_list1;
50477
50478 new_list->num_insn = 0;
50479 new_list->num_uops = 0;
50480 new_list->window_size = 0;
50481 new_list->next = NULL;
50482 new_list->prev = NULL;
50483 new_list->window_num = window_num;
50484 new_list->num_imm = 0;
50485 new_list->num_imm_32 = 0;
50486 new_list->num_imm_64 = 0;
50487 new_list->imm_size = 0;
50488 new_list->num_loads = 0;
50489 new_list->num_stores = 0;
50490 new_list->violation = false;
50491
50492 for (i = 0; i < MAX_INSN; i++)
50493 {
50494 new_list->window[i].insn = NULL;
50495 new_list->window[i].group = disp_no_group;
50496 new_list->window[i].path = no_path;
50497 new_list->window[i].byte_len = 0;
50498 new_list->window[i].imm_bytes = 0;
50499 }
50500 return;
50501 }
50502
50503 /* This function allocates and initializes a dispatch window and the
50504 list container holding a pointer to the window. */
50505
50506 static dispatch_windows *
50507 allocate_window (void)
50508 {
50509 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
50510 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50511
50512 return new_list;
50513 }
50514
50515 /* This routine initializes the dispatch scheduling information. It
50516 initiates building dispatch scheduler tables and constructs the
50517 first dispatch window. */
50518
50519 static void
50520 init_dispatch_sched (void)
50521 {
50522 /* Allocate a dispatch list and a window. */
50523 dispatch_window_list = allocate_window ();
50524 dispatch_window_list1 = allocate_window ();
50525 init_window (0);
50526 init_window (1);
50527 }
50528
50529 /* This function returns true if a branch is detected. End of a basic block
50530 does not have to be a branch, but here we assume only branches end a
50531 window. */
50532
50533 static bool
50534 is_end_basic_block (enum dispatch_group group)
50535 {
50536 return group == disp_branch;
50537 }
50538
50539 /* This function is called when the end of a window processing is reached. */
50540
50541 static void
50542 process_end_window (void)
50543 {
50544 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
50545 if (dispatch_window_list->next)
50546 {
50547 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
50548 gcc_assert (dispatch_window_list->window_size
50549 + dispatch_window_list1->window_size <= 48);
50550 init_window (1);
50551 }
50552 init_window (0);
50553 }
50554
50555 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
50556 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
50557 for 48 bytes of instructions. Note that these windows are not dispatch
50558 windows whose sizes are DISPATCH_WINDOW_SIZE. */
50559
50560 static dispatch_windows *
50561 allocate_next_window (int window_num)
50562 {
50563 if (window_num == 0)
50564 {
50565 if (dispatch_window_list->next)
50566 init_window (1);
50567 init_window (0);
50568 return dispatch_window_list;
50569 }
50570
50571 dispatch_window_list->next = dispatch_window_list1;
50572 dispatch_window_list1->prev = dispatch_window_list;
50573
50574 return dispatch_window_list1;
50575 }
50576
50577 /* Compute number of immediate operands of an instruction. */
50578
50579 static void
50580 find_constant (rtx in_rtx, imm_info *imm_values)
50581 {
50582 if (INSN_P (in_rtx))
50583 in_rtx = PATTERN (in_rtx);
50584 subrtx_iterator::array_type array;
50585 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
50586 if (const_rtx x = *iter)
50587 switch (GET_CODE (x))
50588 {
50589 case CONST:
50590 case SYMBOL_REF:
50591 case CONST_INT:
50592 (imm_values->imm)++;
50593 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
50594 (imm_values->imm32)++;
50595 else
50596 (imm_values->imm64)++;
50597 break;
50598
50599 case CONST_DOUBLE:
50600 (imm_values->imm)++;
50601 (imm_values->imm64)++;
50602 break;
50603
50604 case CODE_LABEL:
50605 if (LABEL_KIND (x) == LABEL_NORMAL)
50606 {
50607 (imm_values->imm)++;
50608 (imm_values->imm32)++;
50609 }
50610 break;
50611
50612 default:
50613 break;
50614 }
50615 }
50616
50617 /* Return total size of immediate operands of an instruction along with the
50618 number of corresponding immediate operands. It initializes its parameters
50619 to zero before calling FIND_CONSTANT.
50620 INSN is the input instruction. IMM is the total of immediates.
50621 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
50622 bit immediates. */
50623
50624 static int
50625 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
50626 {
50627 imm_info imm_values = {0, 0, 0};
50628
50629 find_constant (insn, &imm_values);
50630 *imm = imm_values.imm;
50631 *imm32 = imm_values.imm32;
50632 *imm64 = imm_values.imm64;
50633 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
50634 }
50635
50636 /* This function indicates whether an instruction has any immediate
50637 operands. */
50638
50639 static bool
50640 has_immediate (rtx insn)
50641 {
50642 int num_imm_operand;
50643 int num_imm32_operand;
50644 int num_imm64_operand;
50645
50646 if (insn)
50647 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50648 &num_imm64_operand);
50649 return false;
50650 }
50651
50652 /* Return single or double path for instructions. */
50653
50654 static enum insn_path
50655 get_insn_path (rtx_insn *insn)
50656 {
50657 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
50658
50659 if ((int)path == 0)
50660 return path_single;
50661
50662 if ((int)path == 1)
50663 return path_double;
50664
50665 return path_multi;
50666 }
50667
50668 /* Return insn dispatch group. */
50669
50670 static enum dispatch_group
50671 get_insn_group (rtx_insn *insn)
50672 {
50673 enum dispatch_group group = get_mem_group (insn);
50674 if (group)
50675 return group;
50676
50677 if (is_branch (insn))
50678 return disp_branch;
50679
50680 if (is_cmp (insn))
50681 return disp_cmp;
50682
50683 if (has_immediate (insn))
50684 return disp_imm;
50685
50686 if (is_prefetch (insn))
50687 return disp_prefetch;
50688
50689 return disp_no_group;
50690 }
50691
50692 /* Count number of GROUP restricted instructions in a dispatch
50693 window WINDOW_LIST. */
50694
50695 static int
50696 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
50697 {
50698 enum dispatch_group group = get_insn_group (insn);
50699 int imm_size;
50700 int num_imm_operand;
50701 int num_imm32_operand;
50702 int num_imm64_operand;
50703
50704 if (group == disp_no_group)
50705 return 0;
50706
50707 if (group == disp_imm)
50708 {
50709 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50710 &num_imm64_operand);
50711 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
50712 || num_imm_operand + window_list->num_imm > MAX_IMM
50713 || (num_imm32_operand > 0
50714 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
50715 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
50716 || (num_imm64_operand > 0
50717 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
50718 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
50719 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
50720 && num_imm64_operand > 0
50721 && ((window_list->num_imm_64 > 0
50722 && window_list->num_insn >= 2)
50723 || window_list->num_insn >= 3)))
50724 return BIG;
50725
50726 return 1;
50727 }
50728
50729 if ((group == disp_load_store
50730 && (window_list->num_loads >= MAX_LOAD
50731 || window_list->num_stores >= MAX_STORE))
50732 || ((group == disp_load
50733 || group == disp_prefetch)
50734 && window_list->num_loads >= MAX_LOAD)
50735 || (group == disp_store
50736 && window_list->num_stores >= MAX_STORE))
50737 return BIG;
50738
50739 return 1;
50740 }
50741
50742 /* This function returns true if insn satisfies dispatch rules on the
50743 last window scheduled. */
50744
50745 static bool
50746 fits_dispatch_window (rtx_insn *insn)
50747 {
50748 dispatch_windows *window_list = dispatch_window_list;
50749 dispatch_windows *window_list_next = dispatch_window_list->next;
50750 unsigned int num_restrict;
50751 enum dispatch_group group = get_insn_group (insn);
50752 enum insn_path path = get_insn_path (insn);
50753 int sum;
50754
50755 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
50756 instructions should be given the lowest priority in the
50757 scheduling process in Haifa scheduler to make sure they will be
50758 scheduled in the same dispatch window as the reference to them. */
50759 if (group == disp_jcc || group == disp_cmp)
50760 return false;
50761
50762 /* Check nonrestricted. */
50763 if (group == disp_no_group || group == disp_branch)
50764 return true;
50765
50766 /* Get last dispatch window. */
50767 if (window_list_next)
50768 window_list = window_list_next;
50769
50770 if (window_list->window_num == 1)
50771 {
50772 sum = window_list->prev->window_size + window_list->window_size;
50773
50774 if (sum == 32
50775 || (min_insn_size (insn) + sum) >= 48)
50776 /* Window 1 is full. Go for next window. */
50777 return true;
50778 }
50779
50780 num_restrict = count_num_restricted (insn, window_list);
50781
50782 if (num_restrict > num_allowable_groups[group])
50783 return false;
50784
50785 /* See if it fits in the first window. */
50786 if (window_list->window_num == 0)
50787 {
50788 /* The first window should have only single and double path
50789 uops. */
50790 if (path == path_double
50791 && (window_list->num_uops + 2) > MAX_INSN)
50792 return false;
50793 else if (path != path_single)
50794 return false;
50795 }
50796 return true;
50797 }
50798
50799 /* Add an instruction INSN with NUM_UOPS micro-operations to the
50800 dispatch window WINDOW_LIST. */
50801
50802 static void
50803 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
50804 {
50805 int byte_len = min_insn_size (insn);
50806 int num_insn = window_list->num_insn;
50807 int imm_size;
50808 sched_insn_info *window = window_list->window;
50809 enum dispatch_group group = get_insn_group (insn);
50810 enum insn_path path = get_insn_path (insn);
50811 int num_imm_operand;
50812 int num_imm32_operand;
50813 int num_imm64_operand;
50814
50815 if (!window_list->violation && group != disp_cmp
50816 && !fits_dispatch_window (insn))
50817 window_list->violation = true;
50818
50819 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50820 &num_imm64_operand);
50821
50822 /* Initialize window with new instruction. */
50823 window[num_insn].insn = insn;
50824 window[num_insn].byte_len = byte_len;
50825 window[num_insn].group = group;
50826 window[num_insn].path = path;
50827 window[num_insn].imm_bytes = imm_size;
50828
50829 window_list->window_size += byte_len;
50830 window_list->num_insn = num_insn + 1;
50831 window_list->num_uops = window_list->num_uops + num_uops;
50832 window_list->imm_size += imm_size;
50833 window_list->num_imm += num_imm_operand;
50834 window_list->num_imm_32 += num_imm32_operand;
50835 window_list->num_imm_64 += num_imm64_operand;
50836
50837 if (group == disp_store)
50838 window_list->num_stores += 1;
50839 else if (group == disp_load
50840 || group == disp_prefetch)
50841 window_list->num_loads += 1;
50842 else if (group == disp_load_store)
50843 {
50844 window_list->num_stores += 1;
50845 window_list->num_loads += 1;
50846 }
50847 }
50848
50849 /* Adds a scheduled instruction, INSN, to the current dispatch window.
50850 If the total bytes of instructions or the number of instructions in
50851 the window exceed the allowable limits, it allocates a new window. */
50852
50853 static void
50854 add_to_dispatch_window (rtx_insn *insn)
50855 {
50856 int byte_len;
50857 dispatch_windows *window_list;
50858 dispatch_windows *next_list;
50859 dispatch_windows *window0_list;
50860 enum insn_path path;
50861 enum dispatch_group insn_group;
50862 bool insn_fits;
50863 int num_insn;
50864 int num_uops;
50865 int window_num;
50866 int insn_num_uops;
50867 int sum;
50868
50869 if (INSN_CODE (insn) < 0)
50870 return;
50871
50872 byte_len = min_insn_size (insn);
50873 window_list = dispatch_window_list;
50874 next_list = window_list->next;
50875 path = get_insn_path (insn);
50876 insn_group = get_insn_group (insn);
50877
50878 /* Get the last dispatch window. */
50879 if (next_list)
50880 window_list = dispatch_window_list->next;
50881
50882 if (path == path_single)
50883 insn_num_uops = 1;
50884 else if (path == path_double)
50885 insn_num_uops = 2;
50886 else
50887 insn_num_uops = (int) path;
50888
50889 /* If current window is full, get a new window.
50890 Window number zero is full if MAX_INSN uops are scheduled in it.
50891 Window number one is full if window zero's bytes plus window
50892 one's bytes equal 32, if adding the bytes of the new instruction
50893 would make the total 48 or more, or if it already has MAX_INSN
50894 instructions in it. */
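  /* E.g. if window zero holds 20 bytes and window one holds 12, the sum
     is 32 and the window pair is closed; likewise a 10-byte instruction
     arriving when the sum is 40 would push the total to 48 or more.  */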
50895 num_insn = window_list->num_insn;
50896 num_uops = window_list->num_uops;
50897 window_num = window_list->window_num;
50898 insn_fits = fits_dispatch_window (insn);
50899
50900 if (num_insn >= MAX_INSN
50901 || num_uops + insn_num_uops > MAX_INSN
50902 || !(insn_fits))
50903 {
50904 window_num = ~window_num & 1;
50905 window_list = allocate_next_window (window_num);
50906 }
50907
50908 if (window_num == 0)
50909 {
50910 add_insn_window (insn, window_list, insn_num_uops);
50911 if (window_list->num_insn >= MAX_INSN
50912 && insn_group == disp_branch)
50913 {
50914 process_end_window ();
50915 return;
50916 }
50917 }
50918 else if (window_num == 1)
50919 {
50920 window0_list = window_list->prev;
50921 sum = window0_list->window_size + window_list->window_size;
50922 if (sum == 32
50923 || (byte_len + sum) >= 48)
50924 {
50925 process_end_window ();
50926 window_list = dispatch_window_list;
50927 }
50928
50929 add_insn_window (insn, window_list, insn_num_uops);
50930 }
50931 else
50932 gcc_unreachable ();
50933
50934 if (is_end_basic_block (insn_group))
50935 {
50936 /* End of basic block is reached; do end-basic-block processing. */
50937 process_end_window ();
50938 return;
50939 }
50940 }
50941
50942 /* Print the dispatch window, WINDOW_NUM, to FILE. */
50943
50944 DEBUG_FUNCTION static void
50945 debug_dispatch_window_file (FILE *file, int window_num)
50946 {
50947 dispatch_windows *list;
50948 int i;
50949
50950 if (window_num == 0)
50951 list = dispatch_window_list;
50952 else
50953 list = dispatch_window_list1;
50954
50955 fprintf (file, "Window #%d:\n", list->window_num);
50956 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
50957 list->num_insn, list->num_uops, list->window_size);
50958 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50959 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
50960
50961 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
50962 list->num_stores);
50963 fprintf (file, " insn info:\n");
50964
50965 for (i = 0; i < MAX_INSN; i++)
50966 {
50967 if (!list->window[i].insn)
50968 break;
50969 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
50970 i, group_name[list->window[i].group],
50971 i, (void *)list->window[i].insn,
50972 i, list->window[i].path,
50973 i, list->window[i].byte_len,
50974 i, list->window[i].imm_bytes);
50975 }
50976 }
50977
50978 /* Print to stdout a dispatch window. */
50979
50980 DEBUG_FUNCTION void
50981 debug_dispatch_window (int window_num)
50982 {
50983 debug_dispatch_window_file (stdout, window_num);
50984 }
50985
50986 /* Print INSN dispatch information to FILE. */
50987
50988 DEBUG_FUNCTION static void
50989 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
50990 {
50991 int byte_len;
50992 enum insn_path path;
50993 enum dispatch_group group;
50994 int imm_size;
50995 int num_imm_operand;
50996 int num_imm32_operand;
50997 int num_imm64_operand;
50998
50999 if (INSN_CODE (insn) < 0)
51000 return;
51001
51002 byte_len = min_insn_size (insn);
51003 path = get_insn_path (insn);
51004 group = get_insn_group (insn);
51005 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51006 &num_imm64_operand);
51007
51008 fprintf (file, " insn info:\n");
51009 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
51010 group_name[group], path, byte_len);
51011 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51012 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
51013 }
51014
51015 /* Print to STDOUT the status of the ready list with respect to
51016 dispatch windows. */
51017
51018 DEBUG_FUNCTION void
51019 debug_ready_dispatch (void)
51020 {
51021 int i;
51022 int no_ready = number_in_ready ();
51023
51024 fprintf (stdout, "Number of ready: %d\n", no_ready);
51025
51026 for (i = 0; i < no_ready; i++)
51027 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
51028 }
51029
51030 /* This routine is the driver of the dispatch scheduler. */
51031
51032 static void
51033 do_dispatch (rtx_insn *insn, int mode)
51034 {
51035 if (mode == DISPATCH_INIT)
51036 init_dispatch_sched ();
51037 else if (mode == ADD_TO_DISPATCH_WINDOW)
51038 add_to_dispatch_window (insn);
51039 }
51040
51041 /* Return TRUE if Dispatch Scheduling is supported. */
51042
51043 static bool
51044 has_dispatch (rtx_insn *insn, int action)
51045 {
51046 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
51047 && flag_dispatch_scheduler)
51048 switch (action)
51049 {
51050 default:
51051 return false;
51052
51053 case IS_DISPATCH_ON:
51054 return true;
51055 break;
51056
51057 case IS_CMP:
51058 return is_cmp (insn);
51059
51060 case DISPATCH_VIOLATION:
51061 return dispatch_violation ();
51062
51063 case FITS_DISPATCH_WINDOW:
51064 return fits_dispatch_window (insn);
51065 }
51066
51067 return false;
51068 }
51069
51070 /* Implementation of reassociation_width target hook used by
51071 reassoc phase to identify parallelism level in reassociated
51072 tree. The statement's tree_code is passed in OPC. The arguments' type
51073 is passed in MODE.
51074
51075 Currently parallel reassociation is enabled for Atom
51076 processors only and we set reassociation width to be 2
51077 because Atom may issue up to 2 instructions per cycle.
51078
51079 Return value should be fixed if parallel reassociation is
51080 enabled for other processors. */
51081
51082 static int
51083 ix86_reassociation_width (unsigned int, machine_mode mode)
51084 {
51085 /* Vector part. */
51086 if (VECTOR_MODE_P (mode))
51087 {
51088 if (TARGET_VECTOR_PARALLEL_EXECUTION)
51089 return 2;
51090 else
51091 return 1;
51092 }
51093
51094 /* Scalar part. */
51095 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51096 return 2;
51097 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
51098 return 2;
51099 else
51100 return 1;
51101 }
51102
51103 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51104 place emms and femms instructions. */
51105
51106 static machine_mode
51107 ix86_preferred_simd_mode (machine_mode mode)
51108 {
51109 if (!TARGET_SSE)
51110 return word_mode;
51111
51112 switch (mode)
51113 {
51114 case QImode:
51115 return TARGET_AVX512BW ? V64QImode :
51116 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51117 case HImode:
51118 return TARGET_AVX512BW ? V32HImode :
51119 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51120 case SImode:
51121 return TARGET_AVX512F ? V16SImode :
51122 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51123 case DImode:
51124 return TARGET_AVX512F ? V8DImode :
51125 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51126
51127 case SFmode:
51128 if (TARGET_AVX512F)
51129 return V16SFmode;
51130 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51131 return V8SFmode;
51132 else
51133 return V4SFmode;
51134
51135 case DFmode:
51136 if (!TARGET_VECTORIZE_DOUBLE)
51137 return word_mode;
51138 else if (TARGET_AVX512F)
51139 return V8DFmode;
51140 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51141 return V4DFmode;
51142 else if (TARGET_SSE2)
51143 return V2DFmode;
51144 /* FALLTHRU */
51145
51146 default:
51147 return word_mode;
51148 }
51149 }
51150
51151 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51152 vectors. If AVX512F is enabled then try vectorizing with 512bit,
51153 256bit and 128bit vectors. */
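/* The return value is a bitmask of candidate vector sizes in bytes
   (e.g. 64 | 32 | 16); returning 0 means only the preferred SIMD mode
   is tried.  */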
51154
51155 static unsigned int
51156 ix86_autovectorize_vector_sizes (void)
51157 {
51158 return TARGET_AVX512F ? 64 | 32 | 16 :
51159 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
51160 }
51161
51162 \f
51163
51164 /* Return class of registers which could be used for pseudo of MODE
51165 and of class RCLASS for spilling instead of memory. Return NO_REGS
51166 if it is not possible or non-profitable. */
51167 static reg_class_t
51168 ix86_spill_class (reg_class_t rclass, machine_mode mode)
51169 {
51170 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51171 && (mode == SImode || (TARGET_64BIT && mode == DImode))
51172 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51173 return ALL_SSE_REGS;
51174 return NO_REGS;
51175 }
51176
51177 /* Implement targetm.vectorize.init_cost. */
51178
51179 static void *
51180 ix86_init_cost (struct loop *)
51181 {
51182 unsigned *cost = XNEWVEC (unsigned, 3);
51183 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51184 return cost;
51185 }
51186
51187 /* Implement targetm.vectorize.add_stmt_cost. */
51188
51189 static unsigned
51190 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51191 struct _stmt_vec_info *stmt_info, int misalign,
51192 enum vect_cost_model_location where)
51193 {
51194 unsigned *cost = (unsigned *) data;
51195 unsigned retval = 0;
51196
51197 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51198 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51199
51200 /* Statements in an inner loop relative to the loop being
51201 vectorized are weighted more heavily. The value here is
51202 arbitrary and could potentially be improved with analysis. */
51203 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51204 count *= 50; /* FIXME. */
51205
51206 retval = (unsigned) (count * stmt_cost);
51207
51208 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
51209 for Silvermont, as it has an out-of-order integer pipeline and can execute
51210 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
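  /* E.g. count == 2 and stmt_cost == 4 give retval == 8, scaled to
     (8 * 17) / 10 == 13 for an integer-typed statement on Silvermont
     (integer division).  */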
51211 if (TARGET_SILVERMONT || TARGET_INTEL)
51212 if (stmt_info && stmt_info->stmt)
51213 {
51214 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51215 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51216 retval = (retval * 17) / 10;
51217 }
51218
51219 cost[where] += retval;
51220
51221 return retval;
51222 }
51223
51224 /* Implement targetm.vectorize.finish_cost. */
51225
51226 static void
51227 ix86_finish_cost (void *data, unsigned *prologue_cost,
51228 unsigned *body_cost, unsigned *epilogue_cost)
51229 {
51230 unsigned *cost = (unsigned *) data;
51231 *prologue_cost = cost[vect_prologue];
51232 *body_cost = cost[vect_body];
51233 *epilogue_cost = cost[vect_epilogue];
51234 }
51235
51236 /* Implement targetm.vectorize.destroy_cost_data. */
51237
51238 static void
51239 ix86_destroy_cost_data (void *data)
51240 {
51241 free (data);
51242 }
51243
51244 /* Validate target specific memory model bits in VAL. */
51245
51246 static unsigned HOST_WIDE_INT
51247 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51248 {
51249 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
51250 bool strong;
51251
51252 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51253 |MEMMODEL_MASK)
51254 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51255 {
51256 warning (OPT_Winvalid_memory_model,
51257 "Unknown architecture specific memory model");
51258 return MEMMODEL_SEQ_CST;
51259 }
51260 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
51261 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
51262 {
51263 warning (OPT_Winvalid_memory_model,
51264 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51265 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51266 }
51267 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
51268 {
51269 warning (OPT_Winvalid_memory_model,
51270 "HLE_RELEASE not used with RELEASE or stronger memory model");
51271 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51272 }
51273 return val;
51274 }
51275
51276 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51277 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51278 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51279 or number of vecsize_mangle variants that should be emitted. */
51280
51281 static int
51282 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51283 struct cgraph_simd_clone *clonei,
51284 tree base_type, int num)
51285 {
51286 int ret = 1;
51287
51288 if (clonei->simdlen
51289 && (clonei->simdlen < 2
51290 || clonei->simdlen > 16
51291 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51292 {
51293 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51294 "unsupported simdlen %d", clonei->simdlen);
51295 return 0;
51296 }
51297
51298 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51299 if (TREE_CODE (ret_type) != VOID_TYPE)
51300 switch (TYPE_MODE (ret_type))
51301 {
51302 case QImode:
51303 case HImode:
51304 case SImode:
51305 case DImode:
51306 case SFmode:
51307 case DFmode:
51308 /* case SCmode: */
51309 /* case DCmode: */
51310 break;
51311 default:
51312 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51313 "unsupported return type %qT for simd\n", ret_type);
51314 return 0;
51315 }
51316
51317 tree t;
51318 int i;
51319
51320 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51321 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51322 switch (TYPE_MODE (TREE_TYPE (t)))
51323 {
51324 case QImode:
51325 case HImode:
51326 case SImode:
51327 case DImode:
51328 case SFmode:
51329 case DFmode:
51330 /* case SCmode: */
51331 /* case DCmode: */
51332 break;
51333 default:
51334 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51335 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
51336 return 0;
51337 }
51338
51339 if (clonei->cilk_elemental)
51340 {
51341 /* Parse the processor clause here. If not present, default to 'b'. */
51342 clonei->vecsize_mangle = 'b';
51343 }
51344 else if (!TREE_PUBLIC (node->decl))
51345 {
51346 /* If the function isn't exported, we can pick up just one ISA
51347 for the clones. */
51348 if (TARGET_AVX2)
51349 clonei->vecsize_mangle = 'd';
51350 else if (TARGET_AVX)
51351 clonei->vecsize_mangle = 'c';
51352 else
51353 clonei->vecsize_mangle = 'b';
51354 ret = 1;
51355 }
51356 else
51357 {
51358 clonei->vecsize_mangle = "bcd"[num];
51359 ret = 3;
51360 }
51361 switch (clonei->vecsize_mangle)
51362 {
51363 case 'b':
51364 clonei->vecsize_int = 128;
51365 clonei->vecsize_float = 128;
51366 break;
51367 case 'c':
51368 clonei->vecsize_int = 128;
51369 clonei->vecsize_float = 256;
51370 break;
51371 case 'd':
51372 clonei->vecsize_int = 256;
51373 clonei->vecsize_float = 256;
51374 break;
51375 }
51376 if (clonei->simdlen == 0)
51377 {
51378 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51379 clonei->simdlen = clonei->vecsize_int;
51380 else
51381 clonei->simdlen = clonei->vecsize_float;
51382 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51383 if (clonei->simdlen > 16)
51384 clonei->simdlen = 16;
51385 }
51386 return ret;
51387 }
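
/* Worked example, added for illustration (not in the original source):
   for an exported clone with base_type float and num == 1 the mangle
   letter is 'c' (AVX), so vecsize_float == 256 and, when simdlen is not
   given, simdlen = 256 / GET_MODE_BITSIZE (SFmode) = 256 / 32 = 8.
   For base_type double with mangle 'b' (SSE2) it would be
   128 / 64 = 2.  */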
51388
51389 /* Add target attribute to SIMD clone NODE if needed. */
51390
51391 static void
51392 ix86_simd_clone_adjust (struct cgraph_node *node)
51393 {
51394 const char *str = NULL;
51395 gcc_assert (node->decl == cfun->decl);
51396 switch (node->simdclone->vecsize_mangle)
51397 {
51398 case 'b':
51399 if (!TARGET_SSE2)
51400 str = "sse2";
51401 break;
51402 case 'c':
51403 if (!TARGET_AVX)
51404 str = "avx";
51405 break;
51406 case 'd':
51407 if (!TARGET_AVX2)
51408 str = "avx2";
51409 break;
51410 default:
51411 gcc_unreachable ();
51412 }
51413 if (str == NULL)
51414 return;
51415 push_cfun (NULL);
51416 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51417 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51418 gcc_assert (ok);
51419 pop_cfun ();
51420 ix86_reset_previous_fndecl ();
51421 ix86_set_current_function (node->decl);
51422 }
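
/* Illustrative note (not part of the original source): e.g. when the 'd'
   (AVX2-mangled) clone of a function is emitted in a translation unit
   compiled without -mavx2, this hook behaves as if the clone had been
   declared with __attribute__((target ("avx2"))), so its body may use
   256-bit integer vectors.  */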
51423
51424 /* If SIMD clone NODE can't be used in a vectorized loop
51425 in the current function, return -1; otherwise return the badness of using it
51426 (0 if it is the most desirable from the vecsize_mangle point of view,
51427 1 if slightly less desirable, etc.). */
51428
51429 static int
51430 ix86_simd_clone_usable (struct cgraph_node *node)
51431 {
51432 switch (node->simdclone->vecsize_mangle)
51433 {
51434 case 'b':
51435 if (!TARGET_SSE2)
51436 return -1;
51437 if (!TARGET_AVX)
51438 return 0;
51439 return TARGET_AVX2 ? 2 : 1;
51440 case 'c':
51441 if (!TARGET_AVX)
51442 return -1;
51443 return TARGET_AVX2 ? 1 : 0;
51445 case 'd':
51446 if (!TARGET_AVX2)
51447 return -1;
51448 return 0;
51449 default:
51450 gcc_unreachable ();
51451 }
51452 }
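
/* Illustrative reading of the ranking above (not in the original source):
   when the current function is compiled for AVX2, the 'd' clone is
   preferred (badness 0), the 'c' clone comes next (1) and the 'b' clone
   last (2); on a plain SSE2 target without AVX only the 'b' clone is
   usable and it gets badness 0.  */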
51453
51454 /* This function adjusts the unroll factor based on
51455 the hardware capabilities. For example, bdver3 has
51456 a loop buffer which makes unrolling of smaller
51457 loops less important. This function decides the
51458 unroll factor using the number of memory references
51459 (the value 32 is used) as a heuristic. */
51460
51461 static unsigned
51462 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51463 {
51464 basic_block *bbs;
51465 rtx_insn *insn;
51466 unsigned i;
51467 unsigned mem_count = 0;
51468
51469 if (!TARGET_ADJUST_UNROLL)
51470 return nunroll;
51471
51472 /* Count the number of memory references within the loop body.
51473 This value determines the unrolling factor for bdver3 and bdver4
51474 architectures. */
51475 subrtx_iterator::array_type array;
51476 bbs = get_loop_body (loop);
51477 for (i = 0; i < loop->num_nodes; i++)
51478 FOR_BB_INSNS (bbs[i], insn)
51479 if (NONDEBUG_INSN_P (insn))
51480 FOR_EACH_SUBRTX (iter, array, insn, NONCONST)
51481 if (const_rtx x = *iter)
51482 if (MEM_P (x))
51483 {
51484 machine_mode mode = GET_MODE (x);
51485 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
51486 if (n_words > 4)
51487 mem_count += 2;
51488 else
51489 mem_count += 1;
51490 }
51491 free (bbs);
51492
51493 if (mem_count && mem_count <= 32)
51494 return 32 / mem_count;
51495
51496 return nunroll;
51497 }
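
/* Worked example (illustration only, not in the original source): on a
   bdver3/bdver4 target (TARGET_ADJUST_UNROLL), a loop body containing
   five word-sized memory references gets mem_count == 5, so the unroll
   factor becomes 32 / 5 == 6 regardless of the NUNROLL suggested by the
   generic code; a memory reference wider than four words counts twice.  */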
51498
51499
51500 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51501
51502 static bool
51503 ix86_float_exceptions_rounding_supported_p (void)
51504 {
51505 /* For x87 floating point with standard excess precision handling,
51506 there is no adddf3 pattern (since x87 floating point only has
51507 XFmode operations) so the default hook implementation gets this
51508 wrong. */
51509 return TARGET_80387 || TARGET_SSE_MATH;
51510 }
51511
51512 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
51513
51514 static void
51515 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
51516 {
51517 if (!TARGET_80387 && !TARGET_SSE_MATH)
51518 return;
51519 tree exceptions_var = create_tmp_var (integer_type_node);
51520 if (TARGET_80387)
51521 {
51522 tree fenv_index_type = build_index_type (size_int (6));
51523 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51524 tree fenv_var = create_tmp_var (fenv_type);
51525 mark_addressable (fenv_var);
51526 tree fenv_ptr = build_pointer_type (fenv_type);
51527 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51528 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51529 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51530 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51531 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51532 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51533 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51534 tree hold_fnclex = build_call_expr (fnclex, 0);
51535 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51536 hold_fnclex);
51537 *clear = build_call_expr (fnclex, 0);
51538 tree sw_var = create_tmp_var (short_unsigned_type_node);
51539 tree fnstsw_call = build_call_expr (fnstsw, 0);
51540 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51541 sw_var, fnstsw_call);
51542 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51543 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51544 exceptions_var, exceptions_x87);
51545 *update = build2 (COMPOUND_EXPR, integer_type_node,
51546 sw_mod, update_mod);
51547 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51548 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
51549 }
51550 if (TARGET_SSE_MATH)
51551 {
51552 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
51553 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
51554 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
51555 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
51556 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
51557 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
51558 mxcsr_orig_var, stmxcsr_hold_call);
51559 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
51560 mxcsr_orig_var,
51561 build_int_cst (unsigned_type_node, 0x1f80));
51562 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
51563 build_int_cst (unsigned_type_node, 0xffffffc0));
51564 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
51565 mxcsr_mod_var, hold_mod_val);
51566 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51567 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
51568 hold_assign_orig, hold_assign_mod);
51569 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
51570 ldmxcsr_hold_call);
51571 if (*hold)
51572 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
51573 else
51574 *hold = hold_all;
51575 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51576 if (*clear)
51577 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
51578 ldmxcsr_clear_call);
51579 else
51580 *clear = ldmxcsr_clear_call;
51581 tree stmxcsr_update_call = build_call_expr (stmxcsr, 0);
51582 tree exceptions_sse = fold_convert (integer_type_node,
51583 stmxcsr_update_call);
51584 if (*update)
51585 {
51586 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
51587 exceptions_var, exceptions_sse);
51588 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
51589 exceptions_var, exceptions_mod);
51590 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
51591 exceptions_assign);
51592 }
51593 else
51594 *update = build2 (MODIFY_EXPR, integer_type_node,
51595 exceptions_var, exceptions_sse);
51596 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
51597 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51598 ldmxcsr_update_call);
51599 }
51600 tree atomic_feraiseexcept
51601 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
51602 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
51603 1, exceptions_var);
51604 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51605 atomic_feraiseexcept_call);
51606 }
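
/* Hedged sketch of the SSE half above, added for illustration (not in
   the original source): the trees built there correspond roughly to

     unsigned int mxcsr_orig = __builtin_ia32_stmxcsr ();
     unsigned int mxcsr_mod = (mxcsr_orig | 0x1f80) & 0xffffffc0;
     __builtin_ia32_ldmxcsr (mxcsr_mod);        hold
     __builtin_ia32_ldmxcsr (mxcsr_mod);        clear
     exceptions |= __builtin_ia32_stmxcsr ();   update
     __builtin_ia32_ldmxcsr (mxcsr_orig);

   0x1f80 sets the six exception mask bits (bits 7-12) of MXCSR and
   0xffffffc0 clears the six exception flag bits (bits 0-5).  */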
51607
51608 /* Return the mode to be used for bounds, or VOIDmode
51609 if bounds are not supported. */
51610
51611 static enum machine_mode
51612 ix86_mpx_bound_mode ()
51613 {
51614 /* Do not support pointer checker if MPX
51615 is not enabled. */
51616 if (!TARGET_MPX)
51617 {
51618 if (flag_check_pointer_bounds)
51619 warning (0, "Pointer Checker requires MPX support on this target."
51620 " Use -mmpx options to enable MPX.");
51621 return VOIDmode;
51622 }
51623
51624 return BNDmode;
51625 }
51626
51627 /* Return constant used to statically initialize constant bounds.
51628
51629 This function is used to create special bound values. For now
51630 only INIT bounds and NONE bounds are expected. More special
51631 values may be added later. */
51632
51633 static tree
51634 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
51635 {
51636 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
51637 : build_zero_cst (pointer_sized_int_node);
51638 tree high = ub ? build_zero_cst (pointer_sized_int_node)
51639 : build_minus_one_cst (pointer_sized_int_node);
51640
51641 /* This function is supposed to be used to create INIT and
51642 NONE bounds only. */
51643 gcc_assert ((lb == 0 && ub == -1)
51644 || (lb == -1 && ub == 0));
51645
51646 return build_complex (NULL, low, high);
51647 }
51648
51649 /* Generate a list of statements STMTS to initialize pointer bounds
51650 variable VAR with bounds LB and UB. Return the number of generated
51651 statements. */
51652
51653 static int
51654 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
51655 {
51656 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
51657 tree lhs, modify, var_p;
51658
51659 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
51660 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
51661
51662 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
51663 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
51664 append_to_statement_list (modify, stmts);
51665
51666 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
51667 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
51668 TYPE_SIZE_UNIT (pointer_sized_int_node)));
51669 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
51670 append_to_statement_list (modify, stmts);
51671
51672 return 2;
51673 }
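
/* Note added for illustration (not in the original source): both helpers
   above keep the upper bound in one's-complement form:
   ix86_initialize_bounds applies BIT_NOT_EXPR before storing UB, and
   ix86_make_bounds_constant builds the already-inverted value.  E.g.
   INIT bounds (lb == 0, ub == -1) are encoded as the pair {0, 0}, and
   NONE bounds (lb == -1, ub == 0) as {-1, -1}.  */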
51674
51675 /* Initialize the GCC target structure. */
51676 #undef TARGET_RETURN_IN_MEMORY
51677 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51678
51679 #undef TARGET_LEGITIMIZE_ADDRESS
51680 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51681
51682 #undef TARGET_ATTRIBUTE_TABLE
51683 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51684 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51685 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51686 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51687 # undef TARGET_MERGE_DECL_ATTRIBUTES
51688 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51689 #endif
51690
51691 #undef TARGET_COMP_TYPE_ATTRIBUTES
51692 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51693
51694 #undef TARGET_INIT_BUILTINS
51695 #define TARGET_INIT_BUILTINS ix86_init_builtins
51696 #undef TARGET_BUILTIN_DECL
51697 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51698 #undef TARGET_EXPAND_BUILTIN
51699 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51700
51701 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51702 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51703 ix86_builtin_vectorized_function
51704
51705 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
51706 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
51707
51708 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
51709 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
51710
51711 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51712 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51713
51714 #undef TARGET_BUILTIN_RECIPROCAL
51715 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
51716
51717 #undef TARGET_ASM_FUNCTION_EPILOGUE
51718 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
51719
51720 #undef TARGET_ENCODE_SECTION_INFO
51721 #ifndef SUBTARGET_ENCODE_SECTION_INFO
51722 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
51723 #else
51724 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
51725 #endif
51726
51727 #undef TARGET_ASM_OPEN_PAREN
51728 #define TARGET_ASM_OPEN_PAREN ""
51729 #undef TARGET_ASM_CLOSE_PAREN
51730 #define TARGET_ASM_CLOSE_PAREN ""
51731
51732 #undef TARGET_ASM_BYTE_OP
51733 #define TARGET_ASM_BYTE_OP ASM_BYTE
51734
51735 #undef TARGET_ASM_ALIGNED_HI_OP
51736 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
51737 #undef TARGET_ASM_ALIGNED_SI_OP
51738 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
51739 #ifdef ASM_QUAD
51740 #undef TARGET_ASM_ALIGNED_DI_OP
51741 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
51742 #endif
51743
51744 #undef TARGET_PROFILE_BEFORE_PROLOGUE
51745 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
51746
51747 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
51748 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
51749
51750 #undef TARGET_ASM_UNALIGNED_HI_OP
51751 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
51752 #undef TARGET_ASM_UNALIGNED_SI_OP
51753 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
51754 #undef TARGET_ASM_UNALIGNED_DI_OP
51755 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
51756
51757 #undef TARGET_PRINT_OPERAND
51758 #define TARGET_PRINT_OPERAND ix86_print_operand
51759 #undef TARGET_PRINT_OPERAND_ADDRESS
51760 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
51761 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
51762 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
51763 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
51764 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
51765
51766 #undef TARGET_SCHED_INIT_GLOBAL
51767 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
51768 #undef TARGET_SCHED_ADJUST_COST
51769 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
51770 #undef TARGET_SCHED_ISSUE_RATE
51771 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
51772 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
51773 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
51774 ia32_multipass_dfa_lookahead
51775 #undef TARGET_SCHED_MACRO_FUSION_P
51776 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
51777 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
51778 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
51779
51780 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
51781 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
51782
51783 #undef TARGET_MEMMODEL_CHECK
51784 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
51785
51786 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
51787 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
51788
51789 #ifdef HAVE_AS_TLS
51790 #undef TARGET_HAVE_TLS
51791 #define TARGET_HAVE_TLS true
51792 #endif
51793 #undef TARGET_CANNOT_FORCE_CONST_MEM
51794 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
51795 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
51796 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
51797
51798 #undef TARGET_DELEGITIMIZE_ADDRESS
51799 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
51800
51801 #undef TARGET_MS_BITFIELD_LAYOUT_P
51802 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
51803
51804 #if TARGET_MACHO
51805 #undef TARGET_BINDS_LOCAL_P
51806 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
51807 #endif
51808 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51809 #undef TARGET_BINDS_LOCAL_P
51810 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
51811 #endif
51812
51813 #undef TARGET_ASM_OUTPUT_MI_THUNK
51814 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
51815 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
51816 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
51817
51818 #undef TARGET_ASM_FILE_START
51819 #define TARGET_ASM_FILE_START x86_file_start
51820
51821 #undef TARGET_OPTION_OVERRIDE
51822 #define TARGET_OPTION_OVERRIDE ix86_option_override
51823
51824 #undef TARGET_REGISTER_MOVE_COST
51825 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
51826 #undef TARGET_MEMORY_MOVE_COST
51827 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
51828 #undef TARGET_RTX_COSTS
51829 #define TARGET_RTX_COSTS ix86_rtx_costs
51830 #undef TARGET_ADDRESS_COST
51831 #define TARGET_ADDRESS_COST ix86_address_cost
51832
51833 #undef TARGET_FIXED_CONDITION_CODE_REGS
51834 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
51835 #undef TARGET_CC_MODES_COMPATIBLE
51836 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
51837
51838 #undef TARGET_MACHINE_DEPENDENT_REORG
51839 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
51840
51841 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
51842 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
51843
51844 #undef TARGET_BUILD_BUILTIN_VA_LIST
51845 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
51846
51847 #undef TARGET_FOLD_BUILTIN
51848 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
51849
51850 #undef TARGET_COMPARE_VERSION_PRIORITY
51851 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
51852
51853 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
51854 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
51855 ix86_generate_version_dispatcher_body
51856
51857 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
51858 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
51859 ix86_get_function_versions_dispatcher
51860
51861 #undef TARGET_ENUM_VA_LIST_P
51862 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
51863
51864 #undef TARGET_FN_ABI_VA_LIST
51865 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
51866
51867 #undef TARGET_CANONICAL_VA_LIST_TYPE
51868 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
51869
51870 #undef TARGET_EXPAND_BUILTIN_VA_START
51871 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
51872
51873 #undef TARGET_MD_ASM_CLOBBERS
51874 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
51875
51876 #undef TARGET_PROMOTE_PROTOTYPES
51877 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
51878 #undef TARGET_SETUP_INCOMING_VARARGS
51879 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
51880 #undef TARGET_MUST_PASS_IN_STACK
51881 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
51882 #undef TARGET_FUNCTION_ARG_ADVANCE
51883 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
51884 #undef TARGET_FUNCTION_ARG
51885 #define TARGET_FUNCTION_ARG ix86_function_arg
51886 #undef TARGET_INIT_PIC_REG
51887 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
51888 #undef TARGET_USE_PSEUDO_PIC_REG
51889 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
51890 #undef TARGET_FUNCTION_ARG_BOUNDARY
51891 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
51892 #undef TARGET_PASS_BY_REFERENCE
51893 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
51894 #undef TARGET_INTERNAL_ARG_POINTER
51895 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
51896 #undef TARGET_UPDATE_STACK_BOUNDARY
51897 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
51898 #undef TARGET_GET_DRAP_RTX
51899 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
51900 #undef TARGET_STRICT_ARGUMENT_NAMING
51901 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
51902 #undef TARGET_STATIC_CHAIN
51903 #define TARGET_STATIC_CHAIN ix86_static_chain
51904 #undef TARGET_TRAMPOLINE_INIT
51905 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
51906 #undef TARGET_RETURN_POPS_ARGS
51907 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
51908
51909 #undef TARGET_LEGITIMATE_COMBINED_INSN
51910 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
51911
51912 #undef TARGET_ASAN_SHADOW_OFFSET
51913 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
51914
51915 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
51916 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
51917
51918 #undef TARGET_SCALAR_MODE_SUPPORTED_P
51919 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
51920
51921 #undef TARGET_VECTOR_MODE_SUPPORTED_P
51922 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
51923
51924 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
51925 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
51926 ix86_libgcc_floating_mode_supported_p
51927
51928 #undef TARGET_C_MODE_FOR_SUFFIX
51929 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
51930
51931 #ifdef HAVE_AS_TLS
51932 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
51933 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
51934 #endif
51935
51936 #ifdef SUBTARGET_INSERT_ATTRIBUTES
51937 #undef TARGET_INSERT_ATTRIBUTES
51938 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
51939 #endif
51940
51941 #undef TARGET_MANGLE_TYPE
51942 #define TARGET_MANGLE_TYPE ix86_mangle_type
51943
51944 #if !TARGET_MACHO
51945 #undef TARGET_STACK_PROTECT_FAIL
51946 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
51947 #endif
51948
51949 #undef TARGET_FUNCTION_VALUE
51950 #define TARGET_FUNCTION_VALUE ix86_function_value
51951
51952 #undef TARGET_FUNCTION_VALUE_REGNO_P
51953 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
51954
51955 #undef TARGET_PROMOTE_FUNCTION_MODE
51956 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
51957
51958 #undef TARGET_MEMBER_TYPE_FORCES_BLK
51959 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
51960
51961 #undef TARGET_INSTANTIATE_DECLS
51962 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
51963
51964 #undef TARGET_SECONDARY_RELOAD
51965 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
51966
51967 #undef TARGET_CLASS_MAX_NREGS
51968 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
51969
51970 #undef TARGET_PREFERRED_RELOAD_CLASS
51971 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
51972 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
51973 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
51974 #undef TARGET_CLASS_LIKELY_SPILLED_P
51975 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
51976
51977 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
51978 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
51979 ix86_builtin_vectorization_cost
51980 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
51981 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
51982 ix86_vectorize_vec_perm_const_ok
51983 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
51984 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
51985 ix86_preferred_simd_mode
51986 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
51987 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
51988 ix86_autovectorize_vector_sizes
51989 #undef TARGET_VECTORIZE_INIT_COST
51990 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
51991 #undef TARGET_VECTORIZE_ADD_STMT_COST
51992 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
51993 #undef TARGET_VECTORIZE_FINISH_COST
51994 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
51995 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
51996 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
51997
51998 #undef TARGET_SET_CURRENT_FUNCTION
51999 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
52000
52001 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
52002 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
52003
52004 #undef TARGET_OPTION_SAVE
52005 #define TARGET_OPTION_SAVE ix86_function_specific_save
52006
52007 #undef TARGET_OPTION_RESTORE
52008 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
52009
52010 #undef TARGET_OPTION_PRINT
52011 #define TARGET_OPTION_PRINT ix86_function_specific_print
52012
52013 #undef TARGET_OPTION_FUNCTION_VERSIONS
52014 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
52015
52016 #undef TARGET_CAN_INLINE_P
52017 #define TARGET_CAN_INLINE_P ix86_can_inline_p
52018
52019 #undef TARGET_EXPAND_TO_RTL_HOOK
52020 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
52021
52022 #undef TARGET_LEGITIMATE_ADDRESS_P
52023 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
52024
52025 #undef TARGET_LRA_P
52026 #define TARGET_LRA_P hook_bool_void_true
52027
52028 #undef TARGET_REGISTER_PRIORITY
52029 #define TARGET_REGISTER_PRIORITY ix86_register_priority
52030
52031 #undef TARGET_REGISTER_USAGE_LEVELING_P
52032 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
52033
52034 #undef TARGET_LEGITIMATE_CONSTANT_P
52035 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
52036
52037 #undef TARGET_FRAME_POINTER_REQUIRED
52038 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
52039
52040 #undef TARGET_CAN_ELIMINATE
52041 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
52042
52043 #undef TARGET_EXTRA_LIVE_ON_ENTRY
52044 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
52045
52046 #undef TARGET_ASM_CODE_END
52047 #define TARGET_ASM_CODE_END ix86_code_end
52048
52049 #undef TARGET_CONDITIONAL_REGISTER_USAGE
52050 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
52051
52052 #if TARGET_MACHO
52053 #undef TARGET_INIT_LIBFUNCS
52054 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
52055 #endif
52056
52057 #undef TARGET_LOOP_UNROLL_ADJUST
52058 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
52059
52060 #undef TARGET_SPILL_CLASS
52061 #define TARGET_SPILL_CLASS ix86_spill_class
52062
52063 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
52064 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
52065 ix86_simd_clone_compute_vecsize_and_simdlen
52066
52067 #undef TARGET_SIMD_CLONE_ADJUST
52068 #define TARGET_SIMD_CLONE_ADJUST \
52069 ix86_simd_clone_adjust
52070
52071 #undef TARGET_SIMD_CLONE_USABLE
52072 #define TARGET_SIMD_CLONE_USABLE \
52073 ix86_simd_clone_usable
52074
52075 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
52076 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
52077 ix86_float_exceptions_rounding_supported_p
52078
52079 #undef TARGET_MODE_EMIT
52080 #define TARGET_MODE_EMIT ix86_emit_mode_set
52081
52082 #undef TARGET_MODE_NEEDED
52083 #define TARGET_MODE_NEEDED ix86_mode_needed
52084
52085 #undef TARGET_MODE_AFTER
52086 #define TARGET_MODE_AFTER ix86_mode_after
52087
52088 #undef TARGET_MODE_ENTRY
52089 #define TARGET_MODE_ENTRY ix86_mode_entry
52090
52091 #undef TARGET_MODE_EXIT
52092 #define TARGET_MODE_EXIT ix86_mode_exit
52093
52094 #undef TARGET_MODE_PRIORITY
52095 #define TARGET_MODE_PRIORITY ix86_mode_priority
52096
52097 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
52098 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
52099
52100 #undef TARGET_LOAD_BOUNDS_FOR_ARG
52101 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
52102
52103 #undef TARGET_STORE_BOUNDS_FOR_ARG
52104 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
52105
52106 #undef TARGET_LOAD_RETURNED_BOUNDS
52107 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
52108
52109 #undef TARGET_STORE_RETURNED_BOUNDS
52110 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
52111
52112 #undef TARGET_CHKP_BOUND_MODE
52113 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
52114
52115 #undef TARGET_BUILTIN_CHKP_FUNCTION
52116 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
52117
52118 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
52119 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
52120
52121 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
52122 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
52123
52124 #undef TARGET_CHKP_INITIALIZE_BOUNDS
52125 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
52126
52127 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
52128 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
52129
52130 #undef TARGET_OFFLOAD_OPTIONS
52131 #define TARGET_OFFLOAD_OPTIONS \
52132 ix86_offload_options
52133
52134 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
52135 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
52136
52137 struct gcc_target targetm = TARGET_INITIALIZER;
52138 \f
52139 #include "gt-i386.h"