gcc/config/i386/i386-features.c
1 /* Copyright (C) 1988-2019 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with GCC; see the file COPYING3. If not see
17 <http://www.gnu.org/licenses/>. */
18
19 #define IN_TARGET_CODE 1
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "memmodel.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "cfgloop.h"
31 #include "df.h"
32 #include "tm_p.h"
33 #include "stringpool.h"
34 #include "expmed.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "cgraph.h"
40 #include "diagnostic.h"
41 #include "cfgbuild.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "attribs.h"
45 #include "calls.h"
46 #include "stor-layout.h"
47 #include "varasm.h"
48 #include "output.h"
49 #include "insn-attr.h"
50 #include "flags.h"
51 #include "except.h"
52 #include "explow.h"
53 #include "expr.h"
54 #include "cfgrtl.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "reload.h"
58 #include "gimplify.h"
59 #include "dwarf2.h"
60 #include "tm-constrs.h"
61 #include "params.h"
62 #include "cselib.h"
63 #include "sched-int.h"
64 #include "opts.h"
65 #include "tree-pass.h"
66 #include "context.h"
67 #include "pass_manager.h"
68 #include "target-globals.h"
69 #include "gimple-iterator.h"
70 #include "tree-vectorizer.h"
71 #include "shrink-wrap.h"
72 #include "builtins.h"
73 #include "rtl-iter.h"
74 #include "tree-iterator.h"
75 #include "dbgcnt.h"
76 #include "case-cfn-macros.h"
77 #include "dojump.h"
78 #include "fold-const-call.h"
79 #include "tree-vrp.h"
80 #include "tree-ssanames.h"
81 #include "selftest.h"
82 #include "selftest-rtl.h"
83 #include "print-rtl.h"
84 #include "intl.h"
85 #include "ifcvt.h"
86 #include "symbol-summary.h"
87 #include "ipa-prop.h"
88 #include "ipa-fnsummary.h"
89 #include "wide-int-bitmask.h"
90 #include "tree-vector-builder.h"
91 #include "debug.h"
92 #include "dwarf2out.h"
93 #include "i386-builtins.h"
94 #include "i386-features.h"
95
96 const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
97 "savms64",
98 "resms64",
99 "resms64x",
100 "savms64f",
101 "resms64f",
102 "resms64fx"
103 };
104
105 const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
106 /* The below offset values are where each register is stored for the layout
107    relative to the incoming stack pointer.  The value of each m_regs[].offset will
108 be relative to the incoming base pointer (rax or rsi) used by the stub.
109
110 s_instances: 0 1 2 3
111 Offset: realigned or aligned + 8
112 Register aligned aligned + 8 aligned w/HFP w/HFP */
113 XMM15_REG, /* 0x10 0x18 0x10 0x18 */
114 XMM14_REG, /* 0x20 0x28 0x20 0x28 */
115 XMM13_REG, /* 0x30 0x38 0x30 0x38 */
116 XMM12_REG, /* 0x40 0x48 0x40 0x48 */
117 XMM11_REG, /* 0x50 0x58 0x50 0x58 */
118 XMM10_REG, /* 0x60 0x68 0x60 0x68 */
119 XMM9_REG, /* 0x70 0x78 0x70 0x78 */
120 XMM8_REG, /* 0x80 0x88 0x80 0x88 */
121 XMM7_REG, /* 0x90 0x98 0x90 0x98 */
122 XMM6_REG, /* 0xa0 0xa8 0xa0 0xa8 */
123 SI_REG, /* 0xa8 0xb0 0xa8 0xb0 */
124 DI_REG, /* 0xb0 0xb8 0xb0 0xb8 */
125 BX_REG, /* 0xb8 0xc0 0xb8 0xc0 */
126 BP_REG, /* 0xc0 0xc8 N/A N/A */
127 R12_REG, /* 0xc8 0xd0 0xc0 0xc8 */
128 R13_REG, /* 0xd0 0xd8 0xc8 0xd0 */
129 R14_REG, /* 0xd8 0xe0 0xd0 0xd8 */
130 R15_REG, /* 0xe0 0xe8 0xd8 0xe0 */
131 };
132
133 /* Instantiate static const values. */
134 const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
135 const unsigned xlogue_layout::MIN_REGS;
136 const unsigned xlogue_layout::MAX_REGS;
137 const unsigned xlogue_layout::MAX_EXTRA_REGS;
138 const unsigned xlogue_layout::VARIANT_COUNT;
139 const unsigned xlogue_layout::STUB_NAME_MAX_LEN;
140
141 /* Initialize xlogue_layout::s_stub_names to zero. */
142 char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
143 [STUB_NAME_MAX_LEN];
144
145 /* Instantiates all xlogue_layout instances. */
146 const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
147 xlogue_layout (0, false),
148 xlogue_layout (8, false),
149 xlogue_layout (0, true),
150 xlogue_layout (8, true)
151 };
152
153 /* Return an appropriate const instance of xlogue_layout based upon values
154 in cfun->machine and crtl. */
155 const class xlogue_layout &
156 xlogue_layout::get_instance ()
157 {
158 enum xlogue_stub_sets stub_set;
159 bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;
160
161 if (stack_realign_fp)
162 stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
163 else if (frame_pointer_needed)
164 stub_set = aligned_plus_8
165 ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
166 : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
167 else
168 stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;
169
170 return s_instances[stub_set];
171 }
172
173 /* Determine how many clobbered registers can be saved by the stub.
174 Returns the count of registers the stub will save and restore. */
175 unsigned
176 xlogue_layout::count_stub_managed_regs ()
177 {
178 bool hfp = frame_pointer_needed || stack_realign_fp;
179 unsigned i, count;
180 unsigned regno;
181
182 for (count = i = MIN_REGS; i < MAX_REGS; ++i)
183 {
184 regno = REG_ORDER[i];
185 if (regno == BP_REG && hfp)
186 continue;
187 if (!ix86_save_reg (regno, false, false))
188 break;
189 ++count;
190 }
191 return count;
192 }
193
194 /* Determine if register REGNO is a stub managed register given the
195 total COUNT of stub managed registers. */
196 bool
197 xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
198 {
199 bool hfp = frame_pointer_needed || stack_realign_fp;
200 unsigned i;
201
202 for (i = 0; i < count; ++i)
203 {
204 gcc_assert (i < MAX_REGS);
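	  /* BP_REG is not stub-managed when a hard frame pointer is used;
	     extend the scan by one entry to skip over it.  */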
205 if (REG_ORDER[i] == BP_REG && hfp)
206 ++count;
207 else if (REG_ORDER[i] == regno)
208 return true;
209 }
210 return false;
211 }
212
213 /* Constructor for xlogue_layout. */
214 xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
215 : m_hfp (hfp) , m_nregs (hfp ? 17 : 18),
216 m_stack_align_off_in (stack_align_off_in)
217 {
218 HOST_WIDE_INT offset = stack_align_off_in;
219 unsigned i, j;
220
221 for (i = j = 0; i < MAX_REGS; ++i)
222 {
223 unsigned regno = REG_ORDER[i];
224
225 if (regno == BP_REG && hfp)
226 continue;
227 if (SSE_REGNO_P (regno))
228 {
229 offset += 16;
230 /* Verify that SSE regs are always aligned. */
231 gcc_assert (!((stack_align_off_in + offset) & 15));
232 }
233 else
234 offset += 8;
235
236 m_regs[j].regno = regno;
237 m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
238 }
239 gcc_assert (j == m_nregs);
240 }
241
242 const char *
243 xlogue_layout::get_stub_name (enum xlogue_stub stub,
244 unsigned n_extra_regs)
245 {
246 const int have_avx = TARGET_AVX;
247 char *name = s_stub_names[!!have_avx][stub][n_extra_regs];
248
249 /* Lazy init */
250 if (!*name)
251 {
252 int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
253 (have_avx ? "avx" : "sse"),
254 STUB_BASE_NAMES[stub],
255 MIN_REGS + n_extra_regs);
256 gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN);
257 }
258
259 return name;
260 }
261
262 /* Return rtx of a symbol ref for the entry point (based upon
263 cfun->machine->call_ms2sysv_extra_regs) of the specified stub. */
264 rtx
265 xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
266 {
267 const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
268 gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
269 gcc_assert (stub < XLOGUE_STUB_COUNT);
270 gcc_assert (crtl->stack_realign_finalized);
271
272 return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
273 }
274
275 unsigned scalar_chain::max_id = 0;
276
277 namespace {
278
279 /* Initialize new chain. */
280
281 scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
282 {
283 smode = smode_;
284 vmode = vmode_;
285
286 chain_id = ++max_id;
287
288 if (dump_file)
289 fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
290
291 bitmap_obstack_initialize (NULL);
292 insns = BITMAP_ALLOC (NULL);
293 defs = BITMAP_ALLOC (NULL);
294 defs_conv = BITMAP_ALLOC (NULL);
295 queue = NULL;
296 }
297
298 /* Free chain's data. */
299
300 scalar_chain::~scalar_chain ()
301 {
302 BITMAP_FREE (insns);
303 BITMAP_FREE (defs);
304 BITMAP_FREE (defs_conv);
305 bitmap_obstack_release (NULL);
306 }
307
308 /* Add instruction into the chain's queue. */
309
310 void
311 scalar_chain::add_to_queue (unsigned insn_uid)
312 {
313 if (bitmap_bit_p (insns, insn_uid)
314 || bitmap_bit_p (queue, insn_uid))
315 return;
316
317 if (dump_file)
318 fprintf (dump_file, " Adding insn %d into chain's #%d queue\n",
319 insn_uid, chain_id);
320 bitmap_set_bit (queue, insn_uid);
321 }
322
323 general_scalar_chain::general_scalar_chain (enum machine_mode smode_,
324 enum machine_mode vmode_)
325 : scalar_chain (smode_, vmode_)
326 {
327 insns_conv = BITMAP_ALLOC (NULL);
328 n_sse_to_integer = 0;
329 n_integer_to_sse = 0;
330 }
331
332 general_scalar_chain::~general_scalar_chain ()
333 {
334 BITMAP_FREE (insns_conv);
335 }
336
337 /* For DImode conversion, mark register defined by DEF as requiring
338 conversion. */
339
340 void
341 general_scalar_chain::mark_dual_mode_def (df_ref def)
342 {
343 gcc_assert (DF_REF_REG_DEF_P (def));
344
345 /* Record the def/insn pair so we can later efficiently iterate over
346 the defs to convert on insns not in the chain. */
347 bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
348 if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def)))
349 {
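	  /* The def is in an insn outside of the chain; its value will need
	     an integer-to-SSE copy.  */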
350 if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def))
351 && !reg_new)
352 return;
353 n_integer_to_sse++;
354 }
355 else
356 {
357 if (!reg_new)
358 return;
359 n_sse_to_integer++;
360 }
361
362 if (dump_file)
363 fprintf (dump_file,
364 " Mark r%d def in insn %d as requiring both modes in chain #%d\n",
365 DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
366 }
367
368 /* For TImode conversion, it is unused. */
369
370 void
371 timode_scalar_chain::mark_dual_mode_def (df_ref)
372 {
373 gcc_unreachable ();
374 }
375
376 /* Check REF's chain to add new insns into a queue
377 and find registers requiring conversion. */
378
379 void
380 scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
381 {
382 df_link *chain;
383
384 gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
385 || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
386 add_to_queue (DF_REF_INSN_UID (ref));
387
388 for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
389 {
390 unsigned uid = DF_REF_INSN_UID (chain->ref);
391
392 if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
393 continue;
394
395 if (!DF_REF_REG_MEM_P (chain->ref))
396 {
397 if (bitmap_bit_p (insns, uid))
398 continue;
399
400 if (bitmap_bit_p (candidates, uid))
401 {
402 add_to_queue (uid);
403 continue;
404 }
405 }
406
407 if (DF_REF_REG_DEF_P (chain->ref))
408 {
409 if (dump_file)
410 fprintf (dump_file, " r%d def in insn %d isn't convertible\n",
411 DF_REF_REGNO (chain->ref), uid);
412 mark_dual_mode_def (chain->ref);
413 }
414 else
415 {
416 if (dump_file)
417 fprintf (dump_file, " r%d use in insn %d isn't convertible\n",
418 DF_REF_REGNO (chain->ref), uid);
419 mark_dual_mode_def (ref);
420 }
421 }
422 }
423
424 /* Add instruction into a chain. */
425
426 void
427 scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
428 {
429 if (bitmap_bit_p (insns, insn_uid))
430 return;
431
432 if (dump_file)
433 fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id);
434
435 bitmap_set_bit (insns, insn_uid);
436
437 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
438 rtx def_set = single_set (insn);
439 if (def_set && REG_P (SET_DEST (def_set))
440 && !HARD_REGISTER_P (SET_DEST (def_set)))
441 bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));
442
443 /* ??? The following is quadratic since analyze_register_chain
444 iterates over all refs to look for dual-mode regs. Instead this
445 should be done separately for all regs mentioned in the chain once. */
446 df_ref ref;
447 for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
448 if (!HARD_REGISTER_P (DF_REF_REG (ref)))
449 analyze_register_chain (candidates, ref);
450 for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
451 if (!DF_REF_REG_MEM_P (ref))
452 analyze_register_chain (candidates, ref);
453 }
454
455 /* Build a new chain starting from insn INSN_UID, recursively
456 adding all dependent uses and definitions. */
457
458 void
459 scalar_chain::build (bitmap candidates, unsigned insn_uid)
460 {
461 queue = BITMAP_ALLOC (NULL);
462 bitmap_set_bit (queue, insn_uid);
463
464 if (dump_file)
465 fprintf (dump_file, "Building chain #%d...\n", chain_id);
466
467 while (!bitmap_empty_p (queue))
468 {
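      /* Pop one candidate insn at a time; add_insn may queue further
	 dependent insns.  */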
469 insn_uid = bitmap_first_set_bit (queue);
470 bitmap_clear_bit (queue, insn_uid);
471 bitmap_clear_bit (candidates, insn_uid);
472 add_insn (candidates, insn_uid);
473 }
474
475 if (dump_file)
476 {
477 fprintf (dump_file, "Collected chain #%d...\n", chain_id);
478 fprintf (dump_file, " insns: ");
479 dump_bitmap (dump_file, insns);
480 if (!bitmap_empty_p (defs_conv))
481 {
482 bitmap_iterator bi;
483 unsigned id;
484 const char *comma = "";
485 fprintf (dump_file, " defs to convert: ");
486 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
487 {
488 fprintf (dump_file, "%sr%d", comma, id);
489 comma = ", ";
490 }
491 fprintf (dump_file, "\n");
492 }
493 }
494
495 BITMAP_FREE (queue);
496 }
497
498 /* Return the cost of building a vector constant
499 instead of using a scalar one. */
500
501 int
502 general_scalar_chain::vector_const_cost (rtx exp)
503 {
504 gcc_assert (CONST_INT_P (exp));
505
506 if (standard_sse_constant_p (exp, vmode))
507 return ix86_cost->sse_op;
508 /* We have separate costs for SImode and DImode, use SImode costs
509 for smaller modes. */
510 return ix86_cost->sse_load[smode == DImode ? 1 : 0];
511 }
512
513 /* Compute a gain for chain conversion. */
514
515 int
516 general_scalar_chain::compute_convert_gain ()
517 {
518 bitmap_iterator bi;
519 unsigned insn_uid;
520 int gain = 0;
521 int cost = 0;
522
523 if (dump_file)
524 fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
525
526   /* SSE costs distinguish between SImode and DImode loads/stores; for
527      integer costs we factor in the number of GPRs involved.  When supporting
528      smaller modes than SImode the integer load/store costs need to be
529      adjusted as well. */
530 unsigned sse_cost_idx = smode == DImode ? 1 : 0;
531 unsigned m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;
532
533 EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
534 {
535 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
536 rtx def_set = single_set (insn);
537 rtx src = SET_SRC (def_set);
538 rtx dst = SET_DEST (def_set);
539 int igain = 0;
540
541 if (REG_P (src) && REG_P (dst))
542 igain += 2 * m - ix86_cost->xmm_move;
543 else if (REG_P (src) && MEM_P (dst))
544 igain
545 += m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx];
546 else if (MEM_P (src) && REG_P (dst))
547 igain += m * ix86_cost->int_load[2] - ix86_cost->sse_load[sse_cost_idx];
548 else if (GET_CODE (src) == ASHIFT
549 || GET_CODE (src) == ASHIFTRT
550 || GET_CODE (src) == LSHIFTRT)
551 {
552 if (m == 2)
553 {
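	      /* A double-word scalar shift needs an extra insn; credit its
		 cost as an add for counts >= 32, else as a constant shift.  */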
554 if (INTVAL (XEXP (src, 1)) >= 32)
555 igain += ix86_cost->add;
556 else
557 igain += ix86_cost->shift_const;
558 }
559
560 igain += ix86_cost->shift_const - ix86_cost->sse_op;
561
562 if (CONST_INT_P (XEXP (src, 0)))
563 igain -= vector_const_cost (XEXP (src, 0));
564 }
565 else if (GET_CODE (src) == PLUS
566 || GET_CODE (src) == MINUS
567 || GET_CODE (src) == IOR
568 || GET_CODE (src) == XOR
569 || GET_CODE (src) == AND)
570 {
571 igain += m * ix86_cost->add - ix86_cost->sse_op;
572 /* Additional gain for andnot for targets without BMI. */
573 if (GET_CODE (XEXP (src, 0)) == NOT
574 && !TARGET_BMI)
575 igain += m * ix86_cost->add;
576
577 if (CONST_INT_P (XEXP (src, 0)))
578 igain -= vector_const_cost (XEXP (src, 0));
579 if (CONST_INT_P (XEXP (src, 1)))
580 igain -= vector_const_cost (XEXP (src, 1));
581 }
582 else if (GET_CODE (src) == NEG
583 || GET_CODE (src) == NOT)
584 igain += m * ix86_cost->add - ix86_cost->sse_op - COSTS_N_INSNS (1);
585 else if (GET_CODE (src) == SMAX
586 || GET_CODE (src) == SMIN
587 || GET_CODE (src) == UMAX
588 || GET_CODE (src) == UMIN)
589 {
590 /* We do not have any conditional move cost, estimate it as a
591 reg-reg move. Comparisons are costed as adds. */
592 igain += m * (COSTS_N_INSNS (2) + ix86_cost->add);
593 /* Integer SSE ops are all costed the same. */
594 igain -= ix86_cost->sse_op;
595 }
596 else if (GET_CODE (src) == COMPARE)
597 {
598 /* Assume comparison cost is the same. */
599 }
600 else if (CONST_INT_P (src))
601 {
602 if (REG_P (dst))
603	    /* DImode can be an immediate for TARGET_64BIT; SImode always can. */
604 igain += m * COSTS_N_INSNS (1);
605 else if (MEM_P (dst))
606 igain += (m * ix86_cost->int_store[2]
607 - ix86_cost->sse_store[sse_cost_idx]);
608 igain -= vector_const_cost (src);
609 }
610 else
611 gcc_unreachable ();
612
613 if (igain != 0 && dump_file)
614 {
615 fprintf (dump_file, " Instruction gain %d for ", igain);
616 dump_insn_slim (dump_file, insn);
617 }
618 gain += igain;
619 }
620
621 if (dump_file)
622 fprintf (dump_file, " Instruction conversion gain: %d\n", gain);
623
624 /* Cost the integer to sse and sse to integer moves. */
625 cost += n_sse_to_integer * ix86_cost->sse_to_integer;
626   /* ??? This should use integer_to_sse, but we only have that in the
627      RA cost table.  Assume sse_to_integer and integer_to_sse are the
628      same, which they are at the moment. */
629 cost += n_integer_to_sse * ix86_cost->sse_to_integer;
630
631 if (dump_file)
632 fprintf (dump_file, " Registers conversion cost: %d\n", cost);
633
634 gain -= cost;
635
636 if (dump_file)
637 fprintf (dump_file, " Total gain: %d\n", gain);
638
639 return gain;
640 }
641
642 /* Insert generated conversion instruction sequence INSNS
643    after instruction AFTER.  A new BB may be required if the
644    instruction has an EH region attached. */
645
646 void
647 scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
648 {
649 if (!control_flow_insn_p (after))
650 {
651 emit_insn_after (insns, after);
652 return;
653 }
654
655 basic_block bb = BLOCK_FOR_INSN (after);
656 edge e = find_fallthru_edge (bb->succs);
657 gcc_assert (e);
658
659 basic_block new_bb = split_edge (e);
660 emit_insn_after (insns, BB_HEAD (new_bb));
661 }
662
663 } // anon namespace
664
665 /* Generate the canonical SET_SRC to move GPR to a VMODE vector register,
666 zeroing the upper parts. */
667
668 static rtx
669 gen_gpr_to_xmm_move_src (enum machine_mode vmode, rtx gpr)
670 {
671 if (!nonimmediate_operand (gpr, GET_MODE_INNER (vmode)))
672 gpr = force_reg (GET_MODE_INNER (vmode), gpr);
673 switch (GET_MODE_NUNITS (vmode))
674 {
675 case 1:
676 /* We are not using this case currently. */
677 gcc_unreachable ();
678 case 2:
679 return gen_rtx_VEC_CONCAT (vmode, gpr,
680 CONST0_RTX (GET_MODE_INNER (vmode)));
681 default:
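      /* Duplicate the scalar into all lanes, then merge element 0 over a
	 zero vector so that the upper elements are cleared.  */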
682 return gen_rtx_VEC_MERGE (vmode, gen_rtx_VEC_DUPLICATE (vmode, gpr),
683 CONST0_RTX (vmode), GEN_INT (HOST_WIDE_INT_1U));
684 }
685 }
686
687 /* Make a vector copy of scalar register REG, defined in INSN outside
688    of the chain, into the vector register the chain uses for it. */
689
690 void
691 general_scalar_chain::make_vector_copies (rtx_insn *insn, rtx reg)
692 {
693 rtx vreg = *defs_map.get (reg);
694
695 start_sequence ();
696 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
697 {
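      /* Without direct moves from GPRs to vector registers, bounce the
	 value through a stack temporary.  */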
698 rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
699 if (smode == DImode && !TARGET_64BIT)
700 {
701 emit_move_insn (adjust_address (tmp, SImode, 0),
702 gen_rtx_SUBREG (SImode, reg, 0));
703 emit_move_insn (adjust_address (tmp, SImode, 4),
704 gen_rtx_SUBREG (SImode, reg, 4));
705 }
706 else
707 emit_move_insn (copy_rtx (tmp), reg);
708 emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
709 gen_gpr_to_xmm_move_src (vmode, tmp)));
710 }
711 else if (!TARGET_64BIT && smode == DImode)
712 {
713 if (TARGET_SSE4_1)
714 {
715 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
716 CONST0_RTX (V4SImode),
717 gen_rtx_SUBREG (SImode, reg, 0)));
718 emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
719 gen_rtx_SUBREG (V4SImode, vreg, 0),
720 gen_rtx_SUBREG (SImode, reg, 4),
721 GEN_INT (2)));
722 }
723 else
724 {
725 rtx tmp = gen_reg_rtx (DImode);
726 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
727 CONST0_RTX (V4SImode),
728 gen_rtx_SUBREG (SImode, reg, 0)));
729 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
730 CONST0_RTX (V4SImode),
731 gen_rtx_SUBREG (SImode, reg, 4)));
732 emit_insn (gen_vec_interleave_lowv4si
733 (gen_rtx_SUBREG (V4SImode, vreg, 0),
734 gen_rtx_SUBREG (V4SImode, vreg, 0),
735 gen_rtx_SUBREG (V4SImode, tmp, 0)));
736 }
737 }
738 else
739 emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
740 gen_gpr_to_xmm_move_src (vmode, reg)));
741 rtx_insn *seq = get_insns ();
742 end_sequence ();
743 emit_conversion_insns (seq, insn);
744
745 if (dump_file)
746 fprintf (dump_file,
747 " Copied r%d to a vector register r%d for insn %d\n",
748 REGNO (reg), REGNO (vreg), INSN_UID (insn));
749 }
750
751 /* Copy the definition SRC of INSN inside the chain to DST for
752 scalar uses outside of the chain. */
753
754 void
755 general_scalar_chain::convert_reg (rtx_insn *insn, rtx dst, rtx src)
756 {
757 start_sequence ();
758 if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
759 {
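      /* Without direct moves from vector registers to GPRs, bounce the
	 value through a stack temporary.  */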
760 rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
761 emit_move_insn (tmp, src);
762 if (!TARGET_64BIT && smode == DImode)
763 {
764 emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
765 adjust_address (tmp, SImode, 0));
766 emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
767 adjust_address (tmp, SImode, 4));
768 }
769 else
770 emit_move_insn (dst, copy_rtx (tmp));
771 }
772 else if (!TARGET_64BIT && smode == DImode)
773 {
774 if (TARGET_SSE4_1)
775 {
776 rtx tmp = gen_rtx_PARALLEL (VOIDmode,
777 gen_rtvec (1, const0_rtx));
778 emit_insn
779 (gen_rtx_SET
780 (gen_rtx_SUBREG (SImode, dst, 0),
781 gen_rtx_VEC_SELECT (SImode,
782 gen_rtx_SUBREG (V4SImode, src, 0),
783 tmp)));
784
785 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
786 emit_insn
787 (gen_rtx_SET
788 (gen_rtx_SUBREG (SImode, dst, 4),
789 gen_rtx_VEC_SELECT (SImode,
790 gen_rtx_SUBREG (V4SImode, src, 0),
791 tmp)));
792 }
793 else
794 {
795 rtx vcopy = gen_reg_rtx (V2DImode);
796 emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, src, 0));
797 emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
798 gen_rtx_SUBREG (SImode, vcopy, 0));
799 emit_move_insn (vcopy,
800 gen_rtx_LSHIFTRT (V2DImode,
801 vcopy, GEN_INT (32)));
802 emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
803 gen_rtx_SUBREG (SImode, vcopy, 0));
804 }
805 }
806 else
807 emit_move_insn (dst, src);
808
809 rtx_insn *seq = get_insns ();
810 end_sequence ();
811 emit_conversion_insns (seq, insn);
812
813 if (dump_file)
814 fprintf (dump_file,
815 " Copied r%d to a scalar register r%d for insn %d\n",
816 REGNO (src), REGNO (dst), INSN_UID (insn));
817 }
818
819 /* Convert operand OP in INSN. We should handle
820 memory operands and uninitialized registers.
821 All other register uses are converted during
822    register conversion. */
823
824 void
825 general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
826 {
827 *op = copy_rtx_if_shared (*op);
828
829 if (GET_CODE (*op) == NOT)
830 {
831 convert_op (&XEXP (*op, 0), insn);
832 PUT_MODE (*op, vmode);
833 }
834 else if (MEM_P (*op))
835 {
836 rtx tmp = gen_reg_rtx (GET_MODE (*op));
837
838 emit_insn_before (gen_rtx_SET (gen_rtx_SUBREG (vmode, tmp, 0),
839 gen_gpr_to_xmm_move_src (vmode, *op)),
840 insn);
841 *op = gen_rtx_SUBREG (vmode, tmp, 0);
842
843 if (dump_file)
844 fprintf (dump_file, " Preloading operand for insn %d into r%d\n",
845 INSN_UID (insn), REGNO (tmp));
846 }
847 else if (REG_P (*op))
848 {
849 *op = gen_rtx_SUBREG (vmode, *op, 0);
850 }
851 else if (CONST_INT_P (*op))
852 {
853 rtx vec_cst;
854 rtx tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (smode), 0);
855
856 /* Prefer all ones vector in case of -1. */
857 if (constm1_operand (*op, GET_MODE (*op)))
858 vec_cst = CONSTM1_RTX (vmode);
859 else
860 {
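	  /* Build a vector constant with *OP in element 0 and zeros in the
	     remaining elements.  */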
861 unsigned n = GET_MODE_NUNITS (vmode);
862 rtx *v = XALLOCAVEC (rtx, n);
863 v[0] = *op;
864 for (unsigned i = 1; i < n; ++i)
865 v[i] = const0_rtx;
866 vec_cst = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
867 }
868
869 if (!standard_sse_constant_p (vec_cst, vmode))
870 {
871 start_sequence ();
872 vec_cst = validize_mem (force_const_mem (vmode, vec_cst));
873 rtx_insn *seq = get_insns ();
874 end_sequence ();
875 emit_insn_before (seq, insn);
876 }
877
878 emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
879 *op = tmp;
880 }
881 else
882 {
883 gcc_assert (SUBREG_P (*op));
884 gcc_assert (GET_MODE (*op) == vmode);
885 }
886 }
887
888 /* Convert INSN to vector mode. */
889
890 void
891 general_scalar_chain::convert_insn (rtx_insn *insn)
892 {
893 /* Generate copies for out-of-chain uses of defs and adjust debug uses. */
894 for (df_ref ref = DF_INSN_DEFS (insn); ref; ref = DF_REF_NEXT_LOC (ref))
895 if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
896 {
897 df_link *use;
898 for (use = DF_REF_CHAIN (ref); use; use = use->next)
899 if (NONDEBUG_INSN_P (DF_REF_INSN (use->ref))
900 && (DF_REF_REG_MEM_P (use->ref)
901 || !bitmap_bit_p (insns, DF_REF_INSN_UID (use->ref))))
902 break;
903 if (use)
904 convert_reg (insn, DF_REF_REG (ref),
905 *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]));
906 else if (MAY_HAVE_DEBUG_BIND_INSNS)
907 {
908 /* If we generated a scalar copy we can leave debug-insns
909	       as-is; if not, we have to adjust them. */
910 auto_vec<rtx_insn *, 5> to_reset_debug_insns;
911 for (use = DF_REF_CHAIN (ref); use; use = use->next)
912 if (DEBUG_INSN_P (DF_REF_INSN (use->ref)))
913 {
914 rtx_insn *debug_insn = DF_REF_INSN (use->ref);
915 /* If there's a reaching definition outside of the
916 chain we have to reset. */
917 df_link *def;
918 for (def = DF_REF_CHAIN (use->ref); def; def = def->next)
919 if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def->ref)))
920 break;
921 if (def)
922 to_reset_debug_insns.safe_push (debug_insn);
923 else
924 {
925 *DF_REF_REAL_LOC (use->ref)
926 = *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]);
927 df_insn_rescan (debug_insn);
928 }
929 }
930 /* Have to do the reset outside of the DF_CHAIN walk to not
931 disrupt it. */
932 while (!to_reset_debug_insns.is_empty ())
933 {
934 rtx_insn *debug_insn = to_reset_debug_insns.pop ();
935 INSN_VAR_LOCATION_LOC (debug_insn) = gen_rtx_UNKNOWN_VAR_LOC ();
936 df_insn_rescan_debug_internal (debug_insn);
937 }
938 }
939 }
940
941 /* Replace uses in this insn with the defs we use in the chain. */
942 for (df_ref ref = DF_INSN_USES (insn); ref; ref = DF_REF_NEXT_LOC (ref))
943 if (!DF_REF_REG_MEM_P (ref))
944 if (rtx *vreg = defs_map.get (regno_reg_rtx[DF_REF_REGNO (ref)]))
945 {
946 /* Also update a corresponding REG_DEAD note. */
947 rtx note = find_reg_note (insn, REG_DEAD, DF_REF_REG (ref));
948 if (note)
949 XEXP (note, 0) = *vreg;
950 *DF_REF_REAL_LOC (ref) = *vreg;
951 }
952
953 rtx def_set = single_set (insn);
954 rtx src = SET_SRC (def_set);
955 rtx dst = SET_DEST (def_set);
956 rtx subreg;
957
958 if (MEM_P (dst) && !REG_P (src))
959 {
960 /* There are no scalar integer instructions and therefore
961	 a temporary register is required. */
962 rtx tmp = gen_reg_rtx (smode);
963 emit_conversion_insns (gen_move_insn (dst, tmp), insn);
964 dst = gen_rtx_SUBREG (vmode, tmp, 0);
965 }
966 else if (REG_P (dst))
967 {
968 /* Replace the definition with a SUBREG to the definition we
969 use inside the chain. */
970 rtx *vdef = defs_map.get (dst);
971 if (vdef)
972 dst = *vdef;
973 dst = gen_rtx_SUBREG (vmode, dst, 0);
974 /* IRA doesn't like to have REG_EQUAL/EQUIV notes when the SET_DEST
975 is a non-REG_P. So kill those off. */
976 rtx note = find_reg_equal_equiv_note (insn);
977 if (note)
978 remove_note (insn, note);
979 }
980
981 switch (GET_CODE (src))
982 {
983 case ASHIFT:
984 case ASHIFTRT:
985 case LSHIFTRT:
986 convert_op (&XEXP (src, 0), insn);
987 PUT_MODE (src, vmode);
988 break;
989
990 case PLUS:
991 case MINUS:
992 case IOR:
993 case XOR:
994 case AND:
995 case SMAX:
996 case SMIN:
997 case UMAX:
998 case UMIN:
999 convert_op (&XEXP (src, 0), insn);
1000 convert_op (&XEXP (src, 1), insn);
1001 PUT_MODE (src, vmode);
1002 break;
1003
1004 case NEG:
1005 src = XEXP (src, 0);
1006 convert_op (&src, insn);
1007 subreg = gen_reg_rtx (vmode);
1008 emit_insn_before (gen_move_insn (subreg, CONST0_RTX (vmode)), insn);
1009 src = gen_rtx_MINUS (vmode, subreg, src);
1010 break;
1011
1012 case NOT:
1013 src = XEXP (src, 0);
1014 convert_op (&src, insn);
1015 subreg = gen_reg_rtx (vmode);
1016 emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (vmode)), insn);
1017 src = gen_rtx_XOR (vmode, src, subreg);
1018 break;
1019
1020 case MEM:
1021 if (!REG_P (dst))
1022 convert_op (&src, insn);
1023 break;
1024
1025 case REG:
1026 if (!MEM_P (dst))
1027 convert_op (&src, insn);
1028 break;
1029
1030 case SUBREG:
1031 gcc_assert (GET_MODE (src) == vmode);
1032 break;
1033
1034 case COMPARE:
1035 src = SUBREG_REG (XEXP (XEXP (src, 0), 0));
1036
1037 gcc_assert (REG_P (src) && GET_MODE (src) == DImode);
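      /* The upper half of the vector register may hold garbage; duplicate
	 the low quadword first so that the PTEST below tests exactly the
	 original DImode value for zero.  */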
1038 subreg = gen_rtx_SUBREG (V2DImode, src, 0);
1039 emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
1040 copy_rtx_if_shared (subreg),
1041 copy_rtx_if_shared (subreg)),
1042 insn);
1043 dst = gen_rtx_REG (CCmode, FLAGS_REG);
1044 src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (subreg),
1045 copy_rtx_if_shared (subreg)),
1046 UNSPEC_PTEST);
1047 break;
1048
1049 case CONST_INT:
1050 convert_op (&src, insn);
1051 break;
1052
1053 default:
1054 gcc_unreachable ();
1055 }
1056
1057 SET_SRC (def_set) = src;
1058 SET_DEST (def_set) = dst;
1059
1060 /* Drop possible dead definitions. */
1061 PATTERN (insn) = def_set;
1062
1063 INSN_CODE (insn) = -1;
1064 int patt = recog_memoized (insn);
1065 if (patt == -1)
1066 fatal_insn_not_found (insn);
1067 df_insn_rescan (insn);
1068 }
1069
1070 /* Fix uses of converted REG in debug insns. */
1071
1072 void
1073 timode_scalar_chain::fix_debug_reg_uses (rtx reg)
1074 {
1075 if (!flag_var_tracking)
1076 return;
1077
1078 df_ref ref, next;
1079 for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
1080 {
1081 rtx_insn *insn = DF_REF_INSN (ref);
1082 /* Make sure the next ref is for a different instruction,
1083 so that we're not affected by the rescan. */
1084 next = DF_REF_NEXT_REG (ref);
1085 while (next && DF_REF_INSN (next) == insn)
1086 next = DF_REF_NEXT_REG (next);
1087
1088 if (DEBUG_INSN_P (insn))
1089 {
1090	  /* It may be a debug insn with a TImode variable in
1091	     a register. */
1092 bool changed = false;
1093 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
1094 {
1095 rtx *loc = DF_REF_LOC (ref);
1096 if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
1097 {
1098 *loc = gen_rtx_SUBREG (TImode, *loc, 0);
1099 changed = true;
1100 }
1101 }
1102 if (changed)
1103 df_insn_rescan (insn);
1104 }
1105 }
1106 }
1107
1108 /* Convert INSN from TImode to V1TImode. */
1109
1110 void
1111 timode_scalar_chain::convert_insn (rtx_insn *insn)
1112 {
1113 rtx def_set = single_set (insn);
1114 rtx src = SET_SRC (def_set);
1115 rtx dst = SET_DEST (def_set);
1116
1117 switch (GET_CODE (dst))
1118 {
1119 case REG:
1120 {
1121 rtx tmp = find_reg_equal_equiv_note (insn);
1122 if (tmp)
1123 PUT_MODE (XEXP (tmp, 0), V1TImode);
1124 PUT_MODE (dst, V1TImode);
1125 fix_debug_reg_uses (dst);
1126 }
1127 break;
1128 case MEM:
1129 PUT_MODE (dst, V1TImode);
1130 break;
1131
1132 default:
1133 gcc_unreachable ();
1134 }
1135
1136 switch (GET_CODE (src))
1137 {
1138 case REG:
1139 PUT_MODE (src, V1TImode);
1140 /* Call fix_debug_reg_uses only if SRC is never defined. */
1141 if (!DF_REG_DEF_CHAIN (REGNO (src)))
1142 fix_debug_reg_uses (src);
1143 break;
1144
1145 case MEM:
1146 PUT_MODE (src, V1TImode);
1147 break;
1148
1149 case CONST_WIDE_INT:
1150 if (NONDEBUG_INSN_P (insn))
1151 {
1152	    /* Since there are no instructions to store a 128-bit constant,
1153	       a temporary register is required. */
1154 rtx tmp = gen_reg_rtx (V1TImode);
1155 start_sequence ();
1156 src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
1157 src = validize_mem (force_const_mem (V1TImode, src));
1158 rtx_insn *seq = get_insns ();
1159 end_sequence ();
1160 if (seq)
1161 emit_insn_before (seq, insn);
1162 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
1163 dst = tmp;
1164 }
1165 break;
1166
1167 case CONST_INT:
1168 switch (standard_sse_constant_p (src, TImode))
1169 {
1170 case 1:
1171 src = CONST0_RTX (GET_MODE (dst));
1172 break;
1173 case 2:
1174 src = CONSTM1_RTX (GET_MODE (dst));
1175 break;
1176 default:
1177 gcc_unreachable ();
1178 }
1179 if (NONDEBUG_INSN_P (insn))
1180 {
1181 rtx tmp = gen_reg_rtx (V1TImode);
1182	  /* Since there are no instructions to store a standard SSE
1183	     constant, a temporary register is required. */
1184 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
1185 dst = tmp;
1186 }
1187 break;
1188
1189 default:
1190 gcc_unreachable ();
1191 }
1192
1193 SET_SRC (def_set) = src;
1194 SET_DEST (def_set) = dst;
1195
1196 /* Drop possible dead definitions. */
1197 PATTERN (insn) = def_set;
1198
1199 INSN_CODE (insn) = -1;
1200 recog_memoized (insn);
1201 df_insn_rescan (insn);
1202 }
1203
1204 /* Generate copies from defs used by the chain but not defined therein.
1205 Also populates defs_map which is used later by convert_insn. */
1206
1207 void
1208 general_scalar_chain::convert_registers ()
1209 {
1210 bitmap_iterator bi;
1211 unsigned id;
1212 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
1213 defs_map.put (regno_reg_rtx[id], gen_reg_rtx (smode));
1214 EXECUTE_IF_SET_IN_BITMAP (insns_conv, 0, id, bi)
1215 for (df_ref ref = DF_INSN_UID_DEFS (id); ref; ref = DF_REF_NEXT_LOC (ref))
1216 if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
1217 make_vector_copies (DF_REF_INSN (ref), DF_REF_REAL_REG (ref));
1218 }
1219
1220 /* Convert whole chain creating required register
1221 conversions and copies. */
1222
1223 int
1224 scalar_chain::convert ()
1225 {
1226 bitmap_iterator bi;
1227 unsigned id;
1228 int converted_insns = 0;
1229
1230 if (!dbg_cnt (stv_conversion))
1231 return 0;
1232
1233 if (dump_file)
1234 fprintf (dump_file, "Converting chain #%d...\n", chain_id);
1235
1236 convert_registers ();
1237
1238 EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
1239 {
1240 convert_insn (DF_INSN_UID_GET (id)->insn);
1241 converted_insns++;
1242 }
1243
1244 return converted_insns;
1245 }
1246
1247 /* Return true if INSN uses or defines a hard register.
1248 Hard register uses in a memory address are ignored.
1249 Clobbers and flags definitions are ignored. */
1250
1251 static bool
1252 has_non_address_hard_reg (rtx_insn *insn)
1253 {
1254 df_ref ref;
1255 FOR_EACH_INSN_DEF (ref, insn)
1256 if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
1257 && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
1258 && DF_REF_REGNO (ref) != FLAGS_REG)
1259 return true;
1260
1261 FOR_EACH_INSN_USE (ref, insn)
1262 if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
1263 return true;
1264
1265 return false;
1266 }
1267
1268 /* Check if comparison INSN may be transformed into a vector
1269    comparison.  Currently we only transform zero checks which
1270    look like:
1271
1272 (set (reg:CCZ 17 flags)
1273 (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
1274 (subreg:SI (reg:DI x) 0))
1275 (const_int 0 [0]))) */
1276
1277 static bool
1278 convertible_comparison_p (rtx_insn *insn, enum machine_mode mode)
1279 {
1280 /* ??? Currently convertible for double-word DImode chain only. */
1281 if (TARGET_64BIT || mode != DImode)
1282 return false;
1283
1284 if (!TARGET_SSE4_1)
1285 return false;
1286
1287 rtx def_set = single_set (insn);
1288
1289 gcc_assert (def_set);
1290
1291 rtx src = SET_SRC (def_set);
1292 rtx dst = SET_DEST (def_set);
1293
1294 gcc_assert (GET_CODE (src) == COMPARE);
1295
1296 if (GET_CODE (dst) != REG
1297 || REGNO (dst) != FLAGS_REG
1298 || GET_MODE (dst) != CCZmode)
1299 return false;
1300
1301 rtx op1 = XEXP (src, 0);
1302 rtx op2 = XEXP (src, 1);
1303
1304 if (op2 != CONST0_RTX (GET_MODE (op2)))
1305 return false;
1306
1307 if (GET_CODE (op1) != IOR)
1308 return false;
1309
1310 op2 = XEXP (op1, 1);
1311 op1 = XEXP (op1, 0);
1312
1313 if (!SUBREG_P (op1)
1314 || !SUBREG_P (op2)
1315 || GET_MODE (op1) != SImode
1316 || GET_MODE (op2) != SImode
1317 || ((SUBREG_BYTE (op1) != 0
1318 || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
1319 && (SUBREG_BYTE (op2) != 0
1320 || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
1321 return false;
1322
1323 op1 = SUBREG_REG (op1);
1324 op2 = SUBREG_REG (op2);
1325
1326 if (op1 != op2
1327 || !REG_P (op1)
1328 || GET_MODE (op1) != DImode)
1329 return false;
1330
1331 return true;
1332 }
1333
1334 /* The general version of scalar_to_vector_candidate_p. */
1335
1336 static bool
1337 general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
1338 {
1339 rtx def_set = single_set (insn);
1340
1341 if (!def_set)
1342 return false;
1343
1344 if (has_non_address_hard_reg (insn))
1345 return false;
1346
1347 rtx src = SET_SRC (def_set);
1348 rtx dst = SET_DEST (def_set);
1349
1350 if (GET_CODE (src) == COMPARE)
1351 return convertible_comparison_p (insn, mode);
1352
1353 /* We are interested in "mode" only. */
1354 if ((GET_MODE (src) != mode
1355 && !CONST_INT_P (src))
1356 || GET_MODE (dst) != mode)
1357 return false;
1358
1359 if (!REG_P (dst) && !MEM_P (dst))
1360 return false;
1361
1362 switch (GET_CODE (src))
1363 {
1364 case ASHIFTRT:
1365 if (!TARGET_AVX512VL)
1366 return false;
1367 /* FALLTHRU */
1368
1369 case ASHIFT:
1370 case LSHIFTRT:
1371 if (!CONST_INT_P (XEXP (src, 1))
1372 || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, GET_MODE_BITSIZE (mode)-1))
1373 return false;
1374 break;
1375
1376 case SMAX:
1377 case SMIN:
1378 case UMAX:
1379 case UMIN:
1380 if ((mode == DImode && !TARGET_AVX512VL)
1381 || (mode == SImode && !TARGET_SSE4_1))
1382 return false;
1383 /* Fallthru. */
1384
1385 case PLUS:
1386 case MINUS:
1387 case IOR:
1388 case XOR:
1389 case AND:
1390 if (!REG_P (XEXP (src, 1))
1391 && !MEM_P (XEXP (src, 1))
1392 && !CONST_INT_P (XEXP (src, 1)))
1393 return false;
1394
1395 if (GET_MODE (XEXP (src, 1)) != mode
1396 && !CONST_INT_P (XEXP (src, 1)))
1397 return false;
1398 break;
1399
1400 case NEG:
1401 case NOT:
1402 break;
1403
1404 case REG:
1405 return true;
1406
1407 case MEM:
1408 case CONST_INT:
1409 return REG_P (dst);
1410
1411 default:
1412 return false;
1413 }
1414
1415 if (!REG_P (XEXP (src, 0))
1416 && !MEM_P (XEXP (src, 0))
1417 && !CONST_INT_P (XEXP (src, 0))
1418 /* Check for andnot case. */
1419 && (GET_CODE (src) != AND
1420 || GET_CODE (XEXP (src, 0)) != NOT
1421 || !REG_P (XEXP (XEXP (src, 0), 0))))
1422 return false;
1423
1424 if (GET_MODE (XEXP (src, 0)) != mode
1425 && !CONST_INT_P (XEXP (src, 0)))
1426 return false;
1427
1428 return true;
1429 }
1430
1431 /* The TImode version of scalar_to_vector_candidate_p. */
1432
1433 static bool
1434 timode_scalar_to_vector_candidate_p (rtx_insn *insn)
1435 {
1436 rtx def_set = single_set (insn);
1437
1438 if (!def_set)
1439 return false;
1440
1441 if (has_non_address_hard_reg (insn))
1442 return false;
1443
1444 rtx src = SET_SRC (def_set);
1445 rtx dst = SET_DEST (def_set);
1446
1447   /* Only TImode loads and stores are allowed. */
1448 if (GET_MODE (dst) != TImode)
1449 return false;
1450
1451 if (MEM_P (dst))
1452 {
1453      /* Check for a store.  The memory must be aligned, or unaligned
1454	 stores must be optimal.  Only support stores from a register, a
1455	 standard SSE constant or a CONST_WIDE_INT from a piecewise store.
1456
1457 ??? Verify performance impact before enabling CONST_INT for
1458 __int128 store. */
1459 if (misaligned_operand (dst, TImode)
1460 && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
1461 return false;
1462
1463 switch (GET_CODE (src))
1464 {
1465 default:
1466 return false;
1467
1468 case REG:
1469 case CONST_WIDE_INT:
1470 return true;
1471
1472 case CONST_INT:
1473 return standard_sse_constant_p (src, TImode);
1474 }
1475 }
1476 else if (MEM_P (src))
1477 {
1478      /* Check for a load.  The memory must be aligned, or unaligned
1479	 loads must be optimal. */
1480 return (REG_P (dst)
1481 && (!misaligned_operand (src, TImode)
1482 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
1483 }
1484
1485 return false;
1486 }
1487
1488 /* For a register REGNO, scan instructions for its defs and uses.
1489 Put REGNO in REGS if a def or use isn't in CANDIDATES. */
1490
1491 static void
1492 timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
1493 unsigned int regno)
1494 {
1495 for (df_ref def = DF_REG_DEF_CHAIN (regno);
1496 def;
1497 def = DF_REF_NEXT_REG (def))
1498 {
1499 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1500 {
1501 if (dump_file)
1502 fprintf (dump_file,
1503 "r%d has non convertible def in insn %d\n",
1504 regno, DF_REF_INSN_UID (def));
1505
1506 bitmap_set_bit (regs, regno);
1507 break;
1508 }
1509 }
1510
1511 for (df_ref ref = DF_REG_USE_CHAIN (regno);
1512 ref;
1513 ref = DF_REF_NEXT_REG (ref))
1514 {
1515 /* Debug instructions are skipped. */
1516 if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
1517 && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
1518 {
1519 if (dump_file)
1520 fprintf (dump_file,
1521 "r%d has non convertible use in insn %d\n",
1522 regno, DF_REF_INSN_UID (ref));
1523
1524 bitmap_set_bit (regs, regno);
1525 break;
1526 }
1527 }
1528 }
1529
1530 /* The TImode version of remove_non_convertible_regs. */
1531
1532 static void
1533 timode_remove_non_convertible_regs (bitmap candidates)
1534 {
1535 bitmap_iterator bi;
1536 unsigned id;
1537 bitmap regs = BITMAP_ALLOC (NULL);
1538
1539 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
1540 {
1541 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
1542 rtx dest = SET_DEST (def_set);
1543 rtx src = SET_SRC (def_set);
1544
1545 if ((!REG_P (dest)
1546 || bitmap_bit_p (regs, REGNO (dest))
1547 || HARD_REGISTER_P (dest))
1548 && (!REG_P (src)
1549 || bitmap_bit_p (regs, REGNO (src))
1550 || HARD_REGISTER_P (src)))
1551 continue;
1552
1553 if (REG_P (dest))
1554 timode_check_non_convertible_regs (candidates, regs,
1555 REGNO (dest));
1556
1557 if (REG_P (src))
1558 timode_check_non_convertible_regs (candidates, regs,
1559 REGNO (src));
1560 }
1561
1562 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
1563 {
1564 for (df_ref def = DF_REG_DEF_CHAIN (id);
1565 def;
1566 def = DF_REF_NEXT_REG (def))
1567 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1568 {
1569 if (dump_file)
1570 fprintf (dump_file, "Removing insn %d from candidates list\n",
1571 DF_REF_INSN_UID (def));
1572
1573 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
1574 }
1575
1576 for (df_ref ref = DF_REG_USE_CHAIN (id);
1577 ref;
1578 ref = DF_REF_NEXT_REG (ref))
1579 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
1580 {
1581 if (dump_file)
1582 fprintf (dump_file, "Removing insn %d from candidates list\n",
1583 DF_REF_INSN_UID (ref));
1584
1585 bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
1586 }
1587 }
1588
1589 BITMAP_FREE (regs);
1590 }
1591
1592 /* Main STV pass function. Find and convert scalar
1593 instructions into vector mode when profitable. */
1594
1595 static unsigned int
1596 convert_scalars_to_vector (bool timode_p)
1597 {
1598 basic_block bb;
1599 int converted_insns = 0;
1600
1601 bitmap_obstack_initialize (NULL);
1602 const machine_mode cand_mode[3] = { SImode, DImode, TImode };
1603 const machine_mode cand_vmode[3] = { V4SImode, V2DImode, V1TImode };
1604 bitmap_head candidates[3]; /* { SImode, DImode, TImode } */
1605 for (unsigned i = 0; i < 3; ++i)
1606 bitmap_initialize (&candidates[i], &bitmap_default_obstack);
1607
1608 calculate_dominance_info (CDI_DOMINATORS);
1609 df_set_flags (DF_DEFER_INSN_RESCAN);
1610 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
1611 df_analyze ();
1612
1613 /* Find all instructions we want to convert into vector mode. */
1614 if (dump_file)
1615 fprintf (dump_file, "Searching for mode conversion candidates...\n");
1616
1617 FOR_EACH_BB_FN (bb, cfun)
1618 {
1619 rtx_insn *insn;
1620 FOR_BB_INSNS (bb, insn)
1621 if (timode_p
1622 && timode_scalar_to_vector_candidate_p (insn))
1623 {
1624 if (dump_file)
1625 fprintf (dump_file, " insn %d is marked as a TImode candidate\n",
1626 INSN_UID (insn));
1627
1628 bitmap_set_bit (&candidates[2], INSN_UID (insn));
1629 }
1630 else if (!timode_p)
1631 {
1632 /* Check {SI,DI}mode. */
1633 for (unsigned i = 0; i <= 1; ++i)
1634 if (general_scalar_to_vector_candidate_p (insn, cand_mode[i]))
1635 {
1636 if (dump_file)
1637 fprintf (dump_file, " insn %d is marked as a %s candidate\n",
1638 INSN_UID (insn), i == 0 ? "SImode" : "DImode");
1639
1640 bitmap_set_bit (&candidates[i], INSN_UID (insn));
1641 break;
1642 }
1643 }
1644 }
1645
1646 if (timode_p)
1647 timode_remove_non_convertible_regs (&candidates[2]);
1648
1649 for (unsigned i = 0; i <= 2; ++i)
1650 if (!bitmap_empty_p (&candidates[i]))
1651 break;
1652 else if (i == 2 && dump_file)
1653 fprintf (dump_file, "There are no candidates for optimization.\n");
1654
1655 for (unsigned i = 0; i <= 2; ++i)
1656 while (!bitmap_empty_p (&candidates[i]))
1657 {
1658 unsigned uid = bitmap_first_set_bit (&candidates[i]);
1659 scalar_chain *chain;
1660
1661 if (cand_mode[i] == TImode)
1662 chain = new timode_scalar_chain;
1663 else
1664 chain = new general_scalar_chain (cand_mode[i], cand_vmode[i]);
1665
1666 /* Find instructions chain we want to convert to vector mode.
1667 Check all uses and definitions to estimate all required
1668 conversions. */
1669 chain->build (&candidates[i], uid);
1670
1671 if (chain->compute_convert_gain () > 0)
1672 converted_insns += chain->convert ();
1673 else
1674 if (dump_file)
1675 fprintf (dump_file, "Chain #%d conversion is not profitable\n",
1676 chain->chain_id);
1677
1678 delete chain;
1679 }
1680
1681 if (dump_file)
1682 fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
1683
1684 for (unsigned i = 0; i <= 2; ++i)
1685 bitmap_release (&candidates[i]);
1686 bitmap_obstack_release (NULL);
1687 df_process_deferred_rescans ();
1688
1689   /* Conversion means we may have 128-bit register spills/fills
1690      which require an aligned stack. */
1691 if (converted_insns)
1692 {
1693 if (crtl->stack_alignment_needed < 128)
1694 crtl->stack_alignment_needed = 128;
1695 if (crtl->stack_alignment_estimated < 128)
1696 crtl->stack_alignment_estimated = 128;
1697
1698 crtl->stack_realign_needed
1699 = INCOMING_STACK_BOUNDARY < crtl->stack_alignment_estimated;
1700 crtl->stack_realign_tried = crtl->stack_realign_needed;
1701
1702 crtl->stack_realign_processed = true;
1703
1704 if (!crtl->drap_reg)
1705 {
1706 rtx drap_rtx = targetm.calls.get_drap_rtx ();
1707
1708 /* stack_realign_drap and drap_rtx must match. */
1709 gcc_assert ((stack_realign_drap != 0) == (drap_rtx != NULL));
1710
1711 /* Do nothing if NULL is returned,
1712 which means DRAP is not needed. */
1713 if (drap_rtx != NULL)
1714 {
1715 crtl->args.internal_arg_pointer = drap_rtx;
1716
1717 /* Call fixup_tail_calls to clean up
1718 REG_EQUIV note if DRAP is needed. */
1719 fixup_tail_calls ();
1720 }
1721 }
1722
1723 /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments. */
1724 if (TARGET_64BIT)
1725 for (tree parm = DECL_ARGUMENTS (current_function_decl);
1726 parm; parm = DECL_CHAIN (parm))
1727 {
1728 if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
1729 continue;
1730 if (DECL_RTL_SET_P (parm)
1731 && GET_MODE (DECL_RTL (parm)) == V1TImode)
1732 {
1733 rtx r = DECL_RTL (parm);
1734 if (REG_P (r))
1735 SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
1736 }
1737 if (DECL_INCOMING_RTL (parm)
1738 && GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
1739 {
1740 rtx r = DECL_INCOMING_RTL (parm);
1741 if (REG_P (r))
1742 DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
1743 }
1744 }
1745 }
1746
1747 return 0;
1748 }
1749
1750 static unsigned int
1751 rest_of_handle_insert_vzeroupper (void)
1752 {
1753 int i;
1754
1755   /* vzeroupper instructions are inserted immediately after reload to
1756      account for possible spills from 256-bit or 512-bit registers.  The pass
1757      reuses the mode switching infrastructure by re-running the mode
1758      insertion pass, so disable entities that have already been processed. */
1759 for (i = 0; i < MAX_386_ENTITIES; i++)
1760 ix86_optimize_mode_switching[i] = 0;
1761
1762 ix86_optimize_mode_switching[AVX_U128] = 1;
1763
1764 /* Call optimize_mode_switching. */
1765 g->get_passes ()->execute_pass_mode_switching ();
1766 return 0;
1767 }
1768
1769 namespace {
1770
1771 const pass_data pass_data_insert_vzeroupper =
1772 {
1773 RTL_PASS, /* type */
1774 "vzeroupper", /* name */
1775 OPTGROUP_NONE, /* optinfo_flags */
1776 TV_MACH_DEP, /* tv_id */
1777 0, /* properties_required */
1778 0, /* properties_provided */
1779 0, /* properties_destroyed */
1780 0, /* todo_flags_start */
1781 TODO_df_finish, /* todo_flags_finish */
1782 };
1783
1784 class pass_insert_vzeroupper : public rtl_opt_pass
1785 {
1786 public:
1787 pass_insert_vzeroupper(gcc::context *ctxt)
1788 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
1789 {}
1790
1791 /* opt_pass methods: */
1792 virtual bool gate (function *)
1793 {
1794 return TARGET_AVX
1795 && TARGET_VZEROUPPER && flag_expensive_optimizations
1796 && !optimize_size;
1797 }
1798
1799 virtual unsigned int execute (function *)
1800 {
1801 return rest_of_handle_insert_vzeroupper ();
1802 }
1803
1804 }; // class pass_insert_vzeroupper
1805
1806 const pass_data pass_data_stv =
1807 {
1808 RTL_PASS, /* type */
1809 "stv", /* name */
1810 OPTGROUP_NONE, /* optinfo_flags */
1811 TV_MACH_DEP, /* tv_id */
1812 0, /* properties_required */
1813 0, /* properties_provided */
1814 0, /* properties_destroyed */
1815 0, /* todo_flags_start */
1816 TODO_df_finish, /* todo_flags_finish */
1817 };
1818
1819 class pass_stv : public rtl_opt_pass
1820 {
1821 public:
1822 pass_stv (gcc::context *ctxt)
1823 : rtl_opt_pass (pass_data_stv, ctxt),
1824 timode_p (false)
1825 {}
1826
1827 /* opt_pass methods: */
1828 virtual bool gate (function *)
1829 {
1830 return ((!timode_p || TARGET_64BIT)
1831 && TARGET_STV && TARGET_SSE2 && optimize > 1);
1832 }
1833
1834 virtual unsigned int execute (function *)
1835 {
1836 return convert_scalars_to_vector (timode_p);
1837 }
1838
1839 opt_pass *clone ()
1840 {
1841 return new pass_stv (m_ctxt);
1842 }
1843
1844 void set_pass_param (unsigned int n, bool param)
1845 {
1846 gcc_assert (n == 0);
1847 timode_p = param;
1848 }
1849
1850 private:
1851 bool timode_p;
1852 }; // class pass_stv
1853
1854 } // anon namespace
1855
1856 rtl_opt_pass *
1857 make_pass_insert_vzeroupper (gcc::context *ctxt)
1858 {
1859 return new pass_insert_vzeroupper (ctxt);
1860 }
1861
1862 rtl_opt_pass *
1863 make_pass_stv (gcc::context *ctxt)
1864 {
1865 return new pass_stv (ctxt);
1866 }
1867
1868 /* Inserting ENDBRANCH instructions. */
1869
1870 static unsigned int
1871 rest_of_insert_endbranch (void)
1872 {
1873 timevar_push (TV_MACH_DEP);
1874
1875 rtx cet_eb;
1876 rtx_insn *insn;
1877 basic_block bb;
1878
1879   /* Currently emit an ENDBR if this is a tracking function, i.e. the
1880      'nocf_check' attribute is absent.  Later an optimization will be
1881      introduced to analyze whether the address of a static function is
1882      taken.  A static function whose address is not taken will get a
1883      nocf_check attribute, allowing the number of ENDBRs to be reduced. */
1884
1885 if (!lookup_attribute ("nocf_check",
1886 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
1887 && (!flag_manual_endbr
1888 || lookup_attribute ("cf_check",
1889 DECL_ATTRIBUTES (cfun->decl)))
1890 && !cgraph_node::get (cfun->decl)->only_called_directly_p ())
1891 {
1892 /* Queue ENDBR insertion to x86_function_profiler. */
1893 if (crtl->profile && flag_fentry)
1894 cfun->machine->endbr_queued_at_entrance = true;
1895 else
1896 {
1897 cet_eb = gen_nop_endbr ();
1898
1899 bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
1900 insn = BB_HEAD (bb);
1901 emit_insn_before (cet_eb, insn);
1902 }
1903 }
1904
1905 bb = 0;
1906 FOR_EACH_BB_FN (bb, cfun)
1907 {
1908 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
1909 insn = NEXT_INSN (insn))
1910 {
1911 if (CALL_P (insn))
1912 {
1913 bool need_endbr;
1914 need_endbr = find_reg_note (insn, REG_SETJMP, NULL) != NULL;
1915 if (!need_endbr && !SIBLING_CALL_P (insn))
1916 {
1917 rtx call = get_call_rtx_from (insn);
1918 rtx fnaddr = XEXP (call, 0);
1919 tree fndecl = NULL_TREE;
1920
1921		  /* Also generate an ENDBRANCH for a non-tail call which
1922		     may return via an indirect branch. */
1923 if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
1924 fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
1925 if (fndecl == NULL_TREE)
1926 fndecl = MEM_EXPR (fnaddr);
1927 if (fndecl
1928 && TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE
1929 && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE)
1930 fndecl = NULL_TREE;
1931 if (fndecl && TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
1932 {
1933 tree fntype = TREE_TYPE (fndecl);
1934 if (lookup_attribute ("indirect_return",
1935 TYPE_ATTRIBUTES (fntype)))
1936 need_endbr = true;
1937 }
1938 }
1939 if (!need_endbr)
1940 continue;
1941	      /* Generate an ENDBRANCH after a CALL that can return more than
1942		 once (setjmp-like functions). */
1943
1944 cet_eb = gen_nop_endbr ();
1945 emit_insn_after_setloc (cet_eb, insn, INSN_LOCATION (insn));
1946 continue;
1947 }
1948
1949 if (JUMP_P (insn) && flag_cet_switch)
1950 {
1951 rtx target = JUMP_LABEL (insn);
1952 if (target == NULL_RTX || ANY_RETURN_P (target))
1953 continue;
1954
1955	      /* Check that the jump is a table jump (switch). */
1956 rtx_insn *label = as_a<rtx_insn *> (target);
1957 rtx_insn *table = next_insn (label);
1958 if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
1959 continue;
1960
1961	      /* For the indirect jump, find all places it can jump to and
1962		 insert an ENDBRANCH there.  This is done under a special flag
1963		 that controls ENDBRANCH generation for switch statements. */
1964 edge_iterator ei;
1965 edge e;
1966 basic_block dest_blk;
1967
1968 FOR_EACH_EDGE (e, ei, bb->succs)
1969 {
1970 rtx_insn *insn;
1971
1972 dest_blk = e->dest;
1973 insn = BB_HEAD (dest_blk);
1974 gcc_assert (LABEL_P (insn));
1975 cet_eb = gen_nop_endbr ();
1976 emit_insn_after (cet_eb, insn);
1977 }
1978 continue;
1979 }
1980
1981 if (LABEL_P (insn) && LABEL_PRESERVE_P (insn))
1982 {
1983 cet_eb = gen_nop_endbr ();
1984 emit_insn_after (cet_eb, insn);
1985 continue;
1986 }
1987 }
1988 }
1989
1990 timevar_pop (TV_MACH_DEP);
1991 return 0;
1992 }
1993
1994 namespace {
1995
1996 const pass_data pass_data_insert_endbranch =
1997 {
1998 RTL_PASS, /* type. */
1999 "cet", /* name. */
2000 OPTGROUP_NONE, /* optinfo_flags. */
2001 TV_MACH_DEP, /* tv_id. */
2002 0, /* properties_required. */
2003 0, /* properties_provided. */
2004 0, /* properties_destroyed. */
2005 0, /* todo_flags_start. */
2006 0, /* todo_flags_finish. */
2007 };
2008
2009 class pass_insert_endbranch : public rtl_opt_pass
2010 {
2011 public:
2012 pass_insert_endbranch (gcc::context *ctxt)
2013 : rtl_opt_pass (pass_data_insert_endbranch, ctxt)
2014 {}
2015
2016 /* opt_pass methods: */
2017 virtual bool gate (function *)
2018 {
2019 return ((flag_cf_protection & CF_BRANCH));
2020 }
2021
2022 virtual unsigned int execute (function *)
2023 {
2024 return rest_of_insert_endbranch ();
2025 }
2026
2027 }; // class pass_insert_endbranch
2028
2029 } // anon namespace
2030
2031 rtl_opt_pass *
2032 make_pass_insert_endbranch (gcc::context *ctxt)
2033 {
2034 return new pass_insert_endbranch (ctxt);
2035 }
2036
2037 /* At entry of the nearest common dominator for basic blocks with
2038 conversions, generate a single
2039 vxorps %xmmN, %xmmN, %xmmN
2040 for all
2041 vcvtss2sd op, %xmmN, %xmmX
2042 vcvtsd2ss op, %xmmN, %xmmX
2043 vcvtsi2ss op, %xmmN, %xmmX
2044 vcvtsi2sd op, %xmmN, %xmmX
2045
2046 NB: We want to generate only a single vxorps to cover the whole
2047 function. The LCM algorithm isn't appropriate here since it may
2048 place a vxorps inside the loop. */
2049
2050 static unsigned int
2051 remove_partial_avx_dependency (void)
2052 {
2053 timevar_push (TV_MACH_DEP);
2054
2055 bitmap_obstack_initialize (NULL);
2056 bitmap convert_bbs = BITMAP_ALLOC (NULL);
2057
2058 basic_block bb;
2059 rtx_insn *insn, *set_insn;
2060 rtx set;
2061 rtx v4sf_const0 = NULL_RTX;
2062
2063 auto_vec<rtx_insn *> control_flow_insns;
2064
2065 FOR_EACH_BB_FN (bb, cfun)
2066 {
2067 FOR_BB_INSNS (bb, insn)
2068 {
2069 if (!NONDEBUG_INSN_P (insn))
2070 continue;
2071
2072 set = single_set (insn);
2073 if (!set)
2074 continue;
2075
2076 if (get_attr_avx_partial_xmm_update (insn)
2077 != AVX_PARTIAL_XMM_UPDATE_TRUE)
2078 continue;
2079
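/* On the first conversion found, compute dominance and DF info; a
   non-null v4sf_const0 also serves as the flag that this setup has
   already been done.  */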
2080 if (!v4sf_const0)
2081 {
2082 calculate_dominance_info (CDI_DOMINATORS);
2083 df_set_flags (DF_DEFER_INSN_RESCAN);
2084 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2085 df_md_add_problem ();
2086 df_analyze ();
2087 v4sf_const0 = gen_reg_rtx (V4SFmode);
2088 }
2089
2090 /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF,
2091 SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and
2092 vec_merge with subreg. */
2093 rtx src = SET_SRC (set);
2094 rtx dest = SET_DEST (set);
2095 machine_mode dest_mode = GET_MODE (dest);
2096
2097 rtx zero;
2098 machine_mode dest_vecmode;
2099 if (dest_mode == E_SFmode)
2100 {
2101 dest_vecmode = V4SFmode;
2102 zero = v4sf_const0;
2103 }
2104 else
2105 {
2106 dest_vecmode = V2DFmode;
2107 zero = gen_rtx_SUBREG (V2DFmode, v4sf_const0, 0);
2108 }
2109
2110 /* Change source to vector mode. */
2111 src = gen_rtx_VEC_DUPLICATE (dest_vecmode, src);
2112 src = gen_rtx_VEC_MERGE (dest_vecmode, src, zero,
2113 GEN_INT (HOST_WIDE_INT_1U));
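/* The result is (vec_merge (vec_duplicate SRC) ZERO (const_int 1)):
   element 0 of the destination comes from the duplicated scalar and
   the remaining elements come from the zeroed register.  */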
2114 /* Change destination to vector mode. */
2115 rtx vec = gen_reg_rtx (dest_vecmode);
2116 /* Generate an XMM vector SET. */
2117 set = gen_rtx_SET (vec, src);
2118 set_insn = emit_insn_before (set, insn);
2119 df_insn_rescan (set_insn);
2120
2121 if (cfun->can_throw_non_call_exceptions)
2122 {
2123 /* Copy the REG_EH_REGION note to the new insn, which now holds the trapping conversion, and record it so the required EH edges can be created later. */
2124 rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
2125 if (note)
2126 {
2127 control_flow_insns.safe_push (set_insn);
2128 add_reg_note (set_insn, REG_EH_REGION, XEXP (note, 0));
2129 }
2130 }
2131
2132 src = gen_rtx_SUBREG (dest_mode, vec, 0);
2133 set = gen_rtx_SET (dest, src);
2134
2135 /* Drop possible dead definitions. */
2136 PATTERN (insn) = set;
2137
2138 INSN_CODE (insn) = -1;
2139 recog_memoized (insn);
2140 df_insn_rescan (insn);
2141 bitmap_set_bit (convert_bbs, bb->index);
2142 }
2143 }
2144
2145 if (v4sf_const0)
2146 {
2147 /* (Re-)discover loops so that bb->loop_father can be used in the
2148 analysis below. */
2149 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2150
2151 /* Generate a vxorps at the entry of the nearest common dominator for
2152 the basic blocks with conversions, which is in the fake loop that
2153 contains the whole function, so that there is only a single
2154 vxorps in the whole function. */
2155 bb = nearest_common_dominator_for_set (CDI_DOMINATORS,
2156 convert_bbs);
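/* If that dominator is inside a real loop, walk up to the immediate
   dominator of the loop's header until we reach a block in the fake
   outermost loop (whose latch is the exit block), so the vxorps never
   ends up inside a loop.  */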
2157 while (bb->loop_father->latch
2158 != EXIT_BLOCK_PTR_FOR_FN (cfun))
2159 bb = get_immediate_dominator (CDI_DOMINATORS,
2160 bb->loop_father->header);
2161
2162 set = gen_rtx_SET (v4sf_const0, CONST0_RTX (V4SFmode));
2163
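/* Place the vxorps before the first non-debug insn of BB (after the
   block's label and notes); if BB has no non-debug insn at all, place
   it at the end of BB.  */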
2164 insn = BB_HEAD (bb);
2165 while (insn && !NONDEBUG_INSN_P (insn))
2166 {
2167 if (insn == BB_END (bb))
2168 {
2169 insn = NULL;
2170 break;
2171 }
2172 insn = NEXT_INSN (insn);
2173 }
2174 if (insn == BB_HEAD (bb))
2175 set_insn = emit_insn_before (set, insn);
2176 else
2177 set_insn = emit_insn_after (set,
2178 insn ? PREV_INSN (insn) : BB_END (bb));
2179 df_insn_rescan (set_insn);
2180 df_process_deferred_rescans ();
2181 loop_optimizer_finalize ();
2182
2183 if (!control_flow_insns.is_empty ())
2184 {
2185 free_dominance_info (CDI_DOMINATORS);
2186
2187 unsigned int i;
2188 FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
2189 if (control_flow_insn_p (insn))
2190 {
2191 /* Split the block after insn. There will be a fallthru
2192 edge, which is OK so we keep it. We have to create
2193 the exception edges ourselves. */
2194 bb = BLOCK_FOR_INSN (insn);
2195 split_block (bb, insn);
2196 rtl_make_eh_edge (NULL, bb, BB_END (bb));
2197 }
2198 }
2199 }
2200
2201 bitmap_obstack_release (NULL);
2202 BITMAP_FREE (convert_bbs);
2203
2204 timevar_pop (TV_MACH_DEP);
2205 return 0;
2206 }
2207
2208 namespace {
2209
2210 const pass_data pass_data_remove_partial_avx_dependency =
2211 {
2212 RTL_PASS, /* type */
2213 "rpad", /* name */
2214 OPTGROUP_NONE, /* optinfo_flags */
2215 TV_MACH_DEP, /* tv_id */
2216 0, /* properties_required */
2217 0, /* properties_provided */
2218 0, /* properties_destroyed */
2219 0, /* todo_flags_start */
2220 TODO_df_finish, /* todo_flags_finish */
2221 };
2222
2223 class pass_remove_partial_avx_dependency : public rtl_opt_pass
2224 {
2225 public:
2226 pass_remove_partial_avx_dependency (gcc::context *ctxt)
2227 : rtl_opt_pass (pass_data_remove_partial_avx_dependency, ctxt)
2228 {}
2229
2230 /* opt_pass methods: */
2231 virtual bool gate (function *)
2232 {
2233 return (TARGET_AVX
2234 && TARGET_SSE_PARTIAL_REG_DEPENDENCY
2235 && TARGET_SSE_MATH
2236 && optimize
2237 && optimize_function_for_speed_p (cfun));
2238 }
2239
2240 virtual unsigned int execute (function *)
2241 {
2242 return remove_partial_avx_dependency ();
2243 }
2244 }; // class pass_remove_partial_avx_dependency
2245
2246 } // anon namespace
2247
2248 rtl_opt_pass *
2249 make_pass_remove_partial_avx_dependency (gcc::context *ctxt)
2250 {
2251 return new pass_remove_partial_avx_dependency (ctxt);
2252 }
2253
2254 /* This compares the priority of target features in function DECL1
2255 and DECL2. It returns positive value if DECL1 is higher priority,
2256 negative value if DECL2 is higher priority and 0 if they are the
2257 same. */
2258
2259 int
2260 ix86_compare_version_priority (tree decl1, tree decl2)
2261 {
2262 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
2263 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
2264
2265 return (int)priority1 - (int)priority2;
2266 }
2267
2268 /* V1 and V2 point to function versions with different priorities
2269 based on the target ISA. This qsort comparator sorts them in descending order of dispatch priority. */
2270
2271 static int
2272 feature_compare (const void *v1, const void *v2)
2273 {
2274 typedef struct _function_version_info
2275 {
2276 tree version_decl;
2277 tree predicate_chain;
2278 unsigned int dispatch_priority;
2279 } function_version_info;
2280
2281 const function_version_info c1 = *(const function_version_info *)v1;
2282 const function_version_info c2 = *(const function_version_info *)v2;
2283 return (c2.dispatch_priority - c1.dispatch_priority);
2284 }
2285
2286 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
2287 to return a pointer to VERSION_DECL if the outcome of the expression
2288 formed by PREDICATE_CHAIN is true. This function will be called during
2289 version dispatch to decide which function version to execute. It returns
2290 the basic block at the end, to which more conditions can be added. */
2291
2292 static basic_block
2293 add_condition_to_bb (tree function_decl, tree version_decl,
2294 tree predicate_chain, basic_block new_bb)
2295 {
2296 gimple *return_stmt;
2297 tree convert_expr, result_var;
2298 gimple *convert_stmt;
2299 gimple *call_cond_stmt;
2300 gimple *if_else_stmt;
2301
2302 basic_block bb1, bb2, bb3;
2303 edge e12, e23;
2304
2305 tree cond_var, and_expr_var = NULL_TREE;
2306 gimple_seq gseq;
2307
2308 tree predicate_decl, predicate_arg;
2309
2310 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
2311
2312 gcc_assert (new_bb != NULL);
2313 gseq = bb_seq (new_bb);
2314
2315
2316 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
2317 build_fold_addr_expr (version_decl));
2318 result_var = create_tmp_var (ptr_type_node);
2319 convert_stmt = gimple_build_assign (result_var, convert_expr);
2320 return_stmt = gimple_build_return (result_var);
2321
2322 if (predicate_chain == NULL_TREE)
2323 {
2324 gimple_seq_add_stmt (&gseq, convert_stmt);
2325 gimple_seq_add_stmt (&gseq, return_stmt);
2326 set_bb_seq (new_bb, gseq);
2327 gimple_set_bb (convert_stmt, new_bb);
2328 gimple_set_bb (return_stmt, new_bb);
2329 pop_cfun ();
2330 return new_bb;
2331 }
2332
2333 while (predicate_chain != NULL)
2334 {
2335 cond_var = create_tmp_var (integer_type_node);
2336 predicate_decl = TREE_PURPOSE (predicate_chain);
2337 predicate_arg = TREE_VALUE (predicate_chain);
2338 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
2339 gimple_call_set_lhs (call_cond_stmt, cond_var);
2340
2341 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
2342 gimple_set_bb (call_cond_stmt, new_bb);
2343 gimple_seq_add_stmt (&gseq, call_cond_stmt);
2344
2345 predicate_chain = TREE_CHAIN (predicate_chain);
2346
2347 if (and_expr_var == NULL)
2348 and_expr_var = cond_var;
2349 else
2350 {
2351 gimple *assign_stmt;
2352 /* Use MIN_EXPR to check whether any predicate result is zero:
2353 and_expr_var = MIN_EXPR <cond_var, and_expr_var>. */
2354 assign_stmt = gimple_build_assign (and_expr_var,
2355 build2 (MIN_EXPR, integer_type_node,
2356 cond_var, and_expr_var));
2357
2358 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
2359 gimple_set_bb (assign_stmt, new_bb);
2360 gimple_seq_add_stmt (&gseq, assign_stmt);
2361 }
2362 }
2363
2364 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
2365 integer_zero_node,
2366 NULL_TREE, NULL_TREE);
2367 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
2368 gimple_set_bb (if_else_stmt, new_bb);
2369 gimple_seq_add_stmt (&gseq, if_else_stmt);
2370
2371 gimple_seq_add_stmt (&gseq, convert_stmt);
2372 gimple_seq_add_stmt (&gseq, return_stmt);
2373 set_bb_seq (new_bb, gseq);
2374
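/* Split the block twice: BB1 keeps the condition, BB2 (reached on the
   true edge) gets the conversion and return statements and goes to the
   exit block, and BB3 is the block reached on the false edge, to which
   the caller can append further conditions.  */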
2375 bb1 = new_bb;
2376 e12 = split_block (bb1, if_else_stmt);
2377 bb2 = e12->dest;
2378 e12->flags &= ~EDGE_FALLTHRU;
2379 e12->flags |= EDGE_TRUE_VALUE;
2380
2381 e23 = split_block (bb2, return_stmt);
2382
2383 gimple_set_bb (convert_stmt, bb2);
2384 gimple_set_bb (return_stmt, bb2);
2385
2386 bb3 = e23->dest;
2387 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2388
2389 remove_edge (e23);
2390 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
2391
2392 pop_cfun ();
2393
2394 return bb3;
2395 }
2396
2397 /* This function generates the dispatch function for
2398 multi-versioned functions. DISPATCH_DECL is the function which will
2399 contain the dispatch logic. FNDECLS are the function choices for
2400 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
2401 in DISPATCH_DECL in which the dispatch code is generated. */
2402
2403 static int
2404 dispatch_function_versions (tree dispatch_decl,
2405 void *fndecls_p,
2406 basic_block *empty_bb)
2407 {
2408 tree default_decl;
2409 gimple *ifunc_cpu_init_stmt;
2410 gimple_seq gseq;
2411 int ix;
2412 tree ele;
2413 vec<tree> *fndecls;
2414 unsigned int num_versions = 0;
2415 unsigned int actual_versions = 0;
2416 unsigned int i;
2417
2418 struct _function_version_info
2419 {
2420 tree version_decl;
2421 tree predicate_chain;
2422 unsigned int dispatch_priority;
2423 } *function_version_info;
2424
2425 gcc_assert (dispatch_decl != NULL
2426 && fndecls_p != NULL
2427 && empty_bb != NULL);
2428
2429 /* fndecls_p is actually a vector. */
2430 fndecls = static_cast<vec<tree> *> (fndecls_p);
2431
2432 /* At least one more version other than the default. */
2433 num_versions = fndecls->length ();
2434 gcc_assert (num_versions >= 2);
2435
2436 function_version_info = (struct _function_version_info *)
2437 XNEWVEC (struct _function_version_info, (num_versions - 1));
2438
2439 /* The first version in the vector is the default decl. */
2440 default_decl = (*fndecls)[0];
2441
2442 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
2443
2444 gseq = bb_seq (*empty_bb);
2445 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
2446 constructors, so explicitly call __builtin_cpu_init here. */
2447 ifunc_cpu_init_stmt
2448 = gimple_build_call_vec (get_ix86_builtin (IX86_BUILTIN_CPU_INIT), vNULL);
2449 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
2450 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
2451 set_bb_seq (*empty_bb, gseq);
2452
2453 pop_cfun ();
2454
2455
2456 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
2457 {
2458 tree version_decl = ele;
2459 tree predicate_chain = NULL_TREE;
2460 unsigned int priority;
2461 /* Get attribute string, parse it and find the right predicate decl.
2462 The predicate function could be a lengthy combination of many
2463 features, like arch-type and various isa-variants. */
2464 priority = get_builtin_code_for_version (version_decl,
2465 &predicate_chain);
2466
2467 if (predicate_chain == NULL_TREE)
2468 continue;
2469
2470 function_version_info [actual_versions].version_decl = version_decl;
2471 function_version_info [actual_versions].predicate_chain
2472 = predicate_chain;
2473 function_version_info [actual_versions].dispatch_priority = priority;
2474 actual_versions++;
2475 }
2476
2477 /* Sort the versions in descending order of dispatch priority. The
2478 priority is based on the ISA. This is not a perfect solution; there
2479 could still be ambiguity: if more than one function version is suitable
2480 to execute, which one should be dispatched? In the future, allow the user
2481 to specify a dispatch priority next to the version. */
2482 qsort (function_version_info, actual_versions,
2483 sizeof (struct _function_version_info), feature_compare);
2484
2485 for (i = 0; i < actual_versions; ++i)
2486 *empty_bb = add_condition_to_bb (dispatch_decl,
2487 function_version_info[i].version_decl,
2488 function_version_info[i].predicate_chain,
2489 *empty_bb);
2490
2491 /* Dispatch the default version at the end. */
2492 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
2493 NULL, *empty_bb);
2494
2495 free (function_version_info);
2496 return 0;
2497 }
2498
2499 /* This function changes the assembler name for functions that are
2500 versions. If DECL is a function version and has a "target"
2501 attribute, it appends the attribute string to its assembler name. */
2502
2503 static tree
2504 ix86_mangle_function_version_assembler_name (tree decl, tree id)
2505 {
2506 tree version_attr;
2507 const char *orig_name, *version_string;
2508 char *attr_str, *assembler_name;
2509
2510 if (DECL_DECLARED_INLINE_P (decl)
2511 && lookup_attribute ("gnu_inline",
2512 DECL_ATTRIBUTES (decl)))
2513 error_at (DECL_SOURCE_LOCATION (decl),
2514 "function versions cannot be marked as %<gnu_inline%>,"
2515 " bodies have to be generated");
2516
2517 if (DECL_VIRTUAL_P (decl)
2518 || DECL_VINDEX (decl))
2519 sorry ("virtual function multiversioning not supported");
2520
2521 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
2522
2523 /* The target attribute string cannot be NULL. */
2524 gcc_assert (version_attr != NULL_TREE);
2525
2526 orig_name = IDENTIFIER_POINTER (id);
2527 version_string
2528 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
2529
2530 if (strcmp (version_string, "default") == 0)
2531 return id;
2532
2533 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
2534 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
2535
2536 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
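/* For example, a version of foo compiled with target ("avx2") gets the
   assembler name foo.avx2; because the attribute string is sorted,
   every translation unit produces the same suffix for the same set of
   ISA names.  */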
2537
2538 /* Allow assembler name to be modified if already set. */
2539 if (DECL_ASSEMBLER_NAME_SET_P (decl))
2540 SET_DECL_RTL (decl, NULL);
2541
2542 tree ret = get_identifier (assembler_name);
2543 XDELETEVEC (attr_str);
2544 XDELETEVEC (assembler_name);
2545 return ret;
2546 }
2547
2548 tree
2549 ix86_mangle_decl_assembler_name (tree decl, tree id)
2550 {
2551 /* For function version, add the target suffix to the assembler name. */
2552 if (TREE_CODE (decl) == FUNCTION_DECL
2553 && DECL_FUNCTION_VERSIONED (decl))
2554 id = ix86_mangle_function_version_assembler_name (decl, id);
2555 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
2556 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
2557 #endif
2558
2559 return id;
2560 }
2561
2562 /* Make a dispatcher declaration for the multi-versioned function DECL.
2563 Calls to DECL function will be replaced with calls to the dispatcher
2564 by the front-end. Returns the decl of the dispatcher function. */
2565
2566 tree
2567 ix86_get_function_versions_dispatcher (void *decl)
2568 {
2569 tree fn = (tree) decl;
2570 struct cgraph_node *node = NULL;
2571 struct cgraph_node *default_node = NULL;
2572 struct cgraph_function_version_info *node_v = NULL;
2573 struct cgraph_function_version_info *first_v = NULL;
2574
2575 tree dispatch_decl = NULL;
2576
2577 struct cgraph_function_version_info *default_version_info = NULL;
2578
2579 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
2580
2581 node = cgraph_node::get (fn);
2582 gcc_assert (node != NULL);
2583
2584 node_v = node->function_version ();
2585 gcc_assert (node_v != NULL);
2586
2587 if (node_v->dispatcher_resolver != NULL)
2588 return node_v->dispatcher_resolver;
2589
2590 /* Find the default version and make it the first node. */
2591 first_v = node_v;
2592 /* Go to the beginning of the chain. */
2593 while (first_v->prev != NULL)
2594 first_v = first_v->prev;
2595 default_version_info = first_v;
2596 while (default_version_info != NULL)
2597 {
2598 if (is_function_default_version
2599 (default_version_info->this_node->decl))
2600 break;
2601 default_version_info = default_version_info->next;
2602 }
2603
2604 /* If there is no default node, just return NULL. */
2605 if (default_version_info == NULL)
2606 return NULL;
2607
2608 /* Make default info the first node. */
2609 if (first_v != default_version_info)
2610 {
2611 default_version_info->prev->next = default_version_info->next;
2612 if (default_version_info->next)
2613 default_version_info->next->prev = default_version_info->prev;
2614 first_v->prev = default_version_info;
2615 default_version_info->next = first_v;
2616 default_version_info->prev = NULL;
2617 }
2618
2619 default_node = default_version_info->this_node;
2620
2621 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
2622 if (targetm.has_ifunc_p ())
2623 {
2624 struct cgraph_function_version_info *it_v = NULL;
2625 struct cgraph_node *dispatcher_node = NULL;
2626 struct cgraph_function_version_info *dispatcher_version_info = NULL;
2627
2628 /* Right now, the dispatching is done via ifunc. */
2629 dispatch_decl = make_dispatcher_decl (default_node->decl);
2630
2631 dispatcher_node = cgraph_node::get_create (dispatch_decl);
2632 gcc_assert (dispatcher_node != NULL);
2633 dispatcher_node->dispatcher_function = 1;
2634 dispatcher_version_info
2635 = dispatcher_node->insert_new_function_version ();
2636 dispatcher_version_info->next = default_version_info;
2637 dispatcher_node->definition = 1;
2638
2639 /* Set the dispatcher for all the versions. */
2640 it_v = default_version_info;
2641 while (it_v != NULL)
2642 {
2643 it_v->dispatcher_resolver = dispatch_decl;
2644 it_v = it_v->next;
2645 }
2646 }
2647 else
2648 #endif
2649 {
2650 error_at (DECL_SOURCE_LOCATION (default_node->decl),
2651 "multiversioning needs %<ifunc%> which is not supported "
2652 "on this target");
2653 }
2654
2655 return dispatch_decl;
2656 }
2657
2658 /* Make the resolver function decl to dispatch the versions of
2659 a multi-versioned function, DEFAULT_DECL. IFUNC_ALIAS_DECL is
2660 the ifunc alias that will point to the created resolver. Create an
2661 empty basic block in the resolver and store the pointer in
2662 EMPTY_BB. Return the decl of the resolver function. */
2663
2664 static tree
2665 make_resolver_func (const tree default_decl,
2666 const tree ifunc_alias_decl,
2667 basic_block *empty_bb)
2668 {
2669 char *resolver_name;
2670 tree decl, type, decl_name, t;
2671
2672 /* IFUNCs have to be globally visible, so if the default_decl is
2673 not, the name of the IFUNC should be made unique. */
2674 if (TREE_PUBLIC (default_decl) == 0)
2675 {
2676 char *ifunc_name = make_unique_name (default_decl, "ifunc", true);
2677 symtab->change_decl_assembler_name (ifunc_alias_decl,
2678 get_identifier (ifunc_name));
2679 XDELETEVEC (ifunc_name);
2680 }
2681
2682 resolver_name = make_unique_name (default_decl, "resolver", false);
2683
2684 /* The resolver function should return a (void *). */
2685 type = build_function_type_list (ptr_type_node, NULL_TREE);
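/* I.e. the resolver has type void * (void): it takes no arguments and
   returns the address of the selected implementation, which is what an
   IFUNC resolver is expected to do.  */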
2686
2687 decl = build_fn_decl (resolver_name, type);
2688 decl_name = get_identifier (resolver_name);
2689 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
2690
2691 DECL_NAME (decl) = decl_name;
2692 TREE_USED (decl) = 1;
2693 DECL_ARTIFICIAL (decl) = 1;
2694 DECL_IGNORED_P (decl) = 1;
2695 TREE_PUBLIC (decl) = 0;
2696 DECL_UNINLINABLE (decl) = 1;
2697
2698 /* Resolver is not external, body is generated. */
2699 DECL_EXTERNAL (decl) = 0;
2700 DECL_EXTERNAL (ifunc_alias_decl) = 0;
2701
2702 DECL_CONTEXT (decl) = NULL_TREE;
2703 DECL_INITIAL (decl) = make_node (BLOCK);
2704 DECL_STATIC_CONSTRUCTOR (decl) = 0;
2705
2706 if (DECL_COMDAT_GROUP (default_decl)
2707 || TREE_PUBLIC (default_decl))
2708 {
2709 /* In this case, each translation unit with a call to this
2710 versioned function will put out a resolver. Ensure it
2711 is comdat to keep just one copy. */
2712 DECL_COMDAT (decl) = 1;
2713 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
2714 }
2715 /* Build result decl and add to function_decl. */
2716 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
2717 DECL_CONTEXT (t) = decl;
2718 DECL_ARTIFICIAL (t) = 1;
2719 DECL_IGNORED_P (t) = 1;
2720 DECL_RESULT (decl) = t;
2721
2722 gimplify_function_tree (decl);
2723 push_cfun (DECL_STRUCT_FUNCTION (decl));
2724 *empty_bb = init_lowered_empty_function (decl, false,
2725 profile_count::uninitialized ());
2726
2727 cgraph_node::add_new_function (decl, true);
2728 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
2729
2730 pop_cfun ();
2731
2732 gcc_assert (ifunc_alias_decl != NULL);
2733 /* Mark ifunc_alias_decl with the "ifunc" attribute, using resolver_name as its resolver. */
2734 DECL_ATTRIBUTES (ifunc_alias_decl)
2735 = make_attribute ("ifunc", resolver_name,
2736 DECL_ATTRIBUTES (ifunc_alias_decl));
2737
2738 /* Create the alias for dispatch to resolver here. */
2739 cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
2740 XDELETEVEC (resolver_name);
2741 return decl;
2742 }
2743
2744 /* Generate the dispatching code body to dispatch multi-versioned function
2745 DECL. The target hook is called to process the "target" attributes and
2746 provide the code to dispatch the right function at run-time. NODE points
2747 to the dispatcher decl whose body will be created. */
2748
2749 tree
2750 ix86_generate_version_dispatcher_body (void *node_p)
2751 {
2752 tree resolver_decl;
2753 basic_block empty_bb;
2754 tree default_ver_decl;
2755 struct cgraph_node *versn;
2756 struct cgraph_node *node;
2757
2758 struct cgraph_function_version_info *node_version_info = NULL;
2759 struct cgraph_function_version_info *versn_info = NULL;
2760
2761 node = (cgraph_node *)node_p;
2762
2763 node_version_info = node->function_version ();
2764 gcc_assert (node->dispatcher_function
2765 && node_version_info != NULL);
2766
2767 if (node_version_info->dispatcher_resolver)
2768 return node_version_info->dispatcher_resolver;
2769
2770 /* The first version in the chain corresponds to the default version. */
2771 default_ver_decl = node_version_info->next->this_node->decl;
2772
2773 /* node is going to be an alias, so remove the finalized bit. */
2774 node->definition = false;
2775
2776 resolver_decl = make_resolver_func (default_ver_decl,
2777 node->decl, &empty_bb);
2778
2779 node_version_info->dispatcher_resolver = resolver_decl;
2780
2781 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
2782
2783 auto_vec<tree, 2> fn_ver_vec;
2784
2785 for (versn_info = node_version_info->next; versn_info;
2786 versn_info = versn_info->next)
2787 {
2788 versn = versn_info->this_node;
2789 /* Check for virtual functions here again, as by this time it should
2790 have been determined if this function needs a vtable index or
2791 not. This happens for methods in derived classes that override
2792 virtual methods in base classes but are not explicitly marked as
2793 virtual. */
2794 if (DECL_VINDEX (versn->decl))
2795 sorry ("virtual function multiversioning not supported");
2796
2797 fn_ver_vec.safe_push (versn->decl);
2798 }
2799
2800 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
2801 cgraph_edge::rebuild_edges ();
2802 pop_cfun ();
2803 return resolver_decl;
2804 }
2805
2806