]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386-features.c
i386-features.c (general_scalar_chain::convert_insn): Revert 2019-08-14 change.
[gcc.git] / gcc / config / i386 / i386-features.c
CommitLineData
2bf6d935
ML
1/* Copyright (C) 1988-2019 Free Software Foundation, Inc.
2
3This file is part of GCC.
4
5GCC is free software; you can redistribute it and/or modify
6it under the terms of the GNU General Public License as published by
7the Free Software Foundation; either version 3, or (at your option)
8any later version.
9
10GCC is distributed in the hope that it will be useful,
11but WITHOUT ANY WARRANTY; without even the implied warranty of
12MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13GNU General Public License for more details.
14
15You should have received a copy of the GNU General Public License
16along with GCC; see the file COPYING3. If not see
17<http://www.gnu.org/licenses/>. */
18
19#define IN_TARGET_CODE 1
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "backend.h"
25#include "rtl.h"
26#include "tree.h"
27#include "memmodel.h"
28#include "gimple.h"
29#include "cfghooks.h"
30#include "cfgloop.h"
31#include "df.h"
32#include "tm_p.h"
33#include "stringpool.h"
34#include "expmed.h"
35#include "optabs.h"
36#include "regs.h"
37#include "emit-rtl.h"
38#include "recog.h"
39#include "cgraph.h"
40#include "diagnostic.h"
41#include "cfgbuild.h"
42#include "alias.h"
43#include "fold-const.h"
44#include "attribs.h"
45#include "calls.h"
46#include "stor-layout.h"
47#include "varasm.h"
48#include "output.h"
49#include "insn-attr.h"
50#include "flags.h"
51#include "except.h"
52#include "explow.h"
53#include "expr.h"
54#include "cfgrtl.h"
55#include "common/common-target.h"
56#include "langhooks.h"
57#include "reload.h"
58#include "gimplify.h"
59#include "dwarf2.h"
60#include "tm-constrs.h"
61#include "params.h"
62#include "cselib.h"
63#include "sched-int.h"
64#include "opts.h"
65#include "tree-pass.h"
66#include "context.h"
67#include "pass_manager.h"
68#include "target-globals.h"
69#include "gimple-iterator.h"
70#include "tree-vectorizer.h"
71#include "shrink-wrap.h"
72#include "builtins.h"
73#include "rtl-iter.h"
74#include "tree-iterator.h"
75#include "dbgcnt.h"
76#include "case-cfn-macros.h"
77#include "dojump.h"
78#include "fold-const-call.h"
79#include "tree-vrp.h"
80#include "tree-ssanames.h"
81#include "selftest.h"
82#include "selftest-rtl.h"
83#include "print-rtl.h"
84#include "intl.h"
85#include "ifcvt.h"
86#include "symbol-summary.h"
87#include "ipa-prop.h"
88#include "ipa-fnsummary.h"
89#include "wide-int-bitmask.h"
90#include "tree-vector-builder.h"
91#include "debug.h"
92#include "dwarf2out.h"
93#include "i386-builtins.h"
94#include "i386-features.h"
95
/* Base names of the ms2sysv out-of-line save/restore stubs, indexed by
   enum xlogue_stub.  get_stub_name () composes the full symbol name by
   prefixing "__avx_"/"__sse_" and appending the managed-register count.  */
const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
  "savms64",
  "resms64",
  "resms64x",
  "savms64f",
  "resms64f",
  "resms64fx"
};
104
/* Registers in the order the ms2sysv stubs save them; BP_REG is skipped
   by users of this table when a hard frame pointer is in use.  */
const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
/* The below offset values are where each register is stored for the layout
   relative to incoming stack pointer.  The value of each m_regs[].offset will
   be relative to the incoming base pointer (rax or rsi) used by the stub.

    s_instances:	0	1		2	3
    Offset:		realigned or	aligned + 8
    Register		aligned	aligned + 8	aligned w/HFP	w/HFP */
  XMM15_REG,	/* 0x10	0x18	0x10	0x18 */
  XMM14_REG,	/* 0x20	0x28	0x20	0x28 */
  XMM13_REG,	/* 0x30	0x38	0x30	0x38 */
  XMM12_REG,	/* 0x40	0x48	0x40	0x48 */
  XMM11_REG,	/* 0x50	0x58	0x50	0x58 */
  XMM10_REG,	/* 0x60	0x68	0x60	0x68 */
  XMM9_REG,	/* 0x70	0x78	0x70	0x78 */
  XMM8_REG,	/* 0x80	0x88	0x80	0x88 */
  XMM7_REG,	/* 0x90	0x98	0x90	0x98 */
  XMM6_REG,	/* 0xa0	0xa8	0xa0	0xa8 */
  SI_REG,	/* 0xa8	0xb0	0xa8	0xb0 */
  DI_REG,	/* 0xb0	0xb8	0xb0	0xb8 */
  BX_REG,	/* 0xb8	0xc0	0xb8	0xc0 */
  BP_REG,	/* 0xc0	0xc8	N/A	N/A */
  R12_REG,	/* 0xc8	0xd0	0xc0	0xc8 */
  R13_REG,	/* 0xd0	0xd8	0xc8	0xd0 */
  R14_REG,	/* 0xd8	0xe0	0xd0	0xd8 */
  R15_REG,	/* 0xe0	0xe8	0xd8	0xe0 */
};
132
/* Instantiate static const values.  */
const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
const unsigned xlogue_layout::MIN_REGS;
const unsigned xlogue_layout::MAX_REGS;
const unsigned xlogue_layout::MAX_EXTRA_REGS;
const unsigned xlogue_layout::VARIANT_COUNT;
const unsigned xlogue_layout::STUB_NAME_MAX_LEN;

/* Initialize xlogue_layout::s_stub_names to zero.  Names are filled in
   lazily by get_stub_name (); first dimension selects SSE vs. AVX.  */
char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
				  [STUB_NAME_MAX_LEN];

/* Instantiates all xlogue_layout instances: the four combinations of
   incoming-stack pad (0 or 8 bytes) and hard-frame-pointer use.  */
const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
  xlogue_layout (0, false),
  xlogue_layout (8, false),
  xlogue_layout (0, true),
  xlogue_layout (8, true)
};
152
153/* Return an appropriate const instance of xlogue_layout based upon values
154 in cfun->machine and crtl. */
99b1c316 155const class xlogue_layout &
2bf6d935
ML
156xlogue_layout::get_instance ()
157{
158 enum xlogue_stub_sets stub_set;
159 bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;
160
161 if (stack_realign_fp)
162 stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
163 else if (frame_pointer_needed)
164 stub_set = aligned_plus_8
165 ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
166 : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
167 else
168 stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;
169
170 return s_instances[stub_set];
171}
172
173/* Determine how many clobbered registers can be saved by the stub.
174 Returns the count of registers the stub will save and restore. */
175unsigned
176xlogue_layout::count_stub_managed_regs ()
177{
178 bool hfp = frame_pointer_needed || stack_realign_fp;
179 unsigned i, count;
180 unsigned regno;
181
182 for (count = i = MIN_REGS; i < MAX_REGS; ++i)
183 {
184 regno = REG_ORDER[i];
185 if (regno == BP_REG && hfp)
186 continue;
187 if (!ix86_save_reg (regno, false, false))
188 break;
189 ++count;
190 }
191 return count;
192}
193
/* Determine if register REGNO is a stub managed register given the
   total COUNT of stub managed registers.  */
bool
xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
{
  bool hfp = frame_pointer_needed || stack_realign_fp;
  unsigned i;

  for (i = 0; i < count; ++i)
    {
      gcc_assert (i < MAX_REGS);
      /* With a hard frame pointer BP_REG is not stub-managed; skip it
	 and extend the scan by one slot so that COUNT managed entries
	 of REG_ORDER are still examined.  */
      if (REG_ORDER[i] == BP_REG && hfp)
	++count;
      else if (REG_ORDER[i] == regno)
	return true;
    }
  return false;
}
212
/* Constructor for xlogue_layout.  STACK_ALIGN_OFF_IN is the extra offset
   (0 or 8) of the incoming stack pointer; HFP says whether a hard frame
   pointer is in use, in which case BP_REG is excluded and only 17
   registers are managed.  Computes each managed register's save slot
   offset relative to STUB_INDEX_OFFSET.  */
xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
  : m_hfp (hfp) , m_nregs (hfp ? 17 : 18),
    m_stack_align_off_in (stack_align_off_in)
{
  HOST_WIDE_INT offset = stack_align_off_in;
  unsigned i, j;

  for (i = j = 0; i < MAX_REGS; ++i)
    {
      unsigned regno = REG_ORDER[i];

      if (regno == BP_REG && hfp)
	continue;
      if (SSE_REGNO_P (regno))
	{
	  offset += 16;
	  /* Verify that SSE regs are always aligned.  */
	  gcc_assert (!((stack_align_off_in + offset) & 15));
	}
      else
	offset += 8;

      m_regs[j].regno = regno;
      m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
    }
  gcc_assert (j == m_nregs);
}
241
/* Return the symbol name for stub STUB managing MIN_REGS + N_EXTRA_REGS
   registers.  Names are composed once per (AVX/SSE, stub, variant)
   combination and cached in the static s_stub_names table.  */
const char *
xlogue_layout::get_stub_name (enum xlogue_stub stub,
			      unsigned n_extra_regs)
{
  const int have_avx = TARGET_AVX;
  char *name = s_stub_names[!!have_avx][stub][n_extra_regs];

  /* Lazy init: an empty string means the name was not composed yet.  */
  if (!*name)
    {
      int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
			  (have_avx ? "avx" : "sse"),
			  STUB_BASE_NAMES[stub],
			  MIN_REGS + n_extra_regs);
      gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN);
    }

  return name;
}
261
262/* Return rtx of a symbol ref for the entry point (based upon
263 cfun->machine->call_ms2sysv_extra_regs) of the specified stub. */
264rtx
265xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
266{
267 const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
268 gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
269 gcc_assert (stub < XLOGUE_STUB_COUNT);
270 gcc_assert (crtl->stack_realign_finalized);
271
272 return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
273}
274
/* Monotonically increasing source of unique chain ids; the scalar_chain
   constructor assigns chain_id = ++max_id.  */
unsigned scalar_chain::max_id = 0;
276
/* Initialize new chain.  SMODE_ is the scalar mode being converted from,
   VMODE_ the target vector mode.  Allocates the bitmaps tracking chain
   membership (insns), converted defs (defs) and defs that must remain
   available in both modes (defs_conv).  */

scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
{
  smode = smode_;
  vmode = vmode_;

  chain_id = ++max_id;

  if (dump_file)
    fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);

  bitmap_obstack_initialize (NULL);
  insns = BITMAP_ALLOC (NULL);
  defs = BITMAP_ALLOC (NULL);
  defs_conv = BITMAP_ALLOC (NULL);
  /* The worklist bitmap is allocated per-build in build ().  */
  queue = NULL;
}
295
/* Free chain's data.  The queue bitmap is allocated and released inside
   build (), so it is not freed here.  */

scalar_chain::~scalar_chain ()
{
  BITMAP_FREE (insns);
  BITMAP_FREE (defs);
  BITMAP_FREE (defs_conv);
  bitmap_obstack_release (NULL);
}
305
/* Add instruction into chains' queue.  No-op if the insn is already a
   chain member or already queued.  */

void
scalar_chain::add_to_queue (unsigned insn_uid)
{
  if (bitmap_bit_p (insns, insn_uid)
      || bitmap_bit_p (queue, insn_uid))
    return;

  if (dump_file)
    fprintf (dump_file, "  Adding insn %d into chain's #%d queue\n",
	     insn_uid, chain_id);
  bitmap_set_bit (queue, insn_uid);
}
320
/* For DImode conversion, mark register defined by DEF as requiring
   conversion.  Such a register must be available in both scalar and
   vector modes; convert_reg () later emits the scalar copy.  */

void
general_scalar_chain::mark_dual_mode_def (df_ref def)
{
  gcc_assert (DF_REF_REG_DEF_P (def));

  /* Already recorded.  */
  if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def)))
    return;

  if (dump_file)
    fprintf (dump_file,
	     "  Mark r%d def in insn %d as requiring both modes in chain #%d\n",
	     DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);

  bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
}
339
/* For TImode conversion, it is unused: TImode chains never need
   dual-mode registers, so reaching here is a logic error.  */

void
timode_scalar_chain::mark_dual_mode_def (df_ref)
{
  gcc_unreachable ();
}
347
/* Check REF's chain to add new insns into a queue
   and find registers requiring conversion.  Walks REF's def-use/use-def
   links: linked insns that are chain members or candidates are queued;
   any other link forces the register into dual-mode handling.  */

void
scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
{
  df_link *chain;

  /* REF itself must belong to this chain or to the candidate set.  */
  gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
	      || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
  add_to_queue (DF_REF_INSN_UID (ref));

  for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
    {
      unsigned uid = DF_REF_INSN_UID (chain->ref);

      if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
	continue;

      if (!DF_REF_REG_MEM_P (chain->ref))
	{
	  if (bitmap_bit_p (insns, uid))
	    continue;

	  if (bitmap_bit_p (candidates, uid))
	    {
	      add_to_queue (uid);
	      continue;
	    }
	}

      /* The linked insn cannot be converted, so whichever side defines
	 the register must keep it available in both modes.  */
      if (DF_REF_REG_DEF_P (chain->ref))
	{
	  if (dump_file)
	    fprintf (dump_file, "  r%d def in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  mark_dual_mode_def (chain->ref);
	}
      else
	{
	  if (dump_file)
	    fprintf (dump_file, "  r%d use in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  mark_dual_mode_def (ref);
	}
    }
}
395
/* Add instruction into a chain.  Records the insn, its pseudo def (if
   any), and analyzes all its register refs to pull in dependent insns.  */

void
scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
{
  if (bitmap_bit_p (insns, insn_uid))
    return;

  if (dump_file)
    fprintf (dump_file, "  Adding insn %d to chain #%d\n", insn_uid, chain_id);

  bitmap_set_bit (insns, insn_uid);

  /* Track pseudo registers defined by chain members.  */
  rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
  rtx def_set = single_set (insn);
  if (def_set && REG_P (SET_DEST (def_set))
      && !HARD_REGISTER_P (SET_DEST (def_set)))
    bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));

  /* ??? The following is quadratic since analyze_register_chain
     iterates over all refs to look for dual-mode regs.  Instead this
     should be done separately for all regs mentioned in the chain once.  */
  df_ref ref;
  df_ref def;
  for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!HARD_REGISTER_P (DF_REF_REG (ref)))
      for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref));
	   def;
	   def = DF_REF_NEXT_REG (def))
	analyze_register_chain (candidates, def);
  for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!DF_REF_REG_MEM_P (ref))
      analyze_register_chain (candidates, ref);
}
430
/* Build new chain starting from insn INSN_UID recursively
   adding all dependent uses and definitions.  Worklist algorithm:
   add_insn () may enqueue further candidate insns via
   analyze_register_chain ().  */

void
scalar_chain::build (bitmap candidates, unsigned insn_uid)
{
  queue = BITMAP_ALLOC (NULL);
  bitmap_set_bit (queue, insn_uid);

  if (dump_file)
    fprintf (dump_file, "Building chain #%d...\n", chain_id);

  while (!bitmap_empty_p (queue))
    {
      insn_uid = bitmap_first_set_bit (queue);
      bitmap_clear_bit (queue, insn_uid);
      /* Each insn is consumed from the candidate set exactly once.  */
      bitmap_clear_bit (candidates, insn_uid);
      add_insn (candidates, insn_uid);
    }

  if (dump_file)
    {
      fprintf (dump_file, "Collected chain #%d...\n", chain_id);
      fprintf (dump_file, "  insns: ");
      dump_bitmap (dump_file, insns);
      if (!bitmap_empty_p (defs_conv))
	{
	  bitmap_iterator bi;
	  unsigned id;
	  const char *comma = "";
	  fprintf (dump_file, "  defs to convert: ");
	  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
	    {
	      fprintf (dump_file, "%sr%d", comma, id);
	      comma = ", ";
	    }
	  fprintf (dump_file, "\n");
	}
    }

  BITMAP_FREE (queue);
}
473
/* Return a cost of building a vector constant
   instead of using a scalar one.  */

int
general_scalar_chain::vector_const_cost (rtx exp)
{
  gcc_assert (CONST_INT_P (exp));

  /* Standard SSE constants (e.g. all-zeros, all-ones) can be
     materialized with a single SSE instruction.  */
  if (standard_sse_constant_p (exp, vmode))
    return ix86_cost->sse_op;
  /* We have separate costs for SImode and DImode, use SImode costs
     for smaller modes.  */
  return ix86_cost->sse_load[smode == DImode ? 1 : 0];
}
488
/* Compute a gain for chain conversion.  Sums the per-insn difference
   between the scalar cost and the vector cost, then subtracts the cost
   of the scalar<->vector copies required for dual-mode registers.
   Positive result means conversion is profitable.  */

int
general_scalar_chain::compute_convert_gain ()
{
  bitmap_iterator bi;
  unsigned insn_uid;
  int gain = 0;
  int cost = 0;

  if (dump_file)
    fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);

  /* SSE costs distinguish between SImode and DImode loads/stores, for
     int costs factor in the number of GPRs involved.  When supporting
     smaller modes than SImode the int load/store costs need to be
     adjusted as well.  */
  unsigned sse_cost_idx = smode == DImode ? 1 : 0;
  /* M is the number of GPR insns needed per scalar operation:
     2 for DImode on 32-bit targets, 1 otherwise.  */
  unsigned m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;

  EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
    {
      rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
      rtx def_set = single_set (insn);
      rtx src = SET_SRC (def_set);
      rtx dst = SET_DEST (def_set);
      int igain = 0;

      if (REG_P (src) && REG_P (dst))
	igain += 2 * m - ix86_cost->xmm_move;
      else if (REG_P (src) && MEM_P (dst))
	igain
	  += m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx];
      else if (MEM_P (src) && REG_P (dst))
	igain += m * ix86_cost->int_load[2] - ix86_cost->sse_load[sse_cost_idx];
      else if (GET_CODE (src) == ASHIFT
	       || GET_CODE (src) == ASHIFTRT
	       || GET_CODE (src) == LSHIFTRT)
	{
	  if (CONST_INT_P (XEXP (src, 0)))
	    igain -= vector_const_cost (XEXP (src, 0));
	  igain += m * ix86_cost->shift_const - ix86_cost->sse_op;
	  /* A scalar DImode shift by >= 32 on 32-bit needs no extra insn,
	     while the vector form still pays full price.  */
	  if (INTVAL (XEXP (src, 1)) >= 32)
	    igain -= COSTS_N_INSNS (1);
	}
      else if (GET_CODE (src) == PLUS
	       || GET_CODE (src) == MINUS
	       || GET_CODE (src) == IOR
	       || GET_CODE (src) == XOR
	       || GET_CODE (src) == AND)
	{
	  igain += m * ix86_cost->add - ix86_cost->sse_op;
	  /* Additional gain for andnot for targets without BMI.  */
	  if (GET_CODE (XEXP (src, 0)) == NOT
	      && !TARGET_BMI)
	    igain += m * ix86_cost->add;

	  if (CONST_INT_P (XEXP (src, 0)))
	    igain -= vector_const_cost (XEXP (src, 0));
	  if (CONST_INT_P (XEXP (src, 1)))
	    igain -= vector_const_cost (XEXP (src, 1));
	}
      else if (GET_CODE (src) == NEG
	       || GET_CODE (src) == NOT)
	igain += m * ix86_cost->add - ix86_cost->sse_op - COSTS_N_INSNS (1);
      else if (GET_CODE (src) == SMAX
	       || GET_CODE (src) == SMIN
	       || GET_CODE (src) == UMAX
	       || GET_CODE (src) == UMIN)
	{
	  /* We do not have any conditional move cost, estimate it as a
	     reg-reg move.  Comparisons are costed as adds.  */
	  igain += m * (COSTS_N_INSNS (2) + ix86_cost->add);
	  /* Integer SSE ops are all costed the same.  */
	  igain -= ix86_cost->sse_op;
	}
      else if (GET_CODE (src) == COMPARE)
	{
	  /* Assume comparison cost is the same.  */
	}
      else if (CONST_INT_P (src))
	{
	  if (REG_P (dst))
	    /* DImode can be immediate for TARGET_64BIT and SImode always.  */
	    igain += m * COSTS_N_INSNS (1);
	  else if (MEM_P (dst))
	    igain += (m * ix86_cost->int_store[2]
		      - ix86_cost->sse_store[sse_cost_idx]);
	  igain -= vector_const_cost (src);
	}
      else
	gcc_unreachable ();

      if (igain != 0 && dump_file)
	{
	  fprintf (dump_file, "  Instruction gain %d for ", igain);
	  dump_insn_slim (dump_file, insn);
	}
      gain += igain;
    }

  if (dump_file)
    fprintf (dump_file, "  Instruction conversion gain: %d\n", gain);

  /* ??? What about integer to SSE?  */
  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi)
    cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->sse_to_integer;

  if (dump_file)
    fprintf (dump_file, "  Registers conversion cost: %d\n", cost);

  gain -= cost;

  if (dump_file)
    fprintf (dump_file, "  Total gain: %d\n", gain);

  return gain;
}
607
/* Replace REG in X with a VMODE subreg of NEW_REG.  Mutates X in place
   (except when X is REG itself) and returns the rewritten rtx.  */

rtx
general_scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
{
  if (x == reg)
    return gen_rtx_SUBREG (vmode, new_reg, 0);

  /* Recurse over X's operands according to its rtx format string:
     'e' is a subexpression, 'E' a vector of subexpressions.  */
  const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
  int i, j;
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg);
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j),
						   reg, new_reg);
    }

  return x;
}
630
631/* Replace REG in INSN with a V2DI subreg of NEW_REG. */
632
633void
93cf5515 634general_scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn,
2bf6d935
ML
635 rtx reg, rtx new_reg)
636{
637 replace_with_subreg (single_set (insn), reg, new_reg);
638}
639
640/* Insert generated conversion instruction sequence INSNS
641 after instruction AFTER. New BB may be required in case
642 instruction has EH region attached. */
643
644void
645scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
646{
647 if (!control_flow_insn_p (after))
648 {
649 emit_insn_after (insns, after);
650 return;
651 }
652
653 basic_block bb = BLOCK_FOR_INSN (after);
654 edge e = find_fallthru_edge (bb->succs);
655 gcc_assert (e);
656
657 basic_block new_bb = split_edge (e);
658 emit_insn_after (insns, BB_HEAD (new_bb));
659}
660
/* Make vector copies for all register REGNO definitions
   and replace its uses in a chain.  For each def outside the chain a
   scalar-to-vector copy into VREG is emitted; chain-internal uses are
   then rewritten to refer to VREG.  */

void
general_scalar_chain::make_vector_copies (unsigned regno)
{
  rtx reg = regno_reg_rtx[regno];
  rtx vreg = gen_reg_rtx (smode);
  df_ref ref;

  for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
    if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
      {
	start_sequence ();
	if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
	  {
	    /* Without direct GPR->XMM moves go through a stack slot.  */
	    rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
	    if (smode == DImode && !TARGET_64BIT)
	      {
		/* Store DImode in two SImode halves on 32-bit.  */
		emit_move_insn (adjust_address (tmp, SImode, 0),
				gen_rtx_SUBREG (SImode, reg, 0));
		emit_move_insn (adjust_address (tmp, SImode, 4),
				gen_rtx_SUBREG (SImode, reg, 4));
	      }
	    else
	      emit_move_insn (tmp, reg);
	    emit_insn (gen_rtx_SET
		       (gen_rtx_SUBREG (vmode, vreg, 0),
			gen_rtx_VEC_MERGE (vmode,
					   gen_rtx_VEC_DUPLICATE (vmode,
								  tmp),
					   CONST0_RTX (vmode),
					   GEN_INT (HOST_WIDE_INT_1U))));
	  }
	else if (!TARGET_64BIT && smode == DImode)
	  {
	    if (TARGET_SSE4_1)
	      {
		/* Load low half, then insert the high half with pinsrd.  */
		emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
					    CONST0_RTX (V4SImode),
					    gen_rtx_SUBREG (SImode, reg, 0)));
		emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
					      gen_rtx_SUBREG (V4SImode, vreg, 0),
					      gen_rtx_SUBREG (SImode, reg, 4),
					      GEN_INT (2)));
	      }
	    else
	      {
		/* Pre-SSE4.1: load both halves and interleave them.  */
		rtx tmp = gen_reg_rtx (DImode);
		emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
					    CONST0_RTX (V4SImode),
					    gen_rtx_SUBREG (SImode, reg, 0)));
		emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
					    CONST0_RTX (V4SImode),
					    gen_rtx_SUBREG (SImode, reg, 4)));
		emit_insn (gen_vec_interleave_lowv4si
			   (gen_rtx_SUBREG (V4SImode, vreg, 0),
			    gen_rtx_SUBREG (V4SImode, vreg, 0),
			    gen_rtx_SUBREG (V4SImode, tmp, 0)));
	      }
	  }
	else
	  emit_insn (gen_rtx_SET
		     (gen_rtx_SUBREG (vmode, vreg, 0),
		      gen_rtx_VEC_MERGE (vmode,
					 gen_rtx_VEC_DUPLICATE (vmode,
								reg),
					 CONST0_RTX (vmode),
					 GEN_INT (HOST_WIDE_INT_1U))));
	rtx_insn *seq = get_insns ();
	end_sequence ();
	rtx_insn *insn = DF_REF_INSN (ref);
	emit_conversion_insns (seq, insn);

	if (dump_file)
	  fprintf (dump_file,
		   "  Copied r%d to a vector register r%d for insn %d\n",
		   regno, REGNO (vreg), INSN_UID (insn));
      }

  /* Rewrite all chain-internal uses of REG to the vector copy.  */
  for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
    if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
      {
	rtx_insn *insn = DF_REF_INSN (ref);
	replace_with_subreg_in_insn (insn, reg, vreg);

	if (dump_file)
	  fprintf (dump_file, "  Replaced r%d with r%d in insn %d\n",
		   regno, REGNO (vreg), INSN_UID (insn));
      }
}
752
/* Convert all definitions of register REGNO
   and fix its uses.  Scalar copies may be created
   in case register is used in not convertible insn.  */

void
general_scalar_chain::convert_reg (unsigned regno)
{
  bool scalar_copy = bitmap_bit_p (defs_conv, regno);
  rtx reg = regno_reg_rtx[regno];
  rtx scopy = NULL_RTX;
  df_ref ref;
  bitmap conv;

  /* CONV tracks chain insns whose uses of REG still need rewriting.  */
  conv = BITMAP_ALLOC (NULL);
  bitmap_copy (conv, insns);

  if (scalar_copy)
    scopy = gen_reg_rtx (smode);

  for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
    {
      rtx_insn *insn = DF_REF_INSN (ref);
      rtx def_set = single_set (insn);
      rtx src = SET_SRC (def_set);
      rtx reg = DF_REF_REG (ref);

      if (!MEM_P (src))
	{
	  replace_with_subreg_in_insn (insn, reg, reg);
	  bitmap_clear_bit (conv, INSN_UID (insn));
	}

      if (scalar_copy)
	{
	  /* Emit a vector-to-scalar copy after the def so REG is also
	     available in scalar mode for non-convertible uses.  */
	  start_sequence ();
	  if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
	    {
	      /* Without direct XMM->GPR moves go through a stack slot.  */
	      rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
	      emit_move_insn (tmp, reg);
	      if (!TARGET_64BIT && smode == DImode)
		{
		  emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
				  adjust_address (tmp, SImode, 0));
		  emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
				  adjust_address (tmp, SImode, 4));
		}
	      else
		emit_move_insn (scopy, tmp);
	    }
	  else if (!TARGET_64BIT && smode == DImode)
	    {
	      if (TARGET_SSE4_1)
		{
		  /* Extract the two SImode halves with pextrd.  */
		  rtx tmp = gen_rtx_PARALLEL (VOIDmode,
					      gen_rtvec (1, const0_rtx));
		  emit_insn
		    (gen_rtx_SET
		       (gen_rtx_SUBREG (SImode, scopy, 0),
			gen_rtx_VEC_SELECT (SImode,
					    gen_rtx_SUBREG (V4SImode, reg, 0),
					    tmp)));

		  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
		  emit_insn
		    (gen_rtx_SET
		       (gen_rtx_SUBREG (SImode, scopy, 4),
			gen_rtx_VEC_SELECT (SImode,
					    gen_rtx_SUBREG (V4SImode, reg, 0),
					    tmp)));
		}
	      else
		{
		  /* Pre-SSE4.1: move the low word, shift, move again.  */
		  rtx vcopy = gen_reg_rtx (V2DImode);
		  emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0));
		  emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
				  gen_rtx_SUBREG (SImode, vcopy, 0));
		  emit_move_insn (vcopy,
				  gen_rtx_LSHIFTRT (V2DImode,
						    vcopy, GEN_INT (32)));
		  emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
				  gen_rtx_SUBREG (SImode, vcopy, 0));
		}
	    }
	  else
	    emit_move_insn (scopy, reg);

	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_conversion_insns (seq, insn);

	  if (dump_file)
	    fprintf (dump_file,
		     "  Copied r%d to a scalar register r%d for insn %d\n",
		     regno, REGNO (scopy), INSN_UID (insn));
	}
    }

  for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
    if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
      {
	if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref)))
	  {
	    rtx_insn *insn = DF_REF_INSN (ref);

	    rtx def_set = single_set (insn);
	    gcc_assert (def_set);

	    rtx src = SET_SRC (def_set);
	    rtx dst = SET_DEST (def_set);

	    /* Reg-to-mem stores keep the scalar source; everything else
	       is rewritten to the vector subreg.  */
	    if (!MEM_P (dst) || !REG_P (src))
	      replace_with_subreg_in_insn (insn, reg, reg);

	    bitmap_clear_bit (conv, INSN_UID (insn));
	  }
      }
    /* Skip debug insns and uninitialized uses.  */
    else if (DF_REF_CHAIN (ref)
	     && NONDEBUG_INSN_P (DF_REF_INSN (ref)))
      {
	/* Non-chain uses get the scalar copy emitted above.  */
	gcc_assert (scopy);
	replace_rtx (DF_REF_INSN (ref), reg, scopy);
	df_insn_rescan (DF_REF_INSN (ref));
      }

  BITMAP_FREE (conv);
}
880
/* Convert operand OP in INSN.  We should handle
   memory operands and uninitialized registers.
   All other register uses are converted during
   registers conversion.  */

void
general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
{
  *op = copy_rtx_if_shared (*op);

  if (GET_CODE (*op) == NOT)
    {
      convert_op (&XEXP (*op, 0), insn);
      PUT_MODE (*op, vmode);
    }
  else if (MEM_P (*op))
    {
      /* Preload the memory operand into a fresh register so the vector
	 insn only sees register operands.  */
      rtx tmp = gen_reg_rtx (GET_MODE (*op));

      emit_insn_before (gen_move_insn (tmp, *op), insn);
      *op = gen_rtx_SUBREG (vmode, tmp, 0);

      if (dump_file)
	fprintf (dump_file, "  Preloading operand for insn %d into r%d\n",
		 INSN_UID (insn), REGNO (tmp));
    }
  else if (REG_P (*op))
    {
      /* We may have not converted register usage in case
	 this register has no definition.  Otherwise it
	 should be converted in convert_reg.  */
      df_ref ref;
      FOR_EACH_INSN_USE (ref, insn)
	if (DF_REF_REGNO (ref) == REGNO (*op))
	  {
	    gcc_assert (!DF_REF_CHAIN (ref));
	    break;
	  }
      *op = gen_rtx_SUBREG (vmode, *op, 0);
    }
  else if (CONST_INT_P (*op))
    {
      rtx vec_cst;
      rtx tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (smode), 0);

      /* Prefer all ones vector in case of -1.  */
      if (constm1_operand (*op, GET_MODE (*op)))
	vec_cst = CONSTM1_RTX (vmode);
      else
	{
	  /* Build a vector with the constant in element 0, zeros
	     elsewhere.  */
	  unsigned n = GET_MODE_NUNITS (vmode);
	  rtx *v = XALLOCAVEC (rtx, n);
	  v[0] = *op;
	  for (unsigned i = 1; i < n; ++i)
	    v[i] = const0_rtx;
	  vec_cst = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
	}

      if (!standard_sse_constant_p (vec_cst, vmode))
	{
	  /* Non-standard constants must be loaded from the constant
	     pool.  */
	  start_sequence ();
	  vec_cst = validize_mem (force_const_mem (vmode, vec_cst));
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_insn_before (seq, insn);
	}

      emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
      *op = tmp;
    }
  else
    {
      /* Anything else must already be a VMODE subreg produced by an
	 earlier conversion step.  */
      gcc_assert (SUBREG_P (*op));
      gcc_assert (GET_MODE (*op) == vmode);
    }
}
957
/* Convert INSN to vector mode.  Rewrites the single-set pattern's
   source and destination to VMODE, emitting any helper insns before
   or after, then re-recognizes the insn.  */

void
general_scalar_chain::convert_insn (rtx_insn *insn)
{
  rtx def_set = single_set (insn);
  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);
  rtx subreg;

  if (MEM_P (dst) && !REG_P (src))
    {
      /* There are no scalar integer instructions and therefore
	 temporary register usage is required.  */
      rtx tmp = gen_reg_rtx (smode);
      emit_conversion_insns (gen_move_insn (dst, tmp), insn);
      dst = gen_rtx_SUBREG (vmode, tmp, 0);
    }

  switch (GET_CODE (src))
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      /* Only the shifted operand is converted; the count stays scalar.  */
      convert_op (&XEXP (src, 0), insn);
      PUT_MODE (src, vmode);
      break;

    case PLUS:
    case MINUS:
    case IOR:
    case XOR:
    case AND:
    case SMAX:
    case SMIN:
    case UMAX:
    case UMIN:
      convert_op (&XEXP (src, 0), insn);
      convert_op (&XEXP (src, 1), insn);
      PUT_MODE (src, vmode);
      break;

    case NEG:
      /* Rewrite -X as 0 - X since there is no vector negate insn.  */
      src = XEXP (src, 0);
      convert_op (&src, insn);
      subreg = gen_reg_rtx (vmode);
      emit_insn_before (gen_move_insn (subreg, CONST0_RTX (vmode)), insn);
      src = gen_rtx_MINUS (vmode, subreg, src);
      break;

    case NOT:
      /* Rewrite ~X as X ^ -1 since there is no vector one's-complement
	 insn.  */
      src = XEXP (src, 0);
      convert_op (&src, insn);
      subreg = gen_reg_rtx (vmode);
      emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (vmode)), insn);
      src = gen_rtx_XOR (vmode, src, subreg);
      break;

    case MEM:
      if (!REG_P (dst))
	convert_op (&src, insn);
      break;

    case REG:
      if (!MEM_P (dst))
	convert_op (&src, insn);
      break;

    case SUBREG:
      gcc_assert (GET_MODE (src) == vmode);
      break;

    case COMPARE:
      src = SUBREG_REG (XEXP (XEXP (src, 0), 0));

      gcc_assert ((REG_P (src) && GET_MODE (src) == DImode)
		  || (SUBREG_P (src) && GET_MODE (src) == V2DImode));

      if (REG_P (src))
	subreg = gen_rtx_SUBREG (V2DImode, src, 0);
      else
	subreg = copy_rtx_if_shared (src);
      /* Duplicate the low element so a ptest against itself implements
	 the scalar comparison.  */
      emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
						    copy_rtx_if_shared (subreg),
						    copy_rtx_if_shared (subreg)),
			insn);
      dst = gen_rtx_REG (CCmode, FLAGS_REG);
      src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src),
					       copy_rtx_if_shared (src)),
			    UNSPEC_PTEST);
      break;

    case CONST_INT:
      convert_op (&src, insn);
      break;

    default:
      gcc_unreachable ();
    }

  SET_SRC (def_set) = src;
  SET_DEST (def_set) = dst;

  /* Drop possible dead definitions.  */
  PATTERN (insn) = def_set;

  /* Force re-recognition of the rewritten insn; failing to match here
     is a hard error.  */
  INSN_CODE (insn) = -1;
  int patt = recog_memoized (insn);
  if (patt == -1)
    fatal_insn_not_found (insn);
  df_insn_rescan (insn);
}
1070
/* Fix uses of converted REG in debug insns.  After REG's mode was
   changed to V1TImode, debug insns referring to it must view it through
   a TImode subreg so var-tracking keeps working.  */

void
timode_scalar_chain::fix_debug_reg_uses (rtx reg)
{
  if (!flag_var_tracking)
    return;

  df_ref ref, next;
  for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
    {
      rtx_insn *insn = DF_REF_INSN (ref);
      /* Make sure the next ref is for a different instruction,
	 so that we're not affected by the rescan.  */
      next = DF_REF_NEXT_REG (ref);
      while (next && DF_REF_INSN (next) == insn)
	next = DF_REF_NEXT_REG (next);

      if (DEBUG_INSN_P (insn))
	{
	  /* It may be a debug insn with a TImode variable in
	     register.  */
	  bool changed = false;
	  for (; ref != next; ref = DF_REF_NEXT_REG (ref))
	    {
	      rtx *loc = DF_REF_LOC (ref);
	      if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
		{
		  *loc = gen_rtx_SUBREG (TImode, *loc, 0);
		  changed = true;
		}
	    }
	  if (changed)
	    df_insn_rescan (insn);
	}
    }
}
1108
1109/* Convert INSN from TImode to V1T1mode. */
1110
1111void
1112timode_scalar_chain::convert_insn (rtx_insn *insn)
1113{
1114 rtx def_set = single_set (insn);
1115 rtx src = SET_SRC (def_set);
1116 rtx dst = SET_DEST (def_set);
1117
1118 switch (GET_CODE (dst))
1119 {
1120 case REG:
1121 {
1122 rtx tmp = find_reg_equal_equiv_note (insn);
1123 if (tmp)
1124 PUT_MODE (XEXP (tmp, 0), V1TImode);
1125 PUT_MODE (dst, V1TImode);
1126 fix_debug_reg_uses (dst);
1127 }
1128 break;
1129 case MEM:
1130 PUT_MODE (dst, V1TImode);
1131 break;
1132
1133 default:
1134 gcc_unreachable ();
1135 }
1136
1137 switch (GET_CODE (src))
1138 {
1139 case REG:
1140 PUT_MODE (src, V1TImode);
1141 /* Call fix_debug_reg_uses only if SRC is never defined. */
1142 if (!DF_REG_DEF_CHAIN (REGNO (src)))
1143 fix_debug_reg_uses (src);
1144 break;
1145
1146 case MEM:
1147 PUT_MODE (src, V1TImode);
1148 break;
1149
1150 case CONST_WIDE_INT:
1151 if (NONDEBUG_INSN_P (insn))
1152 {
1153 /* Since there are no instructions to store 128-bit constant,
1154 temporary register usage is required. */
1155 rtx tmp = gen_reg_rtx (V1TImode);
1156 start_sequence ();
1157 src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
1158 src = validize_mem (force_const_mem (V1TImode, src));
1159 rtx_insn *seq = get_insns ();
1160 end_sequence ();
1161 if (seq)
1162 emit_insn_before (seq, insn);
1163 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
1164 dst = tmp;
1165 }
1166 break;
1167
1168 case CONST_INT:
1169 switch (standard_sse_constant_p (src, TImode))
1170 {
1171 case 1:
1172 src = CONST0_RTX (GET_MODE (dst));
1173 break;
1174 case 2:
1175 src = CONSTM1_RTX (GET_MODE (dst));
1176 break;
1177 default:
1178 gcc_unreachable ();
1179 }
1180 if (NONDEBUG_INSN_P (insn))
1181 {
1182 rtx tmp = gen_reg_rtx (V1TImode);
1183 /* Since there are no instructions to store standard SSE
1184 constant, temporary register usage is required. */
1185 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
1186 dst = tmp;
1187 }
1188 break;
1189
1190 default:
1191 gcc_unreachable ();
1192 }
1193
1194 SET_SRC (def_set) = src;
1195 SET_DEST (def_set) = dst;
1196
1197 /* Drop possible dead definitions. */
1198 PATTERN (insn) = def_set;
1199
1200 INSN_CODE (insn) = -1;
1201 recog_memoized (insn);
1202 df_insn_rescan (insn);
1203}
1204
1205void
93cf5515 1206general_scalar_chain::convert_registers ()
2bf6d935
ML
1207{
1208 bitmap_iterator bi;
1209 unsigned id;
1210
1211 EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
1212 convert_reg (id);
1213
1214 EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
1215 make_vector_copies (id);
1216}
1217
1218/* Convert whole chain creating required register
1219 conversions and copies. */
1220
1221int
1222scalar_chain::convert ()
1223{
1224 bitmap_iterator bi;
1225 unsigned id;
1226 int converted_insns = 0;
1227
1228 if (!dbg_cnt (stv_conversion))
1229 return 0;
1230
1231 if (dump_file)
1232 fprintf (dump_file, "Converting chain #%d...\n", chain_id);
1233
1234 convert_registers ();
1235
1236 EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
1237 {
1238 convert_insn (DF_INSN_UID_GET (id)->insn);
1239 converted_insns++;
1240 }
1241
1242 return converted_insns;
1243}
1244
1245/* Return 1 if INSN uses or defines a hard register.
1246 Hard register uses in a memory address are ignored.
1247 Clobbers and flags definitions are ignored. */
1248
1249static bool
1250has_non_address_hard_reg (rtx_insn *insn)
1251{
1252 df_ref ref;
1253 FOR_EACH_INSN_DEF (ref, insn)
1254 if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
1255 && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
1256 && DF_REF_REGNO (ref) != FLAGS_REG)
1257 return true;
1258
1259 FOR_EACH_INSN_USE (ref, insn)
1260 if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
1261 return true;
1262
1263 return false;
1264}
1265
1266/* Check if comparison INSN may be transformed
1267 into vector comparison. Currently we transform
1268 zero checks only which look like:
1269
1270 (set (reg:CCZ 17 flags)
1271 (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
1272 (subreg:SI (reg:DI x) 0))
1273 (const_int 0 [0]))) */
1274
1275static bool
c839844a 1276convertible_comparison_p (rtx_insn *insn, machine_mode mode)
2bf6d935 1277{
c839844a
UB
1278 /* ??? Currently convertible for double-word DImode chain only. */
1279 if (TARGET_64BIT || mode != DImode)
1280 return false;
1281
2bf6d935
ML
1282 if (!TARGET_SSE4_1)
1283 return false;
1284
1285 rtx def_set = single_set (insn);
1286
1287 gcc_assert (def_set);
1288
1289 rtx src = SET_SRC (def_set);
1290 rtx dst = SET_DEST (def_set);
1291
1292 gcc_assert (GET_CODE (src) == COMPARE);
1293
1294 if (GET_CODE (dst) != REG
1295 || REGNO (dst) != FLAGS_REG
1296 || GET_MODE (dst) != CCZmode)
1297 return false;
1298
1299 rtx op1 = XEXP (src, 0);
1300 rtx op2 = XEXP (src, 1);
1301
1302 if (op2 != CONST0_RTX (GET_MODE (op2)))
1303 return false;
1304
1305 if (GET_CODE (op1) != IOR)
1306 return false;
1307
1308 op2 = XEXP (op1, 1);
1309 op1 = XEXP (op1, 0);
1310
1311 if (!SUBREG_P (op1)
1312 || !SUBREG_P (op2)
c839844a
UB
1313 || GET_MODE (op1) != SImode
1314 || GET_MODE (op2) != SImode
2bf6d935 1315 || ((SUBREG_BYTE (op1) != 0
c839844a 1316 || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
2bf6d935 1317 && (SUBREG_BYTE (op2) != 0
c839844a 1318 || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
2bf6d935
ML
1319 return false;
1320
1321 op1 = SUBREG_REG (op1);
1322 op2 = SUBREG_REG (op2);
1323
1324 if (op1 != op2
1325 || !REG_P (op1)
c839844a 1326 || GET_MODE (op1) != DImode)
2bf6d935
ML
1327 return false;
1328
1329 return true;
1330}
1331
c839844a 1332/* The general version of scalar_to_vector_candidate_p. */
2bf6d935
ML
1333
1334static bool
93cf5515 1335general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
2bf6d935
ML
1336{
1337 rtx def_set = single_set (insn);
1338
1339 if (!def_set)
1340 return false;
1341
1342 if (has_non_address_hard_reg (insn))
1343 return false;
1344
1345 rtx src = SET_SRC (def_set);
1346 rtx dst = SET_DEST (def_set);
1347
1348 if (GET_CODE (src) == COMPARE)
93cf5515 1349 return convertible_comparison_p (insn, mode);
2bf6d935 1350
c839844a 1351 /* We are interested in "mode" only. */
93cf5515 1352 if ((GET_MODE (src) != mode
2bf6d935 1353 && !CONST_INT_P (src))
93cf5515 1354 || GET_MODE (dst) != mode)
2bf6d935
ML
1355 return false;
1356
1357 if (!REG_P (dst) && !MEM_P (dst))
1358 return false;
1359
1360 switch (GET_CODE (src))
1361 {
1362 case ASHIFTRT:
1363 if (!TARGET_AVX512VL)
1364 return false;
1365 /* FALLTHRU */
1366
1367 case ASHIFT:
1368 case LSHIFTRT:
1369 if (!CONST_INT_P (XEXP (src, 1))
1370 || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, 63))
1371 return false;
1372 break;
1373
93cf5515
RB
1374 case SMAX:
1375 case SMIN:
1376 case UMAX:
1377 case UMIN:
1378 if ((mode == DImode && !TARGET_AVX512VL)
1379 || (mode == SImode && !TARGET_SSE4_1))
1380 return false;
1381 /* Fallthru. */
1382
2bf6d935
ML
1383 case PLUS:
1384 case MINUS:
1385 case IOR:
1386 case XOR:
1387 case AND:
1388 if (!REG_P (XEXP (src, 1))
1389 && !MEM_P (XEXP (src, 1))
1390 && !CONST_INT_P (XEXP (src, 1)))
1391 return false;
1392
93cf5515 1393 if (GET_MODE (XEXP (src, 1)) != mode
2bf6d935
ML
1394 && !CONST_INT_P (XEXP (src, 1)))
1395 return false;
1396 break;
1397
1398 case NEG:
1399 case NOT:
1400 break;
1401
1402 case REG:
1403 return true;
1404
1405 case MEM:
1406 case CONST_INT:
1407 return REG_P (dst);
1408
1409 default:
1410 return false;
1411 }
1412
1413 if (!REG_P (XEXP (src, 0))
1414 && !MEM_P (XEXP (src, 0))
1415 && !CONST_INT_P (XEXP (src, 0))
1416 /* Check for andnot case. */
1417 && (GET_CODE (src) != AND
1418 || GET_CODE (XEXP (src, 0)) != NOT
1419 || !REG_P (XEXP (XEXP (src, 0), 0))))
1420 return false;
1421
93cf5515 1422 if (GET_MODE (XEXP (src, 0)) != mode
2bf6d935
ML
1423 && !CONST_INT_P (XEXP (src, 0)))
1424 return false;
1425
1426 return true;
1427}
1428
1429/* The TImode version of scalar_to_vector_candidate_p. */
1430
1431static bool
1432timode_scalar_to_vector_candidate_p (rtx_insn *insn)
1433{
1434 rtx def_set = single_set (insn);
1435
1436 if (!def_set)
1437 return false;
1438
1439 if (has_non_address_hard_reg (insn))
1440 return false;
1441
1442 rtx src = SET_SRC (def_set);
1443 rtx dst = SET_DEST (def_set);
1444
1445 /* Only TImode load and store are allowed. */
1446 if (GET_MODE (dst) != TImode)
1447 return false;
1448
1449 if (MEM_P (dst))
1450 {
1451 /* Check for store. Memory must be aligned or unaligned store
1452 is optimal. Only support store from register, standard SSE
1453 constant or CONST_WIDE_INT generated from piecewise store.
1454
1455 ??? Verify performance impact before enabling CONST_INT for
1456 __int128 store. */
1457 if (misaligned_operand (dst, TImode)
1458 && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
1459 return false;
1460
1461 switch (GET_CODE (src))
1462 {
1463 default:
1464 return false;
1465
1466 case REG:
1467 case CONST_WIDE_INT:
1468 return true;
1469
1470 case CONST_INT:
1471 return standard_sse_constant_p (src, TImode);
1472 }
1473 }
1474 else if (MEM_P (src))
1475 {
1476 /* Check for load. Memory must be aligned or unaligned load is
1477 optimal. */
1478 return (REG_P (dst)
1479 && (!misaligned_operand (src, TImode)
1480 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
1481 }
1482
1483 return false;
1484}
1485
93cf5515
RB
1486/* For a given bitmap of insn UIDs scans all instruction and
1487 remove insn from CANDIDATES in case it has both convertible
1488 and not convertible definitions.
2bf6d935 1489
93cf5515
RB
1490 All insns in a bitmap are conversion candidates according to
1491 scalar_to_vector_candidate_p. Currently it implies all insns
1492 are single_set. */
2bf6d935
ML
1493
1494static void
93cf5515 1495general_remove_non_convertible_regs (bitmap candidates)
2bf6d935
ML
1496{
1497 bitmap_iterator bi;
1498 unsigned id;
1499 bitmap regs = BITMAP_ALLOC (NULL);
1500
1501 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
1502 {
1503 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
1504 rtx reg = SET_DEST (def_set);
1505
1506 if (!REG_P (reg)
1507 || bitmap_bit_p (regs, REGNO (reg))
1508 || HARD_REGISTER_P (reg))
1509 continue;
1510
1511 for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
1512 def;
1513 def = DF_REF_NEXT_REG (def))
1514 {
1515 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1516 {
1517 if (dump_file)
1518 fprintf (dump_file,
1519 "r%d has non convertible definition in insn %d\n",
1520 REGNO (reg), DF_REF_INSN_UID (def));
1521
1522 bitmap_set_bit (regs, REGNO (reg));
1523 break;
1524 }
1525 }
1526 }
1527
1528 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
1529 {
1530 for (df_ref def = DF_REG_DEF_CHAIN (id);
1531 def;
1532 def = DF_REF_NEXT_REG (def))
1533 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1534 {
1535 if (dump_file)
1536 fprintf (dump_file, "Removing insn %d from candidates list\n",
1537 DF_REF_INSN_UID (def));
1538
1539 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
1540 }
1541 }
1542
1543 BITMAP_FREE (regs);
1544}
1545
1546/* For a register REGNO, scan instructions for its defs and uses.
1547 Put REGNO in REGS if a def or use isn't in CANDIDATES. */
1548
1549static void
1550timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
1551 unsigned int regno)
1552{
1553 for (df_ref def = DF_REG_DEF_CHAIN (regno);
1554 def;
1555 def = DF_REF_NEXT_REG (def))
1556 {
1557 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1558 {
1559 if (dump_file)
1560 fprintf (dump_file,
1561 "r%d has non convertible def in insn %d\n",
1562 regno, DF_REF_INSN_UID (def));
1563
1564 bitmap_set_bit (regs, regno);
1565 break;
1566 }
1567 }
1568
1569 for (df_ref ref = DF_REG_USE_CHAIN (regno);
1570 ref;
1571 ref = DF_REF_NEXT_REG (ref))
1572 {
1573 /* Debug instructions are skipped. */
1574 if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
1575 && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
1576 {
1577 if (dump_file)
1578 fprintf (dump_file,
1579 "r%d has non convertible use in insn %d\n",
1580 regno, DF_REF_INSN_UID (ref));
1581
1582 bitmap_set_bit (regs, regno);
1583 break;
1584 }
1585 }
1586}
1587
1588/* The TImode version of remove_non_convertible_regs. */
1589
1590static void
1591timode_remove_non_convertible_regs (bitmap candidates)
1592{
1593 bitmap_iterator bi;
1594 unsigned id;
1595 bitmap regs = BITMAP_ALLOC (NULL);
1596
1597 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
1598 {
1599 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
1600 rtx dest = SET_DEST (def_set);
1601 rtx src = SET_SRC (def_set);
1602
1603 if ((!REG_P (dest)
1604 || bitmap_bit_p (regs, REGNO (dest))
1605 || HARD_REGISTER_P (dest))
1606 && (!REG_P (src)
1607 || bitmap_bit_p (regs, REGNO (src))
1608 || HARD_REGISTER_P (src)))
1609 continue;
1610
1611 if (REG_P (dest))
1612 timode_check_non_convertible_regs (candidates, regs,
1613 REGNO (dest));
1614
1615 if (REG_P (src))
1616 timode_check_non_convertible_regs (candidates, regs,
1617 REGNO (src));
1618 }
1619
1620 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
1621 {
1622 for (df_ref def = DF_REG_DEF_CHAIN (id);
1623 def;
1624 def = DF_REF_NEXT_REG (def))
1625 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1626 {
1627 if (dump_file)
1628 fprintf (dump_file, "Removing insn %d from candidates list\n",
1629 DF_REF_INSN_UID (def));
1630
1631 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
1632 }
1633
1634 for (df_ref ref = DF_REG_USE_CHAIN (id);
1635 ref;
1636 ref = DF_REF_NEXT_REG (ref))
1637 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
1638 {
1639 if (dump_file)
1640 fprintf (dump_file, "Removing insn %d from candidates list\n",
1641 DF_REF_INSN_UID (ref));
1642
1643 bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
1644 }
1645 }
1646
1647 BITMAP_FREE (regs);
1648}
1649
2bf6d935
ML
1650/* Main STV pass function. Find and convert scalar
1651 instructions into vector mode when profitable. */
1652
1653static unsigned int
1654convert_scalars_to_vector ()
1655{
1656 basic_block bb;
2bf6d935
ML
1657 int converted_insns = 0;
1658
1659 bitmap_obstack_initialize (NULL);
93cf5515
RB
1660 const machine_mode cand_mode[3] = { SImode, DImode, TImode };
1661 const machine_mode cand_vmode[3] = { V4SImode, V2DImode, V1TImode };
1662 bitmap_head candidates[3]; /* { SImode, DImode, TImode } */
1663 for (unsigned i = 0; i < 3; ++i)
1664 bitmap_initialize (&candidates[i], &bitmap_default_obstack);
2bf6d935
ML
1665
1666 calculate_dominance_info (CDI_DOMINATORS);
1667 df_set_flags (DF_DEFER_INSN_RESCAN);
1668 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
1669 df_md_add_problem ();
1670 df_analyze ();
1671
1672 /* Find all instructions we want to convert into vector mode. */
1673 if (dump_file)
1674 fprintf (dump_file, "Searching for mode conversion candidates...\n");
1675
1676 FOR_EACH_BB_FN (bb, cfun)
1677 {
1678 rtx_insn *insn;
1679 FOR_BB_INSNS (bb, insn)
93cf5515
RB
1680 if (TARGET_64BIT
1681 && timode_scalar_to_vector_candidate_p (insn))
2bf6d935
ML
1682 {
1683 if (dump_file)
93cf5515 1684 fprintf (dump_file, " insn %d is marked as a TImode candidate\n",
2bf6d935
ML
1685 INSN_UID (insn));
1686
93cf5515
RB
1687 bitmap_set_bit (&candidates[2], INSN_UID (insn));
1688 }
1689 else
1690 {
1691 /* Check {SI,DI}mode. */
1692 for (unsigned i = 0; i <= 1; ++i)
1693 if (general_scalar_to_vector_candidate_p (insn, cand_mode[i]))
1694 {
1695 if (dump_file)
1696 fprintf (dump_file, " insn %d is marked as a %s candidate\n",
1697 INSN_UID (insn), i == 0 ? "SImode" : "DImode");
1698
1699 bitmap_set_bit (&candidates[i], INSN_UID (insn));
1700 break;
1701 }
2bf6d935
ML
1702 }
1703 }
1704
93cf5515
RB
1705 if (TARGET_64BIT)
1706 timode_remove_non_convertible_regs (&candidates[2]);
1707 for (unsigned i = 0; i <= 1; ++i)
1708 general_remove_non_convertible_regs (&candidates[i]);
2bf6d935 1709
93cf5515
RB
1710 for (unsigned i = 0; i <= 2; ++i)
1711 if (!bitmap_empty_p (&candidates[i]))
1712 break;
1713 else if (i == 2 && dump_file)
2bf6d935
ML
1714 fprintf (dump_file, "There are no candidates for optimization.\n");
1715
93cf5515
RB
1716 for (unsigned i = 0; i <= 2; ++i)
1717 while (!bitmap_empty_p (&candidates[i]))
1718 {
1719 unsigned uid = bitmap_first_set_bit (&candidates[i]);
1720 scalar_chain *chain;
2bf6d935 1721
93cf5515
RB
1722 if (cand_mode[i] == TImode)
1723 chain = new timode_scalar_chain;
1724 else
1725 chain = new general_scalar_chain (cand_mode[i], cand_vmode[i]);
2bf6d935 1726
93cf5515
RB
1727 /* Find instructions chain we want to convert to vector mode.
1728 Check all uses and definitions to estimate all required
1729 conversions. */
1730 chain->build (&candidates[i], uid);
2bf6d935 1731
93cf5515
RB
1732 if (chain->compute_convert_gain () > 0)
1733 converted_insns += chain->convert ();
1734 else
1735 if (dump_file)
1736 fprintf (dump_file, "Chain #%d conversion is not profitable\n",
1737 chain->chain_id);
2bf6d935 1738
93cf5515
RB
1739 delete chain;
1740 }
2bf6d935
ML
1741
1742 if (dump_file)
1743 fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
1744
93cf5515
RB
1745 for (unsigned i = 0; i <= 2; ++i)
1746 bitmap_release (&candidates[i]);
2bf6d935
ML
1747 bitmap_obstack_release (NULL);
1748 df_process_deferred_rescans ();
1749
1750 /* Conversion means we may have 128bit register spills/fills
1751 which require aligned stack. */
1752 if (converted_insns)
1753 {
1754 if (crtl->stack_alignment_needed < 128)
1755 crtl->stack_alignment_needed = 128;
1756 if (crtl->stack_alignment_estimated < 128)
1757 crtl->stack_alignment_estimated = 128;
1758 /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments. */
1759 if (TARGET_64BIT)
1760 for (tree parm = DECL_ARGUMENTS (current_function_decl);
1761 parm; parm = DECL_CHAIN (parm))
1762 {
1763 if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
1764 continue;
1765 if (DECL_RTL_SET_P (parm)
1766 && GET_MODE (DECL_RTL (parm)) == V1TImode)
1767 {
1768 rtx r = DECL_RTL (parm);
1769 if (REG_P (r))
1770 SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
1771 }
1772 if (DECL_INCOMING_RTL (parm)
1773 && GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
1774 {
1775 rtx r = DECL_INCOMING_RTL (parm);
1776 if (REG_P (r))
1777 DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
1778 }
1779 }
1780 }
1781
1782 return 0;
1783}
1784
1785static unsigned int
1786rest_of_handle_insert_vzeroupper (void)
1787{
1788 int i;
1789
1790 /* vzeroupper instructions are inserted immediately after reload to
1791 account for possible spills from 256bit or 512bit registers. The pass
1792 reuses mode switching infrastructure by re-running mode insertion
1793 pass, so disable entities that have already been processed. */
1794 for (i = 0; i < MAX_386_ENTITIES; i++)
1795 ix86_optimize_mode_switching[i] = 0;
1796
1797 ix86_optimize_mode_switching[AVX_U128] = 1;
1798
1799 /* Call optimize_mode_switching. */
1800 g->get_passes ()->execute_pass_mode_switching ();
1801 return 0;
1802}
1803
1804namespace {
1805
1806const pass_data pass_data_insert_vzeroupper =
1807{
1808 RTL_PASS, /* type */
1809 "vzeroupper", /* name */
1810 OPTGROUP_NONE, /* optinfo_flags */
1811 TV_MACH_DEP, /* tv_id */
1812 0, /* properties_required */
1813 0, /* properties_provided */
1814 0, /* properties_destroyed */
1815 0, /* todo_flags_start */
1816 TODO_df_finish, /* todo_flags_finish */
1817};
1818
1819class pass_insert_vzeroupper : public rtl_opt_pass
1820{
1821public:
1822 pass_insert_vzeroupper(gcc::context *ctxt)
1823 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
1824 {}
1825
1826 /* opt_pass methods: */
1827 virtual bool gate (function *)
1828 {
1829 return TARGET_AVX
1830 && TARGET_VZEROUPPER && flag_expensive_optimizations
1831 && !optimize_size;
1832 }
1833
1834 virtual unsigned int execute (function *)
1835 {
1836 return rest_of_handle_insert_vzeroupper ();
1837 }
1838
1839}; // class pass_insert_vzeroupper
1840
1841const pass_data pass_data_stv =
1842{
1843 RTL_PASS, /* type */
1844 "stv", /* name */
1845 OPTGROUP_NONE, /* optinfo_flags */
1846 TV_MACH_DEP, /* tv_id */
1847 0, /* properties_required */
1848 0, /* properties_provided */
1849 0, /* properties_destroyed */
1850 0, /* todo_flags_start */
1851 TODO_df_finish, /* todo_flags_finish */
1852};
1853
1854class pass_stv : public rtl_opt_pass
1855{
1856public:
1857 pass_stv (gcc::context *ctxt)
1858 : rtl_opt_pass (pass_data_stv, ctxt),
1859 timode_p (false)
1860 {}
1861
1862 /* opt_pass methods: */
1863 virtual bool gate (function *)
1864 {
1865 return (timode_p == !!TARGET_64BIT
1866 && TARGET_STV && TARGET_SSE2 && optimize > 1);
1867 }
1868
1869 virtual unsigned int execute (function *)
1870 {
1871 return convert_scalars_to_vector ();
1872 }
1873
1874 opt_pass *clone ()
1875 {
1876 return new pass_stv (m_ctxt);
1877 }
1878
1879 void set_pass_param (unsigned int n, bool param)
1880 {
1881 gcc_assert (n == 0);
1882 timode_p = param;
1883 }
1884
1885private:
1886 bool timode_p;
1887}; // class pass_stv
1888
1889} // anon namespace
1890
1891rtl_opt_pass *
1892make_pass_insert_vzeroupper (gcc::context *ctxt)
1893{
1894 return new pass_insert_vzeroupper (ctxt);
1895}
1896
1897rtl_opt_pass *
1898make_pass_stv (gcc::context *ctxt)
1899{
1900 return new pass_stv (ctxt);
1901}
1902
1903/* Inserting ENDBRANCH instructions. */
1904
1905static unsigned int
1906rest_of_insert_endbranch (void)
1907{
1908 timevar_push (TV_MACH_DEP);
1909
1910 rtx cet_eb;
1911 rtx_insn *insn;
1912 basic_block bb;
1913
1914 /* Currently emit EB if it's a tracking function, i.e. 'nocf_check' is
1915 absent among function attributes. Later an optimization will be
1916 introduced to make analysis if an address of a static function is
1917 taken. A static function whose address is not taken will get a
1918 nocf_check attribute. This will allow to reduce the number of EB. */
1919
1920 if (!lookup_attribute ("nocf_check",
1921 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
1922 && (!flag_manual_endbr
1923 || lookup_attribute ("cf_check",
1924 DECL_ATTRIBUTES (cfun->decl)))
1925 && !cgraph_node::get (cfun->decl)->only_called_directly_p ())
1926 {
1927 /* Queue ENDBR insertion to x86_function_profiler. */
1928 if (crtl->profile && flag_fentry)
1929 cfun->machine->endbr_queued_at_entrance = true;
1930 else
1931 {
1932 cet_eb = gen_nop_endbr ();
1933
1934 bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
1935 insn = BB_HEAD (bb);
1936 emit_insn_before (cet_eb, insn);
1937 }
1938 }
1939
1940 bb = 0;
1941 FOR_EACH_BB_FN (bb, cfun)
1942 {
1943 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
1944 insn = NEXT_INSN (insn))
1945 {
1946 if (CALL_P (insn))
1947 {
1948 bool need_endbr;
1949 need_endbr = find_reg_note (insn, REG_SETJMP, NULL) != NULL;
1950 if (!need_endbr && !SIBLING_CALL_P (insn))
1951 {
1952 rtx call = get_call_rtx_from (insn);
1953 rtx fnaddr = XEXP (call, 0);
1954 tree fndecl = NULL_TREE;
1955
1956 /* Also generate ENDBRANCH for non-tail call which
1957 may return via indirect branch. */
1958 if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
1959 fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
1960 if (fndecl == NULL_TREE)
1961 fndecl = MEM_EXPR (fnaddr);
1962 if (fndecl
1963 && TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE
1964 && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE)
1965 fndecl = NULL_TREE;
1966 if (fndecl && TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
1967 {
1968 tree fntype = TREE_TYPE (fndecl);
1969 if (lookup_attribute ("indirect_return",
1970 TYPE_ATTRIBUTES (fntype)))
1971 need_endbr = true;
1972 }
1973 }
1974 if (!need_endbr)
1975 continue;
1976 /* Generate ENDBRANCH after CALL, which can return more than
1977 twice, setjmp-like functions. */
1978
1979 cet_eb = gen_nop_endbr ();
1980 emit_insn_after_setloc (cet_eb, insn, INSN_LOCATION (insn));
1981 continue;
1982 }
1983
1984 if (JUMP_P (insn) && flag_cet_switch)
1985 {
1986 rtx target = JUMP_LABEL (insn);
1987 if (target == NULL_RTX || ANY_RETURN_P (target))
1988 continue;
1989
1990 /* Check the jump is a switch table. */
1991 rtx_insn *label = as_a<rtx_insn *> (target);
1992 rtx_insn *table = next_insn (label);
1993 if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
1994 continue;
1995
1996 /* For the indirect jump find out all places it jumps and insert
1997 ENDBRANCH there. It should be done under a special flag to
1998 control ENDBRANCH generation for switch stmts. */
1999 edge_iterator ei;
2000 edge e;
2001 basic_block dest_blk;
2002
2003 FOR_EACH_EDGE (e, ei, bb->succs)
2004 {
2005 rtx_insn *insn;
2006
2007 dest_blk = e->dest;
2008 insn = BB_HEAD (dest_blk);
2009 gcc_assert (LABEL_P (insn));
2010 cet_eb = gen_nop_endbr ();
2011 emit_insn_after (cet_eb, insn);
2012 }
2013 continue;
2014 }
2015
02ed9049 2016 if (LABEL_P (insn) && LABEL_PRESERVE_P (insn))
2bf6d935
ML
2017 {
2018 cet_eb = gen_nop_endbr ();
2019 emit_insn_after (cet_eb, insn);
2020 continue;
2021 }
2022 }
2023 }
2024
2025 timevar_pop (TV_MACH_DEP);
2026 return 0;
2027}
2028
2029namespace {
2030
2031const pass_data pass_data_insert_endbranch =
2032{
2033 RTL_PASS, /* type. */
2034 "cet", /* name. */
2035 OPTGROUP_NONE, /* optinfo_flags. */
2036 TV_MACH_DEP, /* tv_id. */
2037 0, /* properties_required. */
2038 0, /* properties_provided. */
2039 0, /* properties_destroyed. */
2040 0, /* todo_flags_start. */
2041 0, /* todo_flags_finish. */
2042};
2043
2044class pass_insert_endbranch : public rtl_opt_pass
2045{
2046public:
2047 pass_insert_endbranch (gcc::context *ctxt)
2048 : rtl_opt_pass (pass_data_insert_endbranch, ctxt)
2049 {}
2050
2051 /* opt_pass methods: */
2052 virtual bool gate (function *)
2053 {
2054 return ((flag_cf_protection & CF_BRANCH));
2055 }
2056
2057 virtual unsigned int execute (function *)
2058 {
2059 return rest_of_insert_endbranch ();
2060 }
2061
2062}; // class pass_insert_endbranch
2063
2064} // anon namespace
2065
2066rtl_opt_pass *
2067make_pass_insert_endbranch (gcc::context *ctxt)
2068{
2069 return new pass_insert_endbranch (ctxt);
2070}
2071
2072/* At entry of the nearest common dominator for basic blocks with
2073 conversions, generate a single
2074 vxorps %xmmN, %xmmN, %xmmN
2075 for all
2076 vcvtss2sd op, %xmmN, %xmmX
2077 vcvtsd2ss op, %xmmN, %xmmX
2078 vcvtsi2ss op, %xmmN, %xmmX
2079 vcvtsi2sd op, %xmmN, %xmmX
2080
2081 NB: We want to generate only a single vxorps to cover the whole
2082 function. The LCM algorithm isn't appropriate here since it may
2083 place a vxorps inside the loop. */
2084
2085static unsigned int
2086remove_partial_avx_dependency (void)
2087{
2088 timevar_push (TV_MACH_DEP);
2089
2090 bitmap_obstack_initialize (NULL);
2091 bitmap convert_bbs = BITMAP_ALLOC (NULL);
2092
2093 basic_block bb;
2094 rtx_insn *insn, *set_insn;
2095 rtx set;
2096 rtx v4sf_const0 = NULL_RTX;
2097
2098 auto_vec<rtx_insn *> control_flow_insns;
2099
2100 FOR_EACH_BB_FN (bb, cfun)
2101 {
2102 FOR_BB_INSNS (bb, insn)
2103 {
2104 if (!NONDEBUG_INSN_P (insn))
2105 continue;
2106
2107 set = single_set (insn);
2108 if (!set)
2109 continue;
2110
2111 if (get_attr_avx_partial_xmm_update (insn)
2112 != AVX_PARTIAL_XMM_UPDATE_TRUE)
2113 continue;
2114
2115 if (!v4sf_const0)
2116 {
2117 calculate_dominance_info (CDI_DOMINATORS);
2118 df_set_flags (DF_DEFER_INSN_RESCAN);
2119 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2120 df_md_add_problem ();
2121 df_analyze ();
2122 v4sf_const0 = gen_reg_rtx (V4SFmode);
2123 }
2124
2125 /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF,
2126 SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and
2127 vec_merge with subreg. */
2128 rtx src = SET_SRC (set);
2129 rtx dest = SET_DEST (set);
2130 machine_mode dest_mode = GET_MODE (dest);
2131
2132 rtx zero;
2133 machine_mode dest_vecmode;
2134 if (dest_mode == E_SFmode)
2135 {
2136 dest_vecmode = V4SFmode;
2137 zero = v4sf_const0;
2138 }
2139 else
2140 {
2141 dest_vecmode = V2DFmode;
2142 zero = gen_rtx_SUBREG (V2DFmode, v4sf_const0, 0);
2143 }
2144
2145 /* Change source to vector mode. */
2146 src = gen_rtx_VEC_DUPLICATE (dest_vecmode, src);
2147 src = gen_rtx_VEC_MERGE (dest_vecmode, src, zero,
2148 GEN_INT (HOST_WIDE_INT_1U));
2149 /* Change destination to vector mode. */
2150 rtx vec = gen_reg_rtx (dest_vecmode);
2151 /* Generate an XMM vector SET. */
2152 set = gen_rtx_SET (vec, src);
2153 set_insn = emit_insn_before (set, insn);
2154 df_insn_rescan (set_insn);
2155
2156 if (cfun->can_throw_non_call_exceptions)
2157 {
2158 /* Handle REG_EH_REGION note. */
2159 rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
2160 if (note)
2161 {
2162 control_flow_insns.safe_push (set_insn);
2163 add_reg_note (set_insn, REG_EH_REGION, XEXP (note, 0));
2164 }
2165 }
2166
2167 src = gen_rtx_SUBREG (dest_mode, vec, 0);
2168 set = gen_rtx_SET (dest, src);
2169
2170 /* Drop possible dead definitions. */
2171 PATTERN (insn) = set;
2172
2173 INSN_CODE (insn) = -1;
2174 recog_memoized (insn);
2175 df_insn_rescan (insn);
2176 bitmap_set_bit (convert_bbs, bb->index);
2177 }
2178 }
2179
2180 if (v4sf_const0)
2181 {
2182 /* (Re-)discover loops so that bb->loop_father can be used in the
2183 analysis below. */
2184 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2185
2186 /* Generate a vxorps at entry of the nearest dominator for basic
2187 blocks with conversions, which is in the the fake loop that
2188 contains the whole function, so that there is only a single
2189 vxorps in the whole function. */
2190 bb = nearest_common_dominator_for_set (CDI_DOMINATORS,
2191 convert_bbs);
2192 while (bb->loop_father->latch
2193 != EXIT_BLOCK_PTR_FOR_FN (cfun))
2194 bb = get_immediate_dominator (CDI_DOMINATORS,
2195 bb->loop_father->header);
2196
2197 set = gen_rtx_SET (v4sf_const0, CONST0_RTX (V4SFmode));
2198
2199 insn = BB_HEAD (bb);
2200 while (insn && !NONDEBUG_INSN_P (insn))
2201 {
2202 if (insn == BB_END (bb))
2203 {
2204 insn = NULL;
2205 break;
2206 }
2207 insn = NEXT_INSN (insn);
2208 }
2209 if (insn == BB_HEAD (bb))
2210 set_insn = emit_insn_before (set, insn);
2211 else
2212 set_insn = emit_insn_after (set,
2213 insn ? PREV_INSN (insn) : BB_END (bb));
2214 df_insn_rescan (set_insn);
2215 df_process_deferred_rescans ();
2216 loop_optimizer_finalize ();
2217
2218 if (!control_flow_insns.is_empty ())
2219 {
2220 free_dominance_info (CDI_DOMINATORS);
2221
2222 unsigned int i;
2223 FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
2224 if (control_flow_insn_p (insn))
2225 {
2226 /* Split the block after insn. There will be a fallthru
2227 edge, which is OK so we keep it. We have to create
2228 the exception edges ourselves. */
2229 bb = BLOCK_FOR_INSN (insn);
2230 split_block (bb, insn);
2231 rtl_make_eh_edge (NULL, bb, BB_END (bb));
2232 }
2233 }
2234 }
2235
2236 bitmap_obstack_release (NULL);
2237 BITMAP_FREE (convert_bbs);
2238
2239 timevar_pop (TV_MACH_DEP);
2240 return 0;
2241}
2242
2243namespace {
2244
2245const pass_data pass_data_remove_partial_avx_dependency =
2246{
2247 RTL_PASS, /* type */
2248 "rpad", /* name */
2249 OPTGROUP_NONE, /* optinfo_flags */
2250 TV_MACH_DEP, /* tv_id */
2251 0, /* properties_required */
2252 0, /* properties_provided */
2253 0, /* properties_destroyed */
2254 0, /* todo_flags_start */
2255 TODO_df_finish, /* todo_flags_finish */
2256};
2257
2258class pass_remove_partial_avx_dependency : public rtl_opt_pass
2259{
2260public:
2261 pass_remove_partial_avx_dependency (gcc::context *ctxt)
2262 : rtl_opt_pass (pass_data_remove_partial_avx_dependency, ctxt)
2263 {}
2264
2265 /* opt_pass methods: */
2266 virtual bool gate (function *)
2267 {
2268 return (TARGET_AVX
2269 && TARGET_SSE_PARTIAL_REG_DEPENDENCY
2270 && TARGET_SSE_MATH
2271 && optimize
2272 && optimize_function_for_speed_p (cfun));
2273 }
2274
2275 virtual unsigned int execute (function *)
2276 {
2277 return remove_partial_avx_dependency ();
2278 }
2279}; // class pass_rpad
2280
2281} // anon namespace
2282
/* Factory used by the pass manager (via passes.def) to create an
   instance of the rpad pass in context CTXT.  */

rtl_opt_pass *
make_pass_remove_partial_avx_dependency (gcc::context *ctxt)
{
  return new pass_remove_partial_avx_dependency (ctxt);
}
2288
2289/* This compares the priority of target features in function DECL1
2290 and DECL2. It returns positive value if DECL1 is higher priority,
2291 negative value if DECL2 is higher priority and 0 if they are the
2292 same. */
2293
2294int
2295ix86_compare_version_priority (tree decl1, tree decl2)
2296{
2297 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
2298 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
2299
2300 return (int)priority1 - (int)priority2;
2301}
2302
2303/* V1 and V2 point to function versions with different priorities
2304 based on the target ISA. This function compares their priorities. */
2305
2306static int
2307feature_compare (const void *v1, const void *v2)
2308{
2309 typedef struct _function_version_info
2310 {
2311 tree version_decl;
2312 tree predicate_chain;
2313 unsigned int dispatch_priority;
2314 } function_version_info;
2315
2316 const function_version_info c1 = *(const function_version_info *)v1;
2317 const function_version_info c2 = *(const function_version_info *)v2;
2318 return (c2.dispatch_priority - c1.dispatch_priority);
2319}
2320
2321/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
2322 to return a pointer to VERSION_DECL if the outcome of the expression
2323 formed by PREDICATE_CHAIN is true. This function will be called during
2324 version dispatch to decide which function version to execute. It returns
2325 the basic block at the end, to which more conditions can be added. */
2326
static basic_block
add_condition_to_bb (tree function_decl, tree version_decl,
		     tree predicate_chain, basic_block new_bb)
{
  gimple *return_stmt;
  tree convert_expr, result_var;
  gimple *convert_stmt;
  gimple *call_cond_stmt;
  gimple *if_else_stmt;

  basic_block bb1, bb2, bb3;
  edge e12, e23;

  tree cond_var, and_expr_var = NULL_TREE;
  gimple_seq gseq;

  tree predicate_decl, predicate_arg;

  /* All GIMPLE/CFG construction below happens in the dispatcher's
     function context.  */
  push_cfun (DECL_STRUCT_FUNCTION (function_decl));

  gcc_assert (new_bb != NULL);
  gseq = bb_seq (new_bb);


  /* Build "return (void *) &version_decl;" — the statements emitted
     when this version is selected.  */
  convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
			 build_fold_addr_expr (version_decl));
  result_var = create_tmp_var (ptr_type_node);
  convert_stmt = gimple_build_assign (result_var, convert_expr);
  return_stmt = gimple_build_return (result_var);

  /* No predicate means this version is returned unconditionally (the
     default version); emit the return straight into NEW_BB.  */
  if (predicate_chain == NULL_TREE)
    {
      gimple_seq_add_stmt (&gseq, convert_stmt);
      gimple_seq_add_stmt (&gseq, return_stmt);
      set_bb_seq (new_bb, gseq);
      gimple_set_bb (convert_stmt, new_bb);
      gimple_set_bb (return_stmt, new_bb);
      pop_cfun ();
      return new_bb;
    }

  /* Call each predicate in the chain and AND the integer results
     together into AND_EXPR_VAR.  */
  while (predicate_chain != NULL)
    {
      cond_var = create_tmp_var (integer_type_node);
      predicate_decl = TREE_PURPOSE (predicate_chain);
      predicate_arg = TREE_VALUE (predicate_chain);
      call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
      gimple_call_set_lhs (call_cond_stmt, cond_var);

      gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
      gimple_set_bb (call_cond_stmt, new_bb);
      gimple_seq_add_stmt (&gseq, call_cond_stmt);

      predicate_chain = TREE_CHAIN (predicate_chain);

      if (and_expr_var == NULL)
	and_expr_var = cond_var;
      else
	{
	  gimple *assign_stmt;
	  /* Use MIN_EXPR to check if any integer is zero?.
	     and_expr_var = min_expr <cond_var, and_expr_var>  */
	  assign_stmt = gimple_build_assign (and_expr_var,
					     build2 (MIN_EXPR, integer_type_node,
						     cond_var, and_expr_var));

	  gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
	  gimple_set_bb (assign_stmt, new_bb);
	  gimple_seq_add_stmt (&gseq, assign_stmt);
	}
    }

  /* if (and_expr_var > 0) — all predicates returned nonzero.  */
  if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
				    integer_zero_node,
				    NULL_TREE, NULL_TREE);
  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (if_else_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, if_else_stmt);

  gimple_seq_add_stmt (&gseq, convert_stmt);
  gimple_seq_add_stmt (&gseq, return_stmt);
  set_bb_seq (new_bb, gseq);

  /* Split NEW_BB after the condition: BB1 ends in the condition, BB2
     holds the return.  The fallthru edge becomes the TRUE edge.  */
  bb1 = new_bb;
  e12 = split_block (bb1, if_else_stmt);
  bb2 = e12->dest;
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_TRUE_VALUE;

  /* Split again after the return; BB3 is where the next version's
     condition will be appended by the caller.  */
  e23 = split_block (bb2, return_stmt);

  gimple_set_bb (convert_stmt, bb2);
  gimple_set_bb (return_stmt, bb2);

  /* FALSE edge skips the return and falls through to BB3.  */
  bb3 = e23->dest;
  make_edge (bb1, bb3, EDGE_FALSE_VALUE);

  /* BB2 returns, so it must go to the exit block, not to BB3.  */
  remove_edge (e23);
  make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);

  pop_cfun ();

  return bb3;
}
2431
2432/* This function generates the dispatch function for
2433 multi-versioned functions. DISPATCH_DECL is the function which will
2434 contain the dispatch logic. FNDECLS are the function choices for
2435 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
2436 in DISPATCH_DECL in which the dispatch code is generated. */
2437
static int
dispatch_function_versions (tree dispatch_decl,
			    void *fndecls_p,
			    basic_block *empty_bb)
{
  tree default_decl;
  gimple *ifunc_cpu_init_stmt;
  gimple_seq gseq;
  int ix;
  tree ele;
  vec<tree> *fndecls;
  unsigned int num_versions = 0;
  unsigned int actual_versions = 0;
  unsigned int i;

  /* One record per non-default version; sorted by priority below.
     feature_compare relies on this exact layout.  */
  struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    }*function_version_info;

  gcc_assert (dispatch_decl != NULL
	      && fndecls_p != NULL
	      && empty_bb != NULL);

  /*fndecls_p is actually a vector.  */
  fndecls = static_cast<vec<tree> *> (fndecls_p);

  /* At least one more version other than the default.  */
  num_versions = fndecls->length ();
  gcc_assert (num_versions >= 2);

  function_version_info = (struct _function_version_info *)
    XNEWVEC (struct _function_version_info, (num_versions - 1));

  /* The first version in the vector is the default decl.  */
  default_decl = (*fndecls)[0];

  push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));

  gseq = bb_seq (*empty_bb);
  /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
     constructors, so explicity call __builtin_cpu_init here.  */
  ifunc_cpu_init_stmt
    = gimple_build_call_vec (get_ix86_builtin (IX86_BUILTIN_CPU_INIT), vNULL);
  gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
  gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
  set_bb_seq (*empty_bb, gseq);

  pop_cfun ();


  /* Collect predicate chain and priority for every non-default
     version (index 0 is the default and is handled last).  */
  for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    {
      tree version_decl = ele;
      tree predicate_chain = NULL_TREE;
      unsigned int priority;
      /* Get attribute string, parse it and find the right predicate decl.
	 The predicate function could be a lengthy combination of many
	 features, like arch-type and various isa-variants.  */
      priority = get_builtin_code_for_version (version_decl,
					       &predicate_chain);

      /* Versions without a predicate cannot be dispatched on.  */
      if (predicate_chain == NULL_TREE)
	continue;

      function_version_info [actual_versions].version_decl = version_decl;
      function_version_info [actual_versions].predicate_chain
	= predicate_chain;
      function_version_info [actual_versions].dispatch_priority = priority;
      actual_versions++;
    }

  /* Sort the versions according to descending order of dispatch priority.  The
     priority is based on the ISA.  This is not a perfect solution.  There
     could still be ambiguity.  If more than one function version is suitable
     to execute, which one should be dispatched?  In future, allow the user
     to specify a dispatch priority next to the version.  */
  qsort (function_version_info, actual_versions,
	 sizeof (struct _function_version_info), feature_compare);

  /* Emit one conditional-return block per version, highest priority
     first; each call returns the block for the next condition.  */
  for  (i = 0; i < actual_versions; ++i)
    *empty_bb = add_condition_to_bb (dispatch_decl,
				     function_version_info[i].version_decl,
				     function_version_info[i].predicate_chain,
				     *empty_bb);

  /* dispatch default version at the end.  */
  *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
				   NULL, *empty_bb);

  free (function_version_info);
  return 0;
}
2533
2534/* This function changes the assembler name for functions that are
2535 versions. If DECL is a function version and has a "target"
2536 attribute, it appends the attribute string to its assembler name. */
2537
2538static tree
2539ix86_mangle_function_version_assembler_name (tree decl, tree id)
2540{
2541 tree version_attr;
2542 const char *orig_name, *version_string;
2543 char *attr_str, *assembler_name;
2544
2545 if (DECL_DECLARED_INLINE_P (decl)
2546 && lookup_attribute ("gnu_inline",
2547 DECL_ATTRIBUTES (decl)))
2548 error_at (DECL_SOURCE_LOCATION (decl),
a9c697b8 2549 "function versions cannot be marked as %<gnu_inline%>,"
2bf6d935
ML
2550 " bodies have to be generated");
2551
2552 if (DECL_VIRTUAL_P (decl)
2553 || DECL_VINDEX (decl))
2554 sorry ("virtual function multiversioning not supported");
2555
2556 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
2557
2558 /* target attribute string cannot be NULL. */
2559 gcc_assert (version_attr != NULL_TREE);
2560
2561 orig_name = IDENTIFIER_POINTER (id);
2562 version_string
2563 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
2564
2565 if (strcmp (version_string, "default") == 0)
2566 return id;
2567
2568 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
2569 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
2570
2571 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
2572
2573 /* Allow assembler name to be modified if already set. */
2574 if (DECL_ASSEMBLER_NAME_SET_P (decl))
2575 SET_DECL_RTL (decl, NULL);
2576
2577 tree ret = get_identifier (assembler_name);
2578 XDELETEVEC (attr_str);
2579 XDELETEVEC (assembler_name);
2580 return ret;
2581}
2582
/* Target hook TARGET_MANGLE_DECL_ASSEMBLER_NAME: return the (possibly
   adjusted) assembler name ID for DECL.  */

tree
ix86_mangle_decl_assembler_name (tree decl, tree id)
{
  /* For function version, add the target suffix to the assembler name.  */
  if (TREE_CODE (decl) == FUNCTION_DECL
      && DECL_FUNCTION_VERSIONED (decl))
    id = ix86_mangle_function_version_assembler_name (decl, id);
#ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
  /* Give the subtarget a further chance to adjust the name.  */
  id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
#endif

  return id;
}
2596
2597/* Make a dispatcher declaration for the multi-versioned function DECL.
2598 Calls to DECL function will be replaced with calls to the dispatcher
2599 by the front-end. Returns the decl of the dispatcher function. */
2600
tree
ix86_get_function_versions_dispatcher (void *decl)
{
  tree fn = (tree) decl;
  struct cgraph_node *node = NULL;
  struct cgraph_node *default_node = NULL;
  struct cgraph_function_version_info *node_v = NULL;
  struct cgraph_function_version_info *first_v = NULL;

  tree dispatch_decl = NULL;

  struct cgraph_function_version_info *default_version_info = NULL;

  gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));

  node = cgraph_node::get (fn);
  gcc_assert (node != NULL);

  node_v = node->function_version ();
  gcc_assert (node_v != NULL);

  /* A dispatcher was already created for this version set; reuse it.  */
  if (node_v->dispatcher_resolver != NULL)
    return node_v->dispatcher_resolver;

  /* Find the default version and make it the first node.  */
  first_v = node_v;
  /* Go to the beginning of the chain.  */
  while (first_v->prev != NULL)
    first_v = first_v->prev;
  default_version_info = first_v;
  while (default_version_info != NULL)
    {
      if (is_function_default_version
	    (default_version_info->this_node->decl))
        break;
      default_version_info = default_version_info->next;
    }

  /* If there is no default node, just return NULL.  */
  if (default_version_info == NULL)
    return NULL;

  /* Make default info the first node.  */
  if (first_v != default_version_info)
    {
      /* Unlink the default node and splice it in at the head of the
	 doubly-linked version chain.  */
      default_version_info->prev->next = default_version_info->next;
      if (default_version_info->next)
        default_version_info->next->prev = default_version_info->prev;
      first_v->prev = default_version_info;
      default_version_info->next = first_v;
      default_version_info->prev = NULL;
    }

  default_node = default_version_info->this_node;

#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
  if (targetm.has_ifunc_p ())
    {
      struct cgraph_function_version_info *it_v = NULL;
      struct cgraph_node *dispatcher_node = NULL;
      struct cgraph_function_version_info *dispatcher_version_info = NULL;

      /* Right now, the dispatching is done via ifunc.  */
      dispatch_decl = make_dispatcher_decl (default_node->decl);

      dispatcher_node = cgraph_node::get_create (dispatch_decl);
      gcc_assert (dispatcher_node != NULL);
      dispatcher_node->dispatcher_function = 1;
      dispatcher_version_info
	= dispatcher_node->insert_new_function_version ();
      dispatcher_version_info->next = default_version_info;
      dispatcher_node->definition = 1;

      /* Set the dispatcher for all the versions.  */
      it_v = default_version_info;
      while (it_v != NULL)
	{
	  it_v->dispatcher_resolver = dispatch_decl;
	  it_v = it_v->next;
	}
    }
  else
#endif
    {
      error_at (DECL_SOURCE_LOCATION (default_node->decl),
		"multiversioning needs %<ifunc%> which is not supported "
		"on this target");
    }

  return dispatch_decl;
}
2692
2693/* Make the resolver function decl to dispatch the versions of
2694 a multi-versioned function, DEFAULT_DECL. IFUNC_ALIAS_DECL is
2695 ifunc alias that will point to the created resolver. Create an
2696 empty basic block in the resolver and store the pointer in
2697 EMPTY_BB. Return the decl of the resolver function. */
2698
static tree
make_resolver_func (const tree default_decl,
		    const tree ifunc_alias_decl,
		    basic_block *empty_bb)
{
  char *resolver_name;
  tree decl, type, decl_name, t;

  /* IFUNC's have to be globally visible.  So, if the default_decl is
     not, then the name of the IFUNC should be made unique.  */
  if (TREE_PUBLIC (default_decl) == 0)
    {
      char *ifunc_name = make_unique_name (default_decl, "ifunc", true);
      symtab->change_decl_assembler_name (ifunc_alias_decl,
					  get_identifier (ifunc_name));
      XDELETEVEC (ifunc_name);
    }

  resolver_name = make_unique_name (default_decl, "resolver", false);

  /* The resolver function should return a (void *).  */
  type = build_function_type_list (ptr_type_node, NULL_TREE);

  decl = build_fn_decl (resolver_name, type);
  decl_name = get_identifier (resolver_name);
  SET_DECL_ASSEMBLER_NAME (decl, decl_name);

  /* Compiler-generated, local, never inlined: the resolver runs at
     load time, before the versions themselves are ever called.  */
  DECL_NAME (decl) = decl_name;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 1;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;

  /* Resolver is not external, body is generated.  */
  DECL_EXTERNAL (decl) = 0;
  DECL_EXTERNAL (ifunc_alias_decl) = 0;

  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);
  DECL_STATIC_CONSTRUCTOR (decl) = 0;

  if (DECL_COMDAT_GROUP (default_decl)
      || TREE_PUBLIC (default_decl))
    {
      /* In this case, each translation unit with a call to this
	 versioned function will put out a resolver.  Ensure it
	 is comdat to keep just one copy.  */
      DECL_COMDAT (decl) = 1;
      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
    }
  /* Build result decl and add to function_decl.  */
  t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
  DECL_CONTEXT (t) = decl;
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  /* Lower the function and hand back its (empty) entry block, where the
     caller will generate the dispatch code.  */
  gimplify_function_tree (decl);
  push_cfun (DECL_STRUCT_FUNCTION (decl));
  *empty_bb = init_lowered_empty_function (decl, false,
					   profile_count::uninitialized ());

  cgraph_node::add_new_function (decl, true);
  symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));

  pop_cfun ();

  gcc_assert (ifunc_alias_decl != NULL);
  /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name.  */
  DECL_ATTRIBUTES (ifunc_alias_decl)
    = make_attribute ("ifunc", resolver_name,
		      DECL_ATTRIBUTES (ifunc_alias_decl));

  /* Create the alias for dispatch to resolver here.  */
  cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
  XDELETEVEC (resolver_name);
  return decl;
}
2778
2779/* Generate the dispatching code body to dispatch multi-versioned function
2780 DECL. The target hook is called to process the "target" attributes and
2781 provide the code to dispatch the right function at run-time. NODE points
2782 to the dispatcher decl whose body will be created. */
2783
tree
ix86_generate_version_dispatcher_body (void *node_p)
{
  tree resolver_decl;
  basic_block empty_bb;
  tree default_ver_decl;
  struct cgraph_node *versn;
  struct cgraph_node *node;

  struct cgraph_function_version_info *node_version_info = NULL;
  struct cgraph_function_version_info *versn_info = NULL;

  node = (cgraph_node *)node_p;

  node_version_info = node->function_version ();
  gcc_assert (node->dispatcher_function
	      && node_version_info != NULL);

  /* The resolver body was already generated for this dispatcher.  */
  if (node_version_info->dispatcher_resolver)
    return node_version_info->dispatcher_resolver;

  /* The first version in the chain corresponds to the default version.  */
  default_ver_decl = node_version_info->next->this_node->decl;

  /* node is going to be an alias, so remove the finalized bit.  */
  node->definition = false;

  resolver_decl = make_resolver_func (default_ver_decl,
				      node->decl, &empty_bb);

  node_version_info->dispatcher_resolver = resolver_decl;

  push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));

  /* Gather all version decls (default first) to dispatch between.  */
  auto_vec<tree, 2> fn_ver_vec;

  for (versn_info = node_version_info->next; versn_info;
       versn_info = versn_info->next)
    {
      versn = versn_info->this_node;
      /* Check for virtual functions here again, as by this time it should
	 have been determined if this function needs a vtable index or
	 not.  This happens for methods in derived classes that override
	 virtual methods in base classes but are not explicitly marked as
	 virtual.  */
      if (DECL_VINDEX (versn->decl))
	sorry ("virtual function multiversioning not supported");

      fn_ver_vec.safe_push (versn->decl);
    }

  /* Fill the resolver's body with the dispatch logic and rebuild the
     call graph edges the new body introduced.  */
  dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
  cgraph_edge::rebuild_edges ();
  pop_cfun ();
  return resolver_decl;
}
2840
2841
This page took 0.394343 seconds and 5 git commands to generate.