1 /* Copyright (C) 1988-2019 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GCC; see the file COPYING3. If not see
17 <http://www.gnu.org/licenses/>. */
19 #define IN_TARGET_CODE 1
23 #include "coretypes.h"
33 #include "stringpool.h"
40 #include "diagnostic.h"
43 #include "fold-const.h"
46 #include "stor-layout.h"
49 #include "insn-attr.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
60 #include "tm-constrs.h"
63 #include "sched-int.h"
65 #include "tree-pass.h"
67 #include "pass_manager.h"
68 #include "target-globals.h"
69 #include "gimple-iterator.h"
70 #include "tree-vectorizer.h"
71 #include "shrink-wrap.h"
74 #include "tree-iterator.h"
76 #include "case-cfn-macros.h"
78 #include "fold-const-call.h"
80 #include "tree-ssanames.h"
82 #include "selftest-rtl.h"
83 #include "print-rtl.h"
86 #include "symbol-summary.h"
88 #include "ipa-fnsummary.h"
89 #include "wide-int-bitmask.h"
90 #include "tree-vector-builder.h"
92 #include "dwarf2out.h"
93 #include "i386-builtins.h"
94 #include "i386-features.h"
96 const char * const xlogue_layout::STUB_BASE_NAMES
[XLOGUE_STUB_COUNT
] = {
105 const unsigned xlogue_layout::REG_ORDER
[xlogue_layout::MAX_REGS
] = {
106 /* The below offset values are where each register is stored for the layout
107 relative to incoming stack pointer. The value of each m_regs[].offset will
108 be relative to the incoming base pointer (rax or rsi) used by the stub.
111 Offset: realigned or aligned + 8
112 Register aligned aligned + 8 aligned w/HFP w/HFP */
113 XMM15_REG
, /* 0x10 0x18 0x10 0x18 */
114 XMM14_REG
, /* 0x20 0x28 0x20 0x28 */
115 XMM13_REG
, /* 0x30 0x38 0x30 0x38 */
116 XMM12_REG
, /* 0x40 0x48 0x40 0x48 */
117 XMM11_REG
, /* 0x50 0x58 0x50 0x58 */
118 XMM10_REG
, /* 0x60 0x68 0x60 0x68 */
119 XMM9_REG
, /* 0x70 0x78 0x70 0x78 */
120 XMM8_REG
, /* 0x80 0x88 0x80 0x88 */
121 XMM7_REG
, /* 0x90 0x98 0x90 0x98 */
122 XMM6_REG
, /* 0xa0 0xa8 0xa0 0xa8 */
123 SI_REG
, /* 0xa8 0xb0 0xa8 0xb0 */
124 DI_REG
, /* 0xb0 0xb8 0xb0 0xb8 */
125 BX_REG
, /* 0xb8 0xc0 0xb8 0xc0 */
126 BP_REG
, /* 0xc0 0xc8 N/A N/A */
127 R12_REG
, /* 0xc8 0xd0 0xc0 0xc8 */
128 R13_REG
, /* 0xd0 0xd8 0xc8 0xd0 */
129 R14_REG
, /* 0xd8 0xe0 0xd0 0xd8 */
130 R15_REG
, /* 0xe0 0xe8 0xd8 0xe0 */
133 /* Instantiate static const values. */
134 const HOST_WIDE_INT
xlogue_layout::STUB_INDEX_OFFSET
;
135 const unsigned xlogue_layout::MIN_REGS
;
136 const unsigned xlogue_layout::MAX_REGS
;
137 const unsigned xlogue_layout::MAX_EXTRA_REGS
;
138 const unsigned xlogue_layout::VARIANT_COUNT
;
139 const unsigned xlogue_layout::STUB_NAME_MAX_LEN
;
141 /* Initialize xlogue_layout::s_stub_names to zero. */
142 char xlogue_layout::s_stub_names
[2][XLOGUE_STUB_COUNT
][VARIANT_COUNT
]
145 /* Instantiates all xlogue_layout instances. */
146 const xlogue_layout
xlogue_layout::s_instances
[XLOGUE_SET_COUNT
] = {
147 xlogue_layout (0, false),
148 xlogue_layout (8, false),
149 xlogue_layout (0, true),
150 xlogue_layout (8, true)
153 /* Return an appropriate const instance of xlogue_layout based upon values
154 in cfun->machine and crtl. */
155 const class xlogue_layout
&
156 xlogue_layout::get_instance ()
158 enum xlogue_stub_sets stub_set
;
159 bool aligned_plus_8
= cfun
->machine
->call_ms2sysv_pad_in
;
161 if (stack_realign_fp
)
162 stub_set
= XLOGUE_SET_HFP_ALIGNED_OR_REALIGN
;
163 else if (frame_pointer_needed
)
164 stub_set
= aligned_plus_8
165 ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
166 : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN
;
168 stub_set
= aligned_plus_8
? XLOGUE_SET_ALIGNED_PLUS_8
: XLOGUE_SET_ALIGNED
;
170 return s_instances
[stub_set
];
173 /* Determine how many clobbered registers can be saved by the stub.
174 Returns the count of registers the stub will save and restore. */
176 xlogue_layout::count_stub_managed_regs ()
178 bool hfp
= frame_pointer_needed
|| stack_realign_fp
;
182 for (count
= i
= MIN_REGS
; i
< MAX_REGS
; ++i
)
184 regno
= REG_ORDER
[i
];
185 if (regno
== BP_REG
&& hfp
)
187 if (!ix86_save_reg (regno
, false, false))
194 /* Determine if register REGNO is a stub managed register given the
195 total COUNT of stub managed registers. */
197 xlogue_layout::is_stub_managed_reg (unsigned regno
, unsigned count
)
199 bool hfp
= frame_pointer_needed
|| stack_realign_fp
;
202 for (i
= 0; i
< count
; ++i
)
204 gcc_assert (i
< MAX_REGS
);
205 if (REG_ORDER
[i
] == BP_REG
&& hfp
)
207 else if (REG_ORDER
[i
] == regno
)
213 /* Constructor for xlogue_layout. */
214 xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in
, bool hfp
)
215 : m_hfp (hfp
) , m_nregs (hfp
? 17 : 18),
216 m_stack_align_off_in (stack_align_off_in
)
218 HOST_WIDE_INT offset
= stack_align_off_in
;
221 for (i
= j
= 0; i
< MAX_REGS
; ++i
)
223 unsigned regno
= REG_ORDER
[i
];
225 if (regno
== BP_REG
&& hfp
)
227 if (SSE_REGNO_P (regno
))
230 /* Verify that SSE regs are always aligned. */
231 gcc_assert (!((stack_align_off_in
+ offset
) & 15));
236 m_regs
[j
].regno
= regno
;
237 m_regs
[j
++].offset
= offset
- STUB_INDEX_OFFSET
;
239 gcc_assert (j
== m_nregs
);
243 xlogue_layout::get_stub_name (enum xlogue_stub stub
,
244 unsigned n_extra_regs
)
246 const int have_avx
= TARGET_AVX
;
247 char *name
= s_stub_names
[!!have_avx
][stub
][n_extra_regs
];
252 int res
= snprintf (name
, STUB_NAME_MAX_LEN
, "__%s_%s_%u",
253 (have_avx
? "avx" : "sse"),
254 STUB_BASE_NAMES
[stub
],
255 MIN_REGS
+ n_extra_regs
);
256 gcc_checking_assert (res
< (int)STUB_NAME_MAX_LEN
);
262 /* Return rtx of a symbol ref for the entry point (based upon
263 cfun->machine->call_ms2sysv_extra_regs) of the specified stub. */
265 xlogue_layout::get_stub_rtx (enum xlogue_stub stub
)
267 const unsigned n_extra_regs
= cfun
->machine
->call_ms2sysv_extra_regs
;
268 gcc_checking_assert (n_extra_regs
<= MAX_EXTRA_REGS
);
269 gcc_assert (stub
< XLOGUE_STUB_COUNT
);
270 gcc_assert (crtl
->stack_realign_finalized
);
272 return gen_rtx_SYMBOL_REF (Pmode
, get_stub_name (stub
, n_extra_regs
));
275 unsigned scalar_chain::max_id
= 0;
279 /* Initialize new chain. */
281 scalar_chain::scalar_chain (enum machine_mode smode_
, enum machine_mode vmode_
)
289 fprintf (dump_file
, "Created a new instruction chain #%d\n", chain_id
);
291 bitmap_obstack_initialize (NULL
);
292 insns
= BITMAP_ALLOC (NULL
);
293 defs
= BITMAP_ALLOC (NULL
);
294 defs_conv
= BITMAP_ALLOC (NULL
);
298 /* Free chain's data. */
300 scalar_chain::~scalar_chain ()
304 BITMAP_FREE (defs_conv
);
305 bitmap_obstack_release (NULL
);
308 /* Add instruction into chain's queue. */
311 scalar_chain::add_to_queue (unsigned insn_uid
)
313 if (bitmap_bit_p (insns
, insn_uid
)
314 || bitmap_bit_p (queue
, insn_uid
))
318 fprintf (dump_file
, " Adding insn %d into chain's #%d queue\n",
320 bitmap_set_bit (queue
, insn_uid
);
323 general_scalar_chain::general_scalar_chain (enum machine_mode smode_
,
324 enum machine_mode vmode_
)
325 : scalar_chain (smode_
, vmode_
)
327 insns_conv
= BITMAP_ALLOC (NULL
);
328 n_sse_to_integer
= 0;
329 n_integer_to_sse
= 0;
332 general_scalar_chain::~general_scalar_chain ()
334 BITMAP_FREE (insns_conv
);
337 /* For DImode conversion, mark register defined by DEF as requiring conversion. */
341 general_scalar_chain::mark_dual_mode_def (df_ref def
)
343 gcc_assert (DF_REF_REG_DEF_P (def
));
345 /* Record the def/insn pair so we can later efficiently iterate over
346 the defs to convert on insns not in the chain. */
347 bool reg_new
= bitmap_set_bit (defs_conv
, DF_REF_REGNO (def
));
348 if (!bitmap_bit_p (insns
, DF_REF_INSN_UID (def
)))
350 if (!bitmap_set_bit (insns_conv
, DF_REF_INSN_UID (def
))
364 " Mark r%d def in insn %d as requiring both modes in chain #%d\n",
365 DF_REF_REGNO (def
), DF_REF_INSN_UID (def
), chain_id
);
368 /* For TImode conversion, it is unused. */
371 timode_scalar_chain::mark_dual_mode_def (df_ref
)
376 /* Check REF's chain to add new insns into a queue
377 and find registers requiring conversion. */
380 scalar_chain::analyze_register_chain (bitmap candidates
, df_ref ref
)
384 gcc_assert (bitmap_bit_p (insns
, DF_REF_INSN_UID (ref
))
385 || bitmap_bit_p (candidates
, DF_REF_INSN_UID (ref
)));
386 add_to_queue (DF_REF_INSN_UID (ref
));
388 for (chain
= DF_REF_CHAIN (ref
); chain
; chain
= chain
->next
)
390 unsigned uid
= DF_REF_INSN_UID (chain
->ref
);
392 if (!NONDEBUG_INSN_P (DF_REF_INSN (chain
->ref
)))
395 if (!DF_REF_REG_MEM_P (chain
->ref
))
397 if (bitmap_bit_p (insns
, uid
))
400 if (bitmap_bit_p (candidates
, uid
))
407 if (DF_REF_REG_DEF_P (chain
->ref
))
410 fprintf (dump_file
, " r%d def in insn %d isn't convertible\n",
411 DF_REF_REGNO (chain
->ref
), uid
);
412 mark_dual_mode_def (chain
->ref
);
417 fprintf (dump_file
, " r%d use in insn %d isn't convertible\n",
418 DF_REF_REGNO (chain
->ref
), uid
);
419 mark_dual_mode_def (ref
);
424 /* Add instruction into a chain. */
427 scalar_chain::add_insn (bitmap candidates
, unsigned int insn_uid
)
429 if (bitmap_bit_p (insns
, insn_uid
))
433 fprintf (dump_file
, " Adding insn %d to chain #%d\n", insn_uid
, chain_id
);
435 bitmap_set_bit (insns
, insn_uid
);
437 rtx_insn
*insn
= DF_INSN_UID_GET (insn_uid
)->insn
;
438 rtx def_set
= single_set (insn
);
439 if (def_set
&& REG_P (SET_DEST (def_set
))
440 && !HARD_REGISTER_P (SET_DEST (def_set
)))
441 bitmap_set_bit (defs
, REGNO (SET_DEST (def_set
)));
443 /* ??? The following is quadratic since analyze_register_chain
444 iterates over all refs to look for dual-mode regs. Instead this
445 should be done separately for all regs mentioned in the chain once. */
447 for (ref
= DF_INSN_UID_DEFS (insn_uid
); ref
; ref
= DF_REF_NEXT_LOC (ref
))
448 if (!HARD_REGISTER_P (DF_REF_REG (ref
)))
449 analyze_register_chain (candidates
, ref
);
450 for (ref
= DF_INSN_UID_USES (insn_uid
); ref
; ref
= DF_REF_NEXT_LOC (ref
))
451 if (!DF_REF_REG_MEM_P (ref
))
452 analyze_register_chain (candidates
, ref
);
455 /* Build new chain starting from insn INSN_UID recursively
456 adding all dependent uses and definitions. */
459 scalar_chain::build (bitmap candidates
, unsigned insn_uid
)
461 queue
= BITMAP_ALLOC (NULL
);
462 bitmap_set_bit (queue
, insn_uid
);
465 fprintf (dump_file
, "Building chain #%d...\n", chain_id
);
467 while (!bitmap_empty_p (queue
))
469 insn_uid
= bitmap_first_set_bit (queue
);
470 bitmap_clear_bit (queue
, insn_uid
);
471 bitmap_clear_bit (candidates
, insn_uid
);
472 add_insn (candidates
, insn_uid
);
477 fprintf (dump_file
, "Collected chain #%d...\n", chain_id
);
478 fprintf (dump_file
, " insns: ");
479 dump_bitmap (dump_file
, insns
);
480 if (!bitmap_empty_p (defs_conv
))
484 const char *comma
= "";
485 fprintf (dump_file
, " defs to convert: ");
486 EXECUTE_IF_SET_IN_BITMAP (defs_conv
, 0, id
, bi
)
488 fprintf (dump_file
, "%sr%d", comma
, id
);
491 fprintf (dump_file
, "\n");
498 /* Return a cost of building a vector constant
499 instead of using a scalar one. */
502 general_scalar_chain::vector_const_cost (rtx exp
)
504 gcc_assert (CONST_INT_P (exp
));
506 if (standard_sse_constant_p (exp
, vmode
))
507 return ix86_cost
->sse_op
;
508 /* We have separate costs for SImode and DImode, use SImode costs
509 for smaller modes. */
510 return ix86_cost
->sse_load
[smode
== DImode
? 1 : 0];
513 /* Compute a gain for chain conversion. */
516 general_scalar_chain::compute_convert_gain ()
524 fprintf (dump_file
, "Computing gain for chain #%d...\n", chain_id
);
526 /* SSE costs distinguish between SImode and DImode loads/stores, for
527 int costs factor in the number of GPRs involved. When supporting
528 smaller modes than SImode the int load/store costs need to be adjusted as well. */
530 unsigned sse_cost_idx
= smode
== DImode
? 1 : 0;
531 unsigned m
= smode
== DImode
? (TARGET_64BIT
? 1 : 2) : 1;
533 EXECUTE_IF_SET_IN_BITMAP (insns
, 0, insn_uid
, bi
)
535 rtx_insn
*insn
= DF_INSN_UID_GET (insn_uid
)->insn
;
536 rtx def_set
= single_set (insn
);
537 rtx src
= SET_SRC (def_set
);
538 rtx dst
= SET_DEST (def_set
);
541 if (REG_P (src
) && REG_P (dst
))
542 igain
+= 2 * m
- ix86_cost
->xmm_move
;
543 else if (REG_P (src
) && MEM_P (dst
))
545 += m
* ix86_cost
->int_store
[2] - ix86_cost
->sse_store
[sse_cost_idx
];
546 else if (MEM_P (src
) && REG_P (dst
))
547 igain
+= m
* ix86_cost
->int_load
[2] - ix86_cost
->sse_load
[sse_cost_idx
];
548 else if (GET_CODE (src
) == ASHIFT
549 || GET_CODE (src
) == ASHIFTRT
550 || GET_CODE (src
) == LSHIFTRT
)
554 if (INTVAL (XEXP (src
, 1)) >= 32)
555 igain
+= ix86_cost
->add
;
557 igain
+= ix86_cost
->shift_const
;
560 igain
+= ix86_cost
->shift_const
- ix86_cost
->sse_op
;
562 if (CONST_INT_P (XEXP (src
, 0)))
563 igain
-= vector_const_cost (XEXP (src
, 0));
565 else if (GET_CODE (src
) == PLUS
566 || GET_CODE (src
) == MINUS
567 || GET_CODE (src
) == IOR
568 || GET_CODE (src
) == XOR
569 || GET_CODE (src
) == AND
)
571 igain
+= m
* ix86_cost
->add
- ix86_cost
->sse_op
;
572 /* Additional gain for andnot for targets without BMI. */
573 if (GET_CODE (XEXP (src
, 0)) == NOT
575 igain
+= m
* ix86_cost
->add
;
577 if (CONST_INT_P (XEXP (src
, 0)))
578 igain
-= vector_const_cost (XEXP (src
, 0));
579 if (CONST_INT_P (XEXP (src
, 1)))
580 igain
-= vector_const_cost (XEXP (src
, 1));
582 else if (GET_CODE (src
) == NEG
583 || GET_CODE (src
) == NOT
)
584 igain
+= m
* ix86_cost
->add
- ix86_cost
->sse_op
- COSTS_N_INSNS (1);
585 else if (GET_CODE (src
) == SMAX
586 || GET_CODE (src
) == SMIN
587 || GET_CODE (src
) == UMAX
588 || GET_CODE (src
) == UMIN
)
590 /* We do not have any conditional move cost, estimate it as a
591 reg-reg move. Comparisons are costed as adds. */
592 igain
+= m
* (COSTS_N_INSNS (2) + ix86_cost
->add
);
593 /* Integer SSE ops are all costed the same. */
594 igain
-= ix86_cost
->sse_op
;
596 else if (GET_CODE (src
) == COMPARE
)
598 /* Assume comparison cost is the same. */
600 else if (CONST_INT_P (src
))
603 /* DImode can be immediate for TARGET_64BIT and SImode always. */
604 igain
+= m
* COSTS_N_INSNS (1);
605 else if (MEM_P (dst
))
606 igain
+= (m
* ix86_cost
->int_store
[2]
607 - ix86_cost
->sse_store
[sse_cost_idx
]);
608 igain
-= vector_const_cost (src
);
613 if (igain
!= 0 && dump_file
)
615 fprintf (dump_file
, " Instruction gain %d for ", igain
);
616 dump_insn_slim (dump_file
, insn
);
622 fprintf (dump_file
, " Instruction conversion gain: %d\n", gain
);
624 /* Cost the integer to sse and sse to integer moves. */
625 cost
+= n_sse_to_integer
* ix86_cost
->sse_to_integer
;
626 /* ??? integer_to_sse but we only have that in the RA cost table.
627 Assume sse_to_integer/integer_to_sse are the same which they
628 are at the moment. */
629 cost
+= n_integer_to_sse
* ix86_cost
->sse_to_integer
;
632 fprintf (dump_file
, " Registers conversion cost: %d\n", cost
);
637 fprintf (dump_file
, " Total gain: %d\n", gain
);
642 /* Insert generated conversion instruction sequence INSNS
643 after instruction AFTER. New BB may be required in case
644 instruction has EH region attached. */
647 scalar_chain::emit_conversion_insns (rtx insns
, rtx_insn
*after
)
649 if (!control_flow_insn_p (after
))
651 emit_insn_after (insns
, after
);
655 basic_block bb
= BLOCK_FOR_INSN (after
);
656 edge e
= find_fallthru_edge (bb
->succs
);
659 basic_block new_bb
= split_edge (e
);
660 emit_insn_after (insns
, BB_HEAD (new_bb
));
665 /* Generate the canonical SET_SRC to move GPR to a VMODE vector register,
666 zeroing the upper parts. */
669 gen_gpr_to_xmm_move_src (enum machine_mode vmode
, rtx gpr
)
671 if (!nonimmediate_operand (gpr
, GET_MODE_INNER (vmode
)))
672 gpr
= force_reg (GET_MODE_INNER (vmode
), gpr
);
673 switch (GET_MODE_NUNITS (vmode
))
676 /* We are not using this case currently. */
679 return gen_rtx_VEC_CONCAT (vmode
, gpr
,
680 CONST0_RTX (GET_MODE_INNER (vmode
)));
682 return gen_rtx_VEC_MERGE (vmode
, gen_rtx_VEC_DUPLICATE (vmode
, gpr
),
683 CONST0_RTX (vmode
), GEN_INT (HOST_WIDE_INT_1U
));
687 /* Make vector copies for all register REGNO definitions
688 and replace its uses in a chain. */
691 general_scalar_chain::make_vector_copies (rtx_insn
*insn
, rtx reg
)
693 rtx vreg
= *defs_map
.get (reg
);
696 if (!TARGET_INTER_UNIT_MOVES_TO_VEC
)
698 rtx tmp
= assign_386_stack_local (smode
, SLOT_STV_TEMP
);
699 if (smode
== DImode
&& !TARGET_64BIT
)
701 emit_move_insn (adjust_address (tmp
, SImode
, 0),
702 gen_rtx_SUBREG (SImode
, reg
, 0));
703 emit_move_insn (adjust_address (tmp
, SImode
, 4),
704 gen_rtx_SUBREG (SImode
, reg
, 4));
707 emit_move_insn (copy_rtx (tmp
), reg
);
708 emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode
, vreg
, 0),
709 gen_gpr_to_xmm_move_src (vmode
, tmp
)));
711 else if (!TARGET_64BIT
&& smode
== DImode
)
715 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode
, vreg
, 0),
716 CONST0_RTX (V4SImode
),
717 gen_rtx_SUBREG (SImode
, reg
, 0)));
718 emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode
, vreg
, 0),
719 gen_rtx_SUBREG (V4SImode
, vreg
, 0),
720 gen_rtx_SUBREG (SImode
, reg
, 4),
725 rtx tmp
= gen_reg_rtx (DImode
);
726 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode
, vreg
, 0),
727 CONST0_RTX (V4SImode
),
728 gen_rtx_SUBREG (SImode
, reg
, 0)));
729 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode
, tmp
, 0),
730 CONST0_RTX (V4SImode
),
731 gen_rtx_SUBREG (SImode
, reg
, 4)));
732 emit_insn (gen_vec_interleave_lowv4si
733 (gen_rtx_SUBREG (V4SImode
, vreg
, 0),
734 gen_rtx_SUBREG (V4SImode
, vreg
, 0),
735 gen_rtx_SUBREG (V4SImode
, tmp
, 0)));
739 emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode
, vreg
, 0),
740 gen_gpr_to_xmm_move_src (vmode
, reg
)));
741 rtx_insn
*seq
= get_insns ();
743 emit_conversion_insns (seq
, insn
);
747 " Copied r%d to a vector register r%d for insn %d\n",
748 REGNO (reg
), REGNO (vreg
), INSN_UID (insn
));
751 /* Copy the definition SRC of INSN inside the chain to DST for
752 scalar uses outside of the chain. */
755 general_scalar_chain::convert_reg (rtx_insn
*insn
, rtx dst
, rtx src
)
758 if (!TARGET_INTER_UNIT_MOVES_FROM_VEC
)
760 rtx tmp
= assign_386_stack_local (smode
, SLOT_STV_TEMP
);
761 emit_move_insn (tmp
, src
);
762 if (!TARGET_64BIT
&& smode
== DImode
)
764 emit_move_insn (gen_rtx_SUBREG (SImode
, dst
, 0),
765 adjust_address (tmp
, SImode
, 0));
766 emit_move_insn (gen_rtx_SUBREG (SImode
, dst
, 4),
767 adjust_address (tmp
, SImode
, 4));
770 emit_move_insn (dst
, copy_rtx (tmp
));
772 else if (!TARGET_64BIT
&& smode
== DImode
)
776 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
,
777 gen_rtvec (1, const0_rtx
));
780 (gen_rtx_SUBREG (SImode
, dst
, 0),
781 gen_rtx_VEC_SELECT (SImode
,
782 gen_rtx_SUBREG (V4SImode
, src
, 0),
785 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const1_rtx
));
788 (gen_rtx_SUBREG (SImode
, dst
, 4),
789 gen_rtx_VEC_SELECT (SImode
,
790 gen_rtx_SUBREG (V4SImode
, src
, 0),
795 rtx vcopy
= gen_reg_rtx (V2DImode
);
796 emit_move_insn (vcopy
, gen_rtx_SUBREG (V2DImode
, src
, 0));
797 emit_move_insn (gen_rtx_SUBREG (SImode
, dst
, 0),
798 gen_rtx_SUBREG (SImode
, vcopy
, 0));
799 emit_move_insn (vcopy
,
800 gen_rtx_LSHIFTRT (V2DImode
,
801 vcopy
, GEN_INT (32)));
802 emit_move_insn (gen_rtx_SUBREG (SImode
, dst
, 4),
803 gen_rtx_SUBREG (SImode
, vcopy
, 0));
807 emit_move_insn (dst
, src
);
809 rtx_insn
*seq
= get_insns ();
811 emit_conversion_insns (seq
, insn
);
815 " Copied r%d to a scalar register r%d for insn %d\n",
816 REGNO (src
), REGNO (dst
), INSN_UID (insn
));
819 /* Convert operand OP in INSN. We should handle
820 memory operands and uninitialized registers.
821 All other register uses are converted during
822 register conversion. */
825 general_scalar_chain::convert_op (rtx
*op
, rtx_insn
*insn
)
827 *op
= copy_rtx_if_shared (*op
);
829 if (GET_CODE (*op
) == NOT
)
831 convert_op (&XEXP (*op
, 0), insn
);
832 PUT_MODE (*op
, vmode
);
834 else if (MEM_P (*op
))
836 rtx tmp
= gen_reg_rtx (GET_MODE (*op
));
838 emit_insn_before (gen_rtx_SET (gen_rtx_SUBREG (vmode
, tmp
, 0),
839 gen_gpr_to_xmm_move_src (vmode
, *op
)),
841 *op
= gen_rtx_SUBREG (vmode
, tmp
, 0);
844 fprintf (dump_file
, " Preloading operand for insn %d into r%d\n",
845 INSN_UID (insn
), REGNO (tmp
));
847 else if (REG_P (*op
))
849 *op
= gen_rtx_SUBREG (vmode
, *op
, 0);
851 else if (CONST_INT_P (*op
))
854 rtx tmp
= gen_rtx_SUBREG (vmode
, gen_reg_rtx (smode
), 0);
856 /* Prefer all ones vector in case of -1. */
857 if (constm1_operand (*op
, GET_MODE (*op
)))
858 vec_cst
= CONSTM1_RTX (vmode
);
861 unsigned n
= GET_MODE_NUNITS (vmode
);
862 rtx
*v
= XALLOCAVEC (rtx
, n
);
864 for (unsigned i
= 1; i
< n
; ++i
)
866 vec_cst
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (n
, v
));
869 if (!standard_sse_constant_p (vec_cst
, vmode
))
872 vec_cst
= validize_mem (force_const_mem (vmode
, vec_cst
));
873 rtx_insn
*seq
= get_insns ();
875 emit_insn_before (seq
, insn
);
878 emit_insn_before (gen_move_insn (copy_rtx (tmp
), vec_cst
), insn
);
883 gcc_assert (SUBREG_P (*op
));
884 gcc_assert (GET_MODE (*op
) == vmode
);
888 /* Convert INSN to vector mode. */
891 general_scalar_chain::convert_insn (rtx_insn
*insn
)
893 /* Generate copies for out-of-chain uses of defs and adjust debug uses. */
894 for (df_ref ref
= DF_INSN_DEFS (insn
); ref
; ref
= DF_REF_NEXT_LOC (ref
))
895 if (bitmap_bit_p (defs_conv
, DF_REF_REGNO (ref
)))
898 for (use
= DF_REF_CHAIN (ref
); use
; use
= use
->next
)
899 if (NONDEBUG_INSN_P (DF_REF_INSN (use
->ref
))
900 && (DF_REF_REG_MEM_P (use
->ref
)
901 || !bitmap_bit_p (insns
, DF_REF_INSN_UID (use
->ref
))))
904 convert_reg (insn
, DF_REF_REG (ref
),
905 *defs_map
.get (regno_reg_rtx
[DF_REF_REGNO (ref
)]));
906 else if (MAY_HAVE_DEBUG_BIND_INSNS
)
908 /* If we generated a scalar copy we can leave debug-insns
909 as-is, if not, we have to adjust them. */
910 auto_vec
<rtx_insn
*, 5> to_reset_debug_insns
;
911 for (use
= DF_REF_CHAIN (ref
); use
; use
= use
->next
)
912 if (DEBUG_INSN_P (DF_REF_INSN (use
->ref
)))
914 rtx_insn
*debug_insn
= DF_REF_INSN (use
->ref
);
915 /* If there's a reaching definition outside of the
916 chain we have to reset. */
918 for (def
= DF_REF_CHAIN (use
->ref
); def
; def
= def
->next
)
919 if (!bitmap_bit_p (insns
, DF_REF_INSN_UID (def
->ref
)))
922 to_reset_debug_insns
.safe_push (debug_insn
);
925 *DF_REF_REAL_LOC (use
->ref
)
926 = *defs_map
.get (regno_reg_rtx
[DF_REF_REGNO (ref
)]);
927 df_insn_rescan (debug_insn
);
930 /* Have to do the reset outside of the DF_CHAIN walk to not disrupt it. */
932 while (!to_reset_debug_insns
.is_empty ())
934 rtx_insn
*debug_insn
= to_reset_debug_insns
.pop ();
935 INSN_VAR_LOCATION_LOC (debug_insn
) = gen_rtx_UNKNOWN_VAR_LOC ();
936 df_insn_rescan_debug_internal (debug_insn
);
941 /* Replace uses in this insn with the defs we use in the chain. */
942 for (df_ref ref
= DF_INSN_USES (insn
); ref
; ref
= DF_REF_NEXT_LOC (ref
))
943 if (!DF_REF_REG_MEM_P (ref
))
944 if (rtx
*vreg
= defs_map
.get (regno_reg_rtx
[DF_REF_REGNO (ref
)]))
946 /* Also update a corresponding REG_DEAD note. */
947 rtx note
= find_reg_note (insn
, REG_DEAD
, DF_REF_REG (ref
));
949 XEXP (note
, 0) = *vreg
;
950 *DF_REF_REAL_LOC (ref
) = *vreg
;
953 rtx def_set
= single_set (insn
);
954 rtx src
= SET_SRC (def_set
);
955 rtx dst
= SET_DEST (def_set
);
958 if (MEM_P (dst
) && !REG_P (src
))
960 /* There are no scalar integer instructions and therefore
961 temporary register usage is required. */
962 rtx tmp
= gen_reg_rtx (smode
);
963 emit_conversion_insns (gen_move_insn (dst
, tmp
), insn
);
964 dst
= gen_rtx_SUBREG (vmode
, tmp
, 0);
966 else if (REG_P (dst
))
968 /* Replace the definition with a SUBREG to the definition we
969 use inside the chain. */
970 rtx
*vdef
= defs_map
.get (dst
);
973 dst
= gen_rtx_SUBREG (vmode
, dst
, 0);
974 /* IRA doesn't like to have REG_EQUAL/EQUIV notes when the SET_DEST
975 is a non-REG_P. So kill those off. */
976 rtx note
= find_reg_equal_equiv_note (insn
);
978 remove_note (insn
, note
);
981 switch (GET_CODE (src
))
986 convert_op (&XEXP (src
, 0), insn
);
987 PUT_MODE (src
, vmode
);
999 convert_op (&XEXP (src
, 0), insn
);
1000 convert_op (&XEXP (src
, 1), insn
);
1001 PUT_MODE (src
, vmode
);
1005 src
= XEXP (src
, 0);
1006 convert_op (&src
, insn
);
1007 subreg
= gen_reg_rtx (vmode
);
1008 emit_insn_before (gen_move_insn (subreg
, CONST0_RTX (vmode
)), insn
);
1009 src
= gen_rtx_MINUS (vmode
, subreg
, src
);
1013 src
= XEXP (src
, 0);
1014 convert_op (&src
, insn
);
1015 subreg
= gen_reg_rtx (vmode
);
1016 emit_insn_before (gen_move_insn (subreg
, CONSTM1_RTX (vmode
)), insn
);
1017 src
= gen_rtx_XOR (vmode
, src
, subreg
);
1022 convert_op (&src
, insn
);
1027 convert_op (&src
, insn
);
1031 gcc_assert (GET_MODE (src
) == vmode
);
1035 src
= SUBREG_REG (XEXP (XEXP (src
, 0), 0));
1037 gcc_assert (REG_P (src
) && GET_MODE (src
) == DImode
);
1038 subreg
= gen_rtx_SUBREG (V2DImode
, src
, 0);
1039 emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg
),
1040 copy_rtx_if_shared (subreg
),
1041 copy_rtx_if_shared (subreg
)),
1043 dst
= gen_rtx_REG (CCmode
, FLAGS_REG
);
1044 src
= gen_rtx_UNSPEC (CCmode
, gen_rtvec (2, copy_rtx_if_shared (subreg
),
1045 copy_rtx_if_shared (subreg
)),
1050 convert_op (&src
, insn
);
1057 SET_SRC (def_set
) = src
;
1058 SET_DEST (def_set
) = dst
;
1060 /* Drop possible dead definitions. */
1061 PATTERN (insn
) = def_set
;
1063 INSN_CODE (insn
) = -1;
1064 int patt
= recog_memoized (insn
);
1066 fatal_insn_not_found (insn
);
1067 df_insn_rescan (insn
);
1070 /* Fix uses of converted REG in debug insns. */
1073 timode_scalar_chain::fix_debug_reg_uses (rtx reg
)
1075 if (!flag_var_tracking
)
1079 for (ref
= DF_REG_USE_CHAIN (REGNO (reg
)); ref
; ref
= next
)
1081 rtx_insn
*insn
= DF_REF_INSN (ref
);
1082 /* Make sure the next ref is for a different instruction,
1083 so that we're not affected by the rescan. */
1084 next
= DF_REF_NEXT_REG (ref
);
1085 while (next
&& DF_REF_INSN (next
) == insn
)
1086 next
= DF_REF_NEXT_REG (next
);
1088 if (DEBUG_INSN_P (insn
))
1090 /* It may be a debug insn with a TImode variable in register. */
1092 bool changed
= false;
1093 for (; ref
!= next
; ref
= DF_REF_NEXT_REG (ref
))
1095 rtx
*loc
= DF_REF_LOC (ref
);
1096 if (REG_P (*loc
) && GET_MODE (*loc
) == V1TImode
)
1098 *loc
= gen_rtx_SUBREG (TImode
, *loc
, 0);
1103 df_insn_rescan (insn
);
1108 /* Convert INSN from TImode to V1TImode. */
1111 timode_scalar_chain::convert_insn (rtx_insn
*insn
)
1113 rtx def_set
= single_set (insn
);
1114 rtx src
= SET_SRC (def_set
);
1115 rtx dst
= SET_DEST (def_set
);
1117 switch (GET_CODE (dst
))
1121 rtx tmp
= find_reg_equal_equiv_note (insn
);
1123 PUT_MODE (XEXP (tmp
, 0), V1TImode
);
1124 PUT_MODE (dst
, V1TImode
);
1125 fix_debug_reg_uses (dst
);
1129 PUT_MODE (dst
, V1TImode
);
1136 switch (GET_CODE (src
))
1139 PUT_MODE (src
, V1TImode
);
1140 /* Call fix_debug_reg_uses only if SRC is never defined. */
1141 if (!DF_REG_DEF_CHAIN (REGNO (src
)))
1142 fix_debug_reg_uses (src
);
1146 PUT_MODE (src
, V1TImode
);
1149 case CONST_WIDE_INT
:
1150 if (NONDEBUG_INSN_P (insn
))
1152 /* Since there are no instructions to store 128-bit constant,
1153 temporary register usage is required. */
1154 rtx tmp
= gen_reg_rtx (V1TImode
);
1156 src
= gen_rtx_CONST_VECTOR (V1TImode
, gen_rtvec (1, src
));
1157 src
= validize_mem (force_const_mem (V1TImode
, src
));
1158 rtx_insn
*seq
= get_insns ();
1161 emit_insn_before (seq
, insn
);
1162 emit_conversion_insns (gen_rtx_SET (dst
, tmp
), insn
);
1168 switch (standard_sse_constant_p (src
, TImode
))
1171 src
= CONST0_RTX (GET_MODE (dst
));
1174 src
= CONSTM1_RTX (GET_MODE (dst
));
1179 if (NONDEBUG_INSN_P (insn
))
1181 rtx tmp
= gen_reg_rtx (V1TImode
);
1182 /* Since there are no instructions to store standard SSE
1183 constant, temporary register usage is required. */
1184 emit_conversion_insns (gen_rtx_SET (dst
, tmp
), insn
);
1193 SET_SRC (def_set
) = src
;
1194 SET_DEST (def_set
) = dst
;
1196 /* Drop possible dead definitions. */
1197 PATTERN (insn
) = def_set
;
1199 INSN_CODE (insn
) = -1;
1200 recog_memoized (insn
);
1201 df_insn_rescan (insn
);
1204 /* Generate copies from defs used by the chain but not defined therein.
1205 Also populates defs_map which is used later by convert_insn. */
1208 general_scalar_chain::convert_registers ()
1212 EXECUTE_IF_SET_IN_BITMAP (defs_conv
, 0, id
, bi
)
1213 defs_map
.put (regno_reg_rtx
[id
], gen_reg_rtx (smode
));
1214 EXECUTE_IF_SET_IN_BITMAP (insns_conv
, 0, id
, bi
)
1215 for (df_ref ref
= DF_INSN_UID_DEFS (id
); ref
; ref
= DF_REF_NEXT_LOC (ref
))
1216 if (bitmap_bit_p (defs_conv
, DF_REF_REGNO (ref
)))
1217 make_vector_copies (DF_REF_INSN (ref
), DF_REF_REAL_REG (ref
));
1220 /* Convert whole chain creating required register
1221 conversions and copies. */
1224 scalar_chain::convert ()
1228 int converted_insns
= 0;
1230 if (!dbg_cnt (stv_conversion
))
1234 fprintf (dump_file
, "Converting chain #%d...\n", chain_id
);
1236 convert_registers ();
1238 EXECUTE_IF_SET_IN_BITMAP (insns
, 0, id
, bi
)
1240 convert_insn (DF_INSN_UID_GET (id
)->insn
);
1244 return converted_insns
;
1247 /* Return 1 if INSN uses or defines a hard register.
1248 Hard register uses in a memory address are ignored.
1249 Clobbers and flags definitions are ignored. */
1252 has_non_address_hard_reg (rtx_insn
*insn
)
1255 FOR_EACH_INSN_DEF (ref
, insn
)
1256 if (HARD_REGISTER_P (DF_REF_REAL_REG (ref
))
1257 && !DF_REF_FLAGS_IS_SET (ref
, DF_REF_MUST_CLOBBER
)
1258 && DF_REF_REGNO (ref
) != FLAGS_REG
)
1261 FOR_EACH_INSN_USE (ref
, insn
)
1262 if (!DF_REF_REG_MEM_P (ref
) && HARD_REGISTER_P (DF_REF_REAL_REG (ref
)))
1268 /* Check if comparison INSN may be transformed
1269 into vector comparison. Currently we transform
1270 zero checks only which look like:
1272 (set (reg:CCZ 17 flags)
1273 (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
1274 (subreg:SI (reg:DI x) 0))
1275 (const_int 0 [0]))) */
1278 convertible_comparison_p (rtx_insn
*insn
, enum machine_mode mode
)
1280 /* ??? Currently convertible for double-word DImode chain only. */
1281 if (TARGET_64BIT
|| mode
!= DImode
)
1287 rtx def_set
= single_set (insn
);
1289 gcc_assert (def_set
);
1291 rtx src
= SET_SRC (def_set
);
1292 rtx dst
= SET_DEST (def_set
);
1294 gcc_assert (GET_CODE (src
) == COMPARE
);
1296 if (GET_CODE (dst
) != REG
1297 || REGNO (dst
) != FLAGS_REG
1298 || GET_MODE (dst
) != CCZmode
)
1301 rtx op1
= XEXP (src
, 0);
1302 rtx op2
= XEXP (src
, 1);
1304 if (op2
!= CONST0_RTX (GET_MODE (op2
)))
1307 if (GET_CODE (op1
) != IOR
)
1310 op2
= XEXP (op1
, 1);
1311 op1
= XEXP (op1
, 0);
1315 || GET_MODE (op1
) != SImode
1316 || GET_MODE (op2
) != SImode
1317 || ((SUBREG_BYTE (op1
) != 0
1318 || SUBREG_BYTE (op2
) != GET_MODE_SIZE (SImode
))
1319 && (SUBREG_BYTE (op2
) != 0
1320 || SUBREG_BYTE (op1
) != GET_MODE_SIZE (SImode
))))
1323 op1
= SUBREG_REG (op1
);
1324 op2
= SUBREG_REG (op2
);
1328 || GET_MODE (op1
) != DImode
)
1334 /* The general version of scalar_to_vector_candidate_p. */
/* Examines the single set of INSN for MODE.  Insns flagged by
   has_non_address_hard_reg are tested first; COMPARE sources are handed
   to convertible_comparison_p; otherwise source and destination must be
   in MODE (a CONST_INT source is exempt), the destination must be a
   register or memory, and the operands are checked per source code.
   NOTE(review): the case labels of the switch and the results returned
   by the individual tests are not visible in this excerpt.  */
1337 general_scalar_to_vector_candidate_p (rtx_insn
*insn
, enum machine_mode mode
)
1339 rtx def_set
= single_set (insn
);
1344 if (has_non_address_hard_reg (insn
))
1347 rtx src
= SET_SRC (def_set
);
1348 rtx dst
= SET_DEST (def_set
);
/* Comparisons have their own, stricter shape test.  */
1350 if (GET_CODE (src
) == COMPARE
)
1351 return convertible_comparison_p (insn
, mode
);
1353 /* We are interested in "mode" only. */
1354 if ((GET_MODE (src
) != mode
1355 && !CONST_INT_P (src
))
1356 || GET_MODE (dst
) != mode
)
1359 if (!REG_P (dst
) && !MEM_P (dst
))
1362 switch (GET_CODE (src
))
/* One group of source codes depends on TARGET_AVX512VL.  */
1365 if (!TARGET_AVX512VL
)
/* The second operand here must be a constant within
   [0, GET_MODE_BITSIZE (mode) - 1].  */
1371 if (!CONST_INT_P (XEXP (src
, 1))
1372 || !IN_RANGE (INTVAL (XEXP (src
, 1)), 0, GET_MODE_BITSIZE (mode
)-1))
/* ISA test: DImode is checked against AVX512VL, SImode against
   SSE4.1.  */
1380 if ((mode
== DImode
&& !TARGET_AVX512VL
)
1381 || (mode
== SImode
&& !TARGET_SSE4_1
))
/* Operand 1 must be a register, memory or constant integer, and must be
   in MODE unless it is a constant integer.  */
1390 if (!REG_P (XEXP (src
, 1))
1391 && !MEM_P (XEXP (src
, 1))
1392 && !CONST_INT_P (XEXP (src
, 1)))
1395 if (GET_MODE (XEXP (src
, 1)) != mode
1396 && !CONST_INT_P (XEXP (src
, 1)))
/* Operand 0 gets the same test, with one extra accepted form: an AND
   whose first operand is the NOT of a register.  */
1415 if (!REG_P (XEXP (src
, 0))
1416 && !MEM_P (XEXP (src
, 0))
1417 && !CONST_INT_P (XEXP (src
, 0))
1418 /* Check for andnot case. */
1419 && (GET_CODE (src
) != AND
1420 || GET_CODE (XEXP (src
, 0)) != NOT
1421 || !REG_P (XEXP (XEXP (src
, 0), 0))))
1424 if (GET_MODE (XEXP (src
, 0)) != mode
1425 && !CONST_INT_P (XEXP (src
, 0)))
1431 /* The TImode version of scalar_to_vector_candidate_p. */
/* Examines the single set of INSN.  Insns flagged by
   has_non_address_hard_reg are tested first, then the destination mode
   is compared with TImode.  For stores, the destination is tested with
   misaligned_operand against TARGET_SSE_UNALIGNED_STORE_OPTIMAL and the
   source code is switched on, with standard_sse_constant_p deciding one
   of the cases.  For loads (MEM source), the source is tested with
   misaligned_operand against TARGET_SSE_UNALIGNED_LOAD_OPTIMAL.
   NOTE(review): most case labels, the results returned by the failing
   tests and the ends of two comments below are not visible in this
   excerpt.  */
1434 timode_scalar_to_vector_candidate_p (rtx_insn
*insn
)
1436 rtx def_set
= single_set (insn
);
1441 if (has_non_address_hard_reg (insn
))
1444 rtx src
= SET_SRC (def_set
);
1445 rtx dst
= SET_DEST (def_set
);
1447 /* Only TImode load and store are allowed. */
1448 if (GET_MODE (dst
) != TImode
)
1453 /* Check for store. Memory must be aligned or unaligned store
1454 is optimal. Only support store from register, standard SSE
1455 constant or CONST_WIDE_INT generated from piecewise store.
1457 ??? Verify performance impact before enabling CONST_INT for
1459 if (misaligned_operand (dst
, TImode
)
1460 && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL
)
1463 switch (GET_CODE (src
))
1469 case CONST_WIDE_INT
:
1473 return standard_sse_constant_p (src
, TImode
);
1476 else if (MEM_P (src
))
1478 /* Check for load. Memory must be aligned or unaligned load is
1481 && (!misaligned_operand (src
, TImode
)
1482 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
));
1488 /* For a register REGNO, scan instructions for its defs and uses.
1489 Put REGNO in REGS if a def or use isn't in CANDIDATES. */
/* CANDIDATES is indexed by insn UID and REGS by register number.  The
   def chain is walked first, then the use chain; each finding is
   reported with a "non convertible" message.
   NOTE(review): the guard around the messages, the loop conditions and
   any early exit after marking REGNO are not visible in this excerpt.  */
1492 timode_check_non_convertible_regs (bitmap candidates
, bitmap regs
,
/* Definitions of REGNO.  */
1495 for (df_ref def
= DF_REG_DEF_CHAIN (regno
);
1497 def
= DF_REF_NEXT_REG (def
))
1499 if (!bitmap_bit_p (candidates
, DF_REF_INSN_UID (def
)))
1503 "r%d has non convertible def in insn %d\n",
1504 regno
, DF_REF_INSN_UID (def
));
1506 bitmap_set_bit (regs
, regno
);
/* Uses of REGNO.  */
1511 for (df_ref ref
= DF_REG_USE_CHAIN (regno
);
1513 ref
= DF_REF_NEXT_REG (ref
))
1515 /* Debug instructions are skipped. */
1516 if (NONDEBUG_INSN_P (DF_REF_INSN (ref
))
1517 && !bitmap_bit_p (candidates
, DF_REF_INSN_UID (ref
)))
1521 "r%d has non convertible use in insn %d\n",
1522 regno
, DF_REF_INSN_UID (ref
));
1524 bitmap_set_bit (regs
, regno
);
1530 /* The TImode version of remove_non_convertible_regs. */
/* Works in two phases on CANDIDATES (a bitmap of insn UIDs).  First,
   the destination and source of every candidate's single set are
   examined and timode_check_non_convertible_regs collects registers
   into the local bitmap REGS.  Second, every candidate insn that
   defines or uses a register in REGS is cleared from CANDIDATES.
   NOTE(review): the loop that may repeat these phases, the guards on
   the dump output and the release of REGS are not visible in this
   excerpt.  */
1533 timode_remove_non_convertible_regs (bitmap candidates
)
1537 bitmap regs
= BITMAP_ALLOC (NULL
);
/* Phase 1: collect registers that have a non-candidate def or use.  */
1539 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, id
, bi
)
1541 rtx def_set
= single_set (DF_INSN_UID_GET (id
)->insn
);
1542 rtx dest
= SET_DEST (def_set
);
1543 rtx src
= SET_SRC (def_set
);
/* Operands already recorded in REGS, and hard registers, are tested
   here before the checker is called.  */
1546 || bitmap_bit_p (regs
, REGNO (dest
))
1547 || HARD_REGISTER_P (dest
))
1549 || bitmap_bit_p (regs
, REGNO (src
))
1550 || HARD_REGISTER_P (src
)))
1554 timode_check_non_convertible_regs (candidates
, regs
,
1558 timode_check_non_convertible_regs (candidates
, regs
,
/* Phase 2: drop every candidate that touches a collected register.  */
1562 EXECUTE_IF_SET_IN_BITMAP (regs
, 0, id
, bi
)
1564 for (df_ref def
= DF_REG_DEF_CHAIN (id
);
1566 def
= DF_REF_NEXT_REG (def
))
1567 if (bitmap_bit_p (candidates
, DF_REF_INSN_UID (def
)))
1570 fprintf (dump_file
, "Removing insn %d from candidates list\n",
1571 DF_REF_INSN_UID (def
));
1573 bitmap_clear_bit (candidates
, DF_REF_INSN_UID (def
));
1576 for (df_ref ref
= DF_REG_USE_CHAIN (id
);
1578 ref
= DF_REF_NEXT_REG (ref
))
1579 if (bitmap_bit_p (candidates
, DF_REF_INSN_UID (ref
)))
1582 fprintf (dump_file
, "Removing insn %d from candidates list\n",
1583 DF_REF_INSN_UID (ref
));
1585 bitmap_clear_bit (candidates
, DF_REF_INSN_UID (ref
));
1592 /* Main STV pass function. Find and convert scalar
1593 instructions into vector mode when profitable. */
/* Candidates are kept in three bitmaps of insn UIDs, indexed in the
   order { SImode, DImode, TImode } and paired with the vector modes
   { V4SImode, V2DImode, V1TImode }.  Chains are built from each bitmap
   and converted when compute_convert_gain () is positive.  If anything
   was converted, the stack alignment fields in CRTL are raised to 128
   and V1TImode DECL_RTL / DECL_INCOMING_RTL of parameters are wrapped
   in TImode subregs.
   NOTE(review): how TIMODE_P selects between the TImode and the
   {SI,DI}mode candidate scans, the deletion of each chain and the
   function result are not visible in this excerpt.  */
1596 convert_scalars_to_vector (bool timode_p
)
1599 int converted_insns
= 0;
1601 bitmap_obstack_initialize (NULL
);
1602 const machine_mode cand_mode
[3] = { SImode
, DImode
, TImode
};
1603 const machine_mode cand_vmode
[3] = { V4SImode
, V2DImode
, V1TImode
};
1604 bitmap_head candidates
[3]; /* { SImode, DImode, TImode } */
1605 for (unsigned i
= 0; i
< 3; ++i
)
1606 bitmap_initialize (&candidates
[i
], &bitmap_default_obstack
);
/* Dominators and def-use / use-def chains are set up here; insn
   rescans are deferred and processed near the end of the function.  */
1608 calculate_dominance_info (CDI_DOMINATORS
);
1609 df_set_flags (DF_DEFER_INSN_RESCAN
);
1610 df_chain_add_problem (DF_DU_CHAIN
| DF_UD_CHAIN
);
1613 /* Find all instructions we want to convert into vector mode. */
1615 fprintf (dump_file
, "Searching for mode conversion candidates...\n");
1617 FOR_EACH_BB_FN (bb
, cfun
)
1620 FOR_BB_INSNS (bb
, insn
)
1622 && timode_scalar_to_vector_candidate_p (insn
))
1625 fprintf (dump_file
, " insn %d is marked as a TImode candidate\n",
1628 bitmap_set_bit (&candidates
[2], INSN_UID (insn
));
1632 /* Check {SI,DI}mode. */
1633 for (unsigned i
= 0; i
<= 1; ++i
)
1634 if (general_scalar_to_vector_candidate_p (insn
, cand_mode
[i
]))
1637 fprintf (dump_file
, " insn %d is marked as a %s candidate\n",
1638 INSN_UID (insn
), i
== 0 ? "SImode" : "DImode");
1640 bitmap_set_bit (&candidates
[i
], INSN_UID (insn
));
/* Prune TImode candidates whose registers have non-candidate defs or
   uses.  NOTE(review): the guard on this call is not visible here.  */
1647 timode_remove_non_convertible_regs (&candidates
[2]);
/* Report when all three candidate sets turn out to be empty.  */
1649 for (unsigned i
= 0; i
<= 2; ++i
)
1650 if (!bitmap_empty_p (&candidates
[i
]))
1652 else if (i
== 2 && dump_file
)
1653 fprintf (dump_file
, "There are no candidates for optimization.\n");
/* Repeatedly seed a chain from the first remaining candidate of each
   mode until that mode's bitmap is empty.  */
1655 for (unsigned i
= 0; i
<= 2; ++i
)
1656 while (!bitmap_empty_p (&candidates
[i
]))
1658 unsigned uid
= bitmap_first_set_bit (&candidates
[i
]);
1659 scalar_chain
*chain
;
1661 if (cand_mode
[i
] == TImode
)
1662 chain
= new timode_scalar_chain
;
1664 chain
= new general_scalar_chain (cand_mode
[i
], cand_vmode
[i
]);
1666 /* Find instructions chain we want to convert to vector mode.
1667 Check all uses and definitions to estimate all required
1669 chain
->build (&candidates
[i
], uid
);
1671 if (chain
->compute_convert_gain () > 0)
1672 converted_insns
+= chain
->convert ();
1675 fprintf (dump_file
, "Chain #%d conversion is not profitable\n",
1682 fprintf (dump_file
, "Total insns converted: %d\n", converted_insns
);
1684 for (unsigned i
= 0; i
<= 2; ++i
)
1685 bitmap_release (&candidates
[i
]);
1686 bitmap_obstack_release (NULL
);
1687 df_process_deferred_rescans ();
1689 /* Conversion means we may have 128bit register spills/fills
1690 which require aligned stack. */
1691 if (converted_insns
)
1693 if (crtl
->stack_alignment_needed
< 128)
1694 crtl
->stack_alignment_needed
= 128;
1695 if (crtl
->stack_alignment_estimated
< 128)
1696 crtl
->stack_alignment_estimated
= 128;
/* Recompute the realignment decision from the raised estimate.  */
1698 crtl
->stack_realign_needed
1699 = INCOMING_STACK_BOUNDARY
< crtl
->stack_alignment_estimated
;
1700 crtl
->stack_realign_tried
= crtl
->stack_realign_needed
;
1702 crtl
->stack_realign_processed
= true;
1704 if (!crtl
->drap_reg
)
1706 rtx drap_rtx
= targetm
.calls
.get_drap_rtx ();
1708 /* stack_realign_drap and drap_rtx must match. */
1709 gcc_assert ((stack_realign_drap
!= 0) == (drap_rtx
!= NULL
));
1711 /* Do nothing if NULL is returned,
1712 which means DRAP is not needed. */
1713 if (drap_rtx
!= NULL
)
1715 crtl
->args
.internal_arg_pointer
= drap_rtx
;
1717 /* Call fixup_tail_calls to clean up
1718 REG_EQUIV note if DRAP is needed. */
1719 fixup_tail_calls ();
1723 /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments. */
1725 for (tree parm
= DECL_ARGUMENTS (current_function_decl
);
1726 parm
; parm
= DECL_CHAIN (parm
))
/* Parameters are tested for TImode here; the rewriting below applies
   to RTL that is in V1TImode.  */
1728 if (TYPE_MODE (TREE_TYPE (parm
)) != TImode
)
1730 if (DECL_RTL_SET_P (parm
)
1731 && GET_MODE (DECL_RTL (parm
)) == V1TImode
)
1733 rtx r
= DECL_RTL (parm
);
1735 SET_DECL_RTL (parm
, gen_rtx_SUBREG (TImode
, r
, 0));
1737 if (DECL_INCOMING_RTL (parm
)
1738 && GET_MODE (DECL_INCOMING_RTL (parm
)) == V1TImode
)
1740 rtx r
= DECL_INCOMING_RTL (parm
);
1742 DECL_INCOMING_RTL (parm
) = gen_rtx_SUBREG (TImode
, r
, 0);
1751 rest_of_handle_insert_vzeroupper (void)
1755 /* vzeroupper instructions are inserted immediately after reload to
1756 account for possible spills from 256bit or 512bit registers. The pass
1757 reuses mode switching infrastructure by re-running mode insertion
1758 pass, so disable entities that have already been processed. */
1759 for (i
= 0; i
< MAX_386_ENTITIES
; i
++)
1760 ix86_optimize_mode_switching
[i
] = 0;
1762 ix86_optimize_mode_switching
[AVX_U128
] = 1;
1764 /* Call optimize_mode_switching. */
1765 g
->get_passes ()->execute_pass_mode_switching ();
/* Static descriptor for the "vzeroupper" RTL pass; it is handed to the
   rtl_opt_pass constructor by pass_insert_vzeroupper.  */
1771 const pass_data pass_data_insert_vzeroupper
=
1773 RTL_PASS
, /* type */
1774 "vzeroupper", /* name */
1775 OPTGROUP_NONE
, /* optinfo_flags */
1776 TV_MACH_DEP
, /* tv_id */
1777 0, /* properties_required */
1778 0, /* properties_provided */
1779 0, /* properties_destroyed */
1780 0, /* todo_flags_start */
1781 TODO_df_finish
, /* todo_flags_finish */
/* RTL pass wrapper whose execute hook runs
   rest_of_handle_insert_vzeroupper.
   NOTE(review): only part of the gate condition (TARGET_VZEROUPPER and
   flag_expensive_optimizations) is visible in this excerpt.  */
1784 class pass_insert_vzeroupper
: public rtl_opt_pass
1787 pass_insert_vzeroupper(gcc::context
*ctxt
)
1788 : rtl_opt_pass(pass_data_insert_vzeroupper
, ctxt
)
1791 /* opt_pass methods: */
1792 virtual bool gate (function
*)
1795 && TARGET_VZEROUPPER
&& flag_expensive_optimizations
1799 virtual unsigned int execute (function
*)
1801 return rest_of_handle_insert_vzeroupper ();
1804 }; // class pass_insert_vzeroupper
/* Static descriptor for the STV RTL pass; it is handed to the
   rtl_opt_pass constructor by pass_stv.
   NOTE(review): the pass name entry is not visible in this excerpt.  */
1806 const pass_data pass_data_stv
=
1808 RTL_PASS
, /* type */
1810 OPTGROUP_NONE
, /* optinfo_flags */
1811 TV_MACH_DEP
, /* tv_id */
1812 0, /* properties_required */
1813 0, /* properties_provided */
1814 0, /* properties_destroyed */
1815 0, /* todo_flags_start */
1816 TODO_df_finish
, /* todo_flags_finish */
/* RTL pass wrapper around convert_scalars_to_vector.  The pass runs
   when TARGET_STV and TARGET_SSE2 hold and optimize > 1; when timode_p
   is set it additionally requires TARGET_64BIT.  The pass can produce
   further instances of itself from m_ctxt and takes one boolean
   parameter (index 0).
   NOTE(review): the declaration of timode_p, the name of the method
   that returns the new pass_stv and the statement storing PARAM are
   not visible in this excerpt -- presumably PARAM becomes timode_p.  */
1819 class pass_stv
: public rtl_opt_pass
1822 pass_stv (gcc::context
*ctxt
)
1823 : rtl_opt_pass (pass_data_stv
, ctxt
),
1827 /* opt_pass methods: */
1828 virtual bool gate (function
*)
1830 return ((!timode_p
|| TARGET_64BIT
)
1831 && TARGET_STV
&& TARGET_SSE2
&& optimize
> 1);
1834 virtual unsigned int execute (function
*)
1836 return convert_scalars_to_vector (timode_p
);
1841 return new pass_stv (m_ctxt
);
1844 void set_pass_param (unsigned int n
, bool param
)
1846 gcc_assert (n
== 0);
1852 }; // class pass_stv
/* Factory: allocate a new pass_insert_vzeroupper bound to CTXT and
   return it.  */
1857 make_pass_insert_vzeroupper (gcc::context
*ctxt
)
1859 return new pass_insert_vzeroupper (ctxt
);
/* Factory: allocate a new pass_stv bound to CTXT and return it.  */
1863 make_pass_stv (gcc::context
*ctxt
)
1865 return new pass_stv (ctxt
);
1868 /* Inserting ENDBRANCH instructions. */
/* The instruction produced by gen_nop_endbr () is placed at up to four
   kinds of location:
     - at function entry, before the head of the block that follows the
       entry block, or queued for x86_function_profiler when
       crtl->profile and flag_fentry are set;
     - after an insn carrying a REG_SETJMP note, or whose callee type
       has the "indirect_return" attribute;
     - after the label heading each successor block of a jump-table
       jump, when flag_cet_switch is set;
     - after each label for which LABEL_PRESERVE_P holds.
   The whole function is timed under TV_MACH_DEP.
   NOTE(review): several guards, `continue' statements, the else arms
   and the function result are not visible in this excerpt.  */
1871 rest_of_insert_endbranch (void)
1873 timevar_push (TV_MACH_DEP
);
1879 /* Currently emit EB if it's a tracking function, i.e. 'nocf_check' is
1880 absent among function attributes. Later an optimization will be
1881 introduced to make analysis if an address of a static function is
1882 taken. A static function whose address is not taken will get a
1883 nocf_check attribute. This will allow to reduce the number of EB. */
1885 if (!lookup_attribute ("nocf_check",
1886 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
1887 && (!flag_manual_endbr
1888 || lookup_attribute ("cf_check",
1889 DECL_ATTRIBUTES (cfun
->decl
)))
1890 && !cgraph_node::get (cfun
->decl
)->only_called_directly_p ())
1892 /* Queue ENDBR insertion to x86_function_profiler. */
1893 if (crtl
->profile
&& flag_fentry
)
1894 cfun
->machine
->endbr_queued_at_entrance
= true;
1897 cet_eb
= gen_nop_endbr ();
1899 bb
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->next_bb
;
1900 insn
= BB_HEAD (bb
);
1901 emit_insn_before (cet_eb
, insn
);
/* Walk every insn of every block looking for the remaining places.  */
1906 FOR_EACH_BB_FN (bb
, cfun
)
1908 for (insn
= BB_HEAD (bb
); insn
!= NEXT_INSN (BB_END (bb
));
1909 insn
= NEXT_INSN (insn
))
/* A REG_SETJMP note is enough on its own; otherwise, for non-sibling
   calls, the callee's type attributes are inspected.  */
1914 need_endbr
= find_reg_note (insn
, REG_SETJMP
, NULL
) != NULL
;
1915 if (!need_endbr
&& !SIBLING_CALL_P (insn
))
1917 rtx call
= get_call_rtx_from (insn
);
1918 rtx fnaddr
= XEXP (call
, 0);
1919 tree fndecl
= NULL_TREE
;
1921 /* Also generate ENDBRANCH for non-tail call which
1922 may return via indirect branch. */
1923 if (GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
1924 fndecl
= SYMBOL_REF_DECL (XEXP (fnaddr
, 0));
1925 if (fndecl
== NULL_TREE
)
1926 fndecl
= MEM_EXPR (fnaddr
);
/* The type of FNDECL is tested against FUNCTION_TYPE and METHOD_TYPE
   before its attributes are consulted.  */
1928 && TREE_CODE (TREE_TYPE (fndecl
)) != FUNCTION_TYPE
1929 && TREE_CODE (TREE_TYPE (fndecl
)) != METHOD_TYPE
)
1931 if (fndecl
&& TYPE_ARG_TYPES (TREE_TYPE (fndecl
)))
1933 tree fntype
= TREE_TYPE (fndecl
);
1934 if (lookup_attribute ("indirect_return",
1935 TYPE_ATTRIBUTES (fntype
)))
1941 /* Generate ENDBRANCH after CALL, which can return more than
1942 twice, setjmp-like functions. */
1944 cet_eb
= gen_nop_endbr ();
1945 emit_insn_after_setloc (cet_eb
, insn
, INSN_LOCATION (insn
));
1949 if (JUMP_P (insn
) && flag_cet_switch
)
1951 rtx target
= JUMP_LABEL (insn
);
1952 if (target
== NULL_RTX
|| ANY_RETURN_P (target
))
1955 /* Check the jump is a switch table. */
1956 rtx_insn
*label
= as_a
<rtx_insn
*> (target
);
1957 rtx_insn
*table
= next_insn (label
);
1958 if (table
== NULL_RTX
|| !JUMP_TABLE_DATA_P (table
))
1961 /* For the indirect jump find out all places it jumps and insert
1962 ENDBRANCH there. It should be done under a special flag to
1963 control ENDBRANCH generation for switch stmts. */
1966 basic_block dest_blk
;
1968 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
/* Each successor block is expected to start with a label.  */
1973 insn
= BB_HEAD (dest_blk
);
1974 gcc_assert (LABEL_P (insn
));
1975 cet_eb
= gen_nop_endbr ();
1976 emit_insn_after (cet_eb
, insn
);
1981 if (LABEL_P (insn
) && LABEL_PRESERVE_P (insn
))
1983 cet_eb
= gen_nop_endbr ();
1984 emit_insn_after (cet_eb
, insn
);
1990 timevar_pop (TV_MACH_DEP
);
/* Static descriptor for the ENDBRANCH insertion RTL pass; it is handed
   to the rtl_opt_pass constructor by pass_insert_endbranch.  Unlike
   the other descriptors here, todo_flags_finish is 0.
   NOTE(review): the pass name entry is not visible in this excerpt.  */
1996 const pass_data pass_data_insert_endbranch
=
1998 RTL_PASS
, /* type. */
2000 OPTGROUP_NONE
, /* optinfo_flags. */
2001 TV_MACH_DEP
, /* tv_id. */
2002 0, /* properties_required. */
2003 0, /* properties_provided. */
2004 0, /* properties_destroyed. */
2005 0, /* todo_flags_start. */
2006 0, /* todo_flags_finish. */
/* RTL pass wrapper around rest_of_insert_endbranch.  The gate is the
   CF_BRANCH bit of flag_cf_protection.  */
2009 class pass_insert_endbranch
: public rtl_opt_pass
2012 pass_insert_endbranch (gcc::context
*ctxt
)
2013 : rtl_opt_pass (pass_data_insert_endbranch
, ctxt
)
2016 /* opt_pass methods: */
2017 virtual bool gate (function
*)
2019 return ((flag_cf_protection
& CF_BRANCH
));
2022 virtual unsigned int execute (function
*)
2024 return rest_of_insert_endbranch ();
2027 }; // class pass_insert_endbranch
/* Factory: allocate a new pass_insert_endbranch bound to CTXT and
   return it.  */
2032 make_pass_insert_endbranch (gcc::context
*ctxt
)
2034 return new pass_insert_endbranch (ctxt
);
2037 /* At entry of the nearest common dominator for basic blocks with
2038 conversions, generate a single
2039 vxorps %xmmN, %xmmN, %xmmN
2041 vcvtss2sd op, %xmmN, %xmmX
2042 vcvtsd2ss op, %xmmN, %xmmX
2043 vcvtsi2ss op, %xmmN, %xmmX
2044 vcvtsi2sd op, %xmmN, %xmmX
2046 NB: We want to generate only a single vxorps to cover the whole
2047 function. The LCM algorithm isn't appropriate here since it may
2048 place a vxorps inside the loop. */
/* Implementation outline, as far as this excerpt shows it:
     1. Scan all non-debug insns with a single set whose
	avx_partial_xmm_update attribute is AVX_PARTIAL_XMM_UPDATE_TRUE.
     2. For each one, emit before it a set of a fresh vector pseudo to
	vec_merge (vec_duplicate (src), zero, 1), where ZERO is derived
	from the shared V4SFmode pseudo v4sf_const0, then rewrite the
	insn itself into a copy from the low subreg of that pseudo.
	The block index is recorded in CONVERT_BBS.
     3. Emit one set of v4sf_const0 to CONST0_RTX (V4SFmode) in a block
	found by starting from nearest_common_dominator_for_set and
	moving through immediate dominators of loop headers until the
	loop's latch is the exit block.
     4. When cfun->can_throw_non_call_exceptions, carry REG_EH_REGION
	notes over to the new insns and, at the end, split blocks after
	those that are control flow insns and add EH edges.
   NOTE(review): several guards, `continue' statements, the else arms
   and the function result are not visible in this excerpt.  */
2051 remove_partial_avx_dependency (void)
2053 timevar_push (TV_MACH_DEP
);
2055 bitmap_obstack_initialize (NULL
);
2056 bitmap convert_bbs
= BITMAP_ALLOC (NULL
);
2059 rtx_insn
*insn
, *set_insn
;
2061 rtx v4sf_const0
= NULL_RTX
;
/* New insns that inherited a REG_EH_REGION note; revisited at the end.  */
2063 auto_vec
<rtx_insn
*> control_flow_insns
;
2065 FOR_EACH_BB_FN (bb
, cfun
)
2067 FOR_BB_INSNS (bb
, insn
)
2069 if (!NONDEBUG_INSN_P (insn
))
2072 set
= single_set (insn
);
2076 if (get_attr_avx_partial_xmm_update (insn
)
2077 != AVX_PARTIAL_XMM_UPDATE_TRUE
)
/* Dataflow setup and creation of the shared zero pseudo.
   NOTE(review): the guard that makes this happen only once is not
   visible here.  */
2082 calculate_dominance_info (CDI_DOMINATORS
);
2083 df_set_flags (DF_DEFER_INSN_RESCAN
);
2084 df_chain_add_problem (DF_DU_CHAIN
| DF_UD_CHAIN
);
2085 df_md_add_problem ();
2087 v4sf_const0
= gen_reg_rtx (V4SFmode
);
2090 /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF,
2091 SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and
2092 vec_merge with subreg. */
2093 rtx src
= SET_SRC (set
);
2094 rtx dest
= SET_DEST (set
);
2095 machine_mode dest_mode
= GET_MODE (dest
);
/* Pick the vector mode matching the scalar destination: V4SFmode for
   SFmode, V2DFmode otherwise (viewing the zero pseudo as V2DFmode).  */
2098 machine_mode dest_vecmode
;
2099 if (dest_mode
== E_SFmode
)
2101 dest_vecmode
= V4SFmode
;
2106 dest_vecmode
= V2DFmode
;
2107 zero
= gen_rtx_SUBREG (V2DFmode
, v4sf_const0
, 0);
2110 /* Change source to vector mode. */
2111 src
= gen_rtx_VEC_DUPLICATE (dest_vecmode
, src
);
2112 src
= gen_rtx_VEC_MERGE (dest_vecmode
, src
, zero
,
2113 GEN_INT (HOST_WIDE_INT_1U
));
2114 /* Change destination to vector mode. */
2115 rtx vec
= gen_reg_rtx (dest_vecmode
);
2116 /* Generate an XMM vector SET. */
2117 set
= gen_rtx_SET (vec
, src
);
2118 set_insn
= emit_insn_before (set
, insn
);
2119 df_insn_rescan (set_insn
);
2121 if (cfun
->can_throw_non_call_exceptions
)
2123 /* Handle REG_EH_REGION note. */
2124 rtx note
= find_reg_note (insn
, REG_EH_REGION
, NULL_RTX
);
2127 control_flow_insns
.safe_push (set_insn
);
2128 add_reg_note (set_insn
, REG_EH_REGION
, XEXP (note
, 0));
/* The original insn becomes a plain copy from the low part of VEC.  */
2132 src
= gen_rtx_SUBREG (dest_mode
, vec
, 0);
2133 set
= gen_rtx_SET (dest
, src
);
2135 /* Drop possible dead definitions. */
2136 PATTERN (insn
) = set
;
/* Force re-recognition of the rewritten pattern.  */
2138 INSN_CODE (insn
) = -1;
2139 recog_memoized (insn
);
2140 df_insn_rescan (insn
);
2141 bitmap_set_bit (convert_bbs
, bb
->index
);
2147 /* (Re-)discover loops so that bb->loop_father can be used in the
2149 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
2151 /* Generate a vxorps at entry of the nearest dominator for basic
2152 blocks with conversions, which is in the fake loop that
2153 contains the whole function, so that there is only a single
2154 vxorps in the whole function. */
2155 bb
= nearest_common_dominator_for_set (CDI_DOMINATORS
,
2157 while (bb
->loop_father
->latch
2158 != EXIT_BLOCK_PTR_FOR_FN (cfun
))
2159 bb
= get_immediate_dominator (CDI_DOMINATORS
,
2160 bb
->loop_father
->header
);
2162 set
= gen_rtx_SET (v4sf_const0
, CONST0_RTX (V4SFmode
));
/* Find the first non-debug insn of BB to decide where the zeroing set
   goes.  */
2164 insn
= BB_HEAD (bb
);
2165 while (insn
&& !NONDEBUG_INSN_P (insn
))
2167 if (insn
== BB_END (bb
))
2172 insn
= NEXT_INSN (insn
);
2174 if (insn
== BB_HEAD (bb
))
2175 set_insn
= emit_insn_before (set
, insn
);
2177 set_insn
= emit_insn_after (set
,
2178 insn
? PREV_INSN (insn
) : BB_END (bb
));
2179 df_insn_rescan (set_insn
);
2180 df_process_deferred_rescans ();
2181 loop_optimizer_finalize ();
2183 if (!control_flow_insns
.is_empty ())
2185 free_dominance_info (CDI_DOMINATORS
);
2188 FOR_EACH_VEC_ELT (control_flow_insns
, i
, insn
)
2189 if (control_flow_insn_p (insn
))
2191 /* Split the block after insn. There will be a fallthru
2192 edge, which is OK so we keep it. We have to create
2193 the exception edges ourselves. */
2194 bb
= BLOCK_FOR_INSN (insn
);
2195 split_block (bb
, insn
);
2196 rtl_make_eh_edge (NULL
, bb
, BB_END (bb
));
2201 bitmap_obstack_release (NULL
);
2202 BITMAP_FREE (convert_bbs
);
2204 timevar_pop (TV_MACH_DEP
);
/* Static descriptor for the partial-AVX-dependency removal RTL pass;
   it is handed to the rtl_opt_pass constructor by
   pass_remove_partial_avx_dependency.
   NOTE(review): the pass name entry is not visible in this excerpt.  */
2210 const pass_data pass_data_remove_partial_avx_dependency
=
2212 RTL_PASS
, /* type */
2214 OPTGROUP_NONE
, /* optinfo_flags */
2215 TV_MACH_DEP
, /* tv_id */
2216 0, /* properties_required */
2217 0, /* properties_provided */
2218 0, /* properties_destroyed */
2219 0, /* todo_flags_start */
2220 TODO_df_finish
, /* todo_flags_finish */
/* RTL pass wrapper around remove_partial_avx_dependency.
   NOTE(review): only part of the gate condition is visible in this
   excerpt: TARGET_SSE_PARTIAL_REG_DEPENDENCY and
   optimize_function_for_speed_p (cfun).  */
2223 class pass_remove_partial_avx_dependency
: public rtl_opt_pass
2226 pass_remove_partial_avx_dependency (gcc::context
*ctxt
)
2227 : rtl_opt_pass (pass_data_remove_partial_avx_dependency
, ctxt
)
2230 /* opt_pass methods: */
2231 virtual bool gate (function
*)
2234 && TARGET_SSE_PARTIAL_REG_DEPENDENCY
2237 && optimize_function_for_speed_p (cfun
));
2240 virtual unsigned int execute (function
*)
2242 return remove_partial_avx_dependency ();
2244 }; // class pass_rpad
/* Factory: allocate a new pass_remove_partial_avx_dependency bound to
   CTXT and return it.  */
2249 make_pass_remove_partial_avx_dependency (gcc::context
*ctxt
)
2251 return new pass_remove_partial_avx_dependency (ctxt
);
2254 /* This compares the priority of target features in function DECL1
2255 and DECL2. It returns positive value if DECL1 is higher priority,
2256 negative value if DECL2 is higher priority and 0 if they are the
2260 ix86_compare_version_priority (tree decl1
, tree decl2
)
2262 unsigned int priority1
= get_builtin_code_for_version (decl1
, NULL
);
2263 unsigned int priority2
= get_builtin_code_for_version (decl2
, NULL
);
2265 return (int)priority1
- (int)priority2
;
2268 /* V1 and V2 point to function versions with different priorities
2269 based on the target ISA. This function compares their priorities. */
2272 feature_compare (const void *v1
, const void *v2
)
2274 typedef struct _function_version_info
2277 tree predicate_chain
;
2278 unsigned int dispatch_priority
;
2279 } function_version_info
;
2281 const function_version_info c1
= *(const function_version_info
*)v1
;
2282 const function_version_info c2
= *(const function_version_info
*)v2
;
2283 return (c2
.dispatch_priority
- c1
.dispatch_priority
);
2286 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
2287 to return a pointer to VERSION_DECL if the outcome of the expression
2288 formed by PREDICATE_CHAIN is true. This function will be called during
2289 version dispatch to decide which function version to execute. It returns
2290 the basic block at the end, to which more conditions can be added. */
/* PREDICATE_CHAIN is walked as a list whose TREE_PURPOSE is a predicate
   decl and whose TREE_VALUE is its single argument.  The results of the
   predicate calls are folded together with MIN_EXPR, and the combined
   value is the left operand of a GT_EXPR condition.  A NULL_TREE chain
   produces only the conversion and return statements.
   NOTE(review): the right operand of the GT_EXPR condition, the
   assignments to BB1, BB2 and BB3, the matching pop_cfun and the
   returned block are not visible in this excerpt.  */
2293 add_condition_to_bb (tree function_decl
, tree version_decl
,
2294 tree predicate_chain
, basic_block new_bb
)
2296 gimple
*return_stmt
;
2297 tree convert_expr
, result_var
;
2298 gimple
*convert_stmt
;
2299 gimple
*call_cond_stmt
;
2300 gimple
*if_else_stmt
;
2302 basic_block bb1
, bb2
, bb3
;
2305 tree cond_var
, and_expr_var
= NULL_TREE
;
2308 tree predicate_decl
, predicate_arg
;
2310 push_cfun (DECL_STRUCT_FUNCTION (function_decl
));
2312 gcc_assert (new_bb
!= NULL
);
2313 gseq
= bb_seq (new_bb
);
/* Build "result_var = (void *) &version_decl; return result_var;".  */
2316 convert_expr
= build1 (CONVERT_EXPR
, ptr_type_node
,
2317 build_fold_addr_expr (version_decl
));
2318 result_var
= create_tmp_var (ptr_type_node
);
2319 convert_stmt
= gimple_build_assign (result_var
, convert_expr
);
2320 return_stmt
= gimple_build_return (result_var
);
/* Without predicates the two statements are appended to NEW_BB as they
   are.  */
2322 if (predicate_chain
== NULL_TREE
)
2324 gimple_seq_add_stmt (&gseq
, convert_stmt
);
2325 gimple_seq_add_stmt (&gseq
, return_stmt
);
2326 set_bb_seq (new_bb
, gseq
);
2327 gimple_set_bb (convert_stmt
, new_bb
);
2328 gimple_set_bb (return_stmt
, new_bb
);
/* Emit one call per predicate and fold the results together.  */
2333 while (predicate_chain
!= NULL
)
2335 cond_var
= create_tmp_var (integer_type_node
);
2336 predicate_decl
= TREE_PURPOSE (predicate_chain
);
2337 predicate_arg
= TREE_VALUE (predicate_chain
);
2338 call_cond_stmt
= gimple_build_call (predicate_decl
, 1, predicate_arg
);
2339 gimple_call_set_lhs (call_cond_stmt
, cond_var
);
2341 gimple_set_block (call_cond_stmt
, DECL_INITIAL (function_decl
));
2342 gimple_set_bb (call_cond_stmt
, new_bb
);
2343 gimple_seq_add_stmt (&gseq
, call_cond_stmt
);
2345 predicate_chain
= TREE_CHAIN (predicate_chain
);
/* The first result seeds the accumulator; later ones are combined.  */
2347 if (and_expr_var
== NULL
)
2348 and_expr_var
= cond_var
;
2351 gimple
*assign_stmt
;
2352 /* Use MIN_EXPR to check if any integer is zero?.
2353 and_expr_var = min_expr <cond_var, and_expr_var> */
2354 assign_stmt
= gimple_build_assign (and_expr_var
,
2355 build2 (MIN_EXPR
, integer_type_node
,
2356 cond_var
, and_expr_var
));
2358 gimple_set_block (assign_stmt
, DECL_INITIAL (function_decl
));
2359 gimple_set_bb (assign_stmt
, new_bb
);
2360 gimple_seq_add_stmt (&gseq
, assign_stmt
);
2364 if_else_stmt
= gimple_build_cond (GT_EXPR
, and_expr_var
,
2366 NULL_TREE
, NULL_TREE
);
2367 gimple_set_block (if_else_stmt
, DECL_INITIAL (function_decl
));
2368 gimple_set_bb (if_else_stmt
, new_bb
);
2369 gimple_seq_add_stmt (&gseq
, if_else_stmt
);
2371 gimple_seq_add_stmt (&gseq
, convert_stmt
);
2372 gimple_seq_add_stmt (&gseq
, return_stmt
);
2373 set_bb_seq (new_bb
, gseq
);
/* Split after the condition; the resulting edge becomes the true edge.  */
2376 e12
= split_block (bb1
, if_else_stmt
);
2378 e12
->flags
&= ~EDGE_FALLTHRU
;
2379 e12
->flags
|= EDGE_TRUE_VALUE
;
/* Split again after the return statement.  */
2381 e23
= split_block (bb2
, return_stmt
);
2383 gimple_set_bb (convert_stmt
, bb2
);
2384 gimple_set_bb (return_stmt
, bb2
);
/* False edge from the condition block to BB3, and an edge from the
   returning block to the exit block.  */
2387 make_edge (bb1
, bb3
, EDGE_FALSE_VALUE
);
2390 make_edge (bb2
, EXIT_BLOCK_PTR_FOR_FN (cfun
), 0);
2397 /* This function generates the dispatch function for
2398 multi-versioned functions. DISPATCH_DECL is the function which will
2399 contain the dispatch logic. FNDECLS are the function choices for
2400 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
2401 in DISPATCH_DECL in which the dispatch code is generated. */
/* Steps visible in this excerpt: emit a call to the
   IX86_BUILTIN_CPU_INIT builtin into *EMPTY_BB; gather one record
   (decl, predicate chain, priority) per non-default version; sort the
   records with feature_compare; then chain one add_condition_to_bb
   call per record, followed by one for the default decl.
   NOTE(review): the handling of a version whose predicate chain stays
   NULL_TREE, the increment of ACTUAL_VERSIONS, the matching pop_cfun
   and the function result are not visible in this excerpt.  */
2404 dispatch_function_versions (tree dispatch_decl
,
2406 basic_block
*empty_bb
)
2409 gimple
*ifunc_cpu_init_stmt
;
2414 unsigned int num_versions
= 0;
2415 unsigned int actual_versions
= 0;
/* Record type of the array sorted below; feature_compare declares a
   type of the same name to read dispatch_priority.  */
2418 struct _function_version_info
2421 tree predicate_chain
;
2422 unsigned int dispatch_priority
;
2423 }*function_version_info
;
2425 gcc_assert (dispatch_decl
!= NULL
2426 && fndecls_p
!= NULL
2427 && empty_bb
!= NULL
);
2429 /*fndecls_p is actually a vector. */
2430 fndecls
= static_cast<vec
<tree
> *> (fndecls_p
);
2432 /* At least one more version other than the default. */
2433 num_versions
= fndecls
->length ();
2434 gcc_assert (num_versions
>= 2);
/* One slot per non-default version.  */
2436 function_version_info
= (struct _function_version_info
*)
2437 XNEWVEC (struct _function_version_info
, (num_versions
- 1));
2439 /* The first version in the vector is the default decl. */
2440 default_decl
= (*fndecls
)[0];
2442 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl
));
2444 gseq
= bb_seq (*empty_bb
);
2445 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
2446 constructors, so explicitly call __builtin_cpu_init here. */
2448 = gimple_build_call_vec (get_ix86_builtin (IX86_BUILTIN_CPU_INIT
), vNULL
);
2449 gimple_seq_add_stmt (&gseq
, ifunc_cpu_init_stmt
);
2450 gimple_set_bb (ifunc_cpu_init_stmt
, *empty_bb
);
2451 set_bb_seq (*empty_bb
, gseq
);
/* Index 0 is the default decl, so iteration starts at 1.  */
2456 for (ix
= 1; fndecls
->iterate (ix
, &ele
); ++ix
)
2458 tree version_decl
= ele
;
2459 tree predicate_chain
= NULL_TREE
;
2460 unsigned int priority
;
2461 /* Get attribute string, parse it and find the right predicate decl.
2462 The predicate function could be a lengthy combination of many
2463 features, like arch-type and various isa-variants. */
2464 priority
= get_builtin_code_for_version (version_decl
,
2467 if (predicate_chain
== NULL_TREE
)
2470 function_version_info
[actual_versions
].version_decl
= version_decl
;
2471 function_version_info
[actual_versions
].predicate_chain
2473 function_version_info
[actual_versions
].dispatch_priority
= priority
;
2477 /* Sort the versions according to descending order of dispatch priority. The
2478 priority is based on the ISA. This is not a perfect solution. There
2479 could still be ambiguity. If more than one function version is suitable
2480 to execute, which one should be dispatched? In future, allow the user
2481 to specify a dispatch priority next to the version. */
2482 qsort (function_version_info
, actual_versions
,
2483 sizeof (struct _function_version_info
), feature_compare
);
/* Each call yields the block that receives the next condition.  */
2485 for (i
= 0; i
< actual_versions
; ++i
)
2486 *empty_bb
= add_condition_to_bb (dispatch_decl
,
2487 function_version_info
[i
].version_decl
,
2488 function_version_info
[i
].predicate_chain
,
2491 /* dispatch default version at the end. */
2492 *empty_bb
= add_condition_to_bb (dispatch_decl
, default_decl
,
2495 free (function_version_info
);
2499 /* This function changes the assembler name for functions that are
2500 versions. If DECL is a function version and has a "target"
2501 attribute, it appends the attribute string to its assembler name. */
/* ID supplies the original name.  The new name has the form
   "<orig_name>.<attr_str>", where attr_str comes from
   sorted_attr_string applied to the "target" attribute's value.  Two
   diagnostics are issued first: an error for versions that are declared
   inline with the "gnu_inline" attribute, and a sorry for virtual
   functions.
   NOTE(review): the result for the "default" version and the final
   return are not visible in this excerpt -- presumably ID is returned
   unchanged for "default" and RET otherwise.  */
2504 ix86_mangle_function_version_assembler_name (tree decl
, tree id
)
2507 const char *orig_name
, *version_string
;
2508 char *attr_str
, *assembler_name
;
2510 if (DECL_DECLARED_INLINE_P (decl
)
2511 && lookup_attribute ("gnu_inline",
2512 DECL_ATTRIBUTES (decl
)))
2513 error_at (DECL_SOURCE_LOCATION (decl
),
2514 "function versions cannot be marked as %<gnu_inline%>,"
2515 " bodies have to be generated");
2517 if (DECL_VIRTUAL_P (decl
)
2518 || DECL_VINDEX (decl
))
2519 sorry ("virtual function multiversioning not supported");
2521 version_attr
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
2523 /* target attribute string cannot be NULL. */
2524 gcc_assert (version_attr
!= NULL_TREE
);
2526 orig_name
= IDENTIFIER_POINTER (id
);
2528 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr
)));
2530 if (strcmp (version_string
, "default") == 0)
/* Room for both strings, the '.' separator and the terminating NUL.  */
2533 attr_str
= sorted_attr_string (TREE_VALUE (version_attr
));
2534 assembler_name
= XNEWVEC (char, strlen (orig_name
) + strlen (attr_str
) + 2);
2536 sprintf (assembler_name
, "%s.%s", orig_name
, attr_str
);
2538 /* Allow assembler name to be modified if already set. */
2539 if (DECL_ASSEMBLER_NAME_SET_P (decl
))
2540 SET_DECL_RTL (decl
, NULL
);
/* Both temporary buffers are freed once the identifier is created.  */
2542 tree ret
= get_identifier (assembler_name
);
2543 XDELETEVEC (attr_str
);
2544 XDELETEVEC (assembler_name
);
2549 ix86_mangle_decl_assembler_name (tree decl
, tree id
)
2551 /* For function version, add the target suffix to the assembler name. */
2552 if (TREE_CODE (decl
) == FUNCTION_DECL
2553 && DECL_FUNCTION_VERSIONED (decl
))
2554 id
= ix86_mangle_function_version_assembler_name (decl
, id
);
2555 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
2556 id
= SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl
, id
);
2562 /* Make a dispatcher declaration for the multi-versioned function DECL.
2563 Calls to DECL function will be replaced with calls to the dispatcher
2564 by the front-end. Returns the decl of the dispatcher function. */
/* An existing dispatcher_resolver recorded for DECL is returned as is.
   Otherwise the version list is searched for the default version,
   which is moved to the head of the list.  When
   ASM_OUTPUT_TYPE_DIRECTIVE is defined and the target has ifunc
   support, a dispatcher decl is created with make_dispatcher_decl and
   recorded as dispatcher_resolver of the versions; the other path
   reports an error that multiversioning needs ifunc.
   NOTE(review): the initialisation of FIRST_V, the exit from the
   default-version search, the result when no default exists and the
   advance of IT_V are not visible in this excerpt.  */
2567 ix86_get_function_versions_dispatcher (void *decl
)
2569 tree fn
= (tree
) decl
;
2570 struct cgraph_node
*node
= NULL
;
2571 struct cgraph_node
*default_node
= NULL
;
2572 struct cgraph_function_version_info
*node_v
= NULL
;
2573 struct cgraph_function_version_info
*first_v
= NULL
;
2575 tree dispatch_decl
= NULL
;
2577 struct cgraph_function_version_info
*default_version_info
= NULL
;
2579 gcc_assert (fn
!= NULL
&& DECL_FUNCTION_VERSIONED (fn
));
2581 node
= cgraph_node::get (fn
);
2582 gcc_assert (node
!= NULL
);
2584 node_v
= node
->function_version ();
2585 gcc_assert (node_v
!= NULL
);
/* Reuse a dispatcher that was created earlier.  */
2587 if (node_v
->dispatcher_resolver
!= NULL
)
2588 return node_v
->dispatcher_resolver
;
2590 /* Find the default version and make it the first node. */
2592 /* Go to the beginning of the chain. */
2593 while (first_v
->prev
!= NULL
)
2594 first_v
= first_v
->prev
;
2595 default_version_info
= first_v
;
2596 while (default_version_info
!= NULL
)
2598 if (is_function_default_version
2599 (default_version_info
->this_node
->decl
))
2601 default_version_info
= default_version_info
->next
;
2604 /* If there is no default node, just return NULL. */
2605 if (default_version_info
== NULL
)
2608 /* Make default info the first node. */
2609 if (first_v
!= default_version_info
)
/* Unlink the default entry from its current position, then relink it
   in front of FIRST_V.  */
2611 default_version_info
->prev
->next
= default_version_info
->next
;
2612 if (default_version_info
->next
)
2613 default_version_info
->next
->prev
= default_version_info
->prev
;
2614 first_v
->prev
= default_version_info
;
2615 default_version_info
->next
= first_v
;
2616 default_version_info
->prev
= NULL
;
2619 default_node
= default_version_info
->this_node
;
2621 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
2622 if (targetm
.has_ifunc_p ())
2624 struct cgraph_function_version_info
*it_v
= NULL
;
2625 struct cgraph_node
*dispatcher_node
= NULL
;
2626 struct cgraph_function_version_info
*dispatcher_version_info
= NULL
;
2628 /* Right now, the dispatching is done via ifunc. */
2629 dispatch_decl
= make_dispatcher_decl (default_node
->decl
);
/* Register the dispatcher in the call graph and link its version
   record to the default version.  */
2631 dispatcher_node
= cgraph_node::get_create (dispatch_decl
);
2632 gcc_assert (dispatcher_node
!= NULL
);
2633 dispatcher_node
->dispatcher_function
= 1;
2634 dispatcher_version_info
2635 = dispatcher_node
->insert_new_function_version ();
2636 dispatcher_version_info
->next
= default_version_info
;
2637 dispatcher_node
->definition
= 1;
2639 /* Set the dispatcher for all the versions. */
2640 it_v
= default_version_info
;
2641 while (it_v
!= NULL
)
2643 it_v
->dispatcher_resolver
= dispatch_decl
;
2650 error_at (DECL_SOURCE_LOCATION (default_node
->decl
),
2651 "multiversioning needs %<ifunc%> which is not supported "
2655 return dispatch_decl
;
2658 /* Make the resolver function decl to dispatch the versions of
2659 a multi-versioned function, DEFAULT_DECL. IFUNC_ALIAS_DECL is
2660 ifunc alias that will point to the created resolver. Create an
2661 empty basic block in the resolver and store the pointer in
2662 EMPTY_BB. Return the decl of the resolver function. */
2665 make_resolver_func (const tree default_decl
,
2666 const tree ifunc_alias_decl
,
2667 basic_block
*empty_bb
)
2669 char *resolver_name
;
2670 tree decl
, type
, decl_name
, t
;
2672 /* IFUNC's have to be globally visible. So, if the default_decl is
2673 not, then the name of the IFUNC should be made unique. */
2674 if (TREE_PUBLIC (default_decl
) == 0)
2676 char *ifunc_name
= make_unique_name (default_decl
, "ifunc", true);
2677 symtab
->change_decl_assembler_name (ifunc_alias_decl
,
2678 get_identifier (ifunc_name
));
2679 XDELETEVEC (ifunc_name
);
2682 resolver_name
= make_unique_name (default_decl
, "resolver", false);
2684 /* The resolver function should return a (void *). */
2685 type
= build_function_type_list (ptr_type_node
, NULL_TREE
);
2687 decl
= build_fn_decl (resolver_name
, type
);
2688 decl_name
= get_identifier (resolver_name
);
2689 SET_DECL_ASSEMBLER_NAME (decl
, decl_name
);
2691 DECL_NAME (decl
) = decl_name
;
2692 TREE_USED (decl
) = 1;
2693 DECL_ARTIFICIAL (decl
) = 1;
2694 DECL_IGNORED_P (decl
) = 1;
2695 TREE_PUBLIC (decl
) = 0;
2696 DECL_UNINLINABLE (decl
) = 1;
2698 /* Resolver is not external, body is generated. */
2699 DECL_EXTERNAL (decl
) = 0;
2700 DECL_EXTERNAL (ifunc_alias_decl
) = 0;
2702 DECL_CONTEXT (decl
) = NULL_TREE
;
2703 DECL_INITIAL (decl
) = make_node (BLOCK
);
2704 DECL_STATIC_CONSTRUCTOR (decl
) = 0;
2706 if (DECL_COMDAT_GROUP (default_decl
)
2707 || TREE_PUBLIC (default_decl
))
2709 /* In this case, each translation unit with a call to this
2710 versioned function will put out a resolver. Ensure it
2711 is comdat to keep just one copy. */
2712 DECL_COMDAT (decl
) = 1;
2713 make_decl_one_only (decl
, DECL_ASSEMBLER_NAME (decl
));
2715 /* Build result decl and add to function_decl. */
2716 t
= build_decl (UNKNOWN_LOCATION
, RESULT_DECL
, NULL_TREE
, ptr_type_node
);
2717 DECL_CONTEXT (t
) = decl
;
2718 DECL_ARTIFICIAL (t
) = 1;
2719 DECL_IGNORED_P (t
) = 1;
2720 DECL_RESULT (decl
) = t
;
2722 gimplify_function_tree (decl
);
2723 push_cfun (DECL_STRUCT_FUNCTION (decl
));
2724 *empty_bb
= init_lowered_empty_function (decl
, false,
2725 profile_count::uninitialized ());
2727 cgraph_node::add_new_function (decl
, true);
2728 symtab
->call_cgraph_insertion_hooks (cgraph_node::get_create (decl
));
2732 gcc_assert (ifunc_alias_decl
!= NULL
);
2733 /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name. */
2734 DECL_ATTRIBUTES (ifunc_alias_decl
)
2735 = make_attribute ("ifunc", resolver_name
,
2736 DECL_ATTRIBUTES (ifunc_alias_decl
));
2738 /* Create the alias for dispatch to resolver here. */
2739 cgraph_node::create_same_body_alias (ifunc_alias_decl
, decl
);
2740 XDELETEVEC (resolver_name
);
2744 /* Generate the dispatching code body to dispatch multi-versioned function
2745 DECL. The target hook is called to process the "target" attributes and
2746 provide the code to dispatch the right function at run-time. NODE points
2747 to the dispatcher decl whose body will be created. */
2750 ix86_generate_version_dispatcher_body (void *node_p
)
2753 basic_block empty_bb
;
2754 tree default_ver_decl
;
2755 struct cgraph_node
*versn
;
2756 struct cgraph_node
*node
;
2758 struct cgraph_function_version_info
*node_version_info
= NULL
;
2759 struct cgraph_function_version_info
*versn_info
= NULL
;
2761 node
= (cgraph_node
*)node_p
;
2763 node_version_info
= node
->function_version ();
2764 gcc_assert (node
->dispatcher_function
2765 && node_version_info
!= NULL
);
2767 if (node_version_info
->dispatcher_resolver
)
2768 return node_version_info
->dispatcher_resolver
;
2770 /* The first version in the chain corresponds to the default version. */
2771 default_ver_decl
= node_version_info
->next
->this_node
->decl
;
2773 /* node is going to be an alias, so remove the finalized bit. */
2774 node
->definition
= false;
2776 resolver_decl
= make_resolver_func (default_ver_decl
,
2777 node
->decl
, &empty_bb
);
2779 node_version_info
->dispatcher_resolver
= resolver_decl
;
2781 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl
));
2783 auto_vec
<tree
, 2> fn_ver_vec
;
2785 for (versn_info
= node_version_info
->next
; versn_info
;
2786 versn_info
= versn_info
->next
)
2788 versn
= versn_info
->this_node
;
2789 /* Check for virtual functions here again, as by this time it should
2790 have been determined if this function needs a vtable index or
2791 not. This happens for methods in derived classes that override
2792 virtual methods in base classes but are not explicitly marked as
2794 if (DECL_VINDEX (versn
->decl
))
2795 sorry ("virtual function multiversioning not supported");
2797 fn_ver_vec
.safe_push (versn
->decl
);
2800 dispatch_function_versions (resolver_decl
, &fn_ver_vec
, &empty_bb
);
2801 cgraph_edge::rebuild_edges ();
2803 return resolver_decl
;