1 | /* Copyright (C) 1988-2019 Free Software Foundation, Inc. |
2 | ||
3 | This file is part of GCC. | |
4 | ||
5 | GCC is free software; you can redistribute it and/or modify | |
6 | it under the terms of the GNU General Public License as published by | |
7 | the Free Software Foundation; either version 3, or (at your option) | |
8 | any later version. | |
9 | ||
10 | GCC is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | GNU General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU General Public License | |
16 | along with GCC; see the file COPYING3. If not see | |
17 | <http://www.gnu.org/licenses/>. */ | |
18 | ||
19 | #define IN_TARGET_CODE 1 | |
20 | ||
21 | #include "config.h" | |
22 | #include "system.h" | |
23 | #include "coretypes.h" | |
24 | #include "backend.h" | |
25 | #include "rtl.h" | |
26 | #include "tree.h" | |
27 | #include "memmodel.h" | |
28 | #include "gimple.h" | |
29 | #include "cfghooks.h" | |
30 | #include "cfgloop.h" | |
31 | #include "df.h" | |
32 | #include "tm_p.h" | |
33 | #include "stringpool.h" | |
34 | #include "expmed.h" | |
35 | #include "optabs.h" | |
36 | #include "regs.h" | |
37 | #include "emit-rtl.h" | |
38 | #include "recog.h" | |
39 | #include "cgraph.h" | |
40 | #include "diagnostic.h" | |
41 | #include "cfgbuild.h" | |
42 | #include "alias.h" | |
43 | #include "fold-const.h" | |
44 | #include "attribs.h" | |
45 | #include "calls.h" | |
46 | #include "stor-layout.h" | |
47 | #include "varasm.h" | |
48 | #include "output.h" | |
49 | #include "insn-attr.h" | |
50 | #include "flags.h" | |
51 | #include "except.h" | |
52 | #include "explow.h" | |
53 | #include "expr.h" | |
54 | #include "cfgrtl.h" | |
55 | #include "common/common-target.h" | |
56 | #include "langhooks.h" | |
57 | #include "reload.h" | |
58 | #include "gimplify.h" | |
59 | #include "dwarf2.h" | |
60 | #include "tm-constrs.h" | |
61 | #include "params.h" | |
62 | #include "cselib.h" | |
63 | #include "sched-int.h" | |
64 | #include "opts.h" | |
65 | #include "tree-pass.h" | |
66 | #include "context.h" | |
67 | #include "pass_manager.h" | |
68 | #include "target-globals.h" | |
69 | #include "gimple-iterator.h" | |
70 | #include "tree-vectorizer.h" | |
71 | #include "shrink-wrap.h" | |
72 | #include "builtins.h" | |
73 | #include "rtl-iter.h" | |
74 | #include "tree-iterator.h" | |
75 | #include "dbgcnt.h" | |
76 | #include "case-cfn-macros.h" | |
77 | #include "dojump.h" | |
78 | #include "fold-const-call.h" | |
79 | #include "tree-vrp.h" | |
80 | #include "tree-ssanames.h" | |
81 | #include "selftest.h" | |
82 | #include "selftest-rtl.h" | |
83 | #include "print-rtl.h" | |
84 | #include "intl.h" | |
85 | #include "ifcvt.h" | |
86 | #include "symbol-summary.h" | |
87 | #include "ipa-prop.h" | |
88 | #include "ipa-fnsummary.h" | |
89 | #include "wide-int-bitmask.h" | |
90 | #include "tree-vector-builder.h" | |
91 | #include "debug.h" | |
92 | #include "dwarf2out.h" | |
93 | #include "i386-builtins.h" | |
94 | #include "i386-features.h" | |
95 | ||
/* Base names of the out-of-line save/restore stubs.  The full stub
   symbol name is formed in get_stub_name by combining an ISA prefix
   ("avx"/"sse") with one of these and the managed-register count.  */
const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
  "savms64",
  "resms64",
  "resms64x",
  "savms64f",
  "resms64f",
  "resms64fx"
};
104 | ||
/* Order in which the stubs save/restore registers.  The layout
   constructor walks this array to assign each register its save-area
   offset; the comment columns give the resulting offsets for each of
   the four s_instances layouts.  */
const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
/* The below offset values are where each register is stored for the layout
   relative to incoming stack pointer.  The value of each m_regs[].offset will
   be relative to the incoming base pointer (rax or rsi) used by the stub.

    s_instances:   0		1		2		3
    Offsets:	realigned or	aligned + 8	aligned		aligned + 8
    Register	   aligned	aligned + 8	aligned w/HFP	w/HFP */
    XMM15_REG,	/* 0x10		0x18		0x10		0x18 */
    XMM14_REG,	/* 0x20		0x28		0x20		0x28 */
    XMM13_REG,	/* 0x30		0x38		0x30		0x38 */
    XMM12_REG,	/* 0x40		0x48		0x40		0x48 */
    XMM11_REG,	/* 0x50		0x58		0x50		0x58 */
    XMM10_REG,	/* 0x60		0x68		0x60		0x68 */
    XMM9_REG,	/* 0x70		0x78		0x70		0x78 */
    XMM8_REG,	/* 0x80		0x88		0x80		0x88 */
    XMM7_REG,	/* 0x90		0x98		0x90		0x98 */
    XMM6_REG,	/* 0xa0		0xa8		0xa0		0xa8 */
    SI_REG,	/* 0xa8		0xb0		0xa8		0xb0 */
    DI_REG,	/* 0xb0		0xb8		0xb0		0xb8 */
    BX_REG,	/* 0xb8		0xc0		0xb8		0xc0 */
    BP_REG,	/* 0xc0		0xc8		N/A		N/A  */
    R12_REG,	/* 0xc8		0xd0		0xc0		0xc8 */
    R13_REG,	/* 0xd0		0xd8		0xc8		0xd0 */
    R14_REG,	/* 0xd8		0xe0		0xd0		0xd8 */
    R15_REG,	/* 0xe0		0xe8		0xd8		0xe0 */
};
132 | ||
/* Instantiate static const values.  Out-of-class definitions are
   required for odr-used in-class constants (pre-C++17 rules).  */
const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
const unsigned xlogue_layout::MIN_REGS;
const unsigned xlogue_layout::MAX_REGS;
const unsigned xlogue_layout::MAX_EXTRA_REGS;
const unsigned xlogue_layout::VARIANT_COUNT;
const unsigned xlogue_layout::STUB_NAME_MAX_LEN;

/* Initialize xlogue_layout::s_stub_names to zero.  The names are
   filled in lazily by get_stub_name.  */
char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
				  [STUB_NAME_MAX_LEN];
144 | ||
/* Instantiates all xlogue_layout instances.  The four variants cover
   { incoming stack offset 0 or 8 } x { without / with hard frame
   pointer }, indexed by the xlogue_stub_sets value chosen in
   get_instance.  */
const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
  xlogue_layout (0, false),
  xlogue_layout (8, false),
  xlogue_layout (0, true),
  xlogue_layout (8, true)
};
152 | ||
153 | /* Return an appropriate const instance of xlogue_layout based upon values | |
154 | in cfun->machine and crtl. */ | |
99b1c316 | 155 | const class xlogue_layout & |
2bf6d935 ML |
156 | xlogue_layout::get_instance () |
157 | { | |
158 | enum xlogue_stub_sets stub_set; | |
159 | bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in; | |
160 | ||
161 | if (stack_realign_fp) | |
162 | stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN; | |
163 | else if (frame_pointer_needed) | |
164 | stub_set = aligned_plus_8 | |
165 | ? XLOGUE_SET_HFP_ALIGNED_PLUS_8 | |
166 | : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN; | |
167 | else | |
168 | stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED; | |
169 | ||
170 | return s_instances[stub_set]; | |
171 | } | |
172 | ||
173 | /* Determine how many clobbered registers can be saved by the stub. | |
174 | Returns the count of registers the stub will save and restore. */ | |
175 | unsigned | |
176 | xlogue_layout::count_stub_managed_regs () | |
177 | { | |
178 | bool hfp = frame_pointer_needed || stack_realign_fp; | |
179 | unsigned i, count; | |
180 | unsigned regno; | |
181 | ||
182 | for (count = i = MIN_REGS; i < MAX_REGS; ++i) | |
183 | { | |
184 | regno = REG_ORDER[i]; | |
185 | if (regno == BP_REG && hfp) | |
186 | continue; | |
187 | if (!ix86_save_reg (regno, false, false)) | |
188 | break; | |
189 | ++count; | |
190 | } | |
191 | return count; | |
192 | } | |
193 | ||
194 | /* Determine if register REGNO is a stub managed register given the | |
195 | total COUNT of stub managed registers. */ | |
196 | bool | |
197 | xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count) | |
198 | { | |
199 | bool hfp = frame_pointer_needed || stack_realign_fp; | |
200 | unsigned i; | |
201 | ||
202 | for (i = 0; i < count; ++i) | |
203 | { | |
204 | gcc_assert (i < MAX_REGS); | |
205 | if (REG_ORDER[i] == BP_REG && hfp) | |
206 | ++count; | |
207 | else if (REG_ORDER[i] == regno) | |
208 | return true; | |
209 | } | |
210 | return false; | |
211 | } | |
212 | ||
/* Constructor for xlogue_layout.  Computes the save-area slot offset of
   each stub-managed register for a layout whose incoming stack offset is
   STACK_ALIGN_OFF_IN.  HFP selects the hard-frame-pointer variant, which
   skips BP_REG and therefore manages one register fewer (17 vs 18).  */
xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
  : m_hfp (hfp) , m_nregs (hfp ? 17 : 18),
    m_stack_align_off_in (stack_align_off_in)
{
  HOST_WIDE_INT offset = stack_align_off_in;
  unsigned i, j;

  for (i = j = 0; i < MAX_REGS; ++i)
    {
      unsigned regno = REG_ORDER[i];

      /* With a hard frame pointer BP is saved by the normal prologue,
	 not by the stub.  */
      if (regno == BP_REG && hfp)
	continue;
      if (SSE_REGNO_P (regno))
	{
	  offset += 16;
	  /* Verify that SSE regs are always aligned.  */
	  gcc_assert (!((stack_align_off_in + offset) & 15));
	}
      else
	offset += 8;

      /* Stored offsets are relative to the stub's base pointer, hence
	 the STUB_INDEX_OFFSET bias.  */
      m_regs[j].regno = regno;
      m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
    }
  gcc_assert (j == m_nregs);
}
241 | ||
242 | const char * | |
243 | xlogue_layout::get_stub_name (enum xlogue_stub stub, | |
244 | unsigned n_extra_regs) | |
245 | { | |
246 | const int have_avx = TARGET_AVX; | |
247 | char *name = s_stub_names[!!have_avx][stub][n_extra_regs]; | |
248 | ||
249 | /* Lazy init */ | |
250 | if (!*name) | |
251 | { | |
252 | int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u", | |
253 | (have_avx ? "avx" : "sse"), | |
254 | STUB_BASE_NAMES[stub], | |
255 | MIN_REGS + n_extra_regs); | |
256 | gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN); | |
257 | } | |
258 | ||
259 | return name; | |
260 | } | |
261 | ||
262 | /* Return rtx of a symbol ref for the entry point (based upon | |
263 | cfun->machine->call_ms2sysv_extra_regs) of the specified stub. */ | |
264 | rtx | |
265 | xlogue_layout::get_stub_rtx (enum xlogue_stub stub) | |
266 | { | |
267 | const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs; | |
268 | gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS); | |
269 | gcc_assert (stub < XLOGUE_STUB_COUNT); | |
270 | gcc_assert (crtl->stack_realign_finalized); | |
271 | ||
272 | return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs)); | |
273 | } | |
274 | ||
/* Monotonically increasing id handed out to each new chain; used only
   for identifying chains in dump output.  */
unsigned scalar_chain::max_id = 0;
276 | ||
/* Initialize new chain.  SMODE_ is the scalar mode being converted
   from, VMODE_ the vector mode being converted to.  */

scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
{
  smode = smode_;
  vmode = vmode_;

  chain_id = ++max_id;

  if (dump_file)
    fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);

  /* The obstack must be set up before any bitmap is allocated from it;
     it is released in the destructor.  */
  bitmap_obstack_initialize (NULL);
  insns = BITMAP_ALLOC (NULL);
  defs = BITMAP_ALLOC (NULL);
  defs_conv = BITMAP_ALLOC (NULL);
  /* The worklist bitmap is allocated lazily in build ().  */
  queue = NULL;
}
295 | ||
/* Free chain's data.  */

scalar_chain::~scalar_chain ()
{
  BITMAP_FREE (insns);
  BITMAP_FREE (defs);
  BITMAP_FREE (defs_conv);
  /* Release the obstack set up in the constructor after all bitmaps
     backed by it have been freed.  */
  bitmap_obstack_release (NULL);
}
305 | ||
306 | /* Add instruction into chains' queue. */ | |
307 | ||
308 | void | |
309 | scalar_chain::add_to_queue (unsigned insn_uid) | |
310 | { | |
311 | if (bitmap_bit_p (insns, insn_uid) | |
312 | || bitmap_bit_p (queue, insn_uid)) | |
313 | return; | |
314 | ||
315 | if (dump_file) | |
316 | fprintf (dump_file, " Adding insn %d into chain's #%d queue\n", | |
317 | insn_uid, chain_id); | |
318 | bitmap_set_bit (queue, insn_uid); | |
319 | } | |
320 | ||
321 | /* For DImode conversion, mark register defined by DEF as requiring | |
322 | conversion. */ | |
323 | ||
324 | void | |
93cf5515 | 325 | general_scalar_chain::mark_dual_mode_def (df_ref def) |
2bf6d935 ML |
326 | { |
327 | gcc_assert (DF_REF_REG_DEF_P (def)); | |
328 | ||
329 | if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def))) | |
330 | return; | |
331 | ||
332 | if (dump_file) | |
333 | fprintf (dump_file, | |
334 | " Mark r%d def in insn %d as requiring both modes in chain #%d\n", | |
335 | DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id); | |
336 | ||
337 | bitmap_set_bit (defs_conv, DF_REF_REGNO (def)); | |
338 | } | |
339 | ||
/* For TImode conversion, it is unused.  */

void
timode_scalar_chain::mark_dual_mode_def (df_ref)
{
  /* TImode chains never record dual-mode defs; reaching here is a bug.  */
  gcc_unreachable ();
}
347 | ||
/* Check REF's chain to add new insns into a queue
   and find registers requiring conversion.  Insns in CANDIDATES that
   are linked to REF join this chain via the queue; references that
   cannot be converted force the corresponding defs into dual mode.  */

void
scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
{
  df_link *chain;

  /* REF must belong either to this chain or to a candidate insn.  */
  gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
	      || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
  add_to_queue (DF_REF_INSN_UID (ref));

  for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
    {
      unsigned uid = DF_REF_INSN_UID (chain->ref);

      if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
	continue;

      if (!DF_REF_REG_MEM_P (chain->ref))
	{
	  /* Already part of this chain: nothing to do.  */
	  if (bitmap_bit_p (insns, uid))
	    continue;

	  /* Another candidate insn: pull it into this chain.  */
	  if (bitmap_bit_p (candidates, uid))
	    {
	      add_to_queue (uid);
	      continue;
	    }
	}

      /* The linked insn is not convertible: whichever side is the def
	 must be available in both scalar and vector modes.  */
      if (DF_REF_REG_DEF_P (chain->ref))
	{
	  if (dump_file)
	    fprintf (dump_file, "  r%d def in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  mark_dual_mode_def (chain->ref);
	}
      else
	{
	  if (dump_file)
	    fprintf (dump_file, "  r%d use in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  mark_dual_mode_def (ref);
	}
    }
}
395 | ||
/* Add instruction into a chain.  Records the insn, the pseudo it
   defines (if any), and follows all of its register defs/uses to grow
   the chain further.  */

void
scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
{
  if (bitmap_bit_p (insns, insn_uid))
    return;

  if (dump_file)
    fprintf (dump_file, "  Adding insn %d to chain #%d\n", insn_uid, chain_id);

  bitmap_set_bit (insns, insn_uid);

  /* Record the pseudo defined by this insn; it will need conversion.  */
  rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
  rtx def_set = single_set (insn);
  if (def_set && REG_P (SET_DEST (def_set))
      && !HARD_REGISTER_P (SET_DEST (def_set)))
    bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));

  /* ??? The following is quadratic since analyze_register_chain
     iterates over all refs to look for dual-mode regs.  Instead this
     should be done separately for all regs mentioned in the chain once.  */
  df_ref ref;
  df_ref def;
  for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!HARD_REGISTER_P (DF_REF_REG (ref)))
      for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref));
	   def;
	   def = DF_REF_NEXT_REG (def))
	analyze_register_chain (candidates, def);
  for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!DF_REF_REG_MEM_P (ref))
      analyze_register_chain (candidates, ref);
}
430 | ||
/* Build new chain starting from insn INSN_UID recursively
   adding all dependent uses and definitions.  Insns pulled into this
   chain are removed from CANDIDATES.  */

void
scalar_chain::build (bitmap candidates, unsigned insn_uid)
{
  queue = BITMAP_ALLOC (NULL);
  bitmap_set_bit (queue, insn_uid);

  if (dump_file)
    fprintf (dump_file, "Building chain #%d...\n", chain_id);

  /* Worklist loop: add_insn may enqueue further insns through
     analyze_register_chain.  */
  while (!bitmap_empty_p (queue))
    {
      insn_uid = bitmap_first_set_bit (queue);
      bitmap_clear_bit (queue, insn_uid);
      bitmap_clear_bit (candidates, insn_uid);
      add_insn (candidates, insn_uid);
    }

  if (dump_file)
    {
      fprintf (dump_file, "Collected chain #%d...\n", chain_id);
      fprintf (dump_file, "  insns: ");
      dump_bitmap (dump_file, insns);
      if (!bitmap_empty_p (defs_conv))
	{
	  bitmap_iterator bi;
	  unsigned id;
	  const char *comma = "";
	  fprintf (dump_file, "  defs to convert: ");
	  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
	    {
	      fprintf (dump_file, "%sr%d", comma, id);
	      comma = ", ";
	    }
	  fprintf (dump_file, "\n");
	}
    }

  BITMAP_FREE (queue);
}
473 | ||
474 | /* Return a cost of building a vector costant | |
475 | instead of using a scalar one. */ | |
476 | ||
477 | int | |
93cf5515 | 478 | general_scalar_chain::vector_const_cost (rtx exp) |
2bf6d935 ML |
479 | { |
480 | gcc_assert (CONST_INT_P (exp)); | |
481 | ||
93cf5515 RB |
482 | if (standard_sse_constant_p (exp, vmode)) |
483 | return ix86_cost->sse_op; | |
484 | /* We have separate costs for SImode and DImode, use SImode costs | |
485 | for smaller modes. */ | |
486 | return ix86_cost->sse_load[smode == DImode ? 1 : 0]; | |
2bf6d935 ML |
487 | } |
488 | ||
/* Compute a gain for chain conversion.  Returns the net estimated
   benefit (scalar cost saved minus vector cost incurred, minus the
   cost of dual-mode register copies); a non-positive result means the
   conversion is not worthwhile.  */

int
general_scalar_chain::compute_convert_gain ()
{
  bitmap_iterator bi;
  unsigned insn_uid;
  int gain = 0;
  int cost = 0;

  if (dump_file)
    fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);

  /* SSE costs distinguish between SImode and DImode loads/stores, for
     int costs factor in the number of GPRs involved.  When supporting
     smaller modes than SImode the int load/store costs need to be
     adjusted as well.  */
  unsigned sse_cost_idx = smode == DImode ? 1 : 0;
  /* M is the number of GPRs a scalar value of mode SMODE occupies.  */
  unsigned m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;

  EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
    {
      rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
      rtx def_set = single_set (insn);
      rtx src = SET_SRC (def_set);
      rtx dst = SET_DEST (def_set);
      int igain = 0;

      if (REG_P (src) && REG_P (dst))
	igain += 2 * m - ix86_cost->xmm_move;
      else if (REG_P (src) && MEM_P (dst))
	igain
	  += m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx];
      else if (MEM_P (src) && REG_P (dst))
	igain += m * ix86_cost->int_load[2] - ix86_cost->sse_load[sse_cost_idx];
      else if (GET_CODE (src) == ASHIFT
	       || GET_CODE (src) == ASHIFTRT
	       || GET_CODE (src) == LSHIFTRT)
	{
	  /* Shifting a constant requires materializing it as a vector.  */
	  if (CONST_INT_P (XEXP (src, 0)))
	    igain -= vector_const_cost (XEXP (src, 0));
	  igain += m * ix86_cost->shift_const - ix86_cost->sse_op;
	  if (INTVAL (XEXP (src, 1)) >= 32)
	    igain -= COSTS_N_INSNS (1);
	}
      else if (GET_CODE (src) == PLUS
	       || GET_CODE (src) == MINUS
	       || GET_CODE (src) == IOR
	       || GET_CODE (src) == XOR
	       || GET_CODE (src) == AND)
	{
	  igain += m * ix86_cost->add - ix86_cost->sse_op;
	  /* Additional gain for andnot for targets without BMI.  */
	  if (GET_CODE (XEXP (src, 0)) == NOT
	      && !TARGET_BMI)
	    igain += m * ix86_cost->add;

	  /* Constant operands must be materialized as vectors.  */
	  if (CONST_INT_P (XEXP (src, 0)))
	    igain -= vector_const_cost (XEXP (src, 0));
	  if (CONST_INT_P (XEXP (src, 1)))
	    igain -= vector_const_cost (XEXP (src, 1));
	}
      else if (GET_CODE (src) == NEG
	       || GET_CODE (src) == NOT)
	igain += m * ix86_cost->add - ix86_cost->sse_op - COSTS_N_INSNS (1);
      else if (GET_CODE (src) == SMAX
	       || GET_CODE (src) == SMIN
	       || GET_CODE (src) == UMAX
	       || GET_CODE (src) == UMIN)
	{
	  /* We do not have any conditional move cost, estimate it as a
	     reg-reg move.  Comparisons are costed as adds.  */
	  igain += m * (COSTS_N_INSNS (2) + ix86_cost->add);
	  /* Integer SSE ops are all costed the same.  */
	  igain -= ix86_cost->sse_op;
	}
      else if (GET_CODE (src) == COMPARE)
	{
	  /* Assume comparison cost is the same.  */
	}
      else if (CONST_INT_P (src))
	{
	  if (REG_P (dst))
	    /* DImode can be immediate for TARGET_64BIT and SImode always.  */
	    igain += m * COSTS_N_INSNS (1);
	  else if (MEM_P (dst))
	    igain += (m * ix86_cost->int_store[2]
		      - ix86_cost->sse_store[sse_cost_idx]);
	  igain -= vector_const_cost (src);
	}
      else
	gcc_unreachable ();

      if (igain != 0 && dump_file)
	{
	  fprintf (dump_file, "  Instruction gain %d for ", igain);
	  dump_insn_slim (dump_file, insn);
	}
      gain += igain;
    }

  if (dump_file)
    fprintf (dump_file, "  Instruction conversion gain: %d\n", gain);

  /* Each dual-mode def costs a vector-to-scalar copy per definition.
     ??? What about integer to SSE?  */
  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi)
    cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->sse_to_integer;

  if (dump_file)
    fprintf (dump_file, "  Registers conversion cost: %d\n", cost);

  gain -= cost;

  if (dump_file)
    fprintf (dump_file, "  Total gain: %d\n", gain);

  return gain;
}
607 | ||
/* Replace REG in X with a subreg of mode VMODE of NEW_REG.  X is
   modified in place and returned; subexpressions are rewritten
   recursively.  */

rtx
general_scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
{
  if (x == reg)
    return gen_rtx_SUBREG (vmode, new_reg, 0);

  /* Walk every 'e' (rtx) and 'E' (rtx vector) operand of X.  */
  const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
  int i, j;
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg);
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j),
						   reg, new_reg);
    }

  return x;
}
630 | ||
/* Replace REG in INSN with a subreg of mode VMODE of NEW_REG.  Only the
   insn's single_set pattern is rewritten.  */

void
general_scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn,
						   rtx reg, rtx new_reg)
{
  replace_with_subreg (single_set (insn), reg, new_reg);
}
639 | ||
640 | /* Insert generated conversion instruction sequence INSNS | |
641 | after instruction AFTER. New BB may be required in case | |
642 | instruction has EH region attached. */ | |
643 | ||
644 | void | |
645 | scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after) | |
646 | { | |
647 | if (!control_flow_insn_p (after)) | |
648 | { | |
649 | emit_insn_after (insns, after); | |
650 | return; | |
651 | } | |
652 | ||
653 | basic_block bb = BLOCK_FOR_INSN (after); | |
654 | edge e = find_fallthru_edge (bb->succs); | |
655 | gcc_assert (e); | |
656 | ||
657 | basic_block new_bb = split_edge (e); | |
658 | emit_insn_after (insns, BB_HEAD (new_bb)); | |
659 | } | |
660 | ||
/* Make vector copies for all register REGNO definitions
   and replace its uses in a chain.  */

void
general_scalar_chain::make_vector_copies (unsigned regno)
{
  rtx reg = regno_reg_rtx[regno];
  rtx vreg = gen_reg_rtx (smode);
  df_ref ref;

  /* After every definition of REGNO outside of the chain, emit a copy
     of the scalar value into (a vmode subreg of) VREG.  */
  for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
    if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
      {
	start_sequence ();
	if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
	  {
	    /* GPR->XMM moves are undesirable on this target: bounce the
	       value through a stack temporary.  */
	    rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
	    if (smode == DImode && !TARGET_64BIT)
	      {
		/* 32-bit target: store the DImode value as two SImode
		   halves.  */
		emit_move_insn (adjust_address (tmp, SImode, 0),
				gen_rtx_SUBREG (SImode, reg, 0));
		emit_move_insn (adjust_address (tmp, SImode, 4),
				gen_rtx_SUBREG (SImode, reg, 4));
	      }
	    else
	      emit_move_insn (tmp, reg);
	    emit_insn (gen_rtx_SET
		       (gen_rtx_SUBREG (vmode, vreg, 0),
			gen_rtx_VEC_MERGE (vmode,
					   gen_rtx_VEC_DUPLICATE (vmode,
								  tmp),
					   CONST0_RTX (vmode),
					   GEN_INT (HOST_WIDE_INT_1U))));
	  }
	else if (!TARGET_64BIT && smode == DImode)
	  {
	    if (TARGET_SSE4_1)
	      {
		/* Load the low half, then insert the high half with
		   pinsrd.  */
		emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
					    CONST0_RTX (V4SImode),
					    gen_rtx_SUBREG (SImode, reg, 0)));
		emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
					      gen_rtx_SUBREG (V4SImode, vreg, 0),
					      gen_rtx_SUBREG (SImode, reg, 4),
					      GEN_INT (2)));
	      }
	    else
	      {
		/* Without SSE4.1: load each half into its own vector
		   and interleave.  */
		rtx tmp = gen_reg_rtx (DImode);
		emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
					    CONST0_RTX (V4SImode),
					    gen_rtx_SUBREG (SImode, reg, 0)));
		emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
					    CONST0_RTX (V4SImode),
					    gen_rtx_SUBREG (SImode, reg, 4)));
		emit_insn (gen_vec_interleave_lowv4si
			   (gen_rtx_SUBREG (V4SImode, vreg, 0),
			    gen_rtx_SUBREG (V4SImode, vreg, 0),
			    gen_rtx_SUBREG (V4SImode, tmp, 0)));
	      }
	  }
	else
	  /* Direct move: broadcast REG and merge into element 0.  */
	  emit_insn (gen_rtx_SET
		     (gen_rtx_SUBREG (vmode, vreg, 0),
		      gen_rtx_VEC_MERGE (vmode,
					 gen_rtx_VEC_DUPLICATE (vmode,
								reg),
					 CONST0_RTX (vmode),
					 GEN_INT (HOST_WIDE_INT_1U))));
	rtx_insn *seq = get_insns ();
	end_sequence ();
	rtx_insn *insn = DF_REF_INSN (ref);
	emit_conversion_insns (seq, insn);

	if (dump_file)
	  fprintf (dump_file,
		   "  Copied r%d to a vector register r%d for insn %d\n",
		   regno, REGNO (vreg), INSN_UID (insn));
      }

  /* Rewrite all uses of REGNO inside the chain to use VREG.  */
  for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
    if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
      {
	rtx_insn *insn = DF_REF_INSN (ref);
	replace_with_subreg_in_insn (insn, reg, vreg);

	if (dump_file)
	  fprintf (dump_file, "  Replaced r%d with r%d in insn %d\n",
		   regno, REGNO (vreg), INSN_UID (insn));
      }
}
752 | ||
/* Convert all definitions of register REGNO
   and fix its uses.  Scalar copies may be created
   in case register is used in not convertible insn.  */

void
general_scalar_chain::convert_reg (unsigned regno)
{
  bool scalar_copy = bitmap_bit_p (defs_conv, regno);
  rtx reg = regno_reg_rtx[regno];
  rtx scopy = NULL_RTX;
  df_ref ref;
  bitmap conv;

  /* CONV tracks chain insns whose use of REG still needs rewriting.  */
  conv = BITMAP_ALLOC (NULL);
  bitmap_copy (conv, insns);

  if (scalar_copy)
    scopy = gen_reg_rtx (smode);

  for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
    {
      rtx_insn *insn = DF_REF_INSN (ref);
      rtx def_set = single_set (insn);
      rtx src = SET_SRC (def_set);
      rtx reg = DF_REF_REG (ref);

      if (!MEM_P (src))
	{
	  /* Non-memory defs are rewritten in place to the vector mode;
	     their insns no longer need a separate use-rewrite below.  */
	  replace_with_subreg_in_insn (insn, reg, reg);
	  bitmap_clear_bit (conv, INSN_UID (insn));
	}

      if (scalar_copy)
	{
	  /* Also produce a scalar copy SCOPY for non-convertible uses.  */
	  start_sequence ();
	  if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
	    {
	      /* XMM->GPR moves are undesirable: bounce through a stack
		 temporary.  */
	      rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
	      emit_move_insn (tmp, reg);
	      if (!TARGET_64BIT && smode == DImode)
		{
		  emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
				  adjust_address (tmp, SImode, 0));
		  emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
				  adjust_address (tmp, SImode, 4));
		}
	      else
		emit_move_insn (scopy, tmp);
	    }
	  else if (!TARGET_64BIT && smode == DImode)
	    {
	      if (TARGET_SSE4_1)
		{
		  /* Extract each SImode half with a vec_select.  */
		  rtx tmp = gen_rtx_PARALLEL (VOIDmode,
					      gen_rtvec (1, const0_rtx));
		  emit_insn
		    (gen_rtx_SET
		     (gen_rtx_SUBREG (SImode, scopy, 0),
		      gen_rtx_VEC_SELECT (SImode,
					  gen_rtx_SUBREG (V4SImode, reg, 0),
					  tmp)));

		  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
		  emit_insn
		    (gen_rtx_SET
		     (gen_rtx_SUBREG (SImode, scopy, 4),
		      gen_rtx_VEC_SELECT (SImode,
					  gen_rtx_SUBREG (V4SImode, reg, 0),
					  tmp)));
		}
	      else
		{
		  /* Without SSE4.1: copy the vector, take the low word,
		     shift right by 32 and take the low word again.  */
		  rtx vcopy = gen_reg_rtx (V2DImode);
		  emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0));
		  emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
				  gen_rtx_SUBREG (SImode, vcopy, 0));
		  emit_move_insn (vcopy,
				  gen_rtx_LSHIFTRT (V2DImode,
						    vcopy, GEN_INT (32)));
		  emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
				  gen_rtx_SUBREG (SImode, vcopy, 0));
		}
	    }
	  else
	    emit_move_insn (scopy, reg);

	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_conversion_insns (seq, insn);

	  if (dump_file)
	    fprintf (dump_file,
		     "  Copied r%d to a scalar register r%d for insn %d\n",
		     regno, REGNO (scopy), INSN_UID (insn));
	}
    }

  for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
    if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
      {
	if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref)))
	  {
	    rtx_insn *insn = DF_REF_INSN (ref);

	    rtx def_set = single_set (insn);
	    gcc_assert (def_set);

	    rtx src = SET_SRC (def_set);
	    rtx dst = SET_DEST (def_set);

	    /* Reg-to-mem stores keep the scalar reg; everything else is
	       rewritten to the vector subreg.  */
	    if (!MEM_P (dst) || !REG_P (src))
	      replace_with_subreg_in_insn (insn, reg, reg);

	    bitmap_clear_bit (conv, INSN_UID (insn));
	  }
      }
    /* Skip debug insns and uninitialized uses.  */
    else if (DF_REF_CHAIN (ref)
	     && NONDEBUG_INSN_P (DF_REF_INSN (ref)))
      {
	/* Uses outside the chain get the scalar copy.  */
	gcc_assert (scopy);
	replace_rtx (DF_REF_INSN (ref), reg, scopy);
	df_insn_rescan (DF_REF_INSN (ref));
      }

  BITMAP_FREE (conv);
}
880 | ||
/* Convert operand OP in INSN.  We should handle
   memory operands and uninitialized registers.
   All other register uses are converted during
   registers conversion.  */

void
general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
{
  /* Unshare first: we are going to modify *OP in place below.  */
  *op = copy_rtx_if_shared (*op);

  if (GET_CODE (*op) == NOT)
    {
      /* Recurse into the negated operand, then retag the NOT itself
	 with the vector mode.  */
      convert_op (&XEXP (*op, 0), insn);
      PUT_MODE (*op, vmode);
    }
  else if (MEM_P (*op))
    {
      /* Load the memory operand into a fresh scalar pseudo before INSN
	 and use a vector-mode subreg of that pseudo as the operand.  */
      rtx tmp = gen_reg_rtx (GET_MODE (*op));

      emit_insn_before (gen_move_insn (tmp, *op), insn);
      *op = gen_rtx_SUBREG (vmode, tmp, 0);

      if (dump_file)
	fprintf (dump_file, "  Preloading operand for insn %d into r%d\n",
		 INSN_UID (insn), REGNO (tmp));
    }
  else if (REG_P (*op))
    {
      /* We may have not converted register usage in case
	 this register has no definition.  Otherwise it
	 should be converted in convert_reg.  */
      df_ref ref;
      FOR_EACH_INSN_USE (ref, insn)
	if (DF_REF_REGNO (ref) == REGNO (*op))
	  {
	    /* An uninitialized use has no def chain.  */
	    gcc_assert (!DF_REF_CHAIN (ref));
	    break;
	  }
      *op = gen_rtx_SUBREG (vmode, *op, 0);
    }
  else if (CONST_INT_P (*op))
    {
      /* Materialize the integer constant as a vector constant whose
	 first element is the value and the rest are zero.  */
      rtx vec_cst;
      rtx tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (smode), 0);

      /* Prefer all ones vector in case of -1.  */
      if (constm1_operand (*op, GET_MODE (*op)))
	vec_cst = CONSTM1_RTX (vmode);
      else
	{
	  unsigned n = GET_MODE_NUNITS (vmode);
	  rtx *v = XALLOCAVEC (rtx, n);
	  v[0] = *op;
	  for (unsigned i = 1; i < n; ++i)
	    v[i] = const0_rtx;
	  vec_cst = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
	}

      /* Non-standard SSE constants must come from the constant pool.  */
      if (!standard_sse_constant_p (vec_cst, vmode))
	{
	  start_sequence ();
	  vec_cst = validize_mem (force_const_mem (vmode, vec_cst));
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_insn_before (seq, insn);
	}

      emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
      *op = tmp;
    }
  else
    {
      /* Anything else must already be a vector-mode subreg produced by
	 an earlier conversion.  */
      gcc_assert (SUBREG_P (*op));
      gcc_assert (GET_MODE (*op) == vmode);
    }
}
957 | ||
/* Convert INSN to vector mode.  Rewrites the single_set of INSN in
   place: operands are converted via convert_op, the operation's mode
   is retagged to the chain's vector mode, and any helper moves are
   emitted before INSN.  Aborts via fatal_insn_not_found if the
   rewritten insn does not match any pattern.  */

void
general_scalar_chain::convert_insn (rtx_insn *insn)
{
  rtx def_set = single_set (insn);
  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);
  rtx subreg;

  if (MEM_P (dst) && !REG_P (src))
    {
      /* There are no scalar integer instructions and therefore
	 temporary register usage is required.  Store through a fresh
	 scalar pseudo; the original store is re-emitted after INSN.  */
      rtx tmp = gen_reg_rtx (smode);
      emit_conversion_insns (gen_move_insn (dst, tmp), insn);
      dst = gen_rtx_SUBREG (vmode, tmp, 0);
    }

  switch (GET_CODE (src))
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      /* Only the shifted operand is converted; the shift count stays
	 a scalar (it was validated as a CONST_INT candidate).  */
      convert_op (&XEXP (src, 0), insn);
      PUT_MODE (src, vmode);
      break;

    case PLUS:
    case MINUS:
    case IOR:
    case XOR:
    case AND:
    case SMAX:
    case SMIN:
    case UMAX:
    case UMIN:
      convert_op (&XEXP (src, 0), insn);
      convert_op (&XEXP (src, 1), insn);
      PUT_MODE (src, vmode);
      break;

    case NEG:
      /* Vector negation is expressed as 0 - x.  */
      src = XEXP (src, 0);
      convert_op (&src, insn);
      subreg = gen_reg_rtx (vmode);
      emit_insn_before (gen_move_insn (subreg, CONST0_RTX (vmode)), insn);
      src = gen_rtx_MINUS (vmode, subreg, src);
      break;

    case NOT:
      /* Vector bitwise not is expressed as x ^ -1.  */
      src = XEXP (src, 0);
      convert_op (&src, insn);
      subreg = gen_reg_rtx (vmode);
      emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (vmode)), insn);
      src = gen_rtx_XOR (vmode, src, subreg);
      break;

    case MEM:
      /* A reg = mem load is left alone; mem on both sides was already
	 split via the temporary above.  */
      if (!REG_P (dst))
	convert_op (&src, insn);
      break;

    case REG:
      if (!MEM_P (dst))
	convert_op (&src, insn);
      break;

    case SUBREG:
      gcc_assert (GET_MODE (src) == vmode);
      break;

    case COMPARE:
      /* Zero-check compare (see convertible_comparison_p): dig out the
	 original DImode register from (ior (subreg ...) (subreg ...)).  */
      src = SUBREG_REG (XEXP (XEXP (src, 0), 0));

      gcc_assert ((REG_P (src) && GET_MODE (src) == DImode)
		  || (SUBREG_P (src) && GET_MODE (src) == V2DImode));

      if (REG_P (src))
	subreg = gen_rtx_SUBREG (V2DImode, src, 0);
      else
	subreg = copy_rtx_if_shared (src);
      /* Duplicate the low quadword so the following ptest sees the
	 value in both halves.  */
      emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
						    copy_rtx_if_shared (subreg),
						    copy_rtx_if_shared (subreg)),
			insn);
      dst = gen_rtx_REG (CCmode, FLAGS_REG);
      src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src),
					       copy_rtx_if_shared (src)),
			    UNSPEC_PTEST);
      break;

    case CONST_INT:
      convert_op (&src, insn);
      break;

    default:
      gcc_unreachable ();
    }

  SET_SRC (def_set) = src;
  SET_DEST (def_set) = dst;

  /* Drop possible dead definitions.  */
  PATTERN (insn) = def_set;

  INSN_CODE (insn) = -1;
  int patt = recog_memoized (insn);
  if (patt == -1)
    fatal_insn_not_found (insn);
  df_insn_rescan (insn);
}
1070 | ||
/* Fix uses of converted REG in debug insns.  After conversion REG has
   V1TImode; debug expressions still want the scalar TImode value, so
   each debug use is wrapped in a (subreg:TI ...).  */

void
timode_scalar_chain::fix_debug_reg_uses (rtx reg)
{
  /* Without var-tracking there are no debug uses to fix.  */
  if (!flag_var_tracking)
    return;

  df_ref ref, next;
  for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
    {
      rtx_insn *insn = DF_REF_INSN (ref);
      /* Make sure the next ref is for a different instruction,
	 so that we're not affected by the rescan.  */
      next = DF_REF_NEXT_REG (ref);
      while (next && DF_REF_INSN (next) == insn)
	next = DF_REF_NEXT_REG (next);

      if (DEBUG_INSN_P (insn))
	{
	  /* It may be a debug insn with a TImode variable in
	     register.  */
	  bool changed = false;
	  /* Patch every use location of REG within this one insn.  */
	  for (; ref != next; ref = DF_REF_NEXT_REG (ref))
	    {
	      rtx *loc = DF_REF_LOC (ref);
	      if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
		{
		  *loc = gen_rtx_SUBREG (TImode, *loc, 0);
		  changed = true;
		}
	    }
	  if (changed)
	    df_insn_rescan (insn);
	}
    }
}
1108 | ||
/* Convert INSN from TImode to V1TImode.  Only loads, stores and
   constant stores are handled (see timode_scalar_to_vector_candidate_p);
   the conversion is mostly a mode retag plus debug-use fixups.  */

void
timode_scalar_chain::convert_insn (rtx_insn *insn)
{
  rtx def_set = single_set (insn);
  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  switch (GET_CODE (dst))
    {
    case REG:
      {
	/* Keep any REG_EQUAL/REG_EQUIV note consistent with the new
	   destination mode.  */
	rtx tmp = find_reg_equal_equiv_note (insn);
	if (tmp)
	  PUT_MODE (XEXP (tmp, 0), V1TImode);
	PUT_MODE (dst, V1TImode);
	fix_debug_reg_uses (dst);
      }
      break;
    case MEM:
      PUT_MODE (dst, V1TImode);
      break;

    default:
      gcc_unreachable ();
    }

  switch (GET_CODE (src))
    {
    case REG:
      PUT_MODE (src, V1TImode);
      /* Call fix_debug_reg_uses only if SRC is never defined.  */
      if (!DF_REG_DEF_CHAIN (REGNO (src)))
	fix_debug_reg_uses (src);
      break;

    case MEM:
      PUT_MODE (src, V1TImode);
      break;

    case CONST_WIDE_INT:
      if (NONDEBUG_INSN_P (insn))
	{
	  /* Since there are no instructions to store 128-bit constant,
	     temporary register usage is required.  The constant comes
	     from the pool; the original store goes through TMP.  */
	  rtx tmp = gen_reg_rtx (V1TImode);
	  start_sequence ();
	  src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
	  src = validize_mem (force_const_mem (V1TImode, src));
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  if (seq)
	    emit_insn_before (seq, insn);
	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
	  dst = tmp;
	}
      break;

    case CONST_INT:
      /* Only standard SSE constants (all-zeros / all-ones) reach here;
	 anything else was rejected as a candidate.  */
      switch (standard_sse_constant_p (src, TImode))
	{
	case 1:
	  src = CONST0_RTX (GET_MODE (dst));
	  break;
	case 2:
	  src = CONSTM1_RTX (GET_MODE (dst));
	  break;
	default:
	  gcc_unreachable ();
	}
      if (NONDEBUG_INSN_P (insn))
	{
	  rtx tmp = gen_reg_rtx (V1TImode);
	  /* Since there are no instructions to store standard SSE
	     constant, temporary register usage is required.  */
	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
	  dst = tmp;
	}
      break;

    default:
      gcc_unreachable ();
    }

  SET_SRC (def_set) = src;
  SET_DEST (def_set) = dst;

  /* Drop possible dead definitions.  */
  PATTERN (insn) = def_set;

  INSN_CODE (insn) = -1;
  recog_memoized (insn);
  df_insn_rescan (insn);
}
1204 | ||
/* Convert all registers involved in the chain: registers defined
   within the chain are converted in place (convert_reg); registers
   that only need conversion but have no in-chain definition get a
   vector copy (make_vector_copies).  */

void
general_scalar_chain::convert_registers ()
{
  bitmap_iterator bi;
  unsigned id;

  /* Registers defined inside the chain.  */
  EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
    convert_reg (id);

  /* Registers needing conversion but defined outside the chain.  */
  EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
    make_vector_copies (id);
}
1217 | ||
1218 | /* Convert whole chain creating required register | |
1219 | conversions and copies. */ | |
1220 | ||
1221 | int | |
1222 | scalar_chain::convert () | |
1223 | { | |
1224 | bitmap_iterator bi; | |
1225 | unsigned id; | |
1226 | int converted_insns = 0; | |
1227 | ||
1228 | if (!dbg_cnt (stv_conversion)) | |
1229 | return 0; | |
1230 | ||
1231 | if (dump_file) | |
1232 | fprintf (dump_file, "Converting chain #%d...\n", chain_id); | |
1233 | ||
1234 | convert_registers (); | |
1235 | ||
1236 | EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi) | |
1237 | { | |
1238 | convert_insn (DF_INSN_UID_GET (id)->insn); | |
1239 | converted_insns++; | |
1240 | } | |
1241 | ||
1242 | return converted_insns; | |
1243 | } | |
1244 | ||
1245 | /* Return 1 if INSN uses or defines a hard register. | |
1246 | Hard register uses in a memory address are ignored. | |
1247 | Clobbers and flags definitions are ignored. */ | |
1248 | ||
1249 | static bool | |
1250 | has_non_address_hard_reg (rtx_insn *insn) | |
1251 | { | |
1252 | df_ref ref; | |
1253 | FOR_EACH_INSN_DEF (ref, insn) | |
1254 | if (HARD_REGISTER_P (DF_REF_REAL_REG (ref)) | |
1255 | && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER) | |
1256 | && DF_REF_REGNO (ref) != FLAGS_REG) | |
1257 | return true; | |
1258 | ||
1259 | FOR_EACH_INSN_USE (ref, insn) | |
1260 | if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref))) | |
1261 | return true; | |
1262 | ||
1263 | return false; | |
1264 | } | |
1265 | ||
/* Check if comparison INSN may be transformed
   into vector comparison.  Currently we transform
   zero checks only which look like:

   (set (reg:CCZ 17 flags)
	(compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
			     (subreg:SI (reg:DI x) 0))
		     (const_int 0 [0])))  */

static bool
convertible_comparison_p (rtx_insn *insn, machine_mode mode)
{
  /* ??? Currently convertible for double-word DImode chain only.  */
  if (TARGET_64BIT || mode != DImode)
    return false;

  /* The conversion emits a ptest, which needs SSE4.1.  */
  if (!TARGET_SSE4_1)
    return false;

  rtx def_set = single_set (insn);

  gcc_assert (def_set);

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  gcc_assert (GET_CODE (src) == COMPARE);

  /* Must set the flags register in CCZmode (zero check only).  */
  if (GET_CODE (dst) != REG
      || REGNO (dst) != FLAGS_REG
      || GET_MODE (dst) != CCZmode)
    return false;

  rtx op1 = XEXP (src, 0);
  rtx op2 = XEXP (src, 1);

  /* Only comparisons against zero are handled.  */
  if (op2 != CONST0_RTX (GET_MODE (op2)))
    return false;

  if (GET_CODE (op1) != IOR)
    return false;

  op2 = XEXP (op1, 1);
  op1 = XEXP (op1, 0);

  /* The IOR operands must be the two SImode halves (byte 0 and byte 4,
     in either order) of one DImode value.  */
  if (!SUBREG_P (op1)
      || !SUBREG_P (op2)
      || GET_MODE (op1) != SImode
      || GET_MODE (op2) != SImode
      || ((SUBREG_BYTE (op1) != 0
	   || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
	  && (SUBREG_BYTE (op2) != 0
	      || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
    return false;

  op1 = SUBREG_REG (op1);
  op2 = SUBREG_REG (op2);

  /* Both halves must come from the same DImode register.  */
  if (op1 != op2
      || !REG_P (op1)
      || GET_MODE (op1) != DImode)
    return false;

  return true;
}
1331 | ||
/* The general version of scalar_to_vector_candidate_p.  Returns true
   if INSN is a single_set in MODE whose operation and operands can be
   rewritten in the corresponding vector mode.  */

static bool
general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
{
  rtx def_set = single_set (insn);

  if (!def_set)
    return false;

  if (has_non_address_hard_reg (insn))
    return false;

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  if (GET_CODE (src) == COMPARE)
    return convertible_comparison_p (insn, mode);

  /* We are interested in "mode" only.  */
  if ((GET_MODE (src) != mode
       && !CONST_INT_P (src))
      || GET_MODE (dst) != mode)
    return false;

  if (!REG_P (dst) && !MEM_P (dst))
    return false;

  switch (GET_CODE (src))
    {
    case ASHIFTRT:
      /* Vector arithmetic right shift in DImode needs AVX512VL.  */
      if (!TARGET_AVX512VL)
	return false;
      /* FALLTHRU */

    case ASHIFT:
    case LSHIFTRT:
      /* Only immediate shift counts in [0, 63] are convertible.  */
      if (!CONST_INT_P (XEXP (src, 1))
	  || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, 63))
	return false;
      break;

    case SMAX:
    case SMIN:
    case UMAX:
    case UMIN:
      /* Vector min/max availability depends on mode and ISA level.  */
      if ((mode == DImode && !TARGET_AVX512VL)
	  || (mode == SImode && !TARGET_SSE4_1))
	return false;
      /* Fallthru.  */

    case PLUS:
    case MINUS:
    case IOR:
    case XOR:
    case AND:
      /* The second operand must be a register, memory or immediate.  */
      if (!REG_P (XEXP (src, 1))
	  && !MEM_P (XEXP (src, 1))
	  && !CONST_INT_P (XEXP (src, 1)))
	return false;

      if (GET_MODE (XEXP (src, 1)) != mode
	  && !CONST_INT_P (XEXP (src, 1)))
	return false;
      break;

    case NEG:
    case NOT:
      break;

    case REG:
      return true;

    case MEM:
    case CONST_INT:
      return REG_P (dst);

    default:
      return false;
    }

  /* Shared first-operand check for the unary and binary cases above.  */
  if (!REG_P (XEXP (src, 0))
      && !MEM_P (XEXP (src, 0))
      && !CONST_INT_P (XEXP (src, 0))
      /* Check for andnot case.  */
      && (GET_CODE (src) != AND
	  || GET_CODE (XEXP (src, 0)) != NOT
	  || !REG_P (XEXP (XEXP (src, 0), 0))))
    return false;

  if (GET_MODE (XEXP (src, 0)) != mode
      && !CONST_INT_P (XEXP (src, 0)))
    return false;

  return true;
}
1428 | ||
1429 | /* The TImode version of scalar_to_vector_candidate_p. */ | |
1430 | ||
1431 | static bool | |
1432 | timode_scalar_to_vector_candidate_p (rtx_insn *insn) | |
1433 | { | |
1434 | rtx def_set = single_set (insn); | |
1435 | ||
1436 | if (!def_set) | |
1437 | return false; | |
1438 | ||
1439 | if (has_non_address_hard_reg (insn)) | |
1440 | return false; | |
1441 | ||
1442 | rtx src = SET_SRC (def_set); | |
1443 | rtx dst = SET_DEST (def_set); | |
1444 | ||
1445 | /* Only TImode load and store are allowed. */ | |
1446 | if (GET_MODE (dst) != TImode) | |
1447 | return false; | |
1448 | ||
1449 | if (MEM_P (dst)) | |
1450 | { | |
1451 | /* Check for store. Memory must be aligned or unaligned store | |
1452 | is optimal. Only support store from register, standard SSE | |
1453 | constant or CONST_WIDE_INT generated from piecewise store. | |
1454 | ||
1455 | ??? Verify performance impact before enabling CONST_INT for | |
1456 | __int128 store. */ | |
1457 | if (misaligned_operand (dst, TImode) | |
1458 | && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL) | |
1459 | return false; | |
1460 | ||
1461 | switch (GET_CODE (src)) | |
1462 | { | |
1463 | default: | |
1464 | return false; | |
1465 | ||
1466 | case REG: | |
1467 | case CONST_WIDE_INT: | |
1468 | return true; | |
1469 | ||
1470 | case CONST_INT: | |
1471 | return standard_sse_constant_p (src, TImode); | |
1472 | } | |
1473 | } | |
1474 | else if (MEM_P (src)) | |
1475 | { | |
1476 | /* Check for load. Memory must be aligned or unaligned load is | |
1477 | optimal. */ | |
1478 | return (REG_P (dst) | |
1479 | && (!misaligned_operand (src, TImode) | |
1480 | || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)); | |
1481 | } | |
1482 | ||
1483 | return false; | |
1484 | } | |
1485 | ||
/* For a given bitmap of insn UIDs scans all instruction and
   remove insn from CANDIDATES in case it has both convertible
   and not convertible definitions.

   All insns in a bitmap are conversion candidates according to
   scalar_to_vector_candidate_p.  Currently it implies all insns
   are single_set.  */

static void
general_remove_non_convertible_regs (bitmap candidates)
{
  bitmap_iterator bi;
  unsigned id;
  bitmap regs = BITMAP_ALLOC (NULL);

  /* Pass 1: collect pseudos set by a candidate insn that also have a
     definition outside CANDIDATES.  */
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
    {
      rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
      rtx reg = SET_DEST (def_set);

      /* Skip non-registers, hard registers and pseudos already
	 flagged.  */
      if (!REG_P (reg)
	  || bitmap_bit_p (regs, REGNO (reg))
	  || HARD_REGISTER_P (reg))
	continue;

      for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
	   def;
	   def = DF_REF_NEXT_REG (def))
	{
	  if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	    {
	      if (dump_file)
		fprintf (dump_file,
			 "r%d has non convertible definition in insn %d\n",
			 REGNO (reg), DF_REF_INSN_UID (def));

	      bitmap_set_bit (regs, REGNO (reg));
	      break;
	    }
	}
    }

  /* Pass 2: drop every candidate insn that defines such a pseudo.  */
  EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
    {
      for (df_ref def = DF_REG_DEF_CHAIN (id);
	   def;
	   def = DF_REF_NEXT_REG (def))
	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Removing insn %d from candidates list\n",
		       DF_REF_INSN_UID (def));

	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
	  }
    }

  BITMAP_FREE (regs);
}
1545 | ||
1546 | /* For a register REGNO, scan instructions for its defs and uses. | |
1547 | Put REGNO in REGS if a def or use isn't in CANDIDATES. */ | |
1548 | ||
1549 | static void | |
1550 | timode_check_non_convertible_regs (bitmap candidates, bitmap regs, | |
1551 | unsigned int regno) | |
1552 | { | |
1553 | for (df_ref def = DF_REG_DEF_CHAIN (regno); | |
1554 | def; | |
1555 | def = DF_REF_NEXT_REG (def)) | |
1556 | { | |
1557 | if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) | |
1558 | { | |
1559 | if (dump_file) | |
1560 | fprintf (dump_file, | |
1561 | "r%d has non convertible def in insn %d\n", | |
1562 | regno, DF_REF_INSN_UID (def)); | |
1563 | ||
1564 | bitmap_set_bit (regs, regno); | |
1565 | break; | |
1566 | } | |
1567 | } | |
1568 | ||
1569 | for (df_ref ref = DF_REG_USE_CHAIN (regno); | |
1570 | ref; | |
1571 | ref = DF_REF_NEXT_REG (ref)) | |
1572 | { | |
1573 | /* Debug instructions are skipped. */ | |
1574 | if (NONDEBUG_INSN_P (DF_REF_INSN (ref)) | |
1575 | && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref))) | |
1576 | { | |
1577 | if (dump_file) | |
1578 | fprintf (dump_file, | |
1579 | "r%d has non convertible use in insn %d\n", | |
1580 | regno, DF_REF_INSN_UID (ref)); | |
1581 | ||
1582 | bitmap_set_bit (regs, regno); | |
1583 | break; | |
1584 | } | |
1585 | } | |
1586 | } | |
1587 | ||
/* The TImode version of remove_non_convertible_regs.  Unlike the
   general version, both the destination and the source register of a
   candidate insn are checked, and offending insns are removed via both
   def and use chains.  */

static void
timode_remove_non_convertible_regs (bitmap candidates)
{
  bitmap_iterator bi;
  unsigned id;
  bitmap regs = BITMAP_ALLOC (NULL);

  /* Pass 1: flag registers with a def or use outside CANDIDATES.  */
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
    {
      rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
      rtx dest = SET_DEST (def_set);
      rtx src = SET_SRC (def_set);

      /* Skip insns where neither side is an unflagged pseudo.  */
      if ((!REG_P (dest)
	   || bitmap_bit_p (regs, REGNO (dest))
	   || HARD_REGISTER_P (dest))
	  && (!REG_P (src)
	      || bitmap_bit_p (regs, REGNO (src))
	      || HARD_REGISTER_P (src)))
	continue;

      if (REG_P (dest))
	timode_check_non_convertible_regs (candidates, regs,
					   REGNO (dest));

      if (REG_P (src))
	timode_check_non_convertible_regs (candidates, regs,
					   REGNO (src));
    }

  /* Pass 2: drop every candidate insn defining or using a flagged
     register.  */
  EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
    {
      for (df_ref def = DF_REG_DEF_CHAIN (id);
	   def;
	   def = DF_REF_NEXT_REG (def))
	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Removing insn %d from candidates list\n",
		       DF_REF_INSN_UID (def));

	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
	  }

      for (df_ref ref = DF_REG_USE_CHAIN (id);
	   ref;
	   ref = DF_REF_NEXT_REG (ref))
	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Removing insn %d from candidates list\n",
		       DF_REF_INSN_UID (ref));

	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
	  }
    }

  BITMAP_FREE (regs);
}
1649 | ||
/* Main STV pass function.  Find and convert scalar
   instructions into vector mode when profitable.  Candidates are
   gathered per mode (SImode, DImode, TImode), filtered, grouped into
   chains and converted chain by chain when the estimated gain is
   positive.  Always returns 0 (no extra TODO flags).  */

static unsigned int
convert_scalars_to_vector ()
{
  basic_block bb;
  int converted_insns = 0;

  bitmap_obstack_initialize (NULL);
  const machine_mode cand_mode[3] = { SImode, DImode, TImode };
  const machine_mode cand_vmode[3] = { V4SImode, V2DImode, V1TImode };
  bitmap_head candidates[3];  /* { SImode, DImode, TImode } */
  for (unsigned i = 0; i < 3; ++i)
    bitmap_initialize (&candidates[i], &bitmap_default_obstack);

  /* Set up the dataflow problems the chain analysis relies on.  */
  calculate_dominance_info (CDI_DOMINATORS);
  df_set_flags (DF_DEFER_INSN_RESCAN);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_md_add_problem ();
  df_analyze ();

  /* Find all instructions we want to convert into vector mode.  */
  if (dump_file)
    fprintf (dump_file, "Searching for mode conversion candidates...\n");

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;
      FOR_BB_INSNS (bb, insn)
	if (TARGET_64BIT
	    && timode_scalar_to_vector_candidate_p (insn))
	  {
	    if (dump_file)
	      fprintf (dump_file, "  insn %d is marked as a TImode candidate\n",
		       INSN_UID (insn));

	    bitmap_set_bit (&candidates[2], INSN_UID (insn));
	  }
	else
	  {
	    /* Check {SI,DI}mode.  */
	    for (unsigned i = 0; i <= 1; ++i)
	      if (general_scalar_to_vector_candidate_p (insn, cand_mode[i]))
		{
		  if (dump_file)
		    fprintf (dump_file, "  insn %d is marked as a %s candidate\n",
			     INSN_UID (insn), i == 0 ? "SImode" : "DImode");

		  bitmap_set_bit (&candidates[i], INSN_UID (insn));
		  break;
		}
	  }
    }

  /* Weed out insns tied to registers with unconvertible defs/uses.  */
  if (TARGET_64BIT)
    timode_remove_non_convertible_regs (&candidates[2]);
  for (unsigned i = 0; i <= 1; ++i)
    general_remove_non_convertible_regs (&candidates[i]);

  /* Diagnostic only: report when every candidate set is empty.  */
  for (unsigned i = 0; i <= 2; ++i)
    if (!bitmap_empty_p (&candidates[i]))
      break;
    else if (i == 2 && dump_file)
      fprintf (dump_file, "There are no candidates for optimization.\n");

  for (unsigned i = 0; i <= 2; ++i)
    while (!bitmap_empty_p (&candidates[i]))
      {
	unsigned uid = bitmap_first_set_bit (&candidates[i]);
	scalar_chain *chain;

	if (cand_mode[i] == TImode)
	  chain = new timode_scalar_chain;
	else
	  chain = new general_scalar_chain (cand_mode[i], cand_vmode[i]);

	/* Find instructions chain we want to convert to vector mode.
	   Check all uses and definitions to estimate all required
	   conversions.  */
	chain->build (&candidates[i], uid);

	if (chain->compute_convert_gain () > 0)
	  converted_insns += chain->convert ();
	else
	  if (dump_file)
	    fprintf (dump_file, "Chain #%d conversion is not profitable\n",
		     chain->chain_id);

	delete chain;
      }

  if (dump_file)
    fprintf (dump_file, "Total insns converted: %d\n", converted_insns);

  for (unsigned i = 0; i <= 2; ++i)
    bitmap_release (&candidates[i]);
  bitmap_obstack_release (NULL);
  df_process_deferred_rescans ();

  /* Conversion means we may have 128bit register spills/fills
     which require aligned stack.  */
  if (converted_insns)
    {
      if (crtl->stack_alignment_needed < 128)
	crtl->stack_alignment_needed = 128;
      if (crtl->stack_alignment_estimated < 128)
	crtl->stack_alignment_estimated = 128;
      /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments.  */
      if (TARGET_64BIT)
	for (tree parm = DECL_ARGUMENTS (current_function_decl);
	     parm; parm = DECL_CHAIN (parm))
	  {
	    if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
	      continue;
	    if (DECL_RTL_SET_P (parm)
		&& GET_MODE (DECL_RTL (parm)) == V1TImode)
	      {
		rtx r = DECL_RTL (parm);
		if (REG_P (r))
		  SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
	      }
	    if (DECL_INCOMING_RTL (parm)
		&& GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
	      {
		rtx r = DECL_INCOMING_RTL (parm);
		if (REG_P (r))
		  DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
	      }
	  }
    }

  return 0;
}
1784 | ||
/* Run the vzeroupper insertion pass by re-running the mode switching
   pass restricted to the AVX_U128 entity.  Returns 0.  */

static unsigned int
rest_of_handle_insert_vzeroupper (void)
{
  int i;

  /* vzeroupper instructions are inserted immediately after reload to
     account for possible spills from 256bit or 512bit registers.  The pass
     reuses mode switching infrastructure by re-running mode insertion
     pass, so disable entities that have already been processed.  */
  for (i = 0; i < MAX_386_ENTITIES; i++)
    ix86_optimize_mode_switching[i] = 0;

  ix86_optimize_mode_switching[AVX_U128] = 1;

  /* Call optimize_mode_switching.  */
  g->get_passes ()->execute_pass_mode_switching ();
  return 0;
}
1803 | ||
namespace {

const pass_data pass_data_insert_vzeroupper =
{
  RTL_PASS, /* type */
  "vzeroupper", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

/* RTL pass wrapper around rest_of_handle_insert_vzeroupper.  Gated on
   AVX + vzeroupper being enabled and expensive optimizations on.  */

class pass_insert_vzeroupper : public rtl_opt_pass
{
public:
  pass_insert_vzeroupper(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return TARGET_AVX
	     && TARGET_VZEROUPPER && flag_expensive_optimizations
	     && !optimize_size;
    }

  virtual unsigned int execute (function *)
    {
      return rest_of_handle_insert_vzeroupper ();
    }

}; // class pass_insert_vzeroupper

const pass_data pass_data_stv =
{
  RTL_PASS, /* type */
  "stv", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

/* RTL pass wrapper around convert_scalars_to_vector.  The pass is
   registered twice; the timode_p parameter selects which instance
   runs: the 64-bit (TImode) one or the 32-bit one.  */

class pass_stv : public rtl_opt_pass
{
public:
  pass_stv (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_stv, ctxt),
      timode_p (false)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return (timode_p == !!TARGET_64BIT
	      && TARGET_STV && TARGET_SSE2 && optimize > 1);
    }

  virtual unsigned int execute (function *)
    {
      return convert_scalars_to_vector ();
    }

  opt_pass *clone ()
    {
      return new pass_stv (m_ctxt);
    }

  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      timode_p = param;
    }

private:
  /* True for the instance that runs before register allocation on
     64-bit targets (TImode chains).  */
  bool timode_p;
}; // class pass_stv

} // anon namespace
1890 | ||
1891 | rtl_opt_pass * | |
1892 | make_pass_insert_vzeroupper (gcc::context *ctxt) | |
1893 | { | |
1894 | return new pass_insert_vzeroupper (ctxt); | |
1895 | } | |
1896 | ||
1897 | rtl_opt_pass * | |
1898 | make_pass_stv (gcc::context *ctxt) | |
1899 | { | |
1900 | return new pass_stv (ctxt); | |
1901 | } | |
1902 | ||
/* Inserting ENDBRANCH instructions.

   Insert ENDBR (the CET branch-tracking landing pad) wherever control
   can arrive via an indirect branch in the current function:
     - at the function entrance (unless queued to the profiler hook),
     - after calls that may return more than once (REG_SETJMP) or whose
       callee carries the "indirect_return" attribute,
     - at the targets of switch jump tables when -mcet-switch is on,
     - after labels whose address is preserved (address taken).
   Always returns 0 (no extra TODO flags).  */

static unsigned int
rest_of_insert_endbranch (void)
{
  timevar_push (TV_MACH_DEP);

  rtx cet_eb;
  rtx_insn *insn;
  basic_block bb;

  /* Currently emit EB if it's a tracking function, i.e. 'nocf_check' is
     absent among function attributes.  Later an optimization will be
     introduced to make analysis if an address of a static function is
     taken.  A static function whose address is not taken will get a
     nocf_check attribute.  This will allow to reduce the number of EB.  */

  if (!lookup_attribute ("nocf_check",
			 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
      && (!flag_manual_endbr
	  || lookup_attribute ("cf_check",
			       DECL_ATTRIBUTES (cfun->decl)))
      && !cgraph_node::get (cfun->decl)->only_called_directly_p ())
    {
      /* Queue ENDBR insertion to x86_function_profiler.  */
      if (crtl->profile && flag_fentry)
	cfun->machine->endbr_queued_at_entrance = true;
      else
	{
	  /* Otherwise place the ENDBR before the first insn of the
	     first real basic block.  */
	  cet_eb = gen_nop_endbr ();

	  bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
	  insn = BB_HEAD (bb);
	  emit_insn_before (cet_eb, insn);
	}
    }

  bb = 0;
  FOR_EACH_BB_FN (bb, cfun)
    {
      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
	   insn = NEXT_INSN (insn))
	{
	  if (CALL_P (insn))
	    {
	      /* A call needs a following ENDBR if it can return via an
		 indirect branch: setjmp-like calls (REG_SETJMP) or
		 callees marked with the "indirect_return" attribute.  */
	      bool need_endbr;
	      need_endbr = find_reg_note (insn, REG_SETJMP, NULL) != NULL;
	      if (!need_endbr && !SIBLING_CALL_P (insn))
		{
		  rtx call = get_call_rtx_from (insn);
		  rtx fnaddr = XEXP (call, 0);
		  tree fndecl = NULL_TREE;

		  /* Also generate ENDBRANCH for non-tail call which
		     may return via indirect branch.  */
		  if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
		    fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
		  if (fndecl == NULL_TREE)
		    fndecl = MEM_EXPR (fnaddr);
		  /* Only function-ish declarations are of interest.  */
		  if (fndecl
		      && TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE
		      && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE)
		    fndecl = NULL_TREE;
		  if (fndecl && TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
		    {
		      tree fntype = TREE_TYPE (fndecl);
		      if (lookup_attribute ("indirect_return",
					    TYPE_ATTRIBUTES (fntype)))
			need_endbr = true;
		    }
		}
	      if (!need_endbr)
		continue;
	      /* Generate ENDBRANCH after CALL, which can return more than
		 twice, setjmp-like functions.  */

	      cet_eb = gen_nop_endbr ();
	      emit_insn_after_setloc (cet_eb, insn, INSN_LOCATION (insn));
	      continue;
	    }

	  if (JUMP_P (insn) && flag_cet_switch)
	    {
	      rtx target = JUMP_LABEL (insn);
	      if (target == NULL_RTX || ANY_RETURN_P (target))
		continue;

	      /* Check the jump is a switch table.  */
	      rtx_insn *label = as_a<rtx_insn *> (target);
	      rtx_insn *table = next_insn (label);
	      if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
		continue;

	      /* For the indirect jump find out all places it jumps and insert
		 ENDBRANCH there.  It should be done under a special flag to
		 control ENDBRANCH generation for switch stmts.  */
	      edge_iterator ei;
	      edge e;
	      basic_block dest_blk;

	      FOR_EACH_EDGE (e, ei, bb->succs)
		{
		  rtx_insn *insn;

		  dest_blk = e->dest;
		  insn = BB_HEAD (dest_blk);
		  gcc_assert (LABEL_P (insn));
		  cet_eb = gen_nop_endbr ();
		  emit_insn_after (cet_eb, insn);
		}
	      continue;
	    }

	  /* A preserved label can be reached by an indirect jump (its
	     address escapes), so it needs a landing pad too.  */
	  if (LABEL_P (insn) && LABEL_PRESERVE_P (insn))
	    {
	      cet_eb = gen_nop_endbr ();
	      emit_insn_after (cet_eb, insn);
	      continue;
	    }
	}
    }

  timevar_pop (TV_MACH_DEP);
  return 0;
}
2028 | ||
2029 | namespace { | |
2030 | ||
/* Pass descriptor for the ENDBR insertion pass implemented by
   pass_insert_endbranch below.  */
const pass_data pass_data_insert_endbranch =
{
  RTL_PASS, /* type.  */
  "cet", /* name.  */
  OPTGROUP_NONE, /* optinfo_flags.  */
  TV_MACH_DEP, /* tv_id.  */
  0, /* properties_required.  */
  0, /* properties_provided.  */
  0, /* properties_destroyed.  */
  0, /* todo_flags_start.  */
  0, /* todo_flags_finish.  */
};
2043 | ||
2044 | class pass_insert_endbranch : public rtl_opt_pass | |
2045 | { | |
2046 | public: | |
2047 | pass_insert_endbranch (gcc::context *ctxt) | |
2048 | : rtl_opt_pass (pass_data_insert_endbranch, ctxt) | |
2049 | {} | |
2050 | ||
2051 | /* opt_pass methods: */ | |
2052 | virtual bool gate (function *) | |
2053 | { | |
2054 | return ((flag_cf_protection & CF_BRANCH)); | |
2055 | } | |
2056 | ||
2057 | virtual unsigned int execute (function *) | |
2058 | { | |
2059 | return rest_of_insert_endbranch (); | |
2060 | } | |
2061 | ||
2062 | }; // class pass_insert_endbranch | |
2063 | ||
2064 | } // anon namespace | |
2065 | ||
2066 | rtl_opt_pass * | |
2067 | make_pass_insert_endbranch (gcc::context *ctxt) | |
2068 | { | |
2069 | return new pass_insert_endbranch (ctxt); | |
2070 | } | |
2071 | ||
/* At entry of the nearest common dominator for basic blocks with
   conversions, generate a single
	vxorps %xmmN, %xmmN, %xmmN
   for all
	vcvtss2sd op, %xmmN, %xmmX
	vcvtsd2ss op, %xmmN, %xmmX
	vcvtsi2ss op, %xmmN, %xmmX
	vcvtsi2sd op, %xmmN, %xmmX

   NB: We want to generate only a single vxorps to cover the whole
   function.  The LCM algorithm isn't appropriate here since it may
   place a vxorps inside the loop.  */

static unsigned int
remove_partial_avx_dependency (void)
{
  timevar_push (TV_MACH_DEP);

  bitmap_obstack_initialize (NULL);
  /* Indices of basic blocks containing converted insns; used below to
     find the nearest common dominator for the single vxorps.  */
  bitmap convert_bbs = BITMAP_ALLOC (NULL);

  basic_block bb;
  rtx_insn *insn, *set_insn;
  rtx set;
  /* Pseudo holding the zeroed V4SF vector; also serves as the lazy
     "initialization done" flag for dominance/DF info.  */
  rtx v4sf_const0 = NULL_RTX;

  /* Newly emitted insns carrying a REG_EH_REGION note; their blocks
     must be split afterwards to restore correct EH edges.  */
  auto_vec<rtx_insn *> control_flow_insns;

  FOR_EACH_BB_FN (bb, cfun)
    {
      FOR_BB_INSNS (bb, insn)
	{
	  if (!NONDEBUG_INSN_P (insn))
	    continue;

	  set = single_set (insn);
	  if (!set)
	    continue;

	  if (get_attr_avx_partial_xmm_update (insn)
	      != AVX_PARTIAL_XMM_UPDATE_TRUE)
	    continue;

	  /* Set up dominators and DF lazily on the first match so that
	     functions without such insns pay nothing.  */
	  if (!v4sf_const0)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      df_set_flags (DF_DEFER_INSN_RESCAN);
	      df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
	      df_md_add_problem ();
	      df_analyze ();
	      v4sf_const0 = gen_reg_rtx (V4SFmode);
	    }

	  /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF,
	     SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and
	     vec_merge with subreg.  */
	  rtx src = SET_SRC (set);
	  rtx dest = SET_DEST (set);
	  machine_mode dest_mode = GET_MODE (dest);

	  rtx zero;
	  machine_mode dest_vecmode;
	  if (dest_mode == E_SFmode)
	    {
	      dest_vecmode = V4SFmode;
	      zero = v4sf_const0;
	    }
	  else
	    {
	      /* Non-SF destinations use the V2DF view of the same
		 zeroed register.  */
	      dest_vecmode = V2DFmode;
	      zero = gen_rtx_SUBREG (V2DFmode, v4sf_const0, 0);
	    }

	  /* Change source to vector mode.  */
	  src = gen_rtx_VEC_DUPLICATE (dest_vecmode, src);
	  src = gen_rtx_VEC_MERGE (dest_vecmode, src, zero,
				   GEN_INT (HOST_WIDE_INT_1U));
	  /* Change destination to vector mode.  */
	  rtx vec = gen_reg_rtx (dest_vecmode);
	  /* Generate an XMM vector SET.  */
	  set = gen_rtx_SET (vec, src);
	  set_insn = emit_insn_before (set, insn);
	  df_insn_rescan (set_insn);

	  if (cfun->can_throw_non_call_exceptions)
	    {
	      /* Handle REG_EH_REGION note: the new insn inherits the
		 EH region of the insn it replaces.  */
	      rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
	      if (note)
		{
		  control_flow_insns.safe_push (set_insn);
		  add_reg_note (set_insn, REG_EH_REGION, XEXP (note, 0));
		}
	    }

	  /* Rewrite the original insn to copy the scalar view of the
	     vector result into the original destination.  */
	  src = gen_rtx_SUBREG (dest_mode, vec, 0);
	  set = gen_rtx_SET (dest, src);

	  /* Drop possible dead definitions.  */
	  PATTERN (insn) = set;

	  INSN_CODE (insn) = -1;
	  recog_memoized (insn);
	  df_insn_rescan (insn);
	  bitmap_set_bit (convert_bbs, bb->index);
	}
    }

  if (v4sf_const0)
    {
      /* (Re-)discover loops so that bb->loop_father can be used in the
	 analysis below.  */
      loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

      /* Generate a vxorps at entry of the nearest dominator for basic
	 blocks with conversions, which is in the fake loop that
	 contains the whole function, so that there is only a single
	 vxorps in the whole function.  */
      bb = nearest_common_dominator_for_set (CDI_DOMINATORS,
					     convert_bbs);
      while (bb->loop_father->latch
	     != EXIT_BLOCK_PTR_FOR_FN (cfun))
	bb = get_immediate_dominator (CDI_DOMINATORS,
				      bb->loop_father->header);

      set = gen_rtx_SET (v4sf_const0, CONST0_RTX (V4SFmode));

      /* Find the first non-debug insn of BB (NULL if there is none)
	 so the clearing insn can be placed just before it.  */
      insn = BB_HEAD (bb);
      while (insn && !NONDEBUG_INSN_P (insn))
	{
	  if (insn == BB_END (bb))
	    {
	      insn = NULL;
	      break;
	    }
	  insn = NEXT_INSN (insn);
	}
      if (insn == BB_HEAD (bb))
	set_insn = emit_insn_before (set, insn);
      else
	set_insn = emit_insn_after (set,
				    insn ? PREV_INSN (insn) : BB_END (bb));
      df_insn_rescan (set_insn);
      df_process_deferred_rescans ();
      loop_optimizer_finalize ();

      if (!control_flow_insns.is_empty ())
	{
	  free_dominance_info (CDI_DOMINATORS);

	  unsigned int i;
	  FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
	    if (control_flow_insn_p (insn))
	      {
		/* Split the block after insn.  There will be a fallthru
		   edge, which is OK so we keep it.  We have to create
		   the exception edges ourselves.  */
		bb = BLOCK_FOR_INSN (insn);
		split_block (bb, insn);
		rtl_make_eh_edge (NULL, bb, BB_END (bb));
	      }
	}
    }

  bitmap_obstack_release (NULL);
  BITMAP_FREE (convert_bbs);

  timevar_pop (TV_MACH_DEP);
  return 0;
}
2242 | ||
2243 | namespace { | |
2244 | ||
/* Pass descriptor for the partial-AVX-dependency removal ("rpad") pass
   implemented by pass_remove_partial_avx_dependency below.  */
const pass_data pass_data_remove_partial_avx_dependency =
{
  RTL_PASS, /* type */
  "rpad", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
2257 | ||
2258 | class pass_remove_partial_avx_dependency : public rtl_opt_pass | |
2259 | { | |
2260 | public: | |
2261 | pass_remove_partial_avx_dependency (gcc::context *ctxt) | |
2262 | : rtl_opt_pass (pass_data_remove_partial_avx_dependency, ctxt) | |
2263 | {} | |
2264 | ||
2265 | /* opt_pass methods: */ | |
2266 | virtual bool gate (function *) | |
2267 | { | |
2268 | return (TARGET_AVX | |
2269 | && TARGET_SSE_PARTIAL_REG_DEPENDENCY | |
2270 | && TARGET_SSE_MATH | |
2271 | && optimize | |
2272 | && optimize_function_for_speed_p (cfun)); | |
2273 | } | |
2274 | ||
2275 | virtual unsigned int execute (function *) | |
2276 | { | |
2277 | return remove_partial_avx_dependency (); | |
2278 | } | |
2279 | }; // class pass_rpad | |
2280 | ||
2281 | } // anon namespace | |
2282 | ||
2283 | rtl_opt_pass * | |
2284 | make_pass_remove_partial_avx_dependency (gcc::context *ctxt) | |
2285 | { | |
2286 | return new pass_remove_partial_avx_dependency (ctxt); | |
2287 | } | |
2288 | ||
2289 | /* This compares the priority of target features in function DECL1 | |
2290 | and DECL2. It returns positive value if DECL1 is higher priority, | |
2291 | negative value if DECL2 is higher priority and 0 if they are the | |
2292 | same. */ | |
2293 | ||
2294 | int | |
2295 | ix86_compare_version_priority (tree decl1, tree decl2) | |
2296 | { | |
2297 | unsigned int priority1 = get_builtin_code_for_version (decl1, NULL); | |
2298 | unsigned int priority2 = get_builtin_code_for_version (decl2, NULL); | |
2299 | ||
2300 | return (int)priority1 - (int)priority2; | |
2301 | } | |
2302 | ||
2303 | /* V1 and V2 point to function versions with different priorities | |
2304 | based on the target ISA. This function compares their priorities. */ | |
2305 | ||
2306 | static int | |
2307 | feature_compare (const void *v1, const void *v2) | |
2308 | { | |
2309 | typedef struct _function_version_info | |
2310 | { | |
2311 | tree version_decl; | |
2312 | tree predicate_chain; | |
2313 | unsigned int dispatch_priority; | |
2314 | } function_version_info; | |
2315 | ||
2316 | const function_version_info c1 = *(const function_version_info *)v1; | |
2317 | const function_version_info c2 = *(const function_version_info *)v2; | |
2318 | return (c2.dispatch_priority - c1.dispatch_priority); | |
2319 | } | |
2320 | ||
/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
   to return a pointer to VERSION_DECL if the outcome of the expression
   formed by PREDICATE_CHAIN is true.  This function will be called during
   version dispatch to decide which function version to execute.  It returns
   the basic block at the end, to which more conditions can be added.  */

static basic_block
add_condition_to_bb (tree function_decl, tree version_decl,
		     tree predicate_chain, basic_block new_bb)
{
  gimple *return_stmt;
  tree convert_expr, result_var;
  gimple *convert_stmt;
  gimple *call_cond_stmt;
  gimple *if_else_stmt;

  basic_block bb1, bb2, bb3;
  edge e12, e23;

  tree cond_var, and_expr_var = NULL_TREE;
  gimple_seq gseq;

  tree predicate_decl, predicate_arg;

  push_cfun (DECL_STRUCT_FUNCTION (function_decl));

  gcc_assert (new_bb != NULL);
  gseq = bb_seq (new_bb);


  /* Build "return (void *) &version_decl;" — the value the resolver
     hands back when this version is selected.  */
  convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
			 build_fold_addr_expr (version_decl));
  result_var = create_tmp_var (ptr_type_node);
  convert_stmt = gimple_build_assign (result_var, convert_expr);
  return_stmt = gimple_build_return (result_var);

  /* With no predicates this version is unconditional (the default):
     emit the return directly into NEW_BB and stop.  */
  if (predicate_chain == NULL_TREE)
    {
      gimple_seq_add_stmt (&gseq, convert_stmt);
      gimple_seq_add_stmt (&gseq, return_stmt);
      set_bb_seq (new_bb, gseq);
      gimple_set_bb (convert_stmt, new_bb);
      gimple_set_bb (return_stmt, new_bb);
      pop_cfun ();
      return new_bb;
    }

  /* Emit one predicate call per chain entry and AND all the results
     together.  */
  while (predicate_chain != NULL)
    {
      cond_var = create_tmp_var (integer_type_node);
      predicate_decl = TREE_PURPOSE (predicate_chain);
      predicate_arg = TREE_VALUE (predicate_chain);
      call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
      gimple_call_set_lhs (call_cond_stmt, cond_var);

      gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
      gimple_set_bb (call_cond_stmt, new_bb);
      gimple_seq_add_stmt (&gseq, call_cond_stmt);

      predicate_chain = TREE_CHAIN (predicate_chain);

      if (and_expr_var == NULL)
	and_expr_var = cond_var;
      else
	{
	  gimple *assign_stmt;
	  /* Use MIN_EXPR to check if any integer is zero.
	     and_expr_var = min_expr <cond_var, and_expr_var> */
	  assign_stmt = gimple_build_assign (and_expr_var,
					     build2 (MIN_EXPR, integer_type_node,
						     cond_var, and_expr_var));

	  gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
	  gimple_set_bb (assign_stmt, new_bb);
	  gimple_seq_add_stmt (&gseq, assign_stmt);
	}
    }

  /* if (and_expr_var > 0) — all predicates held.  */
  if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
				    integer_zero_node,
				    NULL_TREE, NULL_TREE);
  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (if_else_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, if_else_stmt);

  gimple_seq_add_stmt (&gseq, convert_stmt);
  gimple_seq_add_stmt (&gseq, return_stmt);
  set_bb_seq (new_bb, gseq);

  /* Split the straight-line sequence into: bb1 (predicates + cond),
     bb2 (the return for this version) and bb3 (fallthrough for the
     next version's condition).  */
  bb1 = new_bb;
  e12 = split_block (bb1, if_else_stmt);
  bb2 = e12->dest;
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_TRUE_VALUE;

  e23 = split_block (bb2, return_stmt);

  gimple_set_bb (convert_stmt, bb2);
  gimple_set_bb (return_stmt, bb2);

  bb3 = e23->dest;
  make_edge (bb1, bb3, EDGE_FALSE_VALUE);

  /* bb2 returns, so it exits instead of falling through to bb3.  */
  remove_edge (e23);
  make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);

  pop_cfun ();

  return bb3;
}
2431 | ||
/* This function generates the dispatch function for
   multi-versioned functions.  DISPATCH_DECL is the function which will
   contain the dispatch logic.  FNDECLS are the function choices for
   dispatch, and is a tree chain.  EMPTY_BB is the basic block pointer
   in DISPATCH_DECL in which the dispatch code is generated.  */

static int
dispatch_function_versions (tree dispatch_decl,
			    void *fndecls_p,
			    basic_block *empty_bb)
{
  tree default_decl;
  gimple *ifunc_cpu_init_stmt;
  gimple_seq gseq;
  int ix;
  tree ele;
  vec<tree> *fndecls;
  unsigned int num_versions = 0;
  unsigned int actual_versions = 0;
  unsigned int i;

  /* Per-version record collected for sorting; layout must match what
     feature_compare expects.  */
  struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    }*function_version_info;

  gcc_assert (dispatch_decl != NULL
	      && fndecls_p != NULL
	      && empty_bb != NULL);

  /*fndecls_p is actually a vector.  */
  fndecls = static_cast<vec<tree> *> (fndecls_p);

  /* At least one more version other than the default.  */
  num_versions = fndecls->length ();
  gcc_assert (num_versions >= 2);

  /* One record per non-default version.  */
  function_version_info = (struct _function_version_info *)
    XNEWVEC (struct _function_version_info, (num_versions - 1));

  /* The first version in the vector is the default decl.  */
  default_decl = (*fndecls)[0];

  push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));

  gseq = bb_seq (*empty_bb);
  /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
     constructors, so explicitly call __builtin_cpu_init here.  */
  ifunc_cpu_init_stmt
    = gimple_build_call_vec (get_ix86_builtin (IX86_BUILTIN_CPU_INIT), vNULL);
  gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
  gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
  set_bb_seq (*empty_bb, gseq);

  pop_cfun ();


  /* Collect the non-default versions (index 0 is the default).  */
  for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    {
      tree version_decl = ele;
      tree predicate_chain = NULL_TREE;
      unsigned int priority;
      /* Get attribute string, parse it and find the right predicate decl.
	 The predicate function could be a lengthy combination of many
	 features, like arch-type and various isa-variants.  */
      priority = get_builtin_code_for_version (version_decl,
					       &predicate_chain);

      /* Versions without a predicate cannot be dispatched on.  */
      if (predicate_chain == NULL_TREE)
	continue;

      function_version_info [actual_versions].version_decl = version_decl;
      function_version_info [actual_versions].predicate_chain
	= predicate_chain;
      function_version_info [actual_versions].dispatch_priority = priority;
      actual_versions++;
    }

  /* Sort the versions according to descending order of dispatch priority.  The
     priority is based on the ISA.  This is not a perfect solution.  There
     could still be ambiguity.  If more than one function version is suitable
     to execute, which one should be dispatched?  In future, allow the user
     to specify a dispatch priority next to the version.  */
  qsort (function_version_info, actual_versions,
	 sizeof (struct _function_version_info), feature_compare);

  for  (i = 0; i < actual_versions; ++i)
    *empty_bb = add_condition_to_bb (dispatch_decl,
				     function_version_info[i].version_decl,
				     function_version_info[i].predicate_chain,
				     *empty_bb);

  /* dispatch default version at the end.  */
  *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
				   NULL, *empty_bb);

  free (function_version_info);
  return 0;
}
2533 | ||
/* This function changes the assembler name for functions that are
   versions.  If DECL is a function version and has a "target"
   attribute, it appends the attribute string to its assembler name.
   ID is the current assembler-name identifier; the (possibly new)
   identifier is returned.  Diagnoses unsupported combinations
   (gnu_inline, virtual functions).  */

static tree
ix86_mangle_function_version_assembler_name (tree decl, tree id)
{
  tree version_attr;
  const char *orig_name, *version_string;
  char *attr_str, *assembler_name;

  if (DECL_DECLARED_INLINE_P (decl)
      && lookup_attribute ("gnu_inline",
			   DECL_ATTRIBUTES (decl)))
    error_at (DECL_SOURCE_LOCATION (decl),
	      "function versions cannot be marked as %<gnu_inline%>,"
	      " bodies have to be generated");

  if (DECL_VIRTUAL_P (decl)
      || DECL_VINDEX (decl))
    sorry ("virtual function multiversioning not supported");

  version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));

  /* target attribute string cannot be NULL.  */
  gcc_assert (version_attr != NULL_TREE);

  orig_name = IDENTIFIER_POINTER (id);
  version_string
    = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));

  /* The default version keeps the unsuffixed name.  */
  if (strcmp (version_string, "default") == 0)
    return id;

  /* Build "<orig_name>.<sorted-attr-string>" (+2 for '.' and NUL).  */
  attr_str = sorted_attr_string (TREE_VALUE (version_attr));
  assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);

  sprintf (assembler_name, "%s.%s", orig_name, attr_str);

  /* Allow assembler name to be modified if already set.  */
  if (DECL_ASSEMBLER_NAME_SET_P (decl))
    SET_DECL_RTL (decl, NULL);

  tree ret = get_identifier (assembler_name);
  XDELETEVEC (attr_str);
  XDELETEVEC (assembler_name);
  return ret;
}
2582 | ||
2583 | tree | |
2584 | ix86_mangle_decl_assembler_name (tree decl, tree id) | |
2585 | { | |
2586 | /* For function version, add the target suffix to the assembler name. */ | |
2587 | if (TREE_CODE (decl) == FUNCTION_DECL | |
2588 | && DECL_FUNCTION_VERSIONED (decl)) | |
2589 | id = ix86_mangle_function_version_assembler_name (decl, id); | |
2590 | #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME | |
2591 | id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id); | |
2592 | #endif | |
2593 | ||
2594 | return id; | |
2595 | } | |
2596 | ||
/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Returns the decl of the dispatcher function
   (NULL if no default version exists; errors out when IFUNC is
   unavailable on the target).  */

tree
ix86_get_function_versions_dispatcher (void *decl)
{
  tree fn = (tree) decl;
  struct cgraph_node *node = NULL;
  struct cgraph_node *default_node = NULL;
  struct cgraph_function_version_info *node_v = NULL;
  struct cgraph_function_version_info *first_v = NULL;

  tree dispatch_decl = NULL;

  struct cgraph_function_version_info *default_version_info = NULL;

  gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));

  node = cgraph_node::get (fn);
  gcc_assert (node != NULL);

  node_v = node->function_version ();
  gcc_assert (node_v != NULL);

  /* A dispatcher may already have been created for this version
     chain; reuse it.  */
  if (node_v->dispatcher_resolver != NULL)
    return node_v->dispatcher_resolver;

  /* Find the default version and make it the first node.  */
  first_v = node_v;
  /* Go to the beginning of the chain.  */
  while (first_v->prev != NULL)
    first_v = first_v->prev;
  default_version_info = first_v;
  /* Walk the chain looking for the default version.  */
  while (default_version_info != NULL)
    {
      if (is_function_default_version
	    (default_version_info->this_node->decl))
	break;
      default_version_info = default_version_info->next;
    }

  /* If there is no default node, just return NULL.  */
  if (default_version_info == NULL)
    return NULL;

  /* Make default info the first node by unlinking it and relinking it
     at the head of the doubly linked chain.  */
  if (first_v != default_version_info)
    {
      default_version_info->prev->next = default_version_info->next;
      if (default_version_info->next)
	default_version_info->next->prev = default_version_info->prev;
      first_v->prev = default_version_info;
      default_version_info->next = first_v;
      default_version_info->prev = NULL;
    }

  default_node = default_version_info->this_node;

#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
  if (targetm.has_ifunc_p ())
    {
      struct cgraph_function_version_info *it_v = NULL;
      struct cgraph_node *dispatcher_node = NULL;
      struct cgraph_function_version_info *dispatcher_version_info = NULL;

      /* Right now, the dispatching is done via ifunc.  */
      dispatch_decl = make_dispatcher_decl (default_node->decl);

      dispatcher_node = cgraph_node::get_create (dispatch_decl);
      gcc_assert (dispatcher_node != NULL);
      dispatcher_node->dispatcher_function = 1;
      dispatcher_version_info
	= dispatcher_node->insert_new_function_version ();
      dispatcher_version_info->next = default_version_info;
      dispatcher_node->definition = 1;

      /* Set the dispatcher for all the versions.  */
      it_v = default_version_info;
      while (it_v != NULL)
	{
	  it_v->dispatcher_resolver = dispatch_decl;
	  it_v = it_v->next;
	}
    }
  else
#endif
    {
      error_at (DECL_SOURCE_LOCATION (default_node->decl),
		"multiversioning needs %<ifunc%> which is not supported "
		"on this target");
    }

  return dispatch_decl;
}
2692 | ||
/* Make the resolver function decl to dispatch the versions of
   a multi-versioned function, DEFAULT_DECL.  IFUNC_ALIAS_DECL is
   ifunc alias that will point to the created resolver.  Create an
   empty basic block in the resolver and store the pointer in
   EMPTY_BB.  Return the decl of the resolver function.  */

static tree
make_resolver_func (const tree default_decl,
		    const tree ifunc_alias_decl,
		    basic_block *empty_bb)
{
  char *resolver_name;
  tree decl, type, decl_name, t;

  /* IFUNC's have to be globally visible.  So, if the default_decl is
     not, then the name of the IFUNC should be made unique.  */
  if (TREE_PUBLIC (default_decl) == 0)
    {
      char *ifunc_name = make_unique_name (default_decl, "ifunc", true);
      symtab->change_decl_assembler_name (ifunc_alias_decl,
					  get_identifier (ifunc_name));
      XDELETEVEC (ifunc_name);
    }

  resolver_name = make_unique_name (default_decl, "resolver", false);

  /* The resolver function should return a (void *).  */
  type = build_function_type_list (ptr_type_node, NULL_TREE);

  decl = build_fn_decl (resolver_name, type);
  decl_name = get_identifier (resolver_name);
  SET_DECL_ASSEMBLER_NAME (decl, decl_name);

  /* Mark the resolver as a compiler-generated, non-inlinable,
     file-local function.  */
  DECL_NAME (decl) = decl_name;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 1;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;

  /* Resolver is not external, body is generated.  */
  DECL_EXTERNAL (decl) = 0;
  DECL_EXTERNAL (ifunc_alias_decl) = 0;

  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);
  DECL_STATIC_CONSTRUCTOR (decl) = 0;

  if (DECL_COMDAT_GROUP (default_decl)
      || TREE_PUBLIC (default_decl))
    {
      /* In this case, each translation unit with a call to this
	 versioned function will put out a resolver.  Ensure it
	 is comdat to keep just one copy.  */
      DECL_COMDAT (decl) = 1;
      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
    }
  /* Build result decl and add to function_decl.  */
  t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
  DECL_CONTEXT (t) = decl;
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  gimplify_function_tree (decl);
  push_cfun (DECL_STRUCT_FUNCTION (decl));
  /* Give the resolver an empty lowered body; the dispatch code is
     generated into *EMPTY_BB later.  */
  *empty_bb = init_lowered_empty_function (decl, false,
					   profile_count::uninitialized ());

  cgraph_node::add_new_function (decl, true);
  symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));

  pop_cfun ();

  gcc_assert (ifunc_alias_decl != NULL);
  /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name.  */
  DECL_ATTRIBUTES (ifunc_alias_decl)
    = make_attribute ("ifunc", resolver_name,
		      DECL_ATTRIBUTES (ifunc_alias_decl));

  /* Create the alias for dispatch to resolver here.  */
  cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
  XDELETEVEC (resolver_name);
  return decl;
}
2778 | ||
2779 | /* Generate the dispatching code body to dispatch multi-versioned function | |
2780 | DECL. The target hook is called to process the "target" attributes and | |
2781 | provide the code to dispatch the right function at run-time. NODE points | |
2782 | to the dispatcher decl whose body will be created. */ | |
2783 | ||
2784 | tree | |
2785 | ix86_generate_version_dispatcher_body (void *node_p) | |
2786 | { | |
2787 | tree resolver_decl; | |
2788 | basic_block empty_bb; | |
2789 | tree default_ver_decl; | |
2790 | struct cgraph_node *versn; | |
2791 | struct cgraph_node *node; | |
2792 | ||
2793 | struct cgraph_function_version_info *node_version_info = NULL; | |
2794 | struct cgraph_function_version_info *versn_info = NULL; | |
2795 | ||
2796 | node = (cgraph_node *)node_p; | |
2797 | ||
2798 | node_version_info = node->function_version (); | |
2799 | gcc_assert (node->dispatcher_function | |
2800 | && node_version_info != NULL); | |
2801 | ||
2802 | if (node_version_info->dispatcher_resolver) | |
2803 | return node_version_info->dispatcher_resolver; | |
2804 | ||
2805 | /* The first version in the chain corresponds to the default version. */ | |
2806 | default_ver_decl = node_version_info->next->this_node->decl; | |
2807 | ||
2808 | /* node is going to be an alias, so remove the finalized bit. */ | |
2809 | node->definition = false; | |
2810 | ||
2811 | resolver_decl = make_resolver_func (default_ver_decl, | |
2812 | node->decl, &empty_bb); | |
2813 | ||
2814 | node_version_info->dispatcher_resolver = resolver_decl; | |
2815 | ||
2816 | push_cfun (DECL_STRUCT_FUNCTION (resolver_decl)); | |
2817 | ||
2818 | auto_vec<tree, 2> fn_ver_vec; | |
2819 | ||
2820 | for (versn_info = node_version_info->next; versn_info; | |
2821 | versn_info = versn_info->next) | |
2822 | { | |
2823 | versn = versn_info->this_node; | |
2824 | /* Check for virtual functions here again, as by this time it should | |
2825 | have been determined if this function needs a vtable index or | |
2826 | not. This happens for methods in derived classes that override | |
2827 | virtual methods in base classes but are not explicitly marked as | |
2828 | virtual. */ | |
2829 | if (DECL_VINDEX (versn->decl)) | |
2830 | sorry ("virtual function multiversioning not supported"); | |
2831 | ||
2832 | fn_ver_vec.safe_push (versn->decl); | |
2833 | } | |
2834 | ||
2835 | dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb); | |
2836 | cgraph_edge::rebuild_edges (); | |
2837 | pop_cfun (); | |
2838 | return resolver_decl; | |
2839 | } | |
2840 | ||
2841 |