1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2022 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #define IN_TARGET_CODE 1
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "memmodel.h"
29 #include "gimple.h"
30 #include "cfghooks.h"
31 #include "cfgloop.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic.h"
42 #include "cfgbuild.h"
43 #include "alias.h"
44 #include "fold-const.h"
45 #include "attribs.h"
46 #include "calls.h"
47 #include "stor-layout.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "except.h"
53 #include "explow.h"
54 #include "expr.h"
55 #include "cfgrtl.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "gimplify.h"
60 #include "dwarf2.h"
61 #include "tm-constrs.h"
62 #include "cselib.h"
63 #include "sched-int.h"
64 #include "opts.h"
65 #include "tree-pass.h"
66 #include "context.h"
67 #include "pass_manager.h"
68 #include "target-globals.h"
69 #include "gimple-iterator.h"
70 #include "gimple-fold.h"
71 #include "tree-vectorizer.h"
72 #include "shrink-wrap.h"
73 #include "builtins.h"
74 #include "rtl-iter.h"
75 #include "tree-iterator.h"
76 #include "dbgcnt.h"
77 #include "case-cfn-macros.h"
78 #include "dojump.h"
79 #include "fold-const-call.h"
80 #include "tree-vrp.h"
81 #include "tree-ssanames.h"
82 #include "selftest.h"
83 #include "selftest-rtl.h"
84 #include "print-rtl.h"
85 #include "intl.h"
86 #include "ifcvt.h"
87 #include "symbol-summary.h"
88 #include "ipa-prop.h"
89 #include "ipa-fnsummary.h"
90 #include "wide-int-bitmask.h"
91 #include "tree-vector-builder.h"
92 #include "debug.h"
93 #include "dwarf2out.h"
94 #include "i386-options.h"
95 #include "i386-builtins.h"
96 #include "i386-expand.h"
97 #include "i386-features.h"
98 #include "function-abi.h"
99
100 /* This file should be included last. */
101 #include "target-def.h"
102
103 static rtx legitimize_dllimport_symbol (rtx, bool);
104 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
105 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
106 static void ix86_emit_restore_reg_using_pop (rtx);
107
108
109 #ifndef CHECK_STACK_LIMIT
110 #define CHECK_STACK_LIMIT (-1)
111 #endif
112
113 /* Return index of given mode in mult and division cost tables. */
114 #define MODE_INDEX(mode) \
115 ((mode) == QImode ? 0 \
116 : (mode) == HImode ? 1 \
117 : (mode) == SImode ? 2 \
118 : (mode) == DImode ? 3 \
119 : 4)
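/* A minimal sketch of how this index is typically used, assuming the usual
   cost-table layout in struct processor_costs (the field name "divide" is
   illustrative):

     int si_div_cost = ix86_cost->divide[MODE_INDEX (SImode)];  // index 2

   QImode maps to 0, HImode to 1, DImode to 3, and any other mode to 4.  */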
120
121
122 /* Set by -mtune. */
123 const struct processor_costs *ix86_tune_cost = NULL;
124
125 /* Set by -mtune or -Os. */
126 const struct processor_costs *ix86_cost = NULL;
127
128 /* In case the average insn count for single function invocation is
129 lower than this constant, emit fast (but longer) prologue and
130 epilogue code. */
131 #define FAST_PROLOGUE_INSN_COUNT 20
132
133 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
134 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
135 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
136 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
137
138 /* Array of the smallest class containing reg number REGNO, indexed by
139 REGNO. Used by REGNO_REG_CLASS in i386.h. */
140
141 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
142 {
143 /* ax, dx, cx, bx */
144 AREG, DREG, CREG, BREG,
145 /* si, di, bp, sp */
146 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
147 /* FP registers */
148 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
149 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
150 /* arg pointer, flags, fpsr, frame */
151 NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
152 /* SSE registers */
153 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
154 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
155 /* MMX registers */
156 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
157 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
158 /* REX registers */
159 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
160 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
161 /* SSE REX registers */
162 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
163 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
164 /* AVX-512 SSE registers */
165 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
166 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
167 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
168 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
169 /* Mask registers. */
170 ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
171 MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS
172 };
173
174 /* The "default" register map used in 32bit mode. */
175
176 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
177 {
178 /* general regs */
179 0, 2, 1, 3, 6, 7, 4, 5,
180 /* fp regs */
181 12, 13, 14, 15, 16, 17, 18, 19,
182 /* arg, flags, fpsr, frame */
183 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
184 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
185 /* SSE */
186 21, 22, 23, 24, 25, 26, 27, 28,
187 /* MMX */
188 29, 30, 31, 32, 33, 34, 35, 36,
189 /* extended integer registers */
190 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
191 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
192 /* extended sse registers */
193 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
194 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
195 /* AVX-512 registers 16-23 */
196 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
197 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
198 /* AVX-512 registers 24-31 */
199 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
200 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
201 /* Mask registers */
202 93, 94, 95, 96, 97, 98, 99, 100
203 };
204
205 /* The "default" register map used in 64bit mode. */
206
207 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
208 {
209 /* general regs */
210 0, 1, 2, 3, 4, 5, 6, 7,
211 /* fp regs */
212 33, 34, 35, 36, 37, 38, 39, 40,
213 /* arg, flags, fpsr, frame */
214 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
215 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
216 /* SSE */
217 17, 18, 19, 20, 21, 22, 23, 24,
218 /* MMX */
219 41, 42, 43, 44, 45, 46, 47, 48,
220 /* extended integer registers */
221 8, 9, 10, 11, 12, 13, 14, 15,
222 /* extended SSE registers */
223 25, 26, 27, 28, 29, 30, 31, 32,
224 /* AVX-512 registers 16-23 */
225 67, 68, 69, 70, 71, 72, 73, 74,
226 /* AVX-512 registers 24-31 */
227 75, 76, 77, 78, 79, 80, 81, 82,
228 /* Mask registers */
229 118, 119, 120, 121, 122, 123, 124, 125
230 };
231
232 /* Define the register numbers to be used in Dwarf debugging information.
233 The SVR4 reference port C compiler uses the following register numbers
234 in its Dwarf output code:
235 0 for %eax (gcc regno = 0)
236 1 for %ecx (gcc regno = 2)
237 2 for %edx (gcc regno = 1)
238 3 for %ebx (gcc regno = 3)
239 4 for %esp (gcc regno = 7)
240 5 for %ebp (gcc regno = 6)
241 6 for %esi (gcc regno = 4)
242 7 for %edi (gcc regno = 5)
243 The following three DWARF register numbers are never generated by
244 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
245 believed these numbers have these meanings.
246 8 for %eip (no gcc equivalent)
247 9 for %eflags (gcc regno = 17)
248 10 for %trapno (no gcc equivalent)
249 It is not at all clear how we should number the FP stack registers
250 for the x86 architecture. If the version of SDB on x86/svr4 were
251 a bit less brain dead with respect to floating-point then we would
252 have a precedent to follow with respect to DWARF register numbers
253 for x86 FP registers, but the SDB on x86/svr4 was so completely
254 broken with respect to FP registers that it is hardly worth thinking
255 of it as something to strive for compatibility with.
256 The version of x86/svr4 SDB I had does (partially)
257 seem to believe that DWARF register number 11 is associated with
258 the x86 register %st(0), but that's about all. Higher DWARF
259 register numbers don't seem to be associated with anything in
260 particular, and even for DWARF regno 11, SDB only seemed to under-
261 stand that it should say that a variable lives in %st(0) (when
262 asked via an `=' command) if we said it was in DWARF regno 11,
263 but SDB still printed garbage when asked for the value of the
264 variable in question (via a `/' command).
265 (Also note that the labels SDB printed for various FP stack regs
266 when doing an `x' command were all wrong.)
267 Note that these problems generally don't affect the native SVR4
268 C compiler because it doesn't allow the use of -O with -g and
269 because when it is *not* optimizing, it allocates a memory
270 location for each floating-point variable, and the memory
271 location is what gets described in the DWARF AT_location
272 attribute for the variable in question.
273 Regardless of the severe mental illness of the x86/svr4 SDB, we
274 do something sensible here and we use the following DWARF
275 register numbers. Note that these are all stack-top-relative
276 numbers.
277 11 for %st(0) (gcc regno = 8)
278 12 for %st(1) (gcc regno = 9)
279 13 for %st(2) (gcc regno = 10)
280 14 for %st(3) (gcc regno = 11)
281 15 for %st(4) (gcc regno = 12)
282 16 for %st(5) (gcc regno = 13)
283 17 for %st(6) (gcc regno = 14)
284 18 for %st(7) (gcc regno = 15)
285 */
286 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
287 {
288 /* general regs */
289 0, 2, 1, 3, 6, 7, 5, 4,
290 /* fp regs */
291 11, 12, 13, 14, 15, 16, 17, 18,
292 /* arg, flags, fpsr, frame */
293 IGNORED_DWARF_REGNUM, 9,
294 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
295 /* SSE registers */
296 21, 22, 23, 24, 25, 26, 27, 28,
297 /* MMX registers */
298 29, 30, 31, 32, 33, 34, 35, 36,
299 /* extended integer registers */
300 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
301 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
302 /* extended sse registers */
303 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
304 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
305 /* AVX-512 registers 16-23 */
306 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
307 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
308 /* AVX-512 registers 24-31 */
309 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
310 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
311 /* Mask registers */
312 93, 94, 95, 96, 97, 98, 99, 100
313 };
314
315 /* Define parameter passing and return registers. */
316
317 static int const x86_64_int_parameter_registers[6] =
318 {
319 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
320 };
321
322 static int const x86_64_ms_abi_int_parameter_registers[4] =
323 {
324 CX_REG, DX_REG, R8_REG, R9_REG
325 };
326
327 static int const x86_64_int_return_registers[4] =
328 {
329 AX_REG, DX_REG, DI_REG, SI_REG
330 };
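/* A minimal illustration of the orderings above: for a call such as
   f (a, b, c, d), the SysV ABI passes the integer arguments in
   %rdi, %rsi, %rdx, %rcx while the MS ABI uses %rcx, %rdx, %r8, %r9
   (with a 32-byte shadow space reserved by the caller).  Integer return
   values come back in %rax, with %rdx holding the second half of a
   two-register return.  */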
331
332 /* Define the structure for the machine field in struct function. */
333
334 struct GTY(()) stack_local_entry {
335 unsigned short mode;
336 unsigned short n;
337 rtx rtl;
338 struct stack_local_entry *next;
339 };
340
341 /* Which cpu are we scheduling for. */
342 enum attr_cpu ix86_schedule;
343
344 /* Which cpu are we optimizing for. */
345 enum processor_type ix86_tune;
346
347 /* Which instruction set architecture to use. */
348 enum processor_type ix86_arch;
349
350 /* True if processor has SSE prefetch instruction. */
351 unsigned char ix86_prefetch_sse;
352
353 /* Preferred alignment for stack boundary in bits. */
354 unsigned int ix86_preferred_stack_boundary;
355
356 /* Alignment for incoming stack boundary in bits specified at
357 command line. */
358 unsigned int ix86_user_incoming_stack_boundary;
359
360 /* Default alignment for incoming stack boundary in bits. */
361 unsigned int ix86_default_incoming_stack_boundary;
362
363 /* Alignment for incoming stack boundary in bits. */
364 unsigned int ix86_incoming_stack_boundary;
365
366 /* True if there is no direct access to extern symbols. */
367 bool ix86_has_no_direct_extern_access;
368
369 /* Calling abi specific va_list type nodes. */
370 tree sysv_va_list_type_node;
371 tree ms_va_list_type_node;
372
373 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
374 char internal_label_prefix[16];
375 int internal_label_prefix_len;
376
377 /* Fence to use after loop using movnt. */
378 tree x86_mfence;
379
380 /* Register class used for passing a given 64-bit part of the argument.
381 These represent classes as documented by the psABI, with the exception
382 of the SSESF and SSEDF classes, which are basically the SSE class; gcc
383 just uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
384
385 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
386 whenever possible (upper half does contain padding). */
387 enum x86_64_reg_class
388 {
389 X86_64_NO_CLASS,
390 X86_64_INTEGER_CLASS,
391 X86_64_INTEGERSI_CLASS,
392 X86_64_SSE_CLASS,
393 X86_64_SSEHF_CLASS,
394 X86_64_SSESF_CLASS,
395 X86_64_SSEDF_CLASS,
396 X86_64_SSEUP_CLASS,
397 X86_64_X87_CLASS,
398 X86_64_X87UP_CLASS,
399 X86_64_COMPLEX_X87_CLASS,
400 X86_64_MEMORY_CLASS
401 };
402
403 #define MAX_CLASSES 8
404
405 /* Table of constants used by fldpi, fldln2, etc.... */
406 static REAL_VALUE_TYPE ext_80387_constants_table [5];
407 static bool ext_80387_constants_init;
408
409 \f
410 static rtx ix86_function_value (const_tree, const_tree, bool);
411 static bool ix86_function_value_regno_p (const unsigned int);
412 static unsigned int ix86_function_arg_boundary (machine_mode,
413 const_tree);
414 static rtx ix86_static_chain (const_tree, bool);
415 static int ix86_function_regparm (const_tree, const_tree);
416 static void ix86_compute_frame_layout (void);
417 static tree ix86_canonical_va_list_type (tree);
418 static unsigned int split_stack_prologue_scratch_regno (void);
419 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
420
421 static bool ix86_can_inline_p (tree, tree);
422 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
423
424 \f
425 /* Whether -mtune= or -march= were specified */
426 int ix86_tune_defaulted;
427 int ix86_arch_specified;
428 \f
429 /* Return true if a red-zone is in use. We can't use the red-zone when
430 there are local indirect jumps, like "indirect_jump" or "tablejump",
431 which jump to another place in the function, since a "call" in the
432 indirect thunk pushes the return address onto the stack, destroying
433 the red-zone.
434
435 TODO: If we can reserve the first 2 WORDs in the red-zone, one for
436 PUSH and another for CALL, we can allow local indirect jumps with
437 the indirect thunk. */
438
439 bool
440 ix86_using_red_zone (void)
441 {
442 return (TARGET_RED_ZONE
443 && !TARGET_64BIT_MS_ABI
444 && (!cfun->machine->has_local_indirect_jump
445 || cfun->machine->indirect_branch_type == indirect_branch_keep));
446 }
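/* A minimal illustration of the red-zone this protects: in a SysV x86-64
   leaf function the 128 bytes below %rsp may be used without moving the
   stack pointer, e.g.

     movl  %edi, -4(%rsp)    # spill into the red-zone, no sub/add of %rsp

   A "call" emitted by an indirect thunk pushes its return address into
   exactly that area, which is why the check above disables the red-zone
   when local indirect jumps go through thunks.  */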
447 \f
448 /* Return true if profiling code should be emitted before the
449 prologue, and false otherwise.
450 Note: For x86 the "hotfix" (hot-patch) combination is sorried (rejected with sorry ()). */
451 static bool
452 ix86_profile_before_prologue (void)
453 {
454 return flag_fentry != 0;
455 }
456
457 /* Update register usage after having seen the compiler flags. */
458
459 static void
460 ix86_conditional_register_usage (void)
461 {
462 int i, c_mask;
463
464 /* If there are no caller-saved registers, preserve all registers
465 except fixed_regs and registers used for the function return value,
466 since aggregate_value_p checks call_used_regs[regno] on the return
467 value. */
468 if (cfun && cfun->machine->no_caller_saved_registers)
469 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
470 if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
471 call_used_regs[i] = 0;
472
473 /* For 32-bit targets, disable the REX registers. */
474 if (! TARGET_64BIT)
475 {
476 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
477 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
478 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
479 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
480 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
481 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
482 }
483
484 /* See the definition of CALL_USED_REGISTERS in i386.h. */
485 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
486
487 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
488
489 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
490 {
491 /* Set/reset conditionally defined registers from
492 CALL_USED_REGISTERS initializer. */
493 if (call_used_regs[i] > 1)
494 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
495
496 /* Calculate registers of CLOBBERED_REGS register set
497 as call used registers from GENERAL_REGS register set. */
498 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
499 && call_used_regs[i])
500 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
501 }
502
503 /* If MMX is disabled, disable the registers. */
504 if (! TARGET_MMX)
505 accessible_reg_set &= ~reg_class_contents[MMX_REGS];
506
507 /* If SSE is disabled, disable the registers. */
508 if (! TARGET_SSE)
509 accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];
510
511 /* If the FPU is disabled, disable the registers. */
512 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
513 accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];
514
515 /* If AVX512F is disabled, disable the registers. */
516 if (! TARGET_AVX512F)
517 {
518 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
519 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
520
521 accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
522 }
523 }
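/* A usage sketch for the no_caller_saved_registers path handled above
   (typical for interrupt-style handlers; the function name is illustrative):

     void __attribute__((no_caller_saved_registers))
     event_handler (void)
     {
       ...   // every register clobbered here is saved/restored in the
             // prologue/epilogue, since nothing is treated as call-used
     }

   Callers may then invoke event_handler without spilling any registers
   around the call.  */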
524
525 /* Canonicalize a comparison from one we don't have to one we do have. */
526
527 static void
528 ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
529 bool op0_preserve_value)
530 {
531 /* The order of operands in x87 ficom compare is forced by combine in
532 simplify_comparison () function. Float operator is treated as RTX_OBJ
533 with a precedence over other operators and is always put in the first
534 place. Swap condition and operands to match ficom instruction. */
535 if (!op0_preserve_value
536 && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
537 {
538 enum rtx_code scode = swap_condition ((enum rtx_code) *code);
539
540 /* We are called only for compares that are split to SAHF instruction.
541 Ensure that we have setcc/jcc insn for the swapped condition. */
542 if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
543 {
544 std::swap (*op0, *op1);
545 *code = (int) scode;
546 }
547 }
548 }
549 \f
550 \f
551 /* Hook to determine if one function can safely inline another. */
552
553 static bool
554 ix86_can_inline_p (tree caller, tree callee)
555 {
556 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
557 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
558
559 /* Changes to these flags can be tolerated for always_inline functions.
560 Let's hope the user knows what they are doing. */
561 unsigned HOST_WIDE_INT always_inline_safe_mask
562 = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
563 | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
564 | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
565 | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
566 | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
567 | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
568 | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
569
570
571 if (!callee_tree)
572 callee_tree = target_option_default_node;
573 if (!caller_tree)
574 caller_tree = target_option_default_node;
575 if (callee_tree == caller_tree)
576 return true;
577
578 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
579 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
580 bool ret = false;
581 bool always_inline
582 = (DECL_DISREGARD_INLINE_LIMITS (callee)
583 && lookup_attribute ("always_inline",
584 DECL_ATTRIBUTES (callee)));
585
586 /* If callee only uses GPRs, ignore MASK_80387. */
587 if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags))
588 always_inline_safe_mask |= MASK_80387;
589
590 cgraph_node *callee_node = cgraph_node::get (callee);
591 /* The callee's ISA options should be a subset of the caller's, i.e. an SSE4
592 function can inline an SSE2 function, but an SSE2 function can't inline
593 an SSE4 function. */
594 if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
595 != callee_opts->x_ix86_isa_flags)
596 || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
597 != callee_opts->x_ix86_isa_flags2))
598 ret = false;
599
600 /* See if we have the same non-isa options. */
601 else if ((!always_inline
602 && caller_opts->x_target_flags != callee_opts->x_target_flags)
603 || (caller_opts->x_target_flags & ~always_inline_safe_mask)
604 != (callee_opts->x_target_flags & ~always_inline_safe_mask))
605 ret = false;
606
607 /* See if arch, tune, etc. are the same. */
608 else if (caller_opts->arch != callee_opts->arch)
609 ret = false;
610
611 else if (!always_inline && caller_opts->tune != callee_opts->tune)
612 ret = false;
613
614 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
615 /* If the callee doesn't use FP expressions, differences in
616 ix86_fpmath can be ignored. We are called from FEs
617 for multi-versioning call optimization, so beware of
618 ipa_fn_summaries not being available. */
619 && (! ipa_fn_summaries
620 || ipa_fn_summaries->get (callee_node) == NULL
621 || ipa_fn_summaries->get (callee_node)->fp_expressions))
622 ret = false;
623
624 else if (!always_inline
625 && caller_opts->branch_cost != callee_opts->branch_cost)
626 ret = false;
627
628 else
629 ret = true;
630
631 return ret;
632 }
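/* A minimal illustration of the ISA-subset rule above (function names are
   placeholders):

     static inline int __attribute__((always_inline, target ("sse4.2")))
     count_bits (unsigned x) { return __builtin_popcount (x); }

     int caller (unsigned x)   // translation unit compiled with -msse2 only
     {
       return count_bits (x);  // rejected: callee ISA is not a subset of caller's
     }

   Compiling the caller with -msse4.2 (or giving it the same target
   attribute) makes the callee's ISA flags a subset again and the inlining
   is allowed.  */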
633 \f
634 /* Return true if this goes in large data/bss. */
635
636 static bool
637 ix86_in_large_data_p (tree exp)
638 {
639 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
640 return false;
641
642 if (exp == NULL_TREE)
643 return false;
644
645 /* Functions are never large data. */
646 if (TREE_CODE (exp) == FUNCTION_DECL)
647 return false;
648
649 /* Automatic variables are never large data. */
650 if (VAR_P (exp) && !is_global_var (exp))
651 return false;
652
653 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
654 {
655 const char *section = DECL_SECTION_NAME (exp);
656 if (strcmp (section, ".ldata") == 0
657 || strcmp (section, ".lbss") == 0)
658 return true;
659 return false;
660 }
661 else
662 {
663 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
664
665 /* If this is an incomplete type with size 0, then we can't put it
666 in data because it might be too big when completed. Also,
667 int_size_in_bytes returns -1 if size can vary or is larger than
668 an integer in which case also it is safer to assume that it goes in
669 large data. */
670 if (size <= 0 || size > ix86_section_threshold)
671 return true;
672 }
673
674 return false;
675 }
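/* A minimal sketch of what this classifies, assuming the usual option
   spellings: with -mcmodel=medium -mlarge-data-threshold=65536, an object
   such as

     static char big_table[1 << 20];   // 1 MiB, well above the threshold

   is treated as large data and ends up in .lbss (or .ldata when
   initialized), while objects at or below the threshold stay in the
   normal .bss/.data sections.  */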
676
677 /* i386-specific section flag to mark large sections. */
678 #define SECTION_LARGE SECTION_MACH_DEP
679
680 /* Switch to the appropriate section for output of DECL.
681 DECL is either a `VAR_DECL' node or a constant of some sort.
682 RELOC indicates whether forming the initial value of DECL requires
683 link-time relocations. */
684
685 ATTRIBUTE_UNUSED static section *
686 x86_64_elf_select_section (tree decl, int reloc,
687 unsigned HOST_WIDE_INT align)
688 {
689 if (ix86_in_large_data_p (decl))
690 {
691 const char *sname = NULL;
692 unsigned int flags = SECTION_WRITE | SECTION_LARGE;
693 switch (categorize_decl_for_section (decl, reloc))
694 {
695 case SECCAT_DATA:
696 sname = ".ldata";
697 break;
698 case SECCAT_DATA_REL:
699 sname = ".ldata.rel";
700 break;
701 case SECCAT_DATA_REL_LOCAL:
702 sname = ".ldata.rel.local";
703 break;
704 case SECCAT_DATA_REL_RO:
705 sname = ".ldata.rel.ro";
706 break;
707 case SECCAT_DATA_REL_RO_LOCAL:
708 sname = ".ldata.rel.ro.local";
709 break;
710 case SECCAT_BSS:
711 sname = ".lbss";
712 flags |= SECTION_BSS;
713 break;
714 case SECCAT_RODATA:
715 case SECCAT_RODATA_MERGE_STR:
716 case SECCAT_RODATA_MERGE_STR_INIT:
717 case SECCAT_RODATA_MERGE_CONST:
718 sname = ".lrodata";
719 flags &= ~SECTION_WRITE;
720 break;
721 case SECCAT_SRODATA:
722 case SECCAT_SDATA:
723 case SECCAT_SBSS:
724 gcc_unreachable ();
725 case SECCAT_TEXT:
726 case SECCAT_TDATA:
727 case SECCAT_TBSS:
728 /* We don't split these for the medium model. Place them into
729 the default sections and hope for the best. */
730 break;
731 }
732 if (sname)
733 {
734 /* We might get called with string constants, but get_named_section
735 doesn't like them as they are not DECLs. Also, we need to set
736 flags in that case. */
737 if (!DECL_P (decl))
738 return get_section (sname, flags, NULL);
739 return get_named_section (decl, sname, reloc);
740 }
741 }
742 return default_elf_select_section (decl, reloc, align);
743 }
744
745 /* Select a set of attributes for section NAME based on the properties
746 of DECL and whether or not RELOC indicates that DECL's initializer
747 might contain runtime relocations. */
748
749 static unsigned int ATTRIBUTE_UNUSED
750 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
751 {
752 unsigned int flags = default_section_type_flags (decl, name, reloc);
753
754 if (ix86_in_large_data_p (decl))
755 flags |= SECTION_LARGE;
756
757 if (decl == NULL_TREE
758 && (strcmp (name, ".ldata.rel.ro") == 0
759 || strcmp (name, ".ldata.rel.ro.local") == 0))
760 flags |= SECTION_RELRO;
761
762 if (strcmp (name, ".lbss") == 0
763 || startswith (name, ".lbss.")
764 || startswith (name, ".gnu.linkonce.lb."))
765 flags |= SECTION_BSS;
766
767 return flags;
768 }
769
770 /* Build up a unique section name, expressed as a
771 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
772 RELOC indicates whether the initial value of EXP requires
773 link-time relocations. */
774
775 static void ATTRIBUTE_UNUSED
776 x86_64_elf_unique_section (tree decl, int reloc)
777 {
778 if (ix86_in_large_data_p (decl))
779 {
780 const char *prefix = NULL;
781 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
782 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
783
784 switch (categorize_decl_for_section (decl, reloc))
785 {
786 case SECCAT_DATA:
787 case SECCAT_DATA_REL:
788 case SECCAT_DATA_REL_LOCAL:
789 case SECCAT_DATA_REL_RO:
790 case SECCAT_DATA_REL_RO_LOCAL:
791 prefix = one_only ? ".ld" : ".ldata";
792 break;
793 case SECCAT_BSS:
794 prefix = one_only ? ".lb" : ".lbss";
795 break;
796 case SECCAT_RODATA:
797 case SECCAT_RODATA_MERGE_STR:
798 case SECCAT_RODATA_MERGE_STR_INIT:
799 case SECCAT_RODATA_MERGE_CONST:
800 prefix = one_only ? ".lr" : ".lrodata";
801 break;
802 case SECCAT_SRODATA:
803 case SECCAT_SDATA:
804 case SECCAT_SBSS:
805 gcc_unreachable ();
806 case SECCAT_TEXT:
807 case SECCAT_TDATA:
808 case SECCAT_TBSS:
809 /* We don't split these for the medium model. Place them into
810 the default sections and hope for the best. */
811 break;
812 }
813 if (prefix)
814 {
815 const char *name, *linkonce;
816 char *string;
817
818 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
819 name = targetm.strip_name_encoding (name);
820
821 /* If we're using one_only, then there needs to be a .gnu.linkonce
822 prefix to the section name. */
823 linkonce = one_only ? ".gnu.linkonce" : "";
824
825 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
826
827 set_decl_section_name (decl, string);
828 return;
829 }
830 }
831 default_unique_section (decl, reloc);
832 }
833
834 #ifdef COMMON_ASM_OP
835
836 #ifndef LARGECOMM_SECTION_ASM_OP
837 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
838 #endif
839
840 /* This says how to output assembler code to declare an
841 uninitialized external linkage data object.
842
843 For medium-model x86-64 we need to use the LARGECOMM_SECTION_ASM_OP
844 directive for large objects. */
845 void
846 x86_elf_aligned_decl_common (FILE *file, tree decl,
847 const char *name, unsigned HOST_WIDE_INT size,
848 unsigned align)
849 {
850 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
851 && size > (unsigned int)ix86_section_threshold)
852 {
853 switch_to_section (get_named_section (decl, ".lbss", 0));
854 fputs (LARGECOMM_SECTION_ASM_OP, file);
855 }
856 else
857 fputs (COMMON_ASM_OP, file);
858 assemble_name (file, name);
859 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
860 size, align / BITS_PER_UNIT);
861 }
862 #endif
863
864 /* Utility function for targets to use in implementing
865 ASM_OUTPUT_ALIGNED_BSS. */
866
867 void
868 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
869 unsigned HOST_WIDE_INT size, unsigned align)
870 {
871 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
872 && size > (unsigned int)ix86_section_threshold)
873 switch_to_section (get_named_section (decl, ".lbss", 0));
874 else
875 switch_to_section (bss_section);
876 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
877 #ifdef ASM_DECLARE_OBJECT_NAME
878 last_assemble_variable_decl = decl;
879 ASM_DECLARE_OBJECT_NAME (file, name, decl);
880 #else
881 /* Standard thing is just output label for the object. */
882 ASM_OUTPUT_LABEL (file, name);
883 #endif /* ASM_DECLARE_OBJECT_NAME */
884 ASM_OUTPUT_SKIP (file, size ? size : 1);
885 }
886 \f
887 /* Decide whether we must probe the stack before any space allocation
888 on this target. It's essentially TARGET_STACK_PROBE except when
889 -fstack-check causes the stack to be already probed differently. */
890
891 bool
892 ix86_target_stack_probe (void)
893 {
894 /* Do not probe the stack twice if static stack checking is enabled. */
895 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
896 return false;
897
898 return TARGET_STACK_PROBE;
899 }
900 \f
901 /* Decide whether we can make a sibling call to a function. DECL is the
902 declaration of the function being targeted by the call and EXP is the
903 CALL_EXPR representing the call. */
904
905 static bool
906 ix86_function_ok_for_sibcall (tree decl, tree exp)
907 {
908 tree type, decl_or_type;
909 rtx a, b;
910 bool bind_global = decl && !targetm.binds_local_p (decl);
911
912 if (ix86_function_naked (current_function_decl))
913 return false;
914
915 /* Sibling call isn't OK if there are no caller-saved registers
916 since all registers must be preserved before return. */
917 if (cfun->machine->no_caller_saved_registers)
918 return false;
919
920 /* If we are generating position-independent code, we cannot sibcall
921 optimize direct calls to global functions, as the PLT requires
922 %ebx be live. (Darwin does not have a PLT.) */
923 if (!TARGET_MACHO
924 && !TARGET_64BIT
925 && flag_pic
926 && flag_plt
927 && bind_global)
928 return false;
929
930 /* If we need to align the outgoing stack, then sibcalling would
931 unalign the stack, which may break the called function. */
932 if (ix86_minimum_incoming_stack_boundary (true)
933 < PREFERRED_STACK_BOUNDARY)
934 return false;
935
936 if (decl)
937 {
938 decl_or_type = decl;
939 type = TREE_TYPE (decl);
940 }
941 else
942 {
943 /* We're looking at the CALL_EXPR, we need the type of the function. */
944 type = CALL_EXPR_FN (exp); /* pointer expression */
945 type = TREE_TYPE (type); /* pointer type */
946 type = TREE_TYPE (type); /* function type */
947 decl_or_type = type;
948 }
949
950 /* If outgoing reg parm stack space changes, we cannot do sibcall. */
951 if ((OUTGOING_REG_PARM_STACK_SPACE (type)
952 != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl)))
953 || (REG_PARM_STACK_SPACE (decl_or_type)
954 != REG_PARM_STACK_SPACE (current_function_decl)))
955 {
956 maybe_complain_about_tail_call (exp,
957 "inconsistent size of stack space"
958 " allocated for arguments which are"
959 " passed in registers");
960 return false;
961 }
962
963 /* Check that the return value locations are the same. Like
964 if we are returning floats on the 80387 register stack, we cannot
965 make a sibcall from a function that doesn't return a float to a
966 function that does or, conversely, from a function that does return
967 a float to a function that doesn't; the necessary stack adjustment
968 would not be executed. This is also the place we notice
969 differences in the return value ABI. Note that it is ok for one
970 of the functions to have void return type as long as the return
971 value of the other is passed in a register. */
972 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
973 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
974 cfun->decl, false);
975 if (STACK_REG_P (a) || STACK_REG_P (b))
976 {
977 if (!rtx_equal_p (a, b))
978 return false;
979 }
980 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
981 ;
982 else if (!rtx_equal_p (a, b))
983 return false;
984
985 if (TARGET_64BIT)
986 {
987 /* The SYSV ABI has more call-clobbered registers;
988 disallow sibcalls from MS to SYSV. */
989 if (cfun->machine->call_abi == MS_ABI
990 && ix86_function_type_abi (type) == SYSV_ABI)
991 return false;
992 }
993 else
994 {
995 /* If this call is indirect, we'll need to be able to use a
996 call-clobbered register for the address of the target function.
997 Make sure that all such registers are not used for passing
998 parameters. Note that DLLIMPORT functions and call to global
999 function via GOT slot are indirect. */
1000 if (!decl
1001 || (bind_global && flag_pic && !flag_plt)
1002 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
1003 || flag_force_indirect_call)
1004 {
1005 /* Check if regparm >= 3 since arg_reg_available is set to
1006 false if regparm == 0. If regparm is 1 or 2, there is
1007 always a call-clobbered register available.
1008
1009 ??? The symbol indirect call doesn't need a call-clobbered
1010 register. But we don't know if this is a symbol indirect
1011 call or not here. */
1012 if (ix86_function_regparm (type, decl) >= 3
1013 && !cfun->machine->arg_reg_available)
1014 return false;
1015 }
1016 }
1017
1018 /* Otherwise okay. That also includes certain types of indirect calls. */
1019 return true;
1020 }
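/* A minimal example of the kind of call these checks decide about:

     int callee (int);

     int caller (int x)
     {
       return callee (x + 1);   // tail position: may become "jmp callee"
     }

   When one of the predicates above fails (for instance a 32-bit PIC call
   that must go through the PLT with %ebx live), the call is emitted as a
   normal "call" followed by the caller's own "ret" instead.  */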
1021
1022 /* This function determines from TYPE the calling-convention. */
1023
1024 unsigned int
1025 ix86_get_callcvt (const_tree type)
1026 {
1027 unsigned int ret = 0;
1028 bool is_stdarg;
1029 tree attrs;
1030
1031 if (TARGET_64BIT)
1032 return IX86_CALLCVT_CDECL;
1033
1034 attrs = TYPE_ATTRIBUTES (type);
1035 if (attrs != NULL_TREE)
1036 {
1037 if (lookup_attribute ("cdecl", attrs))
1038 ret |= IX86_CALLCVT_CDECL;
1039 else if (lookup_attribute ("stdcall", attrs))
1040 ret |= IX86_CALLCVT_STDCALL;
1041 else if (lookup_attribute ("fastcall", attrs))
1042 ret |= IX86_CALLCVT_FASTCALL;
1043 else if (lookup_attribute ("thiscall", attrs))
1044 ret |= IX86_CALLCVT_THISCALL;
1045
1046 /* Regparm isn't allowed for thiscall and fastcall. */
1047 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
1048 {
1049 if (lookup_attribute ("regparm", attrs))
1050 ret |= IX86_CALLCVT_REGPARM;
1051 if (lookup_attribute ("sseregparm", attrs))
1052 ret |= IX86_CALLCVT_SSEREGPARM;
1053 }
1054
1055 if (IX86_BASE_CALLCVT(ret) != 0)
1056 return ret;
1057 }
1058
1059 is_stdarg = stdarg_p (type);
1060 if (TARGET_RTD && !is_stdarg)
1061 return IX86_CALLCVT_STDCALL | ret;
1062
1063 if (ret != 0
1064 || is_stdarg
1065 || TREE_CODE (type) != METHOD_TYPE
1066 || ix86_function_type_abi (type) != MS_ABI)
1067 return IX86_CALLCVT_CDECL | ret;
1068
1069 return IX86_CALLCVT_THISCALL;
1070 }
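/* A usage sketch for the 32-bit calling-convention attributes decoded
   above (declarations only, names illustrative):

     int __attribute__((cdecl))    f1 (int, int);  // caller pops arguments
     int __attribute__((stdcall))  f2 (int, int);  // callee pops arguments
     int __attribute__((fastcall)) f3 (int, int);  // first two args in ECX/EDX
     int __attribute__((thiscall)) f4 (int, int);  // first arg in ECX

   On 64-bit targets the function above short-circuits and everything is
   reported as IX86_CALLCVT_CDECL.  */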
1071
1072 /* Return 0 if the attributes for two types are incompatible, 1 if they
1073 are compatible, and 2 if they are nearly compatible (which causes a
1074 warning to be generated). */
1075
1076 static int
1077 ix86_comp_type_attributes (const_tree type1, const_tree type2)
1078 {
1079 unsigned int ccvt1, ccvt2;
1080
1081 if (TREE_CODE (type1) != FUNCTION_TYPE
1082 && TREE_CODE (type1) != METHOD_TYPE)
1083 return 1;
1084
1085 ccvt1 = ix86_get_callcvt (type1);
1086 ccvt2 = ix86_get_callcvt (type2);
1087 if (ccvt1 != ccvt2)
1088 return 0;
1089 if (ix86_function_regparm (type1, NULL)
1090 != ix86_function_regparm (type2, NULL))
1091 return 0;
1092
1093 return 1;
1094 }
1095 \f
1096 /* Return the regparm value for a function with the indicated TYPE and DECL.
1097 DECL may be NULL when calling function indirectly
1098 or considering a libcall. */
1099
1100 static int
1101 ix86_function_regparm (const_tree type, const_tree decl)
1102 {
1103 tree attr;
1104 int regparm;
1105 unsigned int ccvt;
1106
1107 if (TARGET_64BIT)
1108 return (ix86_function_type_abi (type) == SYSV_ABI
1109 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
1110 ccvt = ix86_get_callcvt (type);
1111 regparm = ix86_regparm;
1112
1113 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
1114 {
1115 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1116 if (attr)
1117 {
1118 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1119 return regparm;
1120 }
1121 }
1122 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1123 return 2;
1124 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1125 return 1;
1126
1127 /* Use register calling convention for local functions when possible. */
1128 if (decl
1129 && TREE_CODE (decl) == FUNCTION_DECL)
1130 {
1131 cgraph_node *target = cgraph_node::get (decl);
1132 if (target)
1133 target = target->function_symbol ();
1134
1135 /* Caller and callee must agree on the calling convention, so
1136 checking just the global `optimize' flag here would mean that with
1137 __attribute__((optimize (...))) the caller could use the regparm convention
1138 and the callee not, or vice versa. Instead look at whether the callee
1139 is optimized or not. */
1140 if (target && opt_for_fn (target->decl, optimize)
1141 && !(profile_flag && !flag_fentry))
1142 {
1143 if (target->local && target->can_change_signature)
1144 {
1145 int local_regparm, globals = 0, regno;
1146
1147 /* Make sure no regparm register is taken by a
1148 fixed register variable. */
1149 for (local_regparm = 0; local_regparm < REGPARM_MAX;
1150 local_regparm++)
1151 if (fixed_regs[local_regparm])
1152 break;
1153
1154 /* We don't want to use regparm(3) for nested functions as
1155 these use a static chain pointer in the third argument. */
1156 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
1157 local_regparm = 2;
1158
1159 /* Save a register for the split stack. */
1160 if (flag_split_stack)
1161 {
1162 if (local_regparm == 3)
1163 local_regparm = 2;
1164 else if (local_regparm == 2
1165 && DECL_STATIC_CHAIN (target->decl))
1166 local_regparm = 1;
1167 }
1168
1169 /* Each fixed register usage increases register pressure,
1170 so fewer registers should be used for argument passing.
1171 This functionality can be overridden by an explicit
1172 regparm value. */
1173 for (regno = AX_REG; regno <= DI_REG; regno++)
1174 if (fixed_regs[regno])
1175 globals++;
1176
1177 local_regparm
1178 = globals < local_regparm ? local_regparm - globals : 0;
1179
1180 if (local_regparm > regparm)
1181 regparm = local_regparm;
1182 }
1183 }
1184 }
1185
1186 return regparm;
1187 }
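/* A minimal usage sketch for the explicit regparm convention handled above,
   assuming a 32-bit target:

     int __attribute__((regparm (3)))
     add3 (int a, int b, int c)   // a, b, c passed in EAX, EDX, ECX
     {
       return a + b + c;
     }

   An explicit regparm value like this overrides the local-function
   heuristic computed at the end of ix86_function_regparm.  */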
1188
1189 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
1190 DFmode (2) arguments in SSE registers for a function with the
1191 indicated TYPE and DECL. DECL may be NULL when calling the function
1192 indirectly or considering a libcall. Return -1 if any FP parameter
1193 should be rejected with an error; this is used in situations where we
1194 imply the SSE calling convention but the function is called from another
1195 function with SSE disabled. Otherwise return 0. */
1196
1197 static int
1198 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
1199 {
1200 gcc_assert (!TARGET_64BIT);
1201
1202 /* Use SSE registers to pass SFmode and DFmode arguments if requested
1203 by the sseregparm attribute. */
1204 if (TARGET_SSEREGPARM
1205 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
1206 {
1207 if (!TARGET_SSE)
1208 {
1209 if (warn)
1210 {
1211 if (decl)
1212 error ("calling %qD with attribute sseregparm without "
1213 "SSE/SSE2 enabled", decl);
1214 else
1215 error ("calling %qT with attribute sseregparm without "
1216 "SSE/SSE2 enabled", type);
1217 }
1218 return 0;
1219 }
1220
1221 return 2;
1222 }
1223
1224 if (!decl)
1225 return 0;
1226
1227 cgraph_node *target = cgraph_node::get (decl);
1228 if (target)
1229 target = target->function_symbol ();
1230
1231 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
1232 (and DFmode for SSE2) arguments in SSE registers. */
1233 if (target
1234 /* TARGET_SSE_MATH */
1235 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
1236 && opt_for_fn (target->decl, optimize)
1237 && !(profile_flag && !flag_fentry))
1238 {
1239 if (target->local && target->can_change_signature)
1240 {
1241 /* Refuse to produce wrong code when a local function with SSE enabled
1242 is called from an SSE-disabled function.
1243 FIXME: We need a way to detect these cases across ltrans partitions
1244 and avoid using SSE calling conventions on local functions called
1245 from functions with SSE disabled. For now at least delay the
1246 warning until we know we are going to produce wrong code.
1247 See PR66047. */
1248 if (!TARGET_SSE && warn)
1249 return -1;
1250 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
1251 ->x_ix86_isa_flags) ? 2 : 1;
1252 }
1253 }
1254
1255 return 0;
1256 }
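/* A usage sketch for the sseregparm attribute handled above (32-bit only):

     double __attribute__((sseregparm))
     scale (double x, double y);   // x and y arrive in %xmm0 and %xmm1

   With SSE2 enabled this passes SFmode and DFmode arguments in SSE
   registers instead of on the stack; compiling the same declaration with
   SSE disabled triggers the error emitted near the top of this function.  */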
1257
1258 /* Return true if EAX is live at the start of the function. Used by
1259 ix86_expand_prologue to determine if we need special help before
1260 calling allocate_stack_worker. */
1261
1262 static bool
1263 ix86_eax_live_at_start_p (void)
1264 {
1265 /* Cheat. Don't bother working forward from ix86_function_regparm
1266 to the function type to whether an actual argument is located in
1267 eax. Instead just look at cfg info, which is still close enough
1268 to correct at this point. This gives false positives for broken
1269 functions that might use uninitialized data that happens to be
1270 allocated in eax, but who cares? */
1271 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
1272 }
1273
1274 static bool
1275 ix86_keep_aggregate_return_pointer (tree fntype)
1276 {
1277 tree attr;
1278
1279 if (!TARGET_64BIT)
1280 {
1281 attr = lookup_attribute ("callee_pop_aggregate_return",
1282 TYPE_ATTRIBUTES (fntype));
1283 if (attr)
1284 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
1285
1286 /* For 32-bit MS-ABI the default is to keep aggregate
1287 return pointer. */
1288 if (ix86_function_type_abi (fntype) == MS_ABI)
1289 return true;
1290 }
1291 return KEEP_AGGREGATE_RETURN_POINTER != 0;
1292 }
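/* A usage sketch for the attribute queried above (32-bit only; the struct
   and function names are illustrative):

     struct big { int v[4]; };
     struct big __attribute__((callee_pop_aggregate_return (0)))
     get_big (void);

   An argument of 0 asks that the callee keep (not pop) the hidden
   return-slot pointer, so the caller removes it; an argument of 1 requests
   the behaviour where the callee pops it.  */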
1293
1294 /* Value is the number of bytes of arguments automatically
1295 popped when returning from a subroutine call.
1296 FUNDECL is the declaration node of the function (as a tree),
1297 FUNTYPE is the data type of the function (as a tree),
1298 or for a library call it is an identifier node for the subroutine name.
1299 SIZE is the number of bytes of arguments passed on the stack.
1300
1301 On the 80386, the RTD insn may be used to pop them if the number
1302 of args is fixed, but if the number is variable then the caller
1303 must pop them all. RTD can't be used for library calls now
1304 because the library is compiled with the Unix compiler.
1305 Use of RTD is a selectable option, since it is incompatible with
1306 standard Unix calling sequences. If the option is not selected,
1307 the caller must always pop the args.
1308
1309 The attribute stdcall is equivalent to RTD on a per module basis. */
1310
1311 static poly_int64
1312 ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
1313 {
1314 unsigned int ccvt;
1315
1316 /* None of the 64-bit ABIs pop arguments. */
1317 if (TARGET_64BIT)
1318 return 0;
1319
1320 ccvt = ix86_get_callcvt (funtype);
1321
1322 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
1323 | IX86_CALLCVT_THISCALL)) != 0
1324 && ! stdarg_p (funtype))
1325 return size;
1326
1327 /* Lose any fake structure return argument if it is passed on the stack. */
1328 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1329 && !ix86_keep_aggregate_return_pointer (funtype))
1330 {
1331 int nregs = ix86_function_regparm (funtype, fundecl);
1332 if (nregs == 0)
1333 return GET_MODE_SIZE (Pmode);
1334 }
1335
1336 return 0;
1337 }
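/* A concrete instance of the rule above: given

     void __attribute__((stdcall)) f (int a, int b);

   this hook returns 8, so f ends in "ret $8" and the caller does not
   adjust %esp after the call.  With the default cdecl convention it
   returns 0 and the caller pops the 8 bytes of arguments itself.  */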
1338
1339 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
1340
1341 static bool
1342 ix86_legitimate_combined_insn (rtx_insn *insn)
1343 {
1344 int i;
1345
1346 /* Check operand constraints in case hard registers were propagated
1347 into insn pattern. This check prevents combine pass from
1348 generating insn patterns with invalid hard register operands.
1349 These invalid insns can eventually confuse reload to error out
1350 with a spill failure. See also PRs 46829 and 46843. */
1351
1352 gcc_assert (INSN_CODE (insn) >= 0);
1353
1354 extract_insn (insn);
1355 preprocess_constraints (insn);
1356
1357 int n_operands = recog_data.n_operands;
1358 int n_alternatives = recog_data.n_alternatives;
1359 for (i = 0; i < n_operands; i++)
1360 {
1361 rtx op = recog_data.operand[i];
1362 machine_mode mode = GET_MODE (op);
1363 const operand_alternative *op_alt;
1364 int offset = 0;
1365 bool win;
1366 int j;
1367
1368 /* A unary operator may be accepted by the predicate, but it
1369 is irrelevant for matching constraints. */
1370 if (UNARY_P (op))
1371 op = XEXP (op, 0);
1372
1373 if (SUBREG_P (op))
1374 {
1375 if (REG_P (SUBREG_REG (op))
1376 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
1377 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
1378 GET_MODE (SUBREG_REG (op)),
1379 SUBREG_BYTE (op),
1380 GET_MODE (op));
1381 op = SUBREG_REG (op);
1382 }
1383
1384 if (!(REG_P (op) && HARD_REGISTER_P (op)))
1385 continue;
1386
1387 op_alt = recog_op_alt;
1388
1389 /* Operand has no constraints, anything is OK. */
1390 win = !n_alternatives;
1391
1392 alternative_mask preferred = get_preferred_alternatives (insn);
1393 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
1394 {
1395 if (!TEST_BIT (preferred, j))
1396 continue;
1397 if (op_alt[i].anything_ok
1398 || (op_alt[i].matches != -1
1399 && operands_match_p
1400 (recog_data.operand[i],
1401 recog_data.operand[op_alt[i].matches]))
1402 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
1403 {
1404 win = true;
1405 break;
1406 }
1407 }
1408
1409 if (!win)
1410 return false;
1411 }
1412
1413 return true;
1414 }
1415 \f
1416 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
1417
1418 static unsigned HOST_WIDE_INT
1419 ix86_asan_shadow_offset (void)
1420 {
1421 return SUBTARGET_SHADOW_OFFSET;
1422 }
1423 \f
1424 /* Argument support functions. */
1425
1426 /* Return true when register may be used to pass function parameters. */
1427 bool
1428 ix86_function_arg_regno_p (int regno)
1429 {
1430 int i;
1431 enum calling_abi call_abi;
1432 const int *parm_regs;
1433
1434 if (TARGET_SSE && SSE_REGNO_P (regno)
1435 && regno < FIRST_SSE_REG + SSE_REGPARM_MAX)
1436 return true;
1437
1438 if (!TARGET_64BIT)
1439 return (regno < REGPARM_MAX
1440 || (TARGET_MMX && MMX_REGNO_P (regno)
1441 && regno < FIRST_MMX_REG + MMX_REGPARM_MAX));
1442
1443 /* TODO: The function should depend on current function ABI but
1444 builtins.cc would need updating then. Therefore we use the
1445 default ABI. */
1446 call_abi = ix86_cfun_abi ();
1447
1448 /* RAX is used as hidden argument to va_arg functions. */
1449 if (call_abi == SYSV_ABI && regno == AX_REG)
1450 return true;
1451
1452 if (call_abi == MS_ABI)
1453 parm_regs = x86_64_ms_abi_int_parameter_registers;
1454 else
1455 parm_regs = x86_64_int_parameter_registers;
1456
1457 for (i = 0; i < (call_abi == MS_ABI
1458 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
1459 if (regno == parm_regs[i])
1460 return true;
1461 return false;
1462 }
1463
1464 /* Return true if we do not know how to pass ARG solely in registers. */
1465
1466 static bool
1467 ix86_must_pass_in_stack (const function_arg_info &arg)
1468 {
1469 if (must_pass_in_stack_var_size_or_pad (arg))
1470 return true;
1471
1472 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1473 The layout_type routine is crafty and tries to trick us into passing
1474 currently unsupported vector types on the stack by using TImode. */
1475 return (!TARGET_64BIT && arg.mode == TImode
1476 && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
1477 }
1478
1479 /* Return the size, in bytes, of the area reserved for arguments passed
1480 in registers for the function represented by FNDECL, depending on the
1481 ABI in use. */
1482 int
1483 ix86_reg_parm_stack_space (const_tree fndecl)
1484 {
1485 enum calling_abi call_abi = SYSV_ABI;
1486 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
1487 call_abi = ix86_function_abi (fndecl);
1488 else
1489 call_abi = ix86_function_type_abi (fndecl);
1490 if (TARGET_64BIT && call_abi == MS_ABI)
1491 return 32;
1492 return 0;
1493 }
1494
1495 /* We add this as a workaround in order to use the libc_has_function
1496 hook in i386.md. */
1497 bool
1498 ix86_libc_has_function (enum function_class fn_class)
1499 {
1500 return targetm.libc_has_function (fn_class, NULL_TREE);
1501 }
1502
1503 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying
1504 the call ABI used. */
1505 enum calling_abi
1506 ix86_function_type_abi (const_tree fntype)
1507 {
1508 enum calling_abi abi = ix86_abi;
1509
1510 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
1511 return abi;
1512
1513 if (abi == SYSV_ABI
1514 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
1515 {
1516 static int warned;
1517 if (TARGET_X32 && !warned)
1518 {
1519 error ("X32 does not support %<ms_abi%> attribute");
1520 warned = 1;
1521 }
1522
1523 abi = MS_ABI;
1524 }
1525 else if (abi == MS_ABI
1526 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
1527 abi = SYSV_ABI;
1528
1529 return abi;
1530 }
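/* A usage sketch for the per-function ABI attributes handled above
   (64-bit; names illustrative):

     int __attribute__((ms_abi))   wf (int a, int b, int c, int d, int e);
     int __attribute__((sysv_abi)) uf (int a, int b, int c, int d, int e);

   wf receives a..d in %rcx, %rdx, %r8, %r9 with e passed on the stack
   (beyond the 32-byte shadow space), while uf receives all five in
   %rdi, %rsi, %rdx, %rcx, %r8.  As the code above notes, ms_abi is
   rejected for X32.  */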
1531
1532 enum calling_abi
1533 ix86_function_abi (const_tree fndecl)
1534 {
1535 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
1536 }
1537
1538 /* Return SYSV_ABI or MS_ABI, depending on CFUN, specifying
1539 the call ABI used. */
1540 enum calling_abi
1541 ix86_cfun_abi (void)
1542 {
1543 return cfun ? cfun->machine->call_abi : ix86_abi;
1544 }
1545
1546 bool
1547 ix86_function_ms_hook_prologue (const_tree fn)
1548 {
1549 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
1550 {
1551 if (decl_function_context (fn) != NULL_TREE)
1552 error_at (DECL_SOURCE_LOCATION (fn),
1553 "%<ms_hook_prologue%> attribute is not compatible "
1554 "with nested function");
1555 else
1556 return true;
1557 }
1558 return false;
1559 }
1560
1561 bool
1562 ix86_function_naked (const_tree fn)
1563 {
1564 if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
1565 return true;
1566
1567 return false;
1568 }
1569
1570 /* Write the extra assembler code needed to declare a function properly. */
1571
1572 void
1573 ix86_asm_output_function_label (FILE *out_file, const char *fname,
1574 tree decl)
1575 {
1576 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
1577
1578 if (cfun)
1579 cfun->machine->function_label_emitted = true;
1580
1581 if (is_ms_hook)
1582 {
1583 int i, filler_count = (TARGET_64BIT ? 32 : 16);
1584 unsigned int filler_cc = 0xcccccccc;
1585
1586 for (i = 0; i < filler_count; i += 4)
1587 fprintf (out_file, ASM_LONG " %#x\n", filler_cc);
1588 }
1589
1590 #ifdef SUBTARGET_ASM_UNWIND_INIT
1591 SUBTARGET_ASM_UNWIND_INIT (out_file);
1592 #endif
1593
1594 ASM_OUTPUT_LABEL (out_file, fname);
1595
1596 /* Output magic byte marker, if hot-patch attribute is set. */
1597 if (is_ms_hook)
1598 {
1599 if (TARGET_64BIT)
1600 {
1601 /* leaq [%rsp + 0], %rsp */
1602 fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
1603 out_file);
1604 }
1605 else
1606 {
1607 /* movl.s %edi, %edi
1608 push %ebp
1609 movl.s %esp, %ebp */
1610 fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", out_file);
1611 }
1612 }
1613 }
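/* A usage sketch for the hot-patch marker emitted above:

     void __attribute__((ms_hook_prologue)) patchable (void);

   The label is preceded by a block of 0xCC filler bytes (16 or 32 bytes,
   per the code above) and the body starts with "movl.s %edi, %edi;
   push %ebp; movl.s %esp, %ebp" in 32-bit mode or the 8-byte
   "leaq 0(%rsp), %rsp" no-op in 64-bit mode, giving a hot-patching tool
   room to overwrite the entry with a jump.  */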
1614
1615 /* Implementation of the call ABI switching target hook. The call
1616 register sets specific to FNDECL are selected. See also
1617 ix86_conditional_register_usage for more details. */
1618 void
1619 ix86_call_abi_override (const_tree fndecl)
1620 {
1621 cfun->machine->call_abi = ix86_function_abi (fndecl);
1622 }
1623
1624 /* Return true if a pseudo register should be created and used to hold
1625 the GOT address for PIC code. */
1626 bool
1627 ix86_use_pseudo_pic_reg (void)
1628 {
1629 if ((TARGET_64BIT
1630 && (ix86_cmodel == CM_SMALL_PIC
1631 || TARGET_PECOFF))
1632 || !flag_pic)
1633 return false;
1634 return true;
1635 }
1636
1637 /* Initialize large model PIC register. */
1638
1639 static void
1640 ix86_init_large_pic_reg (unsigned int tmp_regno)
1641 {
1642 rtx_code_label *label;
1643 rtx tmp_reg;
1644
1645 gcc_assert (Pmode == DImode);
1646 label = gen_label_rtx ();
1647 emit_label (label);
1648 LABEL_PRESERVE_P (label) = 1;
1649 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
1650 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
1651 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
1652 label));
1653 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
1654 emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
1655 const char *name = LABEL_NAME (label);
1656 PUT_CODE (label, NOTE);
1657 NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
1658 NOTE_DELETED_LABEL_NAME (label) = name;
1659 }
1660
1661 /* Create and initialize PIC register if required. */
1662 static void
1663 ix86_init_pic_reg (void)
1664 {
1665 edge entry_edge;
1666 rtx_insn *seq;
1667
1668 if (!ix86_use_pseudo_pic_reg ())
1669 return;
1670
1671 start_sequence ();
1672
1673 if (TARGET_64BIT)
1674 {
1675 if (ix86_cmodel == CM_LARGE_PIC)
1676 ix86_init_large_pic_reg (R11_REG);
1677 else
1678 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
1679 }
1680 else
1681 {
1682 /* If there is a future mcount call in the function, it is more profitable
1683 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
1684 rtx reg = crtl->profile
1685 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
1686 : pic_offset_table_rtx;
1687 rtx_insn *insn = emit_insn (gen_set_got (reg));
1688 RTX_FRAME_RELATED_P (insn) = 1;
1689 if (crtl->profile)
1690 emit_move_insn (pic_offset_table_rtx, reg);
1691 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
1692 }
1693
1694 seq = get_insns ();
1695 end_sequence ();
1696
1697 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1698 insert_insn_on_edge (seq, entry_edge);
1699 commit_one_edge_insertion (entry_edge);
1700 }
1701
1702 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1703 for a call to a function whose data type is FNTYPE.
1704 For a library call, FNTYPE is 0. */
1705
1706 void
1707 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1708 tree fntype, /* tree ptr for function decl */
1709 rtx libname, /* SYMBOL_REF of library name or 0 */
1710 tree fndecl,
1711 int caller)
1712 {
1713 struct cgraph_node *local_info_node = NULL;
1714 struct cgraph_node *target = NULL;
1715
1716 /* Set silent_p to false to raise an error for invalid calls when
1717 expanding function body. */
1718 cfun->machine->silent_p = false;
1719
1720 memset (cum, 0, sizeof (*cum));
1721
1722 if (fndecl)
1723 {
1724 target = cgraph_node::get (fndecl);
1725 if (target)
1726 {
1727 target = target->function_symbol ();
1728 local_info_node = cgraph_node::local_info_node (target->decl);
1729 cum->call_abi = ix86_function_abi (target->decl);
1730 }
1731 else
1732 cum->call_abi = ix86_function_abi (fndecl);
1733 }
1734 else
1735 cum->call_abi = ix86_function_type_abi (fntype);
1736
1737 cum->caller = caller;
1738
1739 /* Set up the number of registers to use for passing arguments. */
1740 cum->nregs = ix86_regparm;
1741 if (TARGET_64BIT)
1742 {
1743 cum->nregs = (cum->call_abi == SYSV_ABI
1744 ? X86_64_REGPARM_MAX
1745 : X86_64_MS_REGPARM_MAX);
1746 }
1747 if (TARGET_SSE)
1748 {
1749 cum->sse_nregs = SSE_REGPARM_MAX;
1750 if (TARGET_64BIT)
1751 {
1752 cum->sse_nregs = (cum->call_abi == SYSV_ABI
1753 ? X86_64_SSE_REGPARM_MAX
1754 : X86_64_MS_SSE_REGPARM_MAX);
1755 }
1756 }
1757 if (TARGET_MMX)
1758 cum->mmx_nregs = MMX_REGPARM_MAX;
1759 cum->warn_avx512f = true;
1760 cum->warn_avx = true;
1761 cum->warn_sse = true;
1762 cum->warn_mmx = true;
1763
1764 /* Because the type might mismatch between caller and callee, we need to
1765 use the actual type of the function for local calls.
1766 FIXME: cgraph_analyze can be told to actually record whether a function
1767 uses va_start, so for local functions maybe_vaarg can be made more
1768 aggressive, helping K&R code.
1769 FIXME: once the type system is fixed, we won't need this code anymore. */
1770 if (local_info_node && local_info_node->local
1771 && local_info_node->can_change_signature)
1772 fntype = TREE_TYPE (target->decl);
1773 cum->stdarg = stdarg_p (fntype);
1774 cum->maybe_vaarg = (fntype
1775 ? (!prototype_p (fntype) || stdarg_p (fntype))
1776 : !libname);
1777
1778 cum->decl = fndecl;
1779
1780 cum->warn_empty = !warn_abi || cum->stdarg;
1781 if (!cum->warn_empty && fntype)
1782 {
1783 function_args_iterator iter;
1784 tree argtype;
1785 bool seen_empty_type = false;
1786 FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
1787 {
1788 if (argtype == error_mark_node || VOID_TYPE_P (argtype))
1789 break;
1790 if (TYPE_EMPTY_P (argtype))
1791 seen_empty_type = true;
1792 else if (seen_empty_type)
1793 {
1794 cum->warn_empty = true;
1795 break;
1796 }
1797 }
1798 }
1799
1800 if (!TARGET_64BIT)
1801 {
1802 /* If there are variable arguments, then we won't pass anything
1803 in registers in 32-bit mode. */
1804 if (stdarg_p (fntype))
1805 {
1806 cum->nregs = 0;
1807 /* Since in 32-bit mode variable arguments are always passed on
1808 the stack, there is a scratch register available for an indirect
1809 sibcall. */
1810 cfun->machine->arg_reg_available = true;
1811 cum->sse_nregs = 0;
1812 cum->mmx_nregs = 0;
1813 cum->warn_avx512f = false;
1814 cum->warn_avx = false;
1815 cum->warn_sse = false;
1816 cum->warn_mmx = false;
1817 return;
1818 }
1819
1820 /* Use the ecx and edx registers if the function has the fastcall
1821 attribute, otherwise look for regparm information. */
1822 if (fntype)
1823 {
1824 unsigned int ccvt = ix86_get_callcvt (fntype);
1825 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1826 {
1827 cum->nregs = 1;
1828 cum->fastcall = 1; /* Same first register as in fastcall. */
1829 }
1830 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1831 {
1832 cum->nregs = 2;
1833 cum->fastcall = 1;
1834 }
1835 else
1836 cum->nregs = ix86_function_regparm (fntype, fndecl);
1837 }
1838
1839 /* Set up the number of SSE registers used for passing SFmode
1840 and DFmode arguments. Warn for mismatching ABI. */
1841 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
1842 }
1843
1844 cfun->machine->arg_reg_available = (cum->nregs > 0);
1845 }
1846
1847 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
1848 But in the case of vector types, it is some vector mode.
1849
1850 When we have only some of our vector isa extensions enabled, then there
1851 are some modes for which vector_mode_supported_p is false. For these
1852 modes, the generic vector support in gcc will choose some non-vector mode
1853 in order to implement the type. By computing the natural mode, we'll
1854 select the proper ABI location for the operand and not depend on whatever
1855 the middle-end decides to do with these vector types.
1856
1857 The middle-end can't deal with vector types > 16 bytes. In this
1858 case, we return the original mode and warn about the ABI change if
1859 CUM isn't NULL.
1860
1861 If IN_RETURN is true, warn about the ABI change if the vector mode
1862 isn't available for the function return value. */
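
/* For example (an illustrative sketch, not part of the original sources):
   with -mno-sse, a GNU vector type such as
     typedef int v4si __attribute__ ((vector_size (16)));
   is given a non-vector mode by the generic vector support, but its
   natural mode is still V4SImode, so the ABI location is chosen
   consistently (with a -Wpsabi warning about the SSE ABI change).  */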
1863
1864 static machine_mode
1865 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
1866 bool in_return)
1867 {
1868 machine_mode mode = TYPE_MODE (type);
1869
1870 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
1871 {
1872 HOST_WIDE_INT size = int_size_in_bytes (type);
1873 if ((size == 8 || size == 16 || size == 32 || size == 64)
1874 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
1875 && TYPE_VECTOR_SUBPARTS (type) > 1)
1876 {
1877 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
1878
1879 /* There are no XFmode vector modes ... */
1880 if (innermode == XFmode)
1881 return mode;
1882
1883 /* ... and no decimal float vector modes. */
1884 if (DECIMAL_FLOAT_MODE_P (innermode))
1885 return mode;
1886
1887 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
1888 mode = MIN_MODE_VECTOR_FLOAT;
1889 else
1890 mode = MIN_MODE_VECTOR_INT;
1891
1892 /* Get the mode which has this inner mode and number of units. */
1893 FOR_EACH_MODE_FROM (mode, mode)
1894 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
1895 && GET_MODE_INNER (mode) == innermode)
1896 {
1897 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
1898 {
1899 static bool warnedavx512f;
1900 static bool warnedavx512f_ret;
1901
1902 if (cum && cum->warn_avx512f && !warnedavx512f)
1903 {
1904 if (warning (OPT_Wpsabi, "AVX512F vector argument "
1905 "without AVX512F enabled changes the ABI"))
1906 warnedavx512f = true;
1907 }
1908 else if (in_return && !warnedavx512f_ret)
1909 {
1910 if (warning (OPT_Wpsabi, "AVX512F vector return "
1911 "without AVX512F enabled changes the ABI"))
1912 warnedavx512f_ret = true;
1913 }
1914
1915 return TYPE_MODE (type);
1916 }
1917 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
1918 {
1919 static bool warnedavx;
1920 static bool warnedavx_ret;
1921
1922 if (cum && cum->warn_avx && !warnedavx)
1923 {
1924 if (warning (OPT_Wpsabi, "AVX vector argument "
1925 "without AVX enabled changes the ABI"))
1926 warnedavx = true;
1927 }
1928 else if (in_return && !warnedavx_ret)
1929 {
1930 if (warning (OPT_Wpsabi, "AVX vector return "
1931 "without AVX enabled changes the ABI"))
1932 warnedavx_ret = true;
1933 }
1934
1935 return TYPE_MODE (type);
1936 }
1937 else if (((size == 8 && TARGET_64BIT) || size == 16)
1938 && !TARGET_SSE
1939 && !TARGET_IAMCU)
1940 {
1941 static bool warnedsse;
1942 static bool warnedsse_ret;
1943
1944 if (cum && cum->warn_sse && !warnedsse)
1945 {
1946 if (warning (OPT_Wpsabi, "SSE vector argument "
1947 "without SSE enabled changes the ABI"))
1948 warnedsse = true;
1949 }
1950 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
1951 {
1952 if (warning (OPT_Wpsabi, "SSE vector return "
1953 "without SSE enabled changes the ABI"))
1954 warnedsse_ret = true;
1955 }
1956 }
1957 else if ((size == 8 && !TARGET_64BIT)
1958 && (!cfun
1959 || cfun->machine->func_type == TYPE_NORMAL)
1960 && !TARGET_MMX
1961 && !TARGET_IAMCU)
1962 {
1963 static bool warnedmmx;
1964 static bool warnedmmx_ret;
1965
1966 if (cum && cum->warn_mmx && !warnedmmx)
1967 {
1968 if (warning (OPT_Wpsabi, "MMX vector argument "
1969 "without MMX enabled changes the ABI"))
1970 warnedmmx = true;
1971 }
1972 else if (in_return && !warnedmmx_ret)
1973 {
1974 if (warning (OPT_Wpsabi, "MMX vector return "
1975 "without MMX enabled changes the ABI"))
1976 warnedmmx_ret = true;
1977 }
1978 }
1979 return mode;
1980 }
1981
1982 gcc_unreachable ();
1983 }
1984 }
1985
1986 return mode;
1987 }
1988
1989 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
1990 this may not agree with the mode that the type system has chosen for the
1991 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
1992 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
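
/* For example (an illustrative note, not part of the original sources):
   for a BLKmode value whose natural mode is V2SFmode assigned to register
   REGNO, the result is a one-element PARALLEL wrapping (reg:V2SF REGNO)
   at byte offset 0, rather than a bare REG in BLKmode.  */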
1993
1994 static rtx
1995 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
1996 unsigned int regno)
1997 {
1998 rtx tmp;
1999
2000 if (orig_mode != BLKmode)
2001 tmp = gen_rtx_REG (orig_mode, regno);
2002 else
2003 {
2004 tmp = gen_rtx_REG (mode, regno);
2005 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2006 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2007 }
2008
2009 return tmp;
2010 }
2011
2012 /* x86-64 register passing implementation. See the x86-64 ABI for details.
2013 The goal of this code is to classify each eightbyte of an incoming argument
2014 by register class and assign registers accordingly. */
2015
2016 /* Return the union class of CLASS1 and CLASS2.
2017 See the x86-64 PS ABI for details. */
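
/* For example (an illustrative note, not part of the original sources):
   merging X86_64_NO_CLASS with X86_64_SSE_CLASS yields X86_64_SSE_CLASS
   (rule #2), while merging X86_64_INTEGER_CLASS with X86_64_SSE_CLASS
   yields X86_64_INTEGER_CLASS (rule #4).  */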
2018
2019 static enum x86_64_reg_class
2020 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2021 {
2022 /* Rule #1: If both classes are equal, this is the resulting class. */
2023 if (class1 == class2)
2024 return class1;
2025
2026 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2027 the other class. */
2028 if (class1 == X86_64_NO_CLASS)
2029 return class2;
2030 if (class2 == X86_64_NO_CLASS)
2031 return class1;
2032
2033 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2034 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2035 return X86_64_MEMORY_CLASS;
2036
2037 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2038 if ((class1 == X86_64_INTEGERSI_CLASS
2039 && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS))
2040 || (class2 == X86_64_INTEGERSI_CLASS
2041 && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS)))
2042 return X86_64_INTEGERSI_CLASS;
2043 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2044 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2045 return X86_64_INTEGER_CLASS;
2046
2047 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2048 MEMORY is used. */
2049 if (class1 == X86_64_X87_CLASS
2050 || class1 == X86_64_X87UP_CLASS
2051 || class1 == X86_64_COMPLEX_X87_CLASS
2052 || class2 == X86_64_X87_CLASS
2053 || class2 == X86_64_X87UP_CLASS
2054 || class2 == X86_64_COMPLEX_X87_CLASS)
2055 return X86_64_MEMORY_CLASS;
2056
2057 /* Rule #6: Otherwise class SSE is used. */
2058 return X86_64_SSE_CLASS;
2059 }
2060
2061 /* Classify the argument of type TYPE and mode MODE.
2062 CLASSES will be filled by the register class used to pass each word
2063 of the operand. The number of words is returned. In case the parameter
2064 should be passed in memory, 0 is returned. As a special case for zero
2065 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2066
2067 BIT_OFFSET is used internally for handling records and specifies the
2068 offset in bits modulo 512 to avoid overflow cases.
2069
2070 See the x86-64 PS ABI for details.
2071 */
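
/* For example (an illustrative note, not part of the original sources):
   a 16-byte struct such as
     struct s { double d; int i; };
   is classified as two eightbytes, the first (d) as SSE and the second
   (i plus padding) as INTEGER, so it is passed in one SSE register and
   one integer register.  */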
2072
2073 static int
2074 classify_argument (machine_mode mode, const_tree type,
2075 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset,
2076 int &zero_width_bitfields)
2077 {
2078 HOST_WIDE_INT bytes
2079 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2080 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
2081
2082 /* Variable sized entities are always passed/returned in memory. */
2083 if (bytes < 0)
2084 return 0;
2085
2086 if (mode != VOIDmode)
2087 {
2088 /* The value of "named" doesn't matter. */
2089 function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
2090 if (targetm.calls.must_pass_in_stack (arg))
2091 return 0;
2092 }
2093
2094 if (type && AGGREGATE_TYPE_P (type))
2095 {
2096 int i;
2097 tree field;
2098 enum x86_64_reg_class subclasses[MAX_CLASSES];
2099
2100 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
2101 if (bytes > 64)
2102 return 0;
2103
2104 for (i = 0; i < words; i++)
2105 classes[i] = X86_64_NO_CLASS;
2106
2107 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
2108 signal the memory class, so handle this as a special case. */
2109 if (!words)
2110 {
2111 classes[0] = X86_64_NO_CLASS;
2112 return 1;
2113 }
2114
2115 /* Classify each field of record and merge classes. */
2116 switch (TREE_CODE (type))
2117 {
2118 case RECORD_TYPE:
2119 /* And now merge the fields of structure. */
2120 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2121 {
2122 if (TREE_CODE (field) == FIELD_DECL)
2123 {
2124 int num;
2125
2126 if (TREE_TYPE (field) == error_mark_node)
2127 continue;
2128
2129 /* Bitfields are always classified as integer. Handle them
2130 early, since later code would consider them to be
2131 misaligned integers. */
2132 if (DECL_BIT_FIELD (field))
2133 {
2134 if (integer_zerop (DECL_SIZE (field)))
2135 {
2136 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
2137 continue;
2138 if (zero_width_bitfields != 2)
2139 {
2140 zero_width_bitfields = 1;
2141 continue;
2142 }
2143 }
2144 for (i = (int_bit_position (field)
2145 + (bit_offset % 64)) / 8 / 8;
2146 i < ((int_bit_position (field) + (bit_offset % 64))
2147 + tree_to_shwi (DECL_SIZE (field))
2148 + 63) / 8 / 8; i++)
2149 classes[i]
2150 = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
2151 }
2152 else
2153 {
2154 int pos;
2155
2156 type = TREE_TYPE (field);
2157
2158 /* Flexible array member is ignored. */
2159 if (TYPE_MODE (type) == BLKmode
2160 && TREE_CODE (type) == ARRAY_TYPE
2161 && TYPE_SIZE (type) == NULL_TREE
2162 && TYPE_DOMAIN (type) != NULL_TREE
2163 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
2164 == NULL_TREE))
2165 {
2166 static bool warned;
2167
2168 if (!warned && warn_psabi)
2169 {
2170 warned = true;
2171 inform (input_location,
2172 "the ABI of passing struct with"
2173 " a flexible array member has"
2174 " changed in GCC 4.4");
2175 }
2176 continue;
2177 }
2178 num = classify_argument (TYPE_MODE (type), type,
2179 subclasses,
2180 (int_bit_position (field)
2181 + bit_offset) % 512,
2182 zero_width_bitfields);
2183 if (!num)
2184 return 0;
2185 pos = (int_bit_position (field)
2186 + (bit_offset % 64)) / 8 / 8;
2187 for (i = 0; i < num && (i + pos) < words; i++)
2188 classes[i + pos]
2189 = merge_classes (subclasses[i], classes[i + pos]);
2190 }
2191 }
2192 }
2193 break;
2194
2195 case ARRAY_TYPE:
2196 /* Arrays are handled as small records. */
2197 {
2198 int num;
2199 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2200 TREE_TYPE (type), subclasses, bit_offset,
2201 zero_width_bitfields);
2202 if (!num)
2203 return 0;
2204
2205 /* The partial classes are now full classes. */
2206 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2207 subclasses[0] = X86_64_SSE_CLASS;
2208 if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2)
2209 subclasses[0] = X86_64_SSE_CLASS;
2210 if (subclasses[0] == X86_64_INTEGERSI_CLASS
2211 && !((bit_offset % 64) == 0 && bytes == 4))
2212 subclasses[0] = X86_64_INTEGER_CLASS;
2213
2214 for (i = 0; i < words; i++)
2215 classes[i] = subclasses[i % num];
2216
2217 break;
2218 }
2219 case UNION_TYPE:
2220 case QUAL_UNION_TYPE:
2221 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
2223 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2224 {
2225 if (TREE_CODE (field) == FIELD_DECL)
2226 {
2227 int num;
2228
2229 if (TREE_TYPE (field) == error_mark_node)
2230 continue;
2231
2232 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2233 TREE_TYPE (field), subclasses,
2234 bit_offset, zero_width_bitfields);
2235 if (!num)
2236 return 0;
2237 for (i = 0; i < num && i < words; i++)
2238 classes[i] = merge_classes (subclasses[i], classes[i]);
2239 }
2240 }
2241 break;
2242
2243 default:
2244 gcc_unreachable ();
2245 }
2246
2247 if (words > 2)
2248 {
2249 /* When the size exceeds 16 bytes, if the first class isn't
2250 X86_64_SSE_CLASS or any of the remaining classes isn't
2251 X86_64_SSEUP_CLASS, everything should be passed in
2252 memory. */
2253 if (classes[0] != X86_64_SSE_CLASS)
2254 return 0;
2255
2256 for (i = 1; i < words; i++)
2257 if (classes[i] != X86_64_SSEUP_CLASS)
2258 return 0;
2259 }
2260
2261 /* Final merger cleanup. */
2262 for (i = 0; i < words; i++)
2263 {
2264 /* If one class is MEMORY, everything should be passed in
2265 memory. */
2266 if (classes[i] == X86_64_MEMORY_CLASS)
2267 return 0;
2268
2269 /* The X86_64_SSEUP_CLASS should be always preceded by
2270 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
2271 if (classes[i] == X86_64_SSEUP_CLASS
2272 && classes[i - 1] != X86_64_SSE_CLASS
2273 && classes[i - 1] != X86_64_SSEUP_CLASS)
2274 {
2275 /* The first one should never be X86_64_SSEUP_CLASS. */
2276 gcc_assert (i != 0);
2277 classes[i] = X86_64_SSE_CLASS;
2278 }
2279
2280 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
2281 everything should be passed in memory. */
2282 if (classes[i] == X86_64_X87UP_CLASS
2283 && (classes[i - 1] != X86_64_X87_CLASS))
2284 {
2285 static bool warned;
2286
2287 /* The first one should never be X86_64_X87UP_CLASS. */
2288 gcc_assert (i != 0);
2289 if (!warned && warn_psabi)
2290 {
2291 warned = true;
2292 inform (input_location,
2293 "the ABI of passing union with %<long double%>"
2294 " has changed in GCC 4.4");
2295 }
2296 return 0;
2297 }
2298 }
2299 return words;
2300 }
2301
2302 /* Compute the alignment needed. We align all types to their natural
2303 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
2304 if (mode != VOIDmode && mode != BLKmode)
2305 {
2306 int mode_alignment = GET_MODE_BITSIZE (mode);
2307
2308 if (mode == XFmode)
2309 mode_alignment = 128;
2310 else if (mode == XCmode)
2311 mode_alignment = 256;
2312 if (COMPLEX_MODE_P (mode))
2313 mode_alignment /= 2;
2314 /* Misaligned fields are always returned in memory. */
2315 if (bit_offset % mode_alignment)
2316 return 0;
2317 }
2318
2319 /* For V1xx modes, just use the base mode. */
2320 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
2321 && GET_MODE_UNIT_SIZE (mode) == bytes)
2322 mode = GET_MODE_INNER (mode);
2323
2324 /* Classification of atomic types. */
2325 switch (mode)
2326 {
2327 case E_SDmode:
2328 case E_DDmode:
2329 classes[0] = X86_64_SSE_CLASS;
2330 return 1;
2331 case E_TDmode:
2332 classes[0] = X86_64_SSE_CLASS;
2333 classes[1] = X86_64_SSEUP_CLASS;
2334 return 2;
2335 case E_DImode:
2336 case E_SImode:
2337 case E_HImode:
2338 case E_QImode:
2339 case E_CSImode:
2340 case E_CHImode:
2341 case E_CQImode:
2342 {
2343 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
2344
2345 /* Analyze last 128 bits only. */
2346 size = (size - 1) & 0x7f;
2347
2348 if (size < 32)
2349 {
2350 classes[0] = X86_64_INTEGERSI_CLASS;
2351 return 1;
2352 }
2353 else if (size < 64)
2354 {
2355 classes[0] = X86_64_INTEGER_CLASS;
2356 return 1;
2357 }
2358 else if (size < 64+32)
2359 {
2360 classes[0] = X86_64_INTEGER_CLASS;
2361 classes[1] = X86_64_INTEGERSI_CLASS;
2362 return 2;
2363 }
2364 else if (size < 64+64)
2365 {
2366 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2367 return 2;
2368 }
2369 else
2370 gcc_unreachable ();
2371 }
2372 case E_CDImode:
2373 case E_TImode:
2374 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2375 return 2;
2376 case E_COImode:
2377 case E_OImode:
2378 /* OImode shouldn't be used directly. */
2379 gcc_unreachable ();
2380 case E_CTImode:
2381 return 0;
2382 case E_HFmode:
2383 if (!(bit_offset % 64))
2384 classes[0] = X86_64_SSEHF_CLASS;
2385 else
2386 classes[0] = X86_64_SSE_CLASS;
2387 return 1;
2388 case E_SFmode:
2389 if (!(bit_offset % 64))
2390 classes[0] = X86_64_SSESF_CLASS;
2391 else
2392 classes[0] = X86_64_SSE_CLASS;
2393 return 1;
2394 case E_DFmode:
2395 classes[0] = X86_64_SSEDF_CLASS;
2396 return 1;
2397 case E_XFmode:
2398 classes[0] = X86_64_X87_CLASS;
2399 classes[1] = X86_64_X87UP_CLASS;
2400 return 2;
2401 case E_TFmode:
2402 classes[0] = X86_64_SSE_CLASS;
2403 classes[1] = X86_64_SSEUP_CLASS;
2404 return 2;
2405 case E_HCmode:
2406 classes[0] = X86_64_SSE_CLASS;
2407 if (!(bit_offset % 64))
2408 return 1;
2409 else
2410 {
2411 classes[1] = X86_64_SSEHF_CLASS;
2412 return 2;
2413 }
2414 case E_SCmode:
2415 classes[0] = X86_64_SSE_CLASS;
2416 if (!(bit_offset % 64))
2417 return 1;
2418 else
2419 {
2420 static bool warned;
2421
2422 if (!warned && warn_psabi)
2423 {
2424 warned = true;
2425 inform (input_location,
2426 "the ABI of passing structure with %<complex float%>"
2427 " member has changed in GCC 4.4");
2428 }
2429 classes[1] = X86_64_SSESF_CLASS;
2430 return 2;
2431 }
2432 case E_DCmode:
2433 classes[0] = X86_64_SSEDF_CLASS;
2434 classes[1] = X86_64_SSEDF_CLASS;
2435 return 2;
2436 case E_XCmode:
2437 classes[0] = X86_64_COMPLEX_X87_CLASS;
2438 return 1;
2439 case E_TCmode:
2440 /* This mode is larger than 16 bytes. */
2441 return 0;
2442 case E_V8SFmode:
2443 case E_V8SImode:
2444 case E_V32QImode:
2445 case E_V16HFmode:
2446 case E_V16HImode:
2447 case E_V4DFmode:
2448 case E_V4DImode:
2449 classes[0] = X86_64_SSE_CLASS;
2450 classes[1] = X86_64_SSEUP_CLASS;
2451 classes[2] = X86_64_SSEUP_CLASS;
2452 classes[3] = X86_64_SSEUP_CLASS;
2453 return 4;
2454 case E_V8DFmode:
2455 case E_V16SFmode:
2456 case E_V32HFmode:
2457 case E_V8DImode:
2458 case E_V16SImode:
2459 case E_V32HImode:
2460 case E_V64QImode:
2461 classes[0] = X86_64_SSE_CLASS;
2462 classes[1] = X86_64_SSEUP_CLASS;
2463 classes[2] = X86_64_SSEUP_CLASS;
2464 classes[3] = X86_64_SSEUP_CLASS;
2465 classes[4] = X86_64_SSEUP_CLASS;
2466 classes[5] = X86_64_SSEUP_CLASS;
2467 classes[6] = X86_64_SSEUP_CLASS;
2468 classes[7] = X86_64_SSEUP_CLASS;
2469 return 8;
2470 case E_V4SFmode:
2471 case E_V4SImode:
2472 case E_V16QImode:
2473 case E_V8HImode:
2474 case E_V8HFmode:
2475 case E_V2DFmode:
2476 case E_V2DImode:
2477 classes[0] = X86_64_SSE_CLASS;
2478 classes[1] = X86_64_SSEUP_CLASS;
2479 return 2;
2480 case E_V1TImode:
2481 case E_V1DImode:
2482 case E_V2SFmode:
2483 case E_V2SImode:
2484 case E_V4HImode:
2485 case E_V4HFmode:
2486 case E_V2HFmode:
2487 case E_V8QImode:
2488 classes[0] = X86_64_SSE_CLASS;
2489 return 1;
2490 case E_BLKmode:
2491 case E_VOIDmode:
2492 return 0;
2493 default:
2494 gcc_assert (VECTOR_MODE_P (mode));
2495
2496 if (bytes > 16)
2497 return 0;
2498
2499 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2500
2501 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2502 classes[0] = X86_64_INTEGERSI_CLASS;
2503 else
2504 classes[0] = X86_64_INTEGER_CLASS;
2505 classes[1] = X86_64_INTEGER_CLASS;
2506 return 1 + (bytes > 8);
2507 }
2508 }
2509
2510 /* Wrapper around classify_argument with the extra zero_width_bitfields
2511 argument, to diagnose GCC 12.1 ABI differences for C. */
2512
2513 static int
2514 classify_argument (machine_mode mode, const_tree type,
2515 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2516 {
2517 int zero_width_bitfields = 0;
2518 static bool warned = false;
2519 int n = classify_argument (mode, type, classes, bit_offset,
2520 zero_width_bitfields);
2521 if (!zero_width_bitfields || warned || !warn_psabi)
2522 return n;
2523 enum x86_64_reg_class alt_classes[MAX_CLASSES];
2524 zero_width_bitfields = 2;
2525 if (classify_argument (mode, type, alt_classes, bit_offset,
2526 zero_width_bitfields) != n)
2527 zero_width_bitfields = 3;
2528 else
2529 for (int i = 0; i < n; i++)
2530 if (classes[i] != alt_classes[i])
2531 {
2532 zero_width_bitfields = 3;
2533 break;
2534 }
2535 if (zero_width_bitfields == 3)
2536 {
2537 warned = true;
2538 const char *url
2539 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
2540
2541 inform (input_location,
2542 "the ABI of passing C structures with zero-width bit-fields"
2543 " has changed in GCC %{12.1%}", url);
2544 }
2545 return n;
2546 }
2547
2548 /* Examine the argument and set the number of registers required in each
2549 class. Return true iff the parameter should be passed in memory. */
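
/* For example (an illustrative note, not part of the original sources):
   for a struct { double d; int i; } argument, *int_nregs and *sse_nregs
   are both set to 1 and false is returned, i.e. the argument is passed
   in registers.  */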
2550
2551 static bool
2552 examine_argument (machine_mode mode, const_tree type, int in_return,
2553 int *int_nregs, int *sse_nregs)
2554 {
2555 enum x86_64_reg_class regclass[MAX_CLASSES];
2556 int n = classify_argument (mode, type, regclass, 0);
2557
2558 *int_nregs = 0;
2559 *sse_nregs = 0;
2560
2561 if (!n)
2562 return true;
2563 for (n--; n >= 0; n--)
2564 switch (regclass[n])
2565 {
2566 case X86_64_INTEGER_CLASS:
2567 case X86_64_INTEGERSI_CLASS:
2568 (*int_nregs)++;
2569 break;
2570 case X86_64_SSE_CLASS:
2571 case X86_64_SSEHF_CLASS:
2572 case X86_64_SSESF_CLASS:
2573 case X86_64_SSEDF_CLASS:
2574 (*sse_nregs)++;
2575 break;
2576 case X86_64_NO_CLASS:
2577 case X86_64_SSEUP_CLASS:
2578 break;
2579 case X86_64_X87_CLASS:
2580 case X86_64_X87UP_CLASS:
2581 case X86_64_COMPLEX_X87_CLASS:
2582 if (!in_return)
2583 return true;
2584 break;
2585 case X86_64_MEMORY_CLASS:
2586 gcc_unreachable ();
2587 }
2588
2589 return false;
2590 }
2591
2592 /* Construct container for the argument used by GCC interface. See
2593 FUNCTION_ARG for the detailed description. */
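
/* For example (an illustrative note, not part of the original sources):
   for a struct { double d; int i; } argument the container is a
   two-element PARALLEL, (reg:DF xmmN) at byte offset 0 and
   (reg:DI intregN) at byte offset 8.  */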
2594
2595 static rtx
2596 construct_container (machine_mode mode, machine_mode orig_mode,
2597 const_tree type, int in_return, int nintregs, int nsseregs,
2598 const int *intreg, int sse_regno)
2599 {
2600 /* The following variables hold the static issued_error state. */
2601 static bool issued_sse_arg_error;
2602 static bool issued_sse_ret_error;
2603 static bool issued_x87_ret_error;
2604
2605 machine_mode tmpmode;
2606 int bytes
2607 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2608 enum x86_64_reg_class regclass[MAX_CLASSES];
2609 int n;
2610 int i;
2611 int nexps = 0;
2612 int needed_sseregs, needed_intregs;
2613 rtx exp[MAX_CLASSES];
2614 rtx ret;
2615
2616 n = classify_argument (mode, type, regclass, 0);
2617 if (!n)
2618 return NULL;
2619 if (examine_argument (mode, type, in_return, &needed_intregs,
2620 &needed_sseregs))
2621 return NULL;
2622 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2623 return NULL;
2624
2625 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2626 some less clueful developer tries to use floating-point anyway. */
2627 if (needed_sseregs && !TARGET_SSE)
2628 {
2629 /* Return early if we shouldn't raise an error for invalid
2630 calls. */
2631 if (cfun != NULL && cfun->machine->silent_p)
2632 return NULL;
2633 if (in_return)
2634 {
2635 if (!issued_sse_ret_error)
2636 {
2637 error ("SSE register return with SSE disabled");
2638 issued_sse_ret_error = true;
2639 }
2640 }
2641 else if (!issued_sse_arg_error)
2642 {
2643 error ("SSE register argument with SSE disabled");
2644 issued_sse_arg_error = true;
2645 }
2646 return NULL;
2647 }
2648
2649 /* Likewise, error if the ABI requires us to return values in the
2650 x87 registers and the user specified -mno-80387. */
2651 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
2652 for (i = 0; i < n; i++)
2653 if (regclass[i] == X86_64_X87_CLASS
2654 || regclass[i] == X86_64_X87UP_CLASS
2655 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
2656 {
2657 /* Return early if we shouldn't raise an error for invalid
2658 calls. */
2659 if (cfun != NULL && cfun->machine->silent_p)
2660 return NULL;
2661 if (!issued_x87_ret_error)
2662 {
2663 error ("x87 register return with x87 disabled");
2664 issued_x87_ret_error = true;
2665 }
2666 return NULL;
2667 }
2668
2669 /* First construct the simple cases. Avoid SCmode, since we want to use
2670 a single register to pass this type. */
2671 if (n == 1 && mode != SCmode && mode != HCmode)
2672 switch (regclass[0])
2673 {
2674 case X86_64_INTEGER_CLASS:
2675 case X86_64_INTEGERSI_CLASS:
2676 return gen_rtx_REG (mode, intreg[0]);
2677 case X86_64_SSE_CLASS:
2678 case X86_64_SSEHF_CLASS:
2679 case X86_64_SSESF_CLASS:
2680 case X86_64_SSEDF_CLASS:
2681 if (mode != BLKmode)
2682 return gen_reg_or_parallel (mode, orig_mode,
2683 GET_SSE_REGNO (sse_regno));
2684 break;
2685 case X86_64_X87_CLASS:
2686 case X86_64_COMPLEX_X87_CLASS:
2687 return gen_rtx_REG (mode, FIRST_STACK_REG);
2688 case X86_64_NO_CLASS:
2689 /* Zero sized array, struct or class. */
2690 return NULL;
2691 default:
2692 gcc_unreachable ();
2693 }
2694 if (n == 2
2695 && regclass[0] == X86_64_SSE_CLASS
2696 && regclass[1] == X86_64_SSEUP_CLASS
2697 && mode != BLKmode)
2698 return gen_reg_or_parallel (mode, orig_mode,
2699 GET_SSE_REGNO (sse_regno));
2700 if (n == 4
2701 && regclass[0] == X86_64_SSE_CLASS
2702 && regclass[1] == X86_64_SSEUP_CLASS
2703 && regclass[2] == X86_64_SSEUP_CLASS
2704 && regclass[3] == X86_64_SSEUP_CLASS
2705 && mode != BLKmode)
2706 return gen_reg_or_parallel (mode, orig_mode,
2707 GET_SSE_REGNO (sse_regno));
2708 if (n == 8
2709 && regclass[0] == X86_64_SSE_CLASS
2710 && regclass[1] == X86_64_SSEUP_CLASS
2711 && regclass[2] == X86_64_SSEUP_CLASS
2712 && regclass[3] == X86_64_SSEUP_CLASS
2713 && regclass[4] == X86_64_SSEUP_CLASS
2714 && regclass[5] == X86_64_SSEUP_CLASS
2715 && regclass[6] == X86_64_SSEUP_CLASS
2716 && regclass[7] == X86_64_SSEUP_CLASS
2717 && mode != BLKmode)
2718 return gen_reg_or_parallel (mode, orig_mode,
2719 GET_SSE_REGNO (sse_regno));
2720 if (n == 2
2721 && regclass[0] == X86_64_X87_CLASS
2722 && regclass[1] == X86_64_X87UP_CLASS)
2723 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2724
2725 if (n == 2
2726 && regclass[0] == X86_64_INTEGER_CLASS
2727 && regclass[1] == X86_64_INTEGER_CLASS
2728 && (mode == CDImode || mode == TImode || mode == BLKmode)
2729 && intreg[0] + 1 == intreg[1])
2730 {
2731 if (mode == BLKmode)
2732 {
2733 /* Use TImode for BLKmode values in 2 integer registers. */
2734 exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
2735 gen_rtx_REG (TImode, intreg[0]),
2736 GEN_INT (0));
2737 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
2738 XVECEXP (ret, 0, 0) = exp[0];
2739 return ret;
2740 }
2741 else
2742 return gen_rtx_REG (mode, intreg[0]);
2743 }
2744
2745 /* Otherwise figure out the entries of the PARALLEL. */
2746 for (i = 0; i < n; i++)
2747 {
2748 int pos;
2749
2750 switch (regclass[i])
2751 {
2752 case X86_64_NO_CLASS:
2753 break;
2754 case X86_64_INTEGER_CLASS:
2755 case X86_64_INTEGERSI_CLASS:
2756 /* Merge TImodes on aligned occasions here too. */
2757 if (i * 8 + 8 > bytes)
2758 {
2759 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
2760 if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
2761 /* We've requested a number of bits (e.g. 24)
2762 for which no integer mode exists. Use DImode. */
2763 tmpmode = DImode;
2764 }
2765 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
2766 tmpmode = SImode;
2767 else
2768 tmpmode = DImode;
2769 exp [nexps++]
2770 = gen_rtx_EXPR_LIST (VOIDmode,
2771 gen_rtx_REG (tmpmode, *intreg),
2772 GEN_INT (i*8));
2773 intreg++;
2774 break;
2775 case X86_64_SSEHF_CLASS:
2776 exp [nexps++]
2777 = gen_rtx_EXPR_LIST (VOIDmode,
2778 gen_rtx_REG (HFmode,
2779 GET_SSE_REGNO (sse_regno)),
2780 GEN_INT (i*8));
2781 sse_regno++;
2782 break;
2783 case X86_64_SSESF_CLASS:
2784 exp [nexps++]
2785 = gen_rtx_EXPR_LIST (VOIDmode,
2786 gen_rtx_REG (SFmode,
2787 GET_SSE_REGNO (sse_regno)),
2788 GEN_INT (i*8));
2789 sse_regno++;
2790 break;
2791 case X86_64_SSEDF_CLASS:
2792 exp [nexps++]
2793 = gen_rtx_EXPR_LIST (VOIDmode,
2794 gen_rtx_REG (DFmode,
2795 GET_SSE_REGNO (sse_regno)),
2796 GEN_INT (i*8));
2797 sse_regno++;
2798 break;
2799 case X86_64_SSE_CLASS:
2800 pos = i;
2801 switch (n)
2802 {
2803 case 1:
2804 tmpmode = DImode;
2805 break;
2806 case 2:
2807 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
2808 {
2809 tmpmode = TImode;
2810 i++;
2811 }
2812 else
2813 tmpmode = DImode;
2814 break;
2815 case 4:
2816 gcc_assert (i == 0
2817 && regclass[1] == X86_64_SSEUP_CLASS
2818 && regclass[2] == X86_64_SSEUP_CLASS
2819 && regclass[3] == X86_64_SSEUP_CLASS);
2820 tmpmode = OImode;
2821 i += 3;
2822 break;
2823 case 8:
2824 gcc_assert (i == 0
2825 && regclass[1] == X86_64_SSEUP_CLASS
2826 && regclass[2] == X86_64_SSEUP_CLASS
2827 && regclass[3] == X86_64_SSEUP_CLASS
2828 && regclass[4] == X86_64_SSEUP_CLASS
2829 && regclass[5] == X86_64_SSEUP_CLASS
2830 && regclass[6] == X86_64_SSEUP_CLASS
2831 && regclass[7] == X86_64_SSEUP_CLASS);
2832 tmpmode = XImode;
2833 i += 7;
2834 break;
2835 default:
2836 gcc_unreachable ();
2837 }
2838 exp [nexps++]
2839 = gen_rtx_EXPR_LIST (VOIDmode,
2840 gen_rtx_REG (tmpmode,
2841 GET_SSE_REGNO (sse_regno)),
2842 GEN_INT (pos*8));
2843 sse_regno++;
2844 break;
2845 default:
2846 gcc_unreachable ();
2847 }
2848 }
2849
2850 /* Empty aligned struct, union or class. */
2851 if (nexps == 0)
2852 return NULL;
2853
2854 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2855 for (i = 0; i < nexps; i++)
2856 XVECEXP (ret, 0, i) = exp [i];
2857 return ret;
2858 }
2859
2860 /* Update the data in CUM to advance over an argument of mode MODE
2861 and data type TYPE. (TYPE is null for libcalls where that information
2862 may not be available.)
2863
2864 Return the number of integer registers advanced over. */
2865
2866 static int
2867 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
2868 const_tree type, HOST_WIDE_INT bytes,
2869 HOST_WIDE_INT words)
2870 {
2871 int res = 0;
2872 bool error_p = false;
2873
2874 if (TARGET_IAMCU)
2875 {
2876 /* Intel MCU psABI passes scalars and aggregates no larger than 8
2877 bytes in registers. */
2878 if (!VECTOR_MODE_P (mode) && bytes <= 8)
2879 goto pass_in_reg;
2880 return res;
2881 }
2882
2883 switch (mode)
2884 {
2885 default:
2886 break;
2887
2888 case E_BLKmode:
2889 if (bytes < 0)
2890 break;
2891 /* FALLTHRU */
2892
2893 case E_DImode:
2894 case E_SImode:
2895 case E_HImode:
2896 case E_QImode:
2897 pass_in_reg:
2898 cum->words += words;
2899 cum->nregs -= words;
2900 cum->regno += words;
2901 if (cum->nregs >= 0)
2902 res = words;
2903 if (cum->nregs <= 0)
2904 {
2905 cum->nregs = 0;
2906 cfun->machine->arg_reg_available = false;
2907 cum->regno = 0;
2908 }
2909 break;
2910
2911 case E_OImode:
2912 /* OImode shouldn't be used directly. */
2913 gcc_unreachable ();
2914
2915 case E_DFmode:
2916 if (cum->float_in_sse == -1)
2917 error_p = true;
2918 if (cum->float_in_sse < 2)
2919 break;
2920 /* FALLTHRU */
2921 case E_SFmode:
2922 if (cum->float_in_sse == -1)
2923 error_p = true;
2924 if (cum->float_in_sse < 1)
2925 break;
2926 /* FALLTHRU */
2927
2928 case E_V16HFmode:
2929 case E_V8SFmode:
2930 case E_V8SImode:
2931 case E_V64QImode:
2932 case E_V32HImode:
2933 case E_V16SImode:
2934 case E_V8DImode:
2935 case E_V32HFmode:
2936 case E_V16SFmode:
2937 case E_V8DFmode:
2938 case E_V32QImode:
2939 case E_V16HImode:
2940 case E_V4DFmode:
2941 case E_V4DImode:
2942 case E_TImode:
2943 case E_V16QImode:
2944 case E_V8HImode:
2945 case E_V4SImode:
2946 case E_V2DImode:
2947 case E_V8HFmode:
2948 case E_V4SFmode:
2949 case E_V2DFmode:
2950 if (!type || !AGGREGATE_TYPE_P (type))
2951 {
2952 cum->sse_words += words;
2953 cum->sse_nregs -= 1;
2954 cum->sse_regno += 1;
2955 if (cum->sse_nregs <= 0)
2956 {
2957 cum->sse_nregs = 0;
2958 cum->sse_regno = 0;
2959 }
2960 }
2961 break;
2962
2963 case E_V8QImode:
2964 case E_V4HImode:
2965 case E_V4HFmode:
2966 case E_V2SImode:
2967 case E_V2SFmode:
2968 case E_V1TImode:
2969 case E_V1DImode:
2970 if (!type || !AGGREGATE_TYPE_P (type))
2971 {
2972 cum->mmx_words += words;
2973 cum->mmx_nregs -= 1;
2974 cum->mmx_regno += 1;
2975 if (cum->mmx_nregs <= 0)
2976 {
2977 cum->mmx_nregs = 0;
2978 cum->mmx_regno = 0;
2979 }
2980 }
2981 break;
2982 }
2983 if (error_p)
2984 {
2985 cum->float_in_sse = 0;
2986 error ("calling %qD with SSE calling convention without "
2987 "SSE/SSE2 enabled", cum->decl);
2988 sorry ("this is a GCC bug that can be worked around by adding "
2989 "attribute used to function called");
2990 }
2991
2992 return res;
2993 }
2994
2995 static int
2996 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
2997 const_tree type, HOST_WIDE_INT words, bool named)
2998 {
2999 int int_nregs, sse_nregs;
3000
3001 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
3002 if (!named && (VALID_AVX512F_REG_MODE (mode)
3003 || VALID_AVX256_REG_MODE (mode)))
3004 return 0;
3005
3006 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
3007 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3008 {
3009 cum->nregs -= int_nregs;
3010 cum->sse_nregs -= sse_nregs;
3011 cum->regno += int_nregs;
3012 cum->sse_regno += sse_nregs;
3013 return int_nregs;
3014 }
3015 else
3016 {
3017 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
3018 cum->words = ROUND_UP (cum->words, align);
3019 cum->words += words;
3020 return 0;
3021 }
3022 }
3023
3024 static int
3025 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3026 HOST_WIDE_INT words)
3027 {
3028 /* Otherwise, this should be passed indirectly. */
3029 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3030
3031 cum->words += words;
3032 if (cum->nregs > 0)
3033 {
3034 cum->nregs -= 1;
3035 cum->regno += 1;
3036 return 1;
3037 }
3038 return 0;
3039 }
3040
3041 /* Update the data in CUM to advance over argument ARG. */
3042
3043 static void
3044 ix86_function_arg_advance (cumulative_args_t cum_v,
3045 const function_arg_info &arg)
3046 {
3047 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3048 machine_mode mode = arg.mode;
3049 HOST_WIDE_INT bytes, words;
3050 int nregs;
3051
3052 /* The argument of an interrupt handler is a special case and is
3053 handled in ix86_function_arg. */
3054 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3055 return;
3056
3057 bytes = arg.promoted_size_in_bytes ();
3058 words = CEIL (bytes, UNITS_PER_WORD);
3059
3060 if (arg.type)
3061 mode = type_natural_mode (arg.type, NULL, false);
3062
3063 if (TARGET_64BIT)
3064 {
3065 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3066
3067 if (call_abi == MS_ABI)
3068 nregs = function_arg_advance_ms_64 (cum, bytes, words);
3069 else
3070 nregs = function_arg_advance_64 (cum, mode, arg.type, words,
3071 arg.named);
3072 }
3073 else
3074 nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);
3075
3076 if (!nregs)
3077 {
3078 /* Track if there are outgoing arguments on stack. */
3079 if (cum->caller)
3080 cfun->machine->outgoing_args_on_stack = true;
3081 }
3082 }
3083
3084 /* Define where to put the arguments to a function.
3085 Value is zero to push the argument on the stack,
3086 or a hard register in which to store the argument.
3087
3088 MODE is the argument's machine mode.
3089 TYPE is the data type of the argument (as a tree).
3090 This is null for libcalls where that information may
3091 not be available.
3092 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3093 the preceding args and about the function being called.
3094 NAMED is nonzero if this argument is a named parameter
3095 (otherwise it is an extra parameter matching an ellipsis). */
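
/* For example (an illustrative note, not part of the original sources):
   with __attribute__ ((regparm (3))) the first integer-sized arguments
   are passed in EAX, EDX and ECX, while with __attribute__ ((fastcall))
   the first two DWORD-or-smaller arguments go in ECX and EDX.  */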
3096
3097 static rtx
3098 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
3099 machine_mode orig_mode, const_tree type,
3100 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3101 {
3102 bool error_p = false;
3103
3104 /* Avoid the AL settings for the Unix64 ABI. */
3105 if (mode == VOIDmode)
3106 return constm1_rtx;
3107
3108 if (TARGET_IAMCU)
3109 {
3110 /* Intel MCU psABI passes scalars and aggregates no larger than 8
3111 bytes in registers. */
3112 if (!VECTOR_MODE_P (mode) && bytes <= 8)
3113 goto pass_in_reg;
3114 return NULL_RTX;
3115 }
3116
3117 switch (mode)
3118 {
3119 default:
3120 break;
3121
3122 case E_BLKmode:
3123 if (bytes < 0)
3124 break;
3125 /* FALLTHRU */
3126 case E_DImode:
3127 case E_SImode:
3128 case E_HImode:
3129 case E_QImode:
3130 pass_in_reg:
3131 if (words <= cum->nregs)
3132 {
3133 int regno = cum->regno;
3134
3135 /* Fastcall allocates the first two DWORD (SImode) or
3136 smaller arguments to ECX and EDX if the argument isn't an
3137 aggregate type. */
3138 if (cum->fastcall)
3139 {
3140 if (mode == BLKmode
3141 || mode == DImode
3142 || (type && AGGREGATE_TYPE_P (type)))
3143 break;
3144
3145 /* ECX, not EAX, is the first allocated register. */
3146 if (regno == AX_REG)
3147 regno = CX_REG;
3148 }
3149 return gen_rtx_REG (mode, regno);
3150 }
3151 break;
3152
3153 case E_DFmode:
3154 if (cum->float_in_sse == -1)
3155 error_p = true;
3156 if (cum->float_in_sse < 2)
3157 break;
3158 /* FALLTHRU */
3159 case E_SFmode:
3160 if (cum->float_in_sse == -1)
3161 error_p = true;
3162 if (cum->float_in_sse < 1)
3163 break;
3164 /* FALLTHRU */
3165 case E_TImode:
3166 /* In 32-bit mode, we pass TImode in xmm registers. */
3167 case E_V16QImode:
3168 case E_V8HImode:
3169 case E_V4SImode:
3170 case E_V2DImode:
3171 case E_V8HFmode:
3172 case E_V4SFmode:
3173 case E_V2DFmode:
3174 if (!type || !AGGREGATE_TYPE_P (type))
3175 {
3176 if (cum->sse_nregs)
3177 return gen_reg_or_parallel (mode, orig_mode,
3178 cum->sse_regno + FIRST_SSE_REG);
3179 }
3180 break;
3181
3182 case E_OImode:
3183 case E_XImode:
3184 /* OImode and XImode shouldn't be used directly. */
3185 gcc_unreachable ();
3186
3187 case E_V64QImode:
3188 case E_V32HImode:
3189 case E_V16SImode:
3190 case E_V8DImode:
3191 case E_V32HFmode:
3192 case E_V16SFmode:
3193 case E_V8DFmode:
3194 case E_V16HFmode:
3195 case E_V8SFmode:
3196 case E_V8SImode:
3197 case E_V32QImode:
3198 case E_V16HImode:
3199 case E_V4DFmode:
3200 case E_V4DImode:
3201 if (!type || !AGGREGATE_TYPE_P (type))
3202 {
3203 if (cum->sse_nregs)
3204 return gen_reg_or_parallel (mode, orig_mode,
3205 cum->sse_regno + FIRST_SSE_REG);
3206 }
3207 break;
3208
3209 case E_V8QImode:
3210 case E_V4HImode:
3211 case E_V4HFmode:
3212 case E_V2SImode:
3213 case E_V2SFmode:
3214 case E_V1TImode:
3215 case E_V1DImode:
3216 if (!type || !AGGREGATE_TYPE_P (type))
3217 {
3218 if (cum->mmx_nregs)
3219 return gen_reg_or_parallel (mode, orig_mode,
3220 cum->mmx_regno + FIRST_MMX_REG);
3221 }
3222 break;
3223 }
3224 if (error_p)
3225 {
3226 cum->float_in_sse = 0;
3227 error ("calling %qD with SSE calling convention without "
3228 "SSE/SSE2 enabled", cum->decl);
3229 sorry ("this is a GCC bug that can be worked around by adding "
3230 "attribute used to function called");
3231 }
3232
3233 return NULL_RTX;
3234 }
3235
3236 static rtx
3237 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3238 machine_mode orig_mode, const_tree type, bool named)
3239 {
3240 /* Handle a hidden AL argument containing number of registers
3241 for varargs x86-64 functions. */
3242 if (mode == VOIDmode)
3243 return GEN_INT (cum->maybe_vaarg
3244 ? (cum->sse_nregs < 0
3245 ? X86_64_SSE_REGPARM_MAX
3246 : cum->sse_regno)
3247 : -1);
3248
3249 switch (mode)
3250 {
3251 default:
3252 break;
3253
3254 case E_V16HFmode:
3255 case E_V8SFmode:
3256 case E_V8SImode:
3257 case E_V32QImode:
3258 case E_V16HImode:
3259 case E_V4DFmode:
3260 case E_V4DImode:
3261 case E_V32HFmode:
3262 case E_V16SFmode:
3263 case E_V16SImode:
3264 case E_V64QImode:
3265 case E_V32HImode:
3266 case E_V8DFmode:
3267 case E_V8DImode:
3268 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
3269 if (!named)
3270 return NULL;
3271 break;
3272 }
3273
3274 return construct_container (mode, orig_mode, type, 0, cum->nregs,
3275 cum->sse_nregs,
3276 &x86_64_int_parameter_registers [cum->regno],
3277 cum->sse_regno);
3278 }
3279
3280 static rtx
3281 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3282 machine_mode orig_mode, bool named, const_tree type,
3283 HOST_WIDE_INT bytes)
3284 {
3285 unsigned int regno;
3286
3287 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
3288 We use the value -2 to specify that the current function call is MS_ABI. */
3289 if (mode == VOIDmode)
3290 return GEN_INT (-2);
3291
3292 /* If we've run out of registers, it goes on the stack. */
3293 if (cum->nregs == 0)
3294 return NULL_RTX;
3295
3296 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3297
3298 /* Only floating point modes are passed in anything but integer regs. */
3299 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
3300 {
3301 if (named)
3302 {
3303 if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
3304 regno = cum->regno + FIRST_SSE_REG;
3305 }
3306 else
3307 {
3308 rtx t1, t2;
3309
3310 /* Unnamed floating parameters are passed in both the
3311 SSE and integer registers. */
3312 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
3313 t2 = gen_rtx_REG (mode, regno);
3314 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
3315 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
3316 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
3317 }
3318 }
3319 /* Handle aggregate types passed in registers. */
3320 if (orig_mode == BLKmode)
3321 {
3322 if (bytes > 0 && bytes <= 8)
3323 mode = (bytes > 4 ? DImode : SImode);
3324 if (mode == BLKmode)
3325 mode = DImode;
3326 }
3327
3328 return gen_reg_or_parallel (mode, orig_mode, regno);
3329 }
3330
3331 /* Return where to put the arguments to a function.
3332 Return zero to push the argument on the stack, or a hard register in which to store the argument.
3333
3334 ARG describes the argument while CUM gives information about the
3335 preceding args and about the function being called. */
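
/* For example (an illustrative sketch, not part of the original sources):
   an exception handler declared as
     void handler (struct interrupt_frame *frame, uword_t error_code)
       __attribute__ ((interrupt));
   receives FRAME at -WORD(AP) and ERROR_CODE at -2*WORD(AP), as handled
   below for non-TYPE_NORMAL functions.  */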
3336
3337 static rtx
3338 ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
3339 {
3340 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3341 machine_mode mode = arg.mode;
3342 HOST_WIDE_INT bytes, words;
3343 rtx reg;
3344
3345 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3346 {
3347 gcc_assert (arg.type != NULL_TREE);
3348 if (POINTER_TYPE_P (arg.type))
3349 {
3350 /* This is the pointer argument. */
3351 gcc_assert (TYPE_MODE (arg.type) == Pmode);
3352 /* It is at -WORD(AP) in the current frame in interrupt and
3353 exception handlers. */
3354 reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
3355 }
3356 else
3357 {
3358 gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
3359 && TREE_CODE (arg.type) == INTEGER_TYPE
3360 && TYPE_MODE (arg.type) == word_mode);
3361 /* The error code is the word-mode integer argument at
3362 -2 * WORD(AP) in the current frame of the exception
3363 handler. */
3364 reg = gen_rtx_MEM (word_mode,
3365 plus_constant (Pmode,
3366 arg_pointer_rtx,
3367 -2 * UNITS_PER_WORD));
3368 }
3369 return reg;
3370 }
3371
3372 bytes = arg.promoted_size_in_bytes ();
3373 words = CEIL (bytes, UNITS_PER_WORD);
3374
3375 /* To simplify the code below, represent vector types with a vector mode
3376 even if MMX/SSE are not active. */
3377 if (arg.type && TREE_CODE (arg.type) == VECTOR_TYPE)
3378 mode = type_natural_mode (arg.type, cum, false);
3379
3380 if (TARGET_64BIT)
3381 {
3382 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3383
3384 if (call_abi == MS_ABI)
3385 reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
3386 arg.type, bytes);
3387 else
3388 reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
3389 }
3390 else
3391 reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);
3392
3393 /* Track if there are outgoing arguments on stack. */
3394 if (reg == NULL_RTX && cum->caller)
3395 cfun->machine->outgoing_args_on_stack = true;
3396
3397 return reg;
3398 }
3399
3400 /* A C expression that indicates when an argument must be passed by
3401 reference. If nonzero for an argument, a copy of that argument is
3402 made in memory and a pointer to the argument is passed instead of
3403 the argument itself. The pointer is passed in whatever way is
3404 appropriate for passing a pointer to that type. */
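
/* For example (an illustrative note, not part of the original sources):
   under the MS x64 ABI a 24-byte struct or any array is passed by
   reference, while an 8-byte struct is passed by value; under the SysV
   x86-64 ABI only variable-sized types are forced by this hook to be
   passed by reference.  */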
3405
3406 static bool
3407 ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
3408 {
3409 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3410
3411 if (TARGET_64BIT)
3412 {
3413 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3414
3415 /* See Windows x64 Software Convention. */
3416 if (call_abi == MS_ABI)
3417 {
3418 HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);
3419
3420 if (tree type = arg.type)
3421 {
3422 /* Arrays are passed by reference. */
3423 if (TREE_CODE (type) == ARRAY_TYPE)
3424 return true;
3425
3426 if (RECORD_OR_UNION_TYPE_P (type))
3427 {
3428 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
3429 are passed by reference. */
3430 msize = int_size_in_bytes (type);
3431 }
3432 }
3433
3434 /* __m128 is passed by reference. */
3435 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
3436 }
3437 else if (arg.type && int_size_in_bytes (arg.type) == -1)
3438 return true;
3439 }
3440
3441 return false;
3442 }
3443
3444 /* Return true when TYPE should be 128bit aligned for 32bit argument
3445 passing ABI. XXX: This function is obsolete and is only used for
3446 checking psABI compatibility with previous versions of GCC. */
3447
3448 static bool
3449 ix86_compat_aligned_value_p (const_tree type)
3450 {
3451 machine_mode mode = TYPE_MODE (type);
3452 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
3453 || mode == TDmode
3454 || mode == TFmode
3455 || mode == TCmode)
3456 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3457 return true;
3458 if (TYPE_ALIGN (type) < 128)
3459 return false;
3460
3461 if (AGGREGATE_TYPE_P (type))
3462 {
3463 /* Walk the aggregates recursively. */
3464 switch (TREE_CODE (type))
3465 {
3466 case RECORD_TYPE:
3467 case UNION_TYPE:
3468 case QUAL_UNION_TYPE:
3469 {
3470 tree field;
3471
3472 /* Walk all the structure fields. */
3473 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3474 {
3475 if (TREE_CODE (field) == FIELD_DECL
3476 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
3477 return true;
3478 }
3479 break;
3480 }
3481
3482 case ARRAY_TYPE:
3483 /* Just in case some language passes arrays by value. */
3484 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
3485 return true;
3486 break;
3487
3488 default:
3489 gcc_unreachable ();
3490 }
3491 }
3492 return false;
3493 }
3494
3495 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3496 XXX: This function is obsolete and is only used for checking psABI
3497 compatibility with previous versions of GCC. */
3498
3499 static unsigned int
3500 ix86_compat_function_arg_boundary (machine_mode mode,
3501 const_tree type, unsigned int align)
3502 {
3503 /* In 32bit, only _Decimal128 and __float128 are aligned to their
3504 natural boundaries. */
3505 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
3506 {
3507 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3508 make an exception for SSE modes since these require 128bit
3509 alignment.
3510
3511 The handling here differs from field_alignment. ICC aligns MMX
3512 arguments to 4 byte boundaries, while structure fields are aligned
3513 to 8 byte boundaries. */
3514 if (!type)
3515 {
3516 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
3517 align = PARM_BOUNDARY;
3518 }
3519 else
3520 {
3521 if (!ix86_compat_aligned_value_p (type))
3522 align = PARM_BOUNDARY;
3523 }
3524 }
3525 if (align > BIGGEST_ALIGNMENT)
3526 align = BIGGEST_ALIGNMENT;
3527 return align;
3528 }
3529
3530 /* Return true when TYPE should be 128bit aligned for 32bit argument
3531 passing ABI. */
3532
3533 static bool
3534 ix86_contains_aligned_value_p (const_tree type)
3535 {
3536 machine_mode mode = TYPE_MODE (type);
3537
3538 if (mode == XFmode || mode == XCmode)
3539 return false;
3540
3541 if (TYPE_ALIGN (type) < 128)
3542 return false;
3543
3544 if (AGGREGATE_TYPE_P (type))
3545 {
3546 /* Walk the aggregates recursively. */
3547 switch (TREE_CODE (type))
3548 {
3549 case RECORD_TYPE:
3550 case UNION_TYPE:
3551 case QUAL_UNION_TYPE:
3552 {
3553 tree field;
3554
3555 /* Walk all the structure fields. */
3556 for (field = TYPE_FIELDS (type);
3557 field;
3558 field = DECL_CHAIN (field))
3559 {
3560 if (TREE_CODE (field) == FIELD_DECL
3561 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
3562 return true;
3563 }
3564 break;
3565 }
3566
3567 case ARRAY_TYPE:
3568 /* Just in case some language passes arrays by value. */
3569 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
3570 return true;
3571 break;
3572
3573 default:
3574 gcc_unreachable ();
3575 }
3576 }
3577 else
3578 return TYPE_ALIGN (type) >= 128;
3579
3580 return false;
3581 }
3582
3583 /* Gives the alignment boundary, in bits, of an argument with the
3584 specified mode and type. */
3585
3586 static unsigned int
3587 ix86_function_arg_boundary (machine_mode mode, const_tree type)
3588 {
3589 unsigned int align;
3590 if (type)
3591 {
3592 /* Since the main variant type is used for the call, convert TYPE to
3593 its main variant. */
3594 type = TYPE_MAIN_VARIANT (type);
3595 align = TYPE_ALIGN (type);
3596 if (TYPE_EMPTY_P (type))
3597 return PARM_BOUNDARY;
3598 }
3599 else
3600 align = GET_MODE_ALIGNMENT (mode);
3601 if (align < PARM_BOUNDARY)
3602 align = PARM_BOUNDARY;
3603 else
3604 {
3605 static bool warned;
3606 unsigned int saved_align = align;
3607
3608 if (!TARGET_64BIT)
3609 {
3610 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
3611 if (!type)
3612 {
3613 if (mode == XFmode || mode == XCmode)
3614 align = PARM_BOUNDARY;
3615 }
3616 else if (!ix86_contains_aligned_value_p (type))
3617 align = PARM_BOUNDARY;
3618
3619 if (align < 128)
3620 align = PARM_BOUNDARY;
3621 }
3622
3623 if (warn_psabi
3624 && !warned
3625 && align != ix86_compat_function_arg_boundary (mode, type,
3626 saved_align))
3627 {
3628 warned = true;
3629 inform (input_location,
3630 "the ABI for passing parameters with %d-byte"
3631 " alignment has changed in GCC 4.6",
3632 align / BITS_PER_UNIT);
3633 }
3634 }
3635
3636 return align;
3637 }
3638
3639 /* Return true if N is a possible register number of function value. */
3640
3641 static bool
3642 ix86_function_value_regno_p (const unsigned int regno)
3643 {
3644 switch (regno)
3645 {
3646 case AX_REG:
3647 return true;
3648 case DX_REG:
3649 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
3650 case DI_REG:
3651 case SI_REG:
3652 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
3653
3654 /* Complex values are returned in %st(0)/%st(1) pair. */
3655 case ST0_REG:
3656 case ST1_REG:
3657 /* TODO: The function should depend on current function ABI but
3658 builtins.cc would need updating then. Therefore we use the
3659 default ABI. */
3660 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3661 return false;
3662 return TARGET_FLOAT_RETURNS_IN_80387;
3663
3664 /* Complex values are returned in %xmm0/%xmm1 pair. */
3665 case XMM0_REG:
3666 case XMM1_REG:
3667 return TARGET_SSE;
3668
3669 case MM0_REG:
3670 if (TARGET_MACHO || TARGET_64BIT)
3671 return false;
3672 return TARGET_MMX;
3673 }
3674
3675 return false;
3676 }
3677
3678 /* Check whether the register REGNO should be zeroed on X86.
3679 When ALL_SSE_ZEROED is true, all SSE registers have already been zeroed
3680 together, so there is no need to zero REGNO again.
3681 When NEED_ZERO_MMX is true, MMX registers should be cleared. */
3682
3683 static bool
3684 zero_call_used_regno_p (const unsigned int regno,
3685 bool all_sse_zeroed,
3686 bool need_zero_mmx)
3687 {
3688 return GENERAL_REGNO_P (regno)
3689 || (!all_sse_zeroed && SSE_REGNO_P (regno))
3690 || MASK_REGNO_P (regno)
3691 || (need_zero_mmx && MMX_REGNO_P (regno));
3692 }
3693
3694 /* Return the machine_mode that is used to zero register REGNO. */
3695
3696 static machine_mode
3697 zero_call_used_regno_mode (const unsigned int regno)
3698 {
3699 /* NB: We only need to zero the lower 32 bits for integer registers
3700 and the lower 128 bits for vector registers since the destinations are
3701 zero-extended to the full register width. */
3702 if (GENERAL_REGNO_P (regno))
3703 return SImode;
3704 else if (SSE_REGNO_P (regno))
3705 return V4SFmode;
3706 else if (MASK_REGNO_P (regno))
3707 return HImode;
3708 else if (MMX_REGNO_P (regno))
3709 return V4HImode;
3710 else
3711 gcc_unreachable ();
3712 }
3713
3714 /* Generate an rtx to zero all vector registers together if possible,
3715 otherwise return NULL. */
3716
3717 static rtx
3718 zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
3719 {
3720 if (!TARGET_AVX)
3721 return NULL;
3722
3723 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3724 if ((LEGACY_SSE_REGNO_P (regno)
3725 || (TARGET_64BIT
3726 && (REX_SSE_REGNO_P (regno)
3727 || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
3728 && !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3729 return NULL;
3730
3731 return gen_avx_vzeroall ();
3732 }
3733
3734 /* Generate insns to zero all st registers together.
3735 Return true when zeroing instructions are generated.
3736 Assume the number of st registers that are zeroed is num_of_st,
3737 we will emit the following sequence to zero them together:
3738 fldz; \
3739 fldz; \
3740 ...
3741 fldz; \
3742 fstp %%st(0); \
3743 fstp %%st(0); \
3744 ...
3745 fstp %%st(0);
3746 i.e., num_of_st fldz insns followed by num_of_st fstp insns to clear
3747 the stack and mark the stack slots empty.
3748
3749 How to compute the num_of_st:
3750 There is no direct mapping from stack registers to hard register
3751 numbers. If one stack register needs to be cleared, we don't know
3752 where in the stack the value remains. So, if any stack register
3753 needs to be cleared, the whole stack should be cleared. However,
3754 x87 stack registers that hold the return value should be excluded.
3755 x87 returns values in the top register (the top two for complex values),
3756 so num_of_st should be 7 or 6 when x87 returns, otherwise it will be 8. */
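
/* For example (an illustrative note, not part of the original sources):
   a function that returns a double in %st(0) and needs any stack register
   cleared will emit 7 fldz insns followed by 7 fstp %st(0) insns, so the
   return value in %st(0) is not clobbered.  */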
3757
3758
3759 static bool
3760 zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
3761 {
3762
3763 /* If the FPU is disabled, no need to zero all st registers. */
3764 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3765 return false;
3766
3767 unsigned int num_of_st = 0;
3768 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3769 if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
3770 && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3771 {
3772 num_of_st++;
3773 break;
3774 }
3775
3776 if (num_of_st == 0)
3777 return false;
3778
3779 bool return_with_x87 = false;
3780 return_with_x87 = (crtl->return_rtx
3781 && (STACK_REG_P (crtl->return_rtx)));
3782
3783 bool complex_return = false;
3784 complex_return = (crtl->return_rtx
3785 && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx)));
3786
3787 if (return_with_x87)
3788 if (complex_return)
3789 num_of_st = 6;
3790 else
3791 num_of_st = 7;
3792 else
3793 num_of_st = 8;
3794
3795 rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG);
3796 for (unsigned int i = 0; i < num_of_st; i++)
3797 emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode)));
3798
3799 for (unsigned int i = 0; i < num_of_st; i++)
3800 {
3801 rtx insn;
3802 insn = emit_insn (gen_rtx_SET (st_reg, st_reg));
3803 add_reg_note (insn, REG_DEAD, st_reg);
3804 }
3805 return true;
3806 }
3807
3808
3809 /* When the routine exits in MMX mode, if any ST register needs
3810 to be zeroed, we should clear all MMX registers except the
3811 RET_MMX_REGNO that holds the return value. */
3812 static bool
3813 zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs,
3814 unsigned int ret_mmx_regno)
3815 {
3816 bool need_zero_all_mm = false;
3817 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3818 if (STACK_REGNO_P (regno)
3819 && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3820 {
3821 need_zero_all_mm = true;
3822 break;
3823 }
3824
3825 if (!need_zero_all_mm)
3826 return false;
3827
3828 rtx zero_mmx = NULL_RTX;
3829 machine_mode mode = V4HImode;
3830 for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
3831 if (regno != ret_mmx_regno)
3832 {
3833 rtx reg = gen_rtx_REG (mode, regno);
3834 if (zero_mmx == NULL_RTX)
3835 {
3836 zero_mmx = reg;
3837 emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode)));
3838 }
3839 else
3840 emit_move_insn (reg, zero_mmx);
3841 }
3842 return true;
3843 }
3844
3845 /* TARGET_ZERO_CALL_USED_REGS. */
3846 /* Generate a sequence of instructions that zero registers specified by
3847 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
3848 zeroed. */
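 /* Illustrative sketch only: for a function compiled with
    -fzero-call-used-regs=used-gpr, e.g.

      int f (int x) { return x + 1; }

    the middle end asks this hook to zero the call-used GPRs the
    function touched (return-value registers are excluded), so the
    epilogue typically gains something like

      xorl %edi, %edi

    before the ret.  The exact registers depend on the option variant
    and on what the function actually used.  */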
3849 static HARD_REG_SET
3850 ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
3851 {
3852 HARD_REG_SET zeroed_hardregs;
3853 bool all_sse_zeroed = false;
3854 bool all_st_zeroed = false;
3855 bool all_mm_zeroed = false;
3856
3857 CLEAR_HARD_REG_SET (zeroed_hardregs);
3858
3859 /* First, see whether we can zero all vector registers together. */
3860 rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
3861 if (zero_all_vec_insn)
3862 {
3863 emit_insn (zero_all_vec_insn);
3864 all_sse_zeroed = true;
3865 }
3866
3867 /* mm/st registers share one register set; we should follow these
3868 rules to clear them:
3869 MMX exit mode x87 exit mode
3870 -------------|----------------------|---------------
3871 uses x87 reg | clear all MMX | clear all x87
3872 uses MMX reg | clear individual MMX | clear all x87
3873 x87 + MMX | clear all MMX | clear all x87
3874
3875 First, we should decide which mode (MMX mode or x87 mode) the function
3876 exits in. */
3877
3878 bool exit_with_mmx_mode = (crtl->return_rtx
3879 && (MMX_REG_P (crtl->return_rtx)));
3880
3881 if (!exit_with_mmx_mode)
3882 /* x87 exit mode, we should zero all st registers together. */
3883 {
3884 all_st_zeroed = zero_all_st_registers (need_zeroed_hardregs);
3885 if (all_st_zeroed)
3886 SET_HARD_REG_BIT (zeroed_hardregs, FIRST_STACK_REG);
3887 }
3888 else
3889 /* MMX exit mode, check whether we can zero all mm registers. */
3890 {
3891 unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
3892 all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
3893 exit_mmx_regno);
3894 if (all_mm_zeroed)
3895 for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
3896 if (regno != exit_mmx_regno)
3897 SET_HARD_REG_BIT (zeroed_hardregs, regno);
3898 }
3899
3900 /* Now, generate instructions to zero all the other registers. */
3901
3902 rtx zero_gpr = NULL_RTX;
3903 rtx zero_vector = NULL_RTX;
3904 rtx zero_mask = NULL_RTX;
3905 rtx zero_mmx = NULL_RTX;
3906
3907 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3908 {
3909 if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3910 continue;
3911 if (!zero_call_used_regno_p (regno, all_sse_zeroed,
3912 exit_with_mmx_mode && !all_mm_zeroed))
3913 continue;
3914
3915 SET_HARD_REG_BIT (zeroed_hardregs, regno);
3916
3917 rtx reg, tmp, zero_rtx;
3918 machine_mode mode = zero_call_used_regno_mode (regno);
3919
3920 reg = gen_rtx_REG (mode, regno);
3921 zero_rtx = CONST0_RTX (mode);
3922
3923 if (mode == SImode)
3924 if (zero_gpr == NULL_RTX)
3925 {
3926 zero_gpr = reg;
3927 tmp = gen_rtx_SET (reg, zero_rtx);
3928 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
3929 {
3930 rtx clob = gen_rtx_CLOBBER (VOIDmode,
3931 gen_rtx_REG (CCmode,
3932 FLAGS_REG));
3933 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
3934 tmp,
3935 clob));
3936 }
3937 emit_insn (tmp);
3938 }
3939 else
3940 emit_move_insn (reg, zero_gpr);
3941 else if (mode == V4SFmode)
3942 if (zero_vector == NULL_RTX)
3943 {
3944 zero_vector = reg;
3945 tmp = gen_rtx_SET (reg, zero_rtx);
3946 emit_insn (tmp);
3947 }
3948 else
3949 emit_move_insn (reg, zero_vector);
3950 else if (mode == HImode)
3951 if (zero_mask == NULL_RTX)
3952 {
3953 zero_mask = reg;
3954 tmp = gen_rtx_SET (reg, zero_rtx);
3955 emit_insn (tmp);
3956 }
3957 else
3958 emit_move_insn (reg, zero_mask);
3959 else if (mode == V4HImode)
3960 if (zero_mmx == NULL_RTX)
3961 {
3962 zero_mmx = reg;
3963 tmp = gen_rtx_SET (reg, zero_rtx);
3964 emit_insn (tmp);
3965 }
3966 else
3967 emit_move_insn (reg, zero_mmx);
3968 else
3969 gcc_unreachable ();
3970 }
3971 return zeroed_hardregs;
3972 }
3973
3974 /* Define how to find the value returned by a function.
3975 VALTYPE is the data type of the value (as a tree).
3976 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3977 otherwise, FUNC is 0. */
3978
3979 static rtx
3980 function_value_32 (machine_mode orig_mode, machine_mode mode,
3981 const_tree fntype, const_tree fn)
3982 {
3983 unsigned int regno;
3984
3985 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3986 we normally prevent this case when mmx is not available. However
3987 some ABIs may require the result to be returned like DImode. */
3988 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3989 regno = FIRST_MMX_REG;
3990
3991 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3992 we prevent this case when sse is not available. However some ABIs
3993 may require the result to be returned like integer TImode. */
3994 else if (mode == TImode
3995 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3996 regno = FIRST_SSE_REG;
3997
3998 /* 32-byte vector modes in %ymm0. */
3999 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
4000 regno = FIRST_SSE_REG;
4001
4002 /* 64-byte vector modes in %zmm0. */
4003 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
4004 regno = FIRST_SSE_REG;
4005
4006 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4007 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4008 regno = FIRST_FLOAT_REG;
4009 else
4010 /* Most things go in %eax. */
4011 regno = AX_REG;
4012
4013 /* Return _Float16/_Complex _Float16 in an SSE register. */
4014 if (mode == HFmode)
4015 regno = FIRST_SSE_REG;
4016 if (mode == HCmode)
4017 {
4018 rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc(1));
4019 XVECEXP (ret, 0, 0)
4020 = gen_rtx_EXPR_LIST (VOIDmode,
4021 gen_rtx_REG (SImode, FIRST_SSE_REG),
4022 GEN_INT (0));
4023 return ret;
4024 }
4025
4026 /* Override FP return register with %xmm0 for local functions when
4027 SSE math is enabled or for functions with sseregparm attribute. */
4028 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4029 {
4030 int sse_level = ix86_function_sseregparm (fntype, fn, false);
4031 if (sse_level == -1)
4032 {
4033 error ("calling %qD with SSE calling convention without "
4034 "SSE/SSE2 enabled", fn);
4035 sorry ("this is a GCC bug that can be worked around by adding "
4036 "attribute used to function called");
4037 }
4038 else if ((sse_level >= 1 && mode == SFmode)
4039 || (sse_level == 2 && mode == DFmode))
4040 regno = FIRST_SSE_REG;
4041 }
4042
4043 /* OImode shouldn't be used directly. */
4044 gcc_assert (mode != OImode);
4045
4046 return gen_rtx_REG (orig_mode, regno);
4047 }
4048
4049 static rtx
4050 function_value_64 (machine_mode orig_mode, machine_mode mode,
4051 const_tree valtype)
4052 {
4053 rtx ret;
4054
4055 /* Handle libcalls, which don't provide a type node. */
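 /* For example (illustrative only), a soft-float libcall such as
    __multf3 returns its TFmode (__float128) result in %xmm0, while an
    XFmode result such as that of __powixf2 comes back in %st(0).  */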
4056 if (valtype == NULL)
4057 {
4058 unsigned int regno;
4059
4060 switch (mode)
4061 {
4062 case E_HFmode:
4063 case E_HCmode:
4064 case E_SFmode:
4065 case E_SCmode:
4066 case E_DFmode:
4067 case E_DCmode:
4068 case E_TFmode:
4069 case E_SDmode:
4070 case E_DDmode:
4071 case E_TDmode:
4072 regno = FIRST_SSE_REG;
4073 break;
4074 case E_XFmode:
4075 case E_XCmode:
4076 regno = FIRST_FLOAT_REG;
4077 break;
4078 case E_TCmode:
4079 return NULL;
4080 default:
4081 regno = AX_REG;
4082 }
4083
4084 return gen_rtx_REG (mode, regno);
4085 }
4086 else if (POINTER_TYPE_P (valtype))
4087 {
4088 /* Pointers are always returned in word_mode. */
4089 mode = word_mode;
4090 }
4091
4092 ret = construct_container (mode, orig_mode, valtype, 1,
4093 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
4094 x86_64_int_return_registers, 0);
4095
4096 /* For zero sized structures, construct_container returns NULL, but we
4097 need to keep the rest of the compiler happy by returning a meaningful value. */
4098 if (!ret)
4099 ret = gen_rtx_REG (orig_mode, AX_REG);
4100
4101 return ret;
4102 }
4103
4104 static rtx
4105 function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
4106 const_tree fntype, const_tree fn, const_tree valtype)
4107 {
4108 unsigned int regno;
4109
4110 /* Floating point return values in %st(0)
4111 (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */
4112 if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
4113 && (GET_MODE_SIZE (mode) > 8
4114 || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
4115 {
4116 regno = FIRST_FLOAT_REG;
4117 return gen_rtx_REG (orig_mode, regno);
4118 }
4119 else
4120 return function_value_32 (orig_mode, mode, fntype, fn);
4121 }
4122
4123 static rtx
4124 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
4125 const_tree valtype)
4126 {
4127 unsigned int regno = AX_REG;
4128
4129 if (TARGET_SSE)
4130 {
4131 switch (GET_MODE_SIZE (mode))
4132 {
4133 case 16:
4134 if (valtype != NULL_TREE
4135 && !VECTOR_INTEGER_TYPE_P (valtype)
4137 && !INTEGRAL_TYPE_P (valtype)
4138 && !VECTOR_FLOAT_TYPE_P (valtype))
4139 break;
4140 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4141 && !COMPLEX_MODE_P (mode))
4142 regno = FIRST_SSE_REG;
4143 break;
4144 case 8:
4145 case 4:
4146 if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
4147 break;
4148 if (mode == SFmode || mode == DFmode)
4149 regno = FIRST_SSE_REG;
4150 break;
4151 default:
4152 break;
4153 }
4154 }
4155 return gen_rtx_REG (orig_mode, regno);
4156 }
4157
4158 static rtx
4159 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4160 machine_mode orig_mode, machine_mode mode)
4161 {
4162 const_tree fn, fntype;
4163
4164 fn = NULL_TREE;
4165 if (fntype_or_decl && DECL_P (fntype_or_decl))
4166 fn = fntype_or_decl;
4167 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4168
4169 if (ix86_function_type_abi (fntype) == MS_ABI)
4170 {
4171 if (TARGET_64BIT)
4172 return function_value_ms_64 (orig_mode, mode, valtype);
4173 else
4174 return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
4175 }
4176 else if (TARGET_64BIT)
4177 return function_value_64 (orig_mode, mode, valtype);
4178 else
4179 return function_value_32 (orig_mode, mode, fntype, fn);
4180 }
4181
4182 static rtx
4183 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
4184 {
4185 machine_mode mode, orig_mode;
4186
4187 orig_mode = TYPE_MODE (valtype);
4188 mode = type_natural_mode (valtype, NULL, true);
4189 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4190 }
4191
4192 /* Pointer function arguments and return values are promoted to
4193 word_mode for normal functions. */
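
 /* This matters mainly for -mx32, where pointers are 32 bits wide but
    are passed and returned zero-extended in 64-bit registers
    (POINTERS_EXTEND_UNSIGNED).  */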
4194
4195 static machine_mode
4196 ix86_promote_function_mode (const_tree type, machine_mode mode,
4197 int *punsignedp, const_tree fntype,
4198 int for_return)
4199 {
4200 if (cfun->machine->func_type == TYPE_NORMAL
4201 && type != NULL_TREE
4202 && POINTER_TYPE_P (type))
4203 {
4204 *punsignedp = POINTERS_EXTEND_UNSIGNED;
4205 return word_mode;
4206 }
4207 return default_promote_function_mode (type, mode, punsignedp, fntype,
4208 for_return);
4209 }
4210
4211 /* Return true if a structure, union or array with MODE containing FIELD
4212 should be accessed using BLKmode. */
4213
4214 static bool
4215 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
4216 {
4217 /* Union with XFmode must be in BLKmode. */
4218 return (mode == XFmode
4219 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
4220 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
4221 }
4222
4223 rtx
4224 ix86_libcall_value (machine_mode mode)
4225 {
4226 return ix86_function_value_1 (NULL, NULL, mode, mode);
4227 }
4228
4229 /* Return true iff type is returned in memory. */
4230
4231 static bool
4232 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4233 {
4234 const machine_mode mode = type_natural_mode (type, NULL, true);
4235 HOST_WIDE_INT size;
4236
4237 if (TARGET_64BIT)
4238 {
4239 if (ix86_function_type_abi (fntype) == MS_ABI)
4240 {
4241 size = int_size_in_bytes (type);
4242
4243 /* __m128 is returned in xmm0. */
4244 if ((!type || VECTOR_INTEGER_TYPE_P (type)
4245 || INTEGRAL_TYPE_P (type)
4246 || VECTOR_FLOAT_TYPE_P (type))
4247 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4248 && !COMPLEX_MODE_P (mode)
4249 && (GET_MODE_SIZE (mode) == 16 || size == 16))
4250 return false;
4251
4252 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
4253 return size != 1 && size != 2 && size != 4 && size != 8;
4254 }
4255 else
4256 {
4257 int needed_intregs, needed_sseregs;
4258
4259 return examine_argument (mode, type, 1,
4260 &needed_intregs, &needed_sseregs);
4261 }
4262 }
4263 else
4264 {
4265 size = int_size_in_bytes (type);
4266
4267 /* Intel MCU psABI returns scalars and aggregates no larger than 8
4268 bytes in registers. */
4269 if (TARGET_IAMCU)
4270 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
4271
4272 if (mode == BLKmode)
4273 return true;
4274
4275 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4276 return false;
4277
4278 if (VECTOR_MODE_P (mode) || mode == TImode)
4279 {
4280 /* User-created vectors small enough to fit in EAX. */
4281 if (size < 8)
4282 return false;
4283
4284 /* Unless the ABI prescribes otherwise,
4285 MMX/3dNow values are returned in MM0 if available. */
4286
4287 if (size == 8)
4288 return TARGET_VECT8_RETURNS || !TARGET_MMX;
4289
4290 /* SSE values are returned in XMM0 if available. */
4291 if (size == 16)
4292 return !TARGET_SSE;
4293
4294 /* AVX values are returned in YMM0 if available. */
4295 if (size == 32)
4296 return !TARGET_AVX;
4297
4298 /* AVX512F values are returned in ZMM0 if available. */
4299 if (size == 64)
4300 return !TARGET_AVX512F;
4301 }
4302
4303 if (mode == XFmode)
4304 return false;
4305
4306 if (size > 12)
4307 return true;
4308
4309 /* OImode shouldn't be used directly. */
4310 gcc_assert (mode != OImode);
4311
4312 return false;
4313 }
4314 }
4315
4316 /* Implement TARGET_PUSH_ARGUMENT. */
4317
4318 static bool
4319 ix86_push_argument (unsigned int npush)
4320 {
4321 /* If SSE2 is available, use vector move to put large argument onto
4322 stack. NB: In 32-bit mode, use 8-byte vector move. */
4323 return ((!TARGET_SSE2 || npush < (TARGET_64BIT ? 16 : 8))
4324 && TARGET_PUSH_ARGS
4325 && !ACCUMULATE_OUTGOING_ARGS);
4326 }
4327
4328 \f
4329 /* Create the va_list data type. */
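
 /* The record built below is, in effect, the type required by the
    SysV AMD64 psABI (sketch for illustration; the fields are created
    in exactly this order):

      typedef struct __va_list_tag {
        unsigned int gp_offset;
        unsigned int fp_offset;
        void *overflow_arg_area;
        void *reg_save_area;
      } __builtin_va_list[1];
 */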
4330
4331 static tree
4332 ix86_build_builtin_va_list_64 (void)
4333 {
4334 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4335
4336 record = lang_hooks.types.make_type (RECORD_TYPE);
4337 type_decl = build_decl (BUILTINS_LOCATION,
4338 TYPE_DECL, get_identifier ("__va_list_tag"), record);
4339
4340 f_gpr = build_decl (BUILTINS_LOCATION,
4341 FIELD_DECL, get_identifier ("gp_offset"),
4342 unsigned_type_node);
4343 f_fpr = build_decl (BUILTINS_LOCATION,
4344 FIELD_DECL, get_identifier ("fp_offset"),
4345 unsigned_type_node);
4346 f_ovf = build_decl (BUILTINS_LOCATION,
4347 FIELD_DECL, get_identifier ("overflow_arg_area"),
4348 ptr_type_node);
4349 f_sav = build_decl (BUILTINS_LOCATION,
4350 FIELD_DECL, get_identifier ("reg_save_area"),
4351 ptr_type_node);
4352
4353 va_list_gpr_counter_field = f_gpr;
4354 va_list_fpr_counter_field = f_fpr;
4355
4356 DECL_FIELD_CONTEXT (f_gpr) = record;
4357 DECL_FIELD_CONTEXT (f_fpr) = record;
4358 DECL_FIELD_CONTEXT (f_ovf) = record;
4359 DECL_FIELD_CONTEXT (f_sav) = record;
4360
4361 TYPE_STUB_DECL (record) = type_decl;
4362 TYPE_NAME (record) = type_decl;
4363 TYPE_FIELDS (record) = f_gpr;
4364 DECL_CHAIN (f_gpr) = f_fpr;
4365 DECL_CHAIN (f_fpr) = f_ovf;
4366 DECL_CHAIN (f_ovf) = f_sav;
4367
4368 layout_type (record);
4369
4370 TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
4371 NULL_TREE, TYPE_ATTRIBUTES (record));
4372
4373 /* The correct type is an array type of one element. */
4374 return build_array_type (record, build_index_type (size_zero_node));
4375 }
4376
4377 /* Set up the builtin va_list data type and, for 64-bit, the additional
4378 calling-convention-specific va_list data types. */
4379
4380 static tree
4381 ix86_build_builtin_va_list (void)
4382 {
4383 if (TARGET_64BIT)
4384 {
4385 /* Initialize ABI specific va_list builtin types.
4386
4387 In lto1, we can encounter two va_list types:
4388 - one as a result of the type-merge across TUs, and
4389 - the one constructed here.
4390 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
4391 a type identity check in canonical_va_list_type based on
4392 TYPE_MAIN_VARIANT (which we used to have) will not work.
4393 Instead, we tag each va_list_type_node with its unique attribute, and
4394 look for the attribute in the type identity check in
4395 canonical_va_list_type.
4396
4397 Tagging sysv_va_list_type_node directly with the attribute is
4398 problematic since it's an array of one record, which will decay into a
4399 pointer to the record when used as a parameter (see build_va_arg comments for
4400 an example), dropping the attribute in the process. So we tag the
4401 record instead. */
4402
4403 /* For SYSV_ABI we use an array of one record. */
4404 sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
4405
4406 /* For MS_ABI we use plain pointer to argument area. */
4407 tree char_ptr_type = build_pointer_type (char_type_node);
4408 tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
4409 TYPE_ATTRIBUTES (char_ptr_type));
4410 ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
4411
4412 return ((ix86_abi == MS_ABI)
4413 ? ms_va_list_type_node
4414 : sysv_va_list_type_node);
4415 }
4416 else
4417 {
4418 /* For i386 we use plain pointer to argument area. */
4419 return build_pointer_type (char_type_node);
4420 }
4421 }
4422
4423 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
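
 /* A sketch of the register save area laid out below (sizes assume the
    default SysV register counts, X86_64_REGPARM_MAX == 6 and
    X86_64_SSE_REGPARM_MAX == 8):

      reg_save_area +   0: 6 GPRs,     8 bytes each  ( 48 bytes)
      reg_save_area +  48: 8 XMM regs, 16 bytes each (128 bytes)

    gp_offset and fp_offset in the va_list index into this block.  */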
4424
4425 static void
4426 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4427 {
4428 rtx save_area, mem;
4429 alias_set_type set;
4430 int i, max;
4431
4432 /* GPR size of varargs save area. */
4433 if (cfun->va_list_gpr_size)
4434 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
4435 else
4436 ix86_varargs_gpr_size = 0;
4437
4438 /* FPR size of varargs save area. We don't need it if we don't pass
4439 anything in SSE registers. */
4440 if (TARGET_SSE && cfun->va_list_fpr_size)
4441 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
4442 else
4443 ix86_varargs_fpr_size = 0;
4444
4445 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
4446 return;
4447
4448 save_area = frame_pointer_rtx;
4449 set = get_varargs_alias_set ();
4450
4451 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4452 if (max > X86_64_REGPARM_MAX)
4453 max = X86_64_REGPARM_MAX;
4454
4455 for (i = cum->regno; i < max; i++)
4456 {
4457 mem = gen_rtx_MEM (word_mode,
4458 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
4459 MEM_NOTRAP_P (mem) = 1;
4460 set_mem_alias_set (mem, set);
4461 emit_move_insn (mem,
4462 gen_rtx_REG (word_mode,
4463 x86_64_int_parameter_registers[i]));
4464 }
4465
4466 if (ix86_varargs_fpr_size)
4467 {
4468 machine_mode smode;
4469 rtx_code_label *label;
4470 rtx test;
4471
4472 /* Now emit code to save SSE registers. The AX parameter contains the
4473 number of SSE parameter registers used to call this function, though all we
4474 actually check here is the zero/non-zero status. */
4475
4476 label = gen_label_rtx ();
4477 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
4478 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
4479 label));
4480
4481 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
4482 we used movdqa (i.e. TImode) instead? Perhaps even better would
4483 be if we could determine the real mode of the data, via a hook
4484 into pass_stdarg. Ignore all that for now. */
4485 smode = V4SFmode;
4486 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
4487 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
4488
4489 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
4490 if (max > X86_64_SSE_REGPARM_MAX)
4491 max = X86_64_SSE_REGPARM_MAX;
4492
4493 for (i = cum->sse_regno; i < max; ++i)
4494 {
4495 mem = plus_constant (Pmode, save_area,
4496 i * 16 + ix86_varargs_gpr_size);
4497 mem = gen_rtx_MEM (smode, mem);
4498 MEM_NOTRAP_P (mem) = 1;
4499 set_mem_alias_set (mem, set);
4500 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
4501
4502 emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
4503 }
4504
4505 emit_label (label);
4506 }
4507 }
4508
4509 static void
4510 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4511 {
4512 alias_set_type set = get_varargs_alias_set ();
4513 int i;
4514
4515 /* Reset to zero, as a SysV va_arg may have been used
4516 before. */
4517 ix86_varargs_gpr_size = 0;
4518 ix86_varargs_fpr_size = 0;
4519
4520 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
4521 {
4522 rtx reg, mem;
4523
4524 mem = gen_rtx_MEM (Pmode,
4525 plus_constant (Pmode, virtual_incoming_args_rtx,
4526 i * UNITS_PER_WORD));
4527 MEM_NOTRAP_P (mem) = 1;
4528 set_mem_alias_set (mem, set);
4529
4530 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4531 emit_move_insn (mem, reg);
4532 }
4533 }
4534
4535 static void
4536 ix86_setup_incoming_varargs (cumulative_args_t cum_v,
4537 const function_arg_info &arg,
4538 int *, int no_rtl)
4539 {
4540 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4541 CUMULATIVE_ARGS next_cum;
4542 tree fntype;
4543
4544 /* This argument doesn't appear to be used anymore, which is good,
4545 because the old code here didn't suppress rtl generation. */
4546 gcc_assert (!no_rtl);
4547
4548 if (!TARGET_64BIT)
4549 return;
4550
4551 fntype = TREE_TYPE (current_function_decl);
4552
4553 /* For varargs, we do not want to skip the dummy va_dcl argument.
4554 For stdargs, we do want to skip the last named argument. */
4555 next_cum = *cum;
4556 if (stdarg_p (fntype))
4557 ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);
4558
4559 if (cum->call_abi == MS_ABI)
4560 setup_incoming_varargs_ms_64 (&next_cum);
4561 else
4562 setup_incoming_varargs_64 (&next_cum);
4563 }
4564
4565 /* Check whether TYPE is a va_list of the plain char * kind. */
4566
4567 static bool
4568 is_va_list_char_pointer (tree type)
4569 {
4570 tree canonic;
4571
4572 /* For 32-bit it is always true. */
4573 if (!TARGET_64BIT)
4574 return true;
4575 canonic = ix86_canonical_va_list_type (type);
4576 return (canonic == ms_va_list_type_node
4577 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
4578 }
4579
4580 /* Implement va_start. */
4581
4582 static void
4583 ix86_va_start (tree valist, rtx nextarg)
4584 {
4585 HOST_WIDE_INT words, n_gpr, n_fpr;
4586 tree f_gpr, f_fpr, f_ovf, f_sav;
4587 tree gpr, fpr, ovf, sav, t;
4588 tree type;
4589 rtx ovf_rtx;
4590
4591 if (flag_split_stack
4592 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4593 {
4594 unsigned int scratch_regno;
4595
4596 /* When we are splitting the stack, we can't refer to the stack
4597 arguments using internal_arg_pointer, because they may be on
4598 the old stack. The split stack prologue will arrange to
4599 leave a pointer to the old stack arguments in a scratch
4600 register, which we here copy to a pseudo-register. The split
4601 stack prologue can't set the pseudo-register directly because
4602 it (the prologue) runs before any registers have been saved. */
4603
4604 scratch_regno = split_stack_prologue_scratch_regno ();
4605 if (scratch_regno != INVALID_REGNUM)
4606 {
4607 rtx reg;
4608 rtx_insn *seq;
4609
4610 reg = gen_reg_rtx (Pmode);
4611 cfun->machine->split_stack_varargs_pointer = reg;
4612
4613 start_sequence ();
4614 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
4615 seq = get_insns ();
4616 end_sequence ();
4617
4618 push_topmost_sequence ();
4619 emit_insn_after (seq, entry_of_function ());
4620 pop_topmost_sequence ();
4621 }
4622 }
4623
4624 /* Only 64-bit targets need something special. */
4625 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4626 {
4627 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4628 std_expand_builtin_va_start (valist, nextarg);
4629 else
4630 {
4631 rtx va_r, next;
4632
4633 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
4634 next = expand_binop (ptr_mode, add_optab,
4635 cfun->machine->split_stack_varargs_pointer,
4636 crtl->args.arg_offset_rtx,
4637 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4638 convert_move (va_r, next, 0);
4639 }
4640 return;
4641 }
4642
4643 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4644 f_fpr = DECL_CHAIN (f_gpr);
4645 f_ovf = DECL_CHAIN (f_fpr);
4646 f_sav = DECL_CHAIN (f_ovf);
4647
4648 valist = build_simple_mem_ref (valist);
4649 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
4650 /* The following should be folded into the MEM_REF offset. */
4651 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
4652 f_gpr, NULL_TREE);
4653 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
4654 f_fpr, NULL_TREE);
4655 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
4656 f_ovf, NULL_TREE);
4657 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
4658 f_sav, NULL_TREE);
4659
4660 /* Count number of gp and fp argument registers used. */
4661 words = crtl->args.info.words;
4662 n_gpr = crtl->args.info.regno;
4663 n_fpr = crtl->args.info.sse_regno;
4664
4665 if (cfun->va_list_gpr_size)
4666 {
4667 type = TREE_TYPE (gpr);
4668 t = build2 (MODIFY_EXPR, type,
4669 gpr, build_int_cst (type, n_gpr * 8));
4670 TREE_SIDE_EFFECTS (t) = 1;
4671 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4672 }
4673
4674 if (TARGET_SSE && cfun->va_list_fpr_size)
4675 {
4676 type = TREE_TYPE (fpr);
4677 t = build2 (MODIFY_EXPR, type, fpr,
4678 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
4679 TREE_SIDE_EFFECTS (t) = 1;
4680 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4681 }
4682
4683 /* Find the overflow area. */
4684 type = TREE_TYPE (ovf);
4685 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4686 ovf_rtx = crtl->args.internal_arg_pointer;
4687 else
4688 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
4689 t = make_tree (type, ovf_rtx);
4690 if (words != 0)
4691 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
4692
4693 t = build2 (MODIFY_EXPR, type, ovf, t);
4694 TREE_SIDE_EFFECTS (t) = 1;
4695 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4696
4697 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
4698 {
4699 /* Find the register save area.
4700 The function prologue saves it right above the stack frame. */
4701 type = TREE_TYPE (sav);
4702 t = make_tree (type, frame_pointer_rtx);
4703 if (!ix86_varargs_gpr_size)
4704 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
4705
4706 t = build2 (MODIFY_EXPR, type, sav, t);
4707 TREE_SIDE_EFFECTS (t) = 1;
4708 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4709 }
4710 }
4711
4712 /* Implement va_arg. */
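
 /* A rough sketch of the SysV psABI algorithm this gimplifies to for an
    argument that can live in registers (assuming the save-area layout
    described above):

      if (gp_offset > 48 - needed_gp * 8
          || fp_offset > 176 - needed_fp * 16)
        {
          addr = overflow_arg_area;           aligned as required
          overflow_arg_area += size rounded up to 8;
        }
      else
        {
          addr = reg_save_area + gp_offset (and/or fp_offset);
          gp_offset += needed_gp * 8;
          fp_offset += needed_fp * 16;
        }
 */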
4713
4714 static tree
4715 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4716 gimple_seq *post_p)
4717 {
4718 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4719 tree f_gpr, f_fpr, f_ovf, f_sav;
4720 tree gpr, fpr, ovf, sav, t;
4721 int size, rsize;
4722 tree lab_false, lab_over = NULL_TREE;
4723 tree addr, t2;
4724 rtx container;
4725 int indirect_p = 0;
4726 tree ptrtype;
4727 machine_mode nat_mode;
4728 unsigned int arg_boundary;
4729 unsigned int type_align;
4730
4731 /* Only 64-bit targets need something special. */
4732 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4733 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4734
4735 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4736 f_fpr = DECL_CHAIN (f_gpr);
4737 f_ovf = DECL_CHAIN (f_fpr);
4738 f_sav = DECL_CHAIN (f_ovf);
4739
4740 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
4741 valist, f_gpr, NULL_TREE);
4742
4743 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4744 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4745 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4746
4747 indirect_p = pass_va_arg_by_reference (type);
4748 if (indirect_p)
4749 type = build_pointer_type (type);
4750 size = arg_int_size_in_bytes (type);
4751 rsize = CEIL (size, UNITS_PER_WORD);
4752
4753 nat_mode = type_natural_mode (type, NULL, false);
4754 switch (nat_mode)
4755 {
4756 case E_V16HFmode:
4757 case E_V8SFmode:
4758 case E_V8SImode:
4759 case E_V32QImode:
4760 case E_V16HImode:
4761 case E_V4DFmode:
4762 case E_V4DImode:
4763 case E_V32HFmode:
4764 case E_V16SFmode:
4765 case E_V16SImode:
4766 case E_V64QImode:
4767 case E_V32HImode:
4768 case E_V8DFmode:
4769 case E_V8DImode:
4770 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
4771 if (!TARGET_64BIT_MS_ABI)
4772 {
4773 container = NULL;
4774 break;
4775 }
4776 /* FALLTHRU */
4777
4778 default:
4779 container = construct_container (nat_mode, TYPE_MODE (type),
4780 type, 0, X86_64_REGPARM_MAX,
4781 X86_64_SSE_REGPARM_MAX, intreg,
4782 0);
4783 break;
4784 }
4785
4786 /* Pull the value out of the saved registers. */
4787
4788 addr = create_tmp_var (ptr_type_node, "addr");
4789 type_align = TYPE_ALIGN (type);
4790
4791 if (container)
4792 {
4793 int needed_intregs, needed_sseregs;
4794 bool need_temp;
4795 tree int_addr, sse_addr;
4796
4797 lab_false = create_artificial_label (UNKNOWN_LOCATION);
4798 lab_over = create_artificial_label (UNKNOWN_LOCATION);
4799
4800 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4801
4802 need_temp = (!REG_P (container)
4803 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4804 || TYPE_ALIGN (type) > 128));
4805
4806 /* In case we are passing a structure, verify that it forms a consecutive
4807 block in the register save area. If not, we need to do moves. */
4808 if (!need_temp && !REG_P (container))
4809 {
4810 /* Verify that all registers are strictly consecutive */
4811 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4812 {
4813 int i;
4814
4815 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4816 {
4817 rtx slot = XVECEXP (container, 0, i);
4818 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4819 || INTVAL (XEXP (slot, 1)) != i * 16)
4820 need_temp = true;
4821 }
4822 }
4823 else
4824 {
4825 int i;
4826
4827 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4828 {
4829 rtx slot = XVECEXP (container, 0, i);
4830 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4831 || INTVAL (XEXP (slot, 1)) != i * 8)
4832 need_temp = true;
4833 }
4834 }
4835 }
4836 if (!need_temp)
4837 {
4838 int_addr = addr;
4839 sse_addr = addr;
4840 }
4841 else
4842 {
4843 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4844 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4845 }
4846
4847 /* First ensure that we fit completely in registers. */
4848 if (needed_intregs)
4849 {
4850 t = build_int_cst (TREE_TYPE (gpr),
4851 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
4852 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4853 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4854 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4855 gimplify_and_add (t, pre_p);
4856 }
4857 if (needed_sseregs)
4858 {
4859 t = build_int_cst (TREE_TYPE (fpr),
4860 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4861 + X86_64_REGPARM_MAX * 8);
4862 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4863 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4864 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4865 gimplify_and_add (t, pre_p);
4866 }
4867
4868 /* Compute index to start of area used for integer regs. */
4869 if (needed_intregs)
4870 {
4871 /* int_addr = gpr + sav; */
4872 t = fold_build_pointer_plus (sav, gpr);
4873 gimplify_assign (int_addr, t, pre_p);
4874 }
4875 if (needed_sseregs)
4876 {
4877 /* sse_addr = fpr + sav; */
4878 t = fold_build_pointer_plus (sav, fpr);
4879 gimplify_assign (sse_addr, t, pre_p);
4880 }
4881 if (need_temp)
4882 {
4883 int i, prev_size = 0;
4884 tree temp = create_tmp_var (type, "va_arg_tmp");
4885
4886 /* addr = &temp; */
4887 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4888 gimplify_assign (addr, t, pre_p);
4889
4890 for (i = 0; i < XVECLEN (container, 0); i++)
4891 {
4892 rtx slot = XVECEXP (container, 0, i);
4893 rtx reg = XEXP (slot, 0);
4894 machine_mode mode = GET_MODE (reg);
4895 tree piece_type;
4896 tree addr_type;
4897 tree daddr_type;
4898 tree src_addr, src;
4899 int src_offset;
4900 tree dest_addr, dest;
4901 int cur_size = GET_MODE_SIZE (mode);
4902
4903 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
4904 prev_size = INTVAL (XEXP (slot, 1));
4905 if (prev_size + cur_size > size)
4906 {
4907 cur_size = size - prev_size;
4908 unsigned int nbits = cur_size * BITS_PER_UNIT;
4909 if (!int_mode_for_size (nbits, 1).exists (&mode))
4910 mode = QImode;
4911 }
4912 piece_type = lang_hooks.types.type_for_mode (mode, 1);
4913 if (mode == GET_MODE (reg))
4914 addr_type = build_pointer_type (piece_type);
4915 else
4916 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4917 true);
4918 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4919 true);
4920
4921 if (SSE_REGNO_P (REGNO (reg)))
4922 {
4923 src_addr = sse_addr;
4924 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4925 }
4926 else
4927 {
4928 src_addr = int_addr;
4929 src_offset = REGNO (reg) * 8;
4930 }
4931 src_addr = fold_convert (addr_type, src_addr);
4932 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
4933
4934 dest_addr = fold_convert (daddr_type, addr);
4935 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
4936 if (cur_size == GET_MODE_SIZE (mode))
4937 {
4938 src = build_va_arg_indirect_ref (src_addr);
4939 dest = build_va_arg_indirect_ref (dest_addr);
4940
4941 gimplify_assign (dest, src, pre_p);
4942 }
4943 else
4944 {
4945 tree copy
4946 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
4947 3, dest_addr, src_addr,
4948 size_int (cur_size));
4949 gimplify_and_add (copy, pre_p);
4950 }
4951 prev_size += cur_size;
4952 }
4953 }
4954
4955 if (needed_intregs)
4956 {
4957 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4958 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4959 gimplify_assign (gpr, t, pre_p);
4960 /* The GPR save area guarantees only 8-byte alignment. */
4961 if (!need_temp)
4962 type_align = MIN (type_align, 64);
4963 }
4964
4965 if (needed_sseregs)
4966 {
4967 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4968 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4969 gimplify_assign (unshare_expr (fpr), t, pre_p);
4970 }
4971
4972 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
4973
4974 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
4975 }
4976
4977 /* ... otherwise out of the overflow area. */
4978
4979 /* When the caller aligns a parameter on the stack and the requested
4980 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it is capped at
4981 MAX_SUPPORTED_STACK_ALIGNMENT. Match the caller's behavior here in
4982 the callee. */
4983 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
4984 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
4985 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
4986
4987 /* Care for on-stack alignment if needed. */
4988 if (arg_boundary <= 64 || size == 0)
4989 t = ovf;
4990 else
4991 {
4992 HOST_WIDE_INT align = arg_boundary / 8;
4993 t = fold_build_pointer_plus_hwi (ovf, align - 1);
4994 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4995 build_int_cst (TREE_TYPE (t), -align));
4996 }
4997
4998 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4999 gimplify_assign (addr, t, pre_p);
5000
5001 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
5002 gimplify_assign (unshare_expr (ovf), t, pre_p);
5003
5004 if (container)
5005 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
5006
5007 type = build_aligned_type (type, type_align);
5008 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
5009 addr = fold_convert (ptrtype, addr);
5010
5011 if (indirect_p)
5012 addr = build_va_arg_indirect_ref (addr);
5013 return build_va_arg_indirect_ref (addr);
5014 }
5015 \f
5016 /* Return true if OPNUM's MEM should be matched
5017 in movabs* patterns. */
5018
5019 bool
5020 ix86_check_movabs (rtx insn, int opnum)
5021 {
5022 rtx set, mem;
5023
5024 set = PATTERN (insn);
5025 if (GET_CODE (set) == PARALLEL)
5026 set = XVECEXP (set, 0, 0);
5027 gcc_assert (GET_CODE (set) == SET);
5028 mem = XEXP (set, opnum);
5029 while (SUBREG_P (mem))
5030 mem = SUBREG_REG (mem);
5031 gcc_assert (MEM_P (mem));
5032 return volatile_ok || !MEM_VOLATILE_P (mem);
5033 }
5034
5035 /* Return false if INSN contains a MEM with a non-default address space. */
5036 bool
5037 ix86_check_no_addr_space (rtx insn)
5038 {
5039 subrtx_var_iterator::array_type array;
5040 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
5041 {
5042 rtx x = *iter;
5043 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
5044 return false;
5045 }
5046 return true;
5047 }
5048 \f
5049 /* Initialize the table of extra 80387 mathematical constants. */
5050
5051 static void
5052 init_ext_80387_constants (void)
5053 {
5054 static const char * cst[5] =
5055 {
5056 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5057 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5058 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5059 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5060 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5061 };
5062 int i;
5063
5064 for (i = 0; i < 5; i++)
5065 {
5066 real_from_string (&ext_80387_constants_table[i], cst[i]);
5067 /* Ensure each constant is rounded to XFmode precision. */
5068 real_convert (&ext_80387_constants_table[i],
5069 XFmode, &ext_80387_constants_table[i]);
5070 }
5071
5072 ext_80387_constants_init = 1;
5073 }
5074
5075 /* Return non-zero if the constant is something that
5076 can be loaded with a special instruction. */
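
 /* The non-zero return value encodes which insn to use:
      1: fldz    2: fld1   3: fldlg2  4: fldln2  5: fldl2e
      6: fldl2t  7: fldpi  8: fldz;fchs (-0.0)   9: fld1;fchs (-1.0)
    and -1 means X is not an x87 FP constant at all.  */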
5077
5078 int
5079 standard_80387_constant_p (rtx x)
5080 {
5081 machine_mode mode = GET_MODE (x);
5082
5083 const REAL_VALUE_TYPE *r;
5084
5085 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
5086 return -1;
5087
5088 if (x == CONST0_RTX (mode))
5089 return 1;
5090 if (x == CONST1_RTX (mode))
5091 return 2;
5092
5093 r = CONST_DOUBLE_REAL_VALUE (x);
5094
5095 /* For XFmode constants, try to find a special 80387 instruction when
5096 optimizing for size or on those CPUs that benefit from them. */
5097 if (mode == XFmode
5098 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
5099 && !flag_rounding_math)
5100 {
5101 int i;
5102
5103 if (! ext_80387_constants_init)
5104 init_ext_80387_constants ();
5105
5106 for (i = 0; i < 5; i++)
5107 if (real_identical (r, &ext_80387_constants_table[i]))
5108 return i + 3;
5109 }
5110
5111 /* A load of the constant -0.0 or -1.0 will be split into an
5112 fldz;fchs or fld1;fchs sequence. */
5113 if (real_isnegzero (r))
5114 return 8;
5115 if (real_identical (r, &dconstm1))
5116 return 9;
5117
5118 return 0;
5119 }
5120
5121 /* Return the opcode of the special instruction to be used to load
5122 the constant X. */
5123
5124 const char *
5125 standard_80387_constant_opcode (rtx x)
5126 {
5127 switch (standard_80387_constant_p (x))
5128 {
5129 case 1:
5130 return "fldz";
5131 case 2:
5132 return "fld1";
5133 case 3:
5134 return "fldlg2";
5135 case 4:
5136 return "fldln2";
5137 case 5:
5138 return "fldl2e";
5139 case 6:
5140 return "fldl2t";
5141 case 7:
5142 return "fldpi";
5143 case 8:
5144 case 9:
5145 return "#";
5146 default:
5147 gcc_unreachable ();
5148 }
5149 }
5150
5151 /* Return the CONST_DOUBLE representing the 80387 constant that is
5152 loaded by the specified special instruction. The argument IDX
5153 matches the return value from standard_80387_constant_p. */
5154
5155 rtx
5156 standard_80387_constant_rtx (int idx)
5157 {
5158 int i;
5159
5160 if (! ext_80387_constants_init)
5161 init_ext_80387_constants ();
5162
5163 switch (idx)
5164 {
5165 case 3:
5166 case 4:
5167 case 5:
5168 case 6:
5169 case 7:
5170 i = idx - 3;
5171 break;
5172
5173 default:
5174 gcc_unreachable ();
5175 }
5176
5177 return const_double_from_real_value (ext_80387_constants_table[i],
5178 XFmode);
5179 }
5180
5181 /* Return 1 if X is all bits zero and 2 if X is all bits one, in a
5182 supported SSE/AVX vector mode; return 0 otherwise. */
5183
5184 int
5185 standard_sse_constant_p (rtx x, machine_mode pred_mode)
5186 {
5187 machine_mode mode;
5188
5189 if (!TARGET_SSE)
5190 return 0;
5191
5192 mode = GET_MODE (x);
5193
5194 if (x == const0_rtx || const0_operand (x, mode))
5195 return 1;
5196
5197 if (x == constm1_rtx
5198 || vector_all_ones_operand (x, mode)
5199 || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
5200 || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT)
5201 && float_vector_all_ones_operand (x, mode)))
5202 {
5203 /* VOIDmode integer constant, get mode from the predicate. */
5204 if (mode == VOIDmode)
5205 mode = pred_mode;
5206
5207 switch (GET_MODE_SIZE (mode))
5208 {
5209 case 64:
5210 if (TARGET_AVX512F)
5211 return 2;
5212 break;
5213 case 32:
5214 if (TARGET_AVX2)
5215 return 2;
5216 break;
5217 case 16:
5218 if (TARGET_SSE2)
5219 return 2;
5220 break;
5221 case 0:
5222 /* VOIDmode */
5223 gcc_unreachable ();
5224 default:
5225 break;
5226 }
5227 }
5228
5229 return 0;
5230 }
5231
5232 /* Return the opcode of the special instruction to be used to load
5233 the constant operands[1] into operands[0]. */
5234
5235 const char *
5236 standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
5237 {
5238 machine_mode mode;
5239 rtx x = operands[1];
5240
5241 gcc_assert (TARGET_SSE);
5242
5243 mode = GET_MODE (x);
5244
5245 if (x == const0_rtx || const0_operand (x, mode))
5246 {
5247 switch (get_attr_mode (insn))
5248 {
5249 case MODE_TI:
5250 if (!EXT_REX_SSE_REG_P (operands[0]))
5251 return "%vpxor\t%0, %d0";
5252 /* FALLTHRU */
5253 case MODE_XI:
5254 case MODE_OI:
5255 if (EXT_REX_SSE_REG_P (operands[0]))
5256 return (TARGET_AVX512VL
5257 ? "vpxord\t%x0, %x0, %x0"
5258 : "vpxord\t%g0, %g0, %g0");
5259 return "vpxor\t%x0, %x0, %x0";
5260
5261 case MODE_V2DF:
5262 if (!EXT_REX_SSE_REG_P (operands[0]))
5263 return "%vxorpd\t%0, %d0";
5264 /* FALLTHRU */
5265 case MODE_V8DF:
5266 case MODE_V4DF:
5267 if (!EXT_REX_SSE_REG_P (operands[0]))
5268 return "vxorpd\t%x0, %x0, %x0";
5269 else if (TARGET_AVX512DQ)
5270 return (TARGET_AVX512VL
5271 ? "vxorpd\t%x0, %x0, %x0"
5272 : "vxorpd\t%g0, %g0, %g0");
5273 else
5274 return (TARGET_AVX512VL
5275 ? "vpxorq\t%x0, %x0, %x0"
5276 : "vpxorq\t%g0, %g0, %g0");
5277
5278 case MODE_V4SF:
5279 if (!EXT_REX_SSE_REG_P (operands[0]))
5280 return "%vxorps\t%0, %d0";
5281 /* FALLTHRU */
5282 case MODE_V16SF:
5283 case MODE_V8SF:
5284 if (!EXT_REX_SSE_REG_P (operands[0]))
5285 return "vxorps\t%x0, %x0, %x0";
5286 else if (TARGET_AVX512DQ)
5287 return (TARGET_AVX512VL
5288 ? "vxorps\t%x0, %x0, %x0"
5289 : "vxorps\t%g0, %g0, %g0");
5290 else
5291 return (TARGET_AVX512VL
5292 ? "vpxord\t%x0, %x0, %x0"
5293 : "vpxord\t%g0, %g0, %g0");
5294
5295 default:
5296 gcc_unreachable ();
5297 }
5298 }
5299 else if (x == constm1_rtx
5300 || vector_all_ones_operand (x, mode)
5301 || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
5302 && float_vector_all_ones_operand (x, mode)))
5303 {
5304 enum attr_mode insn_mode = get_attr_mode (insn);
5305
5306 switch (insn_mode)
5307 {
5308 case MODE_XI:
5309 case MODE_V8DF:
5310 case MODE_V16SF:
5311 gcc_assert (TARGET_AVX512F);
5312 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5313
5314 case MODE_OI:
5315 case MODE_V4DF:
5316 case MODE_V8SF:
5317 gcc_assert (TARGET_AVX2);
5318 /* FALLTHRU */
5319 case MODE_TI:
5320 case MODE_V2DF:
5321 case MODE_V4SF:
5322 gcc_assert (TARGET_SSE2);
5323 if (!EXT_REX_SSE_REG_P (operands[0]))
5324 return (TARGET_AVX
5325 ? "vpcmpeqd\t%0, %0, %0"
5326 : "pcmpeqd\t%0, %0");
5327 else if (TARGET_AVX512VL)
5328 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
5329 else
5330 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5331
5332 default:
5333 gcc_unreachable ();
5334 }
5335 }
5336
5337 gcc_unreachable ();
5338 }
5339
5340 /* Returns true if INSN can be transformed from a memory load
5341 to a supported FP constant load. */
5342
5343 bool
5344 ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
5345 {
5346 rtx src = find_constant_src (insn);
5347
5348 gcc_assert (REG_P (dst));
5349
5350 if (src == NULL
5351 || (SSE_REGNO_P (REGNO (dst))
5352 && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
5353 || (STACK_REGNO_P (REGNO (dst))
5354 && standard_80387_constant_p (src) < 1))
5355 return false;
5356
5357 return true;
5358 }
5359
5360 /* Predicate for pre-reload splitters with associated instructions,
5361 which can match any time before the split1 pass (usually combine),
5362 then are unconditionally split in that pass and should not be
5363 matched again afterwards. */
5364
5365 bool
5366 ix86_pre_reload_split (void)
5367 {
5368 return (can_create_pseudo_p ()
5369 && !(cfun->curr_properties & PROP_rtl_split_insns));
5370 }
5371
5372 /* Return the opcode of the TYPE_SSEMOV instruction. To move from
5373 or to xmm16-xmm31/ymm16-ymm31 registers, we either require
5374 TARGET_AVX512VL, or it must be a register-to-register move that
5375 can be done as a zmm register move. */
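
 /* E.g., a 16-byte register-to-register V4SFmode move between %xmm16
    and %xmm20 without AVX512VL cannot use an EVEX 128-bit encoding, so
    it is emitted as the full-width "vmovaps %zmm20, %zmm16", which is
    equivalent for a reg-reg move (illustrative example).  */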
5376
5377 static const char *
5378 ix86_get_ssemov (rtx *operands, unsigned size,
5379 enum attr_mode insn_mode, machine_mode mode)
5380 {
5381 char buf[128];
5382 bool misaligned_p = (misaligned_operand (operands[0], mode)
5383 || misaligned_operand (operands[1], mode));
5384 bool evex_reg_p = (size == 64
5385 || EXT_REX_SSE_REG_P (operands[0])
5386 || EXT_REX_SSE_REG_P (operands[1]));
5387 machine_mode scalar_mode;
5388
5389 const char *opcode = NULL;
5390 enum
5391 {
5392 opcode_int,
5393 opcode_float,
5394 opcode_double
5395 } type = opcode_int;
5396
5397 switch (insn_mode)
5398 {
5399 case MODE_V16SF:
5400 case MODE_V8SF:
5401 case MODE_V4SF:
5402 scalar_mode = E_SFmode;
5403 type = opcode_float;
5404 break;
5405 case MODE_V8DF:
5406 case MODE_V4DF:
5407 case MODE_V2DF:
5408 scalar_mode = E_DFmode;
5409 type = opcode_double;
5410 break;
5411 case MODE_XI:
5412 case MODE_OI:
5413 case MODE_TI:
5414 scalar_mode = GET_MODE_INNER (mode);
5415 break;
5416 default:
5417 gcc_unreachable ();
5418 }
5419
5420 /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
5421 we can only use a zmm register move with no memory operand. */
5422 if (evex_reg_p
5423 && !TARGET_AVX512VL
5424 && GET_MODE_SIZE (mode) < 64)
5425 {
5426 /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
5427 xmm16-xmm31 or ymm16-ymm31 in 128/256 bit modes when
5428 AVX512VL is disabled, LRA can still generate reg to
5429 reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
5430 modes. */
5431 if (memory_operand (operands[0], mode)
5432 || memory_operand (operands[1], mode))
5433 gcc_unreachable ();
5434 size = 64;
5435 switch (type)
5436 {
5437 case opcode_int:
5438 if (scalar_mode == E_HFmode)
5439 opcode = (misaligned_p
5440 ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
5441 : "vmovdqa64");
5442 else
5443 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5444 break;
5445 case opcode_float:
5446 opcode = misaligned_p ? "vmovups" : "vmovaps";
5447 break;
5448 case opcode_double:
5449 opcode = misaligned_p ? "vmovupd" : "vmovapd";
5450 break;
5451 }
5452 }
5453 else if (SCALAR_FLOAT_MODE_P (scalar_mode))
5454 {
5455 switch (scalar_mode)
5456 {
5457 case E_HFmode:
5458 if (evex_reg_p)
5459 opcode = (misaligned_p
5460 ? (TARGET_AVX512BW
5461 ? "vmovdqu16"
5462 : "vmovdqu64")
5463 : "vmovdqa64");
5464 else
5465 opcode = (misaligned_p
5466 ? (TARGET_AVX512BW
5467 ? "vmovdqu16"
5468 : "%vmovdqu")
5469 : "%vmovdqa");
5470 break;
5471 case E_SFmode:
5472 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5473 break;
5474 case E_DFmode:
5475 opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
5476 break;
5477 case E_TFmode:
5478 if (evex_reg_p)
5479 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5480 else
5481 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5482 break;
5483 default:
5484 gcc_unreachable ();
5485 }
5486 }
5487 else if (SCALAR_INT_MODE_P (scalar_mode))
5488 {
5489 switch (scalar_mode)
5490 {
5491 case E_QImode:
5492 if (evex_reg_p)
5493 opcode = (misaligned_p
5494 ? (TARGET_AVX512BW
5495 ? "vmovdqu8"
5496 : "vmovdqu64")
5497 : "vmovdqa64");
5498 else
5499 opcode = (misaligned_p
5500 ? (TARGET_AVX512BW
5501 ? "vmovdqu8"
5502 : "%vmovdqu")
5503 : "%vmovdqa");
5504 break;
5505 case E_HImode:
5506 if (evex_reg_p)
5507 opcode = (misaligned_p
5508 ? (TARGET_AVX512BW
5509 ? "vmovdqu16"
5510 : "vmovdqu64")
5511 : "vmovdqa64");
5512 else
5513 opcode = (misaligned_p
5514 ? (TARGET_AVX512BW
5515 ? "vmovdqu16"
5516 : "%vmovdqu")
5517 : "%vmovdqa");
5518 break;
5519 case E_SImode:
5520 if (evex_reg_p)
5521 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5522 else
5523 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5524 break;
5525 case E_DImode:
5526 case E_TImode:
5527 case E_OImode:
5528 if (evex_reg_p)
5529 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5530 else
5531 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5532 break;
5533 case E_XImode:
5534 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5535 break;
5536 default:
5537 gcc_unreachable ();
5538 }
5539 }
5540 else
5541 gcc_unreachable ();
5542
5543 switch (size)
5544 {
5545 case 64:
5546 snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
5547 opcode);
5548 break;
5549 case 32:
5550 snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
5551 opcode);
5552 break;
5553 case 16:
5554 snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
5555 opcode);
5556 break;
5557 default:
5558 gcc_unreachable ();
5559 }
5560 output_asm_insn (buf, operands);
5561 return "";
5562 }
5563
5564 /* Return the template of the TYPE_SSEMOV instruction to move
5565 operands[1] into operands[0]. */
5566
5567 const char *
5568 ix86_output_ssemov (rtx_insn *insn, rtx *operands)
5569 {
5570 machine_mode mode = GET_MODE (operands[0]);
5571 if (get_attr_type (insn) != TYPE_SSEMOV
5572 || mode != GET_MODE (operands[1]))
5573 gcc_unreachable ();
5574
5575 enum attr_mode insn_mode = get_attr_mode (insn);
5576
5577 switch (insn_mode)
5578 {
5579 case MODE_XI:
5580 case MODE_V8DF:
5581 case MODE_V16SF:
5582 return ix86_get_ssemov (operands, 64, insn_mode, mode);
5583
5584 case MODE_OI:
5585 case MODE_V4DF:
5586 case MODE_V8SF:
5587 return ix86_get_ssemov (operands, 32, insn_mode, mode);
5588
5589 case MODE_TI:
5590 case MODE_V2DF:
5591 case MODE_V4SF:
5592 return ix86_get_ssemov (operands, 16, insn_mode, mode);
5593
5594 case MODE_DI:
5595 /* Handle broken assemblers that require movd instead of movq. */
5596 if (GENERAL_REG_P (operands[0]))
5597 {
5598 if (HAVE_AS_IX86_INTERUNIT_MOVQ)
5599 return "%vmovq\t{%1, %q0|%q0, %1}";
5600 else
5601 return "%vmovd\t{%1, %q0|%q0, %1}";
5602 }
5603 else if (GENERAL_REG_P (operands[1]))
5604 {
5605 if (HAVE_AS_IX86_INTERUNIT_MOVQ)
5606 return "%vmovq\t{%q1, %0|%0, %q1}";
5607 else
5608 return "%vmovd\t{%q1, %0|%0, %q1}";
5609 }
5610 else
5611 return "%vmovq\t{%1, %0|%0, %1}";
5612
5613 case MODE_SI:
5614 if (GENERAL_REG_P (operands[0]))
5615 return "%vmovd\t{%1, %k0|%k0, %1}";
5616 else if (GENERAL_REG_P (operands[1]))
5617 return "%vmovd\t{%k1, %0|%0, %k1}";
5618 else
5619 return "%vmovd\t{%1, %0|%0, %1}";
5620
5621 case MODE_HI:
5622 if (GENERAL_REG_P (operands[0]))
5623 return "vmovw\t{%1, %k0|%k0, %1}";
5624 else if (GENERAL_REG_P (operands[1]))
5625 return "vmovw\t{%k1, %0|%0, %k1}";
5626 else
5627 return "vmovw\t{%1, %0|%0, %1}";
5628
5629 case MODE_DF:
5630 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5631 return "vmovsd\t{%d1, %0|%0, %d1}";
5632 else
5633 return "%vmovsd\t{%1, %0|%0, %1}";
5634
5635 case MODE_SF:
5636 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5637 return "vmovss\t{%d1, %0|%0, %d1}";
5638 else
5639 return "%vmovss\t{%1, %0|%0, %1}";
5640
5641 case MODE_HF:
5642 if (REG_P (operands[0]) && REG_P (operands[1]))
5643 return "vmovsh\t{%d1, %0|%0, %d1}";
5644 else
5645 return "vmovsh\t{%1, %0|%0, %1}";
5646
5647 case MODE_V1DF:
5648 gcc_assert (!TARGET_AVX);
5649 return "movlpd\t{%1, %0|%0, %1}";
5650
5651 case MODE_V2SF:
5652 if (TARGET_AVX && REG_P (operands[0]))
5653 return "vmovlps\t{%1, %d0|%d0, %1}";
5654 else
5655 return "%vmovlps\t{%1, %0|%0, %1}";
5656
5657 default:
5658 gcc_unreachable ();
5659 }
5660 }
5661
5662 /* Returns true if OP contains a symbol reference. */
5663
5664 bool
5665 symbolic_reference_mentioned_p (rtx op)
5666 {
5667 const char *fmt;
5668 int i;
5669
5670 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5671 return true;
5672
5673 fmt = GET_RTX_FORMAT (GET_CODE (op));
5674 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5675 {
5676 if (fmt[i] == 'E')
5677 {
5678 int j;
5679
5680 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5681 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5682 return true;
5683 }
5684
5685 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5686 return true;
5687 }
5688
5689 return false;
5690 }
5691
5692 /* Return true if it is appropriate to emit `ret' instructions in the
5693 body of a function. Do this only if the epilogue is simple, needing a
5694 couple of insns. Prior to reloading, we can't tell how many registers
5695 must be saved, so return false then. Return false if there is no frame
5696 marker to de-allocate. */
5697
5698 bool
5699 ix86_can_use_return_insn_p (void)
5700 {
5701 if (ix86_function_ms_hook_prologue (current_function_decl))
5702 return false;
5703
5704 if (ix86_function_naked (current_function_decl))
5705 return false;
5706
5707 /* Don't use `ret' instruction in interrupt handler. */
5708 if (! reload_completed
5709 || frame_pointer_needed
5710 || cfun->machine->func_type != TYPE_NORMAL)
5711 return 0;
5712
5713 /* Don't allow more than 32k pop, since that's all we can do
5714 with one instruction. */
5715 if (crtl->args.pops_args && crtl->args.size >= 32768)
5716 return 0;
5717
5718 struct ix86_frame &frame = cfun->machine->frame;
5719 return (frame.stack_pointer_offset == UNITS_PER_WORD
5720 && (frame.nregs + frame.nsseregs) == 0);
5721 }
5722 \f
5723 /* Return the stack frame size. get_frame_size () returns the stack
5724 slots used during compilation, which may be optimized out later. If
5725 a stack frame is needed, stack_frame_required should be true. */
5726
5727 static HOST_WIDE_INT
5728 ix86_get_frame_size (void)
5729 {
5730 if (cfun->machine->stack_frame_required)
5731 return get_frame_size ();
5732 else
5733 return 0;
5734 }
5735
5736 /* Value should be nonzero if functions must have frame pointers.
5737 Zero means the frame pointer need not be set up (and parms may
5738 be accessed via the stack pointer) in functions that seem suitable. */
5739
5740 static bool
5741 ix86_frame_pointer_required (void)
5742 {
5743 /* If we accessed previous frames, then the generated code expects
5744 to be able to access the saved ebp value in our frame. */
5745 if (cfun->machine->accesses_prev_frame)
5746 return true;
5747
5748 /* Several x86 os'es need a frame pointer for other reasons,
5749 usually pertaining to setjmp. */
5750 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5751 return true;
5752
5753 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
5754 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
5755 return true;
5756
5757 /* With Win64 SEH, very large frames need a frame pointer, as the maximum
5758 stack allocation is 4GB. */
5759 if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
5760 return true;
5761
5762 /* SSE saves require a frame pointer when the stack is misaligned. */
5763 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
5764 return true;
5765
5766 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
5767 turns off the frame pointer by default. Turn it back on now if
5768 we've not got a leaf function. */
5769 if (TARGET_OMIT_LEAF_FRAME_POINTER
5770 && (!crtl->is_leaf
5771 || ix86_current_function_calls_tls_descriptor))
5772 return true;
5773
5774 /* Several versions of mcount for the x86 assume that there is a
5775 frame, so we cannot allow profiling without a frame pointer. */
5776 if (crtl->profile && !flag_fentry)
5777 return true;
5778
5779 return false;
5780 }
5781
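/* As a concrete example, a call to __builtin_frame_address (1) marks the
   function as accessing previous frames (see ix86_setup_frame_addresses
   below), so it keeps %ebp/%rbp even under -fomit-frame-pointer; the same
   holds for profiled functions unless -mfentry is used.  */
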
5782 /* Record that the current function accesses previous call frames. */
5783
5784 void
5785 ix86_setup_frame_addresses (void)
5786 {
5787 cfun->machine->accesses_prev_frame = 1;
5788 }
5789 \f
5790 #ifndef USE_HIDDEN_LINKONCE
5791 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
5792 # define USE_HIDDEN_LINKONCE 1
5793 # else
5794 # define USE_HIDDEN_LINKONCE 0
5795 # endif
5796 #endif
5797
5798 /* Label count for call and return thunks. It is used to make unique
5799 labels in call and return thunks. */
5800 static int indirectlabelno;
5801
5802 /* True if call thunk function is needed. */
5803 static bool indirect_thunk_needed = false;
5804
5805 /* Bit masks of integer registers, which contain branch target, used
5806 by call thunk functions. */
5807 static HARD_REG_SET indirect_thunks_used;
5808
5809 /* True if return thunk function is needed. */
5810 static bool indirect_return_needed = false;
5811
5812 /* True if return thunk function via CX is needed. */
5813 static bool indirect_return_via_cx;
5814
5815 #ifndef INDIRECT_LABEL
5816 # define INDIRECT_LABEL "LIND"
5817 #endif
5818
5819 /* Indicate what prefix is needed for an indirect branch. */
5820 enum indirect_thunk_prefix
5821 {
5822 indirect_thunk_prefix_none,
5823 indirect_thunk_prefix_nt
5824 };
5825
5826 /* Return the prefix needed for an indirect branch INSN. */
5827
5828 enum indirect_thunk_prefix
5829 indirect_thunk_need_prefix (rtx_insn *insn)
5830 {
5831 enum indirect_thunk_prefix need_prefix;
5832 if ((cfun->machine->indirect_branch_type
5833 == indirect_branch_thunk_extern)
5834 && ix86_notrack_prefixed_insn_p (insn))
5835 {
5836 /* NOTRACK prefix is only used with external thunk so that it
5837 can be properly updated to support CET at run-time. */
5838 need_prefix = indirect_thunk_prefix_nt;
5839 }
5840 else
5841 need_prefix = indirect_thunk_prefix_none;
5842 return need_prefix;
5843 }
5844
5845 /* Fills in the label name that should be used for the indirect thunk. */
5846
5847 static void
5848 indirect_thunk_name (char name[32], unsigned int regno,
5849 enum indirect_thunk_prefix need_prefix,
5850 bool ret_p)
5851 {
5852 if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
5853 gcc_unreachable ();
5854
5855 if (USE_HIDDEN_LINKONCE)
5856 {
5857 const char *prefix;
5858
5859 if (need_prefix == indirect_thunk_prefix_nt
5860 && regno != INVALID_REGNUM)
5861 {
5862 /* NOTRACK prefix is only used with external thunk via
5863 register so that NOTRACK prefix can be added to indirect
5864 branch via register to support CET at run-time. */
5865 prefix = "_nt";
5866 }
5867 else
5868 prefix = "";
5869
5870 const char *ret = ret_p ? "return" : "indirect";
5871
5872 if (regno != INVALID_REGNUM)
5873 {
5874 const char *reg_prefix;
5875 if (LEGACY_INT_REGNO_P (regno))
5876 reg_prefix = TARGET_64BIT ? "r" : "e";
5877 else
5878 reg_prefix = "";
5879 sprintf (name, "__x86_%s_thunk%s_%s%s",
5880 ret, prefix, reg_prefix, reg_names[regno]);
5881 }
5882 else
5883 sprintf (name, "__x86_%s_thunk%s", ret, prefix);
5884 }
5885 else
5886 {
5887 if (regno != INVALID_REGNUM)
5888 ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
5889 else
5890 {
5891 if (ret_p)
5892 ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
5893 else
5894 ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
5895 }
5896 }
5897 }
5898
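/* For illustration, on a 64-bit ELF target with hidden linkonce support
   the scheme above produces names such as __x86_indirect_thunk_rax
   (REGNO == AX_REG), __x86_indirect_thunk (address on the stack),
   __x86_return_thunk (RET_P) and __x86_indirect_thunk_nt_rax when the
   NOTRACK prefix is requested.  */
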
5899 /* Output a call and return thunk for indirect branch. If REGNO != -1,
5900 the function address is in REGNO and the call and return thunk looks like:
5901
5902 call L2
5903 L1:
5904 pause
5905 lfence
5906 jmp L1
5907 L2:
5908 mov %REG, (%sp)
5909 ret
5910
5911 Otherwise, the function address is on the top of stack and the
5912 call and return thunk looks like:
5913
5914 call L2
5915 L1:
5916 pause
5917 lfence
5918 jmp L1
5919 L2:
5920 lea WORD_SIZE(%sp), %sp
5921 ret
5922 */
5923
5924 static void
5925 output_indirect_thunk (unsigned int regno)
5926 {
5927 char indirectlabel1[32];
5928 char indirectlabel2[32];
5929
5930 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
5931 indirectlabelno++);
5932 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
5933 indirectlabelno++);
5934
5935 /* Call */
5936 fputs ("\tcall\t", asm_out_file);
5937 assemble_name_raw (asm_out_file, indirectlabel2);
5938 fputc ('\n', asm_out_file);
5939
5940 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
5941
5942 /* AMD and Intel CPUs each prefer a different instruction as a loop filler.
5943 Using both pause + lfence is a compromise solution. */
5944 fprintf (asm_out_file, "\tpause\n\tlfence\n");
5945
5946 /* Jump. */
5947 fputs ("\tjmp\t", asm_out_file);
5948 assemble_name_raw (asm_out_file, indirectlabel1);
5949 fputc ('\n', asm_out_file);
5950
5951 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
5952
5953 /* The above call insn pushed a word to stack. Adjust CFI info. */
5954 if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
5955 {
5956 if (! dwarf2out_do_cfi_asm ())
5957 {
5958 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
5959 xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
5960 xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
5961 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
5962 }
5963 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
5964 xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
5965 xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
5966 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
5967 dwarf2out_emit_cfi (xcfi);
5968 }
5969
5970 if (regno != INVALID_REGNUM)
5971 {
5972 /* MOV. */
5973 rtx xops[2];
5974 xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
5975 xops[1] = gen_rtx_REG (word_mode, regno);
5976 output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
5977 }
5978 else
5979 {
5980 /* LEA. */
5981 rtx xops[2];
5982 xops[0] = stack_pointer_rtx;
5983 xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
5984 output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
5985 }
5986
5987 fputs ("\tret\n", asm_out_file);
5988 if ((ix86_harden_sls & harden_sls_return))
5989 fputs ("\tint3\n", asm_out_file);
5990 }
5991
5992 /* Output a function with a call and return thunk for indirect branch.
5993 If REGNO != INVALID_REGNUM, the function address is in REGNO.
5994 Otherwise, the function address is on the top of stack. Thunk is
5995 used for function return if RET_P is true. */
5996
5997 static void
5998 output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
5999 unsigned int regno, bool ret_p)
6000 {
6001 char name[32];
6002 tree decl;
6003
6004 /* Create __x86_indirect_thunk. */
6005 indirect_thunk_name (name, regno, need_prefix, ret_p);
6006 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
6007 get_identifier (name),
6008 build_function_type_list (void_type_node, NULL_TREE));
6009 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
6010 NULL_TREE, void_type_node);
6011 TREE_PUBLIC (decl) = 1;
6012 TREE_STATIC (decl) = 1;
6013 DECL_IGNORED_P (decl) = 1;
6014
6015 #if TARGET_MACHO
6016 if (TARGET_MACHO)
6017 {
6018 switch_to_section (darwin_sections[picbase_thunk_section]);
6019 fputs ("\t.weak_definition\t", asm_out_file);
6020 assemble_name (asm_out_file, name);
6021 fputs ("\n\t.private_extern\t", asm_out_file);
6022 assemble_name (asm_out_file, name);
6023 putc ('\n', asm_out_file);
6024 ASM_OUTPUT_LABEL (asm_out_file, name);
6025 DECL_WEAK (decl) = 1;
6026 }
6027 else
6028 #endif
6029 if (USE_HIDDEN_LINKONCE)
6030 {
6031 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
6032
6033 targetm.asm_out.unique_section (decl, 0);
6034 switch_to_section (get_named_section (decl, NULL, 0));
6035
6036 targetm.asm_out.globalize_label (asm_out_file, name);
6037 fputs ("\t.hidden\t", asm_out_file);
6038 assemble_name (asm_out_file, name);
6039 putc ('\n', asm_out_file);
6040 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6041 }
6042 else
6043 {
6044 switch_to_section (text_section);
6045 ASM_OUTPUT_LABEL (asm_out_file, name);
6046 }
6047
6048 DECL_INITIAL (decl) = make_node (BLOCK);
6049 current_function_decl = decl;
6050 allocate_struct_function (decl, false);
6051 init_function_start (decl);
6052 /* We're about to hide the function body from callees of final_* by
6053 emitting it directly; tell them we're a thunk, if they care. */
6054 cfun->is_thunk = true;
6055 first_function_block_is_cold = false;
6056 /* Make sure unwind info is emitted for the thunk if needed. */
6057 final_start_function (emit_barrier (), asm_out_file, 1);
6058
6059 output_indirect_thunk (regno);
6060
6061 final_end_function ();
6062 init_insn_lengths ();
6063 free_after_compilation (cfun);
6064 set_cfun (NULL);
6065 current_function_decl = NULL;
6066 }
6067
6068 static int pic_labels_used;
6069
6070 /* Fills in the label name that should be used for a pc thunk for
6071 the given register. */
6072
6073 static void
6074 get_pc_thunk_name (char name[32], unsigned int regno)
6075 {
6076 gcc_assert (!TARGET_64BIT);
6077
6078 if (USE_HIDDEN_LINKONCE)
6079 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
6080 else
6081 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
6082 }
6083
6084
6085 /* This function generates code for -fpic that loads %ebx with
6086 the return address of the caller and then returns. */
6087
6088 static void
6089 ix86_code_end (void)
6090 {
6091 rtx xops[2];
6092 unsigned int regno;
6093
6094 if (indirect_return_needed)
6095 output_indirect_thunk_function (indirect_thunk_prefix_none,
6096 INVALID_REGNUM, true);
6097 if (indirect_return_via_cx)
6098 output_indirect_thunk_function (indirect_thunk_prefix_none,
6099 CX_REG, true);
6100 if (indirect_thunk_needed)
6101 output_indirect_thunk_function (indirect_thunk_prefix_none,
6102 INVALID_REGNUM, false);
6103
6104 for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
6105 {
6106 if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
6107 output_indirect_thunk_function (indirect_thunk_prefix_none,
6108 regno, false);
6109 }
6110
6111 for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
6112 {
6113 char name[32];
6114 tree decl;
6115
6116 if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
6117 output_indirect_thunk_function (indirect_thunk_prefix_none,
6118 regno, false);
6119
6120 if (!(pic_labels_used & (1 << regno)))
6121 continue;
6122
6123 get_pc_thunk_name (name, regno);
6124
6125 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
6126 get_identifier (name),
6127 build_function_type_list (void_type_node, NULL_TREE));
6128 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
6129 NULL_TREE, void_type_node);
6130 TREE_PUBLIC (decl) = 1;
6131 TREE_STATIC (decl) = 1;
6132 DECL_IGNORED_P (decl) = 1;
6133
6134 #if TARGET_MACHO
6135 if (TARGET_MACHO)
6136 {
6137 switch_to_section (darwin_sections[picbase_thunk_section]);
6138 fputs ("\t.weak_definition\t", asm_out_file);
6139 assemble_name (asm_out_file, name);
6140 fputs ("\n\t.private_extern\t", asm_out_file);
6141 assemble_name (asm_out_file, name);
6142 putc ('\n', asm_out_file);
6143 ASM_OUTPUT_LABEL (asm_out_file, name);
6144 DECL_WEAK (decl) = 1;
6145 }
6146 else
6147 #endif
6148 if (USE_HIDDEN_LINKONCE)
6149 {
6150 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
6151
6152 targetm.asm_out.unique_section (decl, 0);
6153 switch_to_section (get_named_section (decl, NULL, 0));
6154
6155 targetm.asm_out.globalize_label (asm_out_file, name);
6156 fputs ("\t.hidden\t", asm_out_file);
6157 assemble_name (asm_out_file, name);
6158 putc ('\n', asm_out_file);
6159 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6160 }
6161 else
6162 {
6163 switch_to_section (text_section);
6164 ASM_OUTPUT_LABEL (asm_out_file, name);
6165 }
6166
6167 DECL_INITIAL (decl) = make_node (BLOCK);
6168 current_function_decl = decl;
6169 allocate_struct_function (decl, false);
6170 init_function_start (decl);
6171 /* We're about to hide the function body from callees of final_* by
6172 emitting it directly; tell them we're a thunk, if they care. */
6173 cfun->is_thunk = true;
6174 first_function_block_is_cold = false;
6175 /* Make sure unwind info is emitted for the thunk if needed. */
6176 final_start_function (emit_barrier (), asm_out_file, 1);
6177
6178 /* Pad stack IP move with 4 instructions (two NOPs count
6179 as one instruction). */
6180 if (TARGET_PAD_SHORT_FUNCTION)
6181 {
6182 int i = 8;
6183
6184 while (i--)
6185 fputs ("\tnop\n", asm_out_file);
6186 }
6187
6188 xops[0] = gen_rtx_REG (Pmode, regno);
6189 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
6190 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
6191 fputs ("\tret\n", asm_out_file);
6192 final_end_function ();
6193 init_insn_lengths ();
6194 free_after_compilation (cfun);
6195 set_cfun (NULL);
6196 current_function_decl = NULL;
6197 }
6198
6199 if (flag_split_stack)
6200 file_end_indicate_split_stack ();
6201 }
6202
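/* The 32-bit PC thunk emitted by the loop above is tiny; for %ebx it is
   essentially:

   __x86.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret

   i.e. it copies the caller's return address into the PIC register.  */
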
6203 /* Emit code for the SET_GOT patterns. */
6204
6205 const char *
6206 output_set_got (rtx dest, rtx label)
6207 {
6208 rtx xops[3];
6209
6210 xops[0] = dest;
6211
6212 if (TARGET_VXWORKS_RTP && flag_pic)
6213 {
6214 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
6215 xops[2] = gen_rtx_MEM (Pmode,
6216 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
6217 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6218
6219 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
6220 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
6221 an unadorned address. */
6222 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6223 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
6224 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
6225 return "";
6226 }
6227
6228 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
6229
6230 if (flag_pic)
6231 {
6232 char name[32];
6233 get_pc_thunk_name (name, REGNO (dest));
6234 pic_labels_used |= 1 << REGNO (dest);
6235
6236 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
6237 xops[2] = gen_rtx_MEM (QImode, xops[2]);
6238 output_asm_insn ("%!call\t%X2", xops);
6239
6240 #if TARGET_MACHO
6241 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
6242 This is what will be referenced by the Mach-O PIC subsystem. */
6243 if (machopic_should_output_picbase_label () || !label)
6244 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
6245
6246 /* When we are restoring the pic base at the site of a nonlocal label,
6247 and we decided to emit the pic base above, we will still output a
6248 local label used for calculating the correction offset (even though
6249 the offset will be 0 in that case). */
6250 if (label)
6251 targetm.asm_out.internal_label (asm_out_file, "L",
6252 CODE_LABEL_NUMBER (label));
6253 #endif
6254 }
6255 else
6256 {
6257 if (TARGET_MACHO)
6258 /* We don't need a pic base, we're not producing pic. */
6259 gcc_unreachable ();
6260
6261 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
6262 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
6263 targetm.asm_out.internal_label (asm_out_file, "L",
6264 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
6265 }
6266
6267 if (!TARGET_MACHO)
6268 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
6269
6270 return "";
6271 }
6272
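/* In the common 32-bit PIC case the sequence emitted above is therefore:

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   which leaves the GOT address in the chosen PIC register (the add is
   skipped on Mach-O, where the pc-relative offset is applied later).  */
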
6273 /* Generate a "push" pattern for input ARG. */
6274
6275 rtx
6276 gen_push (rtx arg)
6277 {
6278 struct machine_function *m = cfun->machine;
6279
6280 if (m->fs.cfa_reg == stack_pointer_rtx)
6281 m->fs.cfa_offset += UNITS_PER_WORD;
6282 m->fs.sp_offset += UNITS_PER_WORD;
6283
6284 if (REG_P (arg) && GET_MODE (arg) != word_mode)
6285 arg = gen_rtx_REG (word_mode, REGNO (arg));
6286
6287 return gen_rtx_SET (gen_rtx_MEM (word_mode,
6288 gen_rtx_PRE_DEC (Pmode,
6289 stack_pointer_rtx)),
6290 arg);
6291 }
6292
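/* As a sketch of the RTL produced: on x86-64, gen_push applied to %rax
   yields

   (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI ax))

   which the backend emits as 'pushq %rax', after bumping the tracked
   CFA and SP offsets by UNITS_PER_WORD.  */
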
6293 /* Generate a "pop" pattern for input ARG. */
6294
6295 rtx
6296 gen_pop (rtx arg)
6297 {
6298 if (REG_P (arg) && GET_MODE (arg) != word_mode)
6299 arg = gen_rtx_REG (word_mode, REGNO (arg));
6300
6301 return gen_rtx_SET (arg,
6302 gen_rtx_MEM (word_mode,
6303 gen_rtx_POST_INC (Pmode,
6304 stack_pointer_rtx)));
6305 }
6306
6307 /* Return >= 0 if there is an unused call-clobbered register available
6308 for the entire function. */
6309
6310 static unsigned int
6311 ix86_select_alt_pic_regnum (void)
6312 {
6313 if (ix86_use_pseudo_pic_reg ())
6314 return INVALID_REGNUM;
6315
6316 if (crtl->is_leaf
6317 && !crtl->profile
6318 && !ix86_current_function_calls_tls_descriptor)
6319 {
6320 int i, drap;
6321 /* Can't use the same register for both PIC and DRAP. */
6322 if (crtl->drap_reg)
6323 drap = REGNO (crtl->drap_reg);
6324 else
6325 drap = -1;
6326 for (i = 2; i >= 0; --i)
6327 if (i != drap && !df_regs_ever_live_p (i))
6328 return i;
6329 }
6330
6331 return INVALID_REGNUM;
6332 }
6333
6334 /* Return true if REGNO is used by the epilogue. */
6335
6336 bool
6337 ix86_epilogue_uses (int regno)
6338 {
6339 /* If there are no caller-saved registers, we preserve all registers,
6340 except for MMX and x87 registers which aren't supported when saving
6341 and restoring registers. Don't explicitly save SP register since
6342 it is always preserved. */
6343 return (epilogue_completed
6344 && cfun->machine->no_caller_saved_registers
6345 && !fixed_regs[regno]
6346 && !STACK_REGNO_P (regno)
6347 && !MMX_REGNO_P (regno));
6348 }
6349
6350 /* Return nonzero if register REGNO can be used as a scratch register
6351 in peephole2. */
6352
6353 static bool
6354 ix86_hard_regno_scratch_ok (unsigned int regno)
6355 {
6356 /* If there are no caller-saved registers, we can't use any register
6357 as a scratch register after the epilogue, and we use REGNO as a scratch
6358 register only if it has been used before, to avoid saving and
6359 restoring it. */
6360 return (!cfun->machine->no_caller_saved_registers
6361 || (!epilogue_completed
6362 && df_regs_ever_live_p (regno)));
6363 }
6364
6365 /* Return TRUE if we need to save REGNO. */
6366
6367 bool
6368 ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
6369 {
6370 /* If there are no caller-saved registers, we preserve all registers,
6371 except for MMX and x87 registers which aren't supported when saving
6372 and restoring registers. Don't explicitly save SP register since
6373 it is always preserved. */
6374 if (cfun->machine->no_caller_saved_registers)
6375 {
6376 /* Don't preserve registers used for function return value. */
6377 rtx reg = crtl->return_rtx;
6378 if (reg)
6379 {
6380 unsigned int i = REGNO (reg);
6381 unsigned int nregs = REG_NREGS (reg);
6382 while (nregs-- > 0)
6383 if ((i + nregs) == regno)
6384 return false;
6385 }
6386
6387 return (df_regs_ever_live_p (regno)
6388 && !fixed_regs[regno]
6389 && !STACK_REGNO_P (regno)
6390 && !MMX_REGNO_P (regno)
6391 && (regno != HARD_FRAME_POINTER_REGNUM
6392 || !frame_pointer_needed));
6393 }
6394
6395 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
6396 && pic_offset_table_rtx)
6397 {
6398 if (ix86_use_pseudo_pic_reg ())
6399 {
6400 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
6401 _mcount in prologue. */
6402 if (!TARGET_64BIT && flag_pic && crtl->profile)
6403 return true;
6404 }
6405 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6406 || crtl->profile
6407 || crtl->calls_eh_return
6408 || crtl->uses_const_pool
6409 || cfun->has_nonlocal_label)
6410 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
6411 }
6412
6413 if (crtl->calls_eh_return && maybe_eh_return)
6414 {
6415 unsigned i;
6416 for (i = 0; ; i++)
6417 {
6418 unsigned test = EH_RETURN_DATA_REGNO (i);
6419 if (test == INVALID_REGNUM)
6420 break;
6421 if (test == regno)
6422 return true;
6423 }
6424 }
6425
6426 if (ignore_outlined && cfun->machine->call_ms2sysv)
6427 {
6428 unsigned count = cfun->machine->call_ms2sysv_extra_regs
6429 + xlogue_layout::MIN_REGS;
6430 if (xlogue_layout::is_stub_managed_reg (regno, count))
6431 return false;
6432 }
6433
6434 if (crtl->drap_reg
6435 && regno == REGNO (crtl->drap_reg)
6436 && !cfun->machine->no_drap_save_restore)
6437 return true;
6438
6439 return (df_regs_ever_live_p (regno)
6440 && !call_used_or_fixed_reg_p (regno)
6441 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
6442 }
6443
6444 /* Return the number of saved general purpose registers. */
6445
6446 static int
6447 ix86_nsaved_regs (void)
6448 {
6449 int nregs = 0;
6450 int regno;
6451
6452 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6453 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6454 nregs ++;
6455 return nregs;
6456 }
6457
6458 /* Return number of saved SSE registers. */
6459
6460 static int
6461 ix86_nsaved_sseregs (void)
6462 {
6463 int nregs = 0;
6464 int regno;
6465
6466 if (!TARGET_64BIT_MS_ABI)
6467 return 0;
6468 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6469 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6470 nregs ++;
6471 return nregs;
6472 }
6473
6474 /* Given FROM and TO register numbers, say whether this elimination is
6475 allowed. If stack alignment is needed, we can only replace argument
6476 pointer with hard frame pointer, or replace frame pointer with stack
6477 pointer. Otherwise, frame pointer elimination is automatically
6478 handled and all other eliminations are valid. */
6479
6480 static bool
6481 ix86_can_eliminate (const int from, const int to)
6482 {
6483 if (stack_realign_fp)
6484 return ((from == ARG_POINTER_REGNUM
6485 && to == HARD_FRAME_POINTER_REGNUM)
6486 || (from == FRAME_POINTER_REGNUM
6487 && to == STACK_POINTER_REGNUM));
6488 else
6489 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
6490 }
6491
6492 /* Return the offset between two registers, one to be eliminated, and the other
6493 its replacement, at the start of a routine. */
6494
6495 HOST_WIDE_INT
6496 ix86_initial_elimination_offset (int from, int to)
6497 {
6498 struct ix86_frame &frame = cfun->machine->frame;
6499
6500 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6501 return frame.hard_frame_pointer_offset;
6502 else if (from == FRAME_POINTER_REGNUM
6503 && to == HARD_FRAME_POINTER_REGNUM)
6504 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6505 else
6506 {
6507 gcc_assert (to == STACK_POINTER_REGNUM);
6508
6509 if (from == ARG_POINTER_REGNUM)
6510 return frame.stack_pointer_offset;
6511
6512 gcc_assert (from == FRAME_POINTER_REGNUM);
6513 return frame.stack_pointer_offset - frame.frame_pointer_offset;
6514 }
6515 }
6516
6517 /* Emits a warning for unsupported msabi to sysv pro/epilogues. */
6518 void warn_once_call_ms2sysv_xlogues (const char *feature)
6519 {
6520 static bool warned_once = false;
6521 if (!warned_once)
6522 {
6523 warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
6524 feature);
6525 warned_once = true;
6526 }
6527 }
6528
6529 /* Return the probing interval for -fstack-clash-protection. */
6530
6531 static HOST_WIDE_INT
6532 get_probe_interval (void)
6533 {
6534 if (flag_stack_clash_protection)
6535 return (HOST_WIDE_INT_1U
6536 << param_stack_clash_protection_probe_interval);
6537 else
6538 return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
6539 }
6540
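/* With the default --param stack-clash-protection-probe-interval value
   (normally 12) this yields a 4 KiB probing interval, i.e.
   HOST_WIDE_INT_1U << 12 == 4096 bytes; without stack clash protection
   the generic STACK_CHECK_PROBE_INTERVAL_EXP exponent applies instead.  */
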
6541 /* When using -fsplit-stack, the allocation routines set a field in
6542 the TCB to the bottom of the stack plus this much space, measured
6543 in bytes. */
6544
6545 #define SPLIT_STACK_AVAILABLE 256
6546
6547 /* Fill structure ix86_frame about frame of currently computed function. */
6548
6549 static void
6550 ix86_compute_frame_layout (void)
6551 {
6552 struct ix86_frame *frame = &cfun->machine->frame;
6553 struct machine_function *m = cfun->machine;
6554 unsigned HOST_WIDE_INT stack_alignment_needed;
6555 HOST_WIDE_INT offset;
6556 unsigned HOST_WIDE_INT preferred_alignment;
6557 HOST_WIDE_INT size = ix86_get_frame_size ();
6558 HOST_WIDE_INT to_allocate;
6559
6560 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
6561 * ms_abi functions that call a sysv function. We now need to prune away
6562 * cases where it should be disabled. */
6563 if (TARGET_64BIT && m->call_ms2sysv)
6564 {
6565 gcc_assert (TARGET_64BIT_MS_ABI);
6566 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
6567 gcc_assert (!TARGET_SEH);
6568 gcc_assert (TARGET_SSE);
6569 gcc_assert (!ix86_using_red_zone ());
6570
6571 if (crtl->calls_eh_return)
6572 {
6573 gcc_assert (!reload_completed);
6574 m->call_ms2sysv = false;
6575 warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
6576 }
6577
6578 else if (ix86_static_chain_on_stack)
6579 {
6580 gcc_assert (!reload_completed);
6581 m->call_ms2sysv = false;
6582 warn_once_call_ms2sysv_xlogues ("static call chains");
6583 }
6584
6585 /* Finally, compute which registers the stub will manage. */
6586 else
6587 {
6588 unsigned count = xlogue_layout::count_stub_managed_regs ();
6589 m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
6590 m->call_ms2sysv_pad_in = 0;
6591 }
6592 }
6593
6594 frame->nregs = ix86_nsaved_regs ();
6595 frame->nsseregs = ix86_nsaved_sseregs ();
6596
6597 /* The 64-bit MS ABI seems to require stack alignment to be always 16,
6598 except for function prologues, leaf functions and when the default
6599 incoming stack boundary is overridden at command line or via
6600 force_align_arg_pointer attribute.
6601
6602 Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
6603 at call sites, including profile function calls.
6604 */
6605 if (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
6606 && crtl->preferred_stack_boundary < 128)
6607 && (!crtl->is_leaf || cfun->calls_alloca != 0
6608 || ix86_current_function_calls_tls_descriptor
6609 || (TARGET_MACHO && crtl->profile)
6610 || ix86_incoming_stack_boundary < 128))
6611 {
6612 crtl->preferred_stack_boundary = 128;
6613 crtl->stack_alignment_needed = 128;
6614 }
6615
6616 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6617 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6618
6619 gcc_assert (!size || stack_alignment_needed);
6620 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6621 gcc_assert (preferred_alignment <= stack_alignment_needed);
6622
6623 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
6624 gcc_assert (TARGET_64BIT || !frame->nsseregs);
6625 if (TARGET_64BIT && m->call_ms2sysv)
6626 {
6627 gcc_assert (stack_alignment_needed >= 16);
6628 gcc_assert (!frame->nsseregs);
6629 }
6630
6631 /* For SEH we have to limit the amount of code movement into the prologue.
6632 At present we do this via a BLOCKAGE, at which point there's very little
6633 scheduling that can be done, which means that there's very little point
6634 in doing anything except PUSHs. */
6635 if (TARGET_SEH)
6636 m->use_fast_prologue_epilogue = false;
6637 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
6638 {
6639 int count = frame->nregs;
6640 struct cgraph_node *node = cgraph_node::get (current_function_decl);
6641
6642 /* The fast prologue uses move instead of push to save registers. This
6643 is significantly longer, but also executes faster as modern hardware
6644 can execute the moves in parallel, but can't do that for push/pop.
6645
6646 Be careful about choosing which prologue to emit: when the function takes
6647 many instructions to execute, we may use the slow version, as well as
6648 when the function is known to be outside a hot spot (this is known with
6649 feedback only). Weight the size of the function by the number of registers
6650 to save as it is cheap to use one or two push instructions but very
6651 slow to use many of them.
6652
6653 Calling this hook multiple times with the same frame requirements
6654 must produce the same layout, since the RA might otherwise be
6655 unable to reach a fixed point or might fail its final sanity checks.
6656 This means that once we've assumed that a function does or doesn't
6657 have a particular size, we have to stick to that assumption
6658 regardless of how the function has changed since. */
6659 if (count)
6660 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6661 if (node->frequency < NODE_FREQUENCY_NORMAL
6662 || (flag_branch_probabilities
6663 && node->frequency < NODE_FREQUENCY_HOT))
6664 m->use_fast_prologue_epilogue = false;
6665 else
6666 {
6667 if (count != frame->expensive_count)
6668 {
6669 frame->expensive_count = count;
6670 frame->expensive_p = expensive_function_p (count);
6671 }
6672 m->use_fast_prologue_epilogue = !frame->expensive_p;
6673 }
6674 }
6675
6676 frame->save_regs_using_mov
6677 = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;
6678
6679 /* Skip return address and error code in exception handler. */
6680 offset = INCOMING_FRAME_SP_OFFSET;
6681
6682 /* Skip pushed static chain. */
6683 if (ix86_static_chain_on_stack)
6684 offset += UNITS_PER_WORD;
6685
6686 /* Skip saved base pointer. */
6687 if (frame_pointer_needed)
6688 offset += UNITS_PER_WORD;
6689 frame->hfp_save_offset = offset;
6690
6691 /* The traditional frame pointer location is at the top of the frame. */
6692 frame->hard_frame_pointer_offset = offset;
6693
6694 /* Register save area */
6695 offset += frame->nregs * UNITS_PER_WORD;
6696 frame->reg_save_offset = offset;
6697
6698 /* Calculate the size of the va-arg area (not including padding, if any). */
6699 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
6700
6701 /* Also adjust stack_realign_offset for the largest alignment of
6702 stack slot actually used. */
6703 if (stack_realign_fp
6704 || (cfun->machine->max_used_stack_alignment != 0
6705 && (offset % cfun->machine->max_used_stack_alignment) != 0))
6706 {
6707 /* We may need a 16-byte aligned stack for the remainder of the
6708 register save area, but the stack frame for the local function
6709 may require a greater alignment if using AVX/2/512. In order
6710 to avoid wasting space, we first calculate the space needed for
6711 the rest of the register saves, add that to the stack pointer,
6712 and then realign the stack to the boundary of the start of the
6713 frame for the local function. */
6714 HOST_WIDE_INT space_needed = 0;
6715 HOST_WIDE_INT sse_reg_space_needed = 0;
6716
6717 if (TARGET_64BIT)
6718 {
6719 if (m->call_ms2sysv)
6720 {
6721 m->call_ms2sysv_pad_in = 0;
6722 space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
6723 }
6724
6725 else if (frame->nsseregs)
6726 /* The only ABI that has saved SSE registers (Win64) also has a
6727 16-byte aligned default stack. However, many programs violate
6728 the ABI, and Wine64 forces stack realignment to compensate. */
6729 space_needed = frame->nsseregs * 16;
6730
6731 sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
6732
6733 /* 64-bit frame->va_arg_size should always be a multiple of 16, but
6734 round up anyway to be pedantic. */
6735 space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
6736 }
6737 else
6738 space_needed = frame->va_arg_size;
6739
6740 /* Record the allocation size required prior to the realignment AND. */
6741 frame->stack_realign_allocate = space_needed;
6742
6743 /* The re-aligned stack starts at frame->stack_realign_offset. Values
6744 before this point are not directly comparable with values below
6745 this point. Use sp_valid_at to determine if the stack pointer is
6746 valid for a given offset, fp_valid_at for the frame pointer, or
6747 choose_baseaddr to have a base register chosen for you.
6748
6749 Note that the result of (frame->stack_realign_offset
6750 & (stack_alignment_needed - 1)) may not equal zero. */
6751 offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
6752 frame->stack_realign_offset = offset - space_needed;
6753 frame->sse_reg_save_offset = frame->stack_realign_offset
6754 + sse_reg_space_needed;
6755 }
6756 else
6757 {
6758 frame->stack_realign_offset = offset;
6759
6760 if (TARGET_64BIT && m->call_ms2sysv)
6761 {
6762 m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
6763 offset += xlogue_layout::get_instance ().get_stack_space_used ();
6764 }
6765
6766 /* Align and set SSE register save area. */
6767 else if (frame->nsseregs)
6768 {
6769 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
6770 required and the DRAP re-alignment boundary is at least 16 bytes,
6771 then we want the SSE register save area properly aligned. */
6772 if (ix86_incoming_stack_boundary >= 128
6773 || (stack_realign_drap && stack_alignment_needed >= 16))
6774 offset = ROUND_UP (offset, 16);
6775 offset += frame->nsseregs * 16;
6776 }
6777 frame->sse_reg_save_offset = offset;
6778 offset += frame->va_arg_size;
6779 }
6780
6781 /* Align start of frame for local function. When a function call
6782 is removed, it may become a leaf function. But if arguments may
6783 be passed on the stack, we need to align the stack when there is no
6784 tail call. */
6785 if (m->call_ms2sysv
6786 || frame->va_arg_size != 0
6787 || size != 0
6788 || !crtl->is_leaf
6789 || (!crtl->tail_call_emit
6790 && cfun->machine->outgoing_args_on_stack)
6791 || cfun->calls_alloca
6792 || ix86_current_function_calls_tls_descriptor)
6793 offset = ROUND_UP (offset, stack_alignment_needed);
6794
6795 /* Frame pointer points here. */
6796 frame->frame_pointer_offset = offset;
6797
6798 offset += size;
6799
6800 /* Add outgoing arguments area. Can be skipped if we eliminated
6801 all the function calls as dead code.
6802 Skipping is however impossible when function calls alloca. Alloca
6803 expander assumes that last crtl->outgoing_args_size
6804 of stack frame are unused. */
6805 if (ACCUMULATE_OUTGOING_ARGS
6806 && (!crtl->is_leaf || cfun->calls_alloca
6807 || ix86_current_function_calls_tls_descriptor))
6808 {
6809 offset += crtl->outgoing_args_size;
6810 frame->outgoing_arguments_size = crtl->outgoing_args_size;
6811 }
6812 else
6813 frame->outgoing_arguments_size = 0;
6814
6815 /* Align stack boundary. Only needed if we're calling another function
6816 or using alloca. */
6817 if (!crtl->is_leaf || cfun->calls_alloca
6818 || ix86_current_function_calls_tls_descriptor)
6819 offset = ROUND_UP (offset, preferred_alignment);
6820
6821 /* We've reached end of stack frame. */
6822 frame->stack_pointer_offset = offset;
6823
6824 /* Size prologue needs to allocate. */
6825 to_allocate = offset - frame->sse_reg_save_offset;
6826
6827 if ((!to_allocate && frame->nregs <= 1)
6828 || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
6829 /* If static stack checking is enabled and done with probes,
6830 the registers need to be saved before allocating the frame. */
6831 || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6832 /* If stack clash probing needs a loop, then it needs a
6833 scratch register. But the returned register is only guaranteed
6834 to be safe to use after register saves are complete. So if
6835 stack clash protections are enabled and the allocated frame is
6836 larger than the probe interval, then use pushes to save
6837 callee saved registers. */
6838 || (flag_stack_clash_protection && to_allocate > get_probe_interval ()))
6839 frame->save_regs_using_mov = false;
6840
6841 if (ix86_using_red_zone ()
6842 && crtl->sp_is_unchanging
6843 && crtl->is_leaf
6844 && !ix86_pc_thunk_call_expanded
6845 && !ix86_current_function_calls_tls_descriptor)
6846 {
6847 frame->red_zone_size = to_allocate;
6848 if (frame->save_regs_using_mov)
6849 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6850 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6851 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6852 }
6853 else
6854 frame->red_zone_size = 0;
6855 frame->stack_pointer_offset -= frame->red_zone_size;
6856
6857 /* The SEH frame pointer location is near the bottom of the frame.
6858 This is enforced by the fact that the difference between the
6859 stack pointer and the frame pointer is limited to 240 bytes in
6860 the unwind data structure. */
6861 if (TARGET_SEH)
6862 {
6863 /* Force the frame pointer to point at or below the lowest register save
6864 area, see the SEH code in config/i386/winnt.cc for the rationale. */
6865 frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;
6866
6867 /* If we can leave the frame pointer where it is, do so; however return
6868 the establisher frame for __builtin_frame_address (0) or else if the
6869 frame overflows the SEH maximum frame size.
6870
6871 Note that the value returned by __builtin_frame_address (0) is quite
6872 constrained, because setjmp is piggybacked on the SEH machinery with
6873 recent versions of MinGW:
6874
6875 # elif defined(__SEH__)
6876 # if defined(__aarch64__) || defined(_ARM64_)
6877 # define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
6878 # elif (__MINGW_GCC_VERSION < 40702)
6879 # define setjmp(BUF) _setjmp((BUF), mingw_getsp())
6880 # else
6881 # define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
6882 # endif
6883
6884 and the second argument passed to _setjmp, if not null, is forwarded
6885 to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
6886 built an ExceptionRecord on the fly describing the setjmp buffer). */
6887 const HOST_WIDE_INT diff
6888 = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
6889 if (diff <= 255 && !crtl->accesses_prior_frames)
6890 {
6891 /* The resulting diff will be a multiple of 16 lower than 255,
6892 i.e. at most 240 as required by the unwind data structure. */
6893 frame->hard_frame_pointer_offset += (diff & 15);
6894 }
6895 else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
6896 {
6897 /* Ideally we'd determine what portion of the local stack frame
6898 (within the constraint of the lowest 240) is most heavily used.
6899 But without that complication, simply bias the frame pointer
6900 by 128 bytes so as to maximize the amount of the local stack
6901 frame that is addressable with 8-bit offsets. */
6902 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
6903 }
6904 else
6905 frame->hard_frame_pointer_offset = frame->hfp_save_offset;
6906 }
6907 }
6908
6909 /* This is semi-inlined memory_address_length, but simplified
6910 since we know that we're always dealing with reg+offset, and
6911 to avoid having to create and discard all that rtl. */
6912
6913 static inline int
6914 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
6915 {
6916 int len = 4;
6917
6918 if (offset == 0)
6919 {
6920 /* EBP and R13 cannot be encoded without an offset. */
6921 len = (regno == BP_REG || regno == R13_REG);
6922 }
6923 else if (IN_RANGE (offset, -128, 127))
6924 len = 1;
6925
6926 /* ESP and R12 must be encoded with a SIB byte. */
6927 if (regno == SP_REG || regno == R12_REG)
6928 len++;
6929
6930 return len;
6931 }
6932
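/* Worked examples of the encoding lengths computed below: 8(%rbp) needs
   only a disp8, so the length is 1; 8(%rsp) also needs a SIB byte, so 2;
   0(%r13) still needs a disp8 because R13, like EBP, cannot be encoded
   without a displacement, so 1; and any displacement outside [-128, 127]
   costs the full 4 bytes.  */
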
6933 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
6934 the frame save area. The register is saved at CFA - CFA_OFFSET. */
6935
6936 static bool
6937 sp_valid_at (HOST_WIDE_INT cfa_offset)
6938 {
6939 const struct machine_frame_state &fs = cfun->machine->fs;
6940 if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
6941 {
6942 /* Validate that the cfa_offset isn't in a "no-man's land". */
6943 gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
6944 return false;
6945 }
6946 return fs.sp_valid;
6947 }
6948
6949 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
6950 the frame save area. The register is saved at CFA - CFA_OFFSET. */
6951
6952 static inline bool
6953 fp_valid_at (HOST_WIDE_INT cfa_offset)
6954 {
6955 const struct machine_frame_state &fs = cfun->machine->fs;
6956 if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
6957 {
6958 /* Validate that the cfa_offset isn't in a "no-man's land". */
6959 gcc_assert (cfa_offset >= fs.sp_realigned_offset);
6960 return false;
6961 }
6962 return fs.fp_valid;
6963 }
6964
6965 /* Choose a base register based upon alignment requested, speed and/or
6966 size. */
6967
6968 static void
6969 choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
6970 HOST_WIDE_INT &base_offset,
6971 unsigned int align_requested, unsigned int *align)
6972 {
6973 const struct machine_function *m = cfun->machine;
6974 unsigned int hfp_align;
6975 unsigned int drap_align;
6976 unsigned int sp_align;
6977 bool hfp_ok = fp_valid_at (cfa_offset);
6978 bool drap_ok = m->fs.drap_valid;
6979 bool sp_ok = sp_valid_at (cfa_offset);
6980
6981 hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
6982
6983 /* Filter out any registers that don't meet the requested alignment
6984 criteria. */
6985 if (align_requested)
6986 {
6987 if (m->fs.realigned)
6988 hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
6989 /* SEH unwind code does not currently support REG_CFA_EXPRESSION
6990 notes (which we would need to use a realigned stack pointer),
6991 so disable on SEH targets. */
6992 else if (m->fs.sp_realigned)
6993 sp_align = crtl->stack_alignment_needed;
6994
6995 hfp_ok = hfp_ok && hfp_align >= align_requested;
6996 drap_ok = drap_ok && drap_align >= align_requested;
6997 sp_ok = sp_ok && sp_align >= align_requested;
6998 }
6999
7000 if (m->use_fast_prologue_epilogue)
7001 {
7002 /* Choose the base register most likely to allow the most scheduling
7003 opportunities. Generally FP is valid throughout the function,
7004 while DRAP must be reloaded within the epilogue. But choose either
7005 over the SP due to increased encoding size. */
7006
7007 if (hfp_ok)
7008 {
7009 base_reg = hard_frame_pointer_rtx;
7010 base_offset = m->fs.fp_offset - cfa_offset;
7011 }
7012 else if (drap_ok)
7013 {
7014 base_reg = crtl->drap_reg;
7015 base_offset = 0 - cfa_offset;
7016 }
7017 else if (sp_ok)
7018 {
7019 base_reg = stack_pointer_rtx;
7020 base_offset = m->fs.sp_offset - cfa_offset;
7021 }
7022 }
7023 else
7024 {
7025 HOST_WIDE_INT toffset;
7026 int len = 16, tlen;
7027
7028 /* Choose the base register with the smallest address encoding.
7029 With a tie, choose FP > DRAP > SP. */
7030 if (sp_ok)
7031 {
7032 base_reg = stack_pointer_rtx;
7033 base_offset = m->fs.sp_offset - cfa_offset;
7034 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
7035 }
7036 if (drap_ok)
7037 {
7038 toffset = 0 - cfa_offset;
7039 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
7040 if (tlen <= len)
7041 {
7042 base_reg = crtl->drap_reg;
7043 base_offset = toffset;
7044 len = tlen;
7045 }
7046 }
7047 if (hfp_ok)
7048 {
7049 toffset = m->fs.fp_offset - cfa_offset;
7050 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
7051 if (tlen <= len)
7052 {
7053 base_reg = hard_frame_pointer_rtx;
7054 base_offset = toffset;
7055 }
7056 }
7057 }
7058
7059 /* Set the align return value. */
7060 if (align)
7061 {
7062 if (base_reg == stack_pointer_rtx)
7063 *align = sp_align;
7064 else if (base_reg == crtl->drap_reg)
7065 *align = drap_align;
7066 else if (base_reg == hard_frame_pointer_rtx)
7067 *align = hfp_align;
7068 }
7069 }
7070
7071 /* Return an RTX that points to CFA_OFFSET within the stack frame and
7072 the alignment of address. If ALIGN is non-null, it should point to
7073 an alignment value (in bits) that is preferred or zero and will
7074 receive the alignment of the base register that was selected,
7075 irrespective of whether or not CFA_OFFSET is a multiple of that
7076 alignment value. If it is possible for the base register offset to be
7077 non-immediate then SCRATCH_REGNO should specify a scratch register to
7078 use.
7079
7080 The valid base registers are taken from CFUN->MACHINE->FS. */
7081
7082 static rtx
7083 choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
7084 unsigned int scratch_regno = INVALID_REGNUM)
7085 {
7086 rtx base_reg = NULL;
7087 HOST_WIDE_INT base_offset = 0;
7088
7089 /* If a specific alignment is requested, try to get a base register
7090 with that alignment first. */
7091 if (align && *align)
7092 choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
7093
7094 if (!base_reg)
7095 choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
7096
7097 gcc_assert (base_reg != NULL);
7098
7099 rtx base_offset_rtx = GEN_INT (base_offset);
7100
7101 if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
7102 {
7103 gcc_assert (scratch_regno != INVALID_REGNUM);
7104
7105 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
7106 emit_move_insn (scratch_reg, base_offset_rtx);
7107
7108 return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
7109 }
7110
7111 return plus_constant (Pmode, base_reg, base_offset);
7112 }
7113
7114 /* Emit code to save registers in the prologue. */
7115
7116 static void
7117 ix86_emit_save_regs (void)
7118 {
7119 unsigned int regno;
7120 rtx_insn *insn;
7121
7122 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7123 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7124 {
7125 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
7126 RTX_FRAME_RELATED_P (insn) = 1;
7127 }
7128 }
7129
7130 /* Emit a single register save at CFA - CFA_OFFSET. */
7131
7132 static void
7133 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
7134 HOST_WIDE_INT cfa_offset)
7135 {
7136 struct machine_function *m = cfun->machine;
7137 rtx reg = gen_rtx_REG (mode, regno);
7138 rtx mem, addr, base, insn;
7139 unsigned int align = GET_MODE_ALIGNMENT (mode);
7140
7141 addr = choose_baseaddr (cfa_offset, &align);
7142 mem = gen_frame_mem (mode, addr);
7143
7144 /* The location alignment depends upon the base register. */
7145 align = MIN (GET_MODE_ALIGNMENT (mode), align);
7146 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
7147 set_mem_align (mem, align);
7148
7149 insn = emit_insn (gen_rtx_SET (mem, reg));
7150 RTX_FRAME_RELATED_P (insn) = 1;
7151
7152 base = addr;
7153 if (GET_CODE (base) == PLUS)
7154 base = XEXP (base, 0);
7155 gcc_checking_assert (REG_P (base));
7156
7157 /* When saving registers into a re-aligned local stack frame, avoid
7158 any tricky guessing by dwarf2out. */
7159 if (m->fs.realigned)
7160 {
7161 gcc_checking_assert (stack_realign_drap);
7162
7163 if (regno == REGNO (crtl->drap_reg))
7164 {
7165 /* A bit of a hack. We force the DRAP register to be saved in
7166 the re-aligned stack frame, which provides us with a copy
7167 of the CFA that will last past the prologue. Install it. */
7168 gcc_checking_assert (cfun->machine->fs.fp_valid);
7169 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
7170 cfun->machine->fs.fp_offset - cfa_offset);
7171 mem = gen_rtx_MEM (mode, addr);
7172 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
7173 }
7174 else
7175 {
7176 /* The frame pointer is a stable reference within the
7177 aligned frame. Use it. */
7178 gcc_checking_assert (cfun->machine->fs.fp_valid);
7179 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
7180 cfun->machine->fs.fp_offset - cfa_offset);
7181 mem = gen_rtx_MEM (mode, addr);
7182 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
7183 }
7184 }
7185
7186 else if (base == stack_pointer_rtx && m->fs.sp_realigned
7187 && cfa_offset >= m->fs.sp_realigned_offset)
7188 {
7189 gcc_checking_assert (stack_realign_fp);
7190 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
7191 }
7192
7193 /* The memory may not be relative to the current CFA register,
7194 which means that we may need to generate a new pattern for
7195 use by the unwind info. */
7196 else if (base != m->fs.cfa_reg)
7197 {
7198 addr = plus_constant (Pmode, m->fs.cfa_reg,
7199 m->fs.cfa_offset - cfa_offset);
7200 mem = gen_rtx_MEM (mode, addr);
7201 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
7202 }
7203 }
7204
7205 /* Emit code to save registers using MOV insns.
7206 First register is stored at CFA - CFA_OFFSET. */
7207 static void
7208 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
7209 {
7210 unsigned int regno;
7211
7212 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7213 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7214 {
7215 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
7216 cfa_offset -= UNITS_PER_WORD;
7217 }
7218 }
7219
7220 /* Emit code to save SSE registers using MOV insns.
7221 First register is stored at CFA - CFA_OFFSET. */
7222 static void
7223 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
7224 {
7225 unsigned int regno;
7226
7227 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7228 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7229 {
7230 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
7231 cfa_offset -= GET_MODE_SIZE (V4SFmode);
7232 }
7233 }
7234
7235 static GTY(()) rtx queued_cfa_restores;
7236
7237 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
7238 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
7239 Don't add the note if the previously saved value will be left untouched
7240 within stack red-zone till return, as unwinders can find the same value
7241 in the register and on the stack. */
7242
7243 static void
7244 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
7245 {
7246 if (!crtl->shrink_wrapped
7247 && cfa_offset <= cfun->machine->fs.red_zone_offset)
7248 return;
7249
7250 if (insn)
7251 {
7252 add_reg_note (insn, REG_CFA_RESTORE, reg);
7253 RTX_FRAME_RELATED_P (insn) = 1;
7254 }
7255 else
7256 queued_cfa_restores
7257 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
7258 }
7259
7260 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
7261
7262 static void
7263 ix86_add_queued_cfa_restore_notes (rtx insn)
7264 {
7265 rtx last;
7266 if (!queued_cfa_restores)
7267 return;
7268 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
7269 ;
7270 XEXP (last, 1) = REG_NOTES (insn);
7271 REG_NOTES (insn) = queued_cfa_restores;
7272 queued_cfa_restores = NULL_RTX;
7273 RTX_FRAME_RELATED_P (insn) = 1;
7274 }
7275
7276 /* Expand prologue or epilogue stack adjustment.
7277 The pattern exists to put a dependency on all ebp-based memory accesses.
7278 STYLE should be negative if instructions should be marked as frame related,
7279 zero if %r11 register is live and cannot be freely used and positive
7280 otherwise. */
7281
7282 static rtx
7283 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
7284 int style, bool set_cfa)
7285 {
7286 struct machine_function *m = cfun->machine;
7287 rtx addend = offset;
7288 rtx insn;
7289 bool add_frame_related_expr = false;
7290
7291 if (!x86_64_immediate_operand (offset, Pmode))
7292 {
7293 /* r11 is used by indirect sibcall return as well, set before the
7294 epilogue and used after the epilogue. */
7295 if (style)
7296 addend = gen_rtx_REG (Pmode, R11_REG);
7297 else
7298 {
7299 gcc_assert (src != hard_frame_pointer_rtx
7300 && dest != hard_frame_pointer_rtx);
7301 addend = hard_frame_pointer_rtx;
7302 }
7303 emit_insn (gen_rtx_SET (addend, offset));
7304 if (style < 0)
7305 add_frame_related_expr = true;
7306 }
7307
7308 insn = emit_insn (gen_pro_epilogue_adjust_stack_add
7309 (Pmode, dest, src, addend));
7310 if (style >= 0)
7311 ix86_add_queued_cfa_restore_notes (insn);
7312
7313 if (set_cfa)
7314 {
7315 rtx r;
7316
7317 gcc_assert (m->fs.cfa_reg == src);
7318 m->fs.cfa_offset += INTVAL (offset);
7319 m->fs.cfa_reg = dest;
7320
7321 r = gen_rtx_PLUS (Pmode, src, offset);
7322 r = gen_rtx_SET (dest, r);
7323 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
7324 RTX_FRAME_RELATED_P (insn) = 1;
7325 }
7326 else if (style < 0)
7327 {
7328 RTX_FRAME_RELATED_P (insn) = 1;
7329 if (add_frame_related_expr)
7330 {
7331 rtx r = gen_rtx_PLUS (Pmode, src, offset);
7332 r = gen_rtx_SET (dest, r);
7333 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
7334 }
7335 }
7336
7337 if (dest == stack_pointer_rtx)
7338 {
7339 HOST_WIDE_INT ooffset = m->fs.sp_offset;
7340 bool valid = m->fs.sp_valid;
7341 bool realigned = m->fs.sp_realigned;
7342
7343 if (src == hard_frame_pointer_rtx)
7344 {
7345 valid = m->fs.fp_valid;
7346 realigned = false;
7347 ooffset = m->fs.fp_offset;
7348 }
7349 else if (src == crtl->drap_reg)
7350 {
7351 valid = m->fs.drap_valid;
7352 realigned = false;
7353 ooffset = 0;
7354 }
7355 else
7356 {
7357 /* Else there are two possibilities: SP itself, which we set
7358 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
7359 taken care of by hand along the eh_return path. */
7360 gcc_checking_assert (src == stack_pointer_rtx
7361 || offset == const0_rtx);
7362 }
7363
7364 m->fs.sp_offset = ooffset - INTVAL (offset);
7365 m->fs.sp_valid = valid;
7366 m->fs.sp_realigned = realigned;
7367 }
7368 return insn;
7369 }
7370
7371 /* Find an available register to be used as dynamic realign argument
7372 pointer register. Such a register will be written in the prologue and
7373 used at the beginning of the body, so it must not be
7374 1. parameter passing register.
7375 2. GOT pointer.
7376 We reuse static-chain register if it is available. Otherwise, we
7377 use DI for i386 and R13 for x86-64. We chose R13 since it has
7378 shorter encoding.
7379
7380 Return: the regno of chosen register. */
7381
7382 static unsigned int
7383 find_drap_reg (void)
7384 {
7385 tree decl = cfun->decl;
7386
7387 /* Always use callee-saved register if there are no caller-saved
7388 registers. */
7389 if (TARGET_64BIT)
7390 {
7391 /* Use R13 for a nested function or a function that needs a static chain.
7392 Since a function with a tail call may use any caller-saved
7393 register in the epilogue, DRAP must not use a caller-saved
7394 register in such a case. */
7395 if (DECL_STATIC_CHAIN (decl)
7396 || cfun->machine->no_caller_saved_registers
7397 || crtl->tail_call_emit)
7398 return R13_REG;
7399
7400 return R10_REG;
7401 }
7402 else
7403 {
7404 /* Use DI for a nested function or a function that needs a static chain.
7405 Since a function with a tail call may use any caller-saved
7406 register in the epilogue, DRAP must not use a caller-saved
7407 register in such a case. */
7408 if (DECL_STATIC_CHAIN (decl)
7409 || cfun->machine->no_caller_saved_registers
7410 || crtl->tail_call_emit
7411 || crtl->calls_eh_return)
7412 return DI_REG;
7413
7414 /* Reuse static chain register if it isn't used for parameter
7415 passing. */
7416 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
7417 {
7418 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
7419 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
7420 return CX_REG;
7421 }
7422 return DI_REG;
7423 }
7424 }
7425
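/* For example, a 64-bit function that needs a static chain or contains a
   tail call ends up with R13 as its DRAP register, while an ordinary
   64-bit function gets R10; in 32-bit code the static-chain register %ecx
   is reused when it is not needed for parameter passing, and %edi is used
   otherwise.  */
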
7426 /* Return minimum incoming stack alignment. */
7427
7428 static unsigned int
7429 ix86_minimum_incoming_stack_boundary (bool sibcall)
7430 {
7431 unsigned int incoming_stack_boundary;
7432
7433 /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */
7434 if (cfun->machine->func_type != TYPE_NORMAL)
7435 incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
7436 /* Prefer the one specified at command line. */
7437 else if (ix86_user_incoming_stack_boundary)
7438 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
7439 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
7440 when -mstackrealign is used, this isn't a sibcall check, and the
7441 estimated stack alignment is 128 bits. */
7442 else if (!sibcall
7443 && ix86_force_align_arg_pointer
7444 && crtl->stack_alignment_estimated == 128)
7445 incoming_stack_boundary = MIN_STACK_BOUNDARY;
7446 else
7447 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
7448
7449 /* Incoming stack alignment can be changed on individual functions
7450 via force_align_arg_pointer attribute. We use the smallest
7451 incoming stack boundary. */
7452 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
7453 && lookup_attribute ("force_align_arg_pointer",
7454 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
7455 incoming_stack_boundary = MIN_STACK_BOUNDARY;
7456
7457 /* The incoming stack frame has to be aligned at least at
7458 parm_stack_boundary. */
7459 if (incoming_stack_boundary < crtl->parm_stack_boundary)
7460 incoming_stack_boundary = crtl->parm_stack_boundary;
7461
7462 /* The stack at the entry of main is aligned by the runtime. We use
7463 the smallest incoming stack boundary. */
7464 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
7465 && DECL_NAME (current_function_decl)
7466 && MAIN_NAME_P (DECL_NAME (current_function_decl))
7467 && DECL_FILE_SCOPE_P (current_function_decl))
7468 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
7469
7470 return incoming_stack_boundary;
7471 }
7472
7473 /* Update incoming stack boundary and estimated stack alignment. */
7474
7475 static void
7476 ix86_update_stack_boundary (void)
7477 {
7478 ix86_incoming_stack_boundary
7479 = ix86_minimum_incoming_stack_boundary (false);
7480
7481 /* x86_64 varargs need 16-byte stack alignment for the register save area. */
7482 if (TARGET_64BIT
7483 && cfun->stdarg
7484 && crtl->stack_alignment_estimated < 128)
7485 crtl->stack_alignment_estimated = 128;
7486
7487 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
7488 if (ix86_tls_descriptor_calls_expanded_in_cfun
7489 && crtl->preferred_stack_boundary < 128)
7490 crtl->preferred_stack_boundary = 128;
7491 }
7492
7493 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
7494 needed or an rtx for DRAP otherwise. */
7495
7496 static rtx
7497 ix86_get_drap_rtx (void)
7498 {
7499 /* We must use DRAP if there are outgoing arguments on the stack or
7500 the stack pointer register is clobbered by an asm statement and
7501 ACCUMULATE_OUTGOING_ARGS is false. */
7502 if (ix86_force_drap
7503 || ((cfun->machine->outgoing_args_on_stack
7504 || crtl->sp_is_clobbered_by_asm)
7505 && !ACCUMULATE_OUTGOING_ARGS))
7506 crtl->need_drap = true;
7507
7508 if (stack_realign_drap)
7509 {
7510 /* Assign DRAP to vDRAP and return vDRAP. */
7511 unsigned int regno = find_drap_reg ();
7512 rtx drap_vreg;
7513 rtx arg_ptr;
7514 rtx_insn *seq, *insn;
7515
7516 arg_ptr = gen_rtx_REG (Pmode, regno);
7517 crtl->drap_reg = arg_ptr;
7518
7519 start_sequence ();
7520 drap_vreg = copy_to_reg (arg_ptr);
7521 seq = get_insns ();
7522 end_sequence ();
7523
7524 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
7525 if (!optimize)
7526 {
7527 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
7528 RTX_FRAME_RELATED_P (insn) = 1;
7529 }
7530 return drap_vreg;
7531 }
7532 else
7533 return NULL;
7534 }
7535
7536 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
7537
7538 static rtx
7539 ix86_internal_arg_pointer (void)
7540 {
7541 return virtual_incoming_args_rtx;
7542 }
7543
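/* A scratch register obtained on function entry, together with a flag
   saying whether it had to be pushed (and therefore must be restored).  */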
7544 struct scratch_reg {
7545 rtx reg;
7546 bool saved;
7547 };
7548
7549 /* Return a short-lived scratch register for use on function entry.
7550 In 32-bit mode, it is valid only after the registers are saved
7551 in the prologue. This register must be released by means of
7552 release_scratch_register_on_entry once it is dead. */
7553
7554 static void
7555 get_scratch_register_on_entry (struct scratch_reg *sr)
7556 {
7557 int regno;
7558
7559 sr->saved = false;
7560
7561 if (TARGET_64BIT)
7562 {
7563 /* We always use R11 in 64-bit mode. */
7564 regno = R11_REG;
7565 }
7566 else
7567 {
7568 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
7569 bool fastcall_p
7570 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7571 bool thiscall_p
7572 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7573 bool static_chain_p = DECL_STATIC_CHAIN (decl);
7574 int regparm = ix86_function_regparm (fntype, decl);
7575 int drap_regno
7576 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
7577
7578 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
7579 for the static chain register. */
7580 if ((regparm < 1 || (fastcall_p && !static_chain_p))
7581 && drap_regno != AX_REG)
7582 regno = AX_REG;
7583 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
7584 for the static chain register. */
7585 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
7586 regno = AX_REG;
7587 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
7588 regno = DX_REG;
7589 /* ecx is the static chain register. */
7590 else if (regparm < 3 && !fastcall_p && !thiscall_p
7591 && !static_chain_p
7592 && drap_regno != CX_REG)
7593 regno = CX_REG;
7594 else if (ix86_save_reg (BX_REG, true, false))
7595 regno = BX_REG;
7596 /* esi is the static chain register. */
7597 else if (!(regparm == 3 && static_chain_p)
7598 && ix86_save_reg (SI_REG, true, false))
7599 regno = SI_REG;
7600 else if (ix86_save_reg (DI_REG, true, false))
7601 regno = DI_REG;
7602 else
7603 {
7604 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
7605 sr->saved = true;
7606 }
7607 }
7608
7609 sr->reg = gen_rtx_REG (Pmode, regno);
7610 if (sr->saved)
7611 {
7612 rtx_insn *insn = emit_insn (gen_push (sr->reg));
7613 RTX_FRAME_RELATED_P (insn) = 1;
7614 }
7615 }
7616
7617 /* Release a scratch register obtained from the preceding function.
7618
7619 If RELEASE_VIA_POP is true, we just pop the register off the stack
7620 to release it. This is what non-Linux systems use with -fstack-check.
7621
7622 Otherwise we use OFFSET to locate the saved register and the
7623 allocated stack space becomes part of the local frame and is
7624 deallocated by the epilogue. */
7625
7626 static void
7627 release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
7628 bool release_via_pop)
7629 {
7630 if (sr->saved)
7631 {
7632 if (release_via_pop)
7633 {
7634 struct machine_function *m = cfun->machine;
7635 rtx x, insn = emit_insn (gen_pop (sr->reg));
7636
7637 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
7638 RTX_FRAME_RELATED_P (insn) = 1;
7639 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
7640 x = gen_rtx_SET (stack_pointer_rtx, x);
7641 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
7642 m->fs.sp_offset -= UNITS_PER_WORD;
7643 }
7644 else
7645 {
7646 rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
7647 x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
7648 emit_insn (x);
7649 }
7650 }
7651 }
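/* A typical usage pattern for the two helpers above (a sketch; the offset
   and the release mode depend on the caller):

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... use sr.reg as a temporary ...
     release_scratch_register_on_entry (&sr, offset, release_via_pop);  */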
7652
7653 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
7654
7655 If INT_REGISTERS_SAVED is true, then integer registers have already been
7656 pushed on the stack.
7657
7658 If PROTECTION_AREA is true, then probe PROBE_INTERVAL plus a small dope
7659 beyond SIZE bytes.
7660
7661 This assumes no knowledge of the current probing state, i.e. it is never
7662 allowed to allocate more than PROBE_INTERVAL bytes of stack space without
7663 a suitable probe. */
7664
7665 static void
7666 ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
7667 const bool int_registers_saved,
7668 const bool protection_area)
7669 {
7670 struct machine_function *m = cfun->machine;
7671
7672 /* If this function does not statically allocate stack space, then
7673 no probes are needed. */
7674 if (!size)
7675 {
7676 /* However, the allocation of space via pushes for register
7677 saves could be viewed as allocating space, but without the
7678 need to probe. */
7679 if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
7680 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
7681 else
7682 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
7683 return;
7684 }
7685
7686 /* If we are a noreturn function, then we have to consider the
7687 possibility that we're called via a jump rather than a call.
7688
7689 Thus we don't have the implicit probe generated by saving the
7690 return address into the stack at the call. Thus, the stack
7691 pointer could be anywhere in the guard page. The safe thing
7692 to do is emit a probe now.
7693
7694 The probe can be avoided if we have already emitted any callee
7695 register saves into the stack or have a frame pointer (which will
7696 have been saved as well). Those saves will function as implicit
7697 probes.
7698
7699 ?!? This should be revamped to work like aarch64 and s390 where
7700 we track the offset from the most recent probe. Normally that
7701 offset would be zero. For a noreturn function we would reset
7702 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
7703 we just probe when we cross PROBE_INTERVAL. */
7704 if (TREE_THIS_VOLATILE (cfun->decl)
7705 && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
7706 {
7707 /* We can safely use any register here since we're just going to push
7708 its value and immediately pop it back. But we do try and avoid
7709 argument passing registers so as not to introduce dependencies in
7710 the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
7711 rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
7712 rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
7713 rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
7714 m->fs.sp_offset -= UNITS_PER_WORD;
7715 if (m->fs.cfa_reg == stack_pointer_rtx)
7716 {
7717 m->fs.cfa_offset -= UNITS_PER_WORD;
7718 rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
7719 x = gen_rtx_SET (stack_pointer_rtx, x);
7720 add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
7721 RTX_FRAME_RELATED_P (insn_push) = 1;
7722 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
7723 x = gen_rtx_SET (stack_pointer_rtx, x);
7724 add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
7725 RTX_FRAME_RELATED_P (insn_pop) = 1;
7726 }
7727 emit_insn (gen_blockage ());
7728 }
7729
7730 const HOST_WIDE_INT probe_interval = get_probe_interval ();
7731 const int dope = 4 * UNITS_PER_WORD;
7732
7733 /* If there is protection area, take it into account in the size. */
7734 if (protection_area)
7735 size += probe_interval + dope;
7736
7737 /* If we allocate less than the size of the guard statically,
7738 then no probing is necessary, but we do need to allocate
7739 the stack. */
7740 else if (size < (1 << param_stack_clash_protection_guard_size))
7741 {
7742 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7743 GEN_INT (-size), -1,
7744 m->fs.cfa_reg == stack_pointer_rtx);
7745 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
7746 return;
7747 }
7748
7749 /* We're allocating a large enough stack frame that we need to
7750 emit probes. Either emit them inline or in a loop depending
7751 on the size. */
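/* For instance (assuming the default 4 KiB probe interval and no
   protection area): for a SIZE of 10000 bytes the inline case below emits
   two probed 4096-byte allocations followed by an unprobed 1808-byte
   allocation for the residual.  */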
7752 if (size <= 4 * probe_interval)
7753 {
7754 HOST_WIDE_INT i;
7755 for (i = probe_interval; i <= size; i += probe_interval)
7756 {
7757 /* Allocate PROBE_INTERVAL bytes. */
7758 rtx insn
7759 = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7760 GEN_INT (-probe_interval), -1,
7761 m->fs.cfa_reg == stack_pointer_rtx);
7762 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
7763
7764 /* And probe at *sp. */
7765 emit_stack_probe (stack_pointer_rtx);
7766 emit_insn (gen_blockage ());
7767 }
7768
7769 /* We need to allocate space for the residual, but we do not need
7770 to probe the residual... */
7771 HOST_WIDE_INT residual = (i - probe_interval - size);
7772 if (residual)
7773 {
7774 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7775 GEN_INT (residual), -1,
7776 m->fs.cfa_reg == stack_pointer_rtx);
7777
7778 /* ...except if there is a protection area to maintain. */
7779 if (protection_area)
7780 emit_stack_probe (stack_pointer_rtx);
7781 }
7782
7783 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
7784 }
7785 else
7786 {
7787 /* We expect the GP registers to be saved when probes are used
7788 as the probing sequences might need a scratch register and
7789 the routine to allocate one assumes the integer registers
7790 have already been saved. */
7791 gcc_assert (int_registers_saved);
7792
7793 struct scratch_reg sr;
7794 get_scratch_register_on_entry (&sr);
7795
7796 /* If we needed to save a register, then account for any space
7797 that was pushed (we are not going to pop the register when
7798 we do the restore). */
7799 if (sr.saved)
7800 size -= UNITS_PER_WORD;
7801
7802 /* Step 1: round SIZE down to a multiple of the interval. */
7803 HOST_WIDE_INT rounded_size = size & -probe_interval;
7804
7805 /* Step 2: compute final value of the loop counter. Use lea if
7806 possible. */
7807 rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
7808 rtx insn;
7809 if (address_no_seg_operand (addr, Pmode))
7810 insn = emit_insn (gen_rtx_SET (sr.reg, addr));
7811 else
7812 {
7813 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
7814 insn = emit_insn (gen_rtx_SET (sr.reg,
7815 gen_rtx_PLUS (Pmode, sr.reg,
7816 stack_pointer_rtx)));
7817 }
7818 if (m->fs.cfa_reg == stack_pointer_rtx)
7819 {
7820 add_reg_note (insn, REG_CFA_DEF_CFA,
7821 plus_constant (Pmode, sr.reg,
7822 m->fs.cfa_offset + rounded_size));
7823 RTX_FRAME_RELATED_P (insn) = 1;
7824 }
7825
7826 /* Step 3: the loop. */
7827 rtx size_rtx = GEN_INT (rounded_size);
7828 insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
7829 size_rtx));
7830 if (m->fs.cfa_reg == stack_pointer_rtx)
7831 {
7832 m->fs.cfa_offset += rounded_size;
7833 add_reg_note (insn, REG_CFA_DEF_CFA,
7834 plus_constant (Pmode, stack_pointer_rtx,
7835 m->fs.cfa_offset));
7836 RTX_FRAME_RELATED_P (insn) = 1;
7837 }
7838 m->fs.sp_offset += rounded_size;
7839 emit_insn (gen_blockage ());
7840
7841 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
7842 is equal to ROUNDED_SIZE. */
7843
7844 if (size != rounded_size)
7845 {
7846 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7847 GEN_INT (rounded_size - size), -1,
7848 m->fs.cfa_reg == stack_pointer_rtx);
7849
7850 if (protection_area)
7851 emit_stack_probe (stack_pointer_rtx);
7852 }
7853
7854 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
7855
7856 /* This does not deallocate the space reserved for the scratch
7857 register. That will be deallocated in the epilogue. */
7858 release_scratch_register_on_entry (&sr, size, false);
7859 }
7860
7861 /* Adjust back to account for the protection area. */
7862 if (protection_area)
7863 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7864 GEN_INT (probe_interval + dope), -1,
7865 m->fs.cfa_reg == stack_pointer_rtx);
7866
7867 /* Make sure nothing is scheduled before we are done. */
7868 emit_insn (gen_blockage ());
7869 }
7870
7871 /* Adjust the stack pointer up to REG while probing it. */
7872
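/* A sketch of the AT&T-syntax loop emitted below (assuming a 64-bit target,
   the default 4 KiB probe interval, and %r11 standing in for REG; the label
   name is illustrative):

	.LPSRL0:
	subq	$4096, %rsp
	orq	$0, (%rsp)
	cmpq	%r11, %rsp
	jne	.LPSRL0
*/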
7873 const char *
7874 output_adjust_stack_and_probe (rtx reg)
7875 {
7876 static int labelno = 0;
7877 char loop_lab[32];
7878 rtx xops[2];
7879
7880 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
7881
7882 /* Loop. */
7883 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
7884
7885 /* SP = SP - PROBE_INTERVAL. */
7886 xops[0] = stack_pointer_rtx;
7887 xops[1] = GEN_INT (get_probe_interval ());
7888 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
7889
7890 /* Probe at SP. */
7891 xops[1] = const0_rtx;
7892 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
7893
7894 /* Test if SP == LAST_ADDR. */
7895 xops[0] = stack_pointer_rtx;
7896 xops[1] = reg;
7897 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
7898
7899 /* Branch. */
7900 fputs ("\tjne\t", asm_out_file);
7901 assemble_name_raw (asm_out_file, loop_lab);
7902 fputc ('\n', asm_out_file);
7903
7904 return "";
7905 }
7906
7907 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
7908 inclusive. These are offsets from the current stack pointer.
7909
7910 INT_REGISTERS_SAVED is true if integer registers have already been
7911 pushed on the stack. */
7912
7913 static void
7914 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
7915 const bool int_registers_saved)
7916 {
7917 const HOST_WIDE_INT probe_interval = get_probe_interval ();
7918
7919 /* See if we have a constant small number of probes to generate. If so,
7920 that's the easy case. The run-time loop is made up of 6 insns in the
7921 generic case while the compile-time loop is made up of n insns for n
7922 intervals. */
7923 if (size <= 6 * probe_interval)
7924 {
7925 HOST_WIDE_INT i;
7926
7927 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
7928 it exceeds SIZE. If only one probe is needed, this will not
7929 generate any code. Then probe at FIRST + SIZE. */
7930 for (i = probe_interval; i < size; i += probe_interval)
7931 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7932 -(first + i)));
7933
7934 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7935 -(first + size)));
7936 }
7937
7938 /* Otherwise, do the same as above, but in a loop. Note that we must be
7939 extra careful with variables wrapping around because we might be at
7940 the very top (or the very bottom) of the address space and we have
7941 to be able to handle this case properly; in particular, we use an
7942 equality test for the loop condition. */
7943 else
7944 {
7945 /* We expect the GP registers to be saved when probes are used
7946 as the probing sequences might need a scratch register and
7947 the routine to allocate one assumes the integer registers
7948 have already been saved. */
7949 gcc_assert (int_registers_saved);
7950
7951 HOST_WIDE_INT rounded_size, last;
7952 struct scratch_reg sr;
7953
7954 get_scratch_register_on_entry (&sr);
7955
7956
7957 /* Step 1: round SIZE to the previous multiple of the interval. */
7958
7959 rounded_size = ROUND_DOWN (size, probe_interval);
7960
7961
7962 /* Step 2: compute initial and final value of the loop counter. */
7963
7964 /* TEST_OFFSET = FIRST. */
7965 emit_move_insn (sr.reg, GEN_INT (-first));
7966
7967 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
7968 last = first + rounded_size;
7969
7970
7971 /* Step 3: the loop
7972
7973 do
7974 {
7975 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
7976 probe at TEST_ADDR
7977 }
7978 while (TEST_ADDR != LAST_ADDR)
7979
7980 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
7981 until it is equal to ROUNDED_SIZE. */
7982
7983 emit_insn
7984 (gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last)));
7985
7986
7987 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
7988 that SIZE is equal to ROUNDED_SIZE. */
7989
7990 if (size != rounded_size)
7991 emit_stack_probe (plus_constant (Pmode,
7992 gen_rtx_PLUS (Pmode,
7993 stack_pointer_rtx,
7994 sr.reg),
7995 rounded_size - size));
7996
7997 release_scratch_register_on_entry (&sr, size, true);
7998 }
7999
8000 /* Make sure nothing is scheduled before we are done. */
8001 emit_insn (gen_blockage ());
8002 }
8003
8004 /* Probe a range of stack addresses from REG to END, inclusive. These are
8005 offsets from the current stack pointer. */
8006
8007 const char *
8008 output_probe_stack_range (rtx reg, rtx end)
8009 {
8010 static int labelno = 0;
8011 char loop_lab[32];
8012 rtx xops[3];
8013
8014 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
8015
8016 /* Loop. */
8017 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
8018
8019 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
8020 xops[0] = reg;
8021 xops[1] = GEN_INT (get_probe_interval ());
8022 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
8023
8024 /* Probe at TEST_ADDR. */
8025 xops[0] = stack_pointer_rtx;
8026 xops[1] = reg;
8027 xops[2] = const0_rtx;
8028 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
8029
8030 /* Test if TEST_ADDR == LAST_ADDR. */
8031 xops[0] = reg;
8032 xops[1] = end;
8033 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
8034
8035 /* Branch. */
8036 fputs ("\tjne\t", asm_out_file);
8037 assemble_name_raw (asm_out_file, loop_lab);
8038 fputc ('\n', asm_out_file);
8039
8040 return "";
8041 }
8042
8043 /* Set stack_frame_required to false if a stack frame isn't required.
8044 If a stack frame is required and CHECK_STACK_SLOT is true, update
8045 STACK_ALIGNMENT to the largest alignment, in bits, of any stack slot used. */
8046
8047 static void
8048 ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
8049 bool check_stack_slot)
8050 {
8051 HARD_REG_SET set_up_by_prologue, prologue_used;
8052 basic_block bb;
8053
8054 CLEAR_HARD_REG_SET (prologue_used);
8055 CLEAR_HARD_REG_SET (set_up_by_prologue);
8056 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
8057 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
8058 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
8059 HARD_FRAME_POINTER_REGNUM);
8060
8061 /* The preferred stack boundary is an upper bound on the stack alignment we need to consider here. */
8062 if (stack_alignment > crtl->preferred_stack_boundary)
8063 stack_alignment = crtl->preferred_stack_boundary;
8064
8065 bool require_stack_frame = false;
8066
8067 FOR_EACH_BB_FN (bb, cfun)
8068 {
8069 rtx_insn *insn;
8070 FOR_BB_INSNS (bb, insn)
8071 if (NONDEBUG_INSN_P (insn)
8072 && requires_stack_frame_p (insn, prologue_used,
8073 set_up_by_prologue))
8074 {
8075 require_stack_frame = true;
8076
8077 if (check_stack_slot)
8078 {
8079 /* Find the maximum stack alignment. */
8080 subrtx_iterator::array_type array;
8081 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
8082 if (MEM_P (*iter)
8083 && (reg_mentioned_p (stack_pointer_rtx,
8084 *iter)
8085 || reg_mentioned_p (frame_pointer_rtx,
8086 *iter)))
8087 {
8088 unsigned int alignment = MEM_ALIGN (*iter);
8089 if (alignment > stack_alignment)
8090 stack_alignment = alignment;
8091 }
8092 }
8093 }
8094 }
8095
8096 cfun->machine->stack_frame_required = require_stack_frame;
8097 }
8098
8099 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
8100 will guide prologue/epilogue to be generated in correct form. */
8101
8102 static void
8103 ix86_finalize_stack_frame_flags (void)
8104 {
8105 /* Check if stack realignment is really needed after reload, and
8106 store the result in cfun. */
8107 unsigned int incoming_stack_boundary
8108 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8109 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8110 unsigned int stack_alignment
8111 = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
8112 ? crtl->max_used_stack_slot_alignment
8113 : crtl->stack_alignment_needed);
8114 unsigned int stack_realign
8115 = (incoming_stack_boundary < stack_alignment);
8116 bool recompute_frame_layout_p = false;
8117
8118 if (crtl->stack_realign_finalized)
8119 {
8120 /* After stack_realign_needed is finalized, we can no longer
8121 change it. */
8122 gcc_assert (crtl->stack_realign_needed == stack_realign);
8123 return;
8124 }
8125
8126 /* It is always safe to compute max_used_stack_alignment. We
8127 compute it only if a 128-bit aligned load/store may be generated
8128 on a misaligned stack slot, which would lead to a segfault. */
8129 bool check_stack_slot
8130 = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
8131 ix86_find_max_used_stack_alignment (stack_alignment,
8132 check_stack_slot);
8133
8134 /* If the only reason for frame_pointer_needed is that we conservatively
8135 assumed stack realignment might be needed or -fno-omit-frame-pointer
8136 is used, but in the end nothing that needed the stack alignment had
8137 been spilled and there is no stack access, clear frame_pointer_needed
8138 and say we don't need stack realignment.
8139
8140 When a vector register is used for a piecewise move or store, we don't
8141 increase stack_alignment_needed as there is no register spill for
8142 a piecewise move or store. Since stack_realign_needed is set to true
8143 by checking stack_alignment_estimated, which is updated by pseudo
8144 vector register usage, we also need to check stack_realign_needed to
8145 eliminate the frame pointer. */
8146 if ((stack_realign
8147 || (!flag_omit_frame_pointer && optimize)
8148 || crtl->stack_realign_needed)
8149 && frame_pointer_needed
8150 && crtl->is_leaf
8151 && crtl->sp_is_unchanging
8152 && !ix86_current_function_calls_tls_descriptor
8153 && !crtl->accesses_prior_frames
8154 && !cfun->calls_alloca
8155 && !crtl->calls_eh_return
8156 /* See ira_setup_eliminable_regset for the rationale. */
8157 && !(STACK_CHECK_MOVING_SP
8158 && flag_stack_check
8159 && flag_exceptions
8160 && cfun->can_throw_non_call_exceptions)
8161 && !ix86_frame_pointer_required ()
8162 && ix86_get_frame_size () == 0
8163 && ix86_nsaved_sseregs () == 0
8164 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
8165 {
8166 if (cfun->machine->stack_frame_required)
8167 {
8168 /* Stack frame is required. If stack alignment needed is less
8169 than incoming stack boundary, don't realign stack. */
8170 stack_realign = incoming_stack_boundary < stack_alignment;
8171 if (!stack_realign)
8172 {
8173 crtl->max_used_stack_slot_alignment
8174 = incoming_stack_boundary;
8175 crtl->stack_alignment_needed
8176 = incoming_stack_boundary;
8177 /* Also update preferred_stack_boundary for leaf
8178 functions. */
8179 crtl->preferred_stack_boundary
8180 = incoming_stack_boundary;
8181 }
8182 }
8183 else
8184 {
8185 /* If drap has been set, but it actually isn't live at the
8186 start of the function, there is no reason to set it up. */
8187 if (crtl->drap_reg)
8188 {
8189 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
8190 if (! REGNO_REG_SET_P (DF_LR_IN (bb),
8191 REGNO (crtl->drap_reg)))
8192 {
8193 crtl->drap_reg = NULL_RTX;
8194 crtl->need_drap = false;
8195 }
8196 }
8197 else
8198 cfun->machine->no_drap_save_restore = true;
8199
8200 frame_pointer_needed = false;
8201 stack_realign = false;
8202 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
8203 crtl->stack_alignment_needed = incoming_stack_boundary;
8204 crtl->stack_alignment_estimated = incoming_stack_boundary;
8205 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
8206 crtl->preferred_stack_boundary = incoming_stack_boundary;
8207 df_finish_pass (true);
8208 df_scan_alloc (NULL);
8209 df_scan_blocks ();
8210 df_compute_regs_ever_live (true);
8211 df_analyze ();
8212
8213 if (flag_var_tracking)
8214 {
8215 /* Since frame pointer is no longer available, replace it with
8216 stack pointer - UNITS_PER_WORD in debug insns. */
8217 df_ref ref, next;
8218 for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
8219 ref; ref = next)
8220 {
8221 next = DF_REF_NEXT_REG (ref);
8222 if (!DF_REF_INSN_INFO (ref))
8223 continue;
8224
8225 /* Make sure the next ref is for a different instruction,
8226 so that we're not affected by the rescan. */
8227 rtx_insn *insn = DF_REF_INSN (ref);
8228 while (next && DF_REF_INSN (next) == insn)
8229 next = DF_REF_NEXT_REG (next);
8230
8231 if (DEBUG_INSN_P (insn))
8232 {
8233 bool changed = false;
8234 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
8235 {
8236 rtx *loc = DF_REF_LOC (ref);
8237 if (*loc == hard_frame_pointer_rtx)
8238 {
8239 *loc = plus_constant (Pmode,
8240 stack_pointer_rtx,
8241 -UNITS_PER_WORD);
8242 changed = true;
8243 }
8244 }
8245 if (changed)
8246 df_insn_rescan (insn);
8247 }
8248 }
8249 }
8250
8251 recompute_frame_layout_p = true;
8252 }
8253 }
8254 else if (crtl->max_used_stack_slot_alignment >= 128
8255 && cfun->machine->stack_frame_required)
8256 {
8257 /* We don't need to realign the stack. max_used_stack_alignment is
8258 used to decide how the stack frame should be aligned. This is
8259 independent of any psABI and of 32-bit vs. 64-bit. */
8260 cfun->machine->max_used_stack_alignment
8261 = stack_alignment / BITS_PER_UNIT;
8262 }
8263
8264 if (crtl->stack_realign_needed != stack_realign)
8265 recompute_frame_layout_p = true;
8266 crtl->stack_realign_needed = stack_realign;
8267 crtl->stack_realign_finalized = true;
8268 if (recompute_frame_layout_p)
8269 ix86_compute_frame_layout ();
8270 }
8271
8272 /* Delete SET_GOT right after entry block if it is allocated to reg. */
8273
8274 static void
8275 ix86_elim_entry_set_got (rtx reg)
8276 {
8277 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
8278 rtx_insn *c_insn = BB_HEAD (bb);
8279 if (!NONDEBUG_INSN_P (c_insn))
8280 c_insn = next_nonnote_nondebug_insn (c_insn);
8281 if (c_insn && NONJUMP_INSN_P (c_insn))
8282 {
8283 rtx pat = PATTERN (c_insn);
8284 if (GET_CODE (pat) == PARALLEL)
8285 {
8286 rtx vec = XVECEXP (pat, 0, 0);
8287 if (GET_CODE (vec) == SET
8288 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
8289 && REGNO (XEXP (vec, 0)) == REGNO (reg))
8290 delete_insn (c_insn);
8291 }
8292 }
8293 }
8294
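/* Return a SET rtx that stores REG into the frame slot at FRAME_REG +
   OFFSET when STORE is true, or loads REG from that slot otherwise.  */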
8295 static rtx
8296 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
8297 {
8298 rtx addr, mem;
8299
8300 if (offset)
8301 addr = plus_constant (Pmode, frame_reg, offset);
8302 mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
8303 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
8304 }
8305
8306 static inline rtx
8307 gen_frame_load (rtx reg, rtx frame_reg, int offset)
8308 {
8309 return gen_frame_set (reg, frame_reg, offset, false);
8310 }
8311
8312 static inline rtx
8313 gen_frame_store (rtx reg, rtx frame_reg, int offset)
8314 {
8315 return gen_frame_set (reg, frame_reg, offset, true);
8316 }
8317
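/* Emit the out-of-line ms2sysv register-save sequence for FRAME: set up
   RAX as the stub's base pointer, then emit a single PARALLEL that uses
   the save stub and describes each register to be saved as a frame store
   relative to RAX.  */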
8318 static void
8319 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
8320 {
8321 struct machine_function *m = cfun->machine;
8322 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
8323 + m->call_ms2sysv_extra_regs;
8324 rtvec v = rtvec_alloc (ncregs + 1);
8325 unsigned int align, i, vi = 0;
8326 rtx_insn *insn;
8327 rtx sym, addr;
8328 rtx rax = gen_rtx_REG (word_mode, AX_REG);
8329 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
8330
8331 /* AL should only be live with sysv_abi. */
8332 gcc_assert (!ix86_eax_live_at_start_p ());
8333 gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
8334
8335 /* Set up RAX as the stub's base pointer. We use stack_realign_offset so
8336 this works whether or not we've actually realigned the stack. */
8337 align = GET_MODE_ALIGNMENT (V4SFmode);
8338 addr = choose_baseaddr (frame.stack_realign_offset
8339 + xlogue.get_stub_ptr_offset (), &align, AX_REG);
8340 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
8341
8342 emit_insn (gen_rtx_SET (rax, addr));
8343
8344 /* Get the stub symbol. */
8345 sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
8346 : XLOGUE_STUB_SAVE);
8347 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8348
8349 for (i = 0; i < ncregs; ++i)
8350 {
8351 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
8352 rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
8353 r.regno);
8354 RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
8355 }
8356
8357 gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
8358
8359 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
8360 RTX_FRAME_RELATED_P (insn) = true;
8361 }
8362
8363 /* Generate and return an insn body to AND X with Y. */
8364
8365 static rtx_insn *
8366 gen_and2_insn (rtx x, rtx y)
8367 {
8368 enum insn_code icode = optab_handler (and_optab, GET_MODE (x));
8369
8370 gcc_assert (insn_operand_matches (icode, 0, x));
8371 gcc_assert (insn_operand_matches (icode, 1, x));
8372 gcc_assert (insn_operand_matches (icode, 2, y));
8373
8374 return GEN_FCN (icode) (x, x, y);
8375 }
8376
8377 /* Expand the prologue into a bunch of separate insns. */
8378
8379 void
8380 ix86_expand_prologue (void)
8381 {
8382 struct machine_function *m = cfun->machine;
8383 rtx insn, t;
8384 HOST_WIDE_INT allocate;
8385 bool int_registers_saved;
8386 bool sse_registers_saved;
8387 bool save_stub_call_needed;
8388 rtx static_chain = NULL_RTX;
8389
8390 ix86_last_zero_store_uid = 0;
8391 if (ix86_function_naked (current_function_decl))
8392 {
8393 if (flag_stack_usage_info)
8394 current_function_static_stack_size = 0;
8395 return;
8396 }
8397
8398 ix86_finalize_stack_frame_flags ();
8399
8400 /* DRAP should not coexist with stack_realign_fp */
8401 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8402
8403 memset (&m->fs, 0, sizeof (m->fs));
8404
8405 /* Initialize CFA state for before the prologue. */
8406 m->fs.cfa_reg = stack_pointer_rtx;
8407 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
8408
8409 /* Track SP offset to the CFA. We continue tracking this after we've
8410 swapped the CFA register away from SP. In the case of re-alignment
8411 this is fudged; we're interested in offsets within the local frame. */
8412 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8413 m->fs.sp_valid = true;
8414 m->fs.sp_realigned = false;
8415
8416 const struct ix86_frame &frame = cfun->machine->frame;
8417
8418 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
8419 {
8420 /* We should have already generated an error for any use of
8421 ms_hook on a nested function. */
8422 gcc_checking_assert (!ix86_static_chain_on_stack);
8423
8424 /* Check if profiling is active and we shall use the
8425 profiling-before-prologue variant. If so, sorry. */
8426 if (crtl->profile && flag_fentry != 0)
8427 sorry ("%<ms_hook_prologue%> attribute is not compatible "
8428 "with %<-mfentry%> for 32-bit");
8429
8430 /* In ix86_asm_output_function_label we emitted:
8431 8b ff movl.s %edi,%edi
8432 55 push %ebp
8433 8b ec movl.s %esp,%ebp
8434
8435 This matches the hookable function prologue in Win32 API
8436 functions in Microsoft Windows XP Service Pack 2 and newer.
8437 Wine uses this to enable Windows apps to hook the Win32 API
8438 functions provided by Wine.
8439
8440 What that means is that we've already set up the frame pointer. */
8441
8442 if (frame_pointer_needed
8443 && !(crtl->drap_reg && crtl->stack_realign_needed))
8444 {
8445 rtx push, mov;
8446
8447 /* We've decided to use the frame pointer already set up.
8448 Describe this to the unwinder by pretending that both
8449 push and mov insns happen right here.
8450
8451 Putting the unwind info here at the end of the ms_hook
8452 is done so that we can make absolutely certain we get
8453 the required byte sequence at the start of the function,
8454 rather than relying on an assembler that can produce
8455 the exact encoding required.
8456
8457 However it does mean (in the unpatched case) that we have
8458 a 1 insn window where the asynchronous unwind info is
8459 incorrect. However, if we placed the unwind info at
8460 its correct location we would have incorrect unwind info
8461 in the patched case. Which is probably all moot since
8462 I don't expect Wine generates dwarf2 unwind info for the
8463 system libraries that use this feature. */
8464
8465 insn = emit_insn (gen_blockage ());
8466
8467 push = gen_push (hard_frame_pointer_rtx);
8468 mov = gen_rtx_SET (hard_frame_pointer_rtx,
8469 stack_pointer_rtx);
8470 RTX_FRAME_RELATED_P (push) = 1;
8471 RTX_FRAME_RELATED_P (mov) = 1;
8472
8473 RTX_FRAME_RELATED_P (insn) = 1;
8474 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8475 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
8476
8477 /* Note that gen_push incremented m->fs.cfa_offset, even
8478 though we didn't emit the push insn here. */
8479 m->fs.cfa_reg = hard_frame_pointer_rtx;
8480 m->fs.fp_offset = m->fs.cfa_offset;
8481 m->fs.fp_valid = true;
8482 }
8483 else
8484 {
8485 /* The frame pointer is not needed so pop %ebp again.
8486 This leaves us with a pristine state. */
8487 emit_insn (gen_pop (hard_frame_pointer_rtx));
8488 }
8489 }
8490
8491 /* The first insn of a function that accepts its static chain on the
8492 stack is to push the register that would be filled in by a direct
8493 call. This insn will be skipped by the trampoline. */
8494 else if (ix86_static_chain_on_stack)
8495 {
8496 static_chain = ix86_static_chain (cfun->decl, false);
8497 insn = emit_insn (gen_push (static_chain));
8498 emit_insn (gen_blockage ());
8499
8500 /* We don't want to interpret this push insn as a register save,
8501 only as a stack adjustment. The real copy of the register as
8502 a save will be done later, if needed. */
8503 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
8504 t = gen_rtx_SET (stack_pointer_rtx, t);
8505 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8506 RTX_FRAME_RELATED_P (insn) = 1;
8507 }
8508
8509 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
8510 DRAP is needed and stack realignment is really needed after reload. */
8511 if (stack_realign_drap)
8512 {
8513 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8514
8515 /* Can't use DRAP in interrupt function. */
8516 if (cfun->machine->func_type != TYPE_NORMAL)
8517 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
8518 "in interrupt service routine. This may be worked "
8519 "around by avoiding functions with aggregate return.");
8520
8521 /* Only need to push parameter pointer reg if it is caller saved. */
8522 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
8523 {
8524 /* Push arg pointer reg */
8525 insn = emit_insn (gen_push (crtl->drap_reg));
8526 RTX_FRAME_RELATED_P (insn) = 1;
8527 }
8528
8529 /* Grab the argument pointer. */
8530 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
8531 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
8532 RTX_FRAME_RELATED_P (insn) = 1;
8533 m->fs.cfa_reg = crtl->drap_reg;
8534 m->fs.cfa_offset = 0;
8535
8536 /* Align the stack. */
8537 insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
8538 GEN_INT (-align_bytes)));
8539 RTX_FRAME_RELATED_P (insn) = 1;
8540
8541 /* Replicate the return address on the stack so that return
8542 address can be reached via (argp - 1) slot. This is needed
8543 to implement macro RETURN_ADDR_RTX and intrinsic function
8544 expand_builtin_return_addr etc. */
8545 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
8546 t = gen_frame_mem (word_mode, t);
8547 insn = emit_insn (gen_push (t));
8548 RTX_FRAME_RELATED_P (insn) = 1;
8549
8550 /* For the purposes of frame and register save area addressing,
8551 we've started over with a new frame. */
8552 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8553 m->fs.realigned = true;
8554
8555 if (static_chain)
8556 {
8557 /* Replicate static chain on the stack so that static chain
8558 can be reached via (argp - 2) slot. This is needed for
8559 nested function with stack realignment. */
8560 insn = emit_insn (gen_push (static_chain));
8561 RTX_FRAME_RELATED_P (insn) = 1;
8562 }
8563 }
8564
8565 int_registers_saved = (frame.nregs == 0);
8566 sse_registers_saved = (frame.nsseregs == 0);
8567 save_stub_call_needed = (m->call_ms2sysv);
8568 gcc_assert (sse_registers_saved || !save_stub_call_needed);
8569
8570 if (frame_pointer_needed && !m->fs.fp_valid)
8571 {
8572 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8573 slower on all targets. Also sdb didn't like it. */
8574 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8575 RTX_FRAME_RELATED_P (insn) = 1;
8576
8577 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
8578 {
8579 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8580 RTX_FRAME_RELATED_P (insn) = 1;
8581
8582 if (m->fs.cfa_reg == stack_pointer_rtx)
8583 m->fs.cfa_reg = hard_frame_pointer_rtx;
8584 m->fs.fp_offset = m->fs.sp_offset;
8585 m->fs.fp_valid = true;
8586 }
8587 }
8588
8589 if (!int_registers_saved)
8590 {
8591 /* If saving registers via PUSH, do so now. */
8592 if (!frame.save_regs_using_mov)
8593 {
8594 ix86_emit_save_regs ();
8595 int_registers_saved = true;
8596 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
8597 }
8598
8599 /* When using the red zone we may start register saving before allocating
8600 the stack frame, saving one cycle of the prologue. However, avoid
8601 doing this if we have to probe the stack; at least on x86_64 the
8602 stack probe can turn into a call that clobbers a red zone location. */
8603 else if (ix86_using_red_zone ()
8604 && (! TARGET_STACK_PROBE
8605 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
8606 {
8607 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
8608 cfun->machine->red_zone_used = true;
8609 int_registers_saved = true;
8610 }
8611 }
8612
8613 if (frame.red_zone_size != 0)
8614 cfun->machine->red_zone_used = true;
8615
8616 if (stack_realign_fp)
8617 {
8618 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8619 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8620
8621 /* Record last valid frame pointer offset. */
8622 m->fs.sp_realigned_fp_last = frame.reg_save_offset;
8623
8624 /* The computation of the size of the re-aligned stack frame means
8625 that we must allocate the size of the register save area before
8626 performing the actual alignment. Otherwise we cannot guarantee
8627 that there's enough storage above the realignment point. */
8628 allocate = frame.reg_save_offset - m->fs.sp_offset
8629 + frame.stack_realign_allocate;
8630 if (allocate)
8631 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8632 GEN_INT (-allocate), -1, false);
8633
8634 /* Align the stack. */
8635 emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
8636 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
8637 m->fs.sp_realigned_offset = m->fs.sp_offset
8638 - frame.stack_realign_allocate;
8639 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
8640 Beyond this point, stack access should be done via choose_baseaddr or
8641 by using sp_valid_at and fp_valid_at to determine the correct base
8642 register. Henceforth, any CFA offset should be thought of as logical
8643 and not physical. */
8644 gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
8645 gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
8646 m->fs.sp_realigned = true;
8647
8648 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
8649 is needed to describe where a register is saved using a realigned
8650 stack pointer, so we need to invalidate the stack pointer for that
8651 target. */
8652 if (TARGET_SEH)
8653 m->fs.sp_valid = false;
8654
8655 /* If SP offset is non-immediate after allocation of the stack frame,
8656 then emit SSE saves or stub call prior to allocating the rest of the
8657 stack frame. This is less efficient for the out-of-line stub because
8658 we can't combine allocations across the call barrier, but it's better
8659 than using a scratch register. */
8660 else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
8661 - m->fs.sp_realigned_offset),
8662 Pmode))
8663 {
8664 if (!sse_registers_saved)
8665 {
8666 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8667 sse_registers_saved = true;
8668 }
8669 else if (save_stub_call_needed)
8670 {
8671 ix86_emit_outlined_ms2sysv_save (frame);
8672 save_stub_call_needed = false;
8673 }
8674 }
8675 }
8676
8677 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
8678
8679 if (flag_stack_usage_info)
8680 {
8681 /* We start to count from ARG_POINTER. */
8682 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
8683
8684 /* If it was realigned, take into account the fake frame. */
8685 if (stack_realign_drap)
8686 {
8687 if (ix86_static_chain_on_stack)
8688 stack_size += UNITS_PER_WORD;
8689
8690 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
8691 stack_size += UNITS_PER_WORD;
8692
8693 /* This over-estimates by 1 minimal-stack-alignment-unit but
8694 mitigates that by counting in the new return address slot. */
8695 current_function_dynamic_stack_size
8696 += crtl->stack_alignment_needed / BITS_PER_UNIT;
8697 }
8698
8699 current_function_static_stack_size = stack_size;
8700 }
8701
8702 /* On SEH target with very large frame size, allocate an area to save
8703 SSE registers (as the very large allocation won't be described). */
8704 if (TARGET_SEH
8705 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
8706 && !sse_registers_saved)
8707 {
8708 HOST_WIDE_INT sse_size
8709 = frame.sse_reg_save_offset - frame.reg_save_offset;
8710
8711 gcc_assert (int_registers_saved);
8712
8713 /* No need to do stack checking as the area will be immediately
8714 written. */
8715 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8716 GEN_INT (-sse_size), -1,
8717 m->fs.cfa_reg == stack_pointer_rtx);
8718 allocate -= sse_size;
8719 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8720 sse_registers_saved = true;
8721 }
8722
8723 /* If stack clash protection is requested, then probe the stack. */
8724 if (allocate >= 0 && flag_stack_clash_protection)
8725 {
8726 ix86_adjust_stack_and_probe (allocate, int_registers_saved, false);
8727 allocate = 0;
8728 }
8729
8730 /* The stack has already been decremented by the instruction calling us
8731 so probe if the size is non-negative to preserve the protection area. */
8732 else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8733 {
8734 const HOST_WIDE_INT probe_interval = get_probe_interval ();
8735
8736 if (STACK_CHECK_MOVING_SP)
8737 {
8738 if (crtl->is_leaf
8739 && !cfun->calls_alloca
8740 && allocate <= probe_interval)
8741 ;
8742
8743 else
8744 {
8745 ix86_adjust_stack_and_probe (allocate, int_registers_saved, true);
8746 allocate = 0;
8747 }
8748 }
8749
8750 else
8751 {
8752 HOST_WIDE_INT size = allocate;
8753
8754 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
8755 size = 0x80000000 - get_stack_check_protect () - 1;
8756
8757 if (TARGET_STACK_PROBE)
8758 {
8759 if (crtl->is_leaf && !cfun->calls_alloca)
8760 {
8761 if (size > probe_interval)
8762 ix86_emit_probe_stack_range (0, size, int_registers_saved);
8763 }
8764 else
8765 ix86_emit_probe_stack_range (0,
8766 size + get_stack_check_protect (),
8767 int_registers_saved);
8768 }
8769 else
8770 {
8771 if (crtl->is_leaf && !cfun->calls_alloca)
8772 {
8773 if (size > probe_interval
8774 && size > get_stack_check_protect ())
8775 ix86_emit_probe_stack_range (get_stack_check_protect (),
8776 (size
8777 - get_stack_check_protect ()),
8778 int_registers_saved);
8779 }
8780 else
8781 ix86_emit_probe_stack_range (get_stack_check_protect (), size,
8782 int_registers_saved);
8783 }
8784 }
8785 }
8786
8787 if (allocate == 0)
8788 ;
8789 else if (!ix86_target_stack_probe ()
8790 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
8791 {
8792 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8793 GEN_INT (-allocate), -1,
8794 m->fs.cfa_reg == stack_pointer_rtx);
8795 }
8796 else
8797 {
8798 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8799 rtx r10 = NULL;
8800 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
8801 bool eax_live = ix86_eax_live_at_start_p ();
8802 bool r10_live = false;
8803
8804 if (TARGET_64BIT)
8805 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
8806
8807 if (eax_live)
8808 {
8809 insn = emit_insn (gen_push (eax));
8810 allocate -= UNITS_PER_WORD;
8811 /* Note that SEH directives need to continue tracking the stack
8812 pointer even after the frame pointer has been set up. */
8813 if (sp_is_cfa_reg || TARGET_SEH)
8814 {
8815 if (sp_is_cfa_reg)
8816 m->fs.cfa_offset += UNITS_PER_WORD;
8817 RTX_FRAME_RELATED_P (insn) = 1;
8818 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8819 gen_rtx_SET (stack_pointer_rtx,
8820 plus_constant (Pmode,
8821 stack_pointer_rtx,
8822 -UNITS_PER_WORD)));
8823 }
8824 }
8825
8826 if (r10_live)
8827 {
8828 r10 = gen_rtx_REG (Pmode, R10_REG);
8829 insn = emit_insn (gen_push (r10));
8830 allocate -= UNITS_PER_WORD;
8831 if (sp_is_cfa_reg || TARGET_SEH)
8832 {
8833 if (sp_is_cfa_reg)
8834 m->fs.cfa_offset += UNITS_PER_WORD;
8835 RTX_FRAME_RELATED_P (insn) = 1;
8836 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8837 gen_rtx_SET (stack_pointer_rtx,
8838 plus_constant (Pmode,
8839 stack_pointer_rtx,
8840 -UNITS_PER_WORD)));
8841 }
8842 }
8843
8844 emit_move_insn (eax, GEN_INT (allocate));
8845 emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));
8846
8847 /* Use the fact that AX still contains ALLOCATE. */
8848 insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
8849 (Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));
8850
8851 if (sp_is_cfa_reg || TARGET_SEH)
8852 {
8853 if (sp_is_cfa_reg)
8854 m->fs.cfa_offset += allocate;
8855 RTX_FRAME_RELATED_P (insn) = 1;
8856 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8857 gen_rtx_SET (stack_pointer_rtx,
8858 plus_constant (Pmode, stack_pointer_rtx,
8859 -allocate)));
8860 }
8861 m->fs.sp_offset += allocate;
8862
8863 /* Use stack_pointer_rtx for relative addressing so that code works for
8864 realigned stack. But this means that we need a blockage to prevent
8865 stores based on the frame pointer from being scheduled before. */
8866 if (r10_live && eax_live)
8867 {
8868 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
8869 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
8870 gen_frame_mem (word_mode, t));
8871 t = plus_constant (Pmode, t, UNITS_PER_WORD);
8872 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
8873 gen_frame_mem (word_mode, t));
8874 emit_insn (gen_memory_blockage ());
8875 }
8876 else if (eax_live || r10_live)
8877 {
8878 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
8879 emit_move_insn (gen_rtx_REG (word_mode,
8880 (eax_live ? AX_REG : R10_REG)),
8881 gen_frame_mem (word_mode, t));
8882 emit_insn (gen_memory_blockage ());
8883 }
8884 }
8885 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
8886
8887 /* If we haven't already set up the frame pointer, do so now. */
8888 if (frame_pointer_needed && !m->fs.fp_valid)
8889 {
8890 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
8891 GEN_INT (frame.stack_pointer_offset
8892 - frame.hard_frame_pointer_offset));
8893 insn = emit_insn (insn);
8894 RTX_FRAME_RELATED_P (insn) = 1;
8895 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
8896
8897 if (m->fs.cfa_reg == stack_pointer_rtx)
8898 m->fs.cfa_reg = hard_frame_pointer_rtx;
8899 m->fs.fp_offset = frame.hard_frame_pointer_offset;
8900 m->fs.fp_valid = true;
8901 }
8902
8903 if (!int_registers_saved)
8904 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
8905 if (!sse_registers_saved)
8906 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8907 else if (save_stub_call_needed)
8908 ix86_emit_outlined_ms2sysv_save (frame);
8909
8910 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
8911 in the prologue. */
8912 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
8913 {
8914 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
8915 insn = emit_insn (gen_set_got (pic));
8916 RTX_FRAME_RELATED_P (insn) = 1;
8917 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
8918 emit_insn (gen_prologue_use (pic));
8919 /* Delete an already emitted SET_GOT if it exists and is allocated to
8920 REAL_PIC_OFFSET_TABLE_REGNUM. */
8921 ix86_elim_entry_set_got (pic);
8922 }
8923
8924 if (crtl->drap_reg && !crtl->stack_realign_needed)
8925 {
8926 /* vDRAP was set up, but after reload it turns out stack realignment
8927 isn't necessary. Emit prologue code to set up DRAP without the
8928 stack realignment adjustment. */
8929 t = choose_baseaddr (0, NULL);
8930 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
8931 }
8932
8933 /* Prevent instructions from being scheduled into register save push
8934 sequence when access to the redzone area is done through frame pointer.
8935 The offset between the frame pointer and the stack pointer is calculated
8936 relative to the value of the stack pointer at the end of the function
8937 prologue, and moving instructions that access redzone area via frame
8938 pointer inside push sequence violates this assumption. */
8939 if (frame_pointer_needed && frame.red_zone_size)
8940 emit_insn (gen_memory_blockage ());
8941
8942 /* SEH requires that the prologue end within 256 bytes of the start of
8943 the function. Prevent instruction schedules that would extend that.
8944 Further, prevent alloca modifications to the stack pointer from being
8945 combined with prologue modifications. */
8946 if (TARGET_SEH)
8947 emit_insn (gen_prologue_use (stack_pointer_rtx));
8948 }
8949
8950 /* Emit code to restore REG using a POP insn. */
8951
8952 static void
8953 ix86_emit_restore_reg_using_pop (rtx reg)
8954 {
8955 struct machine_function *m = cfun->machine;
8956 rtx_insn *insn = emit_insn (gen_pop (reg));
8957
8958 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
8959 m->fs.sp_offset -= UNITS_PER_WORD;
8960
8961 if (m->fs.cfa_reg == crtl->drap_reg
8962 && REGNO (reg) == REGNO (crtl->drap_reg))
8963 {
8964 /* Previously we'd represented the CFA as an expression
8965 like *(%ebp - 8). We've just popped that value from
8966 the stack, which means we need to reset the CFA to
8967 the drap register. This will remain until we restore
8968 the stack pointer. */
8969 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8970 RTX_FRAME_RELATED_P (insn) = 1;
8971
8972 /* This means that the DRAP register is valid for addressing too. */
8973 m->fs.drap_valid = true;
8974 return;
8975 }
8976
8977 if (m->fs.cfa_reg == stack_pointer_rtx)
8978 {
8979 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
8980 x = gen_rtx_SET (stack_pointer_rtx, x);
8981 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
8982 RTX_FRAME_RELATED_P (insn) = 1;
8983
8984 m->fs.cfa_offset -= UNITS_PER_WORD;
8985 }
8986
8987 /* When the frame pointer is the CFA, and we pop it, we are
8988 swapping back to the stack pointer as the CFA. This happens
8989 for stack frames that don't allocate other data, so we assume
8990 the stack pointer is now pointing at the return address, i.e.
8991 the function entry state, which makes the offset be 1 word. */
8992 if (reg == hard_frame_pointer_rtx)
8993 {
8994 m->fs.fp_valid = false;
8995 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
8996 {
8997 m->fs.cfa_reg = stack_pointer_rtx;
8998 m->fs.cfa_offset -= UNITS_PER_WORD;
8999
9000 add_reg_note (insn, REG_CFA_DEF_CFA,
9001 plus_constant (Pmode, stack_pointer_rtx,
9002 m->fs.cfa_offset));
9003 RTX_FRAME_RELATED_P (insn) = 1;
9004 }
9005 }
9006 }
9007
9008 /* Emit code to restore saved registers using POP insns. */
9009
9010 static void
9011 ix86_emit_restore_regs_using_pop (void)
9012 {
9013 unsigned int regno;
9014
9015 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9016 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
9017 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
9018 }
9019
9020 /* Emit code and notes for the LEAVE instruction. If insn is non-null,
9021 omits the emit and only attaches the notes. */
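/* (LEAVE is equivalent to "mov %ebp, %esp" followed by "pop %ebp", which
   is why SP becomes valid again and FP becomes invalid below.)  */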
9022
9023 static void
9024 ix86_emit_leave (rtx_insn *insn)
9025 {
9026 struct machine_function *m = cfun->machine;
9027
9028 if (!insn)
9029 insn = emit_insn (gen_leave (word_mode));
9030
9031 ix86_add_queued_cfa_restore_notes (insn);
9032
9033 gcc_assert (m->fs.fp_valid);
9034 m->fs.sp_valid = true;
9035 m->fs.sp_realigned = false;
9036 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
9037 m->fs.fp_valid = false;
9038
9039 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9040 {
9041 m->fs.cfa_reg = stack_pointer_rtx;
9042 m->fs.cfa_offset = m->fs.sp_offset;
9043
9044 add_reg_note (insn, REG_CFA_DEF_CFA,
9045 plus_constant (Pmode, stack_pointer_rtx,
9046 m->fs.sp_offset));
9047 RTX_FRAME_RELATED_P (insn) = 1;
9048 }
9049 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
9050 m->fs.fp_offset);
9051 }
9052
9053 /* Emit code to restore saved registers using MOV insns.
9054 First register is restored from CFA - CFA_OFFSET. */
9055 static void
9056 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
9057 bool maybe_eh_return)
9058 {
9059 struct machine_function *m = cfun->machine;
9060 unsigned int regno;
9061
9062 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9063 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
9064 {
9065 rtx reg = gen_rtx_REG (word_mode, regno);
9066 rtx mem;
9067 rtx_insn *insn;
9068
9069 mem = choose_baseaddr (cfa_offset, NULL);
9070 mem = gen_frame_mem (word_mode, mem);
9071 insn = emit_move_insn (reg, mem);
9072
9073 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
9074 {
9075 /* Previously we'd represented the CFA as an expression
9076 like *(%ebp - 8). We've just reloaded that value from
9077 the stack, which means we need to reset the CFA to
9078 the drap register. This will remain until we restore
9079 the stack pointer. */
9080 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9081 RTX_FRAME_RELATED_P (insn) = 1;
9082
9083 /* This means that the DRAP register is valid for addressing. */
9084 m->fs.drap_valid = true;
9085 }
9086 else
9087 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
9088
9089 cfa_offset -= UNITS_PER_WORD;
9090 }
9091 }
9092
9093 /* Emit code to restore saved SSE registers using MOV insns.
9094 The first register is restored from CFA - CFA_OFFSET. */
9095 static void
9096 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
9097 bool maybe_eh_return)
9098 {
9099 unsigned int regno;
9100
9101 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9102 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
9103 {
9104 rtx reg = gen_rtx_REG (V4SFmode, regno);
9105 rtx mem;
9106 unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
9107
9108 mem = choose_baseaddr (cfa_offset, &align);
9109 mem = gen_rtx_MEM (V4SFmode, mem);
9110
9111 /* The location alignment depends upon the base register. */
9112 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
9113 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
9114 set_mem_align (mem, align);
9115 emit_insn (gen_rtx_SET (reg, mem));
9116
9117 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
9118
9119 cfa_offset -= GET_MODE_SIZE (V4SFmode);
9120 }
9121 }
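
/* For illustration: each register restored above comes back via a
   16-byte vector load, roughly
       movaps  0x20(%rsp), %xmm6
   V4SFmode is used independently of how the register was last written;
   the assumption here is that the SF vector mode is chosen for its
   compact movaps/movups encodings.  */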
9122
9123 static void
9124 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
9125 bool use_call, int style)
9126 {
9127 struct machine_function *m = cfun->machine;
9128 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
9129 + m->call_ms2sysv_extra_regs;
9130 rtvec v;
9131 unsigned int elems_needed, align, i, vi = 0;
9132 rtx_insn *insn;
9133 rtx sym, tmp;
9134 rtx rsi = gen_rtx_REG (word_mode, SI_REG);
9135 rtx r10 = NULL_RTX;
9136 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
9137 HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
9138 HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
9139 rtx rsi_frame_load = NULL_RTX;
9140 HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
9141 enum xlogue_stub stub;
9142
9143 gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
9144
9145 /* If using a realigned stack, we should never start with padding. */
9146 gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
9147
9148 /* Setup RSI as the stub's base pointer. */
9149 align = GET_MODE_ALIGNMENT (V4SFmode);
9150 tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
9151 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
9152
9153 emit_insn (gen_rtx_SET (rsi, tmp));
9154
9155 /* Get a symbol for the stub. */
9156 if (frame_pointer_needed)
9157 stub = use_call ? XLOGUE_STUB_RESTORE_HFP
9158 : XLOGUE_STUB_RESTORE_HFP_TAIL;
9159 else
9160 stub = use_call ? XLOGUE_STUB_RESTORE
9161 : XLOGUE_STUB_RESTORE_TAIL;
9162 sym = xlogue.get_stub_rtx (stub);
9163
9164 elems_needed = ncregs;
9165 if (use_call)
9166 elems_needed += 1;
9167 else
9168 elems_needed += frame_pointer_needed ? 5 : 3;
9169 v = rtvec_alloc (elems_needed);
9170
9171 /* We call the epilogue stub when we need to pop incoming args or a
9172 sibling call will be the tail call. Otherwise, we emit a jmp to the
9173 epilogue stub and the stub itself is the tail call. */
9174 if (use_call)
9175 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
9176 else
9177 {
9178 RTVEC_ELT (v, vi++) = ret_rtx;
9179 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
9180 if (frame_pointer_needed)
9181 {
9182 rtx rbp = gen_rtx_REG (DImode, BP_REG);
9183 gcc_assert (m->fs.fp_valid);
9184 gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
9185
9186 tmp = plus_constant (DImode, rbp, 8);
9187 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
9188 RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
9189 tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
9190 RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
9191 }
9192 else
9193 {
9194 /* If no hard frame pointer, we set R10 to the SP restore value. */
9195 gcc_assert (!m->fs.fp_valid);
9196 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
9197 gcc_assert (m->fs.sp_valid);
9198
9199 r10 = gen_rtx_REG (DImode, R10_REG);
9200 tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
9201 emit_insn (gen_rtx_SET (r10, tmp));
9202
9203 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
9204 }
9205 }
9206
9207 /* Generate frame load insns and restore notes. */
9208 for (i = 0; i < ncregs; ++i)
9209 {
9210 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
9211 machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
9212 rtx reg, frame_load;
9213
9214 reg = gen_rtx_REG (mode, r.regno);
9215 frame_load = gen_frame_load (reg, rsi, r.offset);
9216
9217 /* Save RSI frame load insn & note to add last. */
9218 if (r.regno == SI_REG)
9219 {
9220 gcc_assert (!rsi_frame_load);
9221 rsi_frame_load = frame_load;
9222 rsi_restore_offset = r.offset;
9223 }
9224 else
9225 {
9226 RTVEC_ELT (v, vi++) = frame_load;
9227 ix86_add_cfa_restore_note (NULL, reg, r.offset);
9228 }
9229 }
9230
9231 /* Add RSI frame load & restore note at the end. */
9232 gcc_assert (rsi_frame_load);
9233 gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
9234 RTVEC_ELT (v, vi++) = rsi_frame_load;
9235 ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
9236 rsi_restore_offset);
9237
9238 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
9239 if (!use_call && !frame_pointer_needed)
9240 {
9241 gcc_assert (m->fs.sp_valid);
9242 gcc_assert (!m->fs.sp_realigned);
9243
9244 /* At this point, R10 should point to frame.stack_realign_offset. */
9245 if (m->fs.cfa_reg == stack_pointer_rtx)
9246 m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
9247 m->fs.sp_offset = frame.stack_realign_offset;
9248 }
9249
9250 gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
9251 tmp = gen_rtx_PARALLEL (VOIDmode, v);
9252 if (use_call)
9253 insn = emit_insn (tmp);
9254 else
9255 {
9256 insn = emit_jump_insn (tmp);
9257 JUMP_LABEL (insn) = ret_rtx;
9258
9259 if (frame_pointer_needed)
9260 ix86_emit_leave (insn);
9261 else
9262 {
9263 /* Need CFA adjust note. */
9264 tmp = gen_rtx_SET (stack_pointer_rtx, r10);
9265 add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
9266 }
9267 }
9268
9269 RTX_FRAME_RELATED_P (insn) = true;
9270 ix86_add_queued_cfa_restore_notes (insn);
9271
9272 /* If we're not doing a tail-call, we need to adjust the stack. */
9273 if (use_call && m->fs.sp_valid)
9274 {
9275 HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
9276 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9277 GEN_INT (dealloc), style,
9278 m->fs.cfa_reg == stack_pointer_rtx);
9279 }
9280 }
9281
9282 /* Restore function stack, frame, and registers. */
9283
9284 void
9285 ix86_expand_epilogue (int style)
9286 {
9287 struct machine_function *m = cfun->machine;
9288 struct machine_frame_state frame_state_save = m->fs;
9289 bool restore_regs_via_mov;
9290 bool using_drap;
9291 bool restore_stub_is_tail = false;
9292
9293 if (ix86_function_naked (current_function_decl))
9294 {
9295 /* The program should not reach this point. */
9296 emit_insn (gen_ud2 ());
9297 return;
9298 }
9299
9300 ix86_finalize_stack_frame_flags ();
9301 const struct ix86_frame &frame = cfun->machine->frame;
9302
9303 m->fs.sp_realigned = stack_realign_fp;
9304 m->fs.sp_valid = stack_realign_fp
9305 || !frame_pointer_needed
9306 || crtl->sp_is_unchanging;
9307 gcc_assert (!m->fs.sp_valid
9308 || m->fs.sp_offset == frame.stack_pointer_offset);
9309
9310 /* The FP must be valid if the frame pointer is present. */
9311 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
9312 gcc_assert (!m->fs.fp_valid
9313 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
9314
9315 /* We must have *some* valid pointer to the stack frame. */
9316 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
9317
9318 /* The DRAP is never valid at this point. */
9319 gcc_assert (!m->fs.drap_valid);
9320
9321 /* See the comment about red zone and frame
9322 pointer usage in ix86_expand_prologue. */
9323 if (frame_pointer_needed && frame.red_zone_size)
9324 emit_insn (gen_memory_blockage ());
9325
9326 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
9327 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
9328
9329 /* Determine the CFA offset of the end of the red-zone. */
9330 m->fs.red_zone_offset = 0;
9331 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
9332 {
9333 /* The red-zone begins below the return address and error code in
9334 the exception handler. */
9335 m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
9336
9337 /* When the register save area is in the aligned portion of
9338 the stack, determine the maximum runtime displacement that
9339 matches up with the aligned frame. */
9340 if (stack_realign_drap)
9341 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
9342 + UNITS_PER_WORD);
9343 }
9344
9345 HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
9346
9347 /* Special care must be taken for the normal return case of a function
9348 using eh_return: the eax and edx registers are marked as saved, but
9349 not restored along this path. Adjust the save location to match. */
9350 if (crtl->calls_eh_return && style != 2)
9351 reg_save_offset -= 2 * UNITS_PER_WORD;
9352
9353 /* EH_RETURN requires the use of moves to function properly. */
9354 if (crtl->calls_eh_return)
9355 restore_regs_via_mov = true;
9356 /* SEH requires the use of pops to identify the epilogue. */
9357 else if (TARGET_SEH)
9358 restore_regs_via_mov = false;
9359 /* If we're only restoring one register and sp cannot be used then
9360 use a move instruction to restore the register, since it's
9361 less work than reloading sp and popping the register. */
9362 else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
9363 restore_regs_via_mov = true;
9364 else if (TARGET_EPILOGUE_USING_MOVE
9365 && cfun->machine->use_fast_prologue_epilogue
9366 && (frame.nregs > 1
9367 || m->fs.sp_offset != reg_save_offset))
9368 restore_regs_via_mov = true;
9369 else if (frame_pointer_needed
9370 && !frame.nregs
9371 && m->fs.sp_offset != reg_save_offset)
9372 restore_regs_via_mov = true;
9373 else if (frame_pointer_needed
9374 && TARGET_USE_LEAVE
9375 && cfun->machine->use_fast_prologue_epilogue
9376 && frame.nregs == 1)
9377 restore_regs_via_mov = true;
9378 else
9379 restore_regs_via_mov = false;
9380
9381 if (restore_regs_via_mov || frame.nsseregs)
9382 {
9383 /* Ensure that the entire register save area is addressable via
9384 the stack pointer, if we will restore SSE regs via sp. */
9385 if (TARGET_64BIT
9386 && m->fs.sp_offset > 0x7fffffff
9387 && sp_valid_at (frame.stack_realign_offset + 1)
9388 && (frame.nsseregs + frame.nregs) != 0)
9389 {
9390 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9391 GEN_INT (m->fs.sp_offset
9392 - frame.sse_reg_save_offset),
9393 style,
9394 m->fs.cfa_reg == stack_pointer_rtx);
9395 }
9396 }
9397
9398 /* If there are any SSE registers to restore, then we have to do it
9399 via moves, since there's obviously no pop for SSE regs. */
9400 if (frame.nsseregs)
9401 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
9402 style == 2);
9403
9404 if (m->call_ms2sysv)
9405 {
9406 int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
9407
9408 /* We cannot use a tail-call for the stub if:
9409 1. We have to pop incoming args,
9410 2. We have additional int regs to restore, or
9411 3. A sibling call will be the tail-call, or
9412 4. We are emitting an eh_return_internal epilogue.
9413
9414 TODO: Item 4 has not yet been tested!
9415
9416 If any of the above are true, we will call the stub rather than
9417 jump to it. */
9418 restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
9419 ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
9420 }
9421
9422 /* If using an out-of-line stub that is a tail-call, then... */
9423 if (m->call_ms2sysv && restore_stub_is_tail)
9424 {
9425 /* TODO: paranoid tests. (remove eventually) */
9426 gcc_assert (m->fs.sp_valid);
9427 gcc_assert (!m->fs.sp_realigned);
9428 gcc_assert (!m->fs.fp_valid);
9429 gcc_assert (!m->fs.realigned);
9430 gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
9431 gcc_assert (!crtl->drap_reg);
9432 gcc_assert (!frame.nregs);
9433 }
9434 else if (restore_regs_via_mov)
9435 {
9436 rtx t;
9437
9438 if (frame.nregs)
9439 ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
9440
9441 /* eh_return epilogues need %ecx added to the stack pointer. */
9442 if (style == 2)
9443 {
9444 rtx sa = EH_RETURN_STACKADJ_RTX;
9445 rtx_insn *insn;
9446
9447 /* Stack realignment doesn't work with eh_return. */
9448 if (crtl->stack_realign_needed)
9449 sorry ("Stack realignment not supported with "
9450 "%<__builtin_eh_return%>");
9451
9452 /* regparm nested functions don't work with eh_return. */
9453 if (ix86_static_chain_on_stack)
9454 sorry ("regparm nested function not supported with "
9455 "%<__builtin_eh_return%>");
9456
9457 if (frame_pointer_needed)
9458 {
9459 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
9460 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
9461 emit_insn (gen_rtx_SET (sa, t));
9462
9463 /* NB: eh_return epilogues must restore the frame pointer
9464 in word_mode since the upper 32 bits of RBP register
9465 can have any values. */
9466 t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
9467 rtx frame_reg = gen_rtx_REG (word_mode,
9468 HARD_FRAME_POINTER_REGNUM);
9469 insn = emit_move_insn (frame_reg, t);
9470
9471 /* Note that we use SA as a temporary CFA, as the return
9472 address is at the proper place relative to it. We
9473 pretend this happens at the FP restore insn because
9474 prior to this insn the FP would be stored at the wrong
9475 offset relative to SA, and after this insn we have no
9476 other reasonable register to use for the CFA. We don't
9477 bother resetting the CFA to the SP for the duration of
9478 the return insn, unless the control flow instrumentation
9479 is done. In this case the SP is used later and we have
9480 to reset CFA to SP. */
9481 add_reg_note (insn, REG_CFA_DEF_CFA,
9482 plus_constant (Pmode, sa, UNITS_PER_WORD));
9483 ix86_add_queued_cfa_restore_notes (insn);
9484 add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
9485 RTX_FRAME_RELATED_P (insn) = 1;
9486
9487 m->fs.cfa_reg = sa;
9488 m->fs.cfa_offset = UNITS_PER_WORD;
9489 m->fs.fp_valid = false;
9490
9491 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
9492 const0_rtx, style,
9493 flag_cf_protection);
9494 }
9495 else
9496 {
9497 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
9498 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
9499 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
9500 ix86_add_queued_cfa_restore_notes (insn);
9501
9502 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
9503 if (m->fs.cfa_offset != UNITS_PER_WORD)
9504 {
9505 m->fs.cfa_offset = UNITS_PER_WORD;
9506 add_reg_note (insn, REG_CFA_DEF_CFA,
9507 plus_constant (Pmode, stack_pointer_rtx,
9508 UNITS_PER_WORD));
9509 RTX_FRAME_RELATED_P (insn) = 1;
9510 }
9511 }
9512 m->fs.sp_offset = UNITS_PER_WORD;
9513 m->fs.sp_valid = true;
9514 m->fs.sp_realigned = false;
9515 }
9516 }
9517 else
9518 {
9519 /* SEH requires that the function end with (1) a stack adjustment
9520 if necessary, (2) a sequence of pops, and (3) a return or
9521 jump instruction. Prevent insns from the function body from
9522 being scheduled into this sequence. */
9523 if (TARGET_SEH)
9524 {
9525 /* Prevent a catch region from being adjacent to the standard
9526 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
9527 nor several other flags that would be interesting to test are
9528 set up yet. */
9529 if (flag_non_call_exceptions)
9530 emit_insn (gen_nops (const1_rtx));
9531 else
9532 emit_insn (gen_blockage ());
9533 }
9534
9535 /* The first step is to deallocate the stack frame so that we can
9536 pop the registers. If the stack pointer was realigned, it needs
9537 to be restored now. Also do it on SEH targets for very large
9538 frames, as the emitted instructions aren't allowed by the ABI
9539 in epilogues. */
9540 if (!m->fs.sp_valid || m->fs.sp_realigned
9541 || (TARGET_SEH
9542 && (m->fs.sp_offset - reg_save_offset
9543 >= SEH_MAX_FRAME_SIZE)))
9544 {
9545 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
9546 GEN_INT (m->fs.fp_offset
9547 - reg_save_offset),
9548 style, false);
9549 }
9550 else if (m->fs.sp_offset != reg_save_offset)
9551 {
9552 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9553 GEN_INT (m->fs.sp_offset
9554 - reg_save_offset),
9555 style,
9556 m->fs.cfa_reg == stack_pointer_rtx);
9557 }
9558
9559 ix86_emit_restore_regs_using_pop ();
9560 }
9561
9562 /* If we used a frame pointer and haven't already got rid of it,
9563 then do so now. */
9564 if (m->fs.fp_valid)
9565 {
9566 /* If the stack pointer is valid and pointing at the frame
9567 pointer store address, then we only need a pop. */
9568 if (sp_valid_at (frame.hfp_save_offset)
9569 && m->fs.sp_offset == frame.hfp_save_offset)
9570 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
9571 /* Leave results in shorter dependency chains on CPUs that are
9572 able to grok it fast. */
9573 else if (TARGET_USE_LEAVE
9574 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
9575 || !cfun->machine->use_fast_prologue_epilogue)
9576 ix86_emit_leave (NULL);
9577 else
9578 {
9579 pro_epilogue_adjust_stack (stack_pointer_rtx,
9580 hard_frame_pointer_rtx,
9581 const0_rtx, style, !using_drap);
9582 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
9583 }
9584 }
9585
9586 if (using_drap)
9587 {
9588 int param_ptr_offset = UNITS_PER_WORD;
9589 rtx_insn *insn;
9590
9591 gcc_assert (stack_realign_drap);
9592
9593 if (ix86_static_chain_on_stack)
9594 param_ptr_offset += UNITS_PER_WORD;
9595 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9596 param_ptr_offset += UNITS_PER_WORD;
9597
9598 insn = emit_insn (gen_rtx_SET
9599 (stack_pointer_rtx,
9600 plus_constant (Pmode, crtl->drap_reg,
9601 -param_ptr_offset)));
9602 m->fs.cfa_reg = stack_pointer_rtx;
9603 m->fs.cfa_offset = param_ptr_offset;
9604 m->fs.sp_offset = param_ptr_offset;
9605 m->fs.realigned = false;
9606
9607 add_reg_note (insn, REG_CFA_DEF_CFA,
9608 plus_constant (Pmode, stack_pointer_rtx,
9609 param_ptr_offset));
9610 RTX_FRAME_RELATED_P (insn) = 1;
9611
9612 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9613 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
9614 }
9615
9616 /* At this point the stack pointer must be valid, and we must have
9617 restored all of the registers. We may not have deallocated the
9618 entire stack frame. We've delayed this until now because it may
9619 be possible to merge the local stack deallocation with the
9620 deallocation forced by ix86_static_chain_on_stack. */
9621 gcc_assert (m->fs.sp_valid);
9622 gcc_assert (!m->fs.sp_realigned);
9623 gcc_assert (!m->fs.fp_valid);
9624 gcc_assert (!m->fs.realigned);
9625 if (m->fs.sp_offset != UNITS_PER_WORD)
9626 {
9627 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9628 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
9629 style, true);
9630 }
9631 else
9632 ix86_add_queued_cfa_restore_notes (get_last_insn ());
9633
9634 /* Sibcall epilogues don't want a return instruction. */
9635 if (style == 0)
9636 {
9637 m->fs = frame_state_save;
9638 return;
9639 }
9640
9641 if (cfun->machine->func_type != TYPE_NORMAL)
9642 emit_jump_insn (gen_interrupt_return ());
9643 else if (crtl->args.pops_args && crtl->args.size)
9644 {
9645 rtx popc = GEN_INT (crtl->args.pops_args);
9646
9647 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
9648 address, do an explicit add, and jump indirectly to the caller. */
9649
9650 if (crtl->args.pops_args >= 65536)
9651 {
9652 rtx ecx = gen_rtx_REG (SImode, CX_REG);
9653 rtx_insn *insn;
9654
9655 /* There is no "pascal" calling convention in any 64bit ABI. */
9656 gcc_assert (!TARGET_64BIT);
9657
9658 insn = emit_insn (gen_pop (ecx));
9659 m->fs.cfa_offset -= UNITS_PER_WORD;
9660 m->fs.sp_offset -= UNITS_PER_WORD;
9661
9662 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9663 x = gen_rtx_SET (stack_pointer_rtx, x);
9664 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9665 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
9666 RTX_FRAME_RELATED_P (insn) = 1;
9667
9668 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9669 popc, -1, true);
9670 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
9671 }
9672 else
9673 emit_jump_insn (gen_simple_return_pop_internal (popc));
9674 }
9675 else if (!m->call_ms2sysv || !restore_stub_is_tail)
9676 {
9677 /* In the case of a return from EH, a simple return cannot be used,
9678 as the return address will be compared with a shadow stack
9679 return address. Use an indirect jump instead. */
9680 if (style == 2 && flag_cf_protection)
9681 {
9682 /* The register used in an indirect jump must be in word_mode, but
9683 Pmode may not be the same as word_mode for x32. */
9684 rtx ecx = gen_rtx_REG (word_mode, CX_REG);
9685 rtx_insn *insn;
9686
9687 insn = emit_insn (gen_pop (ecx));
9688 m->fs.cfa_offset -= UNITS_PER_WORD;
9689 m->fs.sp_offset -= UNITS_PER_WORD;
9690
9691 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9692 x = gen_rtx_SET (stack_pointer_rtx, x);
9693 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9694 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
9695 RTX_FRAME_RELATED_P (insn) = 1;
9696
9697 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
9698 }
9699 else
9700 emit_jump_insn (gen_simple_return_internal ());
9701 }
9702
9703 /* Restore the state back to the state from the prologue,
9704 so that it's correct for the next epilogue. */
9705 m->fs = frame_state_save;
9706 }
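
/* Illustrative sketch (hypothetical output; the exact sequence depends
   on the frame layout, tuning flags and STYLE): for a frame-pointer
   function that saved %rbx, the pop path above emits roughly
       popq  %rbx
       popq  %rbp
       ret
   while the move path instead loads the register back, e.g.
       movq  -8(%rbp), %rbx
       leave
       ret
   eh_return (STYLE == 2) and sibcall (STYLE == 0) epilogues differ as
   described in the comments above.  */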
9707
9708 /* Reset from the function's potential modifications. */
9709
9710 static void
9711 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
9712 {
9713 if (pic_offset_table_rtx
9714 && !ix86_use_pseudo_pic_reg ())
9715 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
9716
9717 if (TARGET_MACHO)
9718 {
9719 rtx_insn *insn = get_last_insn ();
9720 rtx_insn *deleted_debug_label = NULL;
9721
9722 /* Mach-O doesn't support labels at the end of objects, so if
9723 it looks like we might want one, take special action.
9724 First, collect any sequence of deleted debug labels. */
9725 while (insn
9726 && NOTE_P (insn)
9727 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
9728 {
9729 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
9730 notes only, instead set their CODE_LABEL_NUMBER to -1,
9731 otherwise there would be code generation differences
9732 in between -g and -g0. */
9733 if (NOTE_P (insn) && NOTE_KIND (insn)
9734 == NOTE_INSN_DELETED_DEBUG_LABEL)
9735 deleted_debug_label = insn;
9736 insn = PREV_INSN (insn);
9737 }
9738
9739 /* If we have:
9740 label:
9741 barrier
9742 then this needs to be detected, so skip past the barrier. */
9743
9744 if (insn && BARRIER_P (insn))
9745 insn = PREV_INSN (insn);
9746
9747 /* Up to now we've only seen notes or barriers. */
9748 if (insn)
9749 {
9750 if (LABEL_P (insn)
9751 || (NOTE_P (insn)
9752 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
9753 /* Trailing label. */
9754 fputs ("\tnop\n", file);
9755 else if (cfun && ! cfun->is_thunk)
9756 {
9757 /* See if we have a completely empty function body, skipping
9758 the special case of the picbase thunk emitted as asm. */
9759 while (insn && ! INSN_P (insn))
9760 insn = PREV_INSN (insn);
9761 /* If we don't find any insns, we've got an empty function body;
9762 i.e. completely empty, without a return or branch. This is
9763 taken as the case where a function body has been removed
9764 because it contains an inline __builtin_unreachable(). GCC
9765 declares that reaching __builtin_unreachable() means UB so
9766 we're not obliged to do anything special; however, we want
9767 non-zero-sized function bodies. To meet this, and help the
9768 user out, let's trap the case. */
9769 if (insn == NULL)
9770 fputs ("\tud2\n", file);
9771 }
9772 }
9773 else if (deleted_debug_label)
9774 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
9775 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
9776 CODE_LABEL_NUMBER (insn) = -1;
9777 }
9778 }
9779
9780 /* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY. */
9781
9782 void
9783 ix86_print_patchable_function_entry (FILE *file,
9784 unsigned HOST_WIDE_INT patch_area_size,
9785 bool record_p)
9786 {
9787 if (cfun->machine->function_label_emitted)
9788 {
9789 /* NB: When ix86_print_patchable_function_entry is called after
9790 the function label has been emitted, we have inserted or queued
9791 a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper
9792 place. There is nothing to do here. */
9793 return;
9794 }
9795
9796 default_print_patchable_function_entry (file, patch_area_size,
9797 record_p);
9798 }
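
/* For illustration (behaviour of the default hook, summarized here as
   an assumption): with -fpatchable-function-entry=2 the call above
   typically emits two single-byte "nop"s at the function entry and,
   when RECORD_P, records the location in the
   __patchable_function_entries section.  */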
9799
9800 /* Output patchable area. NB: default_print_patchable_function_entry
9801 isn't available in i386.md. */
9802
9803 void
9804 ix86_output_patchable_area (unsigned int patch_area_size,
9805 bool record_p)
9806 {
9807 default_print_patchable_function_entry (asm_out_file,
9808 patch_area_size,
9809 record_p);
9810 }
9811
9812 /* Return a scratch register to use in the split stack prologue. The
9813 split stack prologue is used for -fsplit-stack. It is the first
9814 instructions in the function, even before the regular prologue.
9815 The scratch register can be any caller-saved register which is not
9816 used for parameters or for the static chain. */
9817
9818 static unsigned int
9819 split_stack_prologue_scratch_regno (void)
9820 {
9821 if (TARGET_64BIT)
9822 return R11_REG;
9823 else
9824 {
9825 bool is_fastcall, is_thiscall;
9826 int regparm;
9827
9828 is_fastcall = (lookup_attribute ("fastcall",
9829 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
9830 != NULL);
9831 is_thiscall = (lookup_attribute ("thiscall",
9832 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
9833 != NULL);
9834 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
9835
9836 if (is_fastcall)
9837 {
9838 if (DECL_STATIC_CHAIN (cfun->decl))
9839 {
9840 sorry ("%<-fsplit-stack%> does not support fastcall with "
9841 "nested function");
9842 return INVALID_REGNUM;
9843 }
9844 return AX_REG;
9845 }
9846 else if (is_thiscall)
9847 {
9848 if (!DECL_STATIC_CHAIN (cfun->decl))
9849 return DX_REG;
9850 return AX_REG;
9851 }
9852 else if (regparm < 3)
9853 {
9854 if (!DECL_STATIC_CHAIN (cfun->decl))
9855 return CX_REG;
9856 else
9857 {
9858 if (regparm >= 2)
9859 {
9860 sorry ("%<-fsplit-stack%> does not support 2 register "
9861 "parameters for a nested function");
9862 return INVALID_REGNUM;
9863 }
9864 return DX_REG;
9865 }
9866 }
9867 else
9868 {
9869 /* FIXME: We could make this work by pushing a register
9870 around the addition and comparison. */
9871 sorry ("%<-fsplit-stack%> does not support 3 register parameters");
9872 return INVALID_REGNUM;
9873 }
9874 }
9875 }
9876
9877 /* A SYMBOL_REF for the function which allocates new stackspace for
9878 -fsplit-stack. */
9879
9880 static GTY(()) rtx split_stack_fn;
9881
9882 /* A SYMBOL_REF for the more stack function when using the large
9883 model. */
9884
9885 static GTY(()) rtx split_stack_fn_large;
9886
9887 /* Return location of the stack guard value in the TLS block. */
9888
9889 rtx
9890 ix86_split_stack_guard (void)
9891 {
9892 int offset;
9893 addr_space_t as = DEFAULT_TLS_SEG_REG;
9894 rtx r;
9895
9896 gcc_assert (flag_split_stack);
9897
9898 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
9899 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
9900 #else
9901 gcc_unreachable ();
9902 #endif
9903
9904 r = GEN_INT (offset);
9905 r = gen_const_mem (Pmode, r);
9906 set_mem_addr_space (r, as);
9907
9908 return r;
9909 }
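
/* For illustration (the offsets are an assumption about the glibc TCB
   layout): on x86-64 GNU/Linux TARGET_THREAD_SPLIT_STACK_OFFSET is
   0x70, so the MEM returned above is printed as %fs:0x70 in the
   comparison emitted by ix86_expand_split_stack_prologue; 32-bit mode
   uses %gs with a smaller offset.  */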
9910
9911 /* Handle -fsplit-stack. These are the first instructions in the
9912 function, even before the regular prologue. */
9913
9914 void
9915 ix86_expand_split_stack_prologue (void)
9916 {
9917 HOST_WIDE_INT allocate;
9918 unsigned HOST_WIDE_INT args_size;
9919 rtx_code_label *label;
9920 rtx limit, current, allocate_rtx, call_fusage;
9921 rtx_insn *call_insn;
9922 rtx scratch_reg = NULL_RTX;
9923 rtx_code_label *varargs_label = NULL;
9924 rtx fn;
9925
9926 gcc_assert (flag_split_stack && reload_completed);
9927
9928 ix86_finalize_stack_frame_flags ();
9929 struct ix86_frame &frame = cfun->machine->frame;
9930 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
9931
9932 /* This is the label we will branch to if we have enough stack
9933 space. We expect the basic block reordering pass to reverse this
9934 branch if optimizing, so that we branch in the unlikely case. */
9935 label = gen_label_rtx ();
9936
9937 /* We need to compare the stack pointer minus the frame size with
9938 the stack boundary in the TCB. The stack boundary always gives
9939 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
9940 can compare directly. Otherwise we need to do an addition. */
9941
9942 limit = ix86_split_stack_guard ();
9943
9944 if (allocate < SPLIT_STACK_AVAILABLE)
9945 current = stack_pointer_rtx;
9946 else
9947 {
9948 unsigned int scratch_regno;
9949 rtx offset;
9950
9951 /* We need a scratch register to hold the stack pointer minus
9952 the required frame size. Since this is the very start of the
9953 function, the scratch register can be any caller-saved
9954 register which is not used for parameters. */
9955 offset = GEN_INT (- allocate);
9956 scratch_regno = split_stack_prologue_scratch_regno ();
9957 if (scratch_regno == INVALID_REGNUM)
9958 return;
9959 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
9960 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
9961 {
9962 /* We don't use gen_add in this case because it will
9963 want to split to lea, but when not optimizing the insn
9964 will not be split after this point. */
9965 emit_insn (gen_rtx_SET (scratch_reg,
9966 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9967 offset)));
9968 }
9969 else
9970 {
9971 emit_move_insn (scratch_reg, offset);
9972 emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
9973 }
9974 current = scratch_reg;
9975 }
9976
9977 ix86_expand_branch (GEU, current, limit, label);
9978 rtx_insn *jump_insn = get_last_insn ();
9979 JUMP_LABEL (jump_insn) = label;
9980
9981 /* Mark the jump as very likely to be taken. */
9982 add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
9983
9984 if (split_stack_fn == NULL_RTX)
9985 {
9986 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
9987 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
9988 }
9989 fn = split_stack_fn;
9990
9991 /* Get more stack space. We pass in the desired stack space and the
9992 size of the arguments to copy to the new stack. In 32-bit mode
9993 we push the parameters; __morestack will return on a new stack
9994 anyhow. In 64-bit mode we pass the parameters in r10 and
9995 r11. */
9996 allocate_rtx = GEN_INT (allocate);
9997 args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
9998 call_fusage = NULL_RTX;
9999 rtx pop = NULL_RTX;
10000 if (TARGET_64BIT)
10001 {
10002 rtx reg10, reg11;
10003
10004 reg10 = gen_rtx_REG (Pmode, R10_REG);
10005 reg11 = gen_rtx_REG (Pmode, R11_REG);
10006
10007 /* If this function uses a static chain, it will be in %r10.
10008 Preserve it across the call to __morestack. */
10009 if (DECL_STATIC_CHAIN (cfun->decl))
10010 {
10011 rtx rax;
10012
10013 rax = gen_rtx_REG (word_mode, AX_REG);
10014 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
10015 use_reg (&call_fusage, rax);
10016 }
10017
10018 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
10019 && !TARGET_PECOFF)
10020 {
10021 HOST_WIDE_INT argval;
10022
10023 gcc_assert (Pmode == DImode);
10024 /* When using the large model we need to load the address
10025 into a register, and we've run out of registers. So we
10026 switch to a different calling convention, and we call a
10027 different function: __morestack_large_model. We pass the
10028 argument size in the upper 32 bits of r10 and pass the
10029 frame size in the lower 32 bits. */
10030 gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
10031 gcc_assert ((args_size & 0xffffffff) == args_size);
10032
10033 if (split_stack_fn_large == NULL_RTX)
10034 {
10035 split_stack_fn_large
10036 = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
10037 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
10038 }
10039 if (ix86_cmodel == CM_LARGE_PIC)
10040 {
10041 rtx_code_label *label;
10042 rtx x;
10043
10044 label = gen_label_rtx ();
10045 emit_label (label);
10046 LABEL_PRESERVE_P (label) = 1;
10047 emit_insn (gen_set_rip_rex64 (reg10, label));
10048 emit_insn (gen_set_got_offset_rex64 (reg11, label));
10049 emit_insn (gen_add2_insn (reg10, reg11));
10050 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
10051 UNSPEC_GOT);
10052 x = gen_rtx_CONST (Pmode, x);
10053 emit_move_insn (reg11, x);
10054 x = gen_rtx_PLUS (Pmode, reg10, reg11);
10055 x = gen_const_mem (Pmode, x);
10056 emit_move_insn (reg11, x);
10057 }
10058 else
10059 emit_move_insn (reg11, split_stack_fn_large);
10060
10061 fn = reg11;
10062
10063 argval = ((args_size << 16) << 16) + allocate;
10064 emit_move_insn (reg10, GEN_INT (argval));
10065 }
10066 else
10067 {
10068 emit_move_insn (reg10, allocate_rtx);
10069 emit_move_insn (reg11, GEN_INT (args_size));
10070 use_reg (&call_fusage, reg11);
10071 }
10072
10073 use_reg (&call_fusage, reg10);
10074 }
10075 else
10076 {
10077 rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
10078 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
10079 insn = emit_insn (gen_push (allocate_rtx));
10080 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
10081 pop = GEN_INT (2 * UNITS_PER_WORD);
10082 }
10083 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
10084 GEN_INT (UNITS_PER_WORD), constm1_rtx,
10085 pop, false);
10086 add_function_usage_to (call_insn, call_fusage);
10087 if (!TARGET_64BIT)
10088 add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
10089 /* Indicate that this function can't jump to non-local gotos. */
10090 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
10091
10092 /* In order to make call/return prediction work right, we now need
10093 to execute a return instruction. See
10094 libgcc/config/i386/morestack.S for the details on how this works.
10095
10096 For flow purposes gcc must not see this as a return
10097 instruction--we need control flow to continue at the subsequent
10098 label. Therefore, we use an unspec. */
10099 gcc_assert (crtl->args.pops_args < 65536);
10100 rtx_insn *ret_insn
10101 = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
10102
10103 if ((flag_cf_protection & CF_BRANCH))
10104 {
10105 /* Insert ENDBR since __morestack will jump back here via indirect
10106 call. */
10107 rtx cet_eb = gen_nop_endbr ();
10108 emit_insn_after (cet_eb, ret_insn);
10109 }
10110
10111 /* If we are in 64-bit mode and this function uses a static chain,
10112 we saved %r10 in %rax before calling __morestack. */
10113 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
10114 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
10115 gen_rtx_REG (word_mode, AX_REG));
10116
10117 /* If this function calls va_start, we need to store a pointer to
10118 the arguments on the old stack, because they may not have been
10119 all copied to the new stack. At this point the old stack can be
10120 found at the frame pointer value used by __morestack, because
10121 __morestack has set that up before calling back to us. Here we
10122 store that pointer in a scratch register, and in
10123 ix86_expand_prologue we store the scratch register in a stack
10124 slot. */
10125 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10126 {
10127 unsigned int scratch_regno;
10128 rtx frame_reg;
10129 int words;
10130
10131 scratch_regno = split_stack_prologue_scratch_regno ();
10132 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10133 frame_reg = gen_rtx_REG (Pmode, BP_REG);
10134
10135 /* 64-bit:
10136 fp -> old fp value
10137 return address within this function
10138 return address of caller of this function
10139 stack arguments
10140 So we add three words to get to the stack arguments.
10141
10142 32-bit:
10143 fp -> old fp value
10144 return address within this function
10145 first argument to __morestack
10146 second argument to __morestack
10147 return address of caller of this function
10148 stack arguments
10149 So we add five words to get to the stack arguments.
10150 */
10151 words = TARGET_64BIT ? 3 : 5;
10152 emit_insn (gen_rtx_SET (scratch_reg,
10153 plus_constant (Pmode, frame_reg,
10154 words * UNITS_PER_WORD)));
10155
10156 varargs_label = gen_label_rtx ();
10157 emit_jump_insn (gen_jump (varargs_label));
10158 JUMP_LABEL (get_last_insn ()) = varargs_label;
10159
10160 emit_barrier ();
10161 }
10162
10163 emit_label (label);
10164 LABEL_NUSES (label) = 1;
10165
10166 /* If this function calls va_start, we now have to set the scratch
10167 register for the case where we do not call __morestack. In this
10168 case we need to set it based on the stack pointer. */
10169 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10170 {
10171 emit_insn (gen_rtx_SET (scratch_reg,
10172 plus_constant (Pmode, stack_pointer_rtx,
10173 UNITS_PER_WORD)));
10174
10175 emit_label (varargs_label);
10176 LABEL_NUSES (varargs_label) = 1;
10177 }
10178 }
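
/* Illustrative sketch (hypothetical assembly; exact registers, offsets
   and the guard location vary by target): for a small 64-bit frame the
   sequence expanded above is roughly
       cmpq   %fs:0x70, %rsp       # guard from ix86_split_stack_guard
       jae    .Lenough             # very likely taken
       movq   $FRAME_BYTES, %r10   # space to allocate
       movq   $ARG_BYTES, %r11     # argument bytes to copy
       callq  __morestack
       retq                        # split_stack_return; see morestack.S
   .Lenough:
   Frames of at least SPLIT_STACK_AVAILABLE bytes first compute
   %rsp - FRAME_BYTES into a scratch register and compare that against
   the guard instead.  */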
10179
10180 /* We may have to tell the dataflow pass that the split stack prologue
10181 is initializing a scratch register. */
10182
10183 static void
10184 ix86_live_on_entry (bitmap regs)
10185 {
10186 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10187 {
10188 gcc_assert (flag_split_stack);
10189 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
10190 }
10191 }
10192 \f
10193 /* Extract the parts of an RTL expression that is a valid memory address
10194 for an instruction. Return false if the structure of the address is
10195 grossly off. */
10196
10197 bool
10198 ix86_decompose_address (rtx addr, struct ix86_address *out)
10199 {
10200 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10201 rtx base_reg, index_reg;
10202 HOST_WIDE_INT scale = 1;
10203 rtx scale_rtx = NULL_RTX;
10204 rtx tmp;
10205 addr_space_t seg = ADDR_SPACE_GENERIC;
10206
10207 /* Allow zero-extended SImode addresses,
10208 they will be emitted with addr32 prefix. */
10209 if (TARGET_64BIT && GET_MODE (addr) == DImode)
10210 {
10211 if (GET_CODE (addr) == ZERO_EXTEND
10212 && GET_MODE (XEXP (addr, 0)) == SImode)
10213 {
10214 addr = XEXP (addr, 0);
10215 if (CONST_INT_P (addr))
10216 return false;
10217 }
10218 else if (GET_CODE (addr) == AND
10219 && const_32bit_mask (XEXP (addr, 1), DImode))
10220 {
10221 addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
10222 if (addr == NULL_RTX)
10223 return false;
10224
10225 if (CONST_INT_P (addr))
10226 return false;
10227 }
10228 else if (GET_CODE (addr) == AND)
10229 {
10230 /* For an ASHIFT inside an AND, combine will not generate a
10231 canonical zero-extend. Merge the AND mask and the shift count
10232 to check whether it is a canonical zero-extend. */
10233 tmp = XEXP (addr, 0);
10234 rtx mask = XEXP (addr, 1);
10235 if (tmp && GET_CODE(tmp) == ASHIFT)
10236 {
10237 rtx shift_val = XEXP (tmp, 1);
10238 if (CONST_INT_P (mask) && CONST_INT_P (shift_val)
10239 && (((unsigned HOST_WIDE_INT) INTVAL(mask)
10240 | ((HOST_WIDE_INT_1U << INTVAL(shift_val)) - 1))
10241 == 0xffffffff))
10242 {
10243 addr = lowpart_subreg (SImode, XEXP (addr, 0),
10244 DImode);
10245 }
10246 }
10247
10248 }
10249 }
10250
10251 /* Allow SImode subregs of DImode addresses,
10252 they will be emitted with addr32 prefix. */
10253 if (TARGET_64BIT && GET_MODE (addr) == SImode)
10254 {
10255 if (SUBREG_P (addr)
10256 && GET_MODE (SUBREG_REG (addr)) == DImode)
10257 {
10258 addr = SUBREG_REG (addr);
10259 if (CONST_INT_P (addr))
10260 return false;
10261 }
10262 }
10263
10264 if (REG_P (addr))
10265 base = addr;
10266 else if (SUBREG_P (addr))
10267 {
10268 if (REG_P (SUBREG_REG (addr)))
10269 base = addr;
10270 else
10271 return false;
10272 }
10273 else if (GET_CODE (addr) == PLUS)
10274 {
10275 rtx addends[4], op;
10276 int n = 0, i;
10277
10278 op = addr;
10279 do
10280 {
10281 if (n >= 4)
10282 return false;
10283 addends[n++] = XEXP (op, 1);
10284 op = XEXP (op, 0);
10285 }
10286 while (GET_CODE (op) == PLUS);
10287 if (n >= 4)
10288 return false;
10289 addends[n] = op;
10290
10291 for (i = n; i >= 0; --i)
10292 {
10293 op = addends[i];
10294 switch (GET_CODE (op))
10295 {
10296 case MULT:
10297 if (index)
10298 return false;
10299 index = XEXP (op, 0);
10300 scale_rtx = XEXP (op, 1);
10301 break;
10302
10303 case ASHIFT:
10304 if (index)
10305 return false;
10306 index = XEXP (op, 0);
10307 tmp = XEXP (op, 1);
10308 if (!CONST_INT_P (tmp))
10309 return false;
10310 scale = INTVAL (tmp);
10311 if ((unsigned HOST_WIDE_INT) scale > 3)
10312 return false;
10313 scale = 1 << scale;
10314 break;
10315
10316 case ZERO_EXTEND:
10317 op = XEXP (op, 0);
10318 if (GET_CODE (op) != UNSPEC)
10319 return false;
10320 /* FALLTHRU */
10321
10322 case UNSPEC:
10323 if (XINT (op, 1) == UNSPEC_TP
10324 && TARGET_TLS_DIRECT_SEG_REFS
10325 && seg == ADDR_SPACE_GENERIC)
10326 seg = DEFAULT_TLS_SEG_REG;
10327 else
10328 return false;
10329 break;
10330
10331 case SUBREG:
10332 if (!REG_P (SUBREG_REG (op)))
10333 return false;
10334 /* FALLTHRU */
10335
10336 case REG:
10337 if (!base)
10338 base = op;
10339 else if (!index)
10340 index = op;
10341 else
10342 return false;
10343 break;
10344
10345 case CONST:
10346 case CONST_INT:
10347 case SYMBOL_REF:
10348 case LABEL_REF:
10349 if (disp)
10350 return false;
10351 disp = op;
10352 break;
10353
10354 default:
10355 return false;
10356 }
10357 }
10358 }
10359 else if (GET_CODE (addr) == MULT)
10360 {
10361 index = XEXP (addr, 0); /* index*scale */
10362 scale_rtx = XEXP (addr, 1);
10363 }
10364 else if (GET_CODE (addr) == ASHIFT)
10365 {
10366 /* We're called for lea too, which implements ashift on occasion. */
10367 index = XEXP (addr, 0);
10368 tmp = XEXP (addr, 1);
10369 if (!CONST_INT_P (tmp))
10370 return false;
10371 scale = INTVAL (tmp);
10372 if ((unsigned HOST_WIDE_INT) scale > 3)
10373 return false;
10374 scale = 1 << scale;
10375 }
10376 else
10377 disp = addr; /* displacement */
10378
10379 if (index)
10380 {
10381 if (REG_P (index))
10382 ;
10383 else if (SUBREG_P (index)
10384 && REG_P (SUBREG_REG (index)))
10385 ;
10386 else
10387 return false;
10388 }
10389
10390 /* Extract the integral value of scale. */
10391 if (scale_rtx)
10392 {
10393 if (!CONST_INT_P (scale_rtx))
10394 return false;
10395 scale = INTVAL (scale_rtx);
10396 }
10397
10398 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
10399 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
10400
10401 /* Avoid useless 0 displacement. */
10402 if (disp == const0_rtx && (base || index))
10403 disp = NULL_RTX;
10404
10405 /* Allow arg pointer and stack pointer as index if there is no scaling. */
10406 if (base_reg && index_reg && scale == 1
10407 && (REGNO (index_reg) == ARG_POINTER_REGNUM
10408 || REGNO (index_reg) == FRAME_POINTER_REGNUM
10409 || REGNO (index_reg) == SP_REG))
10410 {
10411 std::swap (base, index);
10412 std::swap (base_reg, index_reg);
10413 }
10414
10415 /* Special case: %ebp cannot be encoded as a base without a displacement.
10416 Similarly %r13. */
10417 if (!disp && base_reg
10418 && (REGNO (base_reg) == ARG_POINTER_REGNUM
10419 || REGNO (base_reg) == FRAME_POINTER_REGNUM
10420 || REGNO (base_reg) == BP_REG
10421 || REGNO (base_reg) == R13_REG))
10422 disp = const0_rtx;
10423
10424 /* Special case: on K6, [%esi] makes the instruction vector decoded.
10425 Avoid this by transforming to [%esi+0].
10426 Reload calls address legitimization without cfun defined, so we need
10427 to test cfun for being non-NULL. */
10428 if (TARGET_CPU_P (K6) && cfun && optimize_function_for_speed_p (cfun)
10429 && base_reg && !index_reg && !disp
10430 && REGNO (base_reg) == SI_REG)
10431 disp = const0_rtx;
10432
10433 /* Special case: encode reg+reg instead of reg*2. */
10434 if (!base && index && scale == 2)
10435 base = index, base_reg = index_reg, scale = 1;
10436
10437 /* Special case: scaling cannot be encoded without base or displacement. */
10438 if (!base && !disp && index && scale != 1)
10439 disp = const0_rtx;
10440
10441 out->base = base;
10442 out->index = index;
10443 out->disp = disp;
10444 out->scale = scale;
10445 out->seg = seg;
10446
10447 return true;
10448 }
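
/* Worked example (illustrative only): the Pmode address
       (plus:DI (plus:DI (mult:DI (reg:DI rcx) (const_int 4))
                         (reg:DI rbx))
                (const_int 16))
   i.e. 16(%rbx,%rcx,4), decomposes into
       base = rbx, index = rcx, scale = 4, disp = (const_int 16),
       seg = ADDR_SPACE_GENERIC
   and the function returns true.  */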
10449 \f
10450 /* Return cost of the memory address x.
10451 For i386, it is better to use a complex address than let gcc copy
10452 the address into a reg and make a new pseudo. But not if the address
10453 requires two regs - that would mean more pseudos with longer
10454 lifetimes. */
10455 static int
10456 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
10457 {
10458 struct ix86_address parts;
10459 int cost = 1;
10460 int ok = ix86_decompose_address (x, &parts);
10461
10462 gcc_assert (ok);
10463
10464 if (parts.base && SUBREG_P (parts.base))
10465 parts.base = SUBREG_REG (parts.base);
10466 if (parts.index && SUBREG_P (parts.index))
10467 parts.index = SUBREG_REG (parts.index);
10468
10469 /* Attempt to minimize number of registers in the address by increasing
10470 address cost for each used register. We don't increase address cost
10471 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
10472 is not invariant itself it most likely means that base or index is not
10473 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
10474 which is not profitable for x86. */
10475 if (parts.base
10476 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10477 && (current_pass->type == GIMPLE_PASS
10478 || !pic_offset_table_rtx
10479 || !REG_P (parts.base)
10480 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
10481 cost++;
10482
10483 if (parts.index
10484 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10485 && (current_pass->type == GIMPLE_PASS
10486 || !pic_offset_table_rtx
10487 || !REG_P (parts.index)
10488 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
10489 cost++;
10490
10491 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10492 since its predecode logic can't detect the length of instructions
10493 and it degenerates to vector decoding. Increase the cost of such
10494 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
10495 to split such addresses or even refuse such addresses at all.
10496
10497 Following addressing modes are affected:
10498 [base+scale*index]
10499 [scale*index+disp]
10500 [base+index]
10501
10502 The first and last case may be avoidable by explicitly coding the zero in
10503 the memory address, but I don't have an AMD-K6 machine handy to check this
10504 theory. */
10505
10506 if (TARGET_CPU_P (K6)
10507 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10508 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10509 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10510 cost += 10;
10511
10512 return cost;
10513 }
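
/* Illustrative example: for 16(%rbx,%rcx,4) with hard registers the
   cost computed above stays at 1; if base and index were still pseudos
   it would be 3, and, when tuning for K6, a form such as (%rbx,%rcx)
   gets a further +10, nudging the RTL optimizers towards addresses that
   tie up fewer registers.  */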
10514 \f
10515 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10516 this is used to form addresses to local data when -fPIC is in
10517 use. */
10518
10519 static bool
10520 darwin_local_data_pic (rtx disp)
10521 {
10522 return (GET_CODE (disp) == UNSPEC
10523 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10524 }
10525
10526 /* True if the function symbol operand X should be loaded from GOT.
10527 If CALL_P is true, X is a call operand.
10528
10529 NB: -mno-direct-extern-access doesn't force load from GOT for
10530 call.
10531
10532 NB: In 32-bit mode, only non-PIC is allowed in inline assembly
10533 statements, since a PIC register could not be available at the
10534 call site. */
10535
10536 bool
10537 ix86_force_load_from_GOT_p (rtx x, bool call_p)
10538 {
10539 return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X))
10540 && !TARGET_PECOFF && !TARGET_MACHO
10541 && (!flag_pic || this_is_asm_operands)
10542 && ix86_cmodel != CM_LARGE
10543 && ix86_cmodel != CM_LARGE_PIC
10544 && GET_CODE (x) == SYMBOL_REF
10545 && ((!call_p
10546 && (!ix86_direct_extern_access
10547 || (SYMBOL_REF_DECL (x)
10548 && lookup_attribute ("nodirect_extern_access",
10549 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))))
10550 || (SYMBOL_REF_FUNCTION_P (x)
10551 && (!flag_plt
10552 || (SYMBOL_REF_DECL (x)
10553 && lookup_attribute ("noplt",
10554 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))))
10555 && !SYMBOL_REF_LOCAL_P (x));
10556 }
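
/* Illustrative example: when this predicate is true for an external
   function symbol (e.g. under -fno-plt or the "noplt" attribute), a
   call that would otherwise go through the PLT is instead emitted as an
   indirect call through the GOT, roughly
       call  *foo@GOTPCREL(%rip)
   where "foo" stands for the callee.  */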
10557
10558 /* Determine if a given RTX is a valid constant. We already know this
10559 satisfies CONSTANT_P. */
10560
10561 static bool
10562 ix86_legitimate_constant_p (machine_mode mode, rtx x)
10563 {
10564 switch (GET_CODE (x))
10565 {
10566 case CONST:
10567 x = XEXP (x, 0);
10568
10569 if (GET_CODE (x) == PLUS)
10570 {
10571 if (!CONST_INT_P (XEXP (x, 1)))
10572 return false;
10573 x = XEXP (x, 0);
10574 }
10575
10576 if (TARGET_MACHO && darwin_local_data_pic (x))
10577 return true;
10578
10579 /* Only some unspecs are valid as "constants". */
10580 if (GET_CODE (x) == UNSPEC)
10581 switch (XINT (x, 1))
10582 {
10583 case UNSPEC_GOT:
10584 case UNSPEC_GOTOFF:
10585 case UNSPEC_PLTOFF:
10586 return TARGET_64BIT;
10587 case UNSPEC_TPOFF:
10588 case UNSPEC_NTPOFF:
10589 x = XVECEXP (x, 0, 0);
10590 return (GET_CODE (x) == SYMBOL_REF
10591 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10592 case UNSPEC_DTPOFF:
10593 x = XVECEXP (x, 0, 0);
10594 return (GET_CODE (x) == SYMBOL_REF
10595 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10596 default:
10597 return false;
10598 }
10599
10600 /* We must have drilled down to a symbol. */
10601 if (GET_CODE (x) == LABEL_REF)
10602 return true;
10603 if (GET_CODE (x) != SYMBOL_REF)
10604 return false;
10605 /* FALLTHRU */
10606
10607 case SYMBOL_REF:
10608 /* TLS symbols are never valid. */
10609 if (SYMBOL_REF_TLS_MODEL (x))
10610 return false;
10611
10612 /* DLLIMPORT symbols are never valid. */
10613 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10614 && SYMBOL_REF_DLLIMPORT_P (x))
10615 return false;
10616
10617 #if TARGET_MACHO
10618 /* mdynamic-no-pic */
10619 if (MACHO_DYNAMIC_NO_PIC_P)
10620 return machopic_symbol_defined_p (x);
10621 #endif
10622
10623 /* External function address should be loaded
10624 via the GOT slot to avoid PLT. */
10625 if (ix86_force_load_from_GOT_p (x))
10626 return false;
10627
10628 break;
10629
10630 CASE_CONST_SCALAR_INT:
10631 if (ix86_endbr_immediate_operand (x, VOIDmode))
10632 return false;
10633
10634 switch (mode)
10635 {
10636 case E_TImode:
10637 if (TARGET_64BIT)
10638 return true;
10639 /* FALLTHRU */
10640 case E_OImode:
10641 case E_XImode:
10642 if (!standard_sse_constant_p (x, mode)
10643 && GET_MODE_SIZE (TARGET_AVX512F
10644 ? XImode
10645 : (TARGET_AVX
10646 ? OImode
10647 : (TARGET_SSE2
10648 ? TImode : DImode))) < GET_MODE_SIZE (mode))
10649 return false;
10650 default:
10651 break;
10652 }
10653 break;
10654
10655 case CONST_VECTOR:
10656 if (!standard_sse_constant_p (x, mode))
10657 return false;
10658
10659 default:
10660 break;
10661 }
10662
10663 /* Otherwise we handle everything else in the move patterns. */
10664 return true;
10665 }
10666
10667 /* Determine if it's legal to put X into the constant pool. This
10668 is not possible for the address of thread-local symbols, which
10669 is checked above. */
10670
10671 static bool
10672 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
10673 {
10674 /* We can put any immediate constant in memory. */
10675 switch (GET_CODE (x))
10676 {
10677 CASE_CONST_ANY:
10678 return false;
10679
10680 default:
10681 break;
10682 }
10683
10684 return !ix86_legitimate_constant_p (mode, x);
10685 }
10686
10687 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
10688 otherwise zero. */
10689
10690 static bool
10691 is_imported_p (rtx x)
10692 {
10693 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
10694 || GET_CODE (x) != SYMBOL_REF)
10695 return false;
10696
10697 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
10698 }
10699
10700
10701 /* Nonzero if the constant value X is a legitimate general operand
10702 when generating PIC code. It is given that flag_pic is on and
10703 that X satisfies CONSTANT_P. */
10704
10705 bool
10706 legitimate_pic_operand_p (rtx x)
10707 {
10708 rtx inner;
10709
10710 switch (GET_CODE (x))
10711 {
10712 case CONST:
10713 inner = XEXP (x, 0);
10714 if (GET_CODE (inner) == PLUS
10715 && CONST_INT_P (XEXP (inner, 1)))
10716 inner = XEXP (inner, 0);
10717
10718 /* Only some unspecs are valid as "constants". */
10719 if (GET_CODE (inner) == UNSPEC)
10720 switch (XINT (inner, 1))
10721 {
10722 case UNSPEC_GOT:
10723 case UNSPEC_GOTOFF:
10724 case UNSPEC_PLTOFF:
10725 return TARGET_64BIT;
10726 case UNSPEC_TPOFF:
10727 x = XVECEXP (inner, 0, 0);
10728 return (GET_CODE (x) == SYMBOL_REF
10729 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10730 case UNSPEC_MACHOPIC_OFFSET:
10731 return legitimate_pic_address_disp_p (x);
10732 default:
10733 return false;
10734 }
10735 /* FALLTHRU */
10736
10737 case SYMBOL_REF:
10738 case LABEL_REF:
10739 return legitimate_pic_address_disp_p (x);
10740
10741 default:
10742 return true;
10743 }
10744 }
10745
10746 /* Determine if a given CONST RTX is a valid memory displacement
10747 in PIC mode. */
10748
10749 bool
10750 legitimate_pic_address_disp_p (rtx disp)
10751 {
10752 bool saw_plus;
10753
10754 /* In 64bit mode we can allow direct addresses of symbols and labels
10755 when they are not dynamic symbols. */
10756 if (TARGET_64BIT)
10757 {
10758 rtx op0 = disp, op1;
10759
10760 switch (GET_CODE (disp))
10761 {
10762 case LABEL_REF:
10763 return true;
10764
10765 case CONST:
10766 if (GET_CODE (XEXP (disp, 0)) != PLUS)
10767 break;
10768 op0 = XEXP (XEXP (disp, 0), 0);
10769 op1 = XEXP (XEXP (disp, 0), 1);
10770 if (!CONST_INT_P (op1))
10771 break;
10772 if (GET_CODE (op0) == UNSPEC
10773 && (XINT (op0, 1) == UNSPEC_DTPOFF
10774 || XINT (op0, 1) == UNSPEC_NTPOFF)
10775 && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
10776 return true;
10777 if (INTVAL (op1) >= 16*1024*1024
10778 || INTVAL (op1) < -16*1024*1024)
10779 break;
10780 if (GET_CODE (op0) == LABEL_REF)
10781 return true;
10782 if (GET_CODE (op0) == CONST
10783 && GET_CODE (XEXP (op0, 0)) == UNSPEC
10784 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
10785 return true;
10786 if (GET_CODE (op0) == UNSPEC
10787 && XINT (op0, 1) == UNSPEC_PCREL)
10788 return true;
10789 if (GET_CODE (op0) != SYMBOL_REF)
10790 break;
10791 /* FALLTHRU */
10792
10793 case SYMBOL_REF:
10794 /* TLS references should always be enclosed in UNSPEC.
10795 A dllimported symbol always needs to be resolved. */
10796 if (SYMBOL_REF_TLS_MODEL (op0)
10797 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
10798 return false;
10799
10800 if (TARGET_PECOFF)
10801 {
10802 if (is_imported_p (op0))
10803 return true;
10804
10805 if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
10806 break;
10807
10808 /* Non-external-weak function symbols need to be resolved only
10809 for the large model. Non-external symbols don't need to be
10810 resolved for large and medium models. For the small model,
10811 we don't need to resolve anything here. */
10812 if ((ix86_cmodel != CM_LARGE_PIC
10813 && SYMBOL_REF_FUNCTION_P (op0)
10814 && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
10815 || !SYMBOL_REF_EXTERNAL_P (op0)
10816 || ix86_cmodel == CM_SMALL_PIC)
10817 return true;
10818 }
10819 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
10820 && (SYMBOL_REF_LOCAL_P (op0)
10821 || ((ix86_direct_extern_access
10822 && !(SYMBOL_REF_DECL (op0)
10823 && lookup_attribute ("nodirect_extern_access",
10824 DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0)))))
10825 && HAVE_LD_PIE_COPYRELOC
10826 && flag_pie
10827 && !SYMBOL_REF_WEAK (op0)
10828 && !SYMBOL_REF_FUNCTION_P (op0)))
10829 && ix86_cmodel != CM_LARGE_PIC)
10830 return true;
10831 break;
10832
10833 default:
10834 break;
10835 }
10836 }
10837 if (GET_CODE (disp) != CONST)
10838 return false;
10839 disp = XEXP (disp, 0);
10840
10841 if (TARGET_64BIT)
10842 {
10843 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
10844 of GOT tables. We should not need these anyway. */
10845 if (GET_CODE (disp) != UNSPEC
10846 || (XINT (disp, 1) != UNSPEC_GOTPCREL
10847 && XINT (disp, 1) != UNSPEC_GOTOFF
10848 && XINT (disp, 1) != UNSPEC_PCREL
10849 && XINT (disp, 1) != UNSPEC_PLTOFF))
10850 return false;
10851
10852 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
10853 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
10854 return false;
10855 return true;
10856 }
10857
10858 saw_plus = false;
10859 if (GET_CODE (disp) == PLUS)
10860 {
10861 if (!CONST_INT_P (XEXP (disp, 1)))
10862 return false;
10863 disp = XEXP (disp, 0);
10864 saw_plus = true;
10865 }
10866
10867 if (TARGET_MACHO && darwin_local_data_pic (disp))
10868 return true;
10869
10870 if (GET_CODE (disp) != UNSPEC)
10871 return false;
10872
10873 switch (XINT (disp, 1))
10874 {
10875 case UNSPEC_GOT:
10876 if (saw_plus)
10877 return false;
10878 /* We need to check for both symbols and labels because VxWorks loads
10879 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
10880 details. */
10881 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10882 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
10883 case UNSPEC_GOTOFF:
10884 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10885 While the ABI also specifies a 32bit relocation, we don't produce it
10886 in the small PIC model at all. */
10887 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10888 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
10889 && !TARGET_64BIT)
10890 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
10891 return false;
10892 case UNSPEC_GOTTPOFF:
10893 case UNSPEC_GOTNTPOFF:
10894 case UNSPEC_INDNTPOFF:
10895 if (saw_plus)
10896 return false;
10897 disp = XVECEXP (disp, 0, 0);
10898 return (GET_CODE (disp) == SYMBOL_REF
10899 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
10900 case UNSPEC_NTPOFF:
10901 disp = XVECEXP (disp, 0, 0);
10902 return (GET_CODE (disp) == SYMBOL_REF
10903 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
10904 case UNSPEC_DTPOFF:
10905 disp = XVECEXP (disp, 0, 0);
10906 return (GET_CODE (disp) == SYMBOL_REF
10907 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
10908 }
10909
10910 return false;
10911 }
10912
10913 /* Determine if OP is a suitable RTX for an address register.
10914 Return the naked register if a register or a register subreg is
10915 found, otherwise return NULL_RTX. */
10916
10917 static rtx
10918 ix86_validate_address_register (rtx op)
10919 {
10920 machine_mode mode = GET_MODE (op);
10921
10922 /* Only SImode or DImode registers can form the address. */
10923 if (mode != SImode && mode != DImode)
10924 return NULL_RTX;
10925
10926 if (REG_P (op))
10927 return op;
10928 else if (SUBREG_P (op))
10929 {
10930 rtx reg = SUBREG_REG (op);
10931
10932 if (!REG_P (reg))
10933 return NULL_RTX;
10934
10935 mode = GET_MODE (reg);
10936
10937 /* Don't allow SUBREGs that span more than a word. It can
10938 lead to spill failures when the register is one word out
10939 of a two word structure. */
10940 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
10941 return NULL_RTX;
10942
10943 /* Allow only SUBREGs of non-eliminable hard registers. */
10944 if (register_no_elim_operand (reg, mode))
10945 return reg;
10946 }
10947
10948 /* Op is not a register. */
10949 return NULL_RTX;
10950 }
10951
10952 /* Recognizes RTL expressions that are valid memory addresses for an
10953 instruction. The MODE argument is the machine mode for the MEM
10954 expression that wants to use this address.
10955
10956 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
10957 convert common non-canonical forms to canonical form so that they will
10958 be recognized. */
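/* Editorial note (illustrative sketch, not part of the original source):
   a canonical x86 address has the shape base + index*scale + disp, e.g.

       (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 8))

   with the scale restricted to 1, 2, 4 or 8.  The checks below validate
   each of these parts after ix86_decompose_address has split the
   address up.  */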
10959
10960 static bool
10961 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
10962 {
10963 struct ix86_address parts;
10964 rtx base, index, disp;
10965 HOST_WIDE_INT scale;
10966 addr_space_t seg;
10967
10968 if (ix86_decompose_address (addr, &parts) == 0)
10969 /* Decomposition failed. */
10970 return false;
10971
10972 base = parts.base;
10973 index = parts.index;
10974 disp = parts.disp;
10975 scale = parts.scale;
10976 seg = parts.seg;
10977
10978 /* Validate base register. */
10979 if (base)
10980 {
10981 rtx reg = ix86_validate_address_register (base);
10982
10983 if (reg == NULL_RTX)
10984 return false;
10985
10986 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
10987 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
10988 /* Base is not valid. */
10989 return false;
10990 }
10991
10992 /* Validate index register. */
10993 if (index)
10994 {
10995 rtx reg = ix86_validate_address_register (index);
10996
10997 if (reg == NULL_RTX)
10998 return false;
10999
11000 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11001 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11002 /* Index is not valid. */
11003 return false;
11004 }
11005
11006 /* Index and base should have the same mode. */
11007 if (base && index
11008 && GET_MODE (base) != GET_MODE (index))
11009 return false;
11010
11011 /* Address override works only on the (%reg) part of %fs:(%reg). */
11012 if (seg != ADDR_SPACE_GENERIC
11013 && ((base && GET_MODE (base) != word_mode)
11014 || (index && GET_MODE (index) != word_mode)))
11015 return false;
11016
11017 /* Validate scale factor. */
11018 if (scale != 1)
11019 {
11020 if (!index)
11021 /* Scale without index. */
11022 return false;
11023
11024 if (scale != 2 && scale != 4 && scale != 8)
11025 /* Scale is not a valid multiplier. */
11026 return false;
11027 }
11028
11029 /* Validate displacement. */
11030 if (disp)
11031 {
11032 if (ix86_endbr_immediate_operand (disp, VOIDmode))
11033 return false;
11034
11035 if (GET_CODE (disp) == CONST
11036 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11037 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11038 switch (XINT (XEXP (disp, 0), 1))
11039 {
11040 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
11041 when used. While the ABI also specifies 32bit relocations, we
11042 don't produce them at all and use IP-relative addressing instead.
11043 Allow GOT in 32bit mode for both PIC and non-PIC if the symbol
11044 should be loaded via the GOT. */
11045 case UNSPEC_GOT:
11046 if (!TARGET_64BIT
11047 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
11048 goto is_legitimate_pic;
11049 /* FALLTHRU */
11050 case UNSPEC_GOTOFF:
11051 gcc_assert (flag_pic);
11052 if (!TARGET_64BIT)
11053 goto is_legitimate_pic;
11054
11055 /* 64bit address unspec. */
11056 return false;
11057
11058 case UNSPEC_GOTPCREL:
11059 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
11060 goto is_legitimate_pic;
11061 /* FALLTHRU */
11062 case UNSPEC_PCREL:
11063 gcc_assert (flag_pic);
11064 goto is_legitimate_pic;
11065
11066 case UNSPEC_GOTTPOFF:
11067 case UNSPEC_GOTNTPOFF:
11068 case UNSPEC_INDNTPOFF:
11069 case UNSPEC_NTPOFF:
11070 case UNSPEC_DTPOFF:
11071 break;
11072
11073 default:
11074 /* Invalid address unspec. */
11075 return false;
11076 }
11077
11078 else if (SYMBOLIC_CONST (disp)
11079 && (flag_pic
11080 #if TARGET_MACHO
11081 || (MACHOPIC_INDIRECT
11082 && !machopic_operand_p (disp))
11083 #endif
11084 ))
11085 {
11086
11087 is_legitimate_pic:
11088 if (TARGET_64BIT && (index || base))
11089 {
11090 /* foo@dtpoff(%rX) is ok. */
11091 if (GET_CODE (disp) != CONST
11092 || GET_CODE (XEXP (disp, 0)) != PLUS
11093 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11094 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11095 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11096 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11097 /* Non-constant pic memory reference. */
11098 return false;
11099 }
11100 else if ((!TARGET_MACHO || flag_pic)
11101 && ! legitimate_pic_address_disp_p (disp))
11102 /* Displacement is an invalid pic construct. */
11103 return false;
11104 #if TARGET_MACHO
11105 else if (MACHO_DYNAMIC_NO_PIC_P
11106 && !ix86_legitimate_constant_p (Pmode, disp))
11107 /* displacement must be referenced via non_lazy_pointer */
11108 return false;
11109 #endif
11110
11111 /* This code used to verify that a symbolic pic displacement
11112 includes the pic_offset_table_rtx register.
11113
11114 While this is a good idea, unfortunately these constructs may
11115 be created by the "adds using lea" optimization for incorrect
11116 code like:
11117
11118 int a;
11119 int foo(int i)
11120 {
11121 return *(&a+i);
11122 }
11123
11124 This code is nonsensical, but results in addressing the
11125 GOT table with a pic_offset_table_rtx base. We can't
11126 just refuse it easily, since it gets matched by the
11127 "addsi3" pattern, which later gets split to lea in case
11128 the output register differs from the input. While this
11129 could be handled by a separate addsi pattern for this case
11130 that never results in lea, disabling this test seems to be
11131 the easier and correct fix for the crash. */
11132 }
11133 else if (GET_CODE (disp) != LABEL_REF
11134 && !CONST_INT_P (disp)
11135 && (GET_CODE (disp) != CONST
11136 || !ix86_legitimate_constant_p (Pmode, disp))
11137 && (GET_CODE (disp) != SYMBOL_REF
11138 || !ix86_legitimate_constant_p (Pmode, disp)))
11139 /* Displacement is not constant. */
11140 return false;
11141 else if (TARGET_64BIT
11142 && !x86_64_immediate_operand (disp, VOIDmode))
11143 /* Displacement is out of range. */
11144 return false;
11145 /* In x32 mode, constant addresses are sign extended to 64bit, so
11146 we have to prevent addresses from 0x80000000 to 0xffffffff. */
11147 else if (TARGET_X32 && !(index || base)
11148 && CONST_INT_P (disp)
11149 && val_signbit_known_set_p (SImode, INTVAL (disp)))
11150 return false;
11151 }
11152
11153 /* Everything looks valid. */
11154 return true;
11155 }
11156
11157 /* Determine if a given RTX is a valid constant address. */
11158
11159 bool
11160 constant_address_p (rtx x)
11161 {
11162 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11163 }
11164 \f
11165 /* Return a unique alias set for the GOT. */
11166
11167 alias_set_type
11168 ix86_GOT_alias_set (void)
11169 {
11170 static alias_set_type set = -1;
11171 if (set == -1)
11172 set = new_alias_set ();
11173 return set;
11174 }
11175
11176 /* Return a legitimate reference for ORIG (an address) using the
11177 register REG. If REG is 0, a new pseudo is generated.
11178
11179 There are two types of references that must be handled:
11180
11181 1. Global data references must load the address from the GOT, via
11182 the PIC reg. An insn is emitted to do this load, and the reg is
11183 returned.
11184
11185 2. Static data references, constant pool addresses, and code labels
11186 compute the address as an offset from the GOT, whose base is in
11187 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11188 differentiate them from global data objects. The returned
11189 address is the PIC reg + an unspec constant.
11190
11191 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11192 reg also appears in the address. */
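/* Editorial sketch (illustrative, not part of the original source):
   under 32-bit PIC a global symbol is typically turned into a load
   from the GOT, roughly

       (mem (plus (reg) (const (unspec [(symbol_ref "foo")] UNSPEC_GOT))))

   while a local symbol becomes an offset from the PIC base,

       (plus (reg) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))

   where the register is pic_offset_table_rtx.  The exact shape varies
   with the target, ABI and code model handled below.  */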
11193
11194 rtx
11195 legitimize_pic_address (rtx orig, rtx reg)
11196 {
11197 rtx addr = orig;
11198 rtx new_rtx = orig;
11199
11200 #if TARGET_MACHO
11201 if (TARGET_MACHO && !TARGET_64BIT)
11202 {
11203 if (reg == 0)
11204 reg = gen_reg_rtx (Pmode);
11205 /* Use the generic Mach-O PIC machinery. */
11206 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11207 }
11208 #endif
11209
11210 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11211 {
11212 rtx tmp = legitimize_pe_coff_symbol (addr, true);
11213 if (tmp)
11214 return tmp;
11215 }
11216
11217 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11218 new_rtx = addr;
11219 else if ((!TARGET_64BIT
11220 || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
11221 && !TARGET_PECOFF
11222 && gotoff_operand (addr, Pmode))
11223 {
11224 /* This symbol may be referenced via a displacement
11225 from the PIC base address (@GOTOFF). */
11226 if (GET_CODE (addr) == CONST)
11227 addr = XEXP (addr, 0);
11228
11229 if (GET_CODE (addr) == PLUS)
11230 {
11231 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11232 UNSPEC_GOTOFF);
11233 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11234 }
11235 else
11236 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11237
11238 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11239
11240 if (TARGET_64BIT)
11241 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
11242
11243 if (reg != 0)
11244 {
11245 gcc_assert (REG_P (reg));
11246 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
11247 new_rtx, reg, 1, OPTAB_DIRECT);
11248 }
11249 else
11250 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11251 }
11252 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11253 /* We can't always use @GOTOFF for text labels
11254 on VxWorks, see gotoff_operand. */
11255 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11256 {
11257 rtx tmp = legitimize_pe_coff_symbol (addr, true);
11258 if (tmp)
11259 return tmp;
11260
11261 /* For x64 PE-COFF there is no GOT table,
11262 so we use the address directly. */
11263 if (TARGET_64BIT && TARGET_PECOFF)
11264 {
11265 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
11266 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11267 }
11268 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11269 {
11270 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
11271 UNSPEC_GOTPCREL);
11272 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11273 new_rtx = gen_const_mem (Pmode, new_rtx);
11274 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11275 }
11276 else
11277 {
11278 /* This symbol must be referenced via a load
11279 from the Global Offset Table (@GOT). */
11280 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11281 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11282
11283 if (TARGET_64BIT)
11284 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
11285
11286 if (reg != 0)
11287 {
11288 gcc_assert (REG_P (reg));
11289 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
11290 new_rtx, reg, 1, OPTAB_DIRECT);
11291 }
11292 else
11293 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11294
11295 new_rtx = gen_const_mem (Pmode, new_rtx);
11296 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11297 }
11298
11299 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
11300 }
11301 else
11302 {
11303 if (CONST_INT_P (addr)
11304 && !x86_64_immediate_operand (addr, VOIDmode))
11305 new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
11306 else if (GET_CODE (addr) == CONST)
11307 {
11308 addr = XEXP (addr, 0);
11309
11310 /* We must match stuff we generate before. Assume the only
11311 unspecs that can get here are ours. Not that we could do
11312 anything with them anyway.... */
11313 if (GET_CODE (addr) == UNSPEC
11314 || (GET_CODE (addr) == PLUS
11315 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11316 return orig;
11317 gcc_assert (GET_CODE (addr) == PLUS);
11318 }
11319
11320 if (GET_CODE (addr) == PLUS)
11321 {
11322 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11323
11324 /* Check first to see if this is a constant
11325 offset from a @GOTOFF symbol reference. */
11326 if (!TARGET_PECOFF
11327 && gotoff_operand (op0, Pmode)
11328 && CONST_INT_P (op1))
11329 {
11330 if (!TARGET_64BIT)
11331 {
11332 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11333 UNSPEC_GOTOFF);
11334 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11335 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11336
11337 if (reg != 0)
11338 {
11339 gcc_assert (REG_P (reg));
11340 new_rtx = expand_simple_binop (Pmode, PLUS,
11341 pic_offset_table_rtx,
11342 new_rtx, reg, 1,
11343 OPTAB_DIRECT);
11344 }
11345 else
11346 new_rtx
11347 = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11348 }
11349 else
11350 {
11351 if (INTVAL (op1) < -16*1024*1024
11352 || INTVAL (op1) >= 16*1024*1024)
11353 {
11354 if (!x86_64_immediate_operand (op1, Pmode))
11355 op1 = force_reg (Pmode, op1);
11356
11357 new_rtx
11358 = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11359 }
11360 }
11361 }
11362 else
11363 {
11364 rtx base = legitimize_pic_address (op0, reg);
11365 machine_mode mode = GET_MODE (base);
11366 new_rtx
11367 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
11368
11369 if (CONST_INT_P (new_rtx))
11370 {
11371 if (INTVAL (new_rtx) < -16*1024*1024
11372 || INTVAL (new_rtx) >= 16*1024*1024)
11373 {
11374 if (!x86_64_immediate_operand (new_rtx, mode))
11375 new_rtx = force_reg (mode, new_rtx);
11376
11377 new_rtx
11378 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
11379 }
11380 else
11381 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
11382 }
11383 else
11384 {
11385 /* For %rip addressing, we have to use
11386 just disp32, not base nor index. */
11387 if (TARGET_64BIT
11388 && (GET_CODE (base) == SYMBOL_REF
11389 || GET_CODE (base) == LABEL_REF))
11390 base = force_reg (mode, base);
11391 if (GET_CODE (new_rtx) == PLUS
11392 && CONSTANT_P (XEXP (new_rtx, 1)))
11393 {
11394 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
11395 new_rtx = XEXP (new_rtx, 1);
11396 }
11397 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
11398 }
11399 }
11400 }
11401 }
11402 return new_rtx;
11403 }
11404 \f
11405 /* Load the thread pointer. If TO_REG is true, force it into a register. */
11406
11407 static rtx
11408 get_thread_pointer (machine_mode tp_mode, bool to_reg)
11409 {
11410 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11411
11412 if (GET_MODE (tp) != tp_mode)
11413 {
11414 gcc_assert (GET_MODE (tp) == SImode);
11415 gcc_assert (tp_mode == DImode);
11416
11417 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
11418 }
11419
11420 if (to_reg)
11421 tp = copy_to_mode_reg (tp_mode, tp);
11422
11423 return tp;
11424 }
11425
11426 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11427
11428 static GTY(()) rtx ix86_tls_symbol;
11429
11430 static rtx
11431 ix86_tls_get_addr (void)
11432 {
11433 if (!ix86_tls_symbol)
11434 {
11435 const char *sym
11436 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
11437 ? "___tls_get_addr" : "__tls_get_addr");
11438
11439 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
11440 }
11441
11442 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
11443 {
11444 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
11445 UNSPEC_PLTOFF);
11446 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
11447 gen_rtx_CONST (Pmode, unspec));
11448 }
11449
11450 return ix86_tls_symbol;
11451 }
11452
11453 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
11454
11455 static GTY(()) rtx ix86_tls_module_base_symbol;
11456
11457 rtx
11458 ix86_tls_module_base (void)
11459 {
11460 if (!ix86_tls_module_base_symbol)
11461 {
11462 ix86_tls_module_base_symbol
11463 = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");
11464
11465 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
11466 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
11467 }
11468
11469 return ix86_tls_module_base_symbol;
11470 }
11471
11472 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11473 false if we expect this to be used for a memory address and true if
11474 we expect to load the address into a register. */
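/* Editorial sketch (illustrative, not part of the original source):
   the TLS models expanded below correspond roughly to

     global dynamic: a call to __tls_get_addr for the symbol,
     local dynamic:  one __tls_get_addr call for the module base plus
                     per-symbol @dtpoff offsets,
     initial exec:   a @gottpoff load added to the thread pointer in
                     %fs/%gs,
     local exec:     the thread pointer plus a constant @tpoff/@ntpoff
                     offset.  */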
11475
11476 rtx
11477 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
11478 {
11479 rtx dest, base, off;
11480 rtx pic = NULL_RTX, tp = NULL_RTX;
11481 machine_mode tp_mode = Pmode;
11482 int type;
11483
11484 /* Fall back to the global dynamic model if the toolchain cannot support
11485 local dynamic. */
11486 if (TARGET_SUN_TLS && !TARGET_64BIT
11487 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
11488 && model == TLS_MODEL_LOCAL_DYNAMIC)
11489 model = TLS_MODEL_GLOBAL_DYNAMIC;
11490
11491 switch (model)
11492 {
11493 case TLS_MODEL_GLOBAL_DYNAMIC:
11494 if (!TARGET_64BIT)
11495 {
11496 if (flag_pic && !TARGET_PECOFF)
11497 pic = pic_offset_table_rtx;
11498 else
11499 {
11500 pic = gen_reg_rtx (Pmode);
11501 emit_insn (gen_set_got (pic));
11502 }
11503 }
11504
11505 if (TARGET_GNU2_TLS)
11506 {
11507 dest = gen_reg_rtx (ptr_mode);
11508 if (TARGET_64BIT)
11509 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
11510 else
11511 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
11512
11513 tp = get_thread_pointer (ptr_mode, true);
11514 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
11515 if (GET_MODE (dest) != Pmode)
11516 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
11517 dest = force_reg (Pmode, dest);
11518
11519 if (GET_MODE (x) != Pmode)
11520 x = gen_rtx_ZERO_EXTEND (Pmode, x);
11521
11522 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
11523 }
11524 else
11525 {
11526 rtx caddr = ix86_tls_get_addr ();
11527
11528 dest = gen_reg_rtx (Pmode);
11529 if (TARGET_64BIT)
11530 {
11531 rtx rax = gen_rtx_REG (Pmode, AX_REG);
11532 rtx_insn *insns;
11533
11534 start_sequence ();
11535 emit_call_insn
11536 (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr));
11537 insns = get_insns ();
11538 end_sequence ();
11539
11540 if (GET_MODE (x) != Pmode)
11541 x = gen_rtx_ZERO_EXTEND (Pmode, x);
11542
11543 RTL_CONST_CALL_P (insns) = 1;
11544 emit_libcall_block (insns, dest, rax, x);
11545 }
11546 else
11547 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
11548 }
11549 break;
11550
11551 case TLS_MODEL_LOCAL_DYNAMIC:
11552 if (!TARGET_64BIT)
11553 {
11554 if (flag_pic)
11555 pic = pic_offset_table_rtx;
11556 else
11557 {
11558 pic = gen_reg_rtx (Pmode);
11559 emit_insn (gen_set_got (pic));
11560 }
11561 }
11562
11563 if (TARGET_GNU2_TLS)
11564 {
11565 rtx tmp = ix86_tls_module_base ();
11566
11567 base = gen_reg_rtx (ptr_mode);
11568 if (TARGET_64BIT)
11569 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
11570 else
11571 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
11572
11573 tp = get_thread_pointer (ptr_mode, true);
11574 if (GET_MODE (base) != Pmode)
11575 base = gen_rtx_ZERO_EXTEND (Pmode, base);
11576 base = force_reg (Pmode, base);
11577 }
11578 else
11579 {
11580 rtx caddr = ix86_tls_get_addr ();
11581
11582 base = gen_reg_rtx (Pmode);
11583 if (TARGET_64BIT)
11584 {
11585 rtx rax = gen_rtx_REG (Pmode, AX_REG);
11586 rtx_insn *insns;
11587 rtx eqv;
11588
11589 start_sequence ();
11590 emit_call_insn
11591 (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr));
11592 insns = get_insns ();
11593 end_sequence ();
11594
11595 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
11596 share the LD_BASE result with other LD model accesses. */
11597 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11598 UNSPEC_TLS_LD_BASE);
11599
11600 RTL_CONST_CALL_P (insns) = 1;
11601 emit_libcall_block (insns, base, rax, eqv);
11602 }
11603 else
11604 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
11605 }
11606
11607 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11608 off = gen_rtx_CONST (Pmode, off);
11609
11610 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11611
11612 if (TARGET_GNU2_TLS)
11613 {
11614 if (GET_MODE (tp) != Pmode)
11615 {
11616 dest = lowpart_subreg (ptr_mode, dest, Pmode);
11617 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
11618 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
11619 }
11620 else
11621 dest = gen_rtx_PLUS (Pmode, tp, dest);
11622 dest = force_reg (Pmode, dest);
11623
11624 if (GET_MODE (x) != Pmode)
11625 x = gen_rtx_ZERO_EXTEND (Pmode, x);
11626
11627 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
11628 }
11629 break;
11630
11631 case TLS_MODEL_INITIAL_EXEC:
11632 if (TARGET_64BIT)
11633 {
11634 if (TARGET_SUN_TLS && !TARGET_X32)
11635 {
11636 /* The Sun linker took the AMD64 TLS spec literally
11637 and can only handle %rax as destination of the
11638 initial executable code sequence. */
11639
11640 dest = gen_reg_rtx (DImode);
11641 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
11642 return dest;
11643 }
11644
11645 /* Generate DImode references to avoid %fs:(%reg32)
11646 problems and linker IE->LE relaxation bug. */
11647 tp_mode = DImode;
11648 pic = NULL;
11649 type = UNSPEC_GOTNTPOFF;
11650 }
11651 else if (flag_pic)
11652 {
11653 pic = pic_offset_table_rtx;
11654 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11655 }
11656 else if (!TARGET_ANY_GNU_TLS)
11657 {
11658 pic = gen_reg_rtx (Pmode);
11659 emit_insn (gen_set_got (pic));
11660 type = UNSPEC_GOTTPOFF;
11661 }
11662 else
11663 {
11664 pic = NULL;
11665 type = UNSPEC_INDNTPOFF;
11666 }
11667
11668 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
11669 off = gen_rtx_CONST (tp_mode, off);
11670 if (pic)
11671 off = gen_rtx_PLUS (tp_mode, pic, off);
11672 off = gen_const_mem (tp_mode, off);
11673 set_mem_alias_set (off, ix86_GOT_alias_set ());
11674
11675 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11676 {
11677 base = get_thread_pointer (tp_mode,
11678 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11679 off = force_reg (tp_mode, off);
11680 dest = gen_rtx_PLUS (tp_mode, base, off);
11681 if (tp_mode != Pmode)
11682 dest = convert_to_mode (Pmode, dest, 1);
11683 }
11684 else
11685 {
11686 base = get_thread_pointer (Pmode, true);
11687 dest = gen_reg_rtx (Pmode);
11688 emit_insn (gen_sub3_insn (dest, base, off));
11689 }
11690 break;
11691
11692 case TLS_MODEL_LOCAL_EXEC:
11693 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11694 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11695 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11696 off = gen_rtx_CONST (Pmode, off);
11697
11698 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11699 {
11700 base = get_thread_pointer (Pmode,
11701 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11702 return gen_rtx_PLUS (Pmode, base, off);
11703 }
11704 else
11705 {
11706 base = get_thread_pointer (Pmode, true);
11707 dest = gen_reg_rtx (Pmode);
11708 emit_insn (gen_sub3_insn (dest, base, off));
11709 }
11710 break;
11711
11712 default:
11713 gcc_unreachable ();
11714 }
11715
11716 return dest;
11717 }
11718
11719 /* Return true if the TLS address requires an insn using integer registers.
11720 It's used to prevent KMOV/VMOV in TLS code sequences which require integer
11721 MOV instructions; see PR103275. */
11722 bool
11723 ix86_gpr_tls_address_pattern_p (rtx mem)
11724 {
11725 gcc_assert (MEM_P (mem));
11726
11727 rtx addr = XEXP (mem, 0);
11728 subrtx_var_iterator::array_type array;
11729 FOR_EACH_SUBRTX_VAR (iter, array, addr, ALL)
11730 {
11731 rtx op = *iter;
11732 if (GET_CODE (op) == UNSPEC)
11733 switch (XINT (op, 1))
11734 {
11735 case UNSPEC_GOTNTPOFF:
11736 return true;
11737 case UNSPEC_TPOFF:
11738 if (!TARGET_64BIT)
11739 return true;
11740 break;
11741 default:
11742 break;
11743 }
11744 }
11745
11746 return false;
11747 }
11748
11749 /* Return true if OP refers to a TLS address. */
11750 bool
11751 ix86_tls_address_pattern_p (rtx op)
11752 {
11753 subrtx_var_iterator::array_type array;
11754 FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
11755 {
11756 rtx op = *iter;
11757 if (MEM_P (op))
11758 {
11759 rtx *x = &XEXP (op, 0);
11760 while (GET_CODE (*x) == PLUS)
11761 {
11762 int i;
11763 for (i = 0; i < 2; i++)
11764 {
11765 rtx u = XEXP (*x, i);
11766 if (GET_CODE (u) == ZERO_EXTEND)
11767 u = XEXP (u, 0);
11768 if (GET_CODE (u) == UNSPEC
11769 && XINT (u, 1) == UNSPEC_TP)
11770 return true;
11771 }
11772 x = &XEXP (*x, 0);
11773 }
11774
11775 iter.skip_subrtxes ();
11776 }
11777 }
11778
11779 return false;
11780 }
11781
11782 /* Rewrite *LOC so that it refers to a default TLS address space. */
11783 void
11784 ix86_rewrite_tls_address_1 (rtx *loc)
11785 {
11786 subrtx_ptr_iterator::array_type array;
11787 FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
11788 {
11789 rtx *loc = *iter;
11790 if (MEM_P (*loc))
11791 {
11792 rtx addr = XEXP (*loc, 0);
11793 rtx *x = &addr;
11794 while (GET_CODE (*x) == PLUS)
11795 {
11796 int i;
11797 for (i = 0; i < 2; i++)
11798 {
11799 rtx u = XEXP (*x, i);
11800 if (GET_CODE (u) == ZERO_EXTEND)
11801 u = XEXP (u, 0);
11802 if (GET_CODE (u) == UNSPEC
11803 && XINT (u, 1) == UNSPEC_TP)
11804 {
11805 addr_space_t as = DEFAULT_TLS_SEG_REG;
11806
11807 *x = XEXP (*x, 1 - i);
11808
11809 *loc = replace_equiv_address_nv (*loc, addr, true);
11810 set_mem_addr_space (*loc, as);
11811 return;
11812 }
11813 }
11814 x = &XEXP (*x, 0);
11815 }
11816
11817 iter.skip_subrtxes ();
11818 }
11819 }
11820 }
11821
11822 /* Rewrite an instruction pattern involving a TLS address
11823 so that it refers to a default TLS address space. */
11824 rtx
11825 ix86_rewrite_tls_address (rtx pattern)
11826 {
11827 pattern = copy_insn (pattern);
11828 ix86_rewrite_tls_address_1 (&pattern);
11829 return pattern;
11830 }
11831
11832 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11833 to symbol DECL if BEIMPORT is true. Otherwise create or return the
11834 unique refptr-DECL symbol corresponding to symbol DECL. */
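/* Editorial note (illustrative, not part of the original source): for
   a dllimported symbol "foo", get_dllimport_decl below builds a stub
   named roughly "__imp_foo" (or "__imp__foo" when a user label prefix
   is in use); refptr stubs get a "refptr."/".refptr." prefix instead.
   The leading '*' in the prefixes merely suppresses further user label
   prefixing by the assembler-name machinery.  */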
11835
11836 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
11837 {
11838 static inline hashval_t hash (tree_map *m) { return m->hash; }
11839 static inline bool
11840 equal (tree_map *a, tree_map *b)
11841 {
11842 return a->base.from == b->base.from;
11843 }
11844
11845 static int
11846 keep_cache_entry (tree_map *&m)
11847 {
11848 return ggc_marked_p (m->base.from);
11849 }
11850 };
11851
11852 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
11853
11854 static tree
11855 get_dllimport_decl (tree decl, bool beimport)
11856 {
11857 struct tree_map *h, in;
11858 const char *name;
11859 const char *prefix;
11860 size_t namelen, prefixlen;
11861 char *imp_name;
11862 tree to;
11863 rtx rtl;
11864
11865 if (!dllimport_map)
11866 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
11867
11868 in.hash = htab_hash_pointer (decl);
11869 in.base.from = decl;
11870 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
11871 h = *loc;
11872 if (h)
11873 return h->to;
11874
11875 *loc = h = ggc_alloc<tree_map> ();
11876 h->hash = in.hash;
11877 h->base.from = decl;
11878 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11879 VAR_DECL, NULL, ptr_type_node);
11880 DECL_ARTIFICIAL (to) = 1;
11881 DECL_IGNORED_P (to) = 1;
11882 DECL_EXTERNAL (to) = 1;
11883 TREE_READONLY (to) = 1;
11884
11885 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11886 name = targetm.strip_name_encoding (name);
11887 if (beimport)
11888 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11889 ? "*__imp_" : "*__imp__";
11890 else
11891 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
11892 namelen = strlen (name);
11893 prefixlen = strlen (prefix);
11894 imp_name = (char *) alloca (namelen + prefixlen + 1);
11895 memcpy (imp_name, prefix, prefixlen);
11896 memcpy (imp_name + prefixlen, name, namelen + 1);
11897
11898 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11899 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11900 SET_SYMBOL_REF_DECL (rtl, to);
11901 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
11902 if (!beimport)
11903 {
11904 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
11905 #ifdef SUB_TARGET_RECORD_STUB
11906 SUB_TARGET_RECORD_STUB (name);
11907 #endif
11908 }
11909
11910 rtl = gen_const_mem (Pmode, rtl);
11911 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11912
11913 SET_DECL_RTL (to, rtl);
11914 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
11915
11916 return to;
11917 }
11918
11919 /* Expand SYMBOL into its corresponding far-address symbol.
11920 WANT_REG is true if we require the result be a register. */
11921
11922 static rtx
11923 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
11924 {
11925 tree imp_decl;
11926 rtx x;
11927
11928 gcc_assert (SYMBOL_REF_DECL (symbol));
11929 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
11930
11931 x = DECL_RTL (imp_decl);
11932 if (want_reg)
11933 x = force_reg (Pmode, x);
11934 return x;
11935 }
11936
11937 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11938 true if we require the result be a register. */
11939
11940 static rtx
11941 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11942 {
11943 tree imp_decl;
11944 rtx x;
11945
11946 gcc_assert (SYMBOL_REF_DECL (symbol));
11947 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
11948
11949 x = DECL_RTL (imp_decl);
11950 if (want_reg)
11951 x = force_reg (Pmode, x);
11952 return x;
11953 }
11954
11955 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
11956 is true if we require the result be a register. */
11957
11958 rtx
11959 legitimize_pe_coff_symbol (rtx addr, bool inreg)
11960 {
11961 if (!TARGET_PECOFF)
11962 return NULL_RTX;
11963
11964 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11965 {
11966 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11967 return legitimize_dllimport_symbol (addr, inreg);
11968 if (GET_CODE (addr) == CONST
11969 && GET_CODE (XEXP (addr, 0)) == PLUS
11970 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11971 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11972 {
11973 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
11974 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11975 }
11976 }
11977
11978 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
11979 return NULL_RTX;
11980 if (GET_CODE (addr) == SYMBOL_REF
11981 && !is_imported_p (addr)
11982 && SYMBOL_REF_EXTERNAL_P (addr)
11983 && SYMBOL_REF_DECL (addr))
11984 return legitimize_pe_coff_extern_decl (addr, inreg);
11985
11986 if (GET_CODE (addr) == CONST
11987 && GET_CODE (XEXP (addr, 0)) == PLUS
11988 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11989 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
11990 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
11991 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
11992 {
11993 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
11994 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11995 }
11996 return NULL_RTX;
11997 }
11998
11999 /* Try machine-dependent ways of modifying an illegitimate address
12000 to be legitimate. If we find one, return the new, valid address.
12001 This macro is used in only one place: `memory_address' in explow.cc.
12002
12003 OLDX is the address as it was before break_out_memory_refs was called.
12004 In some cases it is useful to look at this to decide what needs to be done.
12005
12006 It is always safe for this macro to do nothing. It exists to recognize
12007 opportunities to optimize the output.
12008
12009 For the 80386, we handle X+REG by loading X into a register R and
12010 using R+REG. R will go in a general reg and indexing will be used.
12011 However, if REG is a broken-out memory address or multiplication,
12012 nothing needs to be done because REG can certainly go in a general reg.
12013
12014 When -fpic is used, special handling is needed for symbolic references.
12015 See comments by legitimize_pic_address in i386.cc for details. */
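/* Editorial sketch (illustrative, not part of the original source):
   one canonicalization below rewrites a small left shift into a
   multiply so it can serve as the scale of an address, e.g.

       (plus (ashift (reg) (const_int 2)) (reg))
    -> (plus (mult (reg) (const_int 4)) (reg))

   which ix86_legitimate_address_p then accepts as index*4 + base.  */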
12016
12017 static rtx
12018 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
12019 {
12020 bool changed = false;
12021 unsigned log;
12022
12023 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12024 if (log)
12025 return legitimize_tls_address (x, (enum tls_model) log, false);
12026 if (GET_CODE (x) == CONST
12027 && GET_CODE (XEXP (x, 0)) == PLUS
12028 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12029 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12030 {
12031 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12032 (enum tls_model) log, false);
12033 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12034 }
12035
12036 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12037 {
12038 rtx tmp = legitimize_pe_coff_symbol (x, true);
12039 if (tmp)
12040 return tmp;
12041 }
12042
12043 if (flag_pic && SYMBOLIC_CONST (x))
12044 return legitimize_pic_address (x, 0);
12045
12046 #if TARGET_MACHO
12047 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12048 return machopic_indirect_data_reference (x, 0);
12049 #endif
12050
12051 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12052 if (GET_CODE (x) == ASHIFT
12053 && CONST_INT_P (XEXP (x, 1))
12054 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12055 {
12056 changed = true;
12057 log = INTVAL (XEXP (x, 1));
12058 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12059 GEN_INT (1 << log));
12060 }
12061
12062 if (GET_CODE (x) == PLUS)
12063 {
12064 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12065
12066 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12067 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12068 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12069 {
12070 changed = true;
12071 log = INTVAL (XEXP (XEXP (x, 0), 1));
12072 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12073 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12074 GEN_INT (1 << log));
12075 }
12076
12077 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12078 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12079 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12080 {
12081 changed = true;
12082 log = INTVAL (XEXP (XEXP (x, 1), 1));
12083 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12084 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12085 GEN_INT (1 << log));
12086 }
12087
12088 /* Put multiply first if it isn't already. */
12089 if (GET_CODE (XEXP (x, 1)) == MULT)
12090 {
12091 std::swap (XEXP (x, 0), XEXP (x, 1));
12092 changed = true;
12093 }
12094
12095 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12096 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12097 created by virtual register instantiation, register elimination, and
12098 similar optimizations. */
12099 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12100 {
12101 changed = true;
12102 x = gen_rtx_PLUS (Pmode,
12103 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12104 XEXP (XEXP (x, 1), 0)),
12105 XEXP (XEXP (x, 1), 1));
12106 }
12107
12108 /* Canonicalize
12109 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12110 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12111 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12112 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12113 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12114 && CONSTANT_P (XEXP (x, 1)))
12115 {
12116 rtx constant;
12117 rtx other = NULL_RTX;
12118
12119 if (CONST_INT_P (XEXP (x, 1)))
12120 {
12121 constant = XEXP (x, 1);
12122 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12123 }
12124 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12125 {
12126 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12127 other = XEXP (x, 1);
12128 }
12129 else
12130 constant = 0;
12131
12132 if (constant)
12133 {
12134 changed = true;
12135 x = gen_rtx_PLUS (Pmode,
12136 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12137 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12138 plus_constant (Pmode, other,
12139 INTVAL (constant)));
12140 }
12141 }
12142
12143 if (changed && ix86_legitimate_address_p (mode, x, false))
12144 return x;
12145
12146 if (GET_CODE (XEXP (x, 0)) == MULT)
12147 {
12148 changed = true;
12149 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
12150 }
12151
12152 if (GET_CODE (XEXP (x, 1)) == MULT)
12153 {
12154 changed = true;
12155 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
12156 }
12157
12158 if (changed
12159 && REG_P (XEXP (x, 1))
12160 && REG_P (XEXP (x, 0)))
12161 return x;
12162
12163 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12164 {
12165 changed = true;
12166 x = legitimize_pic_address (x, 0);
12167 }
12168
12169 if (changed && ix86_legitimate_address_p (mode, x, false))
12170 return x;
12171
12172 if (REG_P (XEXP (x, 0)))
12173 {
12174 rtx temp = gen_reg_rtx (Pmode);
12175 rtx val = force_operand (XEXP (x, 1), temp);
12176 if (val != temp)
12177 {
12178 val = convert_to_mode (Pmode, val, 1);
12179 emit_move_insn (temp, val);
12180 }
12181
12182 XEXP (x, 1) = temp;
12183 return x;
12184 }
12185
12186 else if (REG_P (XEXP (x, 1)))
12187 {
12188 rtx temp = gen_reg_rtx (Pmode);
12189 rtx val = force_operand (XEXP (x, 0), temp);
12190 if (val != temp)
12191 {
12192 val = convert_to_mode (Pmode, val, 1);
12193 emit_move_insn (temp, val);
12194 }
12195
12196 XEXP (x, 0) = temp;
12197 return x;
12198 }
12199 }
12200
12201 return x;
12202 }
12203 \f
12204 /* Print an integer constant expression in assembler syntax. Addition
12205 and subtraction are the only arithmetic that may appear in these
12206 expressions. FILE is the stdio stream to write to, X is the rtx, and
12207 CODE is the operand print code from the output string. */
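/* Editorial note (illustrative, not part of the original source): for
   example, (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) is
   printed as "foo@GOTOFF", and the 64-bit GOT form as
   "foo@GOTPCREL(%rip)" in AT&T syntax.  */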
12208
12209 static void
12210 output_pic_addr_const (FILE *file, rtx x, int code)
12211 {
12212 char buf[256];
12213
12214 switch (GET_CODE (x))
12215 {
12216 case PC:
12217 gcc_assert (flag_pic);
12218 putc ('.', file);
12219 break;
12220
12221 case SYMBOL_REF:
12222 if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
12223 output_addr_const (file, x);
12224 else
12225 {
12226 const char *name = XSTR (x, 0);
12227
12228 /* Mark the decl as referenced so that cgraph will
12229 output the function. */
12230 if (SYMBOL_REF_DECL (x))
12231 mark_decl_referenced (SYMBOL_REF_DECL (x));
12232
12233 #if TARGET_MACHO
12234 if (MACHOPIC_INDIRECT
12235 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12236 name = machopic_indirection_name (x, /*stub_p=*/true);
12237 #endif
12238 assemble_name (file, name);
12239 }
12240 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
12241 && code == 'P' && ix86_call_use_plt_p (x))
12242 fputs ("@PLT", file);
12243 break;
12244
12245 case LABEL_REF:
12246 x = XEXP (x, 0);
12247 /* FALLTHRU */
12248 case CODE_LABEL:
12249 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12250 assemble_name (asm_out_file, buf);
12251 break;
12252
12253 case CONST_INT:
12254 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12255 break;
12256
12257 case CONST:
12258 /* This used to output parentheses around the expression,
12259 but that does not work on the 386 (either ATT or BSD assembler). */
12260 output_pic_addr_const (file, XEXP (x, 0), code);
12261 break;
12262
12263 case CONST_DOUBLE:
12264 /* We can't handle floating point constants;
12265 TARGET_PRINT_OPERAND must handle them. */
12266 output_operand_lossage ("floating constant misused");
12267 break;
12268
12269 case PLUS:
12270 /* Some assemblers need integer constants to appear first. */
12271 if (CONST_INT_P (XEXP (x, 0)))
12272 {
12273 output_pic_addr_const (file, XEXP (x, 0), code);
12274 putc ('+', file);
12275 output_pic_addr_const (file, XEXP (x, 1), code);
12276 }
12277 else
12278 {
12279 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12280 output_pic_addr_const (file, XEXP (x, 1), code);
12281 putc ('+', file);
12282 output_pic_addr_const (file, XEXP (x, 0), code);
12283 }
12284 break;
12285
12286 case MINUS:
12287 if (!TARGET_MACHO)
12288 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12289 output_pic_addr_const (file, XEXP (x, 0), code);
12290 putc ('-', file);
12291 output_pic_addr_const (file, XEXP (x, 1), code);
12292 if (!TARGET_MACHO)
12293 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12294 break;
12295
12296 case UNSPEC:
12297 gcc_assert (XVECLEN (x, 0) == 1);
12298 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12299 switch (XINT (x, 1))
12300 {
12301 case UNSPEC_GOT:
12302 fputs ("@GOT", file);
12303 break;
12304 case UNSPEC_GOTOFF:
12305 fputs ("@GOTOFF", file);
12306 break;
12307 case UNSPEC_PLTOFF:
12308 fputs ("@PLTOFF", file);
12309 break;
12310 case UNSPEC_PCREL:
12311 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12312 "(%rip)" : "[rip]", file);
12313 break;
12314 case UNSPEC_GOTPCREL:
12315 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12316 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12317 break;
12318 case UNSPEC_GOTTPOFF:
12319 /* FIXME: This might be @TPOFF in Sun ld too. */
12320 fputs ("@gottpoff", file);
12321 break;
12322 case UNSPEC_TPOFF:
12323 fputs ("@tpoff", file);
12324 break;
12325 case UNSPEC_NTPOFF:
12326 if (TARGET_64BIT)
12327 fputs ("@tpoff", file);
12328 else
12329 fputs ("@ntpoff", file);
12330 break;
12331 case UNSPEC_DTPOFF:
12332 fputs ("@dtpoff", file);
12333 break;
12334 case UNSPEC_GOTNTPOFF:
12335 if (TARGET_64BIT)
12336 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12337 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12338 else
12339 fputs ("@gotntpoff", file);
12340 break;
12341 case UNSPEC_INDNTPOFF:
12342 fputs ("@indntpoff", file);
12343 break;
12344 #if TARGET_MACHO
12345 case UNSPEC_MACHOPIC_OFFSET:
12346 putc ('-', file);
12347 machopic_output_function_base_name (file);
12348 break;
12349 #endif
12350 default:
12351 output_operand_lossage ("invalid UNSPEC as operand");
12352 break;
12353 }
12354 break;
12355
12356 default:
12357 output_operand_lossage ("invalid expression as operand");
12358 }
12359 }
12360
12361 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12362 We need to emit DTP-relative relocations. */
12363
12364 static void ATTRIBUTE_UNUSED
12365 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12366 {
12367 fputs (ASM_LONG, file);
12368 output_addr_const (file, x);
12369 fputs ("@dtpoff", file);
12370 switch (size)
12371 {
12372 case 4:
12373 break;
12374 case 8:
12375 fputs (", 0", file);
12376 break;
12377 default:
12378 gcc_unreachable ();
12379 }
12380 }
12381
12382 /* Return true if X is a representation of the PIC register. This copes
12383 with calls from ix86_find_base_term, where the register might have
12384 been replaced by a cselib value. */
12385
12386 static bool
12387 ix86_pic_register_p (rtx x)
12388 {
12389 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12390 return (pic_offset_table_rtx
12391 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12392 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
12393 return true;
12394 else if (!REG_P (x))
12395 return false;
12396 else if (pic_offset_table_rtx)
12397 {
12398 if (REGNO (x) == REGNO (pic_offset_table_rtx))
12399 return true;
12400 if (HARD_REGISTER_P (x)
12401 && !HARD_REGISTER_P (pic_offset_table_rtx)
12402 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
12403 return true;
12404 return false;
12405 }
12406 else
12407 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12408 }
12409
12410 /* Helper function for ix86_delegitimize_address.
12411 Attempt to delegitimize TLS local-exec accesses. */
12412
12413 static rtx
12414 ix86_delegitimize_tls_address (rtx orig_x)
12415 {
12416 rtx x = orig_x, unspec;
12417 struct ix86_address addr;
12418
12419 if (!TARGET_TLS_DIRECT_SEG_REFS)
12420 return orig_x;
12421 if (MEM_P (x))
12422 x = XEXP (x, 0);
12423 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12424 return orig_x;
12425 if (ix86_decompose_address (x, &addr) == 0
12426 || addr.seg != DEFAULT_TLS_SEG_REG
12427 || addr.disp == NULL_RTX
12428 || GET_CODE (addr.disp) != CONST)
12429 return orig_x;
12430 unspec = XEXP (addr.disp, 0);
12431 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12432 unspec = XEXP (unspec, 0);
12433 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12434 return orig_x;
12435 x = XVECEXP (unspec, 0, 0);
12436 gcc_assert (GET_CODE (x) == SYMBOL_REF);
12437 if (unspec != XEXP (addr.disp, 0))
12438 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
12439 if (addr.index)
12440 {
12441 rtx idx = addr.index;
12442 if (addr.scale != 1)
12443 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
12444 x = gen_rtx_PLUS (Pmode, idx, x);
12445 }
12446 if (addr.base)
12447 x = gen_rtx_PLUS (Pmode, addr.base, x);
12448 if (MEM_P (orig_x))
12449 x = replace_equiv_address_nv (orig_x, x);
12450 return x;
12451 }
12452
12453 /* In the name of slightly smaller debug output, and to cater to
12454 general assembler lossage, recognize PIC+GOTOFF and turn it back
12455 into a direct symbol reference.
12456
12457 On Darwin, this is necessary to avoid a crash, because Darwin
12458 has a different PIC label for each routine but the DWARF debugging
12459 information is not associated with any particular routine, so it's
12460 necessary to remove references to the PIC label from RTL stored by
12461 the DWARF output code.
12462
12463 This helper is used in the normal ix86_delegitimize_address
12464 entrypoint (e.g. used in the target delegitimization hook) and
12465 in ix86_find_base_term. As a compile-time memory optimization, we
12466 avoid allocating rtxes that will not change anything in the outcome
12467 of the callers (find_base_value and find_base_term). */
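/* Editorial sketch (illustrative, not part of the original source):
   e.g. (plus (reg ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   is turned back into (symbol_ref "foo"), with any constant or register
   addend around it re-applied to the result.  */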
12468
12469 static inline rtx
12470 ix86_delegitimize_address_1 (rtx x, bool base_term_p)
12471 {
12472 rtx orig_x = delegitimize_mem_from_attrs (x);
12473 /* addend is NULL or some rtx if x is something+GOTOFF where
12474 something doesn't include the PIC register. */
12475 rtx addend = NULL_RTX;
12476 /* reg_addend is NULL or a multiple of some register. */
12477 rtx reg_addend = NULL_RTX;
12478 /* const_addend is NULL or a const_int. */
12479 rtx const_addend = NULL_RTX;
12480 /* This is the result, or NULL. */
12481 rtx result = NULL_RTX;
12482
12483 x = orig_x;
12484
12485 if (MEM_P (x))
12486 x = XEXP (x, 0);
12487
12488 if (TARGET_64BIT)
12489 {
12490 if (GET_CODE (x) == CONST
12491 && GET_CODE (XEXP (x, 0)) == PLUS
12492 && GET_MODE (XEXP (x, 0)) == Pmode
12493 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12494 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
12495 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
12496 {
12497 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
12498 base. A CONST can't be arg_pointer_rtx based. */
12499 if (base_term_p && MEM_P (orig_x))
12500 return orig_x;
12501 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
12502 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
12503 if (MEM_P (orig_x))
12504 x = replace_equiv_address_nv (orig_x, x);
12505 return x;
12506 }
12507
12508 if (GET_CODE (x) == CONST
12509 && GET_CODE (XEXP (x, 0)) == UNSPEC
12510 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
12511 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
12512 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
12513 {
12514 x = XVECEXP (XEXP (x, 0), 0, 0);
12515 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
12516 {
12517 x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
12518 if (x == NULL_RTX)
12519 return orig_x;
12520 }
12521 return x;
12522 }
12523
12524 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
12525 return ix86_delegitimize_tls_address (orig_x);
12526
12527 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
12528 and -mcmodel=medium -fpic. */
12529 }
12530
12531 if (GET_CODE (x) != PLUS
12532 || GET_CODE (XEXP (x, 1)) != CONST)
12533 return ix86_delegitimize_tls_address (orig_x);
12534
12535 if (ix86_pic_register_p (XEXP (x, 0)))
12536 /* %ebx + GOT/GOTOFF */
12537 ;
12538 else if (GET_CODE (XEXP (x, 0)) == PLUS)
12539 {
12540 /* %ebx + %reg * scale + GOT/GOTOFF */
12541 reg_addend = XEXP (x, 0);
12542 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
12543 reg_addend = XEXP (reg_addend, 1);
12544 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
12545 reg_addend = XEXP (reg_addend, 0);
12546 else
12547 {
12548 reg_addend = NULL_RTX;
12549 addend = XEXP (x, 0);
12550 }
12551 }
12552 else
12553 addend = XEXP (x, 0);
12554
12555 x = XEXP (XEXP (x, 1), 0);
12556 if (GET_CODE (x) == PLUS
12557 && CONST_INT_P (XEXP (x, 1)))
12558 {
12559 const_addend = XEXP (x, 1);
12560 x = XEXP (x, 0);
12561 }
12562
12563 if (GET_CODE (x) == UNSPEC
12564 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
12565 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
12566 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
12567 && !MEM_P (orig_x) && !addend)))
12568 result = XVECEXP (x, 0, 0);
12569
12570 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
12571 && !MEM_P (orig_x))
12572 result = XVECEXP (x, 0, 0);
12573
12574 if (! result)
12575 return ix86_delegitimize_tls_address (orig_x);
12576
12577 /* For (PLUS something CONST_INT) both find_base_{value,term} just
12578 recurse on the first operand. */
12579 if (const_addend && !base_term_p)
12580 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12581 if (reg_addend)
12582 result = gen_rtx_PLUS (Pmode, reg_addend, result);
12583 if (addend)
12584 {
12585 /* If the rest of original X doesn't involve the PIC register, add
12586 addend and subtract pic_offset_table_rtx. This can happen e.g.
12587 for code like:
12588 leal (%ebx, %ecx, 4), %ecx
12589 ...
12590 movl foo@GOTOFF(%ecx), %edx
12591 in which case we return (%ecx - %ebx) + foo
12592 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
12593 and reload has completed. Don't do the latter for debug,
12594 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
12595 if (pic_offset_table_rtx
12596 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
12597 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12598 pic_offset_table_rtx),
12599 result);
12600 else if (base_term_p
12601 && pic_offset_table_rtx
12602 && !TARGET_MACHO
12603 && !TARGET_VXWORKS_RTP)
12604 {
12605 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
12606 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
12607 result = gen_rtx_PLUS (Pmode, tmp, result);
12608 }
12609 else
12610 return orig_x;
12611 }
12612 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12613 {
12614 result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
12615 if (result == NULL_RTX)
12616 return orig_x;
12617 }
12618 return result;
12619 }
12620
12621 /* The normal instantiation of the above template. */
12622
12623 static rtx
12624 ix86_delegitimize_address (rtx x)
12625 {
12626 return ix86_delegitimize_address_1 (x, false);
12627 }
12628
12629 /* If X is a machine specific address (i.e. a symbol or label being
12630 referenced as a displacement from the GOT implemented using an
12631 UNSPEC), then return the base term. Otherwise return X. */
12632
12633 rtx
12634 ix86_find_base_term (rtx x)
12635 {
12636 rtx term;
12637
12638 if (TARGET_64BIT)
12639 {
12640 if (GET_CODE (x) != CONST)
12641 return x;
12642 term = XEXP (x, 0);
12643 if (GET_CODE (term) == PLUS
12644 && CONST_INT_P (XEXP (term, 1)))
12645 term = XEXP (term, 0);
12646 if (GET_CODE (term) != UNSPEC
12647 || (XINT (term, 1) != UNSPEC_GOTPCREL
12648 && XINT (term, 1) != UNSPEC_PCREL))
12649 return x;
12650
12651 return XVECEXP (term, 0, 0);
12652 }
12653
12654 return ix86_delegitimize_address_1 (x, true);
12655 }
12656
12657 /* Return true if X shouldn't be emitted into the debug info.
12658 Disallow UNSPECs other than @gotoff - we can't easily emit the
12659 _GLOBAL_OFFSET_TABLE_ symbol into the .debug_info section, so
12660 instead of delegitimizing we assemble it as @gotoff.
12661 Disallow the _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
12662 assembles that as a _GLOBAL_OFFSET_TABLE_-. expression. */
12663
12664 static bool
12665 ix86_const_not_ok_for_debug_p (rtx x)
12666 {
12667 if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
12668 return true;
12669
12670 if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
12671 return true;
12672
12673 return false;
12674 }
12675 \f
12676 static void
12677 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
12678 bool fp, FILE *file)
12679 {
12680 const char *suffix;
12681
12682 if (mode == CCFPmode)
12683 {
12684 code = ix86_fp_compare_code_to_integer (code);
12685 mode = CCmode;
12686 }
12687 if (reverse)
12688 code = reverse_condition (code);
12689
12690 switch (code)
12691 {
12692 case EQ:
12693 gcc_assert (mode != CCGZmode);
12694 switch (mode)
12695 {
12696 case E_CCAmode:
12697 suffix = "a";
12698 break;
12699 case E_CCCmode:
12700 suffix = "c";
12701 break;
12702 case E_CCOmode:
12703 suffix = "o";
12704 break;
12705 case E_CCPmode:
12706 suffix = "p";
12707 break;
12708 case E_CCSmode:
12709 suffix = "s";
12710 break;
12711 default:
12712 suffix = "e";
12713 break;
12714 }
12715 break;
12716 case NE:
12717 gcc_assert (mode != CCGZmode);
12718 switch (mode)
12719 {
12720 case E_CCAmode:
12721 suffix = "na";
12722 break;
12723 case E_CCCmode:
12724 suffix = "nc";
12725 break;
12726 case E_CCOmode:
12727 suffix = "no";
12728 break;
12729 case E_CCPmode:
12730 suffix = "np";
12731 break;
12732 case E_CCSmode:
12733 suffix = "ns";
12734 break;
12735 default:
12736 suffix = "ne";
12737 break;
12738 }
12739 break;
12740 case GT:
12741 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
12742 suffix = "g";
12743 break;
12744 case GTU:
12745 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12746 Those same assemblers have the same but opposite lossage on cmov. */
12747 if (mode == CCmode)
12748 suffix = fp ? "nbe" : "a";
12749 else
12750 gcc_unreachable ();
12751 break;
12752 case LT:
12753 switch (mode)
12754 {
12755 case E_CCNOmode:
12756 case E_CCGOCmode:
12757 suffix = "s";
12758 break;
12759
12760 case E_CCmode:
12761 case E_CCGCmode:
12762 case E_CCGZmode:
12763 suffix = "l";
12764 break;
12765
12766 default:
12767 gcc_unreachable ();
12768 }
12769 break;
12770 case LTU:
12771 if (mode == CCmode || mode == CCGZmode)
12772 suffix = "b";
12773 else if (mode == CCCmode)
12774 suffix = fp ? "b" : "c";
12775 else
12776 gcc_unreachable ();
12777 break;
12778 case GE:
12779 switch (mode)
12780 {
12781 case E_CCNOmode:
12782 case E_CCGOCmode:
12783 suffix = "ns";
12784 break;
12785
12786 case E_CCmode:
12787 case E_CCGCmode:
12788 case E_CCGZmode:
12789 suffix = "ge";
12790 break;
12791
12792 default:
12793 gcc_unreachable ();
12794 }
12795 break;
12796 case GEU:
12797 if (mode == CCmode || mode == CCGZmode)
12798 suffix = "nb";
12799 else if (mode == CCCmode)
12800 suffix = fp ? "nb" : "nc";
12801 else
12802 gcc_unreachable ();
12803 break;
12804 case LE:
12805 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12806 suffix = "le";
12807 break;
12808 case LEU:
12809 if (mode == CCmode)
12810 suffix = "be";
12811 else
12812 gcc_unreachable ();
12813 break;
12814 case UNORDERED:
12815 suffix = fp ? "u" : "p";
12816 break;
12817 case ORDERED:
12818 suffix = fp ? "nu" : "np";
12819 break;
12820 default:
12821 gcc_unreachable ();
12822 }
12823 fputs (suffix, file);
12824 }
12825
12826 /* Print the name of register X to FILE based on its machine mode and number.
12827 If CODE is 'w', pretend the mode is HImode.
12828 If CODE is 'b', pretend the mode is QImode.
12829 If CODE is 'k', pretend the mode is SImode.
12830 If CODE is 'q', pretend the mode is DImode.
12831 If CODE is 'x', pretend the mode is V4SFmode.
12832 If CODE is 't', pretend the mode is V8SFmode.
12833 If CODE is 'g', pretend the mode is V16SFmode.
12834 If CODE is 'h', pretend the reg is the 'high' byte register.
12835 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
12836 If CODE is 'd', duplicate the operand for AVX instruction.
12837 If CODE is 'V', print naked full integer register name without %.
12838 */
12839
12840 void
12841 print_reg (rtx x, int code, FILE *file)
12842 {
12843 const char *reg;
12844 int msize;
12845 unsigned int regno;
12846 bool duplicated;
12847
12848 if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
12849 putc ('%', file);
12850
12851 if (x == pc_rtx)
12852 {
12853 gcc_assert (TARGET_64BIT);
12854 fputs ("rip", file);
12855 return;
12856 }
12857
12858 if (code == 'y' && STACK_TOP_P (x))
12859 {
12860 fputs ("st(0)", file);
12861 return;
12862 }
12863
12864 if (code == 'w')
12865 msize = 2;
12866 else if (code == 'b')
12867 msize = 1;
12868 else if (code == 'k')
12869 msize = 4;
12870 else if (code == 'q')
12871 msize = 8;
12872 else if (code == 'h')
12873 msize = 0;
12874 else if (code == 'x')
12875 msize = 16;
12876 else if (code == 't')
12877 msize = 32;
12878 else if (code == 'g')
12879 msize = 64;
12880 else
12881 msize = GET_MODE_SIZE (GET_MODE (x));
12882
12883 regno = REGNO (x);
12884
12885 if (regno == ARG_POINTER_REGNUM
12886 || regno == FRAME_POINTER_REGNUM
12887 || regno == FPSR_REG)
12888 {
12889 output_operand_lossage
12890 ("invalid use of register '%s'", reg_names[regno]);
12891 return;
12892 }
12893 else if (regno == FLAGS_REG)
12894 {
12895 output_operand_lossage ("invalid use of asm flag output");
12896 return;
12897 }
12898
12899 if (code == 'V')
12900 {
12901 if (GENERAL_REGNO_P (regno))
12902 msize = GET_MODE_SIZE (word_mode);
12903 else
12904 error ("%<V%> modifier on non-integer register");
12905 }
12906
12907 duplicated = code == 'd' && TARGET_AVX;
12908
12909 switch (msize)
12910 {
12911 case 16:
12912 case 12:
12913 case 8:
12914 if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
12915 warning (0, "unsupported size for integer register");
12916 /* FALLTHRU */
12917 case 4:
12918 if (LEGACY_INT_REGNO_P (regno))
12919 putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
12920 /* FALLTHRU */
12921 case 2:
12922 normal:
12923 reg = hi_reg_name[regno];
12924 break;
12925 case 1:
12926 if (regno >= ARRAY_SIZE (qi_reg_name))
12927 goto normal;
12928 if (!ANY_QI_REGNO_P (regno))
12929 error ("unsupported size for integer register");
12930 reg = qi_reg_name[regno];
12931 break;
12932 case 0:
12933 if (regno >= ARRAY_SIZE (qi_high_reg_name))
12934 goto normal;
12935 reg = qi_high_reg_name[regno];
12936 break;
12937 case 32:
12938 case 64:
12939 if (SSE_REGNO_P (regno))
12940 {
12941 gcc_assert (!duplicated);
12942 putc (msize == 32 ? 'y' : 'z', file);
12943 reg = hi_reg_name[regno] + 1;
12944 break;
12945 }
12946 goto normal;
12947 default:
12948 gcc_unreachable ();
12949 }
12950
12951 fputs (reg, file);
12952
12953 /* Irritatingly, AMD extended registers use
12954 a different naming convention: "r%d[bwd]".  */
12955 if (REX_INT_REGNO_P (regno))
12956 {
12957 gcc_assert (TARGET_64BIT);
12958 switch (msize)
12959 {
12960 case 0:
12961 error ("extended registers have no high halves");
12962 break;
12963 case 1:
12964 putc ('b', file);
12965 break;
12966 case 2:
12967 putc ('w', file);
12968 break;
12969 case 4:
12970 putc ('d', file);
12971 break;
12972 case 8:
12973 /* no suffix */
12974 break;
12975 default:
12976 error ("unsupported operand size for extended register");
12977 break;
12978 }
12979 return;
12980 }
12981
12982 if (duplicated)
12983 {
12984 if (ASSEMBLER_DIALECT == ASM_ATT)
12985 fprintf (file, ", %%%s", reg);
12986 else
12987 fprintf (file, ", %s", reg);
12988 }
12989 }
12990
12991 /* Meaning of CODE:
12992 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12993 C -- print opcode suffix for set/cmov insn.
12994 c -- like C, but print reversed condition
12995 F,f -- likewise, but for floating-point.
12996 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12997 otherwise nothing
12998 R -- print embedded rounding and sae.
12999 r -- print only sae.
13000 z -- print the opcode suffix for the size of the current operand.
13001 Z -- likewise, with special suffixes for x87 instructions.
13002 * -- print a star (in certain assembler syntax)
13003 A -- print an absolute memory reference.
13004 E -- print address with DImode register names if TARGET_64BIT.
13005 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13006 s -- print a shift double count, followed by the assembler's argument
13007 delimiter.
13008 b -- print the QImode name of the register for the indicated operand.
13009 %b0 would print %al if operands[0] is reg 0.
13010 w -- likewise, print the HImode name of the register.
13011 k -- likewise, print the SImode name of the register.
13012 q -- likewise, print the DImode name of the register.
13013 x -- likewise, print the V4SFmode name of the register.
13014 t -- likewise, print the V8SFmode name of the register.
13015 g -- likewise, print the V16SFmode name of the register.
13016 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13017 y -- print "st(0)" instead of "st" as a register.
13018 d -- print duplicated register operand for AVX instruction.
13019 D -- print condition for SSE cmp instruction.
13020 P -- if PIC, print an @PLT suffix. For -fno-plt, load function
13021 address from GOT.
13022 p -- print raw symbol name.
13023 X -- don't print any sort of PIC '@' suffix for a symbol.
13024 & -- print some in-use local-dynamic symbol name.
13025 H -- print a memory address offset by 8; used for sse high-parts
13026 Y -- print condition for XOP pcom* instruction.
13027 V -- print naked full integer register name without %.
13028 + -- print a branch hint as 'cs' or 'ds' prefix
13029 ; -- print a semicolon (after prefixes due to bug in older gas).
13030 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13031 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
13032 M -- print addr32 prefix for TARGET_X32 with VSIB address.
13033 ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
13034 N -- print maskz if it's a constant 0 operand.
13035 */
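/* Illustrative example (a hypothetical template, not taken from the .md
   files): in "add%z0\t{%2, %0|%0, %2}" the "%z0" expands to the size
   suffix of operand 0 ("b", "w", "l" or "q" in AT&T syntax), while the
   "{att|intel}" braces select the operand order for the current
   assembler dialect, so SImode register operands would print as
   something like "addl %eax, %ebx" under AT&T syntax.  */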
13036
13037 void
13038 ix86_print_operand (FILE *file, rtx x, int code)
13039 {
13040 if (code)
13041 {
13042 switch (code)
13043 {
13044 case 'A':
13045 switch (ASSEMBLER_DIALECT)
13046 {
13047 case ASM_ATT:
13048 putc ('*', file);
13049 break;
13050
13051 case ASM_INTEL:
13052 /* Intel syntax. For absolute addresses, registers should not
13053 be surrounded by brackets. */
13054 if (!REG_P (x))
13055 {
13056 putc ('[', file);
13057 ix86_print_operand (file, x, 0);
13058 putc (']', file);
13059 return;
13060 }
13061 break;
13062
13063 default:
13064 gcc_unreachable ();
13065 }
13066
13067 ix86_print_operand (file, x, 0);
13068 return;
13069
13070 case 'E':
13071 /* Wrap address in an UNSPEC to declare special handling. */
13072 if (TARGET_64BIT)
13073 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
13074
13075 output_address (VOIDmode, x);
13076 return;
13077
13078 case 'L':
13079 if (ASSEMBLER_DIALECT == ASM_ATT)
13080 putc ('l', file);
13081 return;
13082
13083 case 'W':
13084 if (ASSEMBLER_DIALECT == ASM_ATT)
13085 putc ('w', file);
13086 return;
13087
13088 case 'B':
13089 if (ASSEMBLER_DIALECT == ASM_ATT)
13090 putc ('b', file);
13091 return;
13092
13093 case 'Q':
13094 if (ASSEMBLER_DIALECT == ASM_ATT)
13095 putc ('l', file);
13096 return;
13097
13098 case 'S':
13099 if (ASSEMBLER_DIALECT == ASM_ATT)
13100 putc ('s', file);
13101 return;
13102
13103 case 'T':
13104 if (ASSEMBLER_DIALECT == ASM_ATT)
13105 putc ('t', file);
13106 return;
13107
13108 case 'O':
13109 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13110 if (ASSEMBLER_DIALECT != ASM_ATT)
13111 return;
13112
13113 switch (GET_MODE_SIZE (GET_MODE (x)))
13114 {
13115 case 2:
13116 putc ('w', file);
13117 break;
13118
13119 case 4:
13120 putc ('l', file);
13121 break;
13122
13123 case 8:
13124 putc ('q', file);
13125 break;
13126
13127 default:
13128 output_operand_lossage ("invalid operand size for operand "
13129 "code 'O'");
13130 return;
13131 }
13132
13133 putc ('.', file);
13134 #endif
13135 return;
13136
13137 case 'z':
13138 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13139 {
13140 /* Opcodes don't get size suffixes when using Intel syntax. */
13141 if (ASSEMBLER_DIALECT == ASM_INTEL)
13142 return;
13143
13144 switch (GET_MODE_SIZE (GET_MODE (x)))
13145 {
13146 case 1:
13147 putc ('b', file);
13148 return;
13149
13150 case 2:
13151 putc ('w', file);
13152 return;
13153
13154 case 4:
13155 putc ('l', file);
13156 return;
13157
13158 case 8:
13159 putc ('q', file);
13160 return;
13161
13162 default:
13163 output_operand_lossage ("invalid operand size for operand "
13164 "code 'z'");
13165 return;
13166 }
13167 }
13168
13169 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13170 warning (0, "non-integer operand used with operand code %<z%>");
13171 /* FALLTHRU */
13172
13173 case 'Z':
13174 /* 387 opcodes don't get size suffixes when using Intel syntax. */
13175 if (ASSEMBLER_DIALECT == ASM_INTEL)
13176 return;
13177
13178 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13179 {
13180 switch (GET_MODE_SIZE (GET_MODE (x)))
13181 {
13182 case 2:
13183 #ifdef HAVE_AS_IX86_FILDS
13184 putc ('s', file);
13185 #endif
13186 return;
13187
13188 case 4:
13189 putc ('l', file);
13190 return;
13191
13192 case 8:
13193 #ifdef HAVE_AS_IX86_FILDQ
13194 putc ('q', file);
13195 #else
13196 fputs ("ll", file);
13197 #endif
13198 return;
13199
13200 default:
13201 break;
13202 }
13203 }
13204 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13205 {
13206 /* 387 opcodes don't get size suffixes
13207 if the operands are registers. */
13208 if (STACK_REG_P (x))
13209 return;
13210
13211 switch (GET_MODE_SIZE (GET_MODE (x)))
13212 {
13213 case 4:
13214 putc ('s', file);
13215 return;
13216
13217 case 8:
13218 putc ('l', file);
13219 return;
13220
13221 case 12:
13222 case 16:
13223 putc ('t', file);
13224 return;
13225
13226 default:
13227 break;
13228 }
13229 }
13230 else
13231 {
13232 output_operand_lossage ("invalid operand type used with "
13233 "operand code 'Z'");
13234 return;
13235 }
13236
13237 output_operand_lossage ("invalid operand size for operand code 'Z'");
13238 return;
13239
13240 case 'd':
13241 case 'b':
13242 case 'w':
13243 case 'k':
13244 case 'q':
13245 case 'h':
13246 case 't':
13247 case 'g':
13248 case 'y':
13249 case 'x':
13250 case 'X':
13251 case 'P':
13252 case 'p':
13253 case 'V':
13254 break;
13255
13256 case 's':
13257 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13258 {
13259 ix86_print_operand (file, x, 0);
13260 fputs (", ", file);
13261 }
13262 return;
13263
13264 case 'Y':
13265 switch (GET_CODE (x))
13266 {
13267 case NE:
13268 fputs ("neq", file);
13269 break;
13270 case EQ:
13271 fputs ("eq", file);
13272 break;
13273 case GE:
13274 case GEU:
13275 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13276 break;
13277 case GT:
13278 case GTU:
13279 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13280 break;
13281 case LE:
13282 case LEU:
13283 fputs ("le", file);
13284 break;
13285 case LT:
13286 case LTU:
13287 fputs ("lt", file);
13288 break;
13289 case UNORDERED:
13290 fputs ("unord", file);
13291 break;
13292 case ORDERED:
13293 fputs ("ord", file);
13294 break;
13295 case UNEQ:
13296 fputs ("ueq", file);
13297 break;
13298 case UNGE:
13299 fputs ("nlt", file);
13300 break;
13301 case UNGT:
13302 fputs ("nle", file);
13303 break;
13304 case UNLE:
13305 fputs ("ule", file);
13306 break;
13307 case UNLT:
13308 fputs ("ult", file);
13309 break;
13310 case LTGT:
13311 fputs ("une", file);
13312 break;
13313 default:
13314 output_operand_lossage ("operand is not a condition code, "
13315 "invalid operand code 'Y'");
13316 return;
13317 }
13318 return;
13319
13320 case 'D':
13321 /* Little bit of braindamage here. The SSE compare instructions
13322 use completely different names for the comparisons than the fp
13323 conditional moves do. */
13324 switch (GET_CODE (x))
13325 {
13326 case UNEQ:
13327 if (TARGET_AVX)
13328 {
13329 fputs ("eq_us", file);
13330 break;
13331 }
13332 /* FALLTHRU */
13333 case EQ:
13334 fputs ("eq", file);
13335 break;
13336 case UNLT:
13337 if (TARGET_AVX)
13338 {
13339 fputs ("nge", file);
13340 break;
13341 }
13342 /* FALLTHRU */
13343 case LT:
13344 fputs ("lt", file);
13345 break;
13346 case UNLE:
13347 if (TARGET_AVX)
13348 {
13349 fputs ("ngt", file);
13350 break;
13351 }
13352 /* FALLTHRU */
13353 case LE:
13354 fputs ("le", file);
13355 break;
13356 case UNORDERED:
13357 fputs ("unord", file);
13358 break;
13359 case LTGT:
13360 if (TARGET_AVX)
13361 {
13362 fputs ("neq_oq", file);
13363 break;
13364 }
13365 /* FALLTHRU */
13366 case NE:
13367 fputs ("neq", file);
13368 break;
13369 case GE:
13370 if (TARGET_AVX)
13371 {
13372 fputs ("ge", file);
13373 break;
13374 }
13375 /* FALLTHRU */
13376 case UNGE:
13377 fputs ("nlt", file);
13378 break;
13379 case GT:
13380 if (TARGET_AVX)
13381 {
13382 fputs ("gt", file);
13383 break;
13384 }
13385 /* FALLTHRU */
13386 case UNGT:
13387 fputs ("nle", file);
13388 break;
13389 case ORDERED:
13390 fputs ("ord", file);
13391 break;
13392 default:
13393 output_operand_lossage ("operand is not a condition code, "
13394 "invalid operand code 'D'");
13395 return;
13396 }
13397 return;
13398
13399 case 'F':
13400 case 'f':
13401 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13402 if (ASSEMBLER_DIALECT == ASM_ATT)
13403 putc ('.', file);
13404 gcc_fallthrough ();
13405 #endif
13406
13407 case 'C':
13408 case 'c':
13409 if (!COMPARISON_P (x))
13410 {
13411 output_operand_lossage ("operand is not a condition code, "
13412 "invalid operand code '%c'", code);
13413 return;
13414 }
13415 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
13416 code == 'c' || code == 'f',
13417 code == 'F' || code == 'f',
13418 file);
13419 return;
13420
13421 case 'H':
13422 if (!offsettable_memref_p (x))
13423 {
13424 output_operand_lossage ("operand is not an offsettable memory "
13425 "reference, invalid operand code 'H'");
13426 return;
13427 }
13428 /* It doesn't actually matter what mode we use here, as we're
13429 only going to use this for printing. */
13430 x = adjust_address_nv (x, DImode, 8);
13431 /* Output 'qword ptr' for intel assembler dialect. */
13432 if (ASSEMBLER_DIALECT == ASM_INTEL)
13433 code = 'q';
13434 break;
13435
13436 case 'K':
13437 if (!CONST_INT_P (x))
13438 {
13439 output_operand_lossage ("operand is not an integer, invalid "
13440 "operand code 'K'");
13441 return;
13442 }
13443
13444 if (INTVAL (x) & IX86_HLE_ACQUIRE)
13445 #ifdef HAVE_AS_IX86_HLE
13446 fputs ("xacquire ", file);
13447 #else
13448 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
13449 #endif
13450 else if (INTVAL (x) & IX86_HLE_RELEASE)
13451 #ifdef HAVE_AS_IX86_HLE
13452 fputs ("xrelease ", file);
13453 #else
13454 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
13455 #endif
13456 /* We do not want to print the value of the operand. */
13457 return;
13458
13459 case 'N':
13460 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
13461 fputs ("{z}", file);
13462 return;
13463
13464 case 'r':
13465 if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
13466 {
13467 output_operand_lossage ("operand is not a specific integer, "
13468 "invalid operand code 'r'");
13469 return;
13470 }
13471
13472 if (ASSEMBLER_DIALECT == ASM_INTEL)
13473 fputs (", ", file);
13474
13475 fputs ("{sae}", file);
13476
13477 if (ASSEMBLER_DIALECT == ASM_ATT)
13478 fputs (", ", file);
13479
13480 return;
13481
13482 case 'R':
13483 if (!CONST_INT_P (x))
13484 {
13485 output_operand_lossage ("operand is not an integer, invalid "
13486 "operand code 'R'");
13487 return;
13488 }
13489
13490 if (ASSEMBLER_DIALECT == ASM_INTEL)
13491 fputs (", ", file);
13492
13493 switch (INTVAL (x))
13494 {
13495 case ROUND_NEAREST_INT | ROUND_SAE:
13496 fputs ("{rn-sae}", file);
13497 break;
13498 case ROUND_NEG_INF | ROUND_SAE:
13499 fputs ("{rd-sae}", file);
13500 break;
13501 case ROUND_POS_INF | ROUND_SAE:
13502 fputs ("{ru-sae}", file);
13503 break;
13504 case ROUND_ZERO | ROUND_SAE:
13505 fputs ("{rz-sae}", file);
13506 break;
13507 default:
13508 output_operand_lossage ("operand is not a specific integer, "
13509 "invalid operand code 'R'");
13510 }
13511
13512 if (ASSEMBLER_DIALECT == ASM_ATT)
13513 fputs (", ", file);
13514
13515 return;
13516
13517 case '*':
13518 if (ASSEMBLER_DIALECT == ASM_ATT)
13519 putc ('*', file);
13520 return;
13521
13522 case '&':
13523 {
13524 const char *name = get_some_local_dynamic_name ();
13525 if (name == NULL)
13526 output_operand_lossage ("'%%&' used without any "
13527 "local dynamic TLS references");
13528 else
13529 assemble_name (file, name);
13530 return;
13531 }
13532
13533 case '+':
13534 {
13535 rtx x;
13536
13537 if (!optimize
13538 || optimize_function_for_size_p (cfun)
13539 || !TARGET_BRANCH_PREDICTION_HINTS)
13540 return;
13541
13542 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13543 if (x)
13544 {
13545 int pred_val = profile_probability::from_reg_br_prob_note
13546 (XINT (x, 0)).to_reg_br_prob_base ();
13547
13548 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13549 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13550 {
13551 bool taken = pred_val > REG_BR_PROB_BASE / 2;
13552 bool cputaken
13553 = final_forward_branch_p (current_output_insn) == 0;
13554
13555 /* Emit hints only where the default branch prediction
13556 heuristics would fail. */
13557 if (taken != cputaken)
13558 {
13559 /* We use 3e (DS) prefix for taken branches and
13560 2e (CS) prefix for not taken branches. */
13561 if (taken)
13562 fputs ("ds ; ", file);
13563 else
13564 fputs ("cs ; ", file);
13565 }
13566 }
13567 }
13568 return;
13569 }
13570
13571 case ';':
13572 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13573 putc (';', file);
13574 #endif
13575 return;
13576
13577 case '~':
13578 putc (TARGET_AVX2 ? 'i' : 'f', file);
13579 return;
13580
13581 case 'M':
13582 if (TARGET_X32)
13583 {
13584 /* NB: 32-bit indices in a VSIB address are sign-extended
13585 to 64 bits. In x32, the 32-bit address 0xf7fa3010 is
13586 sign-extended to 0xfffffffff7fa3010, which is an invalid
13587 address. Add the addr32 prefix if there is no base
13588 register or symbol. */
13589 bool ok;
13590 struct ix86_address parts;
13591 ok = ix86_decompose_address (x, &parts);
13592 gcc_assert (ok && parts.index == NULL_RTX);
13593 if (parts.base == NULL_RTX
13594 && (parts.disp == NULL_RTX
13595 || !symbolic_operand (parts.disp,
13596 GET_MODE (parts.disp))))
13597 fputs ("addr32 ", file);
13598 }
13599 return;
13600
13601 case '^':
13602 if (TARGET_64BIT && Pmode != word_mode)
13603 fputs ("addr32 ", file);
13604 return;
13605
13606 case '!':
13607 if (ix86_notrack_prefixed_insn_p (current_output_insn))
13608 fputs ("notrack ", file);
13609 return;
13610
13611 default:
13612 output_operand_lossage ("invalid operand code '%c'", code);
13613 }
13614 }
13615
13616 if (REG_P (x))
13617 print_reg (x, code, file);
13618
13619 else if (MEM_P (x))
13620 {
13621 rtx addr = XEXP (x, 0);
13622
13623 /* No `byte ptr' prefix for call instructions ... */
13624 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
13625 {
13626 machine_mode mode = GET_MODE (x);
13627 const char *size;
13628
13629 /* Check for explicit size override codes. */
13630 if (code == 'b')
13631 size = "BYTE";
13632 else if (code == 'w')
13633 size = "WORD";
13634 else if (code == 'k')
13635 size = "DWORD";
13636 else if (code == 'q')
13637 size = "QWORD";
13638 else if (code == 'x')
13639 size = "XMMWORD";
13640 else if (code == 't')
13641 size = "YMMWORD";
13642 else if (code == 'g')
13643 size = "ZMMWORD";
13644 else if (mode == BLKmode)
13645 /* ... or BLKmode operands, when not overridden. */
13646 size = NULL;
13647 else
13648 switch (GET_MODE_SIZE (mode))
13649 {
13650 case 1: size = "BYTE"; break;
13651 case 2: size = "WORD"; break;
13652 case 4: size = "DWORD"; break;
13653 case 8: size = "QWORD"; break;
13654 case 12: size = "TBYTE"; break;
13655 case 16:
13656 if (mode == XFmode)
13657 size = "TBYTE";
13658 else
13659 size = "XMMWORD";
13660 break;
13661 case 32: size = "YMMWORD"; break;
13662 case 64: size = "ZMMWORD"; break;
13663 default:
13664 gcc_unreachable ();
13665 }
13666 if (size)
13667 {
13668 fputs (size, file);
13669 fputs (" PTR ", file);
13670 }
13671 }
13672
13673 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
13674 output_operand_lossage ("invalid constraints for operand");
13675 else
13676 ix86_print_operand_address_as
13677 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
13678 }
13679
13680 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode)
13681 {
13682 long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
13683 REAL_MODE_FORMAT (HFmode));
13684 if (ASSEMBLER_DIALECT == ASM_ATT)
13685 putc ('$', file);
13686 fprintf (file, "0x%04x", (unsigned int) l);
13687 }
13688
13689 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
13690 {
13691 long l;
13692
13693 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
13694
13695 if (ASSEMBLER_DIALECT == ASM_ATT)
13696 putc ('$', file);
13697 /* Sign extend 32bit SFmode immediate to 8 bytes. */
13698 if (code == 'q')
13699 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
13700 (unsigned long long) (int) l);
13701 else
13702 fprintf (file, "0x%08x", (unsigned int) l);
13703 }
13704
13705 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
13706 {
13707 long l[2];
13708
13709 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
13710
13711 if (ASSEMBLER_DIALECT == ASM_ATT)
13712 putc ('$', file);
13713 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
13714 }
13715
13716 /* These float cases don't actually occur as immediate operands. */
13717 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
13718 {
13719 char dstr[30];
13720
13721 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13722 fputs (dstr, file);
13723 }
13724
13725 /* Print bcst_mem_operand. */
13726 else if (GET_CODE (x) == VEC_DUPLICATE)
13727 {
13728 machine_mode vmode = GET_MODE (x);
13729 /* Must be bcst_memory_operand. */
13730 gcc_assert (bcst_mem_operand (x, vmode));
13731
13732 rtx mem = XEXP (x,0);
13733 ix86_print_operand (file, mem, 0);
13734
13735 switch (vmode)
13736 {
13737 case E_V2DImode:
13738 case E_V2DFmode:
13739 fputs ("{1to2}", file);
13740 break;
13741 case E_V4SImode:
13742 case E_V4SFmode:
13743 case E_V4DImode:
13744 case E_V4DFmode:
13745 fputs ("{1to4}", file);
13746 break;
13747 case E_V8SImode:
13748 case E_V8SFmode:
13749 case E_V8DFmode:
13750 case E_V8DImode:
13751 case E_V8HFmode:
13752 fputs ("{1to8}", file);
13753 break;
13754 case E_V16SFmode:
13755 case E_V16SImode:
13756 case E_V16HFmode:
13757 fputs ("{1to16}", file);
13758 break;
13759 case E_V32HFmode:
13760 fputs ("{1to32}", file);
13761 break;
13762 default:
13763 gcc_unreachable ();
13764 }
13765 }
13766
13767 else
13768 {
13769 /* We have patterns that allow zero sets of memory, for instance.
13770 In 64-bit mode, we should probably support all 8-byte vectors,
13771 since we can in fact encode that into an immediate. */
13772 if (GET_CODE (x) == CONST_VECTOR)
13773 {
13774 if (x != CONST0_RTX (GET_MODE (x)))
13775 output_operand_lossage ("invalid vector immediate");
13776 x = const0_rtx;
13777 }
13778
13779 if (code == 'P')
13780 {
13781 if (ix86_force_load_from_GOT_p (x, true))
13782 {
13783 /* For inline assembly statement, load function address
13784 from GOT with 'P' operand modifier to avoid PLT. */
13785 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13786 (TARGET_64BIT
13787 ? UNSPEC_GOTPCREL
13788 : UNSPEC_GOT));
13789 x = gen_rtx_CONST (Pmode, x);
13790 x = gen_const_mem (Pmode, x);
13791 ix86_print_operand (file, x, 'A');
13792 return;
13793 }
13794 }
13795 else if (code != 'p')
13796 {
13797 if (CONST_INT_P (x))
13798 {
13799 if (ASSEMBLER_DIALECT == ASM_ATT)
13800 putc ('$', file);
13801 }
13802 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13803 || GET_CODE (x) == LABEL_REF)
13804 {
13805 if (ASSEMBLER_DIALECT == ASM_ATT)
13806 putc ('$', file);
13807 else
13808 fputs ("OFFSET FLAT:", file);
13809 }
13810 }
13811 if (CONST_INT_P (x))
13812 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13813 else if (flag_pic || MACHOPIC_INDIRECT)
13814 output_pic_addr_const (file, x, code);
13815 else
13816 output_addr_const (file, x);
13817 }
13818 }
13819
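/* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook.  Return true if
   CODE is one of the punctuation characters handled above that take no
   operand.  */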
13820 static bool
13821 ix86_print_operand_punct_valid_p (unsigned char code)
13822 {
13823 return (code == '*' || code == '+' || code == '&' || code == ';'
13824 || code == '~' || code == '^' || code == '!');
13825 }
13826 \f
13827 /* Print a memory operand whose address is ADDR. */
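/* In AT&T syntax the address is printed as "disp(base,index,scale)",
   optionally preceded by a segment override such as "%fs:"; in Intel
   syntax it is printed as "[base+index*scale+disp]".  */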
13828
13829 static void
13830 ix86_print_operand_address_as (FILE *file, rtx addr,
13831 addr_space_t as, bool raw)
13832 {
13833 struct ix86_address parts;
13834 rtx base, index, disp;
13835 int scale;
13836 int ok;
13837 bool vsib = false;
13838 int code = 0;
13839
13840 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
13841 {
13842 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
13843 gcc_assert (parts.index == NULL_RTX);
13844 parts.index = XVECEXP (addr, 0, 1);
13845 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
13846 addr = XVECEXP (addr, 0, 0);
13847 vsib = true;
13848 }
13849 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
13850 {
13851 gcc_assert (TARGET_64BIT);
13852 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
13853 code = 'q';
13854 }
13855 else
13856 ok = ix86_decompose_address (addr, &parts);
13857
13858 gcc_assert (ok);
13859
13860 base = parts.base;
13861 index = parts.index;
13862 disp = parts.disp;
13863 scale = parts.scale;
13864
13865 if (ADDR_SPACE_GENERIC_P (as))
13866 as = parts.seg;
13867 else
13868 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
13869
13870 if (!ADDR_SPACE_GENERIC_P (as) && !raw)
13871 {
13872 if (ASSEMBLER_DIALECT == ASM_ATT)
13873 putc ('%', file);
13874
13875 switch (as)
13876 {
13877 case ADDR_SPACE_SEG_FS:
13878 fputs ("fs:", file);
13879 break;
13880 case ADDR_SPACE_SEG_GS:
13881 fputs ("gs:", file);
13882 break;
13883 default:
13884 gcc_unreachable ();
13885 }
13886 }
13887
13888 /* Use one byte shorter RIP relative addressing for 64bit mode. */
13889 if (TARGET_64BIT && !base && !index && !raw)
13890 {
13891 rtx symbol = disp;
13892
13893 if (GET_CODE (disp) == CONST
13894 && GET_CODE (XEXP (disp, 0)) == PLUS
13895 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13896 symbol = XEXP (XEXP (disp, 0), 0);
13897
13898 if (GET_CODE (symbol) == LABEL_REF
13899 || (GET_CODE (symbol) == SYMBOL_REF
13900 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13901 base = pc_rtx;
13902 }
13903
13904 if (!base && !index)
13905 {
13906 /* Displacement only requires special attention. */
13907 if (CONST_INT_P (disp))
13908 {
13909 if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
13910 fputs ("ds:", file);
13911 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13912 }
13913 /* Load the external function address via the GOT slot to avoid PLT. */
13914 else if (GET_CODE (disp) == CONST
13915 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13916 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
13917 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
13918 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
13919 output_pic_addr_const (file, disp, 0);
13920 else if (flag_pic)
13921 output_pic_addr_const (file, disp, 0);
13922 else
13923 output_addr_const (file, disp);
13924 }
13925 else
13926 {
13927 /* Print SImode register names to force addr32 prefix. */
13928 if (SImode_address_operand (addr, VOIDmode))
13929 {
13930 if (flag_checking)
13931 {
13932 gcc_assert (TARGET_64BIT);
13933 switch (GET_CODE (addr))
13934 {
13935 case SUBREG:
13936 gcc_assert (GET_MODE (addr) == SImode);
13937 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
13938 break;
13939 case ZERO_EXTEND:
13940 case AND:
13941 gcc_assert (GET_MODE (addr) == DImode);
13942 break;
13943 default:
13944 gcc_unreachable ();
13945 }
13946 }
13947 gcc_assert (!code);
13948 code = 'k';
13949 }
13950 else if (code == 0
13951 && TARGET_X32
13952 && disp
13953 && CONST_INT_P (disp)
13954 && INTVAL (disp) < -16*1024*1024)
13955 {
13956 /* X32 runs in 64-bit mode, where displacement, DISP, in
13957 address DISP(%r64), is encoded as 32-bit immediate sign-
13958 extended from 32-bit to 64-bit. For -0x40000300(%r64),
13959 address is %r64 + 0xffffffffbffffd00. When %r64 <
13960 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
13961 which is invalid for x32. The correct address is %r64
13962 - 0x40000300 == 0xf7ffdd64. To properly encode
13963 -0x40000300(%r64) for x32, we zero-extend negative
13964 displacement by forcing addr32 prefix which truncates
13965 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
13966 zero-extend all negative displacements, including -1(%rsp).
13967 However, for small negative displacements, sign-extension
13968 won't cause overflow. We only zero-extend negative
13969 displacements if they are < -16*1024*1024, which is also used
13970 to check legitimate address displacements for PIC. */
13971 code = 'k';
13972 }
13973
13974 /* Since the upper 32 bits of RSP are always zero for x32,
13975 we can encode %esp as %rsp to avoid 0x67 prefix if
13976 there is no index register. */
13977 if (TARGET_X32 && Pmode == SImode
13978 && !index && base && REG_P (base) && REGNO (base) == SP_REG)
13979 code = 'q';
13980
13981 if (ASSEMBLER_DIALECT == ASM_ATT)
13982 {
13983 if (disp)
13984 {
13985 if (flag_pic)
13986 output_pic_addr_const (file, disp, 0);
13987 else if (GET_CODE (disp) == LABEL_REF)
13988 output_asm_label (disp);
13989 else
13990 output_addr_const (file, disp);
13991 }
13992
13993 putc ('(', file);
13994 if (base)
13995 print_reg (base, code, file);
13996 if (index)
13997 {
13998 putc (',', file);
13999 print_reg (index, vsib ? 0 : code, file);
14000 if (scale != 1 || vsib)
14001 fprintf (file, ",%d", scale);
14002 }
14003 putc (')', file);
14004 }
14005 else
14006 {
14007 rtx offset = NULL_RTX;
14008
14009 if (disp)
14010 {
14011 /* Pull out the offset of a symbol; print any symbol itself. */
14012 if (GET_CODE (disp) == CONST
14013 && GET_CODE (XEXP (disp, 0)) == PLUS
14014 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14015 {
14016 offset = XEXP (XEXP (disp, 0), 1);
14017 disp = gen_rtx_CONST (VOIDmode,
14018 XEXP (XEXP (disp, 0), 0));
14019 }
14020
14021 if (flag_pic)
14022 output_pic_addr_const (file, disp, 0);
14023 else if (GET_CODE (disp) == LABEL_REF)
14024 output_asm_label (disp);
14025 else if (CONST_INT_P (disp))
14026 offset = disp;
14027 else
14028 output_addr_const (file, disp);
14029 }
14030
14031 putc ('[', file);
14032 if (base)
14033 {
14034 print_reg (base, code, file);
14035 if (offset)
14036 {
14037 if (INTVAL (offset) >= 0)
14038 putc ('+', file);
14039 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14040 }
14041 }
14042 else if (offset)
14043 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14044 else
14045 putc ('0', file);
14046
14047 if (index)
14048 {
14049 putc ('+', file);
14050 print_reg (index, vsib ? 0 : code, file);
14051 if (scale != 1 || vsib)
14052 fprintf (file, "*%d", scale);
14053 }
14054 putc (']', file);
14055 }
14056 }
14057 }
14058
14059 static void
14060 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
14061 {
14062 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
14063 output_operand_lossage ("invalid constraints for operand");
14064 else
14065 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
14066 }
14067
14068 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14069
14070 static bool
14071 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14072 {
14073 rtx op;
14074
14075 if (GET_CODE (x) != UNSPEC)
14076 return false;
14077
14078 op = XVECEXP (x, 0, 0);
14079 switch (XINT (x, 1))
14080 {
14081 case UNSPEC_GOTOFF:
14082 output_addr_const (file, op);
14083 fputs ("@gotoff", file);
14084 break;
14085 case UNSPEC_GOTTPOFF:
14086 output_addr_const (file, op);
14087 /* FIXME: This might be @TPOFF in Sun ld. */
14088 fputs ("@gottpoff", file);
14089 break;
14090 case UNSPEC_TPOFF:
14091 output_addr_const (file, op);
14092 fputs ("@tpoff", file);
14093 break;
14094 case UNSPEC_NTPOFF:
14095 output_addr_const (file, op);
14096 if (TARGET_64BIT)
14097 fputs ("@tpoff", file);
14098 else
14099 fputs ("@ntpoff", file);
14100 break;
14101 case UNSPEC_DTPOFF:
14102 output_addr_const (file, op);
14103 fputs ("@dtpoff", file);
14104 break;
14105 case UNSPEC_GOTNTPOFF:
14106 output_addr_const (file, op);
14107 if (TARGET_64BIT)
14108 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14109 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14110 else
14111 fputs ("@gotntpoff", file);
14112 break;
14113 case UNSPEC_INDNTPOFF:
14114 output_addr_const (file, op);
14115 fputs ("@indntpoff", file);
14116 break;
14117 #if TARGET_MACHO
14118 case UNSPEC_MACHOPIC_OFFSET:
14119 output_addr_const (file, op);
14120 putc ('-', file);
14121 machopic_output_function_base_name (file);
14122 break;
14123 #endif
14124
14125 default:
14126 return false;
14127 }
14128
14129 return true;
14130 }
14131 \f
14132 \f
14133 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14134 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14135 is the expression of the binary operation. The output may either be
14136 emitted here, or returned to the caller, like all output_* functions.
14137
14138 There is no guarantee that the operands are the same mode, as they
14139 might be within FLOAT or FLOAT_EXTEND expressions. */
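/* Illustrative example (not a template taken from the .md files): for an
   SSE DFmode add this routine builds "%vaddsd\t{%2, %1, %0|%0, %1, %2}"
   when TARGET_AVX (the "%v" prefix is expected to expand to "v" for AVX
   encodings), and "%vaddsd\t{%2, %0|%0, %2}" otherwise.  */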
14140
14141 #ifndef SYSV386_COMPAT
14142 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14143 wants to fix the assemblers because that causes incompatibility
14144 with gcc. No-one wants to fix gcc because that causes
14145 incompatibility with assemblers... You can use the option of
14146 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14147 #define SYSV386_COMPAT 1
14148 #endif
14149
14150 const char *
14151 output_387_binary_op (rtx_insn *insn, rtx *operands)
14152 {
14153 static char buf[40];
14154 const char *p;
14155 bool is_sse
14156 = (SSE_REG_P (operands[0])
14157 || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
14158
14159 if (is_sse)
14160 p = "%v";
14161 else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14162 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14163 p = "fi";
14164 else
14165 p = "f";
14166
14167 strcpy (buf, p);
14168
14169 switch (GET_CODE (operands[3]))
14170 {
14171 case PLUS:
14172 p = "add"; break;
14173 case MINUS:
14174 p = "sub"; break;
14175 case MULT:
14176 p = "mul"; break;
14177 case DIV:
14178 p = "div"; break;
14179 default:
14180 gcc_unreachable ();
14181 }
14182
14183 strcat (buf, p);
14184
14185 if (is_sse)
14186 {
14187 p = GET_MODE (operands[0]) == SFmode ? "ss" : "sd";
14188 strcat (buf, p);
14189
14190 if (TARGET_AVX)
14191 p = "\t{%2, %1, %0|%0, %1, %2}";
14192 else
14193 p = "\t{%2, %0|%0, %2}";
14194
14195 strcat (buf, p);
14196 return buf;
14197 }
14198
14199 /* Even if we do not want to check the inputs, this documents the input
14200 constraints, which helps in understanding the following code. */
14201 if (flag_checking)
14202 {
14203 if (STACK_REG_P (operands[0])
14204 && ((REG_P (operands[1])
14205 && REGNO (operands[0]) == REGNO (operands[1])
14206 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14207 || (REG_P (operands[2])
14208 && REGNO (operands[0]) == REGNO (operands[2])
14209 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14210 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14211 ; /* ok */
14212 else
14213 gcc_unreachable ();
14214 }
14215
14216 switch (GET_CODE (operands[3]))
14217 {
14218 case MULT:
14219 case PLUS:
14220 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14221 std::swap (operands[1], operands[2]);
14222
14223 /* Now we know operands[0] == operands[1]. */
14224
14225 if (MEM_P (operands[2]))
14226 {
14227 p = "%Z2\t%2";
14228 break;
14229 }
14230
14231 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14232 {
14233 if (STACK_TOP_P (operands[0]))
14234 /* How is it that we are storing to a dead operand[2]?
14235 Well, presumably operands[1] is dead too. We can't
14236 store the result to st(0) as st(0) gets popped on this
14237 instruction. Instead store to operands[2] (which I
14238 think has to be st(1)). st(1) will be popped later.
14239 gcc <= 2.8.1 didn't have this check and generated
14240 assembly code that the Unixware assembler rejected. */
14241 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14242 else
14243 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14244 break;
14245 }
14246
14247 if (STACK_TOP_P (operands[0]))
14248 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14249 else
14250 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14251 break;
14252
14253 case MINUS:
14254 case DIV:
14255 if (MEM_P (operands[1]))
14256 {
14257 p = "r%Z1\t%1";
14258 break;
14259 }
14260
14261 if (MEM_P (operands[2]))
14262 {
14263 p = "%Z2\t%2";
14264 break;
14265 }
14266
14267 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14268 {
14269 #if SYSV386_COMPAT
14270 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14271 derived assemblers, confusingly reverse the direction of
14272 the operation for fsub{r} and fdiv{r} when the
14273 destination register is not st(0). The Intel assembler
14274 doesn't have this brain damage. Read !SYSV386_COMPAT to
14275 figure out what the hardware really does. */
14276 if (STACK_TOP_P (operands[0]))
14277 p = "{p\t%0, %2|rp\t%2, %0}";
14278 else
14279 p = "{rp\t%2, %0|p\t%0, %2}";
14280 #else
14281 if (STACK_TOP_P (operands[0]))
14282 /* As above for fmul/fadd, we can't store to st(0). */
14283 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14284 else
14285 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14286 #endif
14287 break;
14288 }
14289
14290 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14291 {
14292 #if SYSV386_COMPAT
14293 if (STACK_TOP_P (operands[0]))
14294 p = "{rp\t%0, %1|p\t%1, %0}";
14295 else
14296 p = "{p\t%1, %0|rp\t%0, %1}";
14297 #else
14298 if (STACK_TOP_P (operands[0]))
14299 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14300 else
14301 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14302 #endif
14303 break;
14304 }
14305
14306 if (STACK_TOP_P (operands[0]))
14307 {
14308 if (STACK_TOP_P (operands[1]))
14309 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14310 else
14311 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14312 break;
14313 }
14314 else if (STACK_TOP_P (operands[1]))
14315 {
14316 #if SYSV386_COMPAT
14317 p = "{\t%1, %0|r\t%0, %1}";
14318 #else
14319 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14320 #endif
14321 }
14322 else
14323 {
14324 #if SYSV386_COMPAT
14325 p = "{r\t%2, %0|\t%0, %2}";
14326 #else
14327 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14328 #endif
14329 }
14330 break;
14331
14332 default:
14333 gcc_unreachable ();
14334 }
14335
14336 strcat (buf, p);
14337 return buf;
14338 }
14339
14340 /* Return needed mode for entity in optimize_mode_switching pass. */
14341
14342 static int
14343 ix86_dirflag_mode_needed (rtx_insn *insn)
14344 {
14345 if (CALL_P (insn))
14346 {
14347 if (cfun->machine->func_type == TYPE_NORMAL)
14348 return X86_DIRFLAG_ANY;
14349 else
14350 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
14351 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
14352 }
14353
14354 if (recog_memoized (insn) < 0)
14355 return X86_DIRFLAG_ANY;
14356
14357 if (get_attr_type (insn) == TYPE_STR)
14358 {
14359 /* Emit cld instruction if stringops are used in the function. */
14360 if (cfun->machine->func_type == TYPE_NORMAL)
14361 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
14362 else
14363 return X86_DIRFLAG_RESET;
14364 }
14365
14366 return X86_DIRFLAG_ANY;
14367 }
14368
14369 /* Check if a 256bit or 512bit AVX register is referenced inside EXP. */
14370
14371 static bool
14372 ix86_check_avx_upper_register (const_rtx exp)
14373 {
14374 return (SSE_REG_P (exp)
14375 && !EXT_REX_SSE_REG_P (exp)
14376 && GET_MODE_BITSIZE (GET_MODE (exp)) > 128);
14377 }
14378
14379 /* Check if a 256bit or 512bit AVX register is referenced in stores. */
14380
14381 static void
14382 ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
14383 {
14384 if (ix86_check_avx_upper_register (dest))
14385 {
14386 bool *used = (bool *) data;
14387 *used = true;
14388 }
14389 }
14390
14391 /* Return needed mode for entity in optimize_mode_switching pass. */
14392
14393 static int
14394 ix86_avx_u128_mode_needed (rtx_insn *insn)
14395 {
14396 if (DEBUG_INSN_P (insn))
14397 return AVX_U128_ANY;
14398
14399 if (CALL_P (insn))
14400 {
14401 rtx link;
14402
14403 /* Needed mode is set to AVX_U128_CLEAN if there are
14404 no 256bit or 512bit modes used in function arguments. */
14405 for (link = CALL_INSN_FUNCTION_USAGE (insn);
14406 link;
14407 link = XEXP (link, 1))
14408 {
14409 if (GET_CODE (XEXP (link, 0)) == USE)
14410 {
14411 rtx arg = XEXP (XEXP (link, 0), 0);
14412
14413 if (ix86_check_avx_upper_register (arg))
14414 return AVX_U128_DIRTY;
14415 }
14416 }
14417
14418 /* Needed mode is set to AVX_U128_CLEAN if no 256bit or
14419 512bit registers are used in the function return register. */
14420 bool avx_upper_reg_found = false;
14421 note_stores (insn, ix86_check_avx_upper_stores,
14422 &avx_upper_reg_found);
14423 if (avx_upper_reg_found)
14424 return AVX_U128_DIRTY;
14425
14426 /* If the function is known to preserve some SSE registers,
14427 RA and previous passes can legitimately rely on that for
14428 modes wider than 256 bits. It's only safe to issue a
14429 vzeroupper if all SSE registers are clobbered. */
14430 const function_abi &abi = insn_callee_abi (insn);
14431 if (!hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
14432 abi.mode_clobbers (V4DImode)))
14433 return AVX_U128_ANY;
14434
14435 return AVX_U128_CLEAN;
14436 }
14437
14438 subrtx_iterator::array_type array;
14439
14440 rtx set = single_set (insn);
14441 if (set)
14442 {
14443 rtx dest = SET_DEST (set);
14444 rtx src = SET_SRC (set);
14445 if (ix86_check_avx_upper_register (dest))
14446 {
14447 /* This is a YMM/ZMM load. Return AVX_U128_DIRTY if the
14448 source isn't zero. */
14449 if (standard_sse_constant_p (src, GET_MODE (dest)) != 1)
14450 return AVX_U128_DIRTY;
14451 else
14452 return AVX_U128_ANY;
14453 }
14454 else
14455 {
14456 FOR_EACH_SUBRTX (iter, array, src, NONCONST)
14457 if (ix86_check_avx_upper_register (*iter))
14458 return AVX_U128_DIRTY;
14459 }
14460
14461 /* This isn't a YMM/ZMM load or store. */
14462 return AVX_U128_ANY;
14463 }
14464
14465 /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
14466 Hardware changes state only when a 256bit register is written to,
14467 but we need to prevent the compiler from moving the optimal insertion
14468 point above an eventual read from a 256bit or 512bit register. */
14469 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
14470 if (ix86_check_avx_upper_register (*iter))
14471 return AVX_U128_DIRTY;
14472
14473 return AVX_U128_ANY;
14474 }
14475
14476 /* Return mode that i387 must be switched into
14477 prior to the execution of insn. */
14478
14479 static int
14480 ix86_i387_mode_needed (int entity, rtx_insn *insn)
14481 {
14482 enum attr_i387_cw mode;
14483
14484 /* The mode UNINITIALIZED is used to store the control word after a
14485 function call or ASM pattern. The mode ANY specifies that the function
14486 has no requirements on the control word and makes no changes in the
14487 bits we are interested in. */
14488
14489 if (CALL_P (insn)
14490 || (NONJUMP_INSN_P (insn)
14491 && (asm_noperands (PATTERN (insn)) >= 0
14492 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14493 return I387_CW_UNINITIALIZED;
14494
14495 if (recog_memoized (insn) < 0)
14496 return I387_CW_ANY;
14497
14498 mode = get_attr_i387_cw (insn);
14499
14500 switch (entity)
14501 {
14502 case I387_ROUNDEVEN:
14503 if (mode == I387_CW_ROUNDEVEN)
14504 return mode;
14505 break;
14506
14507 case I387_TRUNC:
14508 if (mode == I387_CW_TRUNC)
14509 return mode;
14510 break;
14511
14512 case I387_FLOOR:
14513 if (mode == I387_CW_FLOOR)
14514 return mode;
14515 break;
14516
14517 case I387_CEIL:
14518 if (mode == I387_CW_CEIL)
14519 return mode;
14520 break;
14521
14522 default:
14523 gcc_unreachable ();
14524 }
14525
14526 return I387_CW_ANY;
14527 }
14528
14529 /* Return mode that entity must be switched into
14530 prior to the execution of insn. */
14531
14532 static int
14533 ix86_mode_needed (int entity, rtx_insn *insn)
14534 {
14535 switch (entity)
14536 {
14537 case X86_DIRFLAG:
14538 return ix86_dirflag_mode_needed (insn);
14539 case AVX_U128:
14540 return ix86_avx_u128_mode_needed (insn);
14541 case I387_ROUNDEVEN:
14542 case I387_TRUNC:
14543 case I387_FLOOR:
14544 case I387_CEIL:
14545 return ix86_i387_mode_needed (entity, insn);
14546 default:
14547 gcc_unreachable ();
14548 }
14549 return 0;
14550 }
14551
14552 /* Calculate mode of upper 128bit AVX registers after the insn. */
14553
14554 static int
14555 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
14556 {
14557 rtx pat = PATTERN (insn);
14558
14559 if (vzeroupper_pattern (pat, VOIDmode)
14560 || vzeroall_pattern (pat, VOIDmode))
14561 return AVX_U128_CLEAN;
14562
14563 /* We know that state is clean after CALL insn if there are no
14564 256bit or 512bit registers used in the function return register. */
14565 if (CALL_P (insn))
14566 {
14567 bool avx_upper_reg_found = false;
14568 note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
14569
14570 return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
14571 }
14572
14573 /* Otherwise, return current mode. Remember that if insn
14574 references AVX 256bit or 512bit registers, the mode was already
14575 changed to DIRTY from MODE_NEEDED. */
14576 return mode;
14577 }
14578
14579 /* Return the mode that an insn results in. */
14580
14581 static int
14582 ix86_mode_after (int entity, int mode, rtx_insn *insn)
14583 {
14584 switch (entity)
14585 {
14586 case X86_DIRFLAG:
14587 return mode;
14588 case AVX_U128:
14589 return ix86_avx_u128_mode_after (mode, insn);
14590 case I387_ROUNDEVEN:
14591 case I387_TRUNC:
14592 case I387_FLOOR:
14593 case I387_CEIL:
14594 return mode;
14595 default:
14596 gcc_unreachable ();
14597 }
14598 }
14599
14600 static int
14601 ix86_dirflag_mode_entry (void)
14602 {
14603 /* For TARGET_CLD or in the interrupt handler we can't assume
14604 direction flag state at function entry. */
14605 if (TARGET_CLD
14606 || cfun->machine->func_type != TYPE_NORMAL)
14607 return X86_DIRFLAG_ANY;
14608
14609 return X86_DIRFLAG_RESET;
14610 }
14611
14612 static int
14613 ix86_avx_u128_mode_entry (void)
14614 {
14615 tree arg;
14616
14617 /* Entry mode is set to AVX_U128_DIRTY if there are
14618 256bit or 512bit modes used in function arguments. */
14619 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
14620 arg = TREE_CHAIN (arg))
14621 {
14622 rtx incoming = DECL_INCOMING_RTL (arg);
14623
14624 if (incoming && ix86_check_avx_upper_register (incoming))
14625 return AVX_U128_DIRTY;
14626 }
14627
14628 return AVX_U128_CLEAN;
14629 }
14630
14631 /* Return a mode that ENTITY is assumed to be
14632 switched to at function entry. */
14633
14634 static int
14635 ix86_mode_entry (int entity)
14636 {
14637 switch (entity)
14638 {
14639 case X86_DIRFLAG:
14640 return ix86_dirflag_mode_entry ();
14641 case AVX_U128:
14642 return ix86_avx_u128_mode_entry ();
14643 case I387_ROUNDEVEN:
14644 case I387_TRUNC:
14645 case I387_FLOOR:
14646 case I387_CEIL:
14647 return I387_CW_ANY;
14648 default:
14649 gcc_unreachable ();
14650 }
14651 }
14652
14653 static int
14654 ix86_avx_u128_mode_exit (void)
14655 {
14656 rtx reg = crtl->return_rtx;
14657
14658 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
14659 or 512 bit modes used in the function return register. */
14660 if (reg && ix86_check_avx_upper_register (reg))
14661 return AVX_U128_DIRTY;
14662
14663 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
14664 modes used in function arguments, otherwise return AVX_U128_CLEAN.
14665 */
14666 return ix86_avx_u128_mode_entry ();
14667 }
14668
14669 /* Return a mode that ENTITY is assumed to be
14670 switched to at function exit. */
14671
14672 static int
14673 ix86_mode_exit (int entity)
14674 {
14675 switch (entity)
14676 {
14677 case X86_DIRFLAG:
14678 return X86_DIRFLAG_ANY;
14679 case AVX_U128:
14680 return ix86_avx_u128_mode_exit ();
14681 case I387_ROUNDEVEN:
14682 case I387_TRUNC:
14683 case I387_FLOOR:
14684 case I387_CEIL:
14685 return I387_CW_ANY;
14686 default:
14687 gcc_unreachable ();
14688 }
14689 }
14690
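/* Implement TARGET_MODE_PRIORITY: return the priority of mode N for
   ENTITY in the mode switching pass; the mode numbers themselves are
   used as the priorities here.  */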
14691 static int
14692 ix86_mode_priority (int, int n)
14693 {
14694 return n;
14695 }
14696
14697 /* Output code to initialize control word copies used by trunc?f?i and
14698 rounding patterns. The SLOT_CW_STORED stack slot receives the current
14699 control word, while the slot selected by MODE gets the adjusted copy. */
14700
14701 static void
14702 emit_i387_cw_initialization (int mode)
14703 {
14704 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14705 rtx new_mode;
14706
14707 enum ix86_stack_slot slot;
14708
14709 rtx reg = gen_reg_rtx (HImode);
14710
14711 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14712 emit_move_insn (reg, copy_rtx (stored_mode));
14713
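  /* Bits 10 and 11 of the x87 control word form the rounding-control
     field: 00 = round to nearest even, 01 = round down, 10 = round up,
     11 = truncate toward zero; hence the 0x0c00 mask and the 0x0400 and
     0x0800 values used below.  */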
14714 switch (mode)
14715 {
14716 case I387_CW_ROUNDEVEN:
14717 /* round to nearest */
14718 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14719 slot = SLOT_CW_ROUNDEVEN;
14720 break;
14721
14722 case I387_CW_TRUNC:
14723 /* round toward zero (truncate) */
14724 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14725 slot = SLOT_CW_TRUNC;
14726 break;
14727
14728 case I387_CW_FLOOR:
14729 /* round down toward -oo */
14730 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14731 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14732 slot = SLOT_CW_FLOOR;
14733 break;
14734
14735 case I387_CW_CEIL:
14736 /* round up toward +oo */
14737 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14738 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14739 slot = SLOT_CW_CEIL;
14740 break;
14741
14742 default:
14743 gcc_unreachable ();
14744 }
14745
14746 gcc_assert (slot < MAX_386_STACK_LOCALS);
14747
14748 new_mode = assign_386_stack_local (HImode, slot);
14749 emit_move_insn (new_mode, reg);
14750 }
14751
14752 /* Generate one or more insns to set ENTITY to MODE. */
14753
14754 static void
14755 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
14756 HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
14757 {
14758 switch (entity)
14759 {
14760 case X86_DIRFLAG:
14761 if (mode == X86_DIRFLAG_RESET)
14762 emit_insn (gen_cld ());
14763 break;
14764 case AVX_U128:
14765 if (mode == AVX_U128_CLEAN)
14766 ix86_expand_avx_vzeroupper ();
14767 break;
14768 case I387_ROUNDEVEN:
14769 case I387_TRUNC:
14770 case I387_FLOOR:
14771 case I387_CEIL:
14772 if (mode != I387_CW_ANY
14773 && mode != I387_CW_UNINITIALIZED)
14774 emit_i387_cw_initialization (mode);
14775 break;
14776 default:
14777 gcc_unreachable ();
14778 }
14779 }
14780
14781 /* Output code for INSN to convert a float to a signed int. OPERANDS
14782 are the insn operands. The output may be [HSD]Imode and the input
14783 operand may be [SDX]Fmode. */
14784
14785 const char *
14786 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
14787 {
14788 bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
14789 bool dimode_p = GET_MODE (operands[0]) == DImode;
14790 int round_mode = get_attr_i387_cw (insn);
14791
14792 static char buf[40];
14793 const char *p;
14794
14795 /* Jump through a hoop or two for DImode, since the hardware has no
14796 non-popping instruction. We used to do this a different way, but
14797 that was somewhat fragile and broke with post-reload splitters. */
14798 if ((dimode_p || fisttp) && !stack_top_dies)
14799 output_asm_insn ("fld\t%y1", operands);
14800
14801 gcc_assert (STACK_TOP_P (operands[1]));
14802 gcc_assert (MEM_P (operands[0]));
14803 gcc_assert (GET_MODE (operands[1]) != TFmode);
14804
14805 if (fisttp)
14806 return "fisttp%Z0\t%0";
14807
14808 strcpy (buf, "fist");
14809
14810 if (round_mode != I387_CW_ANY)
14811 output_asm_insn ("fldcw\t%3", operands);
14812
14813 p = "p%Z0\t%0";
14814 strcat (buf, p + !(stack_top_dies || dimode_p));
14815
14816 output_asm_insn (buf, operands);
14817
14818 if (round_mode != I387_CW_ANY)
14819 output_asm_insn ("fldcw\t%2", operands);
14820
14821 return "";
14822 }
14823
14824 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14825 have the values zero or one, indicates the ffreep insn's operand
14826 from the OPERANDS array. */
14827
14828 static const char *
14829 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14830 {
14831 if (TARGET_USE_FFREEP)
14832 #ifdef HAVE_AS_IX86_FFREEP
14833 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14834 #else
14835 {
14836 static char retval[32];
14837 int regno = REGNO (operands[opno]);
14838
14839 gcc_assert (STACK_REGNO_P (regno));
14840
14841 regno -= FIRST_STACK_REG;
14842
14843 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14844 return retval;
14845 }
14846 #endif
14847
14848 return opno ? "fstp\t%y1" : "fstp\t%y0";
14849 }
14850
14851
14852 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14853 should be used. UNORDERED_P is true when fucom should be used. */
14854
14855 const char *
14856 output_fp_compare (rtx_insn *insn, rtx *operands,
14857 bool eflags_p, bool unordered_p)
14858 {
14859 rtx *xops = eflags_p ? &operands[0] : &operands[1];
14860 bool stack_top_dies;
14861
14862 static char buf[40];
14863 const char *p;
14864
14865 gcc_assert (STACK_TOP_P (xops[0]));
14866
14867 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
14868
14869 if (eflags_p)
14870 {
14871 p = unordered_p ? "fucomi" : "fcomi";
14872 strcpy (buf, p);
14873
14874 p = "p\t{%y1, %0|%0, %y1}";
14875 strcat (buf, p + !stack_top_dies);
14876
14877 return buf;
14878 }
14879
14880 if (STACK_REG_P (xops[1])
14881 && stack_top_dies
14882 && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
14883 {
14884 gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
14885
14886 /* If the top of the 387 stack dies, and the other operand
14887 is also a stack register that dies, then this must be a
14888 `fcompp' float compare. */
14889 p = unordered_p ? "fucompp" : "fcompp";
14890 strcpy (buf, p);
14891 }
14892 else if (const0_operand (xops[1], VOIDmode))
14893 {
14894 gcc_assert (!unordered_p);
14895 strcpy (buf, "ftst");
14896 }
14897 else
14898 {
14899 if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
14900 {
14901 gcc_assert (!unordered_p);
14902 p = "ficom";
14903 }
14904 else
14905 p = unordered_p ? "fucom" : "fcom";
14906
14907 strcpy (buf, p);
14908
14909 p = "p%Z2\t%y2";
14910 strcat (buf, p + !stack_top_dies);
14911 }
14912
14913 output_asm_insn (buf, operands);
14914 return "fnstsw\t%0";
14915 }
14916
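/* Output an element of a jump-table dispatch vector: the address of the
   code label numbered VALUE, as ASM_QUAD on LP64 targets and ASM_LONG
   otherwise.  */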
14917 void
14918 ix86_output_addr_vec_elt (FILE *file, int value)
14919 {
14920 const char *directive = ASM_LONG;
14921
14922 #ifdef ASM_QUAD
14923 if (TARGET_LP64)
14924 directive = ASM_QUAD;
14925 #else
14926 gcc_assert (!TARGET_64BIT);
14927 #endif
14928
14929 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14930 }
14931
14932 void
14933 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14934 {
14935 const char *directive = ASM_LONG;
14936
14937 #ifdef ASM_QUAD
14938 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14939 directive = ASM_QUAD;
14940 #else
14941 gcc_assert (!TARGET_64BIT);
14942 #endif
14943 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14944 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14945 fprintf (file, "%s%s%d-%s%d\n",
14946 directive, LPREFIX, value, LPREFIX, rel);
14947 #if TARGET_MACHO
14948 else if (TARGET_MACHO)
14949 {
14950 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14951 machopic_output_function_base_name (file);
14952 putc ('\n', file);
14953 }
14954 #endif
14955 else if (HAVE_AS_GOTOFF_IN_DATA)
14956 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14957 else
14958 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14959 GOT_SYMBOL_NAME, LPREFIX, value);
14960 }
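
/* For example, on a typical 64-bit ELF target the function above emits a
   label difference such as ".long .L4-.L2" (or ".quad ..." when
   CASE_VECTOR_MODE is DImode), while the 32-bit paths fall back to
   @GOTOFF or a _GLOBAL_OFFSET_TABLE_-relative expression (illustrative;
   the exact directives depend on the target macros).  */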
14961 \f
14962 #define LEA_MAX_STALL (3)
14963 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
14964
14965 /* Increase given DISTANCE in half-cycles according to
14966 dependencies between PREV and NEXT instructions.
14967 Add 1 half-cycle if there is no dependency and
14968 go to the next cycle if there is some dependency. */
14969
14970 static unsigned int
14971 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
14972 {
14973 df_ref def, use;
14974
14975 if (!prev || !next)
14976 return distance + (distance & 1) + 2;
14977
14978 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
14979 return distance + 1;
14980
14981 FOR_EACH_INSN_USE (use, next)
14982 FOR_EACH_INSN_DEF (def, prev)
14983 if (!DF_REF_IS_ARTIFICIAL (def)
14984 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
14985 return distance + (distance & 1) + 2;
14986
14987 return distance + 1;
14988 }
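
/* A worked example of the accounting above: with DISTANCE currently at 3
   half-cycles, a real dependency between PREV and NEXT rounds up to the
   next full cycle and then adds one more cycle, 3 + (3 & 1) + 2 = 6,
   whereas an independent pair simply advances by one half-cycle to 4.  */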
14989
14990 /* Function checks if instruction INSN defines register number
14991 REGNO1 or REGNO2. */
14992
14993 bool
14994 insn_defines_reg (unsigned int regno1, unsigned int regno2,
14995 rtx_insn *insn)
14996 {
14997 df_ref def;
14998
14999 FOR_EACH_INSN_DEF (def, insn)
15000 if (DF_REF_REG_DEF_P (def)
15001 && !DF_REF_IS_ARTIFICIAL (def)
15002 && (regno1 == DF_REF_REGNO (def)
15003 || regno2 == DF_REF_REGNO (def)))
15004 return true;
15005
15006 return false;
15007 }
15008
15009 /* Function checks if instruction INSN uses register number
15010 REGNO as a part of address expression. */
15011
15012 static bool
15013 insn_uses_reg_mem (unsigned int regno, rtx insn)
15014 {
15015 df_ref use;
15016
15017 FOR_EACH_INSN_USE (use, insn)
15018 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
15019 return true;
15020
15021 return false;
15022 }
15023
15024 /* Search backward for non-agu definition of register number REGNO1
15025 or register number REGNO2 in basic block starting from instruction
15026 START up to head of basic block or instruction INSN.
15027
15028 Set *FOUND to true if a definition was found and false
15029 otherwise.
15030
15031 The distance in half-cycles between START and the found instruction
15032 (or the head of the BB) is added to DISTANCE and returned. */
15033
15034 static int
15035 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
15036 rtx_insn *insn, int distance,
15037 rtx_insn *start, bool *found)
15038 {
15039 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
15040 rtx_insn *prev = start;
15041 rtx_insn *next = NULL;
15042
15043 *found = false;
15044
15045 while (prev
15046 && prev != insn
15047 && distance < LEA_SEARCH_THRESHOLD)
15048 {
15049 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
15050 {
15051 distance = increase_distance (prev, next, distance);
15052 if (insn_defines_reg (regno1, regno2, prev))
15053 {
15054 if (recog_memoized (prev) < 0
15055 || get_attr_type (prev) != TYPE_LEA)
15056 {
15057 *found = true;
15058 return distance;
15059 }
15060 }
15061
15062 next = prev;
15063 }
15064 if (prev == BB_HEAD (bb))
15065 break;
15066
15067 prev = PREV_INSN (prev);
15068 }
15069
15070 return distance;
15071 }
15072
15073 /* Search backward for non-agu definition of register number REGNO1
15074 or register number REGNO2 in INSN's basic block until
15075 1. Pass LEA_SEARCH_THRESHOLD instructions, or
15076 2. Reach neighbor BBs boundary, or
15077 3. Reach agu definition.
15078 Returns the distance between the non-agu definition point and INSN.
15079 If no definition point, returns -1. */
15080
15081 static int
15082 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15083 rtx_insn *insn)
15084 {
15085 basic_block bb = BLOCK_FOR_INSN (insn);
15086 int distance = 0;
15087 bool found = false;
15088
15089 if (insn != BB_HEAD (bb))
15090 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
15091 distance, PREV_INSN (insn),
15092 &found);
15093
15094 if (!found && distance < LEA_SEARCH_THRESHOLD)
15095 {
15096 edge e;
15097 edge_iterator ei;
15098 bool simple_loop = false;
15099
15100 FOR_EACH_EDGE (e, ei, bb->preds)
15101 if (e->src == bb)
15102 {
15103 simple_loop = true;
15104 break;
15105 }
15106
15107 if (simple_loop)
15108 distance = distance_non_agu_define_in_bb (regno1, regno2,
15109 insn, distance,
15110 BB_END (bb), &found);
15111 else
15112 {
15113 int shortest_dist = -1;
15114 bool found_in_bb = false;
15115
15116 FOR_EACH_EDGE (e, ei, bb->preds)
15117 {
15118 int bb_dist
15119 = distance_non_agu_define_in_bb (regno1, regno2,
15120 insn, distance,
15121 BB_END (e->src),
15122 &found_in_bb);
15123 if (found_in_bb)
15124 {
15125 if (shortest_dist < 0)
15126 shortest_dist = bb_dist;
15127 else if (bb_dist > 0)
15128 shortest_dist = MIN (bb_dist, shortest_dist);
15129
15130 found = true;
15131 }
15132 }
15133
15134 distance = shortest_dist;
15135 }
15136 }
15137
15138 if (!found)
15139 return -1;
15140
15141 return distance >> 1;
15142 }
15143
15144 /* Return the distance in half-cycles between INSN and the next
15145 insn that uses register number REGNO in a memory address, added
15146 to DISTANCE. Return -1 if REGNO is set.
15147
15148 Set *FOUND to true if a register use was found and false
15149 otherwise.
15150 Set *REDEFINED to true if a register redefinition was found
15151 and false otherwise. */
15152
15153 static int
15154 distance_agu_use_in_bb (unsigned int regno,
15155 rtx_insn *insn, int distance, rtx_insn *start,
15156 bool *found, bool *redefined)
15157 {
15158 basic_block bb = NULL;
15159 rtx_insn *next = start;
15160 rtx_insn *prev = NULL;
15161
15162 *found = false;
15163 *redefined = false;
15164
15165 if (start != NULL_RTX)
15166 {
15167 bb = BLOCK_FOR_INSN (start);
15168 if (start != BB_HEAD (bb))
15169 /* If insn and start belong to the same bb, set prev to insn,
15170 so the call to increase_distance will increase the distance
15171 between insns by 1. */
15172 prev = insn;
15173 }
15174
15175 while (next
15176 && next != insn
15177 && distance < LEA_SEARCH_THRESHOLD)
15178 {
15179 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
15180 {
15181 distance = increase_distance (prev, next, distance);
15182 if (insn_uses_reg_mem (regno, next))
15183 {
15184 /* Return DISTANCE if OP0 is used in memory
15185 address in NEXT. */
15186 *found = true;
15187 return distance;
15188 }
15189
15190 if (insn_defines_reg (regno, INVALID_REGNUM, next))
15191 {
15192 /* Return -1 if OP0 is set in NEXT. */
15193 *redefined = true;
15194 return -1;
15195 }
15196
15197 prev = next;
15198 }
15199
15200 if (next == BB_END (bb))
15201 break;
15202
15203 next = NEXT_INSN (next);
15204 }
15205
15206 return distance;
15207 }
15208
15209 /* Return the distance between INSN and the next insn that uses
15210 register number REGNO0 in a memory address. Return -1 if no
15211 such use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
15212
15213 static int
15214 distance_agu_use (unsigned int regno0, rtx_insn *insn)
15215 {
15216 basic_block bb = BLOCK_FOR_INSN (insn);
15217 int distance = 0;
15218 bool found = false;
15219 bool redefined = false;
15220
15221 if (insn != BB_END (bb))
15222 distance = distance_agu_use_in_bb (regno0, insn, distance,
15223 NEXT_INSN (insn),
15224 &found, &redefined);
15225
15226 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
15227 {
15228 edge e;
15229 edge_iterator ei;
15230 bool simple_loop = false;
15231
15232 FOR_EACH_EDGE (e, ei, bb->succs)
15233 if (e->dest == bb)
15234 {
15235 simple_loop = true;
15236 break;
15237 }
15238
15239 if (simple_loop)
15240 distance = distance_agu_use_in_bb (regno0, insn,
15241 distance, BB_HEAD (bb),
15242 &found, &redefined);
15243 else
15244 {
15245 int shortest_dist = -1;
15246 bool found_in_bb = false;
15247 bool redefined_in_bb = false;
15248
15249 FOR_EACH_EDGE (e, ei, bb->succs)
15250 {
15251 int bb_dist
15252 = distance_agu_use_in_bb (regno0, insn,
15253 distance, BB_HEAD (e->dest),
15254 &found_in_bb, &redefined_in_bb);
15255 if (found_in_bb)
15256 {
15257 if (shortest_dist < 0)
15258 shortest_dist = bb_dist;
15259 else if (bb_dist > 0)
15260 shortest_dist = MIN (bb_dist, shortest_dist);
15261
15262 found = true;
15263 }
15264 }
15265
15266 distance = shortest_dist;
15267 }
15268 }
15269
15270 if (!found || redefined)
15271 return -1;
15272
15273 return distance >> 1;
15274 }
15275
15276 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
15277 there is a choice between LEA and ADD:
15278 Negative value: ADD is preferred over LEA
15279 Zero: Neutral
15280 Positive value: LEA is preferred over ADD. */
15281 #define IX86_LEA_PRIORITY 0
15282
15283 /* Return true if using the lea INSN has a performance advantage
15284 over a sequence of instructions. The instruction sequence has
15285 SPLIT_COST cycles higher latency than the lea latency. */
15286
15287 static bool
15288 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
15289 unsigned int regno2, int split_cost, bool has_scale)
15290 {
15291 int dist_define, dist_use;
15292
15293 /* For Atom processors newer than Bonnell, the use of LEA is
15294 justified when using a 2-source or 3-source LEA for a
15295 non-destructive destination, or when the SCALE is wanted. */
15296 if (!TARGET_CPU_P (BONNELL))
15297 {
15298 if (has_scale)
15299 return true;
15300 if (split_cost < 1)
15301 return false;
15302 if (regno0 == regno1 || regno0 == regno2)
15303 return false;
15304 return true;
15305 }
15306
15307 /* Remember recog_data content. */
15308 struct recog_data_d recog_data_save = recog_data;
15309
15310 dist_define = distance_non_agu_define (regno1, regno2, insn);
15311 dist_use = distance_agu_use (regno0, insn);
15312
15313 /* distance_non_agu_define can call get_attr_type which can call
15314 recog_memoized, restore recog_data back to previous content. */
15315 recog_data = recog_data_save;
15316
15317 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
15318 {
15319 /* If there is no non-AGU operand definition, no AGU
15320 operand use and the split cost is 0, then both the lea
15321 and non-lea variants have the same priority. Currently
15322 we prefer lea for 64-bit code and non-lea for 32-bit
15323 code. */
15324 if (dist_use < 0 && split_cost == 0)
15325 return TARGET_64BIT || IX86_LEA_PRIORITY;
15326 else
15327 return true;
15328 }
15329
15330 /* With a longer definition distance, lea is preferable.
15331 Here we adjust the distance to take into account the
15332 splitting cost and lea priority. */
15333 dist_define += split_cost + IX86_LEA_PRIORITY;
15334
15335 /* If there is no use in a memory address then we just check
15336 that the split cost exceeds the AGU stall. */
15337 if (dist_use < 0)
15338 return dist_define > LEA_MAX_STALL;
15339
15340 /* If this insn has both backward non-agu dependence and forward
15341 agu dependence, the one with short distance takes effect. */
15342 return dist_define >= dist_use;
15343 }
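
/* A small worked example for the Bonnell path above (numbers are purely
   illustrative): with IX86_LEA_PRIORITY 0, SPLIT_COST 1 and a non-AGU
   definition one cycle back, the adjusted dist_define is 2.  If the
   address is consumed three cycles later (dist_use 3), 2 >= 3 fails and
   the split sequence wins; with no nearby address use (dist_use < 0) the
   test 2 > LEA_MAX_STALL fails as well and the lea is likewise split.  */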
15344
15345 /* Return true if we need to split op0 = op1 + op2 into a sequence of
15346 move and add to avoid AGU stalls. */
15347
15348 bool
15349 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
15350 {
15351 unsigned int regno0, regno1, regno2;
15352
15353 /* Check if we need to optimize. */
15354 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15355 return false;
15356
15357 regno0 = true_regnum (operands[0]);
15358 regno1 = true_regnum (operands[1]);
15359 regno2 = true_regnum (operands[2]);
15360
15361 /* We need to split only adds with a non-destructive
15362 destination operand. */
15363 if (regno0 == regno1 || regno0 == regno2)
15364 return false;
15365 else
15366 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
15367 }
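
/* When this predicate returns true, the effect is to prefer, e.g.,
   "mov %rdi, %rax; add %rsi, %rax" over "lea (%rdi,%rsi), %rax" (a rough
   sketch; the actual rewrite is done by the machine-description patterns
   that consult this predicate, not here).  */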
15368
15369 /* Return true if we should emit lea instruction instead of mov
15370 instruction. */
15371
15372 bool
15373 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
15374 {
15375 unsigned int regno0, regno1;
15376
15377 /* Check if we need to optimize. */
15378 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15379 return false;
15380
15381 /* Use lea for reg to reg moves only. */
15382 if (!REG_P (operands[0]) || !REG_P (operands[1]))
15383 return false;
15384
15385 regno0 = true_regnum (operands[0]);
15386 regno1 = true_regnum (operands[1]);
15387
15388 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
15389 }
15390
15391 /* Return true if we need to split lea into a sequence of
15392 instructions to avoid AGU stalls during peephole2. */
15393
15394 bool
15395 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
15396 {
15397 unsigned int regno0, regno1, regno2;
15398 int split_cost;
15399 struct ix86_address parts;
15400 int ok;
15401
15402 /* The "at least two components" test below might not catch simple
15403 move or zero extension insns if parts.base is non-NULL and parts.disp
15404 is const0_rtx as the only components in the address, e.g. if the
15405 register is %rbp or %r13. As this test is much cheaper and moves or
15406 zero extensions are the common case, do this check first. */
15407 if (REG_P (operands[1])
15408 || (SImode_address_operand (operands[1], VOIDmode)
15409 && REG_P (XEXP (operands[1], 0))))
15410 return false;
15411
15412 ok = ix86_decompose_address (operands[1], &parts);
15413 gcc_assert (ok);
15414
15415 /* There should be at least two components in the address. */
15416 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
15417 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
15418 return false;
15419
15420 /* We should not split into add if a non-legitimate PIC
15421 operand is used as the displacement. */
15422 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
15423 return false;
15424
15425 regno0 = true_regnum (operands[0]);
15426 regno1 = INVALID_REGNUM;
15427 regno2 = INVALID_REGNUM;
15428
15429 if (parts.base)
15430 regno1 = true_regnum (parts.base);
15431 if (parts.index)
15432 regno2 = true_regnum (parts.index);
15433
15434 /* Use add for a = a + b and a = b + a since it is faster and shorter
15435 than lea for most processors. For the processors like BONNELL, if
15436 the destination register of LEA holds an actual address which will
15437 be used soon, LEA is better and otherwise ADD is better. */
15438 if (!TARGET_CPU_P (BONNELL)
15439 && parts.scale == 1
15440 && (!parts.disp || parts.disp == const0_rtx)
15441 && (regno0 == regno1 || regno0 == regno2))
15442 return true;
15443
15444 /* Check if we need to optimize. */
15445 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
15446 return false;
15447
15448 split_cost = 0;
15449
15450 /* Compute how many cycles we will add to the execution time
15451 if we split the lea into a sequence of instructions. */
15452 if (parts.base || parts.index)
15453 {
15454 /* Have to use a mov instruction if the non-destructive
15455 destination form is used. */
15456 if (regno1 != regno0 && regno2 != regno0)
15457 split_cost += 1;
15458
15459 /* Have to add index to base if both exist. */
15460 if (parts.base && parts.index)
15461 split_cost += 1;
15462
15463 /* Have to use shift and adds if scale is 2 or greater. */
15464 if (parts.scale > 1)
15465 {
15466 if (regno0 != regno1)
15467 split_cost += 1;
15468 else if (regno2 == regno0)
15469 split_cost += 4;
15470 else
15471 split_cost += parts.scale;
15472 }
15473
15474 /* Have to use an add instruction with an immediate if
15475 disp is non-zero. */
15476 if (parts.disp && parts.disp != const0_rtx)
15477 split_cost += 1;
15478
15479 /* Subtract the price of lea. */
15480 split_cost -= 1;
15481 }
15482
15483 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
15484 parts.scale > 1);
15485 }
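
/* A worked example of the SPLIT_COST computation above: for
   "lea 16(%rbx,%rcx), %rax", with the destination distinct from both
   sources, the split needs a mov (+1), an add of the index (+1) and an
   add of the displacement (+1), minus the lea itself (-1), i.e. a split
   cost of 2 extra cycles fed into ix86_lea_outperforms.  */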
15486
15487 /* Return true if it is ok to optimize an ADD operation to LEA
15488 operation to avoid flag register consumption. For most processors,
15489 ADD is faster than LEA. For the processors like BONNELL, if the
15490 destination register of LEA holds an actual address which will be
15491 used soon, LEA is better and otherwise ADD is better. */
15492
15493 bool
15494 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
15495 {
15496 unsigned int regno0 = true_regnum (operands[0]);
15497 unsigned int regno1 = true_regnum (operands[1]);
15498 unsigned int regno2 = true_regnum (operands[2]);
15499
15500 /* If a = b + c, (a!=b && a!=c), must use lea form. */
15501 if (regno0 != regno1 && regno0 != regno2)
15502 return true;
15503
15504 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15505 return false;
15506
15507 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
15508 }
15509
15510 /* Return true if destination reg of SET_BODY is shift count of
15511 USE_BODY. */
15512
15513 static bool
15514 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
15515 {
15516 rtx set_dest;
15517 rtx shift_rtx;
15518 int i;
15519
15520 /* Retrieve destination of SET_BODY. */
15521 switch (GET_CODE (set_body))
15522 {
15523 case SET:
15524 set_dest = SET_DEST (set_body);
15525 if (!set_dest || !REG_P (set_dest))
15526 return false;
15527 break;
15528 case PARALLEL:
15529 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
15530 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
15531 use_body))
15532 return true;
15533 /* FALLTHROUGH */
15534 default:
15535 return false;
15536 }
15537
15538 /* Retrieve shift count of USE_BODY. */
15539 switch (GET_CODE (use_body))
15540 {
15541 case SET:
15542 shift_rtx = XEXP (use_body, 1);
15543 break;
15544 case PARALLEL:
15545 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
15546 if (ix86_dep_by_shift_count_body (set_body,
15547 XVECEXP (use_body, 0, i)))
15548 return true;
15549 /* FALLTHROUGH */
15550 default:
15551 return false;
15552 }
15553
15554 if (shift_rtx
15555 && (GET_CODE (shift_rtx) == ASHIFT
15556 || GET_CODE (shift_rtx) == LSHIFTRT
15557 || GET_CODE (shift_rtx) == ASHIFTRT
15558 || GET_CODE (shift_rtx) == ROTATE
15559 || GET_CODE (shift_rtx) == ROTATERT))
15560 {
15561 rtx shift_count = XEXP (shift_rtx, 1);
15562
15563 /* Return true if shift count is dest of SET_BODY. */
15564 if (REG_P (shift_count))
15565 {
15566 /* Add this check since it can be invoked before register
15567 allocation in the pre-reload scheduler. */
15568 if (reload_completed
15569 && true_regnum (set_dest) == true_regnum (shift_count))
15570 return true;
15571 else if (REGNO (set_dest) == REGNO (shift_count))
15572 return true;
15573 }
15574 }
15575
15576 return false;
15577 }
15578
15579 /* Return true if destination reg of SET_INSN is shift count of
15580 USE_INSN. */
15581
15582 bool
15583 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
15584 {
15585 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
15586 PATTERN (use_insn));
15587 }
15588
15589 /* Return TRUE or FALSE depending on whether the unary operator meets the
15590 appropriate constraints. */
15591
15592 bool
15593 ix86_unary_operator_ok (enum rtx_code,
15594 machine_mode,
15595 rtx operands[2])
15596 {
15597 /* If one of operands is memory, source and destination must match. */
15598 if ((MEM_P (operands[0])
15599 || MEM_P (operands[1]))
15600 && ! rtx_equal_p (operands[0], operands[1]))
15601 return false;
15602 return true;
15603 }
15604
15605 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
15606 are ok, keeping in mind the possible movddup alternative. */
15607
15608 bool
15609 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
15610 {
15611 if (MEM_P (operands[0]))
15612 return rtx_equal_p (operands[0], operands[1 + high]);
15613 if (MEM_P (operands[1]) && MEM_P (operands[2]))
15614 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
15615 return true;
15616 }
15617
15618 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
15619 then replicate the value for all elements of the vector
15620 register. */
15621
15622 rtx
15623 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
15624 {
15625 int i, n_elt;
15626 rtvec v;
15627 machine_mode scalar_mode;
15628
15629 switch (mode)
15630 {
15631 case E_V64QImode:
15632 case E_V32QImode:
15633 case E_V16QImode:
15634 case E_V32HImode:
15635 case E_V16HImode:
15636 case E_V8HImode:
15637 case E_V16SImode:
15638 case E_V8SImode:
15639 case E_V4SImode:
15640 case E_V2SImode:
15641 case E_V8DImode:
15642 case E_V4DImode:
15643 case E_V2DImode:
15644 gcc_assert (vect);
15645 /* FALLTHRU */
15646 case E_V8HFmode:
15647 case E_V16HFmode:
15648 case E_V32HFmode:
15649 case E_V16SFmode:
15650 case E_V8SFmode:
15651 case E_V4SFmode:
15652 case E_V2SFmode:
15653 case E_V8DFmode:
15654 case E_V4DFmode:
15655 case E_V2DFmode:
15656 n_elt = GET_MODE_NUNITS (mode);
15657 v = rtvec_alloc (n_elt);
15658 scalar_mode = GET_MODE_INNER (mode);
15659
15660 RTVEC_ELT (v, 0) = value;
15661
15662 for (i = 1; i < n_elt; ++i)
15663 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
15664
15665 return gen_rtx_CONST_VECTOR (mode, v);
15666
15667 default:
15668 gcc_unreachable ();
15669 }
15670 }
15671
15672 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15673 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15674 for an SSE register. If VECT is true, then replicate the mask for
15675 all elements of the vector register. If INVERT is true, then create
15676 a mask excluding the sign bit. */
15677
15678 rtx
15679 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
15680 {
15681 machine_mode vec_mode, imode;
15682 wide_int w;
15683 rtx mask, v;
15684
15685 switch (mode)
15686 {
15687 case E_V8HFmode:
15688 case E_V16HFmode:
15689 case E_V32HFmode:
15690 vec_mode = mode;
15691 imode = HImode;
15692 break;
15693
15694 case E_V16SImode:
15695 case E_V16SFmode:
15696 case E_V8SImode:
15697 case E_V4SImode:
15698 case E_V8SFmode:
15699 case E_V4SFmode:
15700 case E_V2SFmode:
15701 case E_V2SImode:
15702 vec_mode = mode;
15703 imode = SImode;
15704 break;
15705
15706 case E_V8DImode:
15707 case E_V4DImode:
15708 case E_V2DImode:
15709 case E_V8DFmode:
15710 case E_V4DFmode:
15711 case E_V2DFmode:
15712 vec_mode = mode;
15713 imode = DImode;
15714 break;
15715
15716 case E_TImode:
15717 case E_TFmode:
15718 vec_mode = VOIDmode;
15719 imode = TImode;
15720 break;
15721
15722 default:
15723 gcc_unreachable ();
15724 }
15725
15726 machine_mode inner_mode = GET_MODE_INNER (mode);
15727 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
15728 GET_MODE_BITSIZE (inner_mode));
15729 if (invert)
15730 w = wi::bit_not (w);
15731
15732 /* Force this value into the low part of a fp vector constant. */
15733 mask = immed_wide_int_const (w, imode);
15734 mask = gen_lowpart (inner_mode, mask);
15735
15736 if (vec_mode == VOIDmode)
15737 return force_reg (inner_mode, mask);
15738
15739 v = ix86_build_const_vector (vec_mode, vect, mask);
15740 return force_reg (vec_mode, v);
15741 }
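
/* For example, for V4SFmode with VECT true this returns a register
   holding four copies of the bit pattern 0x80000000 (i.e. -0.0 in each
   element); with INVERT true each element is 0x7fffffff instead, so the
   mask clears rather than isolates the sign bit.  */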
15742
15743 /* Return TRUE or FALSE depending on whether the first SET in INSN
15744 has source and destination with matching CC modes and whether the
15745 CC mode is at least as constrained as REQ_MODE. */
15746
15747 bool
15748 ix86_match_ccmode (rtx insn, machine_mode req_mode)
15749 {
15750 rtx set;
15751 machine_mode set_mode;
15752
15753 set = PATTERN (insn);
15754 if (GET_CODE (set) == PARALLEL)
15755 set = XVECEXP (set, 0, 0);
15756 gcc_assert (GET_CODE (set) == SET);
15757 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
15758
15759 set_mode = GET_MODE (SET_DEST (set));
15760 switch (set_mode)
15761 {
15762 case E_CCNOmode:
15763 if (req_mode != CCNOmode
15764 && (req_mode != CCmode
15765 || XEXP (SET_SRC (set), 1) != const0_rtx))
15766 return false;
15767 break;
15768 case E_CCmode:
15769 if (req_mode == CCGCmode)
15770 return false;
15771 /* FALLTHRU */
15772 case E_CCGCmode:
15773 if (req_mode == CCGOCmode || req_mode == CCNOmode)
15774 return false;
15775 /* FALLTHRU */
15776 case E_CCGOCmode:
15777 if (req_mode == CCZmode)
15778 return false;
15779 /* FALLTHRU */
15780 case E_CCZmode:
15781 break;
15782
15783 case E_CCGZmode:
15784
15785 case E_CCAmode:
15786 case E_CCCmode:
15787 case E_CCOmode:
15788 case E_CCPmode:
15789 case E_CCSmode:
15790 if (set_mode != req_mode)
15791 return false;
15792 break;
15793
15794 default:
15795 gcc_unreachable ();
15796 }
15797
15798 return GET_MODE (SET_SRC (set)) == set_mode;
15799 }
15800
15801 machine_mode
15802 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
15803 {
15804 machine_mode mode = GET_MODE (op0);
15805
15806 if (SCALAR_FLOAT_MODE_P (mode))
15807 {
15808 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15809 return CCFPmode;
15810 }
15811
15812 switch (code)
15813 {
15814 /* Only zero flag is needed. */
15815 case EQ: /* ZF=0 */
15816 case NE: /* ZF!=0 */
15817 return CCZmode;
15818 /* Codes needing carry flag. */
15819 case GEU: /* CF=0 */
15820 case LTU: /* CF=1 */
15821 rtx geu;
15822 /* Detect overflow checks. They need just the carry flag. */
15823 if (GET_CODE (op0) == PLUS
15824 && (rtx_equal_p (op1, XEXP (op0, 0))
15825 || rtx_equal_p (op1, XEXP (op0, 1))))
15826 return CCCmode;
15827 /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
15828 Match LTU of op0
15829 (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
15830 and op1
15831 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
15832 where CC_CCC is either CC or CCC. */
15833 else if (code == LTU
15834 && GET_CODE (op0) == NEG
15835 && GET_CODE (geu = XEXP (op0, 0)) == GEU
15836 && REG_P (XEXP (geu, 0))
15837 && (GET_MODE (XEXP (geu, 0)) == CCCmode
15838 || GET_MODE (XEXP (geu, 0)) == CCmode)
15839 && REGNO (XEXP (geu, 0)) == FLAGS_REG
15840 && XEXP (geu, 1) == const0_rtx
15841 && GET_CODE (op1) == LTU
15842 && REG_P (XEXP (op1, 0))
15843 && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
15844 && REGNO (XEXP (op1, 0)) == FLAGS_REG
15845 && XEXP (op1, 1) == const0_rtx)
15846 return CCCmode;
15847 else
15848 return CCmode;
15849 case GTU: /* CF=0 & ZF=0 */
15850 case LEU: /* CF=1 | ZF=1 */
15851 return CCmode;
15852 /* Codes possibly doable only with sign flag when
15853 comparing against zero. */
15854 case GE: /* SF=OF or SF=0 */
15855 case LT: /* SF<>OF or SF=1 */
15856 if (op1 == const0_rtx)
15857 return CCGOCmode;
15858 else
15859 /* For other cases Carry flag is not required. */
15860 return CCGCmode;
15861 /* Codes doable only with the sign flag when comparing
15862 against zero, but we miss the jump instruction for it
15863 so we need to use relational tests against overflow,
15864 which thus needs to be zero. */
15865 case GT: /* ZF=0 & SF=OF */
15866 case LE: /* ZF=1 | SF<>OF */
15867 if (op1 == const0_rtx)
15868 return CCNOmode;
15869 else
15870 return CCGCmode;
15871 /* The strcmp pattern does (use flags) and combine may ask us for
15872 the proper mode. */
15873 case USE:
15874 return CCmode;
15875 default:
15876 gcc_unreachable ();
15877 }
15878 }
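
/* A few examples of the mapping above: (eq x y) only needs ZF and gets
   CCZmode; (ltu (plus a b) a) is an overflow check and gets CCCmode;
   (ge x (const_int 0)) can be decided from SF alone and gets CCGOCmode,
   while (ge x y) with a non-zero Y needs the full CCGCmode.  */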
15879
15880 /* Return the fixed registers used for condition codes. */
15881
15882 static bool
15883 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
15884 {
15885 *p1 = FLAGS_REG;
15886 *p2 = INVALID_REGNUM;
15887 return true;
15888 }
15889
15890 /* If two condition code modes are compatible, return a condition code
15891 mode which is compatible with both. Otherwise, return
15892 VOIDmode. */
15893
15894 static machine_mode
15895 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
15896 {
15897 if (m1 == m2)
15898 return m1;
15899
15900 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
15901 return VOIDmode;
15902
15903 if ((m1 == CCGCmode && m2 == CCGOCmode)
15904 || (m1 == CCGOCmode && m2 == CCGCmode))
15905 return CCGCmode;
15906
15907 if ((m1 == CCNOmode && m2 == CCGOCmode)
15908 || (m1 == CCGOCmode && m2 == CCNOmode))
15909 return CCNOmode;
15910
15911 if (m1 == CCZmode
15912 && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
15913 return m2;
15914 else if (m2 == CCZmode
15915 && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
15916 return m1;
15917
15918 switch (m1)
15919 {
15920 default:
15921 gcc_unreachable ();
15922
15923 case E_CCmode:
15924 case E_CCGCmode:
15925 case E_CCGOCmode:
15926 case E_CCNOmode:
15927 case E_CCAmode:
15928 case E_CCCmode:
15929 case E_CCOmode:
15930 case E_CCPmode:
15931 case E_CCSmode:
15932 case E_CCZmode:
15933 switch (m2)
15934 {
15935 default:
15936 return VOIDmode;
15937
15938 case E_CCmode:
15939 case E_CCGCmode:
15940 case E_CCGOCmode:
15941 case E_CCNOmode:
15942 case E_CCAmode:
15943 case E_CCCmode:
15944 case E_CCOmode:
15945 case E_CCPmode:
15946 case E_CCSmode:
15947 case E_CCZmode:
15948 return CCmode;
15949 }
15950
15951 case E_CCFPmode:
15952 /* These are only compatible with themselves, which we already
15953 checked above. */
15954 return VOIDmode;
15955 }
15956 }
15957
15958 /* Return the strategy to use for floating-point. We assume that fcomi is
15959 always preferable where available, since that is also true when looking
15960 at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
15961
15962 enum ix86_fpcmp_strategy
15963 ix86_fp_comparison_strategy (enum rtx_code)
15964 {
15965 /* Do fcomi/sahf based test when profitable. */
15966
15967 if (TARGET_CMOVE)
15968 return IX86_FPCMP_COMI;
15969
15970 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
15971 return IX86_FPCMP_SAHF;
15972
15973 return IX86_FPCMP_ARITH;
15974 }
15975
15976 /* Convert comparison codes we use to represent FP comparison to integer
15977 code that will result in proper branch. Return UNKNOWN if no such code
15978 is available. */
15979
15980 enum rtx_code
15981 ix86_fp_compare_code_to_integer (enum rtx_code code)
15982 {
15983 switch (code)
15984 {
15985 case GT:
15986 return GTU;
15987 case GE:
15988 return GEU;
15989 case ORDERED:
15990 case UNORDERED:
15991 return code;
15992 case UNEQ:
15993 return EQ;
15994 case UNLT:
15995 return LTU;
15996 case UNLE:
15997 return LEU;
15998 case LTGT:
15999 return NE;
16000 default:
16001 return UNKNOWN;
16002 }
16003 }
16004
16005 /* Zero extend possibly SImode EXP to Pmode register. */
16006 rtx
16007 ix86_zero_extend_to_Pmode (rtx exp)
16008 {
16009 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
16010 }
16011
16012 /* Return true if the function is called via PLT. */
16013
16014 bool
16015 ix86_call_use_plt_p (rtx call_op)
16016 {
16017 if (SYMBOL_REF_LOCAL_P (call_op))
16018 {
16019 if (SYMBOL_REF_DECL (call_op)
16020 && TREE_CODE (SYMBOL_REF_DECL (call_op)) == FUNCTION_DECL)
16021 {
16022 /* NB: All ifunc functions must be called via PLT. */
16023 cgraph_node *node
16024 = cgraph_node::get (SYMBOL_REF_DECL (call_op));
16025 if (node && node->ifunc_resolver)
16026 return true;
16027 }
16028 return false;
16029 }
16030 return true;
16031 }
16032
16033 /* Return true if the function being called was marked with attribute
16034 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
16035 to handle the non-PIC case in the backend because there is no easy
16036 interface for the front-end to force non-PLT calls to use the GOT.
16037 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
16038 to call the function marked "noplt" indirectly. */
16039
16040 static bool
16041 ix86_nopic_noplt_attribute_p (rtx call_op)
16042 {
16043 if (flag_pic || ix86_cmodel == CM_LARGE
16044 || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
16045 || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
16046 || SYMBOL_REF_LOCAL_P (call_op))
16047 return false;
16048
16049 tree symbol_decl = SYMBOL_REF_DECL (call_op);
16050
16051 if (!flag_plt
16052 || (symbol_decl != NULL_TREE
16053 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
16054 return true;
16055
16056 return false;
16057 }
16058
16059 /* Helper to output the jmp/call. */
16060 static void
16061 ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
16062 {
16063 if (thunk_name != NULL)
16064 {
16065 if (REX_INT_REGNO_P (regno)
16066 && ix86_indirect_branch_cs_prefix)
16067 fprintf (asm_out_file, "\tcs\n");
16068 fprintf (asm_out_file, "\tjmp\t");
16069 assemble_name (asm_out_file, thunk_name);
16070 putc ('\n', asm_out_file);
16071 if ((ix86_harden_sls & harden_sls_indirect_jmp))
16072 fputs ("\tint3\n", asm_out_file);
16073 }
16074 else
16075 output_indirect_thunk (regno);
16076 }
16077
16078 /* Output indirect branch via a call and return thunk. CALL_OP is a
16079 register which contains the branch target. XASM is the assembly
16080 template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
16081 A normal call is converted to:
16082
16083 call __x86_indirect_thunk_reg
16084
16085 and a tail call is converted to:
16086
16087 jmp __x86_indirect_thunk_reg
16088 */
16089
16090 static void
16091 ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
16092 {
16093 char thunk_name_buf[32];
16094 char *thunk_name;
16095 enum indirect_thunk_prefix need_prefix
16096 = indirect_thunk_need_prefix (current_output_insn);
16097 int regno = REGNO (call_op);
16098
16099 if (cfun->machine->indirect_branch_type
16100 != indirect_branch_thunk_inline)
16101 {
16102 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
16103 SET_HARD_REG_BIT (indirect_thunks_used, regno);
16104
16105 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
16106 thunk_name = thunk_name_buf;
16107 }
16108 else
16109 thunk_name = NULL;
16110
16111 if (sibcall_p)
16112 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16113 else
16114 {
16115 if (thunk_name != NULL)
16116 {
16117 if (REX_INT_REGNO_P (regno)
16118 && ix86_indirect_branch_cs_prefix)
16119 fprintf (asm_out_file, "\tcs\n");
16120 fprintf (asm_out_file, "\tcall\t");
16121 assemble_name (asm_out_file, thunk_name);
16122 putc ('\n', asm_out_file);
16123 return;
16124 }
16125
16126 char indirectlabel1[32];
16127 char indirectlabel2[32];
16128
16129 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
16130 INDIRECT_LABEL,
16131 indirectlabelno++);
16132 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
16133 INDIRECT_LABEL,
16134 indirectlabelno++);
16135
16136 /* Jump. */
16137 fputs ("\tjmp\t", asm_out_file);
16138 assemble_name_raw (asm_out_file, indirectlabel2);
16139 fputc ('\n', asm_out_file);
16140
16141 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
16142
16143 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16144
16145 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
16146
16147 /* Call. */
16148 fputs ("\tcall\t", asm_out_file);
16149 assemble_name_raw (asm_out_file, indirectlabel1);
16150 fputc ('\n', asm_out_file);
16151 }
16152 }
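
/* For the inline-thunk, non-sibcall case above the emitted assembly looks
   roughly like this (label names are illustrative):

	jmp	.LIND2
   .LIND1:
	<inline thunk body for the branch register>
   .LIND2:
	call	.LIND1

   so the call instruction supplies the return address for the real callee
   while the thunk transfers control to the register's target.  */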
16153
16154 /* Output indirect branch via a call and return thunk. CALL_OP is
16155 the branch target. XASM is the assembly template for CALL_OP.
16156 Branch is a tail call if SIBCALL_P is true. A normal call is
16157 converted to:
16158
16159 jmp L2
16160 L1:
16161 push CALL_OP
16162 jmp __x86_indirect_thunk
16163 L2:
16164 call L1
16165
16166 and a tail call is converted to:
16167
16168 push CALL_OP
16169 jmp __x86_indirect_thunk
16170 */
16171
16172 static void
16173 ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
16174 bool sibcall_p)
16175 {
16176 char thunk_name_buf[32];
16177 char *thunk_name;
16178 char push_buf[64];
16179 enum indirect_thunk_prefix need_prefix
16180 = indirect_thunk_need_prefix (current_output_insn);
16181 int regno = -1;
16182
16183 if (cfun->machine->indirect_branch_type
16184 != indirect_branch_thunk_inline)
16185 {
16186 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
16187 indirect_thunk_needed = true;
16188 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
16189 thunk_name = thunk_name_buf;
16190 }
16191 else
16192 thunk_name = NULL;
16193
16194 snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
16195 TARGET_64BIT ? 'q' : 'l', xasm);
16196
16197 if (sibcall_p)
16198 {
16199 output_asm_insn (push_buf, &call_op);
16200 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16201 }
16202 else
16203 {
16204 char indirectlabel1[32];
16205 char indirectlabel2[32];
16206
16207 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
16208 INDIRECT_LABEL,
16209 indirectlabelno++);
16210 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
16211 INDIRECT_LABEL,
16212 indirectlabelno++);
16213
16214 /* Jump. */
16215 fputs ("\tjmp\t", asm_out_file);
16216 assemble_name_raw (asm_out_file, indirectlabel2);
16217 fputc ('\n', asm_out_file);
16218
16219 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
16220
16221 /* An external function may be called via GOT, instead of PLT. */
16222 if (MEM_P (call_op))
16223 {
16224 struct ix86_address parts;
16225 rtx addr = XEXP (call_op, 0);
16226 if (ix86_decompose_address (addr, &parts)
16227 && parts.base == stack_pointer_rtx)
16228 {
16229 /* Since call will adjust stack by -UNITS_PER_WORD,
16230 we must convert "disp(stack, index, scale)" to
16231 "disp+UNITS_PER_WORD(stack, index, scale)". */
16232 if (parts.index)
16233 {
16234 addr = gen_rtx_MULT (Pmode, parts.index,
16235 GEN_INT (parts.scale));
16236 addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16237 addr);
16238 }
16239 else
16240 addr = stack_pointer_rtx;
16241
16242 rtx disp;
16243 if (parts.disp != NULL_RTX)
16244 disp = plus_constant (Pmode, parts.disp,
16245 UNITS_PER_WORD);
16246 else
16247 disp = GEN_INT (UNITS_PER_WORD);
16248
16249 addr = gen_rtx_PLUS (Pmode, addr, disp);
16250 call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
16251 }
16252 }
16253
16254 output_asm_insn (push_buf, &call_op);
16255
16256 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16257
16258 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
16259
16260 /* Call. */
16261 fputs ("\tcall\t", asm_out_file);
16262 assemble_name_raw (asm_out_file, indirectlabel1);
16263 fputc ('\n', asm_out_file);
16264 }
16265 }
16266
16267 /* Output indirect branch via a call and return thunk. CALL_OP is
16268 the branch target. XASM is the assembly template for CALL_OP.
16269 Branch is a tail call if SIBCALL_P is true. */
16270
16271 static void
16272 ix86_output_indirect_branch (rtx call_op, const char *xasm,
16273 bool sibcall_p)
16274 {
16275 if (REG_P (call_op))
16276 ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
16277 else
16278 ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
16279 }
16280
16281 /* Output indirect jump. CALL_OP is the jump target. */
16282
16283 const char *
16284 ix86_output_indirect_jmp (rtx call_op)
16285 {
16286 if (cfun->machine->indirect_branch_type != indirect_branch_keep)
16287 {
16288 /* We can't have a red zone since "call" in the indirect thunk
16289 pushes the return address onto the stack, destroying the red zone. */
16290 if (ix86_red_zone_used)
16291 gcc_unreachable ();
16292
16293 ix86_output_indirect_branch (call_op, "%0", true);
16294 }
16295 else
16296 output_asm_insn ("%!jmp\t%A0", &call_op);
16297 return (ix86_harden_sls & harden_sls_indirect_jmp) ? "int3" : "";
16298 }
16299
16300 /* Output return instrumentation for current function if needed. */
16301
16302 static void
16303 output_return_instrumentation (void)
16304 {
16305 if (ix86_instrument_return != instrument_return_none
16306 && flag_fentry
16307 && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
16308 {
16309 if (ix86_flag_record_return)
16310 fprintf (asm_out_file, "1:\n");
16311 switch (ix86_instrument_return)
16312 {
16313 case instrument_return_call:
16314 fprintf (asm_out_file, "\tcall\t__return__\n");
16315 break;
16316 case instrument_return_nop5:
16317 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
16318 fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
16319 break;
16320 case instrument_return_none:
16321 break;
16322 }
16323
16324 if (ix86_flag_record_return)
16325 {
16326 fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
16327 fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
16328 fprintf (asm_out_file, "\t.previous\n");
16329 }
16330 }
16331 }
16332
16333 /* Output the function return. Add a REP prefix to RET if LONG_P
16334 is true and the function return is kept. */
16335
16336 const char *
16337 ix86_output_function_return (bool long_p)
16338 {
16339 output_return_instrumentation ();
16340
16341 if (cfun->machine->function_return_type != indirect_branch_keep)
16342 {
16343 char thunk_name[32];
16344 enum indirect_thunk_prefix need_prefix
16345 = indirect_thunk_need_prefix (current_output_insn);
16346
16347 if (cfun->machine->function_return_type
16348 != indirect_branch_thunk_inline)
16349 {
16350 bool need_thunk = (cfun->machine->function_return_type
16351 == indirect_branch_thunk);
16352 indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
16353 true);
16354 indirect_return_needed |= need_thunk;
16355 fprintf (asm_out_file, "\tjmp\t");
16356 assemble_name (asm_out_file, thunk_name);
16357 putc ('\n', asm_out_file);
16358 }
16359 else
16360 output_indirect_thunk (INVALID_REGNUM);
16361
16362 return "";
16363 }
16364
16365 output_asm_insn (long_p ? "rep%; ret" : "ret", nullptr);
16366 return (ix86_harden_sls & harden_sls_return) ? "int3" : "";
16367 }
16368
16369 /* Output indirect function return. RET_OP is the function return
16370 target. */
16371
16372 const char *
16373 ix86_output_indirect_function_return (rtx ret_op)
16374 {
16375 if (cfun->machine->function_return_type != indirect_branch_keep)
16376 {
16377 char thunk_name[32];
16378 enum indirect_thunk_prefix need_prefix
16379 = indirect_thunk_need_prefix (current_output_insn);
16380 unsigned int regno = REGNO (ret_op);
16381 gcc_assert (regno == CX_REG);
16382
16383 if (cfun->machine->function_return_type
16384 != indirect_branch_thunk_inline)
16385 {
16386 bool need_thunk = (cfun->machine->function_return_type
16387 == indirect_branch_thunk);
16388 indirect_thunk_name (thunk_name, regno, need_prefix, true);
16389
16390 if (need_thunk)
16391 {
16392 indirect_return_via_cx = true;
16393 SET_HARD_REG_BIT (indirect_thunks_used, CX_REG);
16394 }
16395 fprintf (asm_out_file, "\tjmp\t");
16396 assemble_name (asm_out_file, thunk_name);
16397 putc ('\n', asm_out_file);
16398 }
16399 else
16400 output_indirect_thunk (regno);
16401 }
16402 else
16403 {
16404 output_asm_insn ("%!jmp\t%A0", &ret_op);
16405 if (ix86_harden_sls & harden_sls_indirect_jmp)
16406 fputs ("\tint3\n", asm_out_file);
16407 }
16408 return "";
16409 }
16410
16411 /* Output the assembly for a call instruction. */
16412
16413 const char *
16414 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
16415 {
16416 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
16417 bool output_indirect_p
16418 = (!TARGET_SEH
16419 && cfun->machine->indirect_branch_type != indirect_branch_keep);
16420 bool seh_nop_p = false;
16421 const char *xasm;
16422
16423 if (SIBLING_CALL_P (insn))
16424 {
16425 output_return_instrumentation ();
16426 if (direct_p)
16427 {
16428 if (ix86_nopic_noplt_attribute_p (call_op))
16429 {
16430 direct_p = false;
16431 if (TARGET_64BIT)
16432 {
16433 if (output_indirect_p)
16434 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16435 else
16436 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16437 }
16438 else
16439 {
16440 if (output_indirect_p)
16441 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
16442 else
16443 xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
16444 }
16445 }
16446 else
16447 xasm = "%!jmp\t%P0";
16448 }
16449 /* SEH epilogue detection requires the indirect branch case
16450 to include REX.W. */
16451 else if (TARGET_SEH)
16452 xasm = "%!rex.W jmp\t%A0";
16453 else
16454 {
16455 if (output_indirect_p)
16456 xasm = "%0";
16457 else
16458 xasm = "%!jmp\t%A0";
16459 }
16460
16461 if (output_indirect_p && !direct_p)
16462 ix86_output_indirect_branch (call_op, xasm, true);
16463 else
16464 {
16465 output_asm_insn (xasm, &call_op);
16466 if (!direct_p
16467 && (ix86_harden_sls & harden_sls_indirect_jmp))
16468 return "int3";
16469 }
16470 return "";
16471 }
16472
16473 /* SEH unwinding can require an extra nop to be emitted in several
16474 circumstances. Determine if we have one of those. */
16475 if (TARGET_SEH)
16476 {
16477 rtx_insn *i;
16478
16479 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
16480 {
16481 /* Prevent a catch region from being adjacent to a jump that would
16482 be interpreted as an epilogue sequence by the unwinder. */
16483 if (JUMP_P (i) && CROSSING_JUMP_P (i))
16484 {
16485 seh_nop_p = true;
16486 break;
16487 }
16488
16489 /* If we get to another real insn, we don't need the nop. */
16490 if (INSN_P (i))
16491 break;
16492
16493 /* If we get to the epilogue note, prevent a catch region from
16494 being adjacent to the standard epilogue sequence. Note that,
16495 if non-call exceptions are enabled, we already did it during
16496 epilogue expansion, or else, if the insn can throw internally,
16497 we already did it during the reorg pass. */
16498 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
16499 && !flag_non_call_exceptions
16500 && !can_throw_internal (insn))
16501 {
16502 seh_nop_p = true;
16503 break;
16504 }
16505 }
16506
16507 /* If we didn't find a real insn following the call, prevent the
16508 unwinder from looking into the next function. */
16509 if (i == NULL)
16510 seh_nop_p = true;
16511 }
16512
16513 if (direct_p)
16514 {
16515 if (ix86_nopic_noplt_attribute_p (call_op))
16516 {
16517 direct_p = false;
16518 if (TARGET_64BIT)
16519 {
16520 if (output_indirect_p)
16521 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16522 else
16523 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16524 }
16525 else
16526 {
16527 if (output_indirect_p)
16528 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
16529 else
16530 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
16531 }
16532 }
16533 else
16534 xasm = "%!call\t%P0";
16535 }
16536 else
16537 {
16538 if (output_indirect_p)
16539 xasm = "%0";
16540 else
16541 xasm = "%!call\t%A0";
16542 }
16543
16544 if (output_indirect_p && !direct_p)
16545 ix86_output_indirect_branch (call_op, xasm, false);
16546 else
16547 output_asm_insn (xasm, &call_op);
16548
16549 if (seh_nop_p)
16550 return "nop";
16551
16552 return "";
16553 }
16554 \f
16555 /* Return a MEM corresponding to a stack slot with mode MODE.
16556 Allocate a new slot if necessary.
16557
16558 The RTL for a function can have several slots available: N is
16559 which slot to use. */
16560
16561 rtx
16562 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
16563 {
16564 struct stack_local_entry *s;
16565
16566 gcc_assert (n < MAX_386_STACK_LOCALS);
16567
16568 for (s = ix86_stack_locals; s; s = s->next)
16569 if (s->mode == mode && s->n == n)
16570 return validize_mem (copy_rtx (s->rtl));
16571
16572 s = ggc_alloc<stack_local_entry> ();
16573 s->n = n;
16574 s->mode = mode;
16575 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16576
16577 s->next = ix86_stack_locals;
16578 ix86_stack_locals = s;
16579 return validize_mem (copy_rtx (s->rtl));
16580 }
16581
16582 static void
16583 ix86_instantiate_decls (void)
16584 {
16585 struct stack_local_entry *s;
16586
16587 for (s = ix86_stack_locals; s; s = s->next)
16588 if (s->rtl != NULL_RTX)
16589 instantiate_decl_rtl (s->rtl);
16590 }
16591 \f
16592 /* Check whether x86 address PARTS is a pc-relative address. */
16593
16594 bool
16595 ix86_rip_relative_addr_p (struct ix86_address *parts)
16596 {
16597 rtx base, index, disp;
16598
16599 base = parts->base;
16600 index = parts->index;
16601 disp = parts->disp;
16602
16603 if (disp && !base && !index)
16604 {
16605 if (TARGET_64BIT)
16606 {
16607 rtx symbol = disp;
16608
16609 if (GET_CODE (disp) == CONST)
16610 symbol = XEXP (disp, 0);
16611 if (GET_CODE (symbol) == PLUS
16612 && CONST_INT_P (XEXP (symbol, 1)))
16613 symbol = XEXP (symbol, 0);
16614
16615 if (GET_CODE (symbol) == LABEL_REF
16616 || (GET_CODE (symbol) == SYMBOL_REF
16617 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
16618 || (GET_CODE (symbol) == UNSPEC
16619 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
16620 || XINT (symbol, 1) == UNSPEC_PCREL
16621 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
16622 return true;
16623 }
16624 }
16625 return false;
16626 }
16627
16628 /* Calculate the length of the memory address in the instruction encoding.
16629 Includes addr32 prefix, does not include the one-byte modrm, opcode,
16630 or other prefixes. We never generate addr32 prefix for LEA insn. */
16631
16632 int
16633 memory_address_length (rtx addr, bool lea)
16634 {
16635 struct ix86_address parts;
16636 rtx base, index, disp;
16637 int len;
16638 int ok;
16639
16640 if (GET_CODE (addr) == PRE_DEC
16641 || GET_CODE (addr) == POST_INC
16642 || GET_CODE (addr) == PRE_MODIFY
16643 || GET_CODE (addr) == POST_MODIFY)
16644 return 0;
16645
16646 ok = ix86_decompose_address (addr, &parts);
16647 gcc_assert (ok);
16648
16649 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
16650
16651 /* If this is not LEA instruction, add the length of addr32 prefix. */
16652 if (TARGET_64BIT && !lea
16653 && (SImode_address_operand (addr, VOIDmode)
16654 || (parts.base && GET_MODE (parts.base) == SImode)
16655 || (parts.index && GET_MODE (parts.index) == SImode)))
16656 len++;
16657
16658 base = parts.base;
16659 index = parts.index;
16660 disp = parts.disp;
16661
16662 if (base && SUBREG_P (base))
16663 base = SUBREG_REG (base);
16664 if (index && SUBREG_P (index))
16665 index = SUBREG_REG (index);
16666
16667 gcc_assert (base == NULL_RTX || REG_P (base));
16668 gcc_assert (index == NULL_RTX || REG_P (index));
16669
16670 /* Rule of thumb:
16671 - esp as the base always wants an index,
16672 - ebp as the base always wants a displacement,
16673 - r12 as the base always wants an index,
16674 - r13 as the base always wants a displacement. */
16675
16676 /* Register Indirect. */
16677 if (base && !index && !disp)
16678 {
16679 /* esp (for its index) and ebp (for its displacement) need
16680 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
16681 code. */
16682 if (base == arg_pointer_rtx
16683 || base == frame_pointer_rtx
16684 || REGNO (base) == SP_REG
16685 || REGNO (base) == BP_REG
16686 || REGNO (base) == R12_REG
16687 || REGNO (base) == R13_REG)
16688 len++;
16689 }
16690
16691 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
16692 is not disp32, but disp32(%rip), so for disp32
16693 SIB byte is needed, unless print_operand_address
16694 optimizes it into disp32(%rip) or (%rip) is implied
16695 by UNSPEC. */
16696 else if (disp && !base && !index)
16697 {
16698 len += 4;
16699 if (!ix86_rip_relative_addr_p (&parts))
16700 len++;
16701 }
16702 else
16703 {
16704 /* Find the length of the displacement constant. */
16705 if (disp)
16706 {
16707 if (base && satisfies_constraint_K (disp))
16708 len += 1;
16709 else
16710 len += 4;
16711 }
16712 /* ebp always wants a displacement. Similarly r13. */
16713 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
16714 len++;
16715
16716 /* An index requires the two-byte modrm form.... */
16717 if (index
16718 /* ...like esp (or r12), which always wants an index. */
16719 || base == arg_pointer_rtx
16720 || base == frame_pointer_rtx
16721 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
16722 len++;
16723 }
16724
16725 return len;
16726 }
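
/* Two sample results of the computation above (which, as documented, does
   not count the modrm or opcode bytes): a plain "(%rsp)" needs the extra
   SIB byte, so the length is 1; "16(%rbp,%rax,4)" has an 8-bit
   displacement (+1) and an index forcing the SIB byte (+1), so the length
   is 2.  */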
16727
16728 /* Compute default value for "length_immediate" attribute. When SHORTFORM
16729 is set, expect that the insn has an 8-bit immediate alternative. */
16730 int
16731 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
16732 {
16733 int len = 0;
16734 int i;
16735 extract_insn_cached (insn);
16736 for (i = recog_data.n_operands - 1; i >= 0; --i)
16737 if (CONSTANT_P (recog_data.operand[i]))
16738 {
16739 enum attr_mode mode = get_attr_mode (insn);
16740
16741 gcc_assert (!len);
16742 if (shortform && CONST_INT_P (recog_data.operand[i]))
16743 {
16744 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
16745 switch (mode)
16746 {
16747 case MODE_QI:
16748 len = 1;
16749 continue;
16750 case MODE_HI:
16751 ival = trunc_int_for_mode (ival, HImode);
16752 break;
16753 case MODE_SI:
16754 ival = trunc_int_for_mode (ival, SImode);
16755 break;
16756 default:
16757 break;
16758 }
16759 if (IN_RANGE (ival, -128, 127))
16760 {
16761 len = 1;
16762 continue;
16763 }
16764 }
16765 switch (mode)
16766 {
16767 case MODE_QI:
16768 len = 1;
16769 break;
16770 case MODE_HI:
16771 len = 2;
16772 break;
16773 case MODE_SI:
16774 len = 4;
16775 break;
16776 /* Immediates for DImode instructions are encoded
16777 as 32bit sign extended values. */
16778 case MODE_DI:
16779 len = 4;
16780 break;
16781 default:
16782 fatal_insn ("unknown insn mode", insn);
16783 }
16784 }
16785 return len;
16786 }
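
/* For example, with SHORTFORM set an SImode insn with the immediate 3
   takes the sign-extended 8-bit encoding and the returned length is 1,
   whereas the same insn with the immediate 1000 falls through to the
   full 4-byte immediate.  */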
16787
16788 /* Compute default value for "length_address" attribute. */
16789 int
16790 ix86_attr_length_address_default (rtx_insn *insn)
16791 {
16792 int i;
16793
16794 if (get_attr_type (insn) == TYPE_LEA)
16795 {
16796 rtx set = PATTERN (insn), addr;
16797
16798 if (GET_CODE (set) == PARALLEL)
16799 set = XVECEXP (set, 0, 0);
16800
16801 gcc_assert (GET_CODE (set) == SET);
16802
16803 addr = SET_SRC (set);
16804
16805 return memory_address_length (addr, true);
16806 }
16807
16808 extract_insn_cached (insn);
16809 for (i = recog_data.n_operands - 1; i >= 0; --i)
16810 {
16811 rtx op = recog_data.operand[i];
16812 if (MEM_P (op))
16813 {
16814 constrain_operands_cached (insn, reload_completed);
16815 if (which_alternative != -1)
16816 {
16817 const char *constraints = recog_data.constraints[i];
16818 int alt = which_alternative;
16819
16820 while (*constraints == '=' || *constraints == '+')
16821 constraints++;
16822 while (alt-- > 0)
16823 while (*constraints++ != ',')
16824 ;
16825 /* Skip ignored operands. */
16826 if (*constraints == 'X')
16827 continue;
16828 }
16829
16830 int len = memory_address_length (XEXP (op, 0), false);
16831
16832 /* Account for segment prefix for non-default addr spaces. */
16833 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
16834 len++;
16835
16836 return len;
16837 }
16838 }
16839 return 0;
16840 }
16841
16842 /* Compute default value for "length_vex" attribute. It includes
16843 the 2- or 3-byte VEX prefix and 1 opcode byte. */
16844
16845 int
16846 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
16847 bool has_vex_w)
16848 {
16849 int i;
16850
16851 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX W bit
16852 requires the 3-byte VEX prefix. */
16853 if (!has_0f_opcode || has_vex_w)
16854 return 3 + 1;
16855
16856 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
16857 if (!TARGET_64BIT)
16858 return 2 + 1;
16859
16860 extract_insn_cached (insn);
16861
16862 for (i = recog_data.n_operands - 1; i >= 0; --i)
16863 if (REG_P (recog_data.operand[i]))
16864 {
16865 /* REX.W bit uses 3 byte VEX prefix. */
16866 if (GET_MODE (recog_data.operand[i]) == DImode
16867 && GENERAL_REG_P (recog_data.operand[i]))
16868 return 3 + 1;
16869 }
16870 else
16871 {
16872 /* REX.X or REX.B bits use 3 byte VEX prefix. */
16873 if (MEM_P (recog_data.operand[i])
16874 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
16875 return 3 + 1;
16876 }
16877
16878 return 2 + 1;
16879 }
16880 \f
16881
16882 static bool
16883 ix86_class_likely_spilled_p (reg_class_t);
16884
16885 /* Return true if the lhs of INSN is a HW function argument register and
16886 set *IS_SPILLED to true if it is a likely-spilled HW register. */
16887 static bool
16888 insn_is_function_arg (rtx insn, bool* is_spilled)
16889 {
16890 rtx dst;
16891
16892 if (!NONDEBUG_INSN_P (insn))
16893 return false;
16894 /* Call instructions are not movable, ignore them. */
16895 if (CALL_P (insn))
16896 return false;
16897 insn = PATTERN (insn);
16898 if (GET_CODE (insn) == PARALLEL)
16899 insn = XVECEXP (insn, 0, 0);
16900 if (GET_CODE (insn) != SET)
16901 return false;
16902 dst = SET_DEST (insn);
16903 if (REG_P (dst) && HARD_REGISTER_P (dst)
16904 && ix86_function_arg_regno_p (REGNO (dst)))
16905 {
16906 /* Is it likely spilled HW register? */
16907 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
16908 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
16909 *is_spilled = true;
16910 return true;
16911 }
16912 return false;
16913 }
16914
16915 /* Add output dependencies for a chain of adjacent function arguments, but only
16916 if there is a move to a likely-spilled HW register. Return the first argument
16917 if at least one dependence was added, or NULL otherwise. */
16918 static rtx_insn *
16919 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
16920 {
16921 rtx_insn *insn;
16922 rtx_insn *last = call;
16923 rtx_insn *first_arg = NULL;
16924 bool is_spilled = false;
16925
16926 head = PREV_INSN (head);
16927
16928 /* Find the argument-passing instruction nearest to the call. */
16929 while (true)
16930 {
16931 last = PREV_INSN (last);
16932 if (last == head)
16933 return NULL;
16934 if (!NONDEBUG_INSN_P (last))
16935 continue;
16936 if (insn_is_function_arg (last, &is_spilled))
16937 break;
16938 return NULL;
16939 }
16940
16941 first_arg = last;
16942 while (true)
16943 {
16944 insn = PREV_INSN (last);
16945 if (!INSN_P (insn))
16946 break;
16947 if (insn == head)
16948 break;
16949 if (!NONDEBUG_INSN_P (insn))
16950 {
16951 last = insn;
16952 continue;
16953 }
16954 if (insn_is_function_arg (insn, &is_spilled))
16955 {
16956 /* Add output dependence between two function arguments if the chain
16957 of output arguments contains likely spilled HW registers. */
16958 if (is_spilled)
16959 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
16960 first_arg = last = insn;
16961 }
16962 else
16963 break;
16964 }
16965 if (!is_spilled)
16966 return NULL;
16967 return first_arg;
16968 }
16969
16970 /* Add output or anti dependency from insn to first_arg to restrict its code
16971 motion. */
16972 static void
16973 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
16974 {
16975 rtx set;
16976 rtx tmp;
16977
16978 set = single_set (insn);
16979 if (!set)
16980 return;
16981 tmp = SET_DEST (set);
16982 if (REG_P (tmp))
16983 {
16984 /* Add output dependency to the first function argument. */
16985 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
16986 return;
16987 }
16988 /* Add anti dependency. */
16989 add_dependence (first_arg, insn, REG_DEP_ANTI);
16990 }
16991
16992 /* Avoid cross block motion of function argument through adding dependency
16993 from the first non-jump instruction in bb. */
16994 static void
16995 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
16996 {
16997 rtx_insn *insn = BB_END (bb);
16998
16999 while (insn)
17000 {
17001 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
17002 {
17003 rtx set = single_set (insn);
17004 if (set)
17005 {
17006 avoid_func_arg_motion (arg, insn);
17007 return;
17008 }
17009 }
17010 if (insn == BB_HEAD (bb))
17011 return;
17012 insn = PREV_INSN (insn);
17013 }
17014 }
17015
17016 /* Hook for pre-reload schedule - avoid motion of function arguments
17017 passed in likely spilled HW registers. */
17018 static void
17019 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
17020 {
17021 rtx_insn *insn;
17022 rtx_insn *first_arg = NULL;
17023 if (reload_completed)
17024 return;
17025 while (head != tail && DEBUG_INSN_P (head))
17026 head = NEXT_INSN (head);
17027 for (insn = tail; insn != head; insn = PREV_INSN (insn))
17028 if (INSN_P (insn) && CALL_P (insn))
17029 {
17030 first_arg = add_parameter_dependencies (insn, head);
17031 if (first_arg)
17032 {
17033 /* Add a dependee for the first argument to predecessors, but only
17034 if the region contains more than one block. */
17035 basic_block bb = BLOCK_FOR_INSN (insn);
17036 int rgn = CONTAINING_RGN (bb->index);
17037 int nr_blks = RGN_NR_BLOCKS (rgn);
17038 /* Skip trivial regions and region head blocks that can have
17039 predecessors outside of region. */
17040 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
17041 {
17042 edge e;
17043 edge_iterator ei;
17044
17045 /* Regions are SCCs with the exception of selective
17046 scheduling with pipelining of outer blocks enabled.
17047 So also check that immediate predecessors of a non-head
17048 block are in the same region. */
17049 FOR_EACH_EDGE (e, ei, bb->preds)
17050 {
17051 /* Avoid creating loop-carried dependencies by using
17052 the topological ordering in the region. */
17053 if (rgn == CONTAINING_RGN (e->src->index)
17054 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
17055 add_dependee_for_func_arg (first_arg, e->src);
17056 }
17057 }
17058 insn = first_arg;
17059 if (insn == head)
17060 break;
17061 }
17062 }
17063 else if (first_arg)
17064 avoid_func_arg_motion (first_arg, insn);
17065 }
17066
17067 /* Hook for pre-reload schedule - set priority of moves from likely spilled
17068 HW registers to the maximum, to schedule them as soon as possible. These are
17069 moves from function argument registers at the top of the function entry
17070 and moves from function return value registers after call. */
17071 static int
17072 ix86_adjust_priority (rtx_insn *insn, int priority)
17073 {
17074 rtx set;
17075
17076 if (reload_completed)
17077 return priority;
17078
17079 if (!NONDEBUG_INSN_P (insn))
17080 return priority;
17081
17082 set = single_set (insn);
17083 if (set)
17084 {
17085 rtx tmp = SET_SRC (set);
17086 if (REG_P (tmp)
17087 && HARD_REGISTER_P (tmp)
17088 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
17089 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
17090 return current_sched_info->sched_max_insns_priority;
17091 }
17092
17093 return priority;
17094 }
17095
17096 /* Prepare for scheduling pass. */
17097 static void
17098 ix86_sched_init_global (FILE *, int, int)
17099 {
17100 /* Install scheduling hooks for current CPU. Some of these hooks are used
17101 in time-critical parts of the scheduler, so we only set them up when
17102 they are actually used. */
17103 switch (ix86_tune)
17104 {
17105 case PROCESSOR_CORE2:
17106 case PROCESSOR_NEHALEM:
17107 case PROCESSOR_SANDYBRIDGE:
17108 case PROCESSOR_HASWELL:
17109 case PROCESSOR_TREMONT:
17110 case PROCESSOR_ALDERLAKE:
17111 case PROCESSOR_GENERIC:
17112 /* Do not perform multipass scheduling for pre-reload schedule
17113 to save compile time. */
17114 if (reload_completed)
17115 {
17116 ix86_core2i7_init_hooks ();
17117 break;
17118 }
17119 /* Fall through. */
17120 default:
17121 targetm.sched.dfa_post_advance_cycle = NULL;
17122 targetm.sched.first_cycle_multipass_init = NULL;
17123 targetm.sched.first_cycle_multipass_begin = NULL;
17124 targetm.sched.first_cycle_multipass_issue = NULL;
17125 targetm.sched.first_cycle_multipass_backtrack = NULL;
17126 targetm.sched.first_cycle_multipass_end = NULL;
17127 targetm.sched.first_cycle_multipass_fini = NULL;
17128 break;
17129 }
17130 }
17131
17132 \f
17133 /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
17134
17135 static HOST_WIDE_INT
17136 ix86_static_rtx_alignment (machine_mode mode)
17137 {
17138 if (mode == DFmode)
17139 return 64;
17140 if (ALIGN_MODE_128 (mode))
17141 return MAX (128, GET_MODE_ALIGNMENT (mode));
17142 return GET_MODE_ALIGNMENT (mode);
17143 }
17144
17145 /* Implement TARGET_CONSTANT_ALIGNMENT. */
17146
17147 static HOST_WIDE_INT
17148 ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
17149 {
17150 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
17151 || TREE_CODE (exp) == INTEGER_CST)
17152 {
17153 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
17154 HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
17155 return MAX (mode_align, align);
17156 }
17157 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
17158 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
17159 return BITS_PER_WORD;
17160
17161 return align;
17162 }
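/* For example, a REAL_CST of DFmode gets 64-bit alignment, a VECTOR_CST of
   V4SFmode gets 128-bit alignment, and a STRING_CST of 31 or more
   characters gets word alignment unless optimizing for size.  */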
17163
17164 /* Implement TARGET_EMPTY_RECORD_P. */
17165
17166 static bool
17167 ix86_is_empty_record (const_tree type)
17168 {
17169 if (!TARGET_64BIT)
17170 return false;
17171 return default_is_empty_record (type);
17172 }
17173
17174 /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
17175
17176 static void
17177 ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
17178 {
17179 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
17180
17181 if (!cum->warn_empty)
17182 return;
17183
17184 if (!TYPE_EMPTY_P (type))
17185 return;
17186
17187 /* Don't warn if the function isn't visible outside of the TU. */
17188 if (cum->decl && !TREE_PUBLIC (cum->decl))
17189 return;
17190
17191 const_tree ctx = get_ultimate_context (cum->decl);
17192 if (ctx != NULL_TREE
17193 && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
17194 return;
17195
17196 /* If the actual size of the type is zero, then there is no change
17197 in how objects of this size are passed. */
17198 if (int_size_in_bytes (type) == 0)
17199 return;
17200
17201 warning (OPT_Wabi, "empty class %qT parameter passing ABI "
17202 "changes in %<-fabi-version=12%> (GCC 8)", type);
17203
17204 /* Only warn once. */
17205 cum->warn_empty = false;
17206 }
17207
17208 /* This hook returns the name of the multilib ABI. */
17209
17210 static const char *
17211 ix86_get_multilib_abi_name (void)
17212 {
17213 if (!(TARGET_64BIT_P (ix86_isa_flags)))
17214 return "i386";
17215 else if (TARGET_X32_P (ix86_isa_flags))
17216 return "x32";
17217 else
17218 return "x86_64";
17219 }
17220
17221 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
17222 the data type, and ALIGN is the alignment that the object would
17223 ordinarily have. */
17224
17225 static int
17226 iamcu_alignment (tree type, int align)
17227 {
17228 machine_mode mode;
17229
17230 if (align < 32 || TYPE_USER_ALIGN (type))
17231 return align;
17232
17233 /* The Intel MCU psABI specifies that scalar types larger than 4 bytes are
17234 aligned to 4 bytes. */
17235 type = strip_array_types (type);
17236 if (TYPE_ATOMIC (type))
17237 return align;
17238
17239 mode = TYPE_MODE (type);
17240 switch (GET_MODE_CLASS (mode))
17241 {
17242 case MODE_INT:
17243 case MODE_COMPLEX_INT:
17244 case MODE_COMPLEX_FLOAT:
17245 case MODE_FLOAT:
17246 case MODE_DECIMAL_FLOAT:
17247 return 32;
17248 default:
17249 return align;
17250 }
17251 }
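/* For example, under the Intel MCU psABI a double or long long, which would
   ordinarily get 64-bit alignment, is limited to 32-bit alignment by the
   function above, while _Atomic and user-aligned objects keep their
   original alignment.  */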
17252
17253 /* Compute the alignment for a static variable.
17254 TYPE is the data type, and ALIGN is the alignment that
17255 the object would ordinarily have. The value of this function is used
17256 instead of that alignment to align the object. */
17257
17258 int
17259 ix86_data_alignment (tree type, unsigned int align, bool opt)
17260 {
17261 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
17262 for symbols from other compilation units or symbols that don't need
17263 to bind locally. In order to preserve some ABI compatibility with
17264 those compilers, ensure we don't decrease alignment from what we
17265 used to assume. */
17266
17267 unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
17268
17269 /* A data structure equal to or greater than the size of a cache line
17270 (64 bytes in the Pentium 4 and other recent Intel processors, including
17271 processors based on Intel Core microarchitecture) should be aligned
17272 so that its base address is a multiple of the cache line size. */
17273
17274 unsigned int max_align
17275 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
17276
17277 if (max_align < BITS_PER_WORD)
17278 max_align = BITS_PER_WORD;
17279
17280 switch (ix86_align_data_type)
17281 {
17282 case ix86_align_data_type_abi: opt = false; break;
17283 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
17284 case ix86_align_data_type_cacheline: break;
17285 }
17286
17287 if (TARGET_IAMCU)
17288 align = iamcu_alignment (type, align);
17289
17290 if (opt
17291 && AGGREGATE_TYPE_P (type)
17292 && TYPE_SIZE (type)
17293 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
17294 {
17295 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
17296 && align < max_align_compat)
17297 align = max_align_compat;
17298 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
17299 && align < max_align)
17300 align = max_align;
17301 }
17302
17303 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
17304 to a 16-byte boundary. */
17305 if (TARGET_64BIT)
17306 {
17307 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
17308 && TYPE_SIZE (type)
17309 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17310 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
17311 && align < 128)
17312 return 128;
17313 }
17314
17315 if (!opt)
17316 return align;
17317
17318 if (TREE_CODE (type) == ARRAY_TYPE)
17319 {
17320 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17321 return 64;
17322 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17323 return 128;
17324 }
17325 else if (TREE_CODE (type) == COMPLEX_TYPE)
17326 {
17327
17328 if (TYPE_MODE (type) == DCmode && align < 64)
17329 return 64;
17330 if ((TYPE_MODE (type) == XCmode
17331 || TYPE_MODE (type) == TCmode) && align < 128)
17332 return 128;
17333 }
17334 else if ((TREE_CODE (type) == RECORD_TYPE
17335 || TREE_CODE (type) == UNION_TYPE
17336 || TREE_CODE (type) == QUAL_UNION_TYPE)
17337 && TYPE_FIELDS (type))
17338 {
17339 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17340 return 64;
17341 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17342 return 128;
17343 }
17344 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
17345 || TREE_CODE (type) == INTEGER_TYPE)
17346 {
17347 if (TYPE_MODE (type) == DFmode && align < 64)
17348 return 64;
17349 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17350 return 128;
17351 }
17352
17353 return align;
17354 }
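/* For example, with the function above a static double d[32] (256 bytes)
   gets cache-line alignment (typically 512 bits) when optimizing, a static
   char a[24] gets 128-bit alignment on x86-64 because it is an array of 16
   bytes or more, and a single double keeps 64-bit alignment.  */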
17355
17356 /* Implement TARGET_LOWER_LOCAL_DECL_ALIGNMENT. */
17357 static void
17358 ix86_lower_local_decl_alignment (tree decl)
17359 {
17360 unsigned int new_align = ix86_local_alignment (decl, VOIDmode,
17361 DECL_ALIGN (decl), true);
17362 if (new_align < DECL_ALIGN (decl))
17363 SET_DECL_ALIGN (decl, new_align);
17364 }
17365
17366 /* Compute the alignment for a local variable or a stack slot. EXP is
17367 the data type or decl itself, MODE is the widest mode available and
17368 ALIGN is the alignment that the object would ordinarily have. The
17369 value of this macro is used instead of that alignment to align the
17370 object. */
17371
17372 unsigned int
17373 ix86_local_alignment (tree exp, machine_mode mode,
17374 unsigned int align, bool may_lower)
17375 {
17376 tree type, decl;
17377
17378 if (exp && DECL_P (exp))
17379 {
17380 type = TREE_TYPE (exp);
17381 decl = exp;
17382 }
17383 else
17384 {
17385 type = exp;
17386 decl = NULL;
17387 }
17388
17389 /* Don't do dynamic stack realignment for long long objects with
17390 -mpreferred-stack-boundary=2. */
17391 if (may_lower
17392 && !TARGET_64BIT
17393 && align == 64
17394 && ix86_preferred_stack_boundary < 64
17395 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
17396 && (!type || (!TYPE_USER_ALIGN (type)
17397 && !TYPE_ATOMIC (strip_array_types (type))))
17398 && (!decl || !DECL_USER_ALIGN (decl)))
17399 align = 32;
17400
17401 /* If TYPE is NULL, we are allocating a stack slot for caller-save
17402 register in MODE. We will return the largest alignment of XF
17403 and DF. */
17404 if (!type)
17405 {
17406 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
17407 align = GET_MODE_ALIGNMENT (DFmode);
17408 return align;
17409 }
17410
17411 /* Don't increase alignment for Intel MCU psABI. */
17412 if (TARGET_IAMCU)
17413 return align;
17414
17415 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
17416 to a 16-byte boundary. The exact wording is:
17417
17418 An array uses the same alignment as its elements, except that a local or
17419 global array variable of length at least 16 bytes or
17420 a C99 variable-length array variable always has alignment of at least 16 bytes.
17421
17422 This was added to allow use of aligned SSE instructions on arrays. This
17423 rule is meant for static storage (where the compiler cannot do the analysis
17424 by itself). We follow it for automatic variables only when convenient:
17425 we fully control everything in the function being compiled, and functions
17426 from other units cannot rely on the alignment.
17427
17428 Exclude va_list type. It is the common case of local array where
17429 we cannot benefit from the alignment.
17430
17431 TODO: Probably one should optimize for size only when var is not escaping. */
17432 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
17433 && TARGET_SSE)
17434 {
17435 if (AGGREGATE_TYPE_P (type)
17436 && (va_list_type_node == NULL_TREE
17437 || (TYPE_MAIN_VARIANT (type)
17438 != TYPE_MAIN_VARIANT (va_list_type_node)))
17439 && TYPE_SIZE (type)
17440 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17441 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
17442 && align < 128)
17443 return 128;
17444 }
17445 if (TREE_CODE (type) == ARRAY_TYPE)
17446 {
17447 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17448 return 64;
17449 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17450 return 128;
17451 }
17452 else if (TREE_CODE (type) == COMPLEX_TYPE)
17453 {
17454 if (TYPE_MODE (type) == DCmode && align < 64)
17455 return 64;
17456 if ((TYPE_MODE (type) == XCmode
17457 || TYPE_MODE (type) == TCmode) && align < 128)
17458 return 128;
17459 }
17460 else if ((TREE_CODE (type) == RECORD_TYPE
17461 || TREE_CODE (type) == UNION_TYPE
17462 || TREE_CODE (type) == QUAL_UNION_TYPE)
17463 && TYPE_FIELDS (type))
17464 {
17465 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17466 return 64;
17467 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17468 return 128;
17469 }
17470 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
17471 || TREE_CODE (type) == INTEGER_TYPE)
17472 {
17473
17474 if (TYPE_MODE (type) == DFmode && align < 64)
17475 return 64;
17476 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17477 return 128;
17478 }
17479 return align;
17480 }
17481
17482 /* Compute the minimum required alignment for dynamic stack realignment
17483 purposes for a local variable, parameter or a stack slot. EXP is
17484 the data type or decl itself, MODE is its mode and ALIGN is the
17485 alignment that the object would ordinarily have. */
17486
17487 unsigned int
17488 ix86_minimum_alignment (tree exp, machine_mode mode,
17489 unsigned int align)
17490 {
17491 tree type, decl;
17492
17493 if (exp && DECL_P (exp))
17494 {
17495 type = TREE_TYPE (exp);
17496 decl = exp;
17497 }
17498 else
17499 {
17500 type = exp;
17501 decl = NULL;
17502 }
17503
17504 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
17505 return align;
17506
17507 /* Don't do dynamic stack realignment for long long objects with
17508 -mpreferred-stack-boundary=2. */
17509 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
17510 && (!type || (!TYPE_USER_ALIGN (type)
17511 && !TYPE_ATOMIC (strip_array_types (type))))
17512 && (!decl || !DECL_USER_ALIGN (decl)))
17513 {
17514 gcc_checking_assert (!TARGET_STV);
17515 return 32;
17516 }
17517
17518 return align;
17519 }
17520 \f
17521 /* Find a location for the static chain incoming to a nested function.
17522 This is a register, unless all free registers are used by arguments. */
17523
17524 static rtx
17525 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
17526 {
17527 unsigned regno;
17528
17529 if (TARGET_64BIT)
17530 {
17531 /* We always use R10 in 64-bit mode. */
17532 regno = R10_REG;
17533 }
17534 else
17535 {
17536 const_tree fntype, fndecl;
17537 unsigned int ccvt;
17538
17539 /* By default in 32-bit mode we use ECX to pass the static chain. */
17540 regno = CX_REG;
17541
17542 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
17543 {
17544 fntype = TREE_TYPE (fndecl_or_type);
17545 fndecl = fndecl_or_type;
17546 }
17547 else
17548 {
17549 fntype = fndecl_or_type;
17550 fndecl = NULL;
17551 }
17552
17553 ccvt = ix86_get_callcvt (fntype);
17554 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
17555 {
17556 /* Fastcall functions use ecx/edx for arguments, which leaves
17557 us with EAX for the static chain.
17558 Thiscall functions use ecx for arguments, which also
17559 leaves us with EAX for the static chain. */
17560 regno = AX_REG;
17561 }
17562 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
17563 {
17564 /* Thiscall functions use ecx for arguments, which leaves
17565 us with EAX and EDX for the static chain.
17566 We use EAX for ABI compatibility. */
17567 regno = AX_REG;
17568 }
17569 else if (ix86_function_regparm (fntype, fndecl) == 3)
17570 {
17571 /* For regparm 3, we have no free call-clobbered registers in
17572 which to store the static chain. In order to implement this,
17573 we have the trampoline push the static chain to the stack.
17574 However, we can't push a value below the return address when
17575 we call the nested function directly, so we have to use an
17576 alternate entry point. For this we use ESI, and have the
17577 alternate entry point push ESI, so that things appear the
17578 same once we're executing the nested function. */
17579 if (incoming_p)
17580 {
17581 if (fndecl == current_function_decl
17582 && !ix86_static_chain_on_stack)
17583 {
17584 gcc_assert (!reload_completed);
17585 ix86_static_chain_on_stack = true;
17586 }
17587 return gen_frame_mem (SImode,
17588 plus_constant (Pmode,
17589 arg_pointer_rtx, -8));
17590 }
17591 regno = SI_REG;
17592 }
17593 }
17594
17595 return gen_rtx_REG (Pmode, regno);
17596 }
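/* Summary of the register chosen above:
     64-bit              %r10
     32-bit, default     %ecx
     32-bit, fastcall    %eax
     32-bit, thiscall    %eax
     32-bit, regparm(3)  %esi on outgoing calls; the incoming value is read
                         from a stack slot, pushed either by the trampoline
                         or by the alternate entry point.  */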
17597
17598 /* Emit RTL insns to initialize the variable parts of a trampoline.
17599 FNDECL is the decl of the target address; M_TRAMP is a MEM for
17600 the trampoline, and CHAIN_VALUE is an RTX for the static chain
17601 to be passed to the target function. */
17602
17603 static void
17604 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
17605 {
17606 rtx mem, fnaddr;
17607 int opcode;
17608 int offset = 0;
17609 bool need_endbr = (flag_cf_protection & CF_BRANCH);
17610
17611 fnaddr = XEXP (DECL_RTL (fndecl), 0);
17612
17613 if (TARGET_64BIT)
17614 {
17615 int size;
17616
17617 if (need_endbr)
17618 {
17619 /* Insert ENDBR64. */
17620 mem = adjust_address (m_tramp, SImode, offset);
17621 emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
17622 offset += 4;
17623 }
17624
17625 /* Load the function address to r11. Try to load address using
17626 the shorter movl instead of movabs. We may want to support
17627 movq for kernel mode, but kernel does not use trampolines at
17628 the moment. FNADDR is a 32bit address and may not be in
17629 DImode when ptr_mode == SImode. Always use movl in this
17630 case. */
17631 if (ptr_mode == SImode
17632 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
17633 {
17634 fnaddr = copy_addr_to_reg (fnaddr);
17635
17636 mem = adjust_address (m_tramp, HImode, offset);
17637 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
17638
17639 mem = adjust_address (m_tramp, SImode, offset + 2);
17640 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
17641 offset += 6;
17642 }
17643 else
17644 {
17645 mem = adjust_address (m_tramp, HImode, offset);
17646 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
17647
17648 mem = adjust_address (m_tramp, DImode, offset + 2);
17649 emit_move_insn (mem, fnaddr);
17650 offset += 10;
17651 }
17652
17653 /* Load static chain using movabs to r10. Use the shorter movl
17654 instead of movabs when ptr_mode == SImode. */
17655 if (ptr_mode == SImode)
17656 {
17657 opcode = 0xba41;
17658 size = 6;
17659 }
17660 else
17661 {
17662 opcode = 0xba49;
17663 size = 10;
17664 }
17665
17666 mem = adjust_address (m_tramp, HImode, offset);
17667 emit_move_insn (mem, gen_int_mode (opcode, HImode));
17668
17669 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
17670 emit_move_insn (mem, chain_value);
17671 offset += size;
17672
17673 /* Jump to r11; the last (unused) byte is a nop, only there to
17674 pad the write out to a single 32-bit store. */
17675 mem = adjust_address (m_tramp, SImode, offset);
17676 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
17677 offset += 4;
17678 }
17679 else
17680 {
17681 rtx disp, chain;
17682
17683 /* Depending on the static chain location, either load a register
17684 with a constant, or push the constant to the stack. All of the
17685 instructions are the same size. */
17686 chain = ix86_static_chain (fndecl, true);
17687 if (REG_P (chain))
17688 {
17689 switch (REGNO (chain))
17690 {
17691 case AX_REG:
17692 opcode = 0xb8; break;
17693 case CX_REG:
17694 opcode = 0xb9; break;
17695 default:
17696 gcc_unreachable ();
17697 }
17698 }
17699 else
17700 opcode = 0x68;
17701
17702 if (need_endbr)
17703 {
17704 /* Insert ENDBR32. */
17705 mem = adjust_address (m_tramp, SImode, offset);
17706 emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
17707 offset += 4;
17708 }
17709
17710 mem = adjust_address (m_tramp, QImode, offset);
17711 emit_move_insn (mem, gen_int_mode (opcode, QImode));
17712
17713 mem = adjust_address (m_tramp, SImode, offset + 1);
17714 emit_move_insn (mem, chain_value);
17715 offset += 5;
17716
17717 mem = adjust_address (m_tramp, QImode, offset);
17718 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
17719
17720 mem = adjust_address (m_tramp, SImode, offset + 1);
17721
17722 /* Compute offset from the end of the jmp to the target function.
17723 In the case in which the trampoline stores the static chain on
17724 the stack, we need to skip the first insn which pushes the
17725 (call-saved) register static chain; this push is 1 byte. */
17726 offset += 5;
17727 int skip = MEM_P (chain) ? 1 : 0;
17728 /* Skip ENDBR32 at the entry of the target function. */
17729 if (need_endbr
17730 && !cgraph_node::get (fndecl)->only_called_directly_p ())
17731 skip += 4;
17732 disp = expand_binop (SImode, sub_optab, fnaddr,
17733 plus_constant (Pmode, XEXP (m_tramp, 0),
17734 offset - skip),
17735 NULL_RTX, 1, OPTAB_DIRECT);
17736 emit_move_insn (mem, disp);
17737 }
17738
17739 gcc_assert (offset <= TRAMPOLINE_SIZE);
17740
17741 #ifdef HAVE_ENABLE_EXECUTE_STACK
17742 #ifdef CHECK_EXECUTE_STACK_ENABLED
17743 if (CHECK_EXECUTE_STACK_ENABLED)
17744 #endif
17745 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
17746 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
17747 #endif
17748 }
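/* For reference, the 64-bit trampoline laid out above is (with an optional
   leading endbr64, f3 0f 1e fa):
     49 bb <imm64>    movabs $fnaddr, %r11   (41 bb <imm32> movl form when
                                              the address fits in 32 bits)
     49 ba <imm64>    movabs $chain, %r10    (41 ba <imm32> movl form when
                                              ptr_mode == SImode)
     49 ff e3 90      jmp *%r11; nop
   The 32-bit variant is a mov (or push) of the static chain followed by a
   relative jmp to the target function.  */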
17749
17750 static bool
17751 ix86_allocate_stack_slots_for_args (void)
17752 {
17753 /* Naked functions should not allocate stack slots for arguments. */
17754 return !ix86_function_naked (current_function_decl);
17755 }
17756
17757 static bool
17758 ix86_warn_func_return (tree decl)
17759 {
17760 /* Naked functions are implemented entirely in assembly, including the
17761 return sequence, so suppress warnings about this. */
17762 return !ix86_function_naked (decl);
17763 }
17764 \f
17765 /* Return the shift count of a vector-by-scalar shift builtin, given its second
17766 argument ARG1. */
17767 static tree
17768 ix86_vector_shift_count (tree arg1)
17769 {
17770 if (tree_fits_uhwi_p (arg1))
17771 return arg1;
17772 else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
17773 {
17774 /* The count argument is weird: it is passed in as various 128-bit
17775 (or 64-bit) vectors, and the low 64 bits of it are the count. */
17776 unsigned char buf[16];
17777 int len = native_encode_expr (arg1, buf, 16);
17778 if (len == 0)
17779 return NULL_TREE;
17780 tree t = native_interpret_expr (uint64_type_node, buf, len);
17781 if (t && tree_fits_uhwi_p (t))
17782 return t;
17783 }
17784 return NULL_TREE;
17785 }
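/* For example, for a V2DImode count argument of { 3, 0 } the low 64 bits of
   the native encoding hold the value 3, so the function returns an
   INTEGER_CST of 3.  */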
17786
17787 /* Return true if ARG_MASK is all ones; ELEMS is the number of elements of
17788 the corresponding vector. */
17789 static bool
17790 ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask)
17791 {
17792 if (TREE_CODE (arg_mask) != INTEGER_CST)
17793 return false;
17794
17795 unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask);
17796 if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
17797 return false;
17798
17799 return true;
17800 }
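/* For example, with ELEMS == 8 any mask whose low 8 bits are all set (0xff,
   0x1ff, ...) is accepted: mask | (-1 << 8) is then all ones, so the masked
   operation behaves like the unmasked one.  */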
17801
17802 static tree
17803 ix86_fold_builtin (tree fndecl, int n_args,
17804 tree *args, bool ignore ATTRIBUTE_UNUSED)
17805 {
17806 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
17807 {
17808 enum ix86_builtins fn_code
17809 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
17810 enum rtx_code rcode;
17811 bool is_vshift;
17812 unsigned HOST_WIDE_INT mask;
17813
17814 switch (fn_code)
17815 {
17816 case IX86_BUILTIN_CPU_IS:
17817 case IX86_BUILTIN_CPU_SUPPORTS:
17818 gcc_assert (n_args == 1);
17819 return fold_builtin_cpu (fndecl, args);
17820
17821 case IX86_BUILTIN_NANQ:
17822 case IX86_BUILTIN_NANSQ:
17823 {
17824 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17825 const char *str = c_getstr (*args);
17826 int quiet = fn_code == IX86_BUILTIN_NANQ;
17827 REAL_VALUE_TYPE real;
17828
17829 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
17830 return build_real (type, real);
17831 return NULL_TREE;
17832 }
17833
17834 case IX86_BUILTIN_INFQ:
17835 case IX86_BUILTIN_HUGE_VALQ:
17836 {
17837 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17838 REAL_VALUE_TYPE inf;
17839 real_inf (&inf);
17840 return build_real (type, inf);
17841 }
17842
17843 case IX86_BUILTIN_TZCNT16:
17844 case IX86_BUILTIN_CTZS:
17845 case IX86_BUILTIN_TZCNT32:
17846 case IX86_BUILTIN_TZCNT64:
17847 gcc_assert (n_args == 1);
17848 if (TREE_CODE (args[0]) == INTEGER_CST)
17849 {
17850 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17851 tree arg = args[0];
17852 if (fn_code == IX86_BUILTIN_TZCNT16
17853 || fn_code == IX86_BUILTIN_CTZS)
17854 arg = fold_convert (short_unsigned_type_node, arg);
17855 if (integer_zerop (arg))
17856 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
17857 else
17858 return fold_const_call (CFN_CTZ, type, arg);
17859 }
17860 break;
17861
17862 case IX86_BUILTIN_LZCNT16:
17863 case IX86_BUILTIN_CLZS:
17864 case IX86_BUILTIN_LZCNT32:
17865 case IX86_BUILTIN_LZCNT64:
17866 gcc_assert (n_args == 1);
17867 if (TREE_CODE (args[0]) == INTEGER_CST)
17868 {
17869 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17870 tree arg = args[0];
17871 if (fn_code == IX86_BUILTIN_LZCNT16
17872 || fn_code == IX86_BUILTIN_CLZS)
17873 arg = fold_convert (short_unsigned_type_node, arg);
17874 if (integer_zerop (arg))
17875 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
17876 else
17877 return fold_const_call (CFN_CLZ, type, arg);
17878 }
17879 break;
17880
17881 case IX86_BUILTIN_BEXTR32:
17882 case IX86_BUILTIN_BEXTR64:
17883 case IX86_BUILTIN_BEXTRI32:
17884 case IX86_BUILTIN_BEXTRI64:
17885 gcc_assert (n_args == 2);
17886 if (tree_fits_uhwi_p (args[1]))
17887 {
17888 unsigned HOST_WIDE_INT res = 0;
17889 unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
17890 unsigned int start = tree_to_uhwi (args[1]);
17891 unsigned int len = (start & 0xff00) >> 8;
17892 start &= 0xff;
17893 if (start >= prec || len == 0)
17894 res = 0;
17895 else if (!tree_fits_uhwi_p (args[0]))
17896 break;
17897 else
17898 res = tree_to_uhwi (args[0]) >> start;
17899 if (len > prec)
17900 len = prec;
17901 if (len < HOST_BITS_PER_WIDE_INT)
17902 res &= (HOST_WIDE_INT_1U << len) - 1;
17903 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17904 }
17905 break;
17906
17907 case IX86_BUILTIN_BZHI32:
17908 case IX86_BUILTIN_BZHI64:
17909 gcc_assert (n_args == 2);
17910 if (tree_fits_uhwi_p (args[1]))
17911 {
17912 unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
17913 if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
17914 return args[0];
17915 if (idx == 0)
17916 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0);
17917 if (!tree_fits_uhwi_p (args[0]))
17918 break;
17919 unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
17920 res &= ~(HOST_WIDE_INT_M1U << idx);
17921 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17922 }
17923 break;
17924
17925 case IX86_BUILTIN_PDEP32:
17926 case IX86_BUILTIN_PDEP64:
17927 gcc_assert (n_args == 2);
17928 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
17929 {
17930 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
17931 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
17932 unsigned HOST_WIDE_INT res = 0;
17933 unsigned HOST_WIDE_INT m, k = 1;
17934 for (m = 1; m; m <<= 1)
17935 if ((mask & m) != 0)
17936 {
17937 if ((src & k) != 0)
17938 res |= m;
17939 k <<= 1;
17940 }
17941 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17942 }
17943 break;
17944
17945 case IX86_BUILTIN_PEXT32:
17946 case IX86_BUILTIN_PEXT64:
17947 gcc_assert (n_args == 2);
17948 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
17949 {
17950 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
17951 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
17952 unsigned HOST_WIDE_INT res = 0;
17953 unsigned HOST_WIDE_INT m, k = 1;
17954 for (m = 1; m; m <<= 1)
17955 if ((mask & m) != 0)
17956 {
17957 if ((src & m) != 0)
17958 res |= k;
17959 k <<= 1;
17960 }
17961 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17962 }
17963 break;
17964
17965 case IX86_BUILTIN_MOVMSKPS:
17966 case IX86_BUILTIN_PMOVMSKB:
17967 case IX86_BUILTIN_MOVMSKPD:
17968 case IX86_BUILTIN_PMOVMSKB128:
17969 case IX86_BUILTIN_MOVMSKPD256:
17970 case IX86_BUILTIN_MOVMSKPS256:
17971 case IX86_BUILTIN_PMOVMSKB256:
17972 gcc_assert (n_args == 1);
17973 if (TREE_CODE (args[0]) == VECTOR_CST)
17974 {
17975 HOST_WIDE_INT res = 0;
17976 for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
17977 {
17978 tree e = VECTOR_CST_ELT (args[0], i);
17979 if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
17980 {
17981 if (wi::neg_p (wi::to_wide (e)))
17982 res |= HOST_WIDE_INT_1 << i;
17983 }
17984 else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
17985 {
17986 if (TREE_REAL_CST (e).sign)
17987 res |= HOST_WIDE_INT_1 << i;
17988 }
17989 else
17990 return NULL_TREE;
17991 }
17992 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
17993 }
17994 break;
17995
17996 case IX86_BUILTIN_PSLLD:
17997 case IX86_BUILTIN_PSLLD128:
17998 case IX86_BUILTIN_PSLLD128_MASK:
17999 case IX86_BUILTIN_PSLLD256:
18000 case IX86_BUILTIN_PSLLD256_MASK:
18001 case IX86_BUILTIN_PSLLD512:
18002 case IX86_BUILTIN_PSLLDI:
18003 case IX86_BUILTIN_PSLLDI128:
18004 case IX86_BUILTIN_PSLLDI128_MASK:
18005 case IX86_BUILTIN_PSLLDI256:
18006 case IX86_BUILTIN_PSLLDI256_MASK:
18007 case IX86_BUILTIN_PSLLDI512:
18008 case IX86_BUILTIN_PSLLQ:
18009 case IX86_BUILTIN_PSLLQ128:
18010 case IX86_BUILTIN_PSLLQ128_MASK:
18011 case IX86_BUILTIN_PSLLQ256:
18012 case IX86_BUILTIN_PSLLQ256_MASK:
18013 case IX86_BUILTIN_PSLLQ512:
18014 case IX86_BUILTIN_PSLLQI:
18015 case IX86_BUILTIN_PSLLQI128:
18016 case IX86_BUILTIN_PSLLQI128_MASK:
18017 case IX86_BUILTIN_PSLLQI256:
18018 case IX86_BUILTIN_PSLLQI256_MASK:
18019 case IX86_BUILTIN_PSLLQI512:
18020 case IX86_BUILTIN_PSLLW:
18021 case IX86_BUILTIN_PSLLW128:
18022 case IX86_BUILTIN_PSLLW128_MASK:
18023 case IX86_BUILTIN_PSLLW256:
18024 case IX86_BUILTIN_PSLLW256_MASK:
18025 case IX86_BUILTIN_PSLLW512_MASK:
18026 case IX86_BUILTIN_PSLLWI:
18027 case IX86_BUILTIN_PSLLWI128:
18028 case IX86_BUILTIN_PSLLWI128_MASK:
18029 case IX86_BUILTIN_PSLLWI256:
18030 case IX86_BUILTIN_PSLLWI256_MASK:
18031 case IX86_BUILTIN_PSLLWI512_MASK:
18032 rcode = ASHIFT;
18033 is_vshift = false;
18034 goto do_shift;
18035 case IX86_BUILTIN_PSRAD:
18036 case IX86_BUILTIN_PSRAD128:
18037 case IX86_BUILTIN_PSRAD128_MASK:
18038 case IX86_BUILTIN_PSRAD256:
18039 case IX86_BUILTIN_PSRAD256_MASK:
18040 case IX86_BUILTIN_PSRAD512:
18041 case IX86_BUILTIN_PSRADI:
18042 case IX86_BUILTIN_PSRADI128:
18043 case IX86_BUILTIN_PSRADI128_MASK:
18044 case IX86_BUILTIN_PSRADI256:
18045 case IX86_BUILTIN_PSRADI256_MASK:
18046 case IX86_BUILTIN_PSRADI512:
18047 case IX86_BUILTIN_PSRAQ128_MASK:
18048 case IX86_BUILTIN_PSRAQ256_MASK:
18049 case IX86_BUILTIN_PSRAQ512:
18050 case IX86_BUILTIN_PSRAQI128_MASK:
18051 case IX86_BUILTIN_PSRAQI256_MASK:
18052 case IX86_BUILTIN_PSRAQI512:
18053 case IX86_BUILTIN_PSRAW:
18054 case IX86_BUILTIN_PSRAW128:
18055 case IX86_BUILTIN_PSRAW128_MASK:
18056 case IX86_BUILTIN_PSRAW256:
18057 case IX86_BUILTIN_PSRAW256_MASK:
18058 case IX86_BUILTIN_PSRAW512:
18059 case IX86_BUILTIN_PSRAWI:
18060 case IX86_BUILTIN_PSRAWI128:
18061 case IX86_BUILTIN_PSRAWI128_MASK:
18062 case IX86_BUILTIN_PSRAWI256:
18063 case IX86_BUILTIN_PSRAWI256_MASK:
18064 case IX86_BUILTIN_PSRAWI512:
18065 rcode = ASHIFTRT;
18066 is_vshift = false;
18067 goto do_shift;
18068 case IX86_BUILTIN_PSRLD:
18069 case IX86_BUILTIN_PSRLD128:
18070 case IX86_BUILTIN_PSRLD128_MASK:
18071 case IX86_BUILTIN_PSRLD256:
18072 case IX86_BUILTIN_PSRLD256_MASK:
18073 case IX86_BUILTIN_PSRLD512:
18074 case IX86_BUILTIN_PSRLDI:
18075 case IX86_BUILTIN_PSRLDI128:
18076 case IX86_BUILTIN_PSRLDI128_MASK:
18077 case IX86_BUILTIN_PSRLDI256:
18078 case IX86_BUILTIN_PSRLDI256_MASK:
18079 case IX86_BUILTIN_PSRLDI512:
18080 case IX86_BUILTIN_PSRLQ:
18081 case IX86_BUILTIN_PSRLQ128:
18082 case IX86_BUILTIN_PSRLQ128_MASK:
18083 case IX86_BUILTIN_PSRLQ256:
18084 case IX86_BUILTIN_PSRLQ256_MASK:
18085 case IX86_BUILTIN_PSRLQ512:
18086 case IX86_BUILTIN_PSRLQI:
18087 case IX86_BUILTIN_PSRLQI128:
18088 case IX86_BUILTIN_PSRLQI128_MASK:
18089 case IX86_BUILTIN_PSRLQI256:
18090 case IX86_BUILTIN_PSRLQI256_MASK:
18091 case IX86_BUILTIN_PSRLQI512:
18092 case IX86_BUILTIN_PSRLW:
18093 case IX86_BUILTIN_PSRLW128:
18094 case IX86_BUILTIN_PSRLW128_MASK:
18095 case IX86_BUILTIN_PSRLW256:
18096 case IX86_BUILTIN_PSRLW256_MASK:
18097 case IX86_BUILTIN_PSRLW512:
18098 case IX86_BUILTIN_PSRLWI:
18099 case IX86_BUILTIN_PSRLWI128:
18100 case IX86_BUILTIN_PSRLWI128_MASK:
18101 case IX86_BUILTIN_PSRLWI256:
18102 case IX86_BUILTIN_PSRLWI256_MASK:
18103 case IX86_BUILTIN_PSRLWI512:
18104 rcode = LSHIFTRT;
18105 is_vshift = false;
18106 goto do_shift;
18107 case IX86_BUILTIN_PSLLVV16HI:
18108 case IX86_BUILTIN_PSLLVV16SI:
18109 case IX86_BUILTIN_PSLLVV2DI:
18110 case IX86_BUILTIN_PSLLVV2DI_MASK:
18111 case IX86_BUILTIN_PSLLVV32HI:
18112 case IX86_BUILTIN_PSLLVV4DI:
18113 case IX86_BUILTIN_PSLLVV4DI_MASK:
18114 case IX86_BUILTIN_PSLLVV4SI:
18115 case IX86_BUILTIN_PSLLVV4SI_MASK:
18116 case IX86_BUILTIN_PSLLVV8DI:
18117 case IX86_BUILTIN_PSLLVV8HI:
18118 case IX86_BUILTIN_PSLLVV8SI:
18119 case IX86_BUILTIN_PSLLVV8SI_MASK:
18120 rcode = ASHIFT;
18121 is_vshift = true;
18122 goto do_shift;
18123 case IX86_BUILTIN_PSRAVQ128:
18124 case IX86_BUILTIN_PSRAVQ256:
18125 case IX86_BUILTIN_PSRAVV16HI:
18126 case IX86_BUILTIN_PSRAVV16SI:
18127 case IX86_BUILTIN_PSRAVV32HI:
18128 case IX86_BUILTIN_PSRAVV4SI:
18129 case IX86_BUILTIN_PSRAVV4SI_MASK:
18130 case IX86_BUILTIN_PSRAVV8DI:
18131 case IX86_BUILTIN_PSRAVV8HI:
18132 case IX86_BUILTIN_PSRAVV8SI:
18133 case IX86_BUILTIN_PSRAVV8SI_MASK:
18134 rcode = ASHIFTRT;
18135 is_vshift = true;
18136 goto do_shift;
18137 case IX86_BUILTIN_PSRLVV16HI:
18138 case IX86_BUILTIN_PSRLVV16SI:
18139 case IX86_BUILTIN_PSRLVV2DI:
18140 case IX86_BUILTIN_PSRLVV2DI_MASK:
18141 case IX86_BUILTIN_PSRLVV32HI:
18142 case IX86_BUILTIN_PSRLVV4DI:
18143 case IX86_BUILTIN_PSRLVV4DI_MASK:
18144 case IX86_BUILTIN_PSRLVV4SI:
18145 case IX86_BUILTIN_PSRLVV4SI_MASK:
18146 case IX86_BUILTIN_PSRLVV8DI:
18147 case IX86_BUILTIN_PSRLVV8HI:
18148 case IX86_BUILTIN_PSRLVV8SI:
18149 case IX86_BUILTIN_PSRLVV8SI_MASK:
18150 rcode = LSHIFTRT;
18151 is_vshift = true;
18152 goto do_shift;
18153
18154 do_shift:
18155 gcc_assert (n_args >= 2);
18156 if (TREE_CODE (args[0]) != VECTOR_CST)
18157 break;
18158 mask = HOST_WIDE_INT_M1U;
18159 if (n_args > 2)
18160 {
18161 /* This is a masked shift. */
18162 if (!tree_fits_uhwi_p (args[n_args - 1])
18163 || TREE_SIDE_EFFECTS (args[n_args - 2]))
18164 break;
18165 mask = tree_to_uhwi (args[n_args - 1]);
18166 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
18167 mask |= HOST_WIDE_INT_M1U << elems;
18168 if (mask != HOST_WIDE_INT_M1U
18169 && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
18170 break;
18171 if (mask == (HOST_WIDE_INT_M1U << elems))
18172 return args[n_args - 2];
18173 }
18174 if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
18175 break;
18176 if (tree tem = (is_vshift ? integer_one_node
18177 : ix86_vector_shift_count (args[1])))
18178 {
18179 unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
18180 unsigned HOST_WIDE_INT prec
18181 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
18182 if (count == 0 && mask == HOST_WIDE_INT_M1U)
18183 return args[0];
18184 if (count >= prec)
18185 {
18186 if (rcode == ASHIFTRT)
18187 count = prec - 1;
18188 else if (mask == HOST_WIDE_INT_M1U)
18189 return build_zero_cst (TREE_TYPE (args[0]));
18190 }
18191 tree countt = NULL_TREE;
18192 if (!is_vshift)
18193 {
18194 if (count >= prec)
18195 countt = integer_zero_node;
18196 else
18197 countt = build_int_cst (integer_type_node, count);
18198 }
18199 tree_vector_builder builder;
18200 if (mask != HOST_WIDE_INT_M1U || is_vshift)
18201 builder.new_vector (TREE_TYPE (args[0]),
18202 TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
18203 1);
18204 else
18205 builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
18206 false);
18207 unsigned int cnt = builder.encoded_nelts ();
18208 for (unsigned int i = 0; i < cnt; ++i)
18209 {
18210 tree elt = VECTOR_CST_ELT (args[0], i);
18211 if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
18212 return NULL_TREE;
18213 tree type = TREE_TYPE (elt);
18214 if (rcode == LSHIFTRT)
18215 elt = fold_convert (unsigned_type_for (type), elt);
18216 if (is_vshift)
18217 {
18218 countt = VECTOR_CST_ELT (args[1], i);
18219 if (TREE_CODE (countt) != INTEGER_CST
18220 || TREE_OVERFLOW (countt))
18221 return NULL_TREE;
18222 if (wi::neg_p (wi::to_wide (countt))
18223 || wi::to_widest (countt) >= prec)
18224 {
18225 if (rcode == ASHIFTRT)
18226 countt = build_int_cst (TREE_TYPE (countt),
18227 prec - 1);
18228 else
18229 {
18230 elt = build_zero_cst (TREE_TYPE (elt));
18231 countt = build_zero_cst (TREE_TYPE (countt));
18232 }
18233 }
18234 }
18235 else if (count >= prec)
18236 elt = build_zero_cst (TREE_TYPE (elt));
18237 elt = const_binop (rcode == ASHIFT
18238 ? LSHIFT_EXPR : RSHIFT_EXPR,
18239 TREE_TYPE (elt), elt, countt);
18240 if (!elt || TREE_CODE (elt) != INTEGER_CST)
18241 return NULL_TREE;
18242 if (rcode == LSHIFTRT)
18243 elt = fold_convert (type, elt);
18244 if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
18245 {
18246 elt = VECTOR_CST_ELT (args[n_args - 2], i);
18247 if (TREE_CODE (elt) != INTEGER_CST
18248 || TREE_OVERFLOW (elt))
18249 return NULL_TREE;
18250 }
18251 builder.quick_push (elt);
18252 }
18253 return builder.build ();
18254 }
18255 break;
18256
18257 default:
18258 break;
18259 }
18260 }
18261
18262 #ifdef SUBTARGET_FOLD_BUILTIN
18263 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
18264 #endif
18265
18266 return NULL_TREE;
18267 }
18268
18269 /* Fold a MD builtin (use ix86_fold_builtin for folding into
18270 constant) in GIMPLE. */
18271
18272 bool
18273 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
18274 {
18275 gimple *stmt = gsi_stmt (*gsi);
18276 tree fndecl = gimple_call_fndecl (stmt);
18277 gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
18278 int n_args = gimple_call_num_args (stmt);
18279 enum ix86_builtins fn_code
18280 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
18281 tree decl = NULL_TREE;
18282 tree arg0, arg1, arg2;
18283 enum rtx_code rcode;
18284 enum tree_code tcode;
18285 unsigned HOST_WIDE_INT count;
18286 bool is_vshift;
18287 unsigned HOST_WIDE_INT elems;
18288
18289 switch (fn_code)
18290 {
18291 case IX86_BUILTIN_TZCNT32:
18292 decl = builtin_decl_implicit (BUILT_IN_CTZ);
18293 goto fold_tzcnt_lzcnt;
18294
18295 case IX86_BUILTIN_TZCNT64:
18296 decl = builtin_decl_implicit (BUILT_IN_CTZLL);
18297 goto fold_tzcnt_lzcnt;
18298
18299 case IX86_BUILTIN_LZCNT32:
18300 decl = builtin_decl_implicit (BUILT_IN_CLZ);
18301 goto fold_tzcnt_lzcnt;
18302
18303 case IX86_BUILTIN_LZCNT64:
18304 decl = builtin_decl_implicit (BUILT_IN_CLZLL);
18305 goto fold_tzcnt_lzcnt;
18306
18307 fold_tzcnt_lzcnt:
18308 gcc_assert (n_args == 1);
18309 arg0 = gimple_call_arg (stmt, 0);
18310 if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
18311 {
18312 int prec = TYPE_PRECISION (TREE_TYPE (arg0));
18313 /* If arg0 is provably non-zero, optimize into generic
18314 __builtin_c[tl]z{,ll} function the middle-end handles
18315 better. */
18316 if (!expr_not_equal_to (arg0, wi::zero (prec)))
18317 return false;
18318
18319 location_t loc = gimple_location (stmt);
18320 gimple *g = gimple_build_call (decl, 1, arg0);
18321 gimple_set_location (g, loc);
18322 tree lhs = make_ssa_name (integer_type_node);
18323 gimple_call_set_lhs (g, lhs);
18324 gsi_insert_before (gsi, g, GSI_SAME_STMT);
18325 g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
18326 gimple_set_location (g, loc);
18327 gsi_replace (gsi, g, false);
18328 return true;
18329 }
18330 break;
18331
18332 case IX86_BUILTIN_BZHI32:
18333 case IX86_BUILTIN_BZHI64:
18334 gcc_assert (n_args == 2);
18335 arg1 = gimple_call_arg (stmt, 1);
18336 if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
18337 {
18338 unsigned int idx = tree_to_uhwi (arg1) & 0xff;
18339 arg0 = gimple_call_arg (stmt, 0);
18340 if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
18341 break;
18342 location_t loc = gimple_location (stmt);
18343 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
18344 gimple_set_location (g, loc);
18345 gsi_replace (gsi, g, false);
18346 return true;
18347 }
18348 break;
18349
18350 case IX86_BUILTIN_PDEP32:
18351 case IX86_BUILTIN_PDEP64:
18352 case IX86_BUILTIN_PEXT32:
18353 case IX86_BUILTIN_PEXT64:
18354 gcc_assert (n_args == 2);
18355 arg1 = gimple_call_arg (stmt, 1);
18356 if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
18357 {
18358 location_t loc = gimple_location (stmt);
18359 arg0 = gimple_call_arg (stmt, 0);
18360 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
18361 gimple_set_location (g, loc);
18362 gsi_replace (gsi, g, false);
18363 return true;
18364 }
18365 break;
18366
18367 case IX86_BUILTIN_PBLENDVB128:
18368 case IX86_BUILTIN_PBLENDVB256:
18369 case IX86_BUILTIN_BLENDVPS:
18370 case IX86_BUILTIN_BLENDVPD:
18371 case IX86_BUILTIN_BLENDVPS256:
18372 case IX86_BUILTIN_BLENDVPD256:
18373 gcc_assert (n_args == 3);
18374 arg0 = gimple_call_arg (stmt, 0);
18375 arg1 = gimple_call_arg (stmt, 1);
18376 arg2 = gimple_call_arg (stmt, 2);
18377 if (gimple_call_lhs (stmt))
18378 {
18379 location_t loc = gimple_location (stmt);
18380 tree type = TREE_TYPE (arg2);
18381 gimple_seq stmts = NULL;
18382 if (VECTOR_FLOAT_TYPE_P (type))
18383 {
18384 tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
18385 ? intSI_type_node : intDI_type_node;
18386 type = get_same_sized_vectype (itype, type);
18387 arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
18388 }
18389 tree zero_vec = build_zero_cst (type);
18390 tree cmp_type = truth_type_for (type);
18391 tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
18392 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
18393 gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
18394 VEC_COND_EXPR, cmp,
18395 arg1, arg0);
18396 gimple_set_location (g, loc);
18397 gsi_replace (gsi, g, false);
18398 }
18399 else
18400 gsi_replace (gsi, gimple_build_nop (), false);
18401 return true;
18402
18403
18404 case IX86_BUILTIN_PCMPEQB128:
18405 case IX86_BUILTIN_PCMPEQW128:
18406 case IX86_BUILTIN_PCMPEQD128:
18407 case IX86_BUILTIN_PCMPEQQ:
18408 case IX86_BUILTIN_PCMPEQB256:
18409 case IX86_BUILTIN_PCMPEQW256:
18410 case IX86_BUILTIN_PCMPEQD256:
18411 case IX86_BUILTIN_PCMPEQQ256:
18412 tcode = EQ_EXPR;
18413 goto do_cmp;
18414
18415 case IX86_BUILTIN_PCMPGTB128:
18416 case IX86_BUILTIN_PCMPGTW128:
18417 case IX86_BUILTIN_PCMPGTD128:
18418 case IX86_BUILTIN_PCMPGTQ:
18419 case IX86_BUILTIN_PCMPGTB256:
18420 case IX86_BUILTIN_PCMPGTW256:
18421 case IX86_BUILTIN_PCMPGTD256:
18422 case IX86_BUILTIN_PCMPGTQ256:
18423 tcode = GT_EXPR;
18424
18425 do_cmp:
18426 gcc_assert (n_args == 2);
18427 arg0 = gimple_call_arg (stmt, 0);
18428 arg1 = gimple_call_arg (stmt, 1);
18429 if (gimple_call_lhs (stmt))
18430 {
18431 location_t loc = gimple_location (stmt);
18432 tree type = TREE_TYPE (arg0);
18433 tree zero_vec = build_zero_cst (type);
18434 tree minus_one_vec = build_minus_one_cst (type);
18435 tree cmp_type = truth_type_for (type);
18436 gimple_seq stmts = NULL;
18437 tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
18438 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
18439 gimple* g = gimple_build_assign (gimple_call_lhs (stmt),
18440 VEC_COND_EXPR, cmp,
18441 minus_one_vec, zero_vec);
18442 gimple_set_location (g, loc);
18443 gsi_replace (gsi, g, false);
18444 }
18445 else
18446 gsi_replace (gsi, gimple_build_nop (), false);
18447 return true;
18448
18449 case IX86_BUILTIN_PSLLD:
18450 case IX86_BUILTIN_PSLLD128:
18451 case IX86_BUILTIN_PSLLD128_MASK:
18452 case IX86_BUILTIN_PSLLD256:
18453 case IX86_BUILTIN_PSLLD256_MASK:
18454 case IX86_BUILTIN_PSLLD512:
18455 case IX86_BUILTIN_PSLLDI:
18456 case IX86_BUILTIN_PSLLDI128:
18457 case IX86_BUILTIN_PSLLDI128_MASK:
18458 case IX86_BUILTIN_PSLLDI256:
18459 case IX86_BUILTIN_PSLLDI256_MASK:
18460 case IX86_BUILTIN_PSLLDI512:
18461 case IX86_BUILTIN_PSLLQ:
18462 case IX86_BUILTIN_PSLLQ128:
18463 case IX86_BUILTIN_PSLLQ128_MASK:
18464 case IX86_BUILTIN_PSLLQ256:
18465 case IX86_BUILTIN_PSLLQ256_MASK:
18466 case IX86_BUILTIN_PSLLQ512:
18467 case IX86_BUILTIN_PSLLQI:
18468 case IX86_BUILTIN_PSLLQI128:
18469 case IX86_BUILTIN_PSLLQI128_MASK:
18470 case IX86_BUILTIN_PSLLQI256:
18471 case IX86_BUILTIN_PSLLQI256_MASK:
18472 case IX86_BUILTIN_PSLLQI512:
18473 case IX86_BUILTIN_PSLLW:
18474 case IX86_BUILTIN_PSLLW128:
18475 case IX86_BUILTIN_PSLLW128_MASK:
18476 case IX86_BUILTIN_PSLLW256:
18477 case IX86_BUILTIN_PSLLW256_MASK:
18478 case IX86_BUILTIN_PSLLW512_MASK:
18479 case IX86_BUILTIN_PSLLWI:
18480 case IX86_BUILTIN_PSLLWI128:
18481 case IX86_BUILTIN_PSLLWI128_MASK:
18482 case IX86_BUILTIN_PSLLWI256:
18483 case IX86_BUILTIN_PSLLWI256_MASK:
18484 case IX86_BUILTIN_PSLLWI512_MASK:
18485 rcode = ASHIFT;
18486 is_vshift = false;
18487 goto do_shift;
18488 case IX86_BUILTIN_PSRAD:
18489 case IX86_BUILTIN_PSRAD128:
18490 case IX86_BUILTIN_PSRAD128_MASK:
18491 case IX86_BUILTIN_PSRAD256:
18492 case IX86_BUILTIN_PSRAD256_MASK:
18493 case IX86_BUILTIN_PSRAD512:
18494 case IX86_BUILTIN_PSRADI:
18495 case IX86_BUILTIN_PSRADI128:
18496 case IX86_BUILTIN_PSRADI128_MASK:
18497 case IX86_BUILTIN_PSRADI256:
18498 case IX86_BUILTIN_PSRADI256_MASK:
18499 case IX86_BUILTIN_PSRADI512:
18500 case IX86_BUILTIN_PSRAQ128_MASK:
18501 case IX86_BUILTIN_PSRAQ256_MASK:
18502 case IX86_BUILTIN_PSRAQ512:
18503 case IX86_BUILTIN_PSRAQI128_MASK:
18504 case IX86_BUILTIN_PSRAQI256_MASK:
18505 case IX86_BUILTIN_PSRAQI512:
18506 case IX86_BUILTIN_PSRAW:
18507 case IX86_BUILTIN_PSRAW128:
18508 case IX86_BUILTIN_PSRAW128_MASK:
18509 case IX86_BUILTIN_PSRAW256:
18510 case IX86_BUILTIN_PSRAW256_MASK:
18511 case IX86_BUILTIN_PSRAW512:
18512 case IX86_BUILTIN_PSRAWI:
18513 case IX86_BUILTIN_PSRAWI128:
18514 case IX86_BUILTIN_PSRAWI128_MASK:
18515 case IX86_BUILTIN_PSRAWI256:
18516 case IX86_BUILTIN_PSRAWI256_MASK:
18517 case IX86_BUILTIN_PSRAWI512:
18518 rcode = ASHIFTRT;
18519 is_vshift = false;
18520 goto do_shift;
18521 case IX86_BUILTIN_PSRLD:
18522 case IX86_BUILTIN_PSRLD128:
18523 case IX86_BUILTIN_PSRLD128_MASK:
18524 case IX86_BUILTIN_PSRLD256:
18525 case IX86_BUILTIN_PSRLD256_MASK:
18526 case IX86_BUILTIN_PSRLD512:
18527 case IX86_BUILTIN_PSRLDI:
18528 case IX86_BUILTIN_PSRLDI128:
18529 case IX86_BUILTIN_PSRLDI128_MASK:
18530 case IX86_BUILTIN_PSRLDI256:
18531 case IX86_BUILTIN_PSRLDI256_MASK:
18532 case IX86_BUILTIN_PSRLDI512:
18533 case IX86_BUILTIN_PSRLQ:
18534 case IX86_BUILTIN_PSRLQ128:
18535 case IX86_BUILTIN_PSRLQ128_MASK:
18536 case IX86_BUILTIN_PSRLQ256:
18537 case IX86_BUILTIN_PSRLQ256_MASK:
18538 case IX86_BUILTIN_PSRLQ512:
18539 case IX86_BUILTIN_PSRLQI:
18540 case IX86_BUILTIN_PSRLQI128:
18541 case IX86_BUILTIN_PSRLQI128_MASK:
18542 case IX86_BUILTIN_PSRLQI256:
18543 case IX86_BUILTIN_PSRLQI256_MASK:
18544 case IX86_BUILTIN_PSRLQI512:
18545 case IX86_BUILTIN_PSRLW:
18546 case IX86_BUILTIN_PSRLW128:
18547 case IX86_BUILTIN_PSRLW128_MASK:
18548 case IX86_BUILTIN_PSRLW256:
18549 case IX86_BUILTIN_PSRLW256_MASK:
18550 case IX86_BUILTIN_PSRLW512:
18551 case IX86_BUILTIN_PSRLWI:
18552 case IX86_BUILTIN_PSRLWI128:
18553 case IX86_BUILTIN_PSRLWI128_MASK:
18554 case IX86_BUILTIN_PSRLWI256:
18555 case IX86_BUILTIN_PSRLWI256_MASK:
18556 case IX86_BUILTIN_PSRLWI512:
18557 rcode = LSHIFTRT;
18558 is_vshift = false;
18559 goto do_shift;
18560 case IX86_BUILTIN_PSLLVV16HI:
18561 case IX86_BUILTIN_PSLLVV16SI:
18562 case IX86_BUILTIN_PSLLVV2DI:
18563 case IX86_BUILTIN_PSLLVV2DI_MASK:
18564 case IX86_BUILTIN_PSLLVV32HI:
18565 case IX86_BUILTIN_PSLLVV4DI:
18566 case IX86_BUILTIN_PSLLVV4DI_MASK:
18567 case IX86_BUILTIN_PSLLVV4SI:
18568 case IX86_BUILTIN_PSLLVV4SI_MASK:
18569 case IX86_BUILTIN_PSLLVV8DI:
18570 case IX86_BUILTIN_PSLLVV8HI:
18571 case IX86_BUILTIN_PSLLVV8SI:
18572 case IX86_BUILTIN_PSLLVV8SI_MASK:
18573 rcode = ASHIFT;
18574 is_vshift = true;
18575 goto do_shift;
18576 case IX86_BUILTIN_PSRAVQ128:
18577 case IX86_BUILTIN_PSRAVQ256:
18578 case IX86_BUILTIN_PSRAVV16HI:
18579 case IX86_BUILTIN_PSRAVV16SI:
18580 case IX86_BUILTIN_PSRAVV32HI:
18581 case IX86_BUILTIN_PSRAVV4SI:
18582 case IX86_BUILTIN_PSRAVV4SI_MASK:
18583 case IX86_BUILTIN_PSRAVV8DI:
18584 case IX86_BUILTIN_PSRAVV8HI:
18585 case IX86_BUILTIN_PSRAVV8SI:
18586 case IX86_BUILTIN_PSRAVV8SI_MASK:
18587 rcode = ASHIFTRT;
18588 is_vshift = true;
18589 goto do_shift;
18590 case IX86_BUILTIN_PSRLVV16HI:
18591 case IX86_BUILTIN_PSRLVV16SI:
18592 case IX86_BUILTIN_PSRLVV2DI:
18593 case IX86_BUILTIN_PSRLVV2DI_MASK:
18594 case IX86_BUILTIN_PSRLVV32HI:
18595 case IX86_BUILTIN_PSRLVV4DI:
18596 case IX86_BUILTIN_PSRLVV4DI_MASK:
18597 case IX86_BUILTIN_PSRLVV4SI:
18598 case IX86_BUILTIN_PSRLVV4SI_MASK:
18599 case IX86_BUILTIN_PSRLVV8DI:
18600 case IX86_BUILTIN_PSRLVV8HI:
18601 case IX86_BUILTIN_PSRLVV8SI:
18602 case IX86_BUILTIN_PSRLVV8SI_MASK:
18603 rcode = LSHIFTRT;
18604 is_vshift = true;
18605 goto do_shift;
18606
18607 do_shift:
18608 gcc_assert (n_args >= 2);
18609 if (!gimple_call_lhs (stmt))
18610 break;
18611 arg0 = gimple_call_arg (stmt, 0);
18612 arg1 = gimple_call_arg (stmt, 1);
18613 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
18614 /* For masked shift, only optimize if the mask is all ones. */
18615 if (n_args > 2
18616 && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
18617 break;
18618 if (is_vshift)
18619 {
18620 if (TREE_CODE (arg1) != VECTOR_CST)
18621 break;
18622 count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
18623 if (integer_zerop (arg1))
18624 count = 0;
18625 else if (rcode == ASHIFTRT)
18626 break;
18627 else
18628 for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
18629 {
18630 tree elt = VECTOR_CST_ELT (arg1, i);
18631 if (!wi::neg_p (wi::to_wide (elt))
18632 && wi::to_widest (elt) < count)
18633 return false;
18634 }
18635 }
18636 else
18637 {
18638 arg1 = ix86_vector_shift_count (arg1);
18639 if (!arg1)
18640 break;
18641 count = tree_to_uhwi (arg1);
18642 }
18643 if (count == 0)
18644 {
18645 /* Just return the first argument for shift by 0. */
18646 location_t loc = gimple_location (stmt);
18647 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
18648 gimple_set_location (g, loc);
18649 gsi_replace (gsi, g, false);
18650 return true;
18651 }
18652 if (rcode != ASHIFTRT
18653 && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
18654 {
18655 /* For shift counts equal to or greater than the precision, the result
18656 is zero, except for arithmetic right shift. */
18657 location_t loc = gimple_location (stmt);
18658 gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
18659 build_zero_cst (TREE_TYPE (arg0)));
18660 gimple_set_location (g, loc);
18661 gsi_replace (gsi, g, false);
18662 return true;
18663 }
18664 break;
18665
18666 case IX86_BUILTIN_SHUFPD512:
18667 case IX86_BUILTIN_SHUFPS512:
18668 case IX86_BUILTIN_SHUFPD:
18669 case IX86_BUILTIN_SHUFPD256:
18670 case IX86_BUILTIN_SHUFPS:
18671 case IX86_BUILTIN_SHUFPS256:
18672 arg0 = gimple_call_arg (stmt, 0);
18673 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
18674 /* This is a masked shuffle.  Only optimize if the mask is all ones. */
18675 if (n_args > 3
18676 && !ix86_masked_all_ones (elems,
18677 gimple_call_arg (stmt, n_args - 1)))
18678 break;
18679 arg2 = gimple_call_arg (stmt, 2);
18680 if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (stmt))
18681 {
18682 unsigned HOST_WIDE_INT shuffle_mask = TREE_INT_CST_LOW (arg2);
18683 /* Check valid imm, refer to gcc.target/i386/testimm-10.c. */
18684 if (shuffle_mask > 255)
18685 return false;
18686
18687 machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)));
18688 location_t loc = gimple_location (stmt);
18689 tree itype = (imode == E_DFmode
18690 ? long_long_integer_type_node : integer_type_node);
18691 tree vtype = build_vector_type (itype, elems);
18692 tree_vector_builder elts (vtype, elems, 1);
18693
18694
18695 /* Transform the integer shuffle_mask into the vector perm_mask used
18696 by vec_perm_expr; refer to shufp[sd]256/512 in sse.md. */
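/* As an informal worked example: for IX86_BUILTIN_SHUFPD with V4DF
operands (elems == 4) and shuffle_mask 0x6 (binary 0110), the loop
below produces the permutation indices { 0, 5, 3, 6 }, i.e. element 0
of arg0, element 1 of arg1, element 3 of arg0 and element 2 of arg1. */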
18697 for (unsigned i = 0; i != elems; i++)
18698 {
18699 unsigned sel_idx;
18700 /* Imm[1:0] (if VL > 128, then also Imm[3:2], Imm[5:4], Imm[7:6])
18701 provide 2 select controls for each element of the
18702 destination. */
18703 if (imode == E_DFmode)
18704 sel_idx = (i & 1) * elems + (i & ~1)
18705 + ((shuffle_mask >> i) & 1);
18706 else
18707 {
18708 /* Imm[7:0] (if VL > 128, Imm[7:0] is reused) provides 4 select
18709 controls for each element of the destination. */
18710 unsigned j = i % 4;
18711 sel_idx = ((i >> 1) & 1) * elems + (i & ~3)
18712 + ((shuffle_mask >> 2 * j) & 3);
18713 }
18714 elts.quick_push (build_int_cst (itype, sel_idx));
18715 }
18716
18717 tree perm_mask = elts.build ();
18718 arg1 = gimple_call_arg (stmt, 1);
18719 gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
18720 VEC_PERM_EXPR,
18721 arg0, arg1, perm_mask);
18722 gimple_set_location (g, loc);
18723 gsi_replace (gsi, g, false);
18724 return true;
18725 }
18726 /* Do not error yet, the constant could be propagated later? */
18727 break;
18728
18729 default:
18730 break;
18731 }
18732
18733 return false;
18734 }
18735
18736 /* Handler for an SVML-style interface to
18737 a library with vectorized intrinsics. */
18738
18739 tree
18740 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
18741 {
18742 char name[20];
18743 tree fntype, new_fndecl, args;
18744 unsigned arity;
18745 const char *bname;
18746 machine_mode el_mode, in_mode;
18747 int n, in_n;
18748
18749 /* SVML is suitable for unsafe math only. */
18750 if (!flag_unsafe_math_optimizations)
18751 return NULL_TREE;
18752
18753 el_mode = TYPE_MODE (TREE_TYPE (type_out));
18754 n = TYPE_VECTOR_SUBPARTS (type_out);
18755 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18756 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18757 if (el_mode != in_mode
18758 || n != in_n)
18759 return NULL_TREE;
18760
18761 switch (fn)
18762 {
18763 CASE_CFN_EXP:
18764 CASE_CFN_LOG:
18765 CASE_CFN_LOG10:
18766 CASE_CFN_POW:
18767 CASE_CFN_TANH:
18768 CASE_CFN_TAN:
18769 CASE_CFN_ATAN:
18770 CASE_CFN_ATAN2:
18771 CASE_CFN_ATANH:
18772 CASE_CFN_CBRT:
18773 CASE_CFN_SINH:
18774 CASE_CFN_SIN:
18775 CASE_CFN_ASINH:
18776 CASE_CFN_ASIN:
18777 CASE_CFN_COSH:
18778 CASE_CFN_COS:
18779 CASE_CFN_ACOSH:
18780 CASE_CFN_ACOS:
18781 if ((el_mode != DFmode || n != 2)
18782 && (el_mode != SFmode || n != 4))
18783 return NULL_TREE;
18784 break;
18785
18786 default:
18787 return NULL_TREE;
18788 }
18789
18790 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
18791 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
18792
18793 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
18794 strcpy (name, "vmlsLn4");
18795 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
18796 strcpy (name, "vmldLn2");
18797 else if (n == 4)
18798 {
18799 sprintf (name, "vmls%s", bname+10);
18800 name[strlen (name)-1] = '4';
18801 }
18802 else
18803 sprintf (name, "vmld%s2", bname+10);
18804
18805 /* Convert to uppercase. */
18806 name[4] &= ~0x20;
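/* As an informal example of the resulting names: CFN_SIN with
4 x SFmode maps to the scalar builtin "__builtin_sinf", so bname+10 is
"sinf"; the sprintf above yields "vmlssinf", the final character is
rewritten to give "vmlssin4", and the uppercase conversion produces
"vmlsSin4". */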
18807
18808 arity = 0;
18809 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
18810 arity++;
18811
18812 if (arity == 1)
18813 fntype = build_function_type_list (type_out, type_in, NULL);
18814 else
18815 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
18816
18817 /* Build a function declaration for the vectorized function. */
18818 new_fndecl = build_decl (BUILTINS_LOCATION,
18819 FUNCTION_DECL, get_identifier (name), fntype);
18820 TREE_PUBLIC (new_fndecl) = 1;
18821 DECL_EXTERNAL (new_fndecl) = 1;
18822 DECL_IS_NOVOPS (new_fndecl) = 1;
18823 TREE_READONLY (new_fndecl) = 1;
18824
18825 return new_fndecl;
18826 }
18827
18828 /* Handler for an ACML-style interface to
18829 a library with vectorized intrinsics. */
18830
18831 tree
18832 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
18833 {
18834 char name[20] = "__vr.._";
18835 tree fntype, new_fndecl, args;
18836 unsigned arity;
18837 const char *bname;
18838 machine_mode el_mode, in_mode;
18839 int n, in_n;
18840
18841 /* ACML is 64-bit only and suitable for unsafe math only, as it does
18842 not correctly support parts of IEEE with the required precision,
18843 such as denormals. */
18844 if (!TARGET_64BIT
18845 || !flag_unsafe_math_optimizations)
18846 return NULL_TREE;
18847
18848 el_mode = TYPE_MODE (TREE_TYPE (type_out));
18849 n = TYPE_VECTOR_SUBPARTS (type_out);
18850 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18851 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18852 if (el_mode != in_mode
18853 || n != in_n)
18854 return NULL_TREE;
18855
18856 switch (fn)
18857 {
18858 CASE_CFN_SIN:
18859 CASE_CFN_COS:
18860 CASE_CFN_EXP:
18861 CASE_CFN_LOG:
18862 CASE_CFN_LOG2:
18863 CASE_CFN_LOG10:
18864 if (el_mode == DFmode && n == 2)
18865 {
18866 name[4] = 'd';
18867 name[5] = '2';
18868 }
18869 else if (el_mode == SFmode && n == 4)
18870 {
18871 name[4] = 's';
18872 name[5] = '4';
18873 }
18874 else
18875 return NULL_TREE;
18876 break;
18877
18878 default:
18879 return NULL_TREE;
18880 }
18881
18882 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
18883 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
18884 sprintf (name + 7, "%s", bname+10);
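/* As an informal example: CFN_SIN with 2 x DFmode gives name[4] = 'd'
and name[5] = '2', so the template becomes "__vrd2_" and appending
bname+10 of "__builtin_sin" yields "__vrd2_sin"; the 4 x SFmode
variant would similarly yield "__vrs4_sinf". */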
18885
18886 arity = 0;
18887 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
18888 arity++;
18889
18890 if (arity == 1)
18891 fntype = build_function_type_list (type_out, type_in, NULL);
18892 else
18893 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
18894
18895 /* Build a function declaration for the vectorized function. */
18896 new_fndecl = build_decl (BUILTINS_LOCATION,
18897 FUNCTION_DECL, get_identifier (name), fntype);
18898 TREE_PUBLIC (new_fndecl) = 1;
18899 DECL_EXTERNAL (new_fndecl) = 1;
18900 DECL_IS_NOVOPS (new_fndecl) = 1;
18901 TREE_READONLY (new_fndecl) = 1;
18902
18903 return new_fndecl;
18904 }
18905
18906 /* Return a decl of a function that implements a scatter store with
18907 register type VECTYPE, index type INDEX_TYPE and scale SCALE.
18908 Return NULL_TREE if it is not available. */
18909
18910 static tree
18911 ix86_vectorize_builtin_scatter (const_tree vectype,
18912 const_tree index_type, int scale)
18913 {
18914 bool si;
18915 enum ix86_builtins code;
18916
18917 if (!TARGET_AVX512F)
18918 return NULL_TREE;
18919
18920 if ((TREE_CODE (index_type) != INTEGER_TYPE
18921 && !POINTER_TYPE_P (index_type))
18922 || (TYPE_MODE (index_type) != SImode
18923 && TYPE_MODE (index_type) != DImode))
18924 return NULL_TREE;
18925
18926 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
18927 return NULL_TREE;
18928
18929 /* v*scatter* insn sign extends index to pointer mode. */
18930 if (TYPE_PRECISION (index_type) < POINTER_SIZE
18931 && TYPE_UNSIGNED (index_type))
18932 return NULL_TREE;
18933
18934 /* Scale can be 1, 2, 4 or 8. */
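/* Informally, scale & (scale - 1) is zero exactly when scale is a
power of two, so together with the range check below this accepts
1, 2, 4 and 8 and rejects e.g. 3 or 6. */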
18935 if (scale <= 0
18936 || scale > 8
18937 || (scale & (scale - 1)) != 0)
18938 return NULL_TREE;
18939
18940 si = TYPE_MODE (index_type) == SImode;
18941 switch (TYPE_MODE (vectype))
18942 {
18943 case E_V8DFmode:
18944 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
18945 break;
18946 case E_V8DImode:
18947 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
18948 break;
18949 case E_V16SFmode:
18950 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
18951 break;
18952 case E_V16SImode:
18953 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
18954 break;
18955 case E_V4DFmode:
18956 if (TARGET_AVX512VL)
18957 code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
18958 else
18959 return NULL_TREE;
18960 break;
18961 case E_V4DImode:
18962 if (TARGET_AVX512VL)
18963 code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
18964 else
18965 return NULL_TREE;
18966 break;
18967 case E_V8SFmode:
18968 if (TARGET_AVX512VL)
18969 code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
18970 else
18971 return NULL_TREE;
18972 break;
18973 case E_V8SImode:
18974 if (TARGET_AVX512VL)
18975 code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
18976 else
18977 return NULL_TREE;
18978 break;
18979 case E_V2DFmode:
18980 if (TARGET_AVX512VL)
18981 code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
18982 else
18983 return NULL_TREE;
18984 break;
18985 case E_V2DImode:
18986 if (TARGET_AVX512VL)
18987 code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
18988 else
18989 return NULL_TREE;
18990 break;
18991 case E_V4SFmode:
18992 if (TARGET_AVX512VL)
18993 code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
18994 else
18995 return NULL_TREE;
18996 break;
18997 case E_V4SImode:
18998 if (TARGET_AVX512VL)
18999 code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
19000 else
19001 return NULL_TREE;
19002 break;
19003 default:
19004 return NULL_TREE;
19005 }
19006
19007 return get_ix86_builtin (code);
19008 }
19009
19010 /* Return true if it is safe to use the rsqrt optabs to optimize
19011 1.0/sqrt. */
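/* The flag conditions below are all satisfied under -ffast-math, for
instance. */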
19012
19013 static bool
19014 use_rsqrt_p (machine_mode mode)
19015 {
19016 return ((mode == HFmode
19017 || (TARGET_SSE && TARGET_SSE_MATH))
19018 && flag_finite_math_only
19019 && !flag_trapping_math
19020 && flag_unsafe_math_optimizations);
19021 }
19022 \f
19023 /* Helper for avx_vpermilps256_operand et al. This is also used by
19024 the expansion functions to turn the parallel back into a mask.
19025 The return value is 0 for no match and the imm8+1 for a match. */
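/* Informal example: for V4SFmode a parallel of (1 0 3 2) packs each
index into a 2-bit field, giving the imm8 0xb1, so the function
returns 0xb2. */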
19026
19027 int
19028 avx_vpermilp_parallel (rtx par, machine_mode mode)
19029 {
19030 unsigned i, nelt = GET_MODE_NUNITS (mode);
19031 unsigned mask = 0;
19032 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
19033
19034 if (XVECLEN (par, 0) != (int) nelt)
19035 return 0;
19036
19037 /* Validate that all of the elements are constants, and not totally
19038 out of range. Copy the data into an integral array to make the
19039 subsequent checks easier. */
19040 for (i = 0; i < nelt; ++i)
19041 {
19042 rtx er = XVECEXP (par, 0, i);
19043 unsigned HOST_WIDE_INT ei;
19044
19045 if (!CONST_INT_P (er))
19046 return 0;
19047 ei = INTVAL (er);
19048 if (ei >= nelt)
19049 return 0;
19050 ipar[i] = ei;
19051 }
19052
19053 switch (mode)
19054 {
19055 case E_V8DFmode:
19056 /* In the 512-bit DFmode case, we can only move elements within
19057 a 128-bit lane. First fill the second part of the mask,
19058 then fallthru. */
19059 for (i = 4; i < 6; ++i)
19060 {
19061 if (ipar[i] < 4 || ipar[i] >= 6)
19062 return 0;
19063 mask |= (ipar[i] - 4) << i;
19064 }
19065 for (i = 6; i < 8; ++i)
19066 {
19067 if (ipar[i] < 6)
19068 return 0;
19069 mask |= (ipar[i] - 6) << i;
19070 }
19071 /* FALLTHRU */
19072
19073 case E_V4DFmode:
19074 /* In the 256-bit DFmode case, we can only move elements within
19075 a 128-bit lane. */
19076 for (i = 0; i < 2; ++i)
19077 {
19078 if (ipar[i] >= 2)
19079 return 0;
19080 mask |= ipar[i] << i;
19081 }
19082 for (i = 2; i < 4; ++i)
19083 {
19084 if (ipar[i] < 2)
19085 return 0;
19086 mask |= (ipar[i] - 2) << i;
19087 }
19088 break;
19089
19090 case E_V16SFmode:
19091 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
19092 must mirror the permutation in the lower 256 bits. */
19093 for (i = 0; i < 8; ++i)
19094 if (ipar[i] + 8 != ipar[i + 8])
19095 return 0;
19096 /* FALLTHRU */
19097
19098 case E_V8SFmode:
19099 /* In the 256-bit SFmode case, we have full freedom of
19100 movement within the low 128-bit lane, but the high 128-bit
19101 lane must mirror the exact same pattern. */
19102 for (i = 0; i < 4; ++i)
19103 if (ipar[i] + 4 != ipar[i + 4])
19104 return 0;
19105 nelt = 4;
19106 /* FALLTHRU */
19107
19108 case E_V2DFmode:
19109 case E_V4SFmode:
19110 /* In the 128-bit case, we have full freedom in the placement of
19111 the elements from the source operand. */
19112 for (i = 0; i < nelt; ++i)
19113 mask |= ipar[i] << (i * (nelt / 2));
19114 break;
19115
19116 default:
19117 gcc_unreachable ();
19118 }
19119
19120 /* Make sure success has a non-zero value by adding one. */
19121 return mask + 1;
19122 }
19123
19124 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
19125 the expansion functions to turn the parallel back into a mask.
19126 The return value is 0 for no match and the imm8+1 for a match. */
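/* Informal example: for V8SFmode a parallel of (8 9 10 11 4 5 6 7)
has halves starting at elements 8 and 4, giving fields 8/4 = 2 and
4/4 = 1, so the reconstructed imm8 is 0x12 and the return value is
0x13. */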
19127
19128 int
19129 avx_vperm2f128_parallel (rtx par, machine_mode mode)
19130 {
19131 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
19132 unsigned mask = 0;
19133 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
19134
19135 if (XVECLEN (par, 0) != (int) nelt)
19136 return 0;
19137
19138 /* Validate that all of the elements are constants, and not totally
19139 out of range. Copy the data into an integral array to make the
19140 subsequent checks easier. */
19141 for (i = 0; i < nelt; ++i)
19142 {
19143 rtx er = XVECEXP (par, 0, i);
19144 unsigned HOST_WIDE_INT ei;
19145
19146 if (!CONST_INT_P (er))
19147 return 0;
19148 ei = INTVAL (er);
19149 if (ei >= 2 * nelt)
19150 return 0;
19151 ipar[i] = ei;
19152 }
19153
19154 /* Validate that each half of the permute consists of consecutive elements. */
19155 for (i = 0; i < nelt2 - 1; ++i)
19156 if (ipar[i] + 1 != ipar[i + 1])
19157 return 0;
19158 for (i = nelt2; i < nelt - 1; ++i)
19159 if (ipar[i] + 1 != ipar[i + 1])
19160 return 0;
19161
19162 /* Reconstruct the mask. */
19163 for (i = 0; i < 2; ++i)
19164 {
19165 unsigned e = ipar[i * nelt2];
19166 if (e % nelt2)
19167 return 0;
19168 e /= nelt2;
19169 mask |= e << (i * 4);
19170 }
19171
19172 /* Make sure success has a non-zero value by adding one. */
19173 return mask + 1;
19174 }
19175 \f
19176 /* Return a register priority for hard reg REGNO. */
19177 static int
19178 ix86_register_priority (int hard_regno)
19179 {
19180 /* ebp and r13 as the base always want a displacement, while r12 as the
19181 base always wants an index. So discourage their usage in an
19182 address. */
19183 if (hard_regno == R12_REG || hard_regno == R13_REG)
19184 return 0;
19185 if (hard_regno == BP_REG)
19186 return 1;
19187 /* New x86-64 int registers result in bigger code size. Discourage them. */
19188 if (REX_INT_REGNO_P (hard_regno))
19189 return 2;
19190 /* New x86-64 SSE registers result in bigger code size. Discourage them. */
19191 if (REX_SSE_REGNO_P (hard_regno))
19192 return 2;
19193 if (EXT_REX_SSE_REGNO_P (hard_regno))
19194 return 1;
19195 /* Usage of AX register results in smaller code. Prefer it. */
19196 if (hard_regno == AX_REG)
19197 return 4;
19198 return 3;
19199 }
19200
19201 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
19202
19203 Put float CONST_DOUBLE in the constant pool instead of fp regs.
19204 QImode must go into class Q_REGS.
19205 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
19206 movdf to do mem-to-mem moves through integer regs. */
19207
19208 static reg_class_t
19209 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
19210 {
19211 machine_mode mode = GET_MODE (x);
19212
19213 /* We're only allowed to return a subclass of CLASS. Many of the
19214 following checks fail for NO_REGS, so eliminate that early. */
19215 if (regclass == NO_REGS)
19216 return NO_REGS;
19217
19218 /* All classes can load zeros. */
19219 if (x == CONST0_RTX (mode))
19220 return regclass;
19221
19222 /* Force constants into memory if we are loading a (nonzero) constant into
19223 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
19224 instructions to load from a constant. */
19225 if (CONSTANT_P (x)
19226 && (MAYBE_MMX_CLASS_P (regclass)
19227 || MAYBE_SSE_CLASS_P (regclass)
19228 || MAYBE_MASK_CLASS_P (regclass)))
19229 return NO_REGS;
19230
19231 /* Floating-point constants need more complex checks. */
19232 if (CONST_DOUBLE_P (x))
19233 {
19234 /* General regs can load everything. */
19235 if (INTEGER_CLASS_P (regclass))
19236 return regclass;
19237
19238 /* Floats can load 0 and 1 plus some others. Note that we eliminated
19239 zero above. We only want to wind up preferring 80387 registers if
19240 we plan on doing computation with them. */
19241 if (IS_STACK_MODE (mode)
19242 && standard_80387_constant_p (x) > 0)
19243 {
19244 /* Limit class to FP regs. */
19245 if (FLOAT_CLASS_P (regclass))
19246 return FLOAT_REGS;
19247 }
19248
19249 return NO_REGS;
19250 }
19251
19252 /* Prefer SSE if we can use them for math. Also allow integer regs
19253 when moves between register units are cheap. */
19254 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19255 {
19256 if (TARGET_INTER_UNIT_MOVES_FROM_VEC
19257 && TARGET_INTER_UNIT_MOVES_TO_VEC
19258 && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
19259 return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
19260 else
19261 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
19262 }
19263
19264 /* Generally when we see PLUS here, it's the function invariant
19265 (plus soft-fp const_int). Which can only be computed into general
19266 regs. */
19267 if (GET_CODE (x) == PLUS)
19268 return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
19269
19270 /* QImode constants are easy to load, but non-constant QImode data
19271 must go into Q_REGS or ALL_MASK_REGS. */
19272 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
19273 {
19274 if (Q_CLASS_P (regclass))
19275 return regclass;
19276 else if (reg_class_subset_p (Q_REGS, regclass))
19277 return Q_REGS;
19278 else if (MASK_CLASS_P (regclass))
19279 return regclass;
19280 else
19281 return NO_REGS;
19282 }
19283
19284 return regclass;
19285 }
19286
19287 /* Discourage putting floating-point values in SSE registers unless
19288 SSE math is being used, and likewise for the 387 registers. */
19289 static reg_class_t
19290 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
19291 {
19292 /* Restrict the output reload class to the register bank that we are doing
19293 math on. If we would rather not return a subset of CLASS, reject this
19294 alternative: if reload cannot do this, it will still use its choice. */
19295 machine_mode mode = GET_MODE (x);
19296 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19297 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
19298
19299 if (IS_STACK_MODE (mode))
19300 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
19301
19302 return regclass;
19303 }
19304
19305 static reg_class_t
19306 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
19307 machine_mode mode, secondary_reload_info *sri)
19308 {
19309 /* Double-word spills from general registers to non-offsettable memory
19310 references (zero-extended addresses) require special handling. */
19311 if (TARGET_64BIT
19312 && MEM_P (x)
19313 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
19314 && INTEGER_CLASS_P (rclass)
19315 && !offsettable_memref_p (x))
19316 {
19317 sri->icode = (in_p
19318 ? CODE_FOR_reload_noff_load
19319 : CODE_FOR_reload_noff_store);
19320 /* Add the cost of moving address to a temporary. */
19321 sri->extra_cost = 1;
19322
19323 return NO_REGS;
19324 }
19325
19326 /* QImode spills from non-QI registers require an
19327 intermediate register on 32-bit targets. */
19328 if (mode == QImode
19329 && ((!TARGET_64BIT && !in_p
19330 && INTEGER_CLASS_P (rclass)
19331 && MAYBE_NON_Q_CLASS_P (rclass))
19332 || (!TARGET_AVX512DQ
19333 && MAYBE_MASK_CLASS_P (rclass))))
19334 {
19335 int regno = true_regnum (x);
19336
19337 /* Return Q_REGS if the operand is in memory. */
19338 if (regno == -1)
19339 return Q_REGS;
19340
19341 return NO_REGS;
19342 }
19343
19344 /* Require a move to a GPR, and then a store to memory. */
19345 if ((mode == HFmode || mode == HImode || mode == V2QImode)
19346 && !TARGET_SSE4_1
19347 && SSE_CLASS_P (rclass)
19348 && !in_p && MEM_P (x))
19349 {
19350 sri->extra_cost = 1;
19351 return GENERAL_REGS;
19352 }
19353
19354 /* This condition handles the corner case where an expression involving
19355 pointers gets vectorized. We're trying to use the address of a
19356 stack slot as a vector initializer.
19357
19358 (set (reg:V2DI 74 [ vect_cst_.2 ])
19359 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
19360
19361 Eventually frame gets turned into sp+offset like this:
19362
19363 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
19364 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
19365 (const_int 392 [0x188]))))
19366
19367 That later gets turned into:
19368
19369 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
19370 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
19371 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
19372
19373 We'll have the following reload recorded:
19374
19375 Reload 0: reload_in (DI) =
19376 (plus:DI (reg/f:DI 7 sp)
19377 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
19378 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
19379 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
19380 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
19381 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
19382 reload_reg_rtx: (reg:V2DI 22 xmm1)
19383
19384 Which isn't going to work since SSE instructions can't handle scalar
19385 additions. Returning GENERAL_REGS forces the addition into integer
19386 register and reload can handle subsequent reloads without problems. */
19387
19388 if (in_p && GET_CODE (x) == PLUS
19389 && SSE_CLASS_P (rclass)
19390 && SCALAR_INT_MODE_P (mode))
19391 return GENERAL_REGS;
19392
19393 return NO_REGS;
19394 }
19395
19396 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
19397
19398 static bool
19399 ix86_class_likely_spilled_p (reg_class_t rclass)
19400 {
19401 switch (rclass)
19402 {
19403 case AREG:
19404 case DREG:
19405 case CREG:
19406 case BREG:
19407 case AD_REGS:
19408 case SIREG:
19409 case DIREG:
19410 case SSE_FIRST_REG:
19411 case FP_TOP_REG:
19412 case FP_SECOND_REG:
19413 return true;
19414
19415 default:
19416 break;
19417 }
19418
19419 return false;
19420 }
19421
19422 /* Return true if a set of DST by the expression SRC should be allowed.
19423 This prevents complex sets of likely_spilled hard regs before reload. */
19424
19425 bool
19426 ix86_hardreg_mov_ok (rtx dst, rtx src)
19427 {
19428 /* Avoid complex sets of likely_spilled hard registers before reload. */
19429 if (REG_P (dst) && HARD_REGISTER_P (dst)
19430 && !REG_P (src) && !MEM_P (src)
19431 && !(VECTOR_MODE_P (GET_MODE (dst))
19432 ? standard_sse_constant_p (src, GET_MODE (dst))
19433 : x86_64_immediate_operand (src, GET_MODE (dst)))
19434 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
19435 && !reload_completed)
19436 return false;
19437 return true;
19438 }
19439
19440 /* If we are copying between registers from different register sets
19441 (e.g. FP and integer), we may need a memory location.
19442
19443 The function can't work reliably when one of the CLASSES is a class
19444 containing registers from multiple sets. We avoid this by never combining
19445 different sets in a single alternative in the machine description.
19446 Ensure that this constraint holds to avoid unexpected surprises.
19447
19448 When STRICT is false, we are being called from REGISTER_MOVE_COST,
19449 so do not enforce these sanity checks.
19450
19451 To optimize register_move_cost performance, define inline variant. */
19452
19453 static inline bool
19454 inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
19455 reg_class_t class2, int strict)
19456 {
19457 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
19458 return false;
19459
19460 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
19461 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
19462 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
19463 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
19464 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
19465 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
19466 || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
19467 || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
19468 {
19469 gcc_assert (!strict || lra_in_progress);
19470 return true;
19471 }
19472
19473 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
19474 return true;
19475
19476 /* ??? This is a lie. We do have moves between mmx/general, and for
19477 mmx/sse2. But by saying we need secondary memory we discourage the
19478 register allocator from using the mmx registers unless needed. */
19479 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
19480 return true;
19481
19482 /* Between mask and general, we have moves no larger than word size. */
19483 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
19484 {
19485 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
19486 || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19487 return true;
19488 }
19489
19490 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
19491 {
19492 /* SSE1 doesn't have any direct moves from other classes. */
19493 if (!TARGET_SSE2)
19494 return true;
19495
19496 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
19497 return true;
19498
19499 int msize = GET_MODE_SIZE (mode);
19500
19501 /* Between SSE and general, we have moves no larger than word size. */
19502 if (msize > UNITS_PER_WORD)
19503 return true;
19504
19505 /* In addition to SImode moves, HImode moves are supported for SSE2 and above;
19506 use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16. */
19507 int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);
19508
19509 if (msize < minsize)
19510 return true;
19511
19512 /* If the target says that inter-unit moves are more expensive
19513 than moving through memory, then don't generate them. */
19514 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
19515 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
19516 return true;
19517 }
19518
19519 return false;
19520 }
19521
19522 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
19523
19524 static bool
19525 ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
19526 reg_class_t class2)
19527 {
19528 return inline_secondary_memory_needed (mode, class1, class2, true);
19529 }
19530
19531 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
19532
19533 get_secondary_mem widens integral modes to BITS_PER_WORD.
19534 There is no need to emit a full 64-bit move on 64-bit targets
19535 for integral modes that can be moved using a 32-bit move. */
19536
19537 static machine_mode
19538 ix86_secondary_memory_needed_mode (machine_mode mode)
19539 {
19540 if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
19541 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
19542 return mode;
19543 }
19544
19545 /* Implement the TARGET_CLASS_MAX_NREGS hook.
19546
19547 On the 80386, this is the size of MODE in words,
19548 except in the FP regs, where a single reg is always enough. */
19549
19550 static unsigned char
19551 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
19552 {
19553 if (MAYBE_INTEGER_CLASS_P (rclass))
19554 {
19555 if (mode == XFmode)
19556 return (TARGET_64BIT ? 2 : 3);
19557 else if (mode == XCmode)
19558 return (TARGET_64BIT ? 4 : 6);
19559 else
19560 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
19561 }
19562 else
19563 {
19564 if (COMPLEX_MODE_P (mode))
19565 return 2;
19566 else
19567 return 1;
19568 }
19569 }
19570
19571 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
19572
19573 static bool
19574 ix86_can_change_mode_class (machine_mode from, machine_mode to,
19575 reg_class_t regclass)
19576 {
19577 if (from == to)
19578 return true;
19579
19580 /* x87 registers can't do subreg at all, as all values are reformatted
19581 to extended precision. */
19582 if (MAYBE_FLOAT_CLASS_P (regclass))
19583 return false;
19584
19585 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
19586 {
19587 /* Vector registers do not support QI or HImode loads. If we don't
19588 disallow a change to these modes, reload will assume it's ok to
19589 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
19590 the vec_dupv4hi pattern.
19591 NB: SSE2 can load 16bit data to sse register via pinsrw. */
19592 int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4;
19593 if (GET_MODE_SIZE (from) < mov_size)
19594 return false;
19595 }
19596
19597 return true;
19598 }
19599
19600 /* Return index of MODE in the sse load/store tables. */
19601
19602 static inline int
19603 sse_store_index (machine_mode mode)
19604 {
19605 /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store
19606 costs to processor_costs, which requires changes to all entries in
19607 processor cost table. */
19608 if (mode == E_HFmode)
19609 mode = E_SFmode;
19610
19611 switch (GET_MODE_SIZE (mode))
19612 {
19613 case 4:
19614 return 0;
19615 case 8:
19616 return 1;
19617 case 16:
19618 return 2;
19619 case 32:
19620 return 3;
19621 case 64:
19622 return 4;
19623 default:
19624 return -1;
19625 }
19626 }
19627
19628 /* Return the cost of moving data of mode M between a
19629 register and memory. A value of 2 is the default; this cost is
19630 relative to those in `REGISTER_MOVE_COST'.
19631
19632 This function is used extensively by register_move_cost, which is used to
19633 build tables at startup. Make it inline in this case.
19634 When IN is 2, return the maximum of the in and out move costs.
19635
19636 If moving between registers and memory is more expensive than
19637 between two registers, you should define this macro to express the
19638 relative cost.
19639
19640 Also model the increased cost of moving QImode registers in non
19641 Q_REGS classes.
19642 */
19643 static inline int
19644 inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
19645 {
19646 int cost;
19647
19648 if (FLOAT_CLASS_P (regclass))
19649 {
19650 int index;
19651 switch (mode)
19652 {
19653 case E_SFmode:
19654 index = 0;
19655 break;
19656 case E_DFmode:
19657 index = 1;
19658 break;
19659 case E_XFmode:
19660 index = 2;
19661 break;
19662 default:
19663 return 100;
19664 }
19665 if (in == 2)
19666 return MAX (ix86_cost->hard_register.fp_load [index],
19667 ix86_cost->hard_register.fp_store [index]);
19668 return in ? ix86_cost->hard_register.fp_load [index]
19669 : ix86_cost->hard_register.fp_store [index];
19670 }
19671 if (SSE_CLASS_P (regclass))
19672 {
19673 int index = sse_store_index (mode);
19674 if (index == -1)
19675 return 100;
19676 if (in == 2)
19677 return MAX (ix86_cost->hard_register.sse_load [index],
19678 ix86_cost->hard_register.sse_store [index]);
19679 return in ? ix86_cost->hard_register.sse_load [index]
19680 : ix86_cost->hard_register.sse_store [index];
19681 }
19682 if (MASK_CLASS_P (regclass))
19683 {
19684 int index;
19685 switch (GET_MODE_SIZE (mode))
19686 {
19687 case 1:
19688 index = 0;
19689 break;
19690 case 2:
19691 index = 1;
19692 break;
19693 /* DImode loads and stores are assumed to cost the same as SImode. */
19694 default:
19695 index = 2;
19696 break;
19697 }
19698
19699 if (in == 2)
19700 return MAX (ix86_cost->hard_register.mask_load[index],
19701 ix86_cost->hard_register.mask_store[index]);
19702 return in ? ix86_cost->hard_register.mask_load[index]
19703 : ix86_cost->hard_register.mask_store[index];
19704 }
19705 if (MMX_CLASS_P (regclass))
19706 {
19707 int index;
19708 switch (GET_MODE_SIZE (mode))
19709 {
19710 case 4:
19711 index = 0;
19712 break;
19713 case 8:
19714 index = 1;
19715 break;
19716 default:
19717 return 100;
19718 }
19719 if (in == 2)
19720 return MAX (ix86_cost->hard_register.mmx_load [index],
19721 ix86_cost->hard_register.mmx_store [index]);
19722 return in ? ix86_cost->hard_register.mmx_load [index]
19723 : ix86_cost->hard_register.mmx_store [index];
19724 }
19725 switch (GET_MODE_SIZE (mode))
19726 {
19727 case 1:
19728 if (Q_CLASS_P (regclass) || TARGET_64BIT)
19729 {
19730 if (!in)
19731 return ix86_cost->hard_register.int_store[0];
19732 if (TARGET_PARTIAL_REG_DEPENDENCY
19733 && optimize_function_for_speed_p (cfun))
19734 cost = ix86_cost->hard_register.movzbl_load;
19735 else
19736 cost = ix86_cost->hard_register.int_load[0];
19737 if (in == 2)
19738 return MAX (cost, ix86_cost->hard_register.int_store[0]);
19739 return cost;
19740 }
19741 else
19742 {
19743 if (in == 2)
19744 return MAX (ix86_cost->hard_register.movzbl_load,
19745 ix86_cost->hard_register.int_store[0] + 4);
19746 if (in)
19747 return ix86_cost->hard_register.movzbl_load;
19748 else
19749 return ix86_cost->hard_register.int_store[0] + 4;
19750 }
19751 break;
19752 case 2:
19753 {
19754 int cost;
19755 if (in == 2)
19756 cost = MAX (ix86_cost->hard_register.int_load[1],
19757 ix86_cost->hard_register.int_store[1]);
19758 else
19759 cost = in ? ix86_cost->hard_register.int_load[1]
19760 : ix86_cost->hard_register.int_store[1];
19761
19762 if (mode == E_HFmode)
19763 {
19764 /* Prefer SSE over GPR for HFmode. */
19765 int sse_cost;
19766 int index = sse_store_index (mode);
19767 if (in == 2)
19768 sse_cost = MAX (ix86_cost->hard_register.sse_load[index],
19769 ix86_cost->hard_register.sse_store[index]);
19770 else
19771 sse_cost = (in
19772 ? ix86_cost->hard_register.sse_load [index]
19773 : ix86_cost->hard_register.sse_store [index]);
19774 if (sse_cost >= cost)
19775 cost = sse_cost + 1;
19776 }
19777 return cost;
19778 }
19779 default:
19780 if (in == 2)
19781 cost = MAX (ix86_cost->hard_register.int_load[2],
19782 ix86_cost->hard_register.int_store[2]);
19783 else if (in)
19784 cost = ix86_cost->hard_register.int_load[2];
19785 else
19786 cost = ix86_cost->hard_register.int_store[2];
19787 /* Multiply with the number of GPR moves needed. */
19788 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
19789 }
19790 }
19791
19792 static int
19793 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
19794 {
19795 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
19796 }
19797
19798
19799 /* Return the cost of moving data from a register in class CLASS1 to
19800 one in class CLASS2.
19801
19802 It is not required that the cost always equal 2 when FROM is the same as TO;
19803 on some machines it is expensive to move between registers if they are not
19804 general registers. */
19805
19806 static int
19807 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
19808 reg_class_t class2_i)
19809 {
19810 enum reg_class class1 = (enum reg_class) class1_i;
19811 enum reg_class class2 = (enum reg_class) class2_i;
19812
19813 /* In case we require secondary memory, compute the cost of the store
19814 followed by the load. To avoid bad register allocation choices, we need
19815 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
19816
19817 if (inline_secondary_memory_needed (mode, class1, class2, false))
19818 {
19819 int cost = 1;
19820
19821 cost += inline_memory_move_cost (mode, class1, 2);
19822 cost += inline_memory_move_cost (mode, class2, 2);
19823
19824 /* When copying from a general purpose register we may emit multiple
19825 stores followed by a single load, causing a memory size mismatch stall.
19826 Count this as an arbitrarily high cost of 20. */
19827 if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
19828 && TARGET_MEMORY_MISMATCH_STALL
19829 && targetm.class_max_nregs (class1, mode)
19830 > targetm.class_max_nregs (class2, mode))
19831 cost += 20;
19832
19833 /* In the case of FP/MMX moves, the registers actually overlap, and we
19834 have to switch modes in order to treat them differently. */
19835 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
19836 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
19837 cost += 20;
19838
19839 return cost;
19840 }
19841
19842 /* Moves between MMX and non-MMX units require secondary memory. */
19843 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
19844 gcc_unreachable ();
19845
19846 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
19847 return (SSE_CLASS_P (class1)
19848 ? ix86_cost->hard_register.sse_to_integer
19849 : ix86_cost->hard_register.integer_to_sse);
19850
19851 /* Moves between mask register and GPR. */
19852 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
19853 {
19854 return (MASK_CLASS_P (class1)
19855 ? ix86_cost->hard_register.mask_to_integer
19856 : ix86_cost->hard_register.integer_to_mask);
19857 }
19858 /* Moving between mask registers. */
19859 if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2))
19860 return ix86_cost->hard_register.mask_move;
19861
19862 if (MAYBE_FLOAT_CLASS_P (class1))
19863 return ix86_cost->hard_register.fp_move;
19864 if (MAYBE_SSE_CLASS_P (class1))
19865 {
19866 if (GET_MODE_BITSIZE (mode) <= 128)
19867 return ix86_cost->hard_register.xmm_move;
19868 if (GET_MODE_BITSIZE (mode) <= 256)
19869 return ix86_cost->hard_register.ymm_move;
19870 return ix86_cost->hard_register.zmm_move;
19871 }
19872 if (MAYBE_MMX_CLASS_P (class1))
19873 return ix86_cost->hard_register.mmx_move;
19874 return 2;
19875 }
19876
19877 /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
19878 words of a value of mode MODE but can be less for certain modes in
19879 special long registers.
19880
19881 Note that there are no two-word move instructions for consecutive
19882 registers, and only registers 0-3 may have mov byte instructions
19883 applied to them. */
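/* For instance, under the rules below an XFmode value occupies three
general registers with -m32 but only two with -m64. */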
19884
19885 static unsigned int
19886 ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
19887 {
19888 if (GENERAL_REGNO_P (regno))
19889 {
19890 if (mode == XFmode)
19891 return TARGET_64BIT ? 2 : 3;
19892 if (mode == XCmode)
19893 return TARGET_64BIT ? 4 : 6;
19894 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
19895 }
19896 if (COMPLEX_MODE_P (mode))
19897 return 2;
19898 /* Register pair for mask registers. */
19899 if (mode == P2QImode || mode == P2HImode)
19900 return 2;
19901 if (mode == V64SFmode || mode == V64SImode)
19902 return 4;
19903 return 1;
19904 }
19905
19906 /* Implement REGMODE_NATURAL_SIZE(MODE). */
19907 unsigned int
19908 ix86_regmode_natural_size (machine_mode mode)
19909 {
19910 if (mode == P2HImode || mode == P2QImode)
19911 return GET_MODE_SIZE (mode) / 2;
19912 return UNITS_PER_WORD;
19913 }
19914
19915 /* Implement TARGET_HARD_REGNO_MODE_OK. */
19916
19917 static bool
19918 ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
19919 {
19920 /* The flags register, and only the flags register, can hold CCmode values. */
19921 if (CC_REGNO_P (regno))
19922 return GET_MODE_CLASS (mode) == MODE_CC;
19923 if (GET_MODE_CLASS (mode) == MODE_CC
19924 || GET_MODE_CLASS (mode) == MODE_RANDOM)
19925 return false;
19926 if (STACK_REGNO_P (regno))
19927 return VALID_FP_MODE_P (mode);
19928 if (MASK_REGNO_P (regno))
19929 {
19930 /* Register pair only starts at even register number. */
19931 if ((mode == P2QImode || mode == P2HImode))
19932 return MASK_PAIR_REGNO_P(regno);
19933
19934 return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
19935 || (TARGET_AVX512BW
19936 && VALID_MASK_AVX512BW_MODE (mode)));
19937 }
19938
19939 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
19940 return false;
19941
19942 if (SSE_REGNO_P (regno))
19943 {
19944 /* We implement the move patterns for all vector modes into and
19945 out of SSE registers, even when no operation instructions
19946 are available. */
19947
19948 /* For AVX-512 we allow, regardless of regno:
19949 - XI mode
19950 - any of 512-bit wide vector mode
19951 - any scalar mode. */
19952 if (TARGET_AVX512F
19953 && (VALID_AVX512F_REG_OR_XI_MODE (mode)
19954 || VALID_AVX512F_SCALAR_MODE (mode)))
19955 return true;
19956
19957 /* For AVX512FP16, vmovw supports movement of HImode
19958 and HFmode between GPR and SSE registers. */
19959 if (TARGET_AVX512FP16
19960 && VALID_AVX512FP16_SCALAR_MODE (mode))
19961 return true;
19962
19963 /* For AVX-5124FMAPS or AVX-5124VNNIW
19964 allow V64SF and V64SI modes for special regnos. */
19965 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
19966 && (mode == V64SFmode || mode == V64SImode)
19967 && MOD4_SSE_REGNO_P (regno))
19968 return true;
19969
19970 /* TODO check for QI/HI scalars. */
19971 /* AVX512VL allows SSE regs 16+ for 128/256-bit modes. */
19972 if (TARGET_AVX512VL
19973 && (VALID_AVX256_REG_OR_OI_MODE (mode)
19974 || VALID_AVX512VL_128_REG_MODE (mode)))
19975 return true;
19976
19977 /* xmm16-xmm31 are only available for AVX-512. */
19978 if (EXT_REX_SSE_REGNO_P (regno))
19979 return false;
19980
19981 /* OImode and AVX modes are available only when AVX is enabled. */
19982 return ((TARGET_AVX
19983 && VALID_AVX256_REG_OR_OI_MODE (mode))
19984 || VALID_SSE_REG_MODE (mode)
19985 || VALID_SSE2_REG_MODE (mode)
19986 || VALID_MMX_REG_MODE (mode)
19987 || VALID_MMX_REG_MODE_3DNOW (mode));
19988 }
19989 if (MMX_REGNO_P (regno))
19990 {
19991 /* We implement the move patterns for 3DNOW modes even in MMX mode,
19992 so if the register is available at all, then we can move data of
19993 the given mode into or out of it. */
19994 return (VALID_MMX_REG_MODE (mode)
19995 || VALID_MMX_REG_MODE_3DNOW (mode));
19996 }
19997
19998 if (mode == QImode)
19999 {
20000 /* Take care with QImode values - they can be in non-QI regs,
20001 but then they do cause partial register stalls. */
20002 if (ANY_QI_REGNO_P (regno))
20003 return true;
20004 if (!TARGET_PARTIAL_REG_STALL)
20005 return true;
20006 /* LRA checks if the hard register is OK for the given mode.
20007 QImode values can live in non-QI regs, so we allow all
20008 registers here. */
20009 if (lra_in_progress)
20010 return true;
20011 return !can_create_pseudo_p ();
20012 }
20013 /* We handle both integer and floats in the general purpose registers. */
20014 else if (VALID_INT_MODE_P (mode)
20015 || VALID_FP_MODE_P (mode))
20016 return true;
20017 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
20018 on to use that value in smaller contexts, this can easily force a
20019 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
20020 supporting DImode, allow it. */
20021 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
20022 return true;
20023
20024 return false;
20025 }
20026
20027 /* Implement TARGET_INSN_CALLEE_ABI. */
20028
20029 const predefined_function_abi &
20030 ix86_insn_callee_abi (const rtx_insn *insn)
20031 {
20032 unsigned int abi_id = 0;
20033 rtx pat = PATTERN (insn);
20034 if (vzeroupper_pattern (pat, VOIDmode))
20035 abi_id = ABI_VZEROUPPER;
20036
20037 return function_abis[abi_id];
20038 }
20039
20040 /* Initialize function_abis with the corresponding abi_id;
20041 currently only vzeroupper is handled. */
20042 void
20043 ix86_initialize_callee_abi (unsigned int abi_id)
20044 {
20045 gcc_assert (abi_id == ABI_VZEROUPPER);
20046 predefined_function_abi &vzeroupper_abi = function_abis[abi_id];
20047 if (!vzeroupper_abi.initialized_p ())
20048 {
20049 HARD_REG_SET full_reg_clobbers;
20050 CLEAR_HARD_REG_SET (full_reg_clobbers);
20051 vzeroupper_abi.initialize (ABI_VZEROUPPER, full_reg_clobbers);
20052 }
20053 }
20054
20055 void
20056 ix86_expand_avx_vzeroupper (void)
20057 {
20058 /* Initialize vzeroupper_abi here. */
20059 ix86_initialize_callee_abi (ABI_VZEROUPPER);
20060 rtx_insn *insn = emit_call_insn (gen_avx_vzeroupper_callee_abi ());
20061 /* Return false for non-local goto in can_nonlocal_goto. */
20062 make_reg_eh_region_note (insn, 0, INT_MIN);
20063 /* Flag used for call_insn indicates it's a fake call. */
20064 RTX_FLAG (insn, used) = 1;
20065 }
20066
20067
20068 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
20069 saves SSE registers across calls is Win64 (thus no need to check the
20070 current ABI here), and with AVX enabled Win64 only guarantees that
20071 the low 16 bytes are saved. */
20072
20073 static bool
20074 ix86_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno,
20075 machine_mode mode)
20076 {
20077 /* Special ABI for vzeroupper, which only clobbers the upper part of sse regs. */
20078 if (abi_id == ABI_VZEROUPPER)
20079 return (GET_MODE_SIZE (mode) > 16
20080 && ((TARGET_64BIT && REX_SSE_REGNO_P (regno))
20081 || LEGACY_SSE_REGNO_P (regno)));
20082
20083 return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
20084 }
20085
20086 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
20087 tieable integer mode. */
20088
20089 static bool
20090 ix86_tieable_integer_mode_p (machine_mode mode)
20091 {
20092 switch (mode)
20093 {
20094 case E_HImode:
20095 case E_SImode:
20096 return true;
20097
20098 case E_QImode:
20099 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
20100
20101 case E_DImode:
20102 return TARGET_64BIT;
20103
20104 default:
20105 return false;
20106 }
20107 }
20108
20109 /* Implement TARGET_MODES_TIEABLE_P.
20110
20111 Return true if MODE1 is accessible in a register that can hold MODE2
20112 without copying. That is, all register classes that can hold MODE2
20113 can also hold MODE1. */
20114
20115 static bool
20116 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
20117 {
20118 if (mode1 == mode2)
20119 return true;
20120
20121 if (ix86_tieable_integer_mode_p (mode1)
20122 && ix86_tieable_integer_mode_p (mode2))
20123 return true;
20124
20125 /* MODE2 being XFmode implies fp stack or general regs, which means we
20126 can tie any smaller floating point modes to it. Note that we do not
20127 tie this with TFmode. */
20128 if (mode2 == XFmode)
20129 return mode1 == SFmode || mode1 == DFmode;
20130
20131 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
20132 that we can tie it with SFmode. */
20133 if (mode2 == DFmode)
20134 return mode1 == SFmode;
20135
20136 /* If MODE2 is only appropriate for an SSE register, then tie with
20137 any other mode acceptable to SSE registers. */
20138 if (GET_MODE_SIZE (mode2) == 64
20139 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
20140 return (GET_MODE_SIZE (mode1) == 64
20141 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
20142 if (GET_MODE_SIZE (mode2) == 32
20143 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
20144 return (GET_MODE_SIZE (mode1) == 32
20145 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
20146 if (GET_MODE_SIZE (mode2) == 16
20147 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
20148 return (GET_MODE_SIZE (mode1) == 16
20149 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
20150
20151 /* If MODE2 is appropriate for an MMX register, then tie
20152 with any other mode acceptable to MMX registers. */
20153 if (GET_MODE_SIZE (mode2) == 8
20154 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
20155 return (GET_MODE_SIZE (mode1) == 8
20156 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
20157
20158 return false;
20159 }
20160
20161 /* Return the cost of moving between two registers of mode MODE. */
20162
20163 static int
20164 ix86_set_reg_reg_cost (machine_mode mode)
20165 {
20166 unsigned int units = UNITS_PER_WORD;
20167
20168 switch (GET_MODE_CLASS (mode))
20169 {
20170 default:
20171 break;
20172
20173 case MODE_CC:
20174 units = GET_MODE_SIZE (CCmode);
20175 break;
20176
20177 case MODE_FLOAT:
20178 if ((TARGET_SSE && mode == TFmode)
20179 || (TARGET_80387 && mode == XFmode)
20180 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
20181 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
20182 units = GET_MODE_SIZE (mode);
20183 break;
20184
20185 case MODE_COMPLEX_FLOAT:
20186 if ((TARGET_SSE && mode == TCmode)
20187 || (TARGET_80387 && mode == XCmode)
20188 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
20189 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
20190 units = GET_MODE_SIZE (mode);
20191 break;
20192
20193 case MODE_VECTOR_INT:
20194 case MODE_VECTOR_FLOAT:
20195 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
20196 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
20197 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20198 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20199 || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
20200 && VALID_MMX_REG_MODE (mode)))
20201 units = GET_MODE_SIZE (mode);
20202 }
20203
20204 /* Return the cost of moving between two registers of mode MODE,
20205 assuming that the move will be in pieces of at most UNITS bytes. */
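/* For example, with -m32 a DImode copy is costed as two word-sized
moves, while a DFmode copy on an 80387 or SSE2 target counts as a
single move because UNITS is widened to the mode size above. */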
20206 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
20207 }
20208
20209 /* Return cost of vector operation in MODE given that scalar version has
20210 COST. */
20211
20212 static int
20213 ix86_vec_cost (machine_mode mode, int cost)
20214 {
20215 if (!VECTOR_MODE_P (mode))
20216 return cost;
20217
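/* E.g. with TARGET_AVX256_SPLIT_REGS a 256-bit operation is costed at
twice, and a 512-bit operation at four times, the scalar COST, while
with TARGET_SSE_SPLIT_REGS a 128-bit operation is costed at twice. */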
20218 if (GET_MODE_BITSIZE (mode) == 128
20219 && TARGET_SSE_SPLIT_REGS)
20220 return cost * 2;
20221 if (GET_MODE_BITSIZE (mode) > 128
20222 && TARGET_AVX256_SPLIT_REGS)
20223 return cost * GET_MODE_BITSIZE (mode) / 128;
20224 return cost;
20225 }
20226
20227 /* Return cost of vec_widen_<s>mult_hi/lo_<mode>;
20228 vec_widen_<s>mult_hi/lo_<mode> is only available for VI124_AVX2. */
20229 static int
20230 ix86_widen_mult_cost (const struct processor_costs *cost,
20231 enum machine_mode mode, bool uns_p)
20232 {
20233 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
20234 int extra_cost = 0;
20235 int basic_cost = 0;
20236 switch (mode)
20237 {
20238 case V8HImode:
20239 case V16HImode:
20240 if (!uns_p || mode == V16HImode)
20241 extra_cost = cost->sse_op * 2;
20242 basic_cost = cost->mulss * 2 + cost->sse_op * 4;
20243 break;
20244 case V4SImode:
20245 case V8SImode:
20246 /* pmulhw/pmullw can be used. */
20247 basic_cost = cost->mulss * 2 + cost->sse_op * 2;
20248 break;
20249 case V2DImode:
20250 /* pmuludq is used under sse2 and pmuldq under sse4.1; for sign_extend
20251 without sse4.1, an extra 4 mul, 4 add, 4 cmp and 2 shift are required. */
20252 if (!TARGET_SSE4_1 && !uns_p)
20253 extra_cost = (cost->mulss + cost->addss + cost->sse_op) * 4
20254 + cost->sse_op * 2;
20255 /* Fallthru. */
20256 case V4DImode:
20257 basic_cost = cost->mulss * 2 + cost->sse_op * 4;
20258 break;
20259 default:
20260 gcc_unreachable();
20261 }
20262 return ix86_vec_cost (mode, basic_cost + extra_cost);
20263 }
20264
20265 /* Return cost of multiplication in MODE. */
20266
20267 static int
20268 ix86_multiplication_cost (const struct processor_costs *cost,
20269 enum machine_mode mode)
20270 {
20271 machine_mode inner_mode = mode;
20272 if (VECTOR_MODE_P (mode))
20273 inner_mode = GET_MODE_INNER (mode);
20274
20275 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
20276 return inner_mode == DFmode ? cost->mulsd : cost->mulss;
20277 else if (X87_FLOAT_MODE_P (mode))
20278 return cost->fmul;
20279 else if (FLOAT_MODE_P (mode))
20280 return ix86_vec_cost (mode,
20281 inner_mode == DFmode ? cost->mulsd : cost->mulss);
20282 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
20283 {
20284 /* vpmullq is used in this case. No emulation is needed. */
20285 if (TARGET_AVX512DQ)
20286 return ix86_vec_cost (mode, cost->mulss);
20287
20288 /* V*QImode is emulated with 7-13 insns. */
20289 if (mode == V16QImode || mode == V32QImode)
20290 {
20291 int extra = 11;
20292 if (TARGET_XOP && mode == V16QImode)
20293 extra = 5;
20294 else if (TARGET_SSSE3)
20295 extra = 6;
20296 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * extra);
20297 }
20298 /* V*DImode is emulated with 5-8 insns. */
20299 else if (mode == V2DImode || mode == V4DImode)
20300 {
20301 if (TARGET_XOP && mode == V2DImode)
20302 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 3);
20303 else
20304 return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
20305 }
20306 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
20307 insns, including two PMULUDQ. */
20308 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
20309 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
20310 else
20311 return ix86_vec_cost (mode, cost->mulss);
20312 }
20313 else
20314 return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
20315 }
20316
20317 /* Return cost of division in MODE. */
20318
20319 static int
20320 ix86_division_cost (const struct processor_costs *cost,
20321 enum machine_mode mode)
20322 {
20323 machine_mode inner_mode = mode;
20324 if (VECTOR_MODE_P (mode))
20325 inner_mode = GET_MODE_INNER (mode);
20326
20327 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
20328 return inner_mode == DFmode ? cost->divsd : cost->divss;
20329 else if (X87_FLOAT_MODE_P (mode))
20330 return cost->fdiv;
20331 else if (FLOAT_MODE_P (mode))
20332 return ix86_vec_cost (mode,
20333 inner_mode == DFmode ? cost->divsd : cost->divss);
20334 else
20335 return cost->divide[MODE_INDEX (mode)];
20336 }
20337
20338 /* Return cost of shift in MODE.
20339 If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
20340 AND_IN_OP1 specifies whether op1 is the result of an AND, and
20341 SHIFT_AND_TRUNCATE whether op1 is a SUBREG of such an AND.
20342
20343 SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */
20344
20345 static int
20346 ix86_shift_rotate_cost (const struct processor_costs *cost,
20347 enum rtx_code code,
20348 enum machine_mode mode, bool constant_op1,
20349 HOST_WIDE_INT op1_val,
20350 bool speed,
20351 bool and_in_op1,
20352 bool shift_and_truncate,
20353 bool *skip_op0, bool *skip_op1)
20354 {
20355 if (skip_op0)
20356 *skip_op0 = *skip_op1 = false;
20357 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
20358 {
20359 /* V*QImode is emulated with 1-11 insns. */
20360 if (mode == V16QImode || mode == V32QImode)
20361 {
20362 int count = 11;
20363 if (TARGET_XOP && mode == V16QImode)
20364 {
20365 /* For XOP we use vpshab, which requires a broadcast of the
20366 value to the variable shift insn. For constants this
20367 means a V16Q const in mem; even though we can perform the
20368 shift with one insn, set the cost so that paddb is preferred. */
20369 if (constant_op1)
20370 {
20371 if (skip_op1)
20372 *skip_op1 = true;
20373 return ix86_vec_cost (mode,
20374 cost->sse_op
20375 + (speed
20376 ? 2
20377 : COSTS_N_BYTES
20378 (GET_MODE_UNIT_SIZE (mode))));
20379 }
20380 count = 3;
20381 }
20382 else if (TARGET_SSSE3)
20383 count = 7;
20384 return ix86_vec_cost (mode, cost->sse_op * count);
20385 }
20386 /* V*DImode arithmetic right shift is emulated. */
20387 else if (code == ASHIFTRT
20388 && (mode == V2DImode || mode == V4DImode)
20389 && !TARGET_XOP
20390 && !TARGET_AVX512VL)
20391 {
20392 int count = 4;
20393 if (constant_op1 && op1_val == 63 && TARGET_SSE4_2)
20394 count = 2;
20395 else if (constant_op1)
20396 count = 3;
20397 return ix86_vec_cost (mode, cost->sse_op * count);
20398 }
20399 else
20400 return ix86_vec_cost (mode, cost->sse_op);
20401 }
20402 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20403 {
20404 if (constant_op1)
20405 {
20406 if (op1_val > 32)
20407 return cost->shift_const + COSTS_N_INSNS (2);
20408 else
20409 return cost->shift_const * 2;
20410 }
20411 else
20412 {
20413 if (and_in_op1)
20414 return cost->shift_var * 2;
20415 else
20416 return cost->shift_var * 6 + COSTS_N_INSNS (2);
20417 }
20418 }
20419 else
20420 {
20421 if (constant_op1)
20422 return cost->shift_const;
20423 else if (shift_and_truncate)
20424 {
20425 if (skip_op0)
20426 *skip_op0 = *skip_op1 = true;
20427 /* Return the cost after shift-and truncation. */
20428 return cost->shift_var;
20429 }
20430 else
20431 return cost->shift_var;
20432 }
20433 }
20434
20435 /* Compute a (partial) cost for rtx X. Return true if the complete
20436 cost has been computed, and false if subexpressions should be
20437 scanned. In either case, *TOTAL contains the cost result. */
20438
20439 static bool
20440 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
20441 int *total, bool speed)
20442 {
20443 rtx mask;
20444 enum rtx_code code = GET_CODE (x);
20445 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
20446 const struct processor_costs *cost
20447 = speed ? ix86_tune_cost : &ix86_size_cost;
20448 int src_cost;
20449
20450 switch (code)
20451 {
20452 case SET:
20453 if (register_operand (SET_DEST (x), VOIDmode)
20454 && register_operand (SET_SRC (x), VOIDmode))
20455 {
20456 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
20457 return true;
20458 }
20459
20460 if (register_operand (SET_SRC (x), VOIDmode))
20461 /* Avoid potentially incorrect high cost from rtx_costs
20462 for non-tieable SUBREGs. */
20463 src_cost = 0;
20464 else
20465 {
20466 src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
20467
20468 if (CONSTANT_P (SET_SRC (x)))
20469 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
20470 a small value, possibly zero for cheap constants. */
20471 src_cost += COSTS_N_INSNS (1);
20472 }
20473
20474 *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
20475 return true;
20476
20477 case CONST_INT:
20478 case CONST:
20479 case LABEL_REF:
20480 case SYMBOL_REF:
20481 if (x86_64_immediate_operand (x, VOIDmode))
20482 *total = 0;
20483 else
20484 *total = 1;
20485 return true;
20486
20487 case CONST_DOUBLE:
20488 if (IS_STACK_MODE (mode))
20489 switch (standard_80387_constant_p (x))
20490 {
20491 case -1:
20492 case 0:
20493 break;
20494 case 1: /* 0.0 */
20495 *total = 1;
20496 return true;
20497 default: /* Other constants */
20498 *total = 2;
20499 return true;
20500 }
20501 /* FALLTHRU */
20502
20503 case CONST_VECTOR:
20504 switch (standard_sse_constant_p (x, mode))
20505 {
20506 case 0:
20507 break;
20508 case 1: /* 0: xor eliminates false dependency */
20509 *total = 0;
20510 return true;
20511 default: /* -1: cmp contains false dependency */
20512 *total = 1;
20513 return true;
20514 }
20515 /* FALLTHRU */
20516
20517 case CONST_WIDE_INT:
20518 /* Fall back to (MEM (SYMBOL_REF)), since that's where
20519 it'll probably end up. Add a penalty for size. */
20520 *total = (COSTS_N_INSNS (1)
20521 + (!TARGET_64BIT && flag_pic)
20522 + (GET_MODE_SIZE (mode) <= 4
20523 ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
20524 return true;
20525
20526 case ZERO_EXTEND:
20527 /* The zero extension is often completely free on x86_64, so make
20528 it as cheap as possible. */
20529 if (TARGET_64BIT && mode == DImode
20530 && GET_MODE (XEXP (x, 0)) == SImode)
20531 *total = 1;
20532 else if (TARGET_ZERO_EXTEND_WITH_AND)
20533 *total = cost->add;
20534 else
20535 *total = cost->movzx;
20536 return false;
20537
20538 case SIGN_EXTEND:
20539 *total = cost->movsx;
20540 return false;
20541
20542 case ASHIFT:
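/* Illustrative note (not in the original sources): a left shift by 1 can be
   done with a plain add ("addl %eax, %eax"), and shifts by 2 or 3 map onto
   an address computation such as "leal 0(,%eax,8), %eax" for x << 3, which
   is why the LEA cost is preferred below whenever it is not more expensive
   than a constant shift.  */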
20543 if (SCALAR_INT_MODE_P (mode)
20544 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
20545 && CONST_INT_P (XEXP (x, 1)))
20546 {
20547 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20548 if (value == 1)
20549 {
20550 *total = cost->add;
20551 return false;
20552 }
20553 if ((value == 2 || value == 3)
20554 && cost->lea <= cost->shift_const)
20555 {
20556 *total = cost->lea;
20557 return false;
20558 }
20559 }
20560 /* FALLTHRU */
20561
20562 case ROTATE:
20563 case ASHIFTRT:
20564 case LSHIFTRT:
20565 case ROTATERT:
20566 bool skip_op0, skip_op1;
20567 *total = ix86_shift_rotate_cost (cost, code, mode,
20568 CONSTANT_P (XEXP (x, 1)),
20569 CONST_INT_P (XEXP (x, 1))
20570 ? INTVAL (XEXP (x, 1)) : -1,
20571 speed,
20572 GET_CODE (XEXP (x, 1)) == AND,
20573 SUBREG_P (XEXP (x, 1))
20574 && GET_CODE (XEXP (XEXP (x, 1),
20575 0)) == AND,
20576 &skip_op0, &skip_op1);
20577 if (skip_op0 || skip_op1)
20578 {
20579 if (!skip_op0)
20580 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
20581 if (!skip_op1)
20582 *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
20583 return true;
20584 }
20585 return false;
20586
20587 case FMA:
20588 {
20589 rtx sub;
20590
20591 gcc_assert (FLOAT_MODE_P (mode));
20592 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
20593
20594 *total = ix86_vec_cost (mode,
20595 GET_MODE_INNER (mode) == SFmode
20596 ? cost->fmass : cost->fmasd);
20597 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
20598
20599 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
20600 sub = XEXP (x, 0);
20601 if (GET_CODE (sub) == NEG)
20602 sub = XEXP (sub, 0);
20603 *total += rtx_cost (sub, mode, FMA, 0, speed);
20604
20605 sub = XEXP (x, 2);
20606 if (GET_CODE (sub) == NEG)
20607 sub = XEXP (sub, 0);
20608 *total += rtx_cost (sub, mode, FMA, 2, speed);
20609 return true;
20610 }
20611
20612 case MULT:
20613 if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
20614 {
20615 rtx op0 = XEXP (x, 0);
20616 rtx op1 = XEXP (x, 1);
20617 int nbits;
20618 if (CONST_INT_P (XEXP (x, 1)))
20619 {
20620 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20621 for (nbits = 0; value != 0; value &= value - 1)
20622 nbits++;
20623 }
20624 else
20625 /* This is arbitrary. */
20626 nbits = 7;
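/* Worked example (illustrative): for a multiplier of 10 (binary 1010) the
   loop above clears one set bit per iteration, so nbits ends up as 2; the
   synthesized multiply is then costed below as mult_init plus 2 * mult_bit
   in addition to the operand costs.  */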
20627
20628 /* Compute costs correctly for widening multiplication. */
20629 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
20630 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
20631 == GET_MODE_SIZE (mode))
20632 {
20633 int is_mulwiden = 0;
20634 machine_mode inner_mode = GET_MODE (op0);
20635
20636 if (GET_CODE (op0) == GET_CODE (op1))
20637 is_mulwiden = 1, op1 = XEXP (op1, 0);
20638 else if (CONST_INT_P (op1))
20639 {
20640 if (GET_CODE (op0) == SIGN_EXTEND)
20641 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
20642 == INTVAL (op1);
20643 else
20644 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
20645 }
20646
20647 if (is_mulwiden)
20648 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
20649 }
20650
20651 *total = (cost->mult_init[MODE_INDEX (mode)]
20652 + nbits * cost->mult_bit
20653 + rtx_cost (op0, mode, outer_code, opno, speed)
20654 + rtx_cost (op1, mode, outer_code, opno, speed));
20655
20656 return true;
20657 }
20658 *total = ix86_multiplication_cost (cost, mode);
20659 return false;
20660
20661 case DIV:
20662 case UDIV:
20663 case MOD:
20664 case UMOD:
20665 *total = ix86_division_cost (cost, mode);
20666 return false;
20667
20668 case PLUS:
20669 if (GET_MODE_CLASS (mode) == MODE_INT
20670 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
20671 {
20672 if (GET_CODE (XEXP (x, 0)) == PLUS
20673 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
20674 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
20675 && CONSTANT_P (XEXP (x, 1)))
20676 {
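/* Illustrative note: this matches address-like trees such as
   (plus (plus (mult %ebx 4) %ecx) 12), which a single
   "leal 12(%ecx,%ebx,4), %eax" can compute, so the whole expression is
   costed as one LEA plus the costs of its three operands.  */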
20677 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
20678 if (val == 2 || val == 4 || val == 8)
20679 {
20680 *total = cost->lea;
20681 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
20682 outer_code, opno, speed);
20683 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
20684 outer_code, opno, speed);
20685 *total += rtx_cost (XEXP (x, 1), mode,
20686 outer_code, opno, speed);
20687 return true;
20688 }
20689 }
20690 else if (GET_CODE (XEXP (x, 0)) == MULT
20691 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20692 {
20693 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
20694 if (val == 2 || val == 4 || val == 8)
20695 {
20696 *total = cost->lea;
20697 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
20698 outer_code, opno, speed);
20699 *total += rtx_cost (XEXP (x, 1), mode,
20700 outer_code, opno, speed);
20701 return true;
20702 }
20703 }
20704 else if (GET_CODE (XEXP (x, 0)) == PLUS)
20705 {
20706 rtx op = XEXP (XEXP (x, 0), 0);
20707
20708 /* Add with carry, ignore the cost of adding a carry flag. */
20709 if (ix86_carry_flag_operator (op, mode)
20710 || ix86_carry_flag_unset_operator (op, mode))
20711 *total = cost->add;
20712 else
20713 {
20714 *total = cost->lea;
20715 *total += rtx_cost (op, mode,
20716 outer_code, opno, speed);
20717 }
20718
20719 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
20720 outer_code, opno, speed);
20721 *total += rtx_cost (XEXP (x, 1), mode,
20722 outer_code, opno, speed);
20723 return true;
20724 }
20725 }
20726 /* FALLTHRU */
20727
20728 case MINUS:
20729 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
20730 if (GET_MODE_CLASS (mode) == MODE_INT
20731 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
20732 && GET_CODE (XEXP (x, 0)) == MINUS
20733 && (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode)
20734 || ix86_carry_flag_unset_operator (XEXP (XEXP (x, 0), 1), mode)))
20735 {
20736 *total = cost->add;
20737 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
20738 outer_code, opno, speed);
20739 *total += rtx_cost (XEXP (x, 1), mode,
20740 outer_code, opno, speed);
20741 return true;
20742 }
20743
20744 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
20745 {
20746 *total = cost->addss;
20747 return false;
20748 }
20749 else if (X87_FLOAT_MODE_P (mode))
20750 {
20751 *total = cost->fadd;
20752 return false;
20753 }
20754 else if (FLOAT_MODE_P (mode))
20755 {
20756 *total = ix86_vec_cost (mode, cost->addss);
20757 return false;
20758 }
20759 /* FALLTHRU */
20760
20761 case AND:
20762 case IOR:
20763 case XOR:
20764 if (GET_MODE_CLASS (mode) == MODE_INT
20765 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20766 {
20767 *total = (cost->add * 2
20768 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
20769 << (GET_MODE (XEXP (x, 0)) != DImode))
20770 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
20771 << (GET_MODE (XEXP (x, 1)) != DImode)));
20772 return true;
20773 }
20774 else if (code == AND
20775 && address_no_seg_operand (x, mode))
20776 {
20777 *total = cost->lea;
20778 return true;
20779 }
20780 /* FALLTHRU */
20781
20782 case NEG:
20783 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
20784 {
20785 *total = cost->sse_op;
20786 return false;
20787 }
20788 else if (X87_FLOAT_MODE_P (mode))
20789 {
20790 *total = cost->fchs;
20791 return false;
20792 }
20793 else if (FLOAT_MODE_P (mode))
20794 {
20795 *total = ix86_vec_cost (mode, cost->sse_op);
20796 return false;
20797 }
20798 /* FALLTHRU */
20799
20800 case NOT:
20801 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
20802 *total = ix86_vec_cost (mode, cost->sse_op);
20803 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20804 *total = cost->add * 2;
20805 else
20806 *total = cost->add;
20807 return false;
20808
20809 case COMPARE:
20810 rtx op0, op1;
20811 op0 = XEXP (x, 0);
20812 op1 = XEXP (x, 1);
20813 if (GET_CODE (op0) == ZERO_EXTRACT
20814 && XEXP (op0, 1) == const1_rtx
20815 && CONST_INT_P (XEXP (op0, 2))
20816 && op1 == const0_rtx)
20817 {
20818 /* This kind of construct is implemented using test[bwl].
20819 Treat it as if we had an AND. */
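/* For example (illustrative): comparing (zero_extract x 1 4) against zero
   corresponds to a single bit test such as "testl $16, %eax" (bit 4).  */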
20820 mode = GET_MODE (XEXP (op0, 0));
20821 *total = (cost->add
20822 + rtx_cost (XEXP (op0, 0), mode, outer_code,
20823 opno, speed)
20824 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
20825 return true;
20826 }
20827
20828 if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1))
20829 {
20830 /* This is an overflow detection, count it as a normal compare. */
20831 *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed);
20832 return true;
20833 }
20834
20835 rtx geu;
20836 /* Match x
20837 (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
20838 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) */
20839 if (mode == CCCmode
20840 && GET_CODE (op0) == NEG
20841 && GET_CODE (geu = XEXP (op0, 0)) == GEU
20842 && REG_P (XEXP (geu, 0))
20843 && (GET_MODE (XEXP (geu, 0)) == CCCmode
20844 || GET_MODE (XEXP (geu, 0)) == CCmode)
20845 && REGNO (XEXP (geu, 0)) == FLAGS_REG
20846 && XEXP (geu, 1) == const0_rtx
20847 && GET_CODE (op1) == LTU
20848 && REG_P (XEXP (op1, 0))
20849 && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
20850 && REGNO (XEXP (op1, 0)) == FLAGS_REG
20851 && XEXP (op1, 1) == const0_rtx)
20852 {
20853 /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop. */
20854 *total = 0;
20855 return true;
20856 }
20857
20858 /* The embedded comparison operand is completely free. */
20859 if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx)
20860 *total = 0;
20861
20862 return false;
20863
20864 case FLOAT_EXTEND:
20865 if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
20866 *total = 0;
20867 else
20868 *total = ix86_vec_cost (mode, cost->addss);
20869 return false;
20870
20871 case FLOAT_TRUNCATE:
20872 if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
20873 *total = cost->fadd;
20874 else
20875 *total = ix86_vec_cost (mode, cost->addss);
20876 return false;
20877
20878 case ABS:
20879 /* SSE requires memory load for the constant operand. It may make
20880 sense to account for this. Of course the constant operand may or
20881 may not be reused. */
20882 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
20883 *total = cost->sse_op;
20884 else if (X87_FLOAT_MODE_P (mode))
20885 *total = cost->fabs;
20886 else if (FLOAT_MODE_P (mode))
20887 *total = ix86_vec_cost (mode, cost->sse_op);
20888 return false;
20889
20890 case SQRT:
20891 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
20892 *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
20893 else if (X87_FLOAT_MODE_P (mode))
20894 *total = cost->fsqrt;
20895 else if (FLOAT_MODE_P (mode))
20896 *total = ix86_vec_cost (mode,
20897 mode == SFmode ? cost->sqrtss : cost->sqrtsd);
20898 return false;
20899
20900 case UNSPEC:
20901 if (XINT (x, 1) == UNSPEC_TP)
20902 *total = 0;
20903 else if (XINT(x, 1) == UNSPEC_VTERNLOG)
20904 {
20905 *total = cost->sse_op;
20906 return true;
20907 }
20908 return false;
20909
20910 case VEC_SELECT:
20911 case VEC_CONCAT:
20912 case VEC_DUPLICATE:
20913 /* ??? Assume all of these vector manipulation patterns are
20914 recognizable, in which case they all pretty much have the
20915 same cost. */
20916 *total = cost->sse_op;
20917 return true;
20918 case VEC_MERGE:
20919 mask = XEXP (x, 2);
20920 /* This is a masked instruction; assume the same cost
20921 as the nonmasked variant. */
20922 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
20923 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
20924 else
20925 *total = cost->sse_op;
20926 return true;
20927
20928 case MEM:
20929 /* An insn that accesses memory is slightly more expensive
20930 than one that does not. */
20931 if (speed)
20932 *total += 1;
20933 return false;
20934
20935 default:
20936 return false;
20937 }
20938 }
20939
20940 #if TARGET_MACHO
20941
20942 static int current_machopic_label_num;
20943
20944 /* Given a symbol name and its associated stub, write out the
20945 definition of the stub. */
20946
20947 void
20948 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20949 {
20950 unsigned int length;
20951 char *binder_name, *symbol_name, lazy_ptr_name[32];
20952 int label = ++current_machopic_label_num;
20953
20954 /* For 64-bit we shouldn't get here. */
20955 gcc_assert (!TARGET_64BIT);
20956
20957 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20958 symb = targetm.strip_name_encoding (symb);
20959
20960 length = strlen (stub);
20961 binder_name = XALLOCAVEC (char, length + 32);
20962 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
20963
20964 length = strlen (symb);
20965 symbol_name = XALLOCAVEC (char, length + 32);
20966 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20967
20968 sprintf (lazy_ptr_name, "L%d$lz", label);
20969
20970 if (MACHOPIC_ATT_STUB)
20971 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
20972 else if (MACHOPIC_PURE)
20973 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
20974 else
20975 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
20976
20977 fprintf (file, "%s:\n", stub);
20978 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20979
20980 if (MACHOPIC_ATT_STUB)
20981 {
20982 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
20983 }
20984 else if (MACHOPIC_PURE)
20985 {
20986 /* PIC stub. */
20987 /* 25-byte PIC stub using "CALL get_pc_thunk". */
20988 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
20989 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
20990 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
20991 label, lazy_ptr_name, label);
20992 fprintf (file, "\tjmp\t*%%ecx\n");
20993 }
20994 else
20995 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
20996
20997 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
20998 it needs no stub-binding-helper. */
20999 if (MACHOPIC_ATT_STUB)
21000 return;
21001
21002 fprintf (file, "%s:\n", binder_name);
21003
21004 if (MACHOPIC_PURE)
21005 {
21006 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
21007 fprintf (file, "\tpushl\t%%ecx\n");
21008 }
21009 else
21010 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
21011
21012 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
21013
21014 /* N.B. Keep the correspondence of these
21015 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
21016 old-pic/new-pic/non-pic stubs; altering this will break
21017 compatibility with existing dylibs. */
21018 if (MACHOPIC_PURE)
21019 {
21020 /* 25-byte PIC stub using "CALL get_pc_thunk". */
21021 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
21022 }
21023 else
21024 /* 16-byte -mdynamic-no-pic stub. */
21025 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
21026
21027 fprintf (file, "%s:\n", lazy_ptr_name);
21028 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21029 fprintf (file, ASM_LONG "%s\n", binder_name);
21030 }
21031 #endif /* TARGET_MACHO */
21032
21033 /* Order the registers for register allocator. */
21034
21035 void
21036 x86_order_regs_for_local_alloc (void)
21037 {
21038 int pos = 0;
21039 int i;
21040
21041 /* First allocate the local general purpose registers. */
21042 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
21043 if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
21044 reg_alloc_order [pos++] = i;
21045
21046 /* Global general purpose registers. */
21047 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
21048 if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
21049 reg_alloc_order [pos++] = i;
21050
21051 /* x87 registers come first in case we are doing FP math
21052 using them. */
21053 if (!TARGET_SSE_MATH)
21054 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
21055 reg_alloc_order [pos++] = i;
21056
21057 /* SSE registers. */
21058 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
21059 reg_alloc_order [pos++] = i;
21060 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
21061 reg_alloc_order [pos++] = i;
21062
21063 /* Extended REX SSE registers. */
21064 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
21065 reg_alloc_order [pos++] = i;
21066
21067 /* Mask register. */
21068 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
21069 reg_alloc_order [pos++] = i;
21070
21071 /* x87 registers. */
21072 if (TARGET_SSE_MATH)
21073 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
21074 reg_alloc_order [pos++] = i;
21075
21076 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
21077 reg_alloc_order [pos++] = i;
21078
21079 /* Initialize the rest of the array, as we do not allocate some registers
21080 at all. */
21081 while (pos < FIRST_PSEUDO_REGISTER)
21082 reg_alloc_order [pos++] = 0;
21083 }
21084
21085 static bool
21086 ix86_ms_bitfield_layout_p (const_tree record_type)
21087 {
21088 return ((TARGET_MS_BITFIELD_LAYOUT
21089 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
21090 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
21091 }
21092
21093 /* Returns an expression indicating where the this parameter is
21094 located on entry to the FUNCTION. */
21095
21096 static rtx
21097 x86_this_parameter (tree function)
21098 {
21099 tree type = TREE_TYPE (function);
21100 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
21101 int nregs;
21102
21103 if (TARGET_64BIT)
21104 {
21105 const int *parm_regs;
21106
21107 if (ix86_function_type_abi (type) == MS_ABI)
21108 parm_regs = x86_64_ms_abi_int_parameter_registers;
21109 else
21110 parm_regs = x86_64_int_parameter_registers;
21111 return gen_rtx_REG (Pmode, parm_regs[aggr]);
21112 }
21113
21114 nregs = ix86_function_regparm (type, function);
21115
21116 if (nregs > 0 && !stdarg_p (type))
21117 {
21118 int regno;
21119 unsigned int ccvt = ix86_get_callcvt (type);
21120
21121 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
21122 regno = aggr ? DX_REG : CX_REG;
21123 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
21124 {
21125 regno = CX_REG;
21126 if (aggr)
21127 return gen_rtx_MEM (SImode,
21128 plus_constant (Pmode, stack_pointer_rtx, 4));
21129 }
21130 else
21131 {
21132 regno = AX_REG;
21133 if (aggr)
21134 {
21135 regno = DX_REG;
21136 if (nregs == 1)
21137 return gen_rtx_MEM (SImode,
21138 plus_constant (Pmode,
21139 stack_pointer_rtx, 4));
21140 }
21141 }
21142 return gen_rtx_REG (SImode, regno);
21143 }
21144
21145 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
21146 aggr ? 8 : 4));
21147 }
21148
21149 /* Determine whether x86_output_mi_thunk can succeed. */
21150
21151 static bool
21152 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
21153 const_tree function)
21154 {
21155 /* 64-bit can handle anything. */
21156 if (TARGET_64BIT)
21157 return true;
21158
21159 /* For 32-bit, everything's fine if we have one free register. */
21160 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
21161 return true;
21162
21163 /* Need a free register for vcall_offset. */
21164 if (vcall_offset)
21165 return false;
21166
21167 /* Need a free register for GOT references. */
21168 if (flag_pic && !targetm.binds_local_p (function))
21169 return false;
21170
21171 /* Otherwise ok. */
21172 return true;
21173 }
21174
21175 /* Output the assembler code for a thunk function. THUNK_DECL is the
21176 declaration for the thunk function itself, FUNCTION is the decl for
21177 the target function. DELTA is an immediate constant offset to be
21178 added to THIS. If VCALL_OFFSET is nonzero, the word at
21179 *(*this + vcall_offset) should be added to THIS. */
21180
21181 static void
21182 x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
21183 HOST_WIDE_INT vcall_offset, tree function)
21184 {
21185 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
21186 rtx this_param = x86_this_parameter (function);
21187 rtx this_reg, tmp, fnaddr;
21188 unsigned int tmp_regno;
21189 rtx_insn *insn;
21190
21191 if (TARGET_64BIT)
21192 tmp_regno = R10_REG;
21193 else
21194 {
21195 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
21196 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
21197 tmp_regno = AX_REG;
21198 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
21199 tmp_regno = DX_REG;
21200 else
21201 tmp_regno = CX_REG;
21202 }
21203
21204 emit_note (NOTE_INSN_PROLOGUE_END);
21205
21206 /* CET is enabled, insert an ENDBR instruction. */
21207 if ((flag_cf_protection & CF_BRANCH))
21208 emit_insn (gen_nop_endbr ());
21209
21210 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
21211 pull it in now and let DELTA benefit. */
21212 if (REG_P (this_param))
21213 this_reg = this_param;
21214 else if (vcall_offset)
21215 {
21216 /* Put the this parameter into %eax. */
21217 this_reg = gen_rtx_REG (Pmode, AX_REG);
21218 emit_move_insn (this_reg, this_param);
21219 }
21220 else
21221 this_reg = NULL_RTX;
21222
21223 /* Adjust the this parameter by a fixed constant. */
21224 if (delta)
21225 {
21226 rtx delta_rtx = GEN_INT (delta);
21227 rtx delta_dst = this_reg ? this_reg : this_param;
21228
21229 if (TARGET_64BIT)
21230 {
21231 if (!x86_64_general_operand (delta_rtx, Pmode))
21232 {
21233 tmp = gen_rtx_REG (Pmode, tmp_regno);
21234 emit_move_insn (tmp, delta_rtx);
21235 delta_rtx = tmp;
21236 }
21237 }
21238
21239 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
21240 }
21241
21242 /* Adjust the this parameter by a value stored in the vtable. */
21243 if (vcall_offset)
21244 {
21245 rtx vcall_addr, vcall_mem, this_mem;
21246
21247 tmp = gen_rtx_REG (Pmode, tmp_regno);
21248
21249 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
21250 if (Pmode != ptr_mode)
21251 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
21252 emit_move_insn (tmp, this_mem);
21253
21254 /* Adjust the this parameter. */
21255 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
21256 if (TARGET_64BIT
21257 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
21258 {
21259 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
21260 emit_move_insn (tmp2, GEN_INT (vcall_offset));
21261 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
21262 }
21263
21264 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
21265 if (Pmode != ptr_mode)
21266 emit_insn (gen_addsi_1_zext (this_reg,
21267 gen_rtx_REG (ptr_mode,
21268 REGNO (this_reg)),
21269 vcall_mem));
21270 else
21271 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
21272 }
21273
21274 /* If necessary, drop THIS back to its stack slot. */
21275 if (this_reg && this_reg != this_param)
21276 emit_move_insn (this_param, this_reg);
21277
21278 fnaddr = XEXP (DECL_RTL (function), 0);
21279 if (TARGET_64BIT)
21280 {
21281 if (!flag_pic || targetm.binds_local_p (function)
21282 || TARGET_PECOFF)
21283 ;
21284 else
21285 {
21286 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
21287 tmp = gen_rtx_CONST (Pmode, tmp);
21288 fnaddr = gen_const_mem (Pmode, tmp);
21289 }
21290 }
21291 else
21292 {
21293 if (!flag_pic || targetm.binds_local_p (function))
21294 ;
21295 #if TARGET_MACHO
21296 else if (TARGET_MACHO)
21297 {
21298 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
21299 fnaddr = XEXP (fnaddr, 0);
21300 }
21301 #endif /* TARGET_MACHO */
21302 else
21303 {
21304 tmp = gen_rtx_REG (Pmode, CX_REG);
21305 output_set_got (tmp, NULL_RTX);
21306
21307 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
21308 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
21309 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
21310 fnaddr = gen_const_mem (Pmode, fnaddr);
21311 }
21312 }
21313
21314 /* Our sibling call patterns do not allow memories, because we have no
21315 predicate that can distinguish between frame and non-frame memory.
21316 For our purposes here, we can get away with (ab)using a jump pattern,
21317 because we're going to do no optimization. */
21318 if (MEM_P (fnaddr))
21319 {
21320 if (sibcall_insn_operand (fnaddr, word_mode))
21321 {
21322 fnaddr = XEXP (DECL_RTL (function), 0);
21323 tmp = gen_rtx_MEM (QImode, fnaddr);
21324 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
21325 tmp = emit_call_insn (tmp);
21326 SIBLING_CALL_P (tmp) = 1;
21327 }
21328 else
21329 emit_jump_insn (gen_indirect_jump (fnaddr));
21330 }
21331 else
21332 {
21333 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
21334 {
21335 // CM_LARGE_PIC always uses a pseudo PIC register, which is
21336 // uninitialized. Since FUNCTION is local and calling it
21337 // doesn't go through the PLT, we use the scratch register %r11 as
21338 // the PIC register and initialize it here.
21339 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
21340 ix86_init_large_pic_reg (tmp_regno);
21341 fnaddr = legitimize_pic_address (fnaddr,
21342 gen_rtx_REG (Pmode, tmp_regno));
21343 }
21344
21345 if (!sibcall_insn_operand (fnaddr, word_mode))
21346 {
21347 tmp = gen_rtx_REG (word_mode, tmp_regno);
21348 if (GET_MODE (fnaddr) != word_mode)
21349 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
21350 emit_move_insn (tmp, fnaddr);
21351 fnaddr = tmp;
21352 }
21353
21354 tmp = gen_rtx_MEM (QImode, fnaddr);
21355 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
21356 tmp = emit_call_insn (tmp);
21357 SIBLING_CALL_P (tmp) = 1;
21358 }
21359 emit_barrier ();
21360
21361 /* Emit just enough of rest_of_compilation to get the insns emitted. */
21362 insn = get_insns ();
21363 shorten_branches (insn);
21364 assemble_start_function (thunk_fndecl, fnname);
21365 final_start_function (insn, file, 1);
21366 final (insn, file, 1);
21367 final_end_function ();
21368 assemble_end_function (thunk_fndecl, fnname);
21369 }
21370
21371 static void
21372 x86_file_start (void)
21373 {
21374 default_file_start ();
21375 if (TARGET_16BIT)
21376 fputs ("\t.code16gcc\n", asm_out_file);
21377 #if TARGET_MACHO
21378 darwin_file_start ();
21379 #endif
21380 if (X86_FILE_START_VERSION_DIRECTIVE)
21381 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
21382 if (X86_FILE_START_FLTUSED)
21383 fputs ("\t.global\t__fltused\n", asm_out_file);
21384 if (ix86_asm_dialect == ASM_INTEL)
21385 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
21386 }
21387
21388 int
21389 x86_field_alignment (tree type, int computed)
21390 {
21391 machine_mode mode;
21392
21393 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
21394 return computed;
21395 if (TARGET_IAMCU)
21396 return iamcu_alignment (type, computed);
21397 type = strip_array_types (type);
21398 mode = TYPE_MODE (type);
21399 if (mode == DFmode || mode == DCmode
21400 || GET_MODE_CLASS (mode) == MODE_INT
21401 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
21402 {
21403 if (TYPE_ATOMIC (type) && computed > 32)
21404 {
21405 static bool warned;
21406
21407 if (!warned && warn_psabi)
21408 {
21409 const char *url
21410 = CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic";
21411
21412 warned = true;
21413 inform (input_location, "the alignment of %<_Atomic %T%> "
21414 "fields changed in %{GCC 11.1%}",
21415 TYPE_MAIN_VARIANT (type), url);
21416 }
21417 }
21418 else
21419 return MIN (32, computed);
21420 }
21421 return computed;
21422 }
21423
21424 /* Print call to TARGET to FILE. */
21425
21426 static void
21427 x86_print_call_or_nop (FILE *file, const char *target)
21428 {
21429 if (flag_nop_mcount || !strcmp (target, "nop"))
21430 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
21431 fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
21432 else
21433 fprintf (file, "1:\tcall\t%s\n", target);
21434 }
21435
21436 static bool
21437 current_fentry_name (const char **name)
21438 {
21439 tree attr = lookup_attribute ("fentry_name",
21440 DECL_ATTRIBUTES (current_function_decl));
21441 if (!attr)
21442 return false;
21443 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
21444 return true;
21445 }
21446
21447 static bool
21448 current_fentry_section (const char **name)
21449 {
21450 tree attr = lookup_attribute ("fentry_section",
21451 DECL_ATTRIBUTES (current_function_decl));
21452 if (!attr)
21453 return false;
21454 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
21455 return true;
21456 }
21457
21458 /* Output assembler code to FILE to increment profiler label # LABELNO
21459 for profiling a function entry. */
21460 void
21461 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
21462 {
21463 if (cfun->machine->insn_queued_at_entrance)
21464 {
21465 if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
21466 fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
21467 unsigned int patch_area_size
21468 = crtl->patch_area_size - crtl->patch_area_entry;
21469 if (patch_area_size)
21470 ix86_output_patchable_area (patch_area_size,
21471 crtl->patch_area_entry == 0);
21472 }
21473
21474 const char *mcount_name = MCOUNT_NAME;
21475
21476 if (current_fentry_name (&mcount_name))
21477 ;
21478 else if (fentry_name)
21479 mcount_name = fentry_name;
21480 else if (flag_fentry)
21481 mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
21482
21483 if (TARGET_64BIT)
21484 {
21485 #ifndef NO_PROFILE_COUNTERS
21486 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
21487 #endif
21488
21489 if (!TARGET_PECOFF)
21490 {
21491 switch (ix86_cmodel)
21492 {
21493 case CM_LARGE:
21494 /* NB: R10 is caller-saved. Although it can be used as a
21495 static chain register, it is preserved when calling
21496 mcount for nested functions. */
21497 fprintf (file, "1:\tmovabsq\t$%s, %%r10\n\tcall\t*%%r10\n",
21498 mcount_name);
21499 break;
21500 case CM_LARGE_PIC:
21501 #ifdef NO_PROFILE_COUNTERS
21502 fprintf (file, "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
21503 fprintf (file, "\tleaq\t1b(%%rip), %%r10\n");
21504 fprintf (file, "\taddq\t%%r11, %%r10\n");
21505 fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
21506 fprintf (file, "\taddq\t%%r11, %%r10\n");
21507 fprintf (file, "\tcall\t*%%r10\n");
21508 #else
21509 sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
21510 #endif
21511 break;
21512 case CM_SMALL_PIC:
21513 case CM_MEDIUM_PIC:
21514 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
21515 break;
21516 default:
21517 x86_print_call_or_nop (file, mcount_name);
21518 break;
21519 }
21520 }
21521 else
21522 x86_print_call_or_nop (file, mcount_name);
21523 }
21524 else if (flag_pic)
21525 {
21526 #ifndef NO_PROFILE_COUNTERS
21527 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
21528 LPREFIX, labelno);
21529 #endif
21530 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
21531 }
21532 else
21533 {
21534 #ifndef NO_PROFILE_COUNTERS
21535 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
21536 LPREFIX, labelno);
21537 #endif
21538 x86_print_call_or_nop (file, mcount_name);
21539 }
21540
21541 if (flag_record_mcount
21542 || lookup_attribute ("fentry_section",
21543 DECL_ATTRIBUTES (current_function_decl)))
21544 {
21545 const char *sname = "__mcount_loc";
21546
21547 if (current_fentry_section (&sname))
21548 ;
21549 else if (fentry_section)
21550 sname = fentry_section;
21551
21552 fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
21553 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
21554 fprintf (file, "\t.previous\n");
21555 }
21556 }
21557
21558 /* We don't have exact information about the insn sizes, but we may assume
21559 quite safely that we are informed about all 1 byte insns and memory
21560 address sizes. This is enough to eliminate unnecessary padding in
21561 99% of cases. */
21562
21563 int
21564 ix86_min_insn_size (rtx_insn *insn)
21565 {
21566 int l = 0, len;
21567
21568 if (!INSN_P (insn) || !active_insn_p (insn))
21569 return 0;
21570
21571 /* Discard alignments we've emitted and jump instructions. */
21572 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
21573 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
21574 return 0;
21575
21576 /* Important case - calls are always 5 bytes.
21577 It is common to have many calls in a row. */
21578 if (CALL_P (insn)
21579 && symbolic_reference_mentioned_p (PATTERN (insn))
21580 && !SIBLING_CALL_P (insn))
21581 return 5;
21582 len = get_attr_length (insn);
21583 if (len <= 1)
21584 return 1;
21585
21586 /* For normal instructions we rely on get_attr_length being exact,
21587 with a few exceptions. */
21588 if (!JUMP_P (insn))
21589 {
21590 enum attr_type type = get_attr_type (insn);
21591
21592 switch (type)
21593 {
21594 case TYPE_MULTI:
21595 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
21596 || asm_noperands (PATTERN (insn)) >= 0)
21597 return 0;
21598 break;
21599 case TYPE_OTHER:
21600 case TYPE_FCMP:
21601 break;
21602 default:
21603 /* Otherwise trust get_attr_length. */
21604 return len;
21605 }
21606
21607 l = get_attr_length_address (insn);
21608 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
21609 l = 4;
21610 }
21611 if (l)
21612 return 1+l;
21613 else
21614 return 2;
21615 }
21616
21617 #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
21618
21619 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
21620 window. */
21621
21622 static void
21623 ix86_avoid_jump_mispredicts (void)
21624 {
21625 rtx_insn *insn, *start = get_insns ();
21626 int nbytes = 0, njumps = 0;
21627 bool isjump = false;
21628
21629 /* Look for all minimal intervals of instructions containing 4 jumps.
21630 The intervals are bounded by START and INSN. NBYTES is the total
21631 size of instructions in the interval including INSN and not including
21632 START. When NBYTES is smaller than 16 bytes, it is possible
21633 that the ends of START and INSN fall in the same 16-byte page.
21634
21635 The smallest offset in the page at which INSN can start is the case where
21636 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
21637 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
21638
21639 Don't consider an asm goto as a jump; while it can contain a jump, it doesn't
21640 have to, since control transfer to label(s) can be performed through other
21641 means, and we also estimate the minimum length of all asm stmts as 0. */
21642 for (insn = start; insn; insn = NEXT_INSN (insn))
21643 {
21644 int min_size;
21645
21646 if (LABEL_P (insn))
21647 {
21648 align_flags alignment = label_to_alignment (insn);
21649 int align = alignment.levels[0].log;
21650 int max_skip = alignment.levels[0].maxskip;
21651
21652 if (max_skip > 15)
21653 max_skip = 15;
21654 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
21655 already in the current 16 byte page, because otherwise
21656 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
21657 bytes to reach 16 byte boundary. */
21658 if (align <= 0
21659 || (align <= 3 && max_skip != (1 << align) - 1))
21660 max_skip = 0;
21661 if (dump_file)
21662 fprintf (dump_file, "Label %i with max_skip %i\n",
21663 INSN_UID (insn), max_skip);
21664 if (max_skip)
21665 {
21666 while (nbytes + max_skip >= 16)
21667 {
21668 start = NEXT_INSN (start);
21669 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
21670 || CALL_P (start))
21671 njumps--, isjump = true;
21672 else
21673 isjump = false;
21674 nbytes -= ix86_min_insn_size (start);
21675 }
21676 }
21677 continue;
21678 }
21679
21680 min_size = ix86_min_insn_size (insn);
21681 nbytes += min_size;
21682 if (dump_file)
21683 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
21684 INSN_UID (insn), min_size);
21685 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
21686 || CALL_P (insn))
21687 njumps++;
21688 else
21689 continue;
21690
21691 while (njumps > 3)
21692 {
21693 start = NEXT_INSN (start);
21694 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
21695 || CALL_P (start))
21696 njumps--, isjump = true;
21697 else
21698 isjump = false;
21699 nbytes -= ix86_min_insn_size (start);
21700 }
21701 gcc_assert (njumps >= 0);
21702 if (dump_file)
21703 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
21704 INSN_UID (start), INSN_UID (insn), nbytes);
21705
21706 if (njumps == 3 && isjump && nbytes < 16)
21707 {
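/* Worked example (illustrative): if the interval already holds three jumps,
   INSN is a fourth, 2-byte jump and nbytes is 12, the pad computed below is
   15 - 12 + 2 = 5 bytes, which keeps the four jumps from sharing a single
   16-byte window.  */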
21708 int padsize = 15 - nbytes + ix86_min_insn_size (insn);
21709
21710 if (dump_file)
21711 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
21712 INSN_UID (insn), padsize);
21713 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
21714 }
21715 }
21716 }
21717 #endif
21718
21719 /* AMD Athlon works faster
21720 when RET is not the destination of a conditional jump or directly preceded
21721 by another jump instruction. We avoid the penalty by inserting a NOP just
21722 before the RET instruction in such cases. */
21723 static void
21724 ix86_pad_returns (void)
21725 {
21726 edge e;
21727 edge_iterator ei;
21728
21729 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
21730 {
21731 basic_block bb = e->src;
21732 rtx_insn *ret = BB_END (bb);
21733 rtx_insn *prev;
21734 bool replace = false;
21735
21736 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
21737 || optimize_bb_for_size_p (bb))
21738 continue;
21739 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
21740 if (active_insn_p (prev) || LABEL_P (prev))
21741 break;
21742 if (prev && LABEL_P (prev))
21743 {
21744 edge e;
21745 edge_iterator ei;
21746
21747 FOR_EACH_EDGE (e, ei, bb->preds)
21748 if (EDGE_FREQUENCY (e) && e->src->index >= 0
21749 && !(e->flags & EDGE_FALLTHRU))
21750 {
21751 replace = true;
21752 break;
21753 }
21754 }
21755 if (!replace)
21756 {
21757 prev = prev_active_insn (ret);
21758 if (prev
21759 && ((JUMP_P (prev) && any_condjump_p (prev))
21760 || CALL_P (prev)))
21761 replace = true;
21762 /* Empty functions get a branch mispredict even when
21763 the jump destination is not visible to us. */
21764 if (!prev && !optimize_function_for_size_p (cfun))
21765 replace = true;
21766 }
21767 if (replace)
21768 {
21769 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
21770 delete_insn (ret);
21771 }
21772 }
21773 }
21774
21775 /* Count the minimum number of instructions in BB. Return 4 if the
21776 number of instructions >= 4. */
21777
21778 static int
21779 ix86_count_insn_bb (basic_block bb)
21780 {
21781 rtx_insn *insn;
21782 int insn_count = 0;
21783
21784 /* Count number of instructions in this block. Return 4 if the number
21785 of instructions >= 4. */
21786 FOR_BB_INSNS (bb, insn)
21787 {
21788 /* This only happens in exit blocks. */
21789 if (JUMP_P (insn)
21790 && ANY_RETURN_P (PATTERN (insn)))
21791 break;
21792
21793 if (NONDEBUG_INSN_P (insn)
21794 && GET_CODE (PATTERN (insn)) != USE
21795 && GET_CODE (PATTERN (insn)) != CLOBBER)
21796 {
21797 insn_count++;
21798 if (insn_count >= 4)
21799 return insn_count;
21800 }
21801 }
21802
21803 return insn_count;
21804 }
21805
21806
21807 /* Count the minimum number of instructions in code path in BB.
21808 Return 4 if the number of instructions >= 4. */
21809
21810 static int
21811 ix86_count_insn (basic_block bb)
21812 {
21813 edge e;
21814 edge_iterator ei;
21815 int min_prev_count;
21816
21817 /* Only bother counting instructions along paths with no
21818 more than 2 basic blocks between entry and exit. Given
21819 that BB has an edge to exit, determine if a predecessor
21820 of BB has an edge from entry. If so, compute the number
21821 of instructions in the predecessor block. If there
21822 happen to be multiple such blocks, compute the minimum. */
21823 min_prev_count = 4;
21824 FOR_EACH_EDGE (e, ei, bb->preds)
21825 {
21826 edge prev_e;
21827 edge_iterator prev_ei;
21828
21829 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
21830 {
21831 min_prev_count = 0;
21832 break;
21833 }
21834 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
21835 {
21836 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
21837 {
21838 int count = ix86_count_insn_bb (e->src);
21839 if (count < min_prev_count)
21840 min_prev_count = count;
21841 break;
21842 }
21843 }
21844 }
21845
21846 if (min_prev_count < 4)
21847 min_prev_count += ix86_count_insn_bb (bb);
21848
21849 return min_prev_count;
21850 }
21851
21852 /* Pad short function to 4 instructions. */
21853
21854 static void
21855 ix86_pad_short_function (void)
21856 {
21857 edge e;
21858 edge_iterator ei;
21859
21860 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
21861 {
21862 rtx_insn *ret = BB_END (e->src);
21863 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
21864 {
21865 int insn_count = ix86_count_insn (e->src);
21866
21867 /* Pad short function. */
21868 if (insn_count < 4)
21869 {
21870 rtx_insn *insn = ret;
21871
21872 /* Find epilogue. */
21873 while (insn
21874 && (!NOTE_P (insn)
21875 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
21876 insn = PREV_INSN (insn);
21877
21878 if (!insn)
21879 insn = ret;
21880
21881 /* Two NOPs count as one instruction. */
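/* E.g. (illustrative) a block with only two real insns gets
   2 * (4 - 2) = 4 NOPs emitted just before the epilogue.  */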
21882 insn_count = 2 * (4 - insn_count);
21883 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
21884 }
21885 }
21886 }
21887 }
21888
21889 /* Fix up a Windows system unwinder issue. If an EH region falls through into
21890 the epilogue, the Windows system unwinder will apply epilogue logic and
21891 produce incorrect offsets. This can be avoided by adding a nop between
21892 the last insn that can throw and the first insn of the epilogue. */
21893
21894 static void
21895 ix86_seh_fixup_eh_fallthru (void)
21896 {
21897 edge e;
21898 edge_iterator ei;
21899
21900 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
21901 {
21902 rtx_insn *insn, *next;
21903
21904 /* Find the beginning of the epilogue. */
21905 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
21906 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
21907 break;
21908 if (insn == NULL)
21909 continue;
21910
21911 /* We only care about preceding insns that can throw. */
21912 insn = prev_active_insn (insn);
21913 if (insn == NULL || !can_throw_internal (insn))
21914 continue;
21915
21916 /* Do not separate calls from their debug information. */
21917 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
21918 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
21919 insn = next;
21920 else
21921 break;
21922
21923 emit_insn_after (gen_nops (const1_rtx), insn);
21924 }
21925 }
21926
21927 /* Implement machine-specific optimizations. We implement padding of returns
21928 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
21929 static void
21930 ix86_reorg (void)
21931 {
21932 /* We are freeing block_for_insn in the toplev to keep compatibility
21933 with old MDEP_REORGS that are not CFG based. Recompute it now. */
21934 compute_bb_for_insn ();
21935
21936 if (TARGET_SEH && current_function_has_exception_handlers ())
21937 ix86_seh_fixup_eh_fallthru ();
21938
21939 if (optimize && optimize_function_for_speed_p (cfun))
21940 {
21941 if (TARGET_PAD_SHORT_FUNCTION)
21942 ix86_pad_short_function ();
21943 else if (TARGET_PAD_RETURNS)
21944 ix86_pad_returns ();
21945 #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
21946 if (TARGET_FOUR_JUMP_LIMIT)
21947 ix86_avoid_jump_mispredicts ();
21948 #endif
21949 }
21950 }
21951
21952 /* Return nonzero when a QImode register that must be represented via a REX
21953 prefix is used. */
21954 bool
21955 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
21956 {
21957 int i;
21958 extract_insn_cached (insn);
21959 for (i = 0; i < recog_data.n_operands; i++)
21960 if (GENERAL_REG_P (recog_data.operand[i])
21961 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
21962 return true;
21963 return false;
21964 }
21965
21966 /* Return true when INSN mentions a register that must be encoded using a
21967 REX prefix. */
21968 bool
21969 x86_extended_reg_mentioned_p (rtx insn)
21970 {
21971 subrtx_iterator::array_type array;
21972 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
21973 {
21974 const_rtx x = *iter;
21975 if (REG_P (x)
21976 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
21977 return true;
21978 }
21979 return false;
21980 }
21981
21982 /* If profitable, negate (without causing overflow) integer constant
21983 of mode MODE at location LOC. Return true in this case. */
21984 bool
21985 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
21986 {
21987 HOST_WIDE_INT val;
21988
21989 if (!CONST_INT_P (*loc))
21990 return false;
21991
21992 switch (mode)
21993 {
21994 case E_DImode:
21995 /* DImode x86_64 constants must fit in 32 bits. */
21996 gcc_assert (x86_64_immediate_operand (*loc, mode));
21997
21998 mode = SImode;
21999 break;
22000
22001 case E_SImode:
22002 case E_HImode:
22003 case E_QImode:
22004 break;
22005
22006 default:
22007 gcc_unreachable ();
22008 }
22009
22010 /* Avoid overflows. */
22011 if (mode_signbit_p (mode, *loc))
22012 return false;
22013
22014 val = INTVAL (*loc);
22015
22016 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
22017 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
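/* Illustrative example: "subl $128, %eax" needs a 32-bit immediate, while the
   equivalent "addl $-128, %eax" fits in a sign-extended 8-bit immediate, so
   128 is negated here; conversely -128 is left alone, because 128 would no
   longer fit in the short form.  */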
22018 if ((val < 0 && val != -128)
22019 || val == 128)
22020 {
22021 *loc = GEN_INT (-val);
22022 return true;
22023 }
22024
22025 return false;
22026 }
22027
22028 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
22029 optabs would emit if we didn't have TFmode patterns. */
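/* Sketch of the trick used below (illustrative, not from the original
   comments): when the input has its sign bit set it cannot be converted
   directly by a signed int->FP conversion, so we compute
   i0 = (in >> 1) | (in & 1), convert i0 to FP, and double the result with
   f0 + f0; OR-ing the low bit back in preserves correct rounding.  */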
22030
22031 void
22032 x86_emit_floatuns (rtx operands[2])
22033 {
22034 rtx_code_label *neglab, *donelab;
22035 rtx i0, i1, f0, in, out;
22036 machine_mode mode, inmode;
22037
22038 inmode = GET_MODE (operands[1]);
22039 gcc_assert (inmode == SImode || inmode == DImode);
22040
22041 out = operands[0];
22042 in = force_reg (inmode, operands[1]);
22043 mode = GET_MODE (out);
22044 neglab = gen_label_rtx ();
22045 donelab = gen_label_rtx ();
22046 f0 = gen_reg_rtx (mode);
22047
22048 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
22049
22050 expand_float (out, in, 0);
22051
22052 emit_jump_insn (gen_jump (donelab));
22053 emit_barrier ();
22054
22055 emit_label (neglab);
22056
22057 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
22058 1, OPTAB_DIRECT);
22059 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
22060 1, OPTAB_DIRECT);
22061 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
22062
22063 expand_float (f0, i0, 0);
22064
22065 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
22066
22067 emit_label (donelab);
22068 }
22069 \f
22070 /* Target hook for scalar_mode_supported_p. */
22071 static bool
22072 ix86_scalar_mode_supported_p (scalar_mode mode)
22073 {
22074 if (DECIMAL_FLOAT_MODE_P (mode))
22075 return default_decimal_float_supported_p ();
22076 else if (mode == TFmode)
22077 return true;
22078 else if (mode == HFmode && TARGET_SSE2)
22079 return true;
22080 else
22081 return default_scalar_mode_supported_p (mode);
22082 }
22083
22084 /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
22085 if MODE is HFmode, and punt to the generic implementation otherwise. */
22086
22087 static bool
22088 ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
22089 {
22090 /* NB: Always return TRUE for HFmode so that the _Float16 type will
22091 be defined by the C front-end for AVX512FP16 intrinsics. We will
22092 issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
22093 enabled. */
22094 return ((mode == HFmode && TARGET_SSE2)
22095 ? true
22096 : default_libgcc_floating_mode_supported_p (mode));
22097 }
22098
22099 /* Implements target hook vector_mode_supported_p. */
22100 static bool
22101 ix86_vector_mode_supported_p (machine_mode mode)
22102 {
22103 /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be
22104 either. */
22105 if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode)
22106 return false;
22107 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
22108 return true;
22109 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
22110 return true;
22111 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
22112 return true;
22113 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
22114 return true;
22115 if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
22116 && VALID_MMX_REG_MODE (mode))
22117 return true;
22118 if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
22119 && VALID_MMX_REG_MODE_3DNOW (mode))
22120 return true;
22121 if (mode == V2QImode)
22122 return true;
22123 return false;
22124 }
22125
22126 /* Target hook for c_mode_for_suffix. */
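/* For example (illustrative), the constant suffix in "1.0q" selects TFmode
   (__float128) and "1.0w" selects XFmode (__float80).  */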
22127 static machine_mode
22128 ix86_c_mode_for_suffix (char suffix)
22129 {
22130 if (suffix == 'q')
22131 return TFmode;
22132 if (suffix == 'w')
22133 return XFmode;
22134
22135 return VOIDmode;
22136 }
22137
22138 /* Worker function for TARGET_MD_ASM_ADJUST.
22139
22140 We implement asm flag outputs, and maintain source compatibility
22141 with the old cc0-based compiler. */
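/* Usage sketch (illustrative): an output constraint of the form "=@ccz"
   requests the Z flag, e.g.

     bool zero;
     asm ("testl %1, %1" : "=@ccz" (zero) : "r" (x));

   Below, the real asm output becomes the flags register and the user's
   variable receives (eq (reg:CCZ flags) 0) converted to its own mode.  */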
22142
22143 static rtx_insn *
22144 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
22145 vec<machine_mode> & /*input_modes*/,
22146 vec<const char *> &constraints, vec<rtx> &clobbers,
22147 HARD_REG_SET &clobbered_regs, location_t loc)
22148 {
22149 bool saw_asm_flag = false;
22150
22151 start_sequence ();
22152 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
22153 {
22154 const char *con = constraints[i];
22155 if (!startswith (con, "=@cc"))
22156 continue;
22157 con += 4;
22158 if (strchr (con, ',') != NULL)
22159 {
22160 error_at (loc, "alternatives not allowed in %<asm%> flag output");
22161 continue;
22162 }
22163
22164 bool invert = false;
22165 if (con[0] == 'n')
22166 invert = true, con++;
22167
22168 machine_mode mode = CCmode;
22169 rtx_code code = UNKNOWN;
22170
22171 switch (con[0])
22172 {
22173 case 'a':
22174 if (con[1] == 0)
22175 mode = CCAmode, code = EQ;
22176 else if (con[1] == 'e' && con[2] == 0)
22177 mode = CCCmode, code = NE;
22178 break;
22179 case 'b':
22180 if (con[1] == 0)
22181 mode = CCCmode, code = EQ;
22182 else if (con[1] == 'e' && con[2] == 0)
22183 mode = CCAmode, code = NE;
22184 break;
22185 case 'c':
22186 if (con[1] == 0)
22187 mode = CCCmode, code = EQ;
22188 break;
22189 case 'e':
22190 if (con[1] == 0)
22191 mode = CCZmode, code = EQ;
22192 break;
22193 case 'g':
22194 if (con[1] == 0)
22195 mode = CCGCmode, code = GT;
22196 else if (con[1] == 'e' && con[2] == 0)
22197 mode = CCGCmode, code = GE;
22198 break;
22199 case 'l':
22200 if (con[1] == 0)
22201 mode = CCGCmode, code = LT;
22202 else if (con[1] == 'e' && con[2] == 0)
22203 mode = CCGCmode, code = LE;
22204 break;
22205 case 'o':
22206 if (con[1] == 0)
22207 mode = CCOmode, code = EQ;
22208 break;
22209 case 'p':
22210 if (con[1] == 0)
22211 mode = CCPmode, code = EQ;
22212 break;
22213 case 's':
22214 if (con[1] == 0)
22215 mode = CCSmode, code = EQ;
22216 break;
22217 case 'z':
22218 if (con[1] == 0)
22219 mode = CCZmode, code = EQ;
22220 break;
22221 }
22222 if (code == UNKNOWN)
22223 {
22224 error_at (loc, "unknown %<asm%> flag output %qs", constraints[i]);
22225 continue;
22226 }
22227 if (invert)
22228 code = reverse_condition (code);
22229
22230 rtx dest = outputs[i];
22231 if (!saw_asm_flag)
22232 {
22233 /* This is the first asm flag output. Here we put the flags
22234 register in as the real output and adjust the condition to
22235 allow it. */
22236 constraints[i] = "=Bf";
22237 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
22238 saw_asm_flag = true;
22239 }
22240 else
22241 {
22242 /* We don't need the flags register as output twice. */
22243 constraints[i] = "=X";
22244 outputs[i] = gen_rtx_SCRATCH (SImode);
22245 }
22246
22247 rtx x = gen_rtx_REG (mode, FLAGS_REG);
22248 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
22249
22250 machine_mode dest_mode = GET_MODE (dest);
22251 if (!SCALAR_INT_MODE_P (dest_mode))
22252 {
22253 error_at (loc, "invalid type for %<asm%> flag output");
22254 continue;
22255 }
22256
22257 if (dest_mode == QImode)
22258 emit_insn (gen_rtx_SET (dest, x));
22259 else
22260 {
22261 rtx reg = gen_reg_rtx (QImode);
22262 emit_insn (gen_rtx_SET (reg, x));
22263
22264 reg = convert_to_mode (dest_mode, reg, 1);
22265 emit_move_insn (dest, reg);
22266 }
22267 }
22268
22269 rtx_insn *seq = get_insns ();
22270 end_sequence ();
22271
22272 if (saw_asm_flag)
22273 return seq;
22274 else
22275 {
22276 /* If we had no asm flag outputs, clobber the flags. */
22277 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
22278 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
22279 return NULL;
22280 }
22281 }
22282
22283 /* Implement the target hook targetm.asm.encode_section_info. */
22284
22285 static void ATTRIBUTE_UNUSED
22286 ix86_encode_section_info (tree decl, rtx rtl, int first)
22287 {
22288 default_encode_section_info (decl, rtl, first);
22289
22290 if (ix86_in_large_data_p (decl))
22291 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
22292 }
22293
22294 /* Worker function for REVERSE_CONDITION. */
22295
22296 enum rtx_code
22297 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
22298 {
22299 return (mode == CCFPmode
22300 ? reverse_condition_maybe_unordered (code)
22301 : reverse_condition (code));
22302 }
22303
22304 /* Output code to perform an x87 FP register move, from OPERANDS[1]
22305 to OPERANDS[0]. */
22306
22307 const char *
22308 output_387_reg_move (rtx_insn *insn, rtx *operands)
22309 {
22310 if (REG_P (operands[0]))
22311 {
22312 if (REG_P (operands[1])
22313 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22314 {
22315 if (REGNO (operands[0]) == FIRST_STACK_REG)
22316 return output_387_ffreep (operands, 0);
22317 return "fstp\t%y0";
22318 }
22319 if (STACK_TOP_P (operands[0]))
22320 return "fld%Z1\t%y1";
22321 return "fst\t%y0";
22322 }
22323 else if (MEM_P (operands[0]))
22324 {
22325 gcc_assert (REG_P (operands[1]));
22326 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22327 return "fstp%Z0\t%y0";
22328 else
22329 {
22330 /* There is no non-popping store to memory for XFmode.
22331 So if we need one, follow the store with a load. */
22332 if (GET_MODE (operands[0]) == XFmode)
22333 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
22334 else
22335 return "fst%Z0\t%y0";
22336 }
22337 }
22338 else
22339 gcc_unreachable();
22340 }
22341 #ifdef TARGET_SOLARIS
22342 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
22343
22344 static void
22345 i386_solaris_elf_named_section (const char *name, unsigned int flags,
22346 tree decl)
22347 {
22348 /* With Binutils 2.15, the "@unwind" marker must be specified on
22349 every occurrence of the ".eh_frame" section, not just the first
22350 one. */
22351 if (TARGET_64BIT
22352 && strcmp (name, ".eh_frame") == 0)
22353 {
22354 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
22355 flags & SECTION_WRITE ? "aw" : "a");
22356 return;
22357 }
22358
22359 #ifndef USE_GAS
22360 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
22361 {
22362 solaris_elf_asm_comdat_section (name, flags, decl);
22363 return;
22364 }
22365
22366 /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
22367 SPARC assembler. One cannot mix single-letter flags and #exclude, so
22368 only emit the latter here. */
22369 if (flags & SECTION_EXCLUDE)
22370 {
22371 fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
22372 return;
22373 }
22374 #endif
22375
22376 default_elf_asm_named_section (name, flags, decl);
22377 }
22378 #endif /* TARGET_SOLARIS */
22379
22380 /* Return the mangling of TYPE if it is an extended fundamental type. */
22381
22382 static const char *
22383 ix86_mangle_type (const_tree type)
22384 {
22385 type = TYPE_MAIN_VARIANT (type);
22386
22387 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
22388 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
22389 return NULL;
22390
22391 switch (TYPE_MODE (type))
22392 {
22393 case E_HFmode:
22394 /* _Float16 is "DF16_".
22395 Align with clang's decision in https://reviews.llvm.org/D33719. */
22396 return "DF16_";
22397 case E_TFmode:
22398 /* __float128 is "g". */
22399 return "g";
22400 case E_XFmode:
22401 /* "long double" or __float80 is "e". */
22402 return "e";
22403 default:
22404 return NULL;
22405 }
22406 }
22407
22408 static GTY(()) tree ix86_tls_stack_chk_guard_decl;
22409
22410 static tree
22411 ix86_stack_protect_guard (void)
22412 {
22413 if (TARGET_SSP_TLS_GUARD)
22414 {
22415 tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
22416 int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
22417 tree type = build_qualified_type (type_node, qual);
22418 tree t;
22419
22420 if (OPTION_SET_P (ix86_stack_protector_guard_symbol_str))
22421 {
22422 t = ix86_tls_stack_chk_guard_decl;
22423
22424 if (t == NULL)
22425 {
22426 rtx x;
22427
22428 t = build_decl
22429 (UNKNOWN_LOCATION, VAR_DECL,
22430 get_identifier (ix86_stack_protector_guard_symbol_str),
22431 type);
22432 TREE_STATIC (t) = 1;
22433 TREE_PUBLIC (t) = 1;
22434 DECL_EXTERNAL (t) = 1;
22435 TREE_USED (t) = 1;
22436 TREE_THIS_VOLATILE (t) = 1;
22437 DECL_ARTIFICIAL (t) = 1;
22438 DECL_IGNORED_P (t) = 1;
22439
22440 /* Do not share RTL as the declaration is visible outside of
22441 current function. */
22442 x = DECL_RTL (t);
22443 RTX_FLAG (x, used) = 1;
22444
22445 ix86_tls_stack_chk_guard_decl = t;
22446 }
22447 }
22448 else
22449 {
22450 tree asptrtype = build_pointer_type (type);
22451
22452 t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
22453 t = build2 (MEM_REF, asptrtype, t,
22454 build_int_cst (asptrtype, 0));
22455 TREE_THIS_VOLATILE (t) = 1;
22456 }
22457
22458 return t;
22459 }
22460
22461 return default_stack_protect_guard ();
22462 }
22463
22464 /* For 32-bit code we can save PIC register setup by using the
22465 __stack_chk_fail_local hidden function instead of calling
22466 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
22467 register, so it is better to call __stack_chk_fail directly. */
22468
22469 static tree ATTRIBUTE_UNUSED
22470 ix86_stack_protect_fail (void)
22471 {
22472 return TARGET_64BIT
22473 ? default_external_stack_protect_fail ()
22474 : default_hidden_stack_protect_fail ();
22475 }
22476
22477 /* Select a format to encode pointers in exception handling data. CODE
22478 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
22479 true if the symbol may be affected by dynamic relocations.
22480
22481 ??? All x86 object file formats are capable of representing this.
22482 After all, the relocation needed is the same as for the call insn.
22483 Whether or not a particular assembler allows us to enter such, I
22484 guess we'll have to see. */
22485
22486 int
22487 asm_preferred_eh_data_format (int code, int global)
22488 {
22489 /* PE-COFF is effectively always -fPIC because of the .reloc section. */
22490 if (flag_pic || TARGET_PECOFF || !ix86_direct_extern_access)
22491 {
22492 int type = DW_EH_PE_sdata8;
22493 if (ptr_mode == SImode
22494 || ix86_cmodel == CM_SMALL_PIC
22495 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
22496 type = DW_EH_PE_sdata4;
22497 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
22498 }
22499
22500 if (ix86_cmodel == CM_SMALL
22501 || (ix86_cmodel == CM_MEDIUM && code))
22502 return DW_EH_PE_udata4;
22503
22504 return DW_EH_PE_absptr;
22505 }
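/* An illustrative example of the selection above (hypothetical flags, not
   part of the target code): with -fpic on x86-64 under the small PIC code
   model the result is DW_EH_PE_pcrel | DW_EH_PE_sdata4, plus
   DW_EH_PE_indirect for global symbols; a non-PIC small-model build gets
   DW_EH_PE_udata4 instead.  */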
22506 \f
22507 /* Implement targetm.vectorize.builtin_vectorization_cost. */
22508 static int
22509 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
22510 tree vectype, int)
22511 {
22512 bool fp = false;
22513 machine_mode mode = TImode;
22514 int index;
22515 if (vectype != NULL)
22516 {
22517 fp = FLOAT_TYPE_P (vectype);
22518 mode = TYPE_MODE (vectype);
22519 }
22520
22521 switch (type_of_cost)
22522 {
22523 case scalar_stmt:
22524 return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
22525
22526 case scalar_load:
22527 /* Load/store costs are relative to a register move, which is 2. Recompute
22528 them to COSTS_N_INSNS so everything has the same base. */
22529 return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
22530 : ix86_cost->int_load [2]) / 2;
22531
22532 case scalar_store:
22533 return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
22534 : ix86_cost->int_store [2]) / 2;
22535
22536 case vector_stmt:
22537 return ix86_vec_cost (mode,
22538 fp ? ix86_cost->addss : ix86_cost->sse_op);
22539
22540 case vector_load:
22541 index = sse_store_index (mode);
22542 /* See PR82713 - we may end up being called on non-vector type. */
22543 if (index < 0)
22544 index = 2;
22545 return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;
22546
22547 case vector_store:
22548 index = sse_store_index (mode);
22549 /* See PR82713 - we may end up being called on non-vector type. */
22550 if (index < 0)
22551 index = 2;
22552 return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;
22553
22554 case vec_to_scalar:
22555 case scalar_to_vec:
22556 return ix86_vec_cost (mode, ix86_cost->sse_op);
22557
22558 /* We should have separate costs for unaligned loads and gather/scatter.
22559 Do that incrementally. */
22560 case unaligned_load:
22561 index = sse_store_index (mode);
22562 /* See PR82713 - we may end up being called on non-vector type. */
22563 if (index < 0)
22564 index = 2;
22565 return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;
22566
22567 case unaligned_store:
22568 index = sse_store_index (mode);
22569 /* See PR82713 - we may end up being called on non-vector type. */
22570 if (index < 0)
22571 index = 2;
22572 return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;
22573
22574 case vector_gather_load:
22575 return ix86_vec_cost (mode,
22576 COSTS_N_INSNS
22577 (ix86_cost->gather_static
22578 + ix86_cost->gather_per_elt
22579 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
22580
22581 case vector_scatter_store:
22582 return ix86_vec_cost (mode,
22583 COSTS_N_INSNS
22584 (ix86_cost->scatter_static
22585 + ix86_cost->scatter_per_elt
22586 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
22587
22588 case cond_branch_taken:
22589 return ix86_cost->cond_taken_branch_cost;
22590
22591 case cond_branch_not_taken:
22592 return ix86_cost->cond_not_taken_branch_cost;
22593
22594 case vec_perm:
22595 case vec_promote_demote:
22596 return ix86_vec_cost (mode, ix86_cost->sse_op);
22597
22598 case vec_construct:
22599 {
22600 int n = TYPE_VECTOR_SUBPARTS (vectype);
22601 /* N - 1 element inserts into an SSE vector; the possible
22602 GPR -> XMM move is accounted for in add_stmt_cost. */
22603 if (GET_MODE_BITSIZE (mode) <= 128)
22604 return (n - 1) * ix86_cost->sse_op;
22605 /* One vinserti128 for combining two SSE vectors for AVX256. */
22606 else if (GET_MODE_BITSIZE (mode) == 256)
22607 return ((n - 2) * ix86_cost->sse_op
22608 + ix86_vec_cost (mode, ix86_cost->addss));
22609 /* One vinserti64x4 and two vinserti128 for combining SSE
22610 and AVX256 vectors to AVX512. */
22611 else if (GET_MODE_BITSIZE (mode) == 512)
22612 return ((n - 4) * ix86_cost->sse_op
22613 + 3 * ix86_vec_cost (mode, ix86_cost->addss));
22614 gcc_unreachable ();
22615 }
22616
22617 default:
22618 gcc_unreachable ();
22619 }
22620 }
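/* A worked example of the vec_construct arithmetic above (illustrative;
   the actual sse_op and addss values depend on the active cost table):
   a V8SImode CONSTRUCTOR (256 bits, n = 8) costs
     (8 - 2) * sse_op + ix86_vec_cost (V8SImode, addss),
   i.e. six element inserts plus one 128-bit lane combine, while a
   V4SImode CONSTRUCTOR (128 bits, n = 4) costs just (4 - 1) * sse_op.  */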
22621
22622 \f
22623 /* This function returns the calling-ABI-specific va_list type node.
22624 It returns the FNDECL-specific va_list type. */
22625
22626 static tree
22627 ix86_fn_abi_va_list (tree fndecl)
22628 {
22629 if (!TARGET_64BIT)
22630 return va_list_type_node;
22631 gcc_assert (fndecl != NULL_TREE);
22632
22633 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
22634 return ms_va_list_type_node;
22635 else
22636 return sysv_va_list_type_node;
22637 }
22638
22639 /* Returns the canonical va_list type specified by TYPE. If there
22640 is no valid TYPE provided, it returns NULL_TREE. */
22641
22642 static tree
22643 ix86_canonical_va_list_type (tree type)
22644 {
22645 if (TARGET_64BIT)
22646 {
22647 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
22648 return ms_va_list_type_node;
22649
22650 if ((TREE_CODE (type) == ARRAY_TYPE
22651 && integer_zerop (array_type_nelts (type)))
22652 || POINTER_TYPE_P (type))
22653 {
22654 tree elem_type = TREE_TYPE (type);
22655 if (TREE_CODE (elem_type) == RECORD_TYPE
22656 && lookup_attribute ("sysv_abi va_list",
22657 TYPE_ATTRIBUTES (elem_type)))
22658 return sysv_va_list_type_node;
22659 }
22660
22661 return NULL_TREE;
22662 }
22663
22664 return std_canonical_va_list_type (type);
22665 }
22666
22667 /* Iterate through the target-specific builtin types for va_list.
22668 IDX denotes the iterator, *PTREE is set to the result type of
22669 the va_list builtin, and *PNAME to its internal name.
22670 Returns zero if there is no element for this index, otherwise
22671 IDX should be increased upon the next call.
22672 Note, do not iterate a base builtin's name like __builtin_va_list.
22673 Used from c_common_nodes_and_builtins. */
22674
22675 static int
22676 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
22677 {
22678 if (TARGET_64BIT)
22679 {
22680 switch (idx)
22681 {
22682 default:
22683 break;
22684
22685 case 0:
22686 *ptree = ms_va_list_type_node;
22687 *pname = "__builtin_ms_va_list";
22688 return 1;
22689
22690 case 1:
22691 *ptree = sysv_va_list_type_node;
22692 *pname = "__builtin_sysv_va_list";
22693 return 1;
22694 }
22695 }
22696
22697 return 0;
22698 }
22699
22700 #undef TARGET_SCHED_DISPATCH
22701 #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
22702 #undef TARGET_SCHED_DISPATCH_DO
22703 #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
22704 #undef TARGET_SCHED_REASSOCIATION_WIDTH
22705 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
22706 #undef TARGET_SCHED_REORDER
22707 #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
22708 #undef TARGET_SCHED_ADJUST_PRIORITY
22709 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
22710 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
22711 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
22712 ix86_dependencies_evaluation_hook
22713
22714
22715 /* Implementation of the reassociation_width target hook, used by the
22716 reassoc phase to identify the parallelism level in a reassociated
22717 tree. The statement's tree_code is passed in OP; the arguments'
22718 type is passed in MODE. */
22719
22720 static int
22721 ix86_reassociation_width (unsigned int op, machine_mode mode)
22722 {
22723 int width = 1;
22724 /* Vector part. */
22725 if (VECTOR_MODE_P (mode))
22726 {
22727 int div = 1;
22728 if (INTEGRAL_MODE_P (mode))
22729 width = ix86_cost->reassoc_vec_int;
22730 else if (FLOAT_MODE_P (mode))
22731 width = ix86_cost->reassoc_vec_fp;
22732
22733 if (width == 1)
22734 return 1;
22735
22736 /* Integer vector instructions execute in FP unit
22737 and can execute 3 additions and one multiplication per cycle. */
22738 if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
22739 || ix86_tune == PROCESSOR_ZNVER3)
22740 && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
22741 return 1;
22742
22743 /* Account for targets that split wide vectors into multiple parts. */
22744 if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
22745 div = GET_MODE_BITSIZE (mode) / 128;
22746 else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
22747 div = GET_MODE_BITSIZE (mode) / 64;
22748 width = (width + div - 1) / div;
22749 }
22750 /* Scalar part. */
22751 else if (INTEGRAL_MODE_P (mode))
22752 width = ix86_cost->reassoc_int;
22753 else if (FLOAT_MODE_P (mode))
22754 width = ix86_cost->reassoc_fp;
22755
22756 /* Avoid using too many registers in 32bit mode. */
22757 if (!TARGET_64BIT && width > 2)
22758 width = 2;
22759 return width;
22760 }
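/* Illustrative numbers for the computation above (hypothetical cost-table
   values): with reassoc_vec_fp == 4 and TARGET_AVX256_SPLIT_REGS set, a
   256-bit FP vector mode gets div = 256 / 128 = 2 and therefore a width
   of (4 + 2 - 1) / 2 = 2; on a 32-bit target the result would further be
   capped at 2.  */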
22761
22762 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
22763 place emms and femms instructions. */
22764
22765 static machine_mode
22766 ix86_preferred_simd_mode (scalar_mode mode)
22767 {
22768 if (!TARGET_SSE)
22769 return word_mode;
22770
22771 switch (mode)
22772 {
22773 case E_QImode:
22774 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
22775 return V64QImode;
22776 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
22777 return V32QImode;
22778 else
22779 return V16QImode;
22780
22781 case E_HImode:
22782 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
22783 return V32HImode;
22784 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
22785 return V16HImode;
22786 else
22787 return V8HImode;
22788
22789 case E_SImode:
22790 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
22791 return V16SImode;
22792 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
22793 return V8SImode;
22794 else
22795 return V4SImode;
22796
22797 case E_DImode:
22798 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
22799 return V8DImode;
22800 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
22801 return V4DImode;
22802 else
22803 return V2DImode;
22804
22805 case E_HFmode:
22806 if (TARGET_AVX512FP16)
22807 {
22808 if (TARGET_AVX512VL)
22809 {
22810 if (TARGET_PREFER_AVX128)
22811 return V8HFmode;
22812 else if (TARGET_PREFER_AVX256)
22813 return V16HFmode;
22814 }
22815 return V32HFmode;
22816 }
22817 return word_mode;
22818
22819 case E_SFmode:
22820 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
22821 return V16SFmode;
22822 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
22823 return V8SFmode;
22824 else
22825 return V4SFmode;
22826
22827 case E_DFmode:
22828 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
22829 return V8DFmode;
22830 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
22831 return V4DFmode;
22832 else if (TARGET_SSE2)
22833 return V2DFmode;
22834 /* FALLTHRU */
22835
22836 default:
22837 return word_mode;
22838 }
22839 }
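/* Example of the selection above (illustrative): with -mavx2 and no
   preference for 128-bit vectors, SImode maps to V8SImode and DFmode to
   V4DFmode; with plain SSE2 the same scalar modes map to V4SImode and
   V2DFmode.  */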
22840
22841 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
22842 vectors. If AVX512F is enabled then try vectorizing with 512bit,
22843 256bit and 128bit vectors. */
22844
22845 static unsigned int
22846 ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
22847 {
22848 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
22849 {
22850 modes->safe_push (V64QImode);
22851 modes->safe_push (V32QImode);
22852 modes->safe_push (V16QImode);
22853 }
22854 else if (TARGET_AVX512F && all)
22855 {
22856 modes->safe_push (V32QImode);
22857 modes->safe_push (V16QImode);
22858 modes->safe_push (V64QImode);
22859 }
22860 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
22861 {
22862 modes->safe_push (V32QImode);
22863 modes->safe_push (V16QImode);
22864 }
22865 else if (TARGET_AVX && all)
22866 {
22867 modes->safe_push (V16QImode);
22868 modes->safe_push (V32QImode);
22869 }
22870 else if (TARGET_SSE2)
22871 modes->safe_push (V16QImode);
22872
22873 if (TARGET_MMX_WITH_SSE)
22874 modes->safe_push (V8QImode);
22875
22876 if (TARGET_SSE2)
22877 modes->safe_push (V4QImode);
22878
22879 return 0;
22880 }
22881
22882 /* Implementation of targetm.vectorize.get_mask_mode. */
22883
22884 static opt_machine_mode
22885 ix86_get_mask_mode (machine_mode data_mode)
22886 {
22887 unsigned vector_size = GET_MODE_SIZE (data_mode);
22888 unsigned nunits = GET_MODE_NUNITS (data_mode);
22889 unsigned elem_size = vector_size / nunits;
22890
22891 /* Scalar mask case. */
22892 if ((TARGET_AVX512F && vector_size == 64)
22893 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
22894 {
22895 if (elem_size == 4
22896 || elem_size == 8
22897 || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
22898 return smallest_int_mode_for_size (nunits);
22899 }
22900
22901 scalar_int_mode elem_mode
22902 = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);
22903
22904 gcc_assert (elem_size * nunits == vector_size);
22905
22906 return mode_for_vector (elem_mode, nunits);
22907 }
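/* Example of the mapping above (illustrative): V16SFmode data with
   AVX512F takes the scalar-mask branch and yields
   smallest_int_mode_for_size (16), i.e. an HImode mask register, while
   V4SFmode without AVX512VL falls through to a V4SImode vector mask.  */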
22908
22909 \f
22910
22911 /* Return class of registers which could be used for pseudo of MODE
22912 and of class RCLASS for spilling instead of memory. Return NO_REGS
22913 if it is not possible or non-profitable. */
22914
22915 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
22916
22917 static reg_class_t
22918 ix86_spill_class (reg_class_t rclass, machine_mode mode)
22919 {
22920 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
22921 && TARGET_SSE2
22922 && TARGET_INTER_UNIT_MOVES_TO_VEC
22923 && TARGET_INTER_UNIT_MOVES_FROM_VEC
22924 && (mode == SImode || (TARGET_64BIT && mode == DImode))
22925 && INTEGER_CLASS_P (rclass))
22926 return ALL_SSE_REGS;
22927 return NO_REGS;
22928 }
22929
22930 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
22931 but returns a lower bound. */
22932
22933 static unsigned int
22934 ix86_max_noce_ifcvt_seq_cost (edge e)
22935 {
22936 bool predictable_p = predictable_edge_p (e);
22937 if (predictable_p)
22938 {
22939 if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
22940 return param_max_rtl_if_conversion_predictable_cost;
22941 }
22942 else
22943 {
22944 if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
22945 return param_max_rtl_if_conversion_unpredictable_cost;
22946 }
22947
22948 return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
22949 }
22950
22951 /* Return true if SEQ is a good candidate as a replacement for the
22952 if-convertible sequence described in IF_INFO. */
22953
22954 static bool
22955 ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
22956 {
22957 if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
22958 {
22959 int cmov_cnt = 0;
22960 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
22961 Maybe we should allow even more conditional moves as long as they
22962 are used far enough not to stall the CPU, or also consider
22963 IF_INFO->TEST_BB succ edge probabilities. */
22964 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
22965 {
22966 rtx set = single_set (insn);
22967 if (!set)
22968 continue;
22969 if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
22970 continue;
22971 rtx src = SET_SRC (set);
22972 machine_mode mode = GET_MODE (src);
22973 if (GET_MODE_CLASS (mode) != MODE_INT
22974 && GET_MODE_CLASS (mode) != MODE_FLOAT)
22975 continue;
22976 if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
22977 || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
22978 continue;
22979 /* insn is CMOV or FCMOV. */
22980 if (++cmov_cnt > 1)
22981 return false;
22982 }
22983 }
22984 return default_noce_conversion_profitable_p (seq, if_info);
22985 }
22986
22987 /* x86-specific vector costs. */
22988 class ix86_vector_costs : public vector_costs
22989 {
22990 using vector_costs::vector_costs;
22991
22992 unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
22993 stmt_vec_info stmt_info, slp_tree node,
22994 tree vectype, int misalign,
22995 vect_cost_model_location where) override;
22996 };
22997
22998 /* Implement targetm.vectorize.create_costs. */
22999
23000 static vector_costs *
23001 ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
23002 {
23003 return new ix86_vector_costs (vinfo, costing_for_scalar);
23004 }
23005
23006 unsigned
23007 ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
23008 stmt_vec_info stmt_info, slp_tree node,
23009 tree vectype, int misalign,
23010 vect_cost_model_location where)
23011 {
23012 unsigned retval = 0;
23013 bool scalar_p
23014 = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
23015 int stmt_cost = -1;
23016
23017 bool fp = false;
23018 machine_mode mode = scalar_p ? SImode : TImode;
23019
23020 if (vectype != NULL)
23021 {
23022 fp = FLOAT_TYPE_P (vectype);
23023 mode = TYPE_MODE (vectype);
23024 if (scalar_p)
23025 mode = TYPE_MODE (TREE_TYPE (vectype));
23026 }
23027
23028 if ((kind == vector_stmt || kind == scalar_stmt)
23029 && stmt_info
23030 && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
23031 {
23032 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
23033 /*machine_mode inner_mode = mode;
23034 if (VECTOR_MODE_P (mode))
23035 inner_mode = GET_MODE_INNER (mode);*/
23036
23037 switch (subcode)
23038 {
23039 case PLUS_EXPR:
23040 case POINTER_PLUS_EXPR:
23041 case MINUS_EXPR:
23042 if (kind == scalar_stmt)
23043 {
23044 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
23045 stmt_cost = ix86_cost->addss;
23046 else if (X87_FLOAT_MODE_P (mode))
23047 stmt_cost = ix86_cost->fadd;
23048 else
23049 stmt_cost = ix86_cost->add;
23050 }
23051 else
23052 stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
23053 : ix86_cost->sse_op);
23054 break;
23055
23056 case MULT_EXPR:
23057 /* For MULT_HIGHPART_EXPR, x86 only supports pmulhw,
23058 take it as MULT_EXPR. */
23059 case MULT_HIGHPART_EXPR:
23060 stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
23061 break;
23062 /* There's no direct instruction for WIDEN_MULT_EXPR,
23063 take emulation into account. */
23064 case WIDEN_MULT_EXPR:
23065 stmt_cost = ix86_widen_mult_cost (ix86_cost, mode,
23066 TYPE_UNSIGNED (vectype));
23067 break;
23068
23069 case NEGATE_EXPR:
23070 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
23071 stmt_cost = ix86_cost->sse_op;
23072 else if (X87_FLOAT_MODE_P (mode))
23073 stmt_cost = ix86_cost->fchs;
23074 else if (VECTOR_MODE_P (mode))
23075 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
23076 else
23077 stmt_cost = ix86_cost->add;
23078 break;
23079 case TRUNC_DIV_EXPR:
23080 case CEIL_DIV_EXPR:
23081 case FLOOR_DIV_EXPR:
23082 case ROUND_DIV_EXPR:
23083 case TRUNC_MOD_EXPR:
23084 case CEIL_MOD_EXPR:
23085 case FLOOR_MOD_EXPR:
23086 case RDIV_EXPR:
23087 case ROUND_MOD_EXPR:
23088 case EXACT_DIV_EXPR:
23089 stmt_cost = ix86_division_cost (ix86_cost, mode);
23090 break;
23091
23092 case RSHIFT_EXPR:
23093 case LSHIFT_EXPR:
23094 case LROTATE_EXPR:
23095 case RROTATE_EXPR:
23096 {
23097 tree op1 = gimple_assign_rhs1 (stmt_info->stmt);
23098 tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
23099 stmt_cost = ix86_shift_rotate_cost
23100 (ix86_cost,
23101 (subcode == RSHIFT_EXPR
23102 && !TYPE_UNSIGNED (TREE_TYPE (op1)))
23103 ? ASHIFTRT : LSHIFTRT, mode,
23104 TREE_CODE (op2) == INTEGER_CST,
23105 cst_and_fits_in_hwi (op2)
23106 ? int_cst_value (op2) : -1,
23107 true, false, false, NULL, NULL);
23108 }
23109 break;
23110 case NOP_EXPR:
23111 /* Only sign-conversions are free. */
23112 if (tree_nop_conversion_p
23113 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
23114 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
23115 stmt_cost = 0;
23116 break;
23117
23118 case BIT_IOR_EXPR:
23119 case ABS_EXPR:
23120 case ABSU_EXPR:
23121 case MIN_EXPR:
23122 case MAX_EXPR:
23123 case BIT_XOR_EXPR:
23124 case BIT_AND_EXPR:
23125 case BIT_NOT_EXPR:
23126 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
23127 stmt_cost = ix86_cost->sse_op;
23128 else if (VECTOR_MODE_P (mode))
23129 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
23130 else
23131 stmt_cost = ix86_cost->add;
23132 break;
23133 default:
23134 break;
23135 }
23136 }
23137
23138 combined_fn cfn;
23139 if ((kind == vector_stmt || kind == scalar_stmt)
23140 && stmt_info
23141 && stmt_info->stmt
23142 && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
23143 switch (cfn)
23144 {
23145 case CFN_FMA:
23146 stmt_cost = ix86_vec_cost (mode,
23147 mode == SFmode ? ix86_cost->fmass
23148 : ix86_cost->fmasd);
23149 break;
23150 case CFN_MULH:
23151 stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
23152 break;
23153 default:
23154 break;
23155 }
23156
23157 /* If we do elementwise loads into a vector then we are bound by
23158 latency and execution resources for the many scalar loads
23159 (AGU and load ports). Try to account for this by scaling the
23160 construction cost by the number of elements involved. */
23161 if ((kind == vec_construct || kind == vec_to_scalar)
23162 && stmt_info
23163 && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
23164 || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
23165 && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
23166 && TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info))) != INTEGER_CST)
23167 {
23168 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
23169 stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
23170 }
23171 else if (kind == vec_construct
23172 && node
23173 && SLP_TREE_DEF_TYPE (node) == vect_external_def
23174 && INTEGRAL_TYPE_P (TREE_TYPE (vectype)))
23175 {
23176 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
23177 unsigned i;
23178 tree op;
23179 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
23180 if (TREE_CODE (op) == SSA_NAME)
23181 TREE_VISITED (op) = 0;
23182 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
23183 {
23184 if (TREE_CODE (op) != SSA_NAME
23185 || TREE_VISITED (op))
23186 continue;
23187 TREE_VISITED (op) = 1;
23188 gimple *def = SSA_NAME_DEF_STMT (op);
23189 tree tem;
23190 if (is_gimple_assign (def)
23191 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))
23192 && ((tem = gimple_assign_rhs1 (def)), true)
23193 && TREE_CODE (tem) == SSA_NAME
23194 /* A sign-change expands to nothing. */
23195 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (def)),
23196 TREE_TYPE (tem)))
23197 def = SSA_NAME_DEF_STMT (tem);
23198 /* When the component is loaded from memory we can directly
23199 move it to a vector register, otherwise we have to go
23200 via a GPR or via vpinsr which involves similar cost.
23201 Likewise with a BIT_FIELD_REF extracting from a vector
23202 register we can hope to avoid using a GPR. */
23203 if (!is_gimple_assign (def)
23204 || (!gimple_assign_load_p (def)
23205 && (gimple_assign_rhs_code (def) != BIT_FIELD_REF
23206 || !VECTOR_TYPE_P (TREE_TYPE
23207 (TREE_OPERAND (gimple_assign_rhs1 (def), 0))))))
23208 stmt_cost += ix86_cost->sse_to_integer;
23209 }
23210 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
23211 if (TREE_CODE (op) == SSA_NAME)
23212 TREE_VISITED (op) = 0;
23213 }
23214 if (stmt_cost == -1)
23215 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
23216
23217 /* Penalize DFmode vector operations for Bonnell. */
23218 if (TARGET_CPU_P (BONNELL) && kind == vector_stmt
23219 && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
23220 stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
23221
23222 /* Statements in an inner loop relative to the loop being
23223 vectorized are weighted more heavily. The value here is
23224 arbitrary and could potentially be improved with analysis. */
23225 retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
23226
23227 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
23228 for Silvermont, as it has an out-of-order integer pipeline and can execute
23229 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
23230 if ((TARGET_CPU_P (SILVERMONT) || TARGET_CPU_P (GOLDMONT)
23231 || TARGET_CPU_P (GOLDMONT_PLUS) || TARGET_CPU_P (INTEL))
23232 && stmt_info && stmt_info->stmt)
23233 {
23234 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
23235 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
23236 retval = (retval * 17) / 10;
23237 }
23238
23239 m_costs[where] += retval;
23240
23241 return retval;
23242 }
23243
23244 /* Validate target specific memory model bits in VAL. */
23245
23246 static unsigned HOST_WIDE_INT
23247 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
23248 {
23249 enum memmodel model = memmodel_from_int (val);
23250 bool strong;
23251
23252 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
23253 |MEMMODEL_MASK)
23254 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
23255 {
23256 warning (OPT_Winvalid_memory_model,
23257 "unknown architecture specific memory model");
23258 return MEMMODEL_SEQ_CST;
23259 }
23260 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
23261 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
23262 {
23263 warning (OPT_Winvalid_memory_model,
23264 "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
23265 "memory model");
23266 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
23267 }
23268 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
23269 {
23270 warning (OPT_Winvalid_memory_model,
23271 "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
23272 "memory model");
23273 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
23274 }
23275 return val;
23276 }
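/* Illustrative user-level use of the HLE bits validated above (a sketch,
   not part of the target code):

     while (__atomic_exchange_n (&lock, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;
     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Combining IX86_HLE_RELEASE with a weaker model such as __ATOMIC_RELAXED
   takes the warning path above and falls back to
   MEMMODEL_SEQ_CST | IX86_HLE_RELEASE.  */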
23277
23278 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
23279 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
23280 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
23281 or number of vecsize_mangle variants that should be emitted. */
23282
23283 static int
23284 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
23285 struct cgraph_simd_clone *clonei,
23286 tree base_type, int num)
23287 {
23288 int ret = 1;
23289
23290 if (clonei->simdlen
23291 && (clonei->simdlen < 2
23292 || clonei->simdlen > 1024
23293 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
23294 {
23295 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
23296 "unsupported simdlen %wd", clonei->simdlen.to_constant ());
23297 return 0;
23298 }
23299
23300 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
23301 if (TREE_CODE (ret_type) != VOID_TYPE)
23302 switch (TYPE_MODE (ret_type))
23303 {
23304 case E_QImode:
23305 case E_HImode:
23306 case E_SImode:
23307 case E_DImode:
23308 case E_SFmode:
23309 case E_DFmode:
23310 /* case E_SCmode: */
23311 /* case E_DCmode: */
23312 if (!AGGREGATE_TYPE_P (ret_type))
23313 break;
23314 /* FALLTHRU */
23315 default:
23316 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
23317 "unsupported return type %qT for simd", ret_type);
23318 return 0;
23319 }
23320
23321 tree t;
23322 int i;
23323 tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
23324 bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
23325
23326 for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
23327 t && t != void_list_node; t = TREE_CHAIN (t), i++)
23328 {
23329 tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
23330 switch (TYPE_MODE (arg_type))
23331 {
23332 case E_QImode:
23333 case E_HImode:
23334 case E_SImode:
23335 case E_DImode:
23336 case E_SFmode:
23337 case E_DFmode:
23338 /* case E_SCmode: */
23339 /* case E_DCmode: */
23340 if (!AGGREGATE_TYPE_P (arg_type))
23341 break;
23342 /* FALLTHRU */
23343 default:
23344 if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
23345 break;
23346 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
23347 "unsupported argument type %qT for simd", arg_type);
23348 return 0;
23349 }
23350 }
23351
23352 if (!TREE_PUBLIC (node->decl))
23353 {
23354 /* If the function isn't exported, we can pick up just one ISA
23355 for the clones. */
23356 if (TARGET_AVX512F)
23357 clonei->vecsize_mangle = 'e';
23358 else if (TARGET_AVX2)
23359 clonei->vecsize_mangle = 'd';
23360 else if (TARGET_AVX)
23361 clonei->vecsize_mangle = 'c';
23362 else
23363 clonei->vecsize_mangle = 'b';
23364 ret = 1;
23365 }
23366 else
23367 {
23368 clonei->vecsize_mangle = "bcde"[num];
23369 ret = 4;
23370 }
23371 clonei->mask_mode = VOIDmode;
23372 switch (clonei->vecsize_mangle)
23373 {
23374 case 'b':
23375 clonei->vecsize_int = 128;
23376 clonei->vecsize_float = 128;
23377 break;
23378 case 'c':
23379 clonei->vecsize_int = 128;
23380 clonei->vecsize_float = 256;
23381 break;
23382 case 'd':
23383 clonei->vecsize_int = 256;
23384 clonei->vecsize_float = 256;
23385 break;
23386 case 'e':
23387 clonei->vecsize_int = 512;
23388 clonei->vecsize_float = 512;
23389 if (TYPE_MODE (base_type) == QImode)
23390 clonei->mask_mode = DImode;
23391 else
23392 clonei->mask_mode = SImode;
23393 break;
23394 }
23395 if (clonei->simdlen == 0)
23396 {
23397 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
23398 clonei->simdlen = clonei->vecsize_int;
23399 else
23400 clonei->simdlen = clonei->vecsize_float;
23401 clonei->simdlen = clonei->simdlen
23402 / GET_MODE_BITSIZE (TYPE_MODE (base_type));
23403 }
23404 else if (clonei->simdlen > 16)
23405 {
23406 /* For compatibility with ICC, use the same upper bounds
23407 for simdlen. In particular, for CTYPE below, use the return type,
23408 unless the function returns void, in which case use the
23409 characteristic type. If it is possible for the given SIMDLEN to pass
23410 a CTYPE value in registers (8 [XYZ]MM* regs for 32-bit code, 16
23411 [XYZ]MM* regs for 64-bit code), accept that SIMDLEN, otherwise warn
23412 and don't emit the corresponding clone. */
23413 tree ctype = ret_type;
23414 if (TREE_CODE (ret_type) == VOID_TYPE)
23415 ctype = base_type;
23416 int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
23417 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
23418 cnt /= clonei->vecsize_int;
23419 else
23420 cnt /= clonei->vecsize_float;
23421 if (cnt > (TARGET_64BIT ? 16 : 8))
23422 {
23423 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
23424 "unsupported simdlen %wd",
23425 clonei->simdlen.to_constant ());
23426 return 0;
23427 }
23428 }
23429 return ret;
23430 }
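/* Worked example of the simdlen defaulting above (illustrative): for a
   '#pragma omp declare simd' function whose characteristic type is float,
   the 'b' (SSE) variant uses vecsize_float = 128 and so gets
   simdlen = 128 / 32 = 4, while the 'e' (AVX512) variant gets
   512 / 32 = 16 together with an SImode mask mode.  */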
23431
23432 /* If SIMD clone NODE can't be used in a vectorized loop
23433 in the current function, return -1; otherwise return the badness of
23434 using it (0 if it is most desirable from the vecsize_mangle point of
23435 view, 1 slightly less desirable, etc.). */
23436
23437 static int
23438 ix86_simd_clone_usable (struct cgraph_node *node)
23439 {
23440 switch (node->simdclone->vecsize_mangle)
23441 {
23442 case 'b':
23443 if (!TARGET_SSE2)
23444 return -1;
23445 if (!TARGET_AVX)
23446 return 0;
23447 return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1;
23448 case 'c':
23449 if (!TARGET_AVX)
23450 return -1;
23451 return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0;
23452 case 'd':
23453 if (!TARGET_AVX2)
23454 return -1;
23455 return TARGET_AVX512F ? 1 : 0;
23456 case 'e':
23457 if (!TARGET_AVX512F)
23458 return -1;
23459 return 0;
23460 default:
23461 gcc_unreachable ();
23462 }
23463 }
23464
23465 /* This function adjusts the unroll factor based on
23466 hardware capabilities. For example, bdver3 has
23467 a loop buffer which makes unrolling of smaller
23468 loops less important. It decides the unroll
23469 factor using the number of memory references
23470 (the value 32 is used) as a heuristic. */
23471
23472 static unsigned
23473 ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
23474 {
23475 basic_block *bbs;
23476 rtx_insn *insn;
23477 unsigned i;
23478 unsigned mem_count = 0;
23479
23480 if (!TARGET_ADJUST_UNROLL)
23481 return nunroll;
23482
23483 /* Count the number of memory references within the loop body.
23484 This value determines the unrolling factor for bdver3 and bdver4
23485 architectures. */
23486 subrtx_iterator::array_type array;
23487 bbs = get_loop_body (loop);
23488 for (i = 0; i < loop->num_nodes; i++)
23489 FOR_BB_INSNS (bbs[i], insn)
23490 if (NONDEBUG_INSN_P (insn))
23491 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
23492 if (const_rtx x = *iter)
23493 if (MEM_P (x))
23494 {
23495 machine_mode mode = GET_MODE (x);
23496 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
23497 if (n_words > 4)
23498 mem_count += 2;
23499 else
23500 mem_count += 1;
23501 }
23502 free (bbs);
23503
23504 if (mem_count && mem_count <= 32)
23505 return MIN (nunroll, 32 / mem_count);
23506
23507 return nunroll;
23508 }
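/* Worked example of the heuristic above (illustrative): a loop body with
   four word-sized memory references gets mem_count = 4, so the unroll
   factor is capped at MIN (nunroll, 32 / 4) = MIN (nunroll, 8); a single
   reference wider than four words counts double.  */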
23509
23510
23511 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
23512
23513 static bool
23514 ix86_float_exceptions_rounding_supported_p (void)
23515 {
23516 /* For x87 floating point with standard excess precision handling,
23517 there is no adddf3 pattern (since x87 floating point only has
23518 XFmode operations) so the default hook implementation gets this
23519 wrong. */
23520 return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
23521 }
23522
23523 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
23524
23525 static void
23526 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
23527 {
23528 if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
23529 return;
23530 tree exceptions_var = create_tmp_var_raw (integer_type_node);
23531 if (TARGET_80387)
23532 {
23533 tree fenv_index_type = build_index_type (size_int (6));
23534 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
23535 tree fenv_var = create_tmp_var_raw (fenv_type);
23536 TREE_ADDRESSABLE (fenv_var) = 1;
23537 tree fenv_ptr = build_pointer_type (fenv_type);
23538 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
23539 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
23540 tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
23541 tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
23542 tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
23543 tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
23544 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
23545 tree hold_fnclex = build_call_expr (fnclex, 0);
23546 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
23547 NULL_TREE, NULL_TREE);
23548 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
23549 hold_fnclex);
23550 *clear = build_call_expr (fnclex, 0);
23551 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
23552 tree fnstsw_call = build_call_expr (fnstsw, 0);
23553 tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
23554 fnstsw_call, NULL_TREE, NULL_TREE);
23555 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
23556 tree update_mod = build4 (TARGET_EXPR, integer_type_node,
23557 exceptions_var, exceptions_x87,
23558 NULL_TREE, NULL_TREE);
23559 *update = build2 (COMPOUND_EXPR, integer_type_node,
23560 sw_mod, update_mod);
23561 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
23562 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
23563 }
23564 if (TARGET_SSE && TARGET_SSE_MATH)
23565 {
23566 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
23567 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
23568 tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
23569 tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
23570 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
23571 tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
23572 mxcsr_orig_var, stmxcsr_hold_call,
23573 NULL_TREE, NULL_TREE);
23574 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
23575 mxcsr_orig_var,
23576 build_int_cst (unsigned_type_node, 0x1f80));
23577 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
23578 build_int_cst (unsigned_type_node, 0xffffffc0));
23579 tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
23580 mxcsr_mod_var, hold_mod_val,
23581 NULL_TREE, NULL_TREE);
23582 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
23583 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
23584 hold_assign_orig, hold_assign_mod);
23585 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
23586 ldmxcsr_hold_call);
23587 if (*hold)
23588 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
23589 else
23590 *hold = hold_all;
23591 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
23592 if (*clear)
23593 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
23594 ldmxcsr_clear_call);
23595 else
23596 *clear = ldmxcsr_clear_call;
23597 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
23598 tree exceptions_sse = fold_convert (integer_type_node,
23599 stxmcsr_update_call);
23600 if (*update)
23601 {
23602 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
23603 exceptions_var, exceptions_sse);
23604 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
23605 exceptions_var, exceptions_mod);
23606 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
23607 exceptions_assign);
23608 }
23609 else
23610 *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
23611 exceptions_sse, NULL_TREE, NULL_TREE);
23612 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
23613 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
23614 ldmxcsr_update_call);
23615 }
23616 tree atomic_feraiseexcept
23617 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
23618 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
23619 1, exceptions_var);
23620 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
23621 atomic_feraiseexcept_call);
23622 }
23623
23624 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
23625 /* For i386, a common symbol is local only for non-PIE binaries. For
23626 x86-64, a common symbol is local only for non-PIE binaries or if the
23627 linker supports copy relocs in PIE binaries. */
23628
23629 static bool
23630 ix86_binds_local_p (const_tree exp)
23631 {
23632 bool direct_extern_access
23633 = (ix86_direct_extern_access
23634 && !(VAR_OR_FUNCTION_DECL_P (exp)
23635 && lookup_attribute ("nodirect_extern_access",
23636 DECL_ATTRIBUTES (exp))));
23637 if (!direct_extern_access)
23638 ix86_has_no_direct_extern_access = true;
23639 return default_binds_local_p_3 (exp, flag_shlib != 0, true,
23640 direct_extern_access,
23641 (direct_extern_access
23642 && (!flag_pic
23643 || (TARGET_64BIT
23644 && HAVE_LD_PIE_COPYRELOC != 0))));
23645 }
23646
23647 /* If flag_pic or ix86_direct_extern_access is false, then neither
23648 local nor global relocs should be placed in readonly memory. */
23649
23650 static int
23651 ix86_reloc_rw_mask (void)
23652 {
23653 return (flag_pic || !ix86_direct_extern_access) ? 3 : 0;
23654 }
23655 #endif
23656
23657 /* If MEM is in the form of [base+offset], extract the two parts
23658 of the address into BASE and OFFSET; otherwise return false. */
23659
23660 static bool
23661 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
23662 {
23663 rtx addr;
23664
23665 gcc_assert (MEM_P (mem));
23666
23667 addr = XEXP (mem, 0);
23668
23669 if (GET_CODE (addr) == CONST)
23670 addr = XEXP (addr, 0);
23671
23672 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
23673 {
23674 *base = addr;
23675 *offset = const0_rtx;
23676 return true;
23677 }
23678
23679 if (GET_CODE (addr) == PLUS
23680 && (REG_P (XEXP (addr, 0))
23681 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
23682 && CONST_INT_P (XEXP (addr, 1)))
23683 {
23684 *base = XEXP (addr, 0);
23685 *offset = XEXP (addr, 1);
23686 return true;
23687 }
23688
23689 return false;
23690 }
23691
23692 /* Given OPERANDS of consecutive load/store instructions, check if we
23693 can merge them into a move multiple. LOAD is true if they are loads.
23694 MODE is the mode of the memory operands. */
23695
23696 bool
23697 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
23698 machine_mode mode)
23699 {
23700 HOST_WIDE_INT offval_1, offval_2, msize;
23701 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
23702
23703 if (load)
23704 {
23705 mem_1 = operands[1];
23706 mem_2 = operands[3];
23707 reg_1 = operands[0];
23708 reg_2 = operands[2];
23709 }
23710 else
23711 {
23712 mem_1 = operands[0];
23713 mem_2 = operands[2];
23714 reg_1 = operands[1];
23715 reg_2 = operands[3];
23716 }
23717
23718 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
23719
23720 if (REGNO (reg_1) != REGNO (reg_2))
23721 return false;
23722
23723 /* Check if the addresses are in the form of [base+offset]. */
23724 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
23725 return false;
23726 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
23727 return false;
23728
23729 /* Check if the bases are the same. */
23730 if (!rtx_equal_p (base_1, base_2))
23731 return false;
23732
23733 offval_1 = INTVAL (offset_1);
23734 offval_2 = INTVAL (offset_2);
23735 msize = GET_MODE_SIZE (mode);
23736 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
23737 if (offval_1 + msize != offval_2)
23738 return false;
23739
23740 return true;
23741 }
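/* Example of the adjacency check above (illustrative): two DImode memory
   operands at [base + 0] and [base + 8] satisfy
   offval_1 + msize == offval_2 and are accepted; swapping the operands,
   or using offsets 0 and 16, fails the check.  */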
23742
23743 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
23744
23745 static bool
23746 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
23747 optimization_type opt_type)
23748 {
23749 switch (op)
23750 {
23751 case asin_optab:
23752 case acos_optab:
23753 case log1p_optab:
23754 case exp_optab:
23755 case exp10_optab:
23756 case exp2_optab:
23757 case expm1_optab:
23758 case ldexp_optab:
23759 case scalb_optab:
23760 case round_optab:
23761 return opt_type == OPTIMIZE_FOR_SPEED;
23762
23763 case rint_optab:
23764 if (SSE_FLOAT_MODE_P (mode1)
23765 && TARGET_SSE_MATH
23766 && !flag_trapping_math
23767 && !TARGET_SSE4_1
23768 && mode1 != HFmode)
23769 return opt_type == OPTIMIZE_FOR_SPEED;
23770 return true;
23771
23772 case floor_optab:
23773 case ceil_optab:
23774 case btrunc_optab:
23775 if (((SSE_FLOAT_MODE_P (mode1)
23776 && TARGET_SSE_MATH
23777 && TARGET_SSE4_1)
23778 || mode1 == HFmode)
23779 && !flag_trapping_math)
23780 return true;
23781 return opt_type == OPTIMIZE_FOR_SPEED;
23782
23783 case rsqrt_optab:
23784 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
23785
23786 default:
23787 return true;
23788 }
23789 }
23790
23791 /* Implement the TARGET_GEN_MEMSET_SCRATCH_RTX hook. Return a scratch
23792 register in MODE for vector load and store. */
23793
23794 rtx
23795 ix86_gen_scratch_sse_rtx (machine_mode mode)
23796 {
23797 return gen_reg_rtx (mode);
23798 }
23799
23800 /* Address space support.
23801
23802 This is not "far pointers" in the 16-bit sense, but an easy way
23803 to use %fs and %gs segment prefixes. Therefore:
23804
23805 (a) All address spaces have the same modes,
23806 (b) All address spaces have the same address forms,
23807 (c) While %fs and %gs are technically subsets of the generic
23808 address space, they are probably not subsets of each other.
23809 (d) Since we have no access to the segment base register values
23810 without resorting to a system call, we cannot convert a
23811 non-default address space to a default address space.
23812 Therefore we do not claim %fs or %gs are subsets of generic.
23813
23814 Therefore we can (mostly) use the default hooks. */
23815
23816 /* All use of segmentation is assumed to make address 0 valid. */
23817
23818 static bool
23819 ix86_addr_space_zero_address_valid (addr_space_t as)
23820 {
23821 return as != ADDR_SPACE_GENERIC;
23822 }
23823
23824 static void
23825 ix86_init_libfuncs (void)
23826 {
23827 if (TARGET_64BIT)
23828 {
23829 set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
23830 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
23831 }
23832 else
23833 {
23834 set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
23835 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
23836 }
23837
23838 #if TARGET_MACHO
23839 darwin_rename_builtins ();
23840 #endif
23841 }
23842
23843 /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
23844 FPU, assume that the fpcw is set to extended precision; when using
23845 only SSE, rounding is correct; when using both SSE and the FPU,
23846 the rounding precision is indeterminate, since either may be chosen
23847 apparently at random. */
23848
23849 static enum flt_eval_method
23850 ix86_get_excess_precision (enum excess_precision_type type)
23851 {
23852 switch (type)
23853 {
23854 case EXCESS_PRECISION_TYPE_FAST:
23855 /* The fastest type to promote to will always be the native type,
23856 whether that occurs with implicit excess precision or
23857 otherwise. */
23858 return TARGET_AVX512FP16
23859 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23860 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
23861 case EXCESS_PRECISION_TYPE_STANDARD:
23862 case EXCESS_PRECISION_TYPE_IMPLICIT:
23863 /* Otherwise, the excess precision we want when we are
23864 in a standards compliant mode, and the implicit precision we
23865 provide would be identical were it not for the unpredictable
23866 cases. */
23867 if (TARGET_AVX512FP16 && TARGET_SSE_MATH)
23868 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23869 else if (!TARGET_80387)
23870 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
23871 else if (!TARGET_MIX_SSE_I387)
23872 {
23873 if (!(TARGET_SSE && TARGET_SSE_MATH))
23874 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
23875 else if (TARGET_SSE2)
23876 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
23877 }
23878
23879 /* If we are in standards compliant mode, but we know we will
23880 calculate in unpredictable precision, return
23881 FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
23882 excess precision if the target can't guarantee it will honor
23883 it. */
23884 return (type == EXCESS_PRECISION_TYPE_STANDARD
23885 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
23886 : FLT_EVAL_METHOD_UNPREDICTABLE);
23887 case EXCESS_PRECISION_TYPE_FLOAT16:
23888 if (TARGET_80387
23889 && !(TARGET_SSE_MATH && TARGET_SSE))
23890 error ("%<-fexcess-precision=16%> is not compatible with %<-mfpmath=387%>");
23891 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23892 default:
23893 gcc_unreachable ();
23894 }
23895
23896 return FLT_EVAL_METHOD_UNPREDICTABLE;
23897 }
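/* Example of the mapping above (illustrative): an -m32 -mno-sse build in
   a standards-compliant mode evaluates float and double in x87 extended
   precision and so returns FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE, while
   a 64-bit build using SSE2 math returns
   FLT_EVAL_METHOD_PROMOTE_TO_FLOAT.  */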
23898
23899 /* Implement PUSH_ROUNDING. On the 386, we have a pushw instruction that
23900 decrements by exactly 2 no matter what the position was; there is no pushb.
23901
23902 But as the CIE data alignment factor on this arch is -4 for 32-bit targets
23903 and -8 for 64-bit targets, we need to make sure all stack pointer adjustments
23904 are multiples of 4 for 32-bit targets and 8 for 64-bit targets. */
23905
23906 poly_int64
23907 ix86_push_rounding (poly_int64 bytes)
23908 {
23909 return ROUND_UP (bytes, UNITS_PER_WORD);
23910 }
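/* Example of the rounding above (illustrative): pushing a 2-byte operand
   is rounded up to 4 bytes on 32-bit targets and to 8 bytes on 64-bit
   targets, keeping stack adjustments compatible with the CIE data
   alignment factor mentioned above.  */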
23911
23912 /* Target-specific selftests. */
23913
23914 #if CHECKING_P
23915
23916 namespace selftest {
23917
23918 /* Verify that hard regs are dumped as expected (in compact mode). */
23919
23920 static void
23921 ix86_test_dumping_hard_regs ()
23922 {
23923 ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
23924 ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
23925 }
23926
23927 /* Test dumping an insn with repeated references to the same SCRATCH,
23928 to verify the rtx_reuse code. */
23929
23930 static void
23931 ix86_test_dumping_memory_blockage ()
23932 {
23933 set_new_first_and_last_insn (NULL, NULL);
23934
23935 rtx pat = gen_memory_blockage ();
23936 rtx_reuse_manager r;
23937 r.preprocess (pat);
23938
23939 /* Verify that the repeated references to the SCRATCH show use of
23940 reuse IDs. The first should be prefixed with a reuse ID,
23941 and the second should be dumped as a "reuse_rtx" of that ID.
23942 The expected string assumes Pmode == DImode. */
23943 if (Pmode == DImode)
23944 ASSERT_RTL_DUMP_EQ_WITH_REUSE
23945 ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
23946 " (unspec:BLK [\n"
23947 " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
23948 " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
23949 }
23950
23951 /* Verify loading an RTL dump; specifically a dump of copying
23952 a param on x86_64 from a hard reg into the frame.
23953 This test is target-specific since the dump contains target-specific
23954 hard reg names. */
23955
23956 static void
23957 ix86_test_loading_dump_fragment_1 ()
23958 {
23959 rtl_dump_test t (SELFTEST_LOCATION,
23960 locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
23961
23962 rtx_insn *insn = get_insn_by_uid (1);
23963
23964 /* The block structure and indentation here is purely for
23965 readability; it mirrors the structure of the rtx. */
23966 tree mem_expr;
23967 {
23968 rtx pat = PATTERN (insn);
23969 ASSERT_EQ (SET, GET_CODE (pat));
23970 {
23971 rtx dest = SET_DEST (pat);
23972 ASSERT_EQ (MEM, GET_CODE (dest));
23973 /* Verify the "/c" was parsed. */
23974 ASSERT_TRUE (RTX_FLAG (dest, call));
23975 ASSERT_EQ (SImode, GET_MODE (dest));
23976 {
23977 rtx addr = XEXP (dest, 0);
23978 ASSERT_EQ (PLUS, GET_CODE (addr));
23979 ASSERT_EQ (DImode, GET_MODE (addr));
23980 {
23981 rtx lhs = XEXP (addr, 0);
23982 /* Verify that the "frame" REG was consolidated. */
23983 ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
23984 }
23985 {
23986 rtx rhs = XEXP (addr, 1);
23987 ASSERT_EQ (CONST_INT, GET_CODE (rhs));
23988 ASSERT_EQ (-4, INTVAL (rhs));
23989 }
23990 }
23991 /* Verify the "[1 i+0 S4 A32]" was parsed. */
23992 ASSERT_EQ (1, MEM_ALIAS_SET (dest));
23993 /* "i" should have been handled by synthesizing a global int
23994 variable named "i". */
23995 mem_expr = MEM_EXPR (dest);
23996 ASSERT_NE (mem_expr, NULL);
23997 ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
23998 ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
23999 ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
24000 ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
24001 /* "+0". */
24002 ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
24003 ASSERT_EQ (0, MEM_OFFSET (dest));
24004 /* "S4". */
24005 ASSERT_EQ (4, MEM_SIZE (dest));
24006 /* "A32. */
24007 ASSERT_EQ (32, MEM_ALIGN (dest));
24008 }
24009 {
24010 rtx src = SET_SRC (pat);
24011 ASSERT_EQ (REG, GET_CODE (src));
24012 ASSERT_EQ (SImode, GET_MODE (src));
24013 ASSERT_EQ (5, REGNO (src));
24014 tree reg_expr = REG_EXPR (src);
24015 /* "i" here should point to the same var as for the MEM_EXPR. */
24016 ASSERT_EQ (reg_expr, mem_expr);
24017 }
24018 }
24019 }
24020
24021 /* Verify that the RTL loader copes with a call_insn dump.
24022 This test is target-specific since the dump contains a target-specific
24023 hard reg name. */
24024
24025 static void
24026 ix86_test_loading_call_insn ()
24027 {
24028 /* The test dump includes register "xmm0", which requires TARGET_SSE
24029 to exist. */
24030 if (!TARGET_SSE)
24031 return;
24032
24033 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
24034
24035 rtx_insn *insn = get_insns ();
24036 ASSERT_EQ (CALL_INSN, GET_CODE (insn));
24037
24038 /* "/j". */
24039 ASSERT_TRUE (RTX_FLAG (insn, jump));
24040
24041 rtx pat = PATTERN (insn);
24042 ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
24043
24044 /* Verify REG_NOTES. */
24045 {
24046 /* "(expr_list:REG_CALL_DECL". */
24047 ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
24048 rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
24049 ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
24050
24051 /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
24052 rtx_expr_list *note1 = note0->next ();
24053 ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
24054
24055 ASSERT_EQ (NULL, note1->next ());
24056 }
24057
24058 /* Verify CALL_INSN_FUNCTION_USAGE. */
24059 {
24060 /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
24061 rtx_expr_list *usage
24062 = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
24063 ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
24064 ASSERT_EQ (DFmode, GET_MODE (usage));
24065 ASSERT_EQ (USE, GET_CODE (usage->element ()));
24066 ASSERT_EQ (NULL, usage->next ());
24067 }
24068 }
24069
24070 /* Verify that the RTL loader copes with a dump from print_rtx_function.
24071 This test is target-specific since the dump contains target-specific
24072 hard reg names. */
24073
24074 static void
24075 ix86_test_loading_full_dump ()
24076 {
24077 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
24078
24079 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
24080
24081 rtx_insn *insn_1 = get_insn_by_uid (1);
24082 ASSERT_EQ (NOTE, GET_CODE (insn_1));
24083
24084 rtx_insn *insn_7 = get_insn_by_uid (7);
24085 ASSERT_EQ (INSN, GET_CODE (insn_7));
24086 ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
24087
24088 rtx_insn *insn_15 = get_insn_by_uid (15);
24089 ASSERT_EQ (INSN, GET_CODE (insn_15));
24090 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
24091
24092 /* Verify crtl->return_rtx. */
24093 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
24094 ASSERT_EQ (0, REGNO (crtl->return_rtx));
24095 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
24096 }
24097
24098 /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
24099 In particular, verify that it correctly loads the 2nd operand.
24100 This test is target-specific since these are machine-specific
24101 operands (and enums). */
24102
24103 static void
24104 ix86_test_loading_unspec ()
24105 {
24106 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
24107
24108 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
24109
24110 ASSERT_TRUE (cfun);
24111
24112 /* Test of an UNSPEC. */
24113 rtx_insn *insn = get_insns ();
24114 ASSERT_EQ (INSN, GET_CODE (insn));
24115 rtx set = single_set (insn);
24116 ASSERT_NE (NULL, set);
24117 rtx dst = SET_DEST (set);
24118 ASSERT_EQ (MEM, GET_CODE (dst));
24119 rtx src = SET_SRC (set);
24120 ASSERT_EQ (UNSPEC, GET_CODE (src));
24121 ASSERT_EQ (BLKmode, GET_MODE (src));
24122 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
24123
24124 rtx v0 = XVECEXP (src, 0, 0);
24125
24126 /* Verify that the two uses of the first SCRATCH have pointer
24127 equality. */
24128 rtx scratch_a = XEXP (dst, 0);
24129 ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
24130
24131 rtx scratch_b = XEXP (v0, 0);
24132 ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
24133
24134 ASSERT_EQ (scratch_a, scratch_b);
24135
24136 /* Verify that the two mems are thus treated as equal. */
24137 ASSERT_TRUE (rtx_equal_p (dst, v0));
24138
24139 /* Verify that the insn is recognized. */
24140 ASSERT_NE (-1, recog_memoized (insn));
24141
24142 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
24143 insn = NEXT_INSN (insn);
24144 ASSERT_EQ (INSN, GET_CODE (insn));
24145
24146 set = single_set (insn);
24147 ASSERT_NE (NULL, set);
24148
24149 src = SET_SRC (set);
24150 ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
24151 ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
24152 }
24153
24154 /* Run all target-specific selftests. */
24155
24156 static void
24157 ix86_run_selftests (void)
24158 {
24159 ix86_test_dumping_hard_regs ();
24160 ix86_test_dumping_memory_blockage ();
24161
24162 /* Various tests of loading RTL dumps, here because they contain
24163 ix86-isms (e.g. names of hard regs). */
24164 ix86_test_loading_dump_fragment_1 ();
24165 ix86_test_loading_call_insn ();
24166 ix86_test_loading_full_dump ();
24167 ix86_test_loading_unspec ();
24168 }
24169
24170 } // namespace selftest
24171
24172 #endif /* CHECKING_P */
24173
24174 /* Initialize the GCC target structure. */
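/* Each #undef/#define pair below overrides one hook in the TARGET_INITIALIZER
   aggregate, which is used at the end of this file to initialize the global
   `targetm' structure through which target-independent code reaches these
   hooks.  As a rough, illustrative sketch (not code from this file), the hook
   installed by TARGET_RETURN_IN_MEMORY just below is ultimately reached along
   the lines of:

     if (targetm.calls.return_in_memory (type, fntype))
       ... return the aggregate through a hidden reference ...  */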
24175 #undef TARGET_RETURN_IN_MEMORY
24176 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
24177
24178 #undef TARGET_LEGITIMIZE_ADDRESS
24179 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
24180
24181 #undef TARGET_ATTRIBUTE_TABLE
24182 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
24183 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
24184 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
24185 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
24186 # undef TARGET_MERGE_DECL_ATTRIBUTES
24187 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
24188 #endif
24189
24190 #undef TARGET_COMP_TYPE_ATTRIBUTES
24191 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
24192
24193 #undef TARGET_INIT_BUILTINS
24194 #define TARGET_INIT_BUILTINS ix86_init_builtins
24195 #undef TARGET_BUILTIN_DECL
24196 #define TARGET_BUILTIN_DECL ix86_builtin_decl
24197 #undef TARGET_EXPAND_BUILTIN
24198 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
24199
24200 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
24201 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
24202 ix86_builtin_vectorized_function
24203
24204 #undef TARGET_VECTORIZE_BUILTIN_GATHER
24205 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
24206
24207 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
24208 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
24209
24210 #undef TARGET_BUILTIN_RECIPROCAL
24211 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
24212
24213 #undef TARGET_ASM_FUNCTION_EPILOGUE
24214 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
24215
24216 #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
24217 #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
24218 ix86_print_patchable_function_entry
24219
24220 #undef TARGET_ENCODE_SECTION_INFO
24221 #ifndef SUBTARGET_ENCODE_SECTION_INFO
24222 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
24223 #else
24224 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
24225 #endif
24226
24227 #undef TARGET_ASM_OPEN_PAREN
24228 #define TARGET_ASM_OPEN_PAREN ""
24229 #undef TARGET_ASM_CLOSE_PAREN
24230 #define TARGET_ASM_CLOSE_PAREN ""
24231
24232 #undef TARGET_ASM_BYTE_OP
24233 #define TARGET_ASM_BYTE_OP ASM_BYTE
24234
24235 #undef TARGET_ASM_ALIGNED_HI_OP
24236 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
24237 #undef TARGET_ASM_ALIGNED_SI_OP
24238 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
24239 #ifdef ASM_QUAD
24240 #undef TARGET_ASM_ALIGNED_DI_OP
24241 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
24242 #endif
24243
24244 #undef TARGET_PROFILE_BEFORE_PROLOGUE
24245 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
24246
24247 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
24248 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
24249
24250 #undef TARGET_ASM_UNALIGNED_HI_OP
24251 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
24252 #undef TARGET_ASM_UNALIGNED_SI_OP
24253 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
24254 #undef TARGET_ASM_UNALIGNED_DI_OP
24255 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
24256
24257 #undef TARGET_PRINT_OPERAND
24258 #define TARGET_PRINT_OPERAND ix86_print_operand
24259 #undef TARGET_PRINT_OPERAND_ADDRESS
24260 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
24261 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
24262 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
24263 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
24264 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
24265
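/* Instruction scheduling hooks.  */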
24266 #undef TARGET_SCHED_INIT_GLOBAL
24267 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
24268 #undef TARGET_SCHED_ADJUST_COST
24269 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
24270 #undef TARGET_SCHED_ISSUE_RATE
24271 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
24272 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
24273 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
24274 ia32_multipass_dfa_lookahead
24275 #undef TARGET_SCHED_MACRO_FUSION_P
24276 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
24277 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
24278 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
24279
24280 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
24281 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
24282
24283 #undef TARGET_MEMMODEL_CHECK
24284 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
24285
24286 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
24287 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
24288
24289 #ifdef HAVE_AS_TLS
24290 #undef TARGET_HAVE_TLS
24291 #define TARGET_HAVE_TLS true
24292 #endif
24293 #undef TARGET_CANNOT_FORCE_CONST_MEM
24294 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
24295 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
24296 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
24297
24298 #undef TARGET_DELEGITIMIZE_ADDRESS
24299 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
24300
24301 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
24302 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
24303
24304 #undef TARGET_MS_BITFIELD_LAYOUT_P
24305 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
24306
24307 #if TARGET_MACHO
24308 #undef TARGET_BINDS_LOCAL_P
24309 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
24310 #else
24311 #undef TARGET_BINDS_LOCAL_P
24312 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
24313 #endif
24314 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
24315 #undef TARGET_BINDS_LOCAL_P
24316 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
24317 #endif
24318
24319 #undef TARGET_ASM_OUTPUT_MI_THUNK
24320 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
24321 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
24322 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
24323
24324 #undef TARGET_ASM_FILE_START
24325 #define TARGET_ASM_FILE_START x86_file_start
24326
24327 #undef TARGET_OPTION_OVERRIDE
24328 #define TARGET_OPTION_OVERRIDE ix86_option_override
24329
24330 #undef TARGET_REGISTER_MOVE_COST
24331 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
24332 #undef TARGET_MEMORY_MOVE_COST
24333 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
24334 #undef TARGET_RTX_COSTS
24335 #define TARGET_RTX_COSTS ix86_rtx_costs
24336 #undef TARGET_ADDRESS_COST
24337 #define TARGET_ADDRESS_COST ix86_address_cost
24338
24339 #undef TARGET_OVERLAP_OP_BY_PIECES_P
24340 #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
24341
24342 #undef TARGET_FLAGS_REGNUM
24343 #define TARGET_FLAGS_REGNUM FLAGS_REG
24344 #undef TARGET_FIXED_CONDITION_CODE_REGS
24345 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
24346 #undef TARGET_CC_MODES_COMPATIBLE
24347 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
24348
24349 #undef TARGET_MACHINE_DEPENDENT_REORG
24350 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
24351
24352 #undef TARGET_BUILD_BUILTIN_VA_LIST
24353 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
24354
24355 #undef TARGET_FOLD_BUILTIN
24356 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
24357
24358 #undef TARGET_GIMPLE_FOLD_BUILTIN
24359 #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
24360
24361 #undef TARGET_COMPARE_VERSION_PRIORITY
24362 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
24363
24364 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
24365 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
24366 ix86_generate_version_dispatcher_body
24367
24368 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
24369 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
24370 ix86_get_function_versions_dispatcher
24371
24372 #undef TARGET_ENUM_VA_LIST_P
24373 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
24374
24375 #undef TARGET_FN_ABI_VA_LIST
24376 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
24377
24378 #undef TARGET_CANONICAL_VA_LIST_TYPE
24379 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
24380
24381 #undef TARGET_EXPAND_BUILTIN_VA_START
24382 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
24383
24384 #undef TARGET_MD_ASM_ADJUST
24385 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
24386
24387 #undef TARGET_C_EXCESS_PRECISION
24388 #define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision
24389 #undef TARGET_PROMOTE_PROTOTYPES
24390 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
24391 #undef TARGET_PUSH_ARGUMENT
24392 #define TARGET_PUSH_ARGUMENT ix86_push_argument
24393 #undef TARGET_SETUP_INCOMING_VARARGS
24394 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
24395 #undef TARGET_MUST_PASS_IN_STACK
24396 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
24397 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
24398 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
24399 #undef TARGET_FUNCTION_ARG_ADVANCE
24400 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
24401 #undef TARGET_FUNCTION_ARG
24402 #define TARGET_FUNCTION_ARG ix86_function_arg
24403 #undef TARGET_INIT_PIC_REG
24404 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
24405 #undef TARGET_USE_PSEUDO_PIC_REG
24406 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
24407 #undef TARGET_FUNCTION_ARG_BOUNDARY
24408 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
24409 #undef TARGET_PASS_BY_REFERENCE
24410 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
24411 #undef TARGET_INTERNAL_ARG_POINTER
24412 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
24413 #undef TARGET_UPDATE_STACK_BOUNDARY
24414 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
24415 #undef TARGET_GET_DRAP_RTX
24416 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
24417 #undef TARGET_STRICT_ARGUMENT_NAMING
24418 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
24419 #undef TARGET_STATIC_CHAIN
24420 #define TARGET_STATIC_CHAIN ix86_static_chain
24421 #undef TARGET_TRAMPOLINE_INIT
24422 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
24423 #undef TARGET_RETURN_POPS_ARGS
24424 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
24425
24426 #undef TARGET_WARN_FUNC_RETURN
24427 #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
24428
24429 #undef TARGET_LEGITIMATE_COMBINED_INSN
24430 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
24431
24432 #undef TARGET_ASAN_SHADOW_OFFSET
24433 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
24434
24435 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
24436 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
24437
24438 #undef TARGET_SCALAR_MODE_SUPPORTED_P
24439 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
24440
24441 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
24442 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
24443 ix86_libgcc_floating_mode_supported_p
24444
24445 #undef TARGET_VECTOR_MODE_SUPPORTED_P
24446 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
24447
24448 #undef TARGET_C_MODE_FOR_SUFFIX
24449 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
24450
24451 #ifdef HAVE_AS_TLS
24452 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
24453 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
24454 #endif
24455
24456 #ifdef SUBTARGET_INSERT_ATTRIBUTES
24457 #undef TARGET_INSERT_ATTRIBUTES
24458 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
24459 #endif
24460
24461 #undef TARGET_MANGLE_TYPE
24462 #define TARGET_MANGLE_TYPE ix86_mangle_type
24463
24464 #undef TARGET_STACK_PROTECT_GUARD
24465 #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
24466
24467 #if !TARGET_MACHO
24468 #undef TARGET_STACK_PROTECT_FAIL
24469 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
24470 #endif
24471
24472 #undef TARGET_FUNCTION_VALUE
24473 #define TARGET_FUNCTION_VALUE ix86_function_value
24474
24475 #undef TARGET_FUNCTION_VALUE_REGNO_P
24476 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
24477
24478 #undef TARGET_ZERO_CALL_USED_REGS
24479 #define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs
24480
24481 #undef TARGET_PROMOTE_FUNCTION_MODE
24482 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
24483
24484 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
24485 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
24486
24487 #undef TARGET_MEMBER_TYPE_FORCES_BLK
24488 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
24489
24490 #undef TARGET_INSTANTIATE_DECLS
24491 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
24492
24493 #undef TARGET_SECONDARY_RELOAD
24494 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
24495 #undef TARGET_SECONDARY_MEMORY_NEEDED
24496 #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
24497 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
24498 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
24499
24500 #undef TARGET_CLASS_MAX_NREGS
24501 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
24502
24503 #undef TARGET_PREFERRED_RELOAD_CLASS
24504 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
24505 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
24506 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
24507 #undef TARGET_CLASS_LIKELY_SPILLED_P
24508 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
24509
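/* Vectorizer cost and SIMD-mode hooks.  */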
24510 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
24511 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
24512 ix86_builtin_vectorization_cost
24513 #undef TARGET_VECTORIZE_VEC_PERM_CONST
24514 #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
24515 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
24516 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
24517 ix86_preferred_simd_mode
24518 #undef TARGET_VECTORIZE_SPLIT_REDUCTION
24519 #define TARGET_VECTORIZE_SPLIT_REDUCTION \
24520 ix86_split_reduction
24521 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
24522 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
24523 ix86_autovectorize_vector_modes
24524 #undef TARGET_VECTORIZE_GET_MASK_MODE
24525 #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
24526 #undef TARGET_VECTORIZE_CREATE_COSTS
24527 #define TARGET_VECTORIZE_CREATE_COSTS ix86_vectorize_create_costs
24528
24529 #undef TARGET_SET_CURRENT_FUNCTION
24530 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
24531
24532 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
24533 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
24534
24535 #undef TARGET_OPTION_SAVE
24536 #define TARGET_OPTION_SAVE ix86_function_specific_save
24537
24538 #undef TARGET_OPTION_RESTORE
24539 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
24540
24541 #undef TARGET_OPTION_POST_STREAM_IN
24542 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
24543
24544 #undef TARGET_OPTION_PRINT
24545 #define TARGET_OPTION_PRINT ix86_function_specific_print
24546
24547 #undef TARGET_OPTION_FUNCTION_VERSIONS
24548 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
24549
24550 #undef TARGET_CAN_INLINE_P
24551 #define TARGET_CAN_INLINE_P ix86_can_inline_p
24552
24553 #undef TARGET_LEGITIMATE_ADDRESS_P
24554 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
24555
24556 #undef TARGET_REGISTER_PRIORITY
24557 #define TARGET_REGISTER_PRIORITY ix86_register_priority
24558
24559 #undef TARGET_REGISTER_USAGE_LEVELING_P
24560 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
24561
24562 #undef TARGET_LEGITIMATE_CONSTANT_P
24563 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
24564
24565 #undef TARGET_COMPUTE_FRAME_LAYOUT
24566 #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
24567
24568 #undef TARGET_FRAME_POINTER_REQUIRED
24569 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
24570
24571 #undef TARGET_CAN_ELIMINATE
24572 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
24573
24574 #undef TARGET_EXTRA_LIVE_ON_ENTRY
24575 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
24576
24577 #undef TARGET_ASM_CODE_END
24578 #define TARGET_ASM_CODE_END ix86_code_end
24579
24580 #undef TARGET_CONDITIONAL_REGISTER_USAGE
24581 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
24582
24583 #undef TARGET_CANONICALIZE_COMPARISON
24584 #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
24585
24586 #undef TARGET_LOOP_UNROLL_ADJUST
24587 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
24588
24589 /* Spilling general registers to SSE registers (ix86_spill_class) is disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
24590 #undef TARGET_SPILL_CLASS
24591 #define TARGET_SPILL_CLASS ix86_spill_class
24592
24593 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
24594 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
24595 ix86_simd_clone_compute_vecsize_and_simdlen
24596
24597 #undef TARGET_SIMD_CLONE_ADJUST
24598 #define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust
24599
24600 #undef TARGET_SIMD_CLONE_USABLE
24601 #define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable
24602
24603 #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
24604 #define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa
24605
24606 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
24607 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
24608 ix86_float_exceptions_rounding_supported_p
24609
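/* Hooks used by the mode-switching pass.  */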
24610 #undef TARGET_MODE_EMIT
24611 #define TARGET_MODE_EMIT ix86_emit_mode_set
24612
24613 #undef TARGET_MODE_NEEDED
24614 #define TARGET_MODE_NEEDED ix86_mode_needed
24615
24616 #undef TARGET_MODE_AFTER
24617 #define TARGET_MODE_AFTER ix86_mode_after
24618
24619 #undef TARGET_MODE_ENTRY
24620 #define TARGET_MODE_ENTRY ix86_mode_entry
24621
24622 #undef TARGET_MODE_EXIT
24623 #define TARGET_MODE_EXIT ix86_mode_exit
24624
24625 #undef TARGET_MODE_PRIORITY
24626 #define TARGET_MODE_PRIORITY ix86_mode_priority
24627
24628 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
24629 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
24630
24631 #undef TARGET_OFFLOAD_OPTIONS
24632 #define TARGET_OFFLOAD_OPTIONS \
24633 ix86_offload_options
24634
24635 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
24636 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
24637
24638 #undef TARGET_OPTAB_SUPPORTED_P
24639 #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
24640
24641 #undef TARGET_HARD_REGNO_SCRATCH_OK
24642 #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
24643
24644 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
24645 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
24646
24647 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
24648 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
24649
24650 #undef TARGET_INIT_LIBFUNCS
24651 #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
24652
24653 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
24654 #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
24655
24656 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
24657 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
24658
24659 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
24660 #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
24661
24662 #undef TARGET_HARD_REGNO_NREGS
24663 #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
24664 #undef TARGET_HARD_REGNO_MODE_OK
24665 #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
24666
24667 #undef TARGET_MODES_TIEABLE_P
24668 #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
24669
24670 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
24671 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
24672 ix86_hard_regno_call_part_clobbered
24673
24674 #undef TARGET_INSN_CALLEE_ABI
24675 #define TARGET_INSN_CALLEE_ABI ix86_insn_callee_abi
24676
24677 #undef TARGET_CAN_CHANGE_MODE_CLASS
24678 #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
24679
24680 #undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
24681 #define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment
24682
24683 #undef TARGET_STATIC_RTX_ALIGNMENT
24684 #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
24685 #undef TARGET_CONSTANT_ALIGNMENT
24686 #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
24687
24688 #undef TARGET_EMPTY_RECORD_P
24689 #define TARGET_EMPTY_RECORD_P ix86_is_empty_record
24690
24691 #undef TARGET_WARN_PARAMETER_PASSING_ABI
24692 #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
24693
24694 #undef TARGET_GET_MULTILIB_ABI_NAME
24695 #define TARGET_GET_MULTILIB_ABI_NAME \
24696 ix86_get_multilib_abi_name
24697
24698 #undef TARGET_IFUNC_REF_LOCAL_OK
24699 #define TARGET_IFUNC_REF_LOCAL_OK hook_bool_void_true
24700
24701 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
24702 # undef TARGET_ASM_RELOC_RW_MASK
24703 # define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask
24704 #endif
24705
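/* Implement TARGET_LIBC_HAS_FAST_FUNCTION (bound just below): when compiling
   against glibc, report mempcpy as having a fast library implementation;
   otherwise report nothing as fast.  */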
24706 static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
24707 {
24708 #ifdef OPTION_GLIBC
24709 if (OPTION_GLIBC)
24710 return (built_in_function)fcode == BUILT_IN_MEMPCPY;
24711 else
24712 return false;
24713 #else
24714 return false;
24715 #endif
24716 }
24717
24718 #undef TARGET_LIBC_HAS_FAST_FUNCTION
24719 #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
24720
24721 #undef TARGET_GEN_MEMSET_SCRATCH_RTX
24722 #define TARGET_GEN_MEMSET_SCRATCH_RTX ix86_gen_scratch_sse_rtx
24723
24724 #if CHECKING_P
24725 #undef TARGET_RUN_TARGET_SELFTESTS
24726 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
24727 #endif /* #if CHECKING_P */
24728
24729 struct gcc_target targetm = TARGET_INITIALIZER;
24730 \f
24731 #include "gt-i386.h"