// SPDX-License-Identifier: GPL-3.0-or-later
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2022 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "print-tree.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "sched-int.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "gimple-walk.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
#include "tm-constrs.h"
#include "target-globals.h"
#include "tree-vector-builder.h"
#include "tree-pass.h"
#include "symbol-summary.h"
#include "ipa-fnsummary.h"
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#include "case-cfn-macros.h"
#include "rs6000-internal.h"

/* This file should be included last.  */
#include "target-def.h"
extern tree rs6000_builtin_mask_for_load (void);
extern tree rs6000_builtin_md_vectorized_function (tree, tree, tree);
extern tree rs6000_builtin_reciprocal (tree);
/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif

/* Don't enable PC-relative addressing if the target does not support it.  */
#ifndef PCREL_SUPPORTED_BY_OS
#define PCREL_SUPPORTED_BY_OS 0
#endif
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

/* Track use of r13 in 64-bit AIX TLS.  */
static bool xcoff_tls_exec_model_detected = false;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;
#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif
/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;
static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

section *toc_section = 0;
/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",    RECIP_ALL },
  { "none",   RECIP_NONE },
  { "div",    (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
	       | RECIP_V2DF_DIV) },
  { "divf",   (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",   (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",  (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
	       | RECIP_V2DF_RSQRT) },
  { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
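
/* Illustrative note (added commentary, not part of the original source):
   a command line such as -mrecip=divf,rsqrtd is handled by looking up each
   name in recip_options above and ORing the corresponding masks together,
   i.e. (RECIP_SF_DIV | RECIP_V4SF_DIV) | (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT).  */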
/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT		= 0,		/* default clone.  */
  CLONE_ISA_2_05,			/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,			/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,			/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,			/* ISA 3.0 (power9).  */
  CLONE_ISA_3_1,			/* ISA 3.1 (power10).  */
  CLONE_MAX
};
/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0,				"" },		/* Default options.  */
  { OPTION_MASK_CMPB,		"arch_2_05" },	/* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD,	"arch_2_06" },	/* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR,	"arch_2_07" },	/* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR,	"arch_3_00" },	/* ISA 3.0 (power9).  */
  { OPTION_MASK_POWER10,	"arch_3_1" },	/* ISA 3.1 (power10).  */
};
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p = false;

/* Pointer to function (in rs6000-c.cc) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.cc, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};
/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or when deciding if an
   address is legitimate.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   3 classes.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX
/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;	/* Register class name.  */
  int reg;		/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* Quad offset is limited.  */
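
/* Illustrative note (added commentary, not part of the original source):
   these bits are consumed by testing the per-mode mask, e.g.
   (reg_addr[mode].addr_mask[RELOAD_REG_GPR] & RELOAD_REG_OFFSET) != 0
   answers "does MODE support reg+offset addressing in GPRs?"; the helper
   predicates below use exactly this pattern.  */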
/* Register type masks based on the type of valid addressing modes.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG];	/* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
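
/* Illustrative note (added commentary, not part of the original source):
   "DQ-form" refers to quad-word instructions such as lxv/stxv whose
   displacement must be a multiple of 16 bytes, which is why
   RELOAD_REG_QUAD_OFFSET is tracked separately from the ordinary
   RELOAD_REG_OFFSET bit.  */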
/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */
bool
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;

		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }

  return store_data_bypass_p (out_insn, in_insn);
}
/* Processor costs (relative to an add).  */
const struct processor_costs *rs6000_cost;

/* Instruction size costs on 32-bit processors.  */
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64-bit processors.  */
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};
/* Instruction costs on RS64A processors.  */
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC601 processors.  */
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};
/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,	/* mulsi */
  COSTS_N_INSNS (6/2),		/* mulsi_const */
  COSTS_N_INSNS (6/2),		/* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,	/* muldi */
  COSTS_N_INSNS (38/2),		/* divsi */
  COSTS_N_INSNS (70/2),		/* divdi */
  COSTS_N_INSNS (10/2),		/* fp */
  COSTS_N_INSNS (10/2),		/* dmul */
  COSTS_N_INSNS (74/2),		/* sdiv */
  COSTS_N_INSNS (74/2),		/* ddiv */
  128,				/* cache line size */
  0,				/* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPCE500MC processors.  */
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on POWER4 and POWER5 processors.  */
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER10 processors.  */
struct processor_costs power10_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (2),	/* fp */
  COSTS_N_INSNS (2),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (13),	/* ddiv */
  128,			/* cache line size */
  16,			/* prefetch streams */
  COSTS_N_INSNS (2),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t, bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
bool easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static tree get_prev_label (tree);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							    machine_mode, rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							    enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t, reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode, reg_class_t);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",

  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",

  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",

  "lr", "ctr", "ca", "ap",

  "0", "1", "2", "3", "4", "5", "6", "7",

  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",

  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",

  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",

  "lr", "ctr", "ca", "ap",

  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",

  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif
/* Table of valid machine attributes.  */
static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",   1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall", 0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif
/* This hook deals with fixups for relocatable code and DI-mode objects.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer
#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif
#undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
#define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
  rs6000_print_patchable_function_entry

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
#define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_CREATE_COSTS
#define TARGET_VECTORIZE_CREATE_COSTS rs6000_vectorize_create_costs

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  rs6000_libgcc_floating_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE rs6000_floatn_mode

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
  rs6000_builtin_md_vectorized_function

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
/* Use a 32-bit anchor range.  This leads to sequences like:

	addis	tmp,anchor,high
	add	dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_PREDICT_DOLOOP_P
#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p

#undef TARGET_HAVE_COUNT_REG_DECR_P
#define TARGET_HAVE_COUNT_REG_DECR_P true

/* 1000000000 is infinite cost in IVOPTs.  */
#undef TARGET_DOLOOP_COST_FOR_GENERIC
#define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000

#undef TARGET_DOLOOP_COST_FOR_ADDRESS
#define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000

#undef TARGET_PREFERRED_DOLOOP_MODE
#define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS rs6000_offload_options

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  rs6000_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  rs6000_get_function_versions_dispatcher

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  rs6000_hard_regno_call_part_clobbered

#undef TARGET_SLOW_UNALIGNED_ACCESS
#define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment

#undef TARGET_STARTING_FRAME_OFFSET
#define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset

#undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
#define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true

#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name

#undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
#define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
  rs6000_cannot_substitute_mem_equiv_p

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION rs6000_invalid_conversion

#undef TARGET_NEED_IPA_FN_TARGET_INFO
#define TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info

#undef TARGET_UPDATE_IPA_FN_TARGET_INFO
#define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info
/* Processor table.  */
struct rs6000_ptt
{
  const char *const name;		/* Canonical processor name.  */
  const enum processor_type processor;	/* Processor type enum value.  */
  const HOST_WIDE_INT target_enable;	/* Target flags to enable.  */
};

static struct rs6000_ptt const processor_target_table[] =
{
#define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
#include "rs6000-cpus.def"
#undef RS6000_CPU
};
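
/* Illustrative note (names of the flag sets live in rs6000-cpus.def, not
   here): each RS6000_CPU line in that file becomes one table entry, so an
   entry such as

     RS6000_CPU ("power9", PROCESSOR_POWER9, MASK_POWERPC64 | ...)

   expands to { "power9", PROCESSOR_POWER9, MASK_POWERPC64 | ... }, and the
   row index doubles as the -mcpu=/-mtune= index used below.  */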
/* Look up a processor name for -mcpu=xxx and -mtune=xxx.  Return -1 if the
   name is invalid.  */

static int
rs6000_cpu_name_lookup (const char *name)
{
  size_t i;

  if (name != NULL)
    {
      for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
	if (! strcmp (name, processor_target_table[i].name))
	  return (int) i;
    }

  return -1;
}
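
/* For example, rs6000_cpu_name_lookup ("power9") returns the index of the
   "power9" row generated from rs6000-cpus.def, while an unrecognized string
   (or a NULL name) returns -1.  */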
/* Return number of consecutive hard regs needed starting at reg REGNO
   to hold something of mode MODE.
   This is ordinarily the length in words of a value of mode MODE
   but can be less for certain modes in special long registers.

   POWER and PowerPC GPRs hold 32 bits worth;
   PowerPC64 GPRs and FPRs hold 64 bits worth.  */

static int
rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
{
  unsigned HOST_WIDE_INT reg_size;

  /* 128-bit floating point usually takes 2 registers, unless it is IEEE
     128-bit floating point that can go in vector registers, which has VSX
     memory addressing.  */
  if (FP_REGNO_P (regno))
    reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
		? UNITS_PER_VSX_WORD
		: UNITS_PER_FP_WORD);

  else if (ALTIVEC_REGNO_P (regno))
    reg_size = UNITS_PER_ALTIVEC_WORD;

  else
    reg_size = UNITS_PER_WORD;

  return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
}
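
/* Worked example, assuming a 64-bit target: a V2DFmode value is 16 bytes.
   In a VSX-capable FPR (UNITS_PER_VSX_WORD == 16) it needs
   (16 + 16 - 1) / 16 = 1 register, while split across 64-bit GPRs
   (UNITS_PER_WORD == 8) it would need (16 + 8 - 1) / 8 = 2 registers.  */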
/* Value is 1 if hard register REGNO can hold a value of machine-mode
   MODE.  */

static int
rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1836 int last_regno
= regno
+ rs6000_hard_regno_nregs
[mode
][regno
] - 1;
1838 if (COMPLEX_MODE_P (mode
))
1839 mode
= GET_MODE_INNER (mode
);
1841 /* Vector pair modes need even/odd VSX register pairs. Only allow vector
1844 return (TARGET_MMA
&& VSX_REGNO_P (regno
) && (regno
& 1) == 0);
1846 /* MMA accumulator modes need FPR registers divisible by 4. */
1848 return (TARGET_MMA
&& FP_REGNO_P (regno
) && (regno
& 3) == 0);
1850 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1851 register combinations, and use PTImode where we need to deal with quad
1852 word memory operations. Don't allow quad words in the argument or frame
1853 pointer registers, just registers 0..31. */
1854 if (mode
== PTImode
)
1855 return (IN_RANGE (regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
)
1856 && IN_RANGE (last_regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
)
1857 && ((regno
& 1) == 0));
1859 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1860 implementations. Don't allow an item to be split between a FP register
1861 and an Altivec register. Allow TImode in all VSX registers if the user
1863 if (TARGET_VSX
&& VSX_REGNO_P (regno
)
1864 && (VECTOR_MEM_VSX_P (mode
)
1865 || VECTOR_ALIGNMENT_P (mode
)
1866 || reg_addr
[mode
].scalar_in_vmx_p
1868 || (TARGET_VADDUQM
&& mode
== V1TImode
)))
1870 if (FP_REGNO_P (regno
))
1871 return FP_REGNO_P (last_regno
);
1873 if (ALTIVEC_REGNO_P (regno
))
1875 if (GET_MODE_SIZE (mode
) < 16 && !reg_addr
[mode
].scalar_in_vmx_p
)
1878 return ALTIVEC_REGNO_P (last_regno
);
1882 /* The GPRs can hold any mode, but values bigger than one register
1883 cannot go past R31. */
1884 if (INT_REGNO_P (regno
))
1885 return INT_REGNO_P (last_regno
);
1887 /* The float registers (except for VSX vector modes) can only hold floating
1888 modes and DImode. */
1889 if (FP_REGNO_P (regno
))
1891 if (VECTOR_ALIGNMENT_P (mode
))
1894 if (SCALAR_FLOAT_MODE_P (mode
)
1895 && (mode
!= TDmode
|| (regno
% 2) == 0)
1896 && FP_REGNO_P (last_regno
))
1899 if (GET_MODE_CLASS (mode
) == MODE_INT
)
	  if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1904 if (TARGET_P8_VECTOR
&& (mode
== SImode
))
1907 if (TARGET_P9_VECTOR
&& (mode
== QImode
|| mode
== HImode
))
1914 /* The CR register can only hold CC modes. */
1915 if (CR_REGNO_P (regno
))
1916 return GET_MODE_CLASS (mode
) == MODE_CC
;
1918 if (CA_REGNO_P (regno
))
1919 return mode
== Pmode
|| mode
== SImode
;
  /* AltiVec only in AltiVec registers.  */
1922 if (ALTIVEC_REGNO_P (regno
))
1923 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
)
1924 || mode
== V1TImode
);
1926 /* We cannot put non-VSX TImode or PTImode anywhere except general register
1927 and it must be able to fit within the register set. */
1929 return GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
;
/* Implement TARGET_HARD_REGNO_NREGS.  */

static unsigned int
rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
{
  return rs6000_hard_regno_nregs[mode][regno];
}

/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  return rs6000_hard_regno_mode_ok_p[mode][regno];
}
/* Implement TARGET_MODES_TIEABLE_P.

   PTImode cannot tie with other modes because PTImode is restricted to even
   GPR registers, and TImode can go in any GPR as well as VSX registers (PR

   Similarly, don't allow OOmode (vector pair, restricted to even VSX
   registers) or XOmode (vector quad, restricted to FPR registers divisible
   by 4) to tie with other modes.

   Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
   128-bit floating point on VSX systems ties with other vectors.  */

static bool
rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
      || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
    return mode1 == mode2;

  if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
    return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
  if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
    return false;

  if (SCALAR_FLOAT_MODE_P (mode1))
    return SCALAR_FLOAT_MODE_P (mode2);
  if (SCALAR_FLOAT_MODE_P (mode2))
    return false;

  if (GET_MODE_CLASS (mode1) == MODE_CC)
    return GET_MODE_CLASS (mode2) == MODE_CC;
  if (GET_MODE_CLASS (mode2) == MODE_CC)
    return false;

  return true;
}
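
/* A few concrete cases of the rules above: DFmode and SFmode tie (both
   scalar float), V2DFmode and V4SImode tie (both AltiVec/VSX vector modes),
   CCmode only ties with other MODE_CC modes, and OOmode ties only with
   itself.  */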
/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  */

static bool
rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
				       machine_mode mode)
{
  if (TARGET_32BIT
      && TARGET_POWERPC64
      && GET_MODE_SIZE (mode) > 4
      && INT_REGNO_P (regno))
    return true;

  if (TARGET_VSX
      && FP_REGNO_P (regno)
      && GET_MODE_SIZE (mode) > 8
      && !FLOAT128_2REG_P (mode))
    return true;

  return false;
}
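
/* Example: on a 32-bit ABI running on a 64-bit processor (TARGET_32BIT with
   TARGET_POWERPC64), a DImode value in a GPR is partially clobbered across
   calls because only the low 32 bits of the register are preserved.  */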
/* Print interesting facts about registers.  */

static void
rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2013 for (r
= first_regno
; r
<= last_regno
; ++r
)
2015 const char *comma
= "";
2018 if (first_regno
== last_regno
)
2019 fprintf (stderr
, "%s:\t", reg_name
);
2021 fprintf (stderr
, "%s%d:\t", reg_name
, r
- first_regno
);
2024 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2025 if (rs6000_hard_regno_mode_ok_p
[m
][r
] && rs6000_hard_regno_nregs
[m
][r
])
2029 fprintf (stderr
, ",\n\t");
2034 if (rs6000_hard_regno_nregs
[m
][r
] > 1)
2035 len
+= fprintf (stderr
, "%s%s/%d", comma
, GET_MODE_NAME (m
),
2036 rs6000_hard_regno_nregs
[m
][r
]);
2038 len
+= fprintf (stderr
, "%s%s", comma
, GET_MODE_NAME (m
));
2043 if (call_used_or_fixed_reg_p (r
))
2047 fprintf (stderr
, ",\n\t");
2052 len
+= fprintf (stderr
, "%s%s", comma
, "call-used");
2060 fprintf (stderr
, ",\n\t");
2065 len
+= fprintf (stderr
, "%s%s", comma
, "fixed");
2071 fprintf (stderr
, ",\n\t");
2075 len
+= fprintf (stderr
, "%sreg-class = %s", comma
,
2076 reg_class_names
[(int)rs6000_regno_regclass
[r
]]);
2081 fprintf (stderr
, ",\n\t");
2085 fprintf (stderr
, "%sregno = %d\n", comma
, r
);
2090 rs6000_debug_vector_unit (enum rs6000_vector v
)
2096 case VECTOR_NONE
: ret
= "none"; break;
2097 case VECTOR_ALTIVEC
: ret
= "altivec"; break;
2098 case VECTOR_VSX
: ret
= "vsx"; break;
2099 case VECTOR_P8_VECTOR
: ret
= "p8_vector"; break;
2100 default: ret
= "unknown"; break;
/* Inner function printing just the address mask for a particular reload
   register class.  */
DEBUG_FUNCTION char *
rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2114 if ((mask
& RELOAD_REG_VALID
) != 0)
2116 else if (keep_spaces
)
2119 if ((mask
& RELOAD_REG_MULTIPLE
) != 0)
2121 else if (keep_spaces
)
2124 if ((mask
& RELOAD_REG_INDEXED
) != 0)
2126 else if (keep_spaces
)
2129 if ((mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
2131 else if ((mask
& RELOAD_REG_OFFSET
) != 0)
2133 else if (keep_spaces
)
2136 if ((mask
& RELOAD_REG_PRE_INCDEC
) != 0)
2138 else if (keep_spaces
)
2141 if ((mask
& RELOAD_REG_PRE_MODIFY
) != 0)
2143 else if (keep_spaces
)
2146 if ((mask
& RELOAD_REG_AND_M16
) != 0)
2148 else if (keep_spaces
)
/* Print the address masks in a human readable fashion.  */
2158 rs6000_debug_print_mode (ssize_t m
)
2163 fprintf (stderr
, "Mode: %-5s", GET_MODE_NAME (m
));
2164 for (rc
= 0; rc
< N_RELOAD_REG
; rc
++)
2165 fprintf (stderr
, " %s: %s", reload_reg_map
[rc
].name
,
2166 rs6000_debug_addr_mask (reg_addr
[m
].addr_mask
[rc
], true));
2168 if ((reg_addr
[m
].reload_store
!= CODE_FOR_nothing
)
2169 || (reg_addr
[m
].reload_load
!= CODE_FOR_nothing
))
2171 fprintf (stderr
, "%*s Reload=%c%c", spaces
, "",
2172 (reg_addr
[m
].reload_store
!= CODE_FOR_nothing
) ? 's' : '*',
2173 (reg_addr
[m
].reload_load
!= CODE_FOR_nothing
) ? 'l' : '*');
2177 spaces
+= strlen (" Reload=sl");
2179 if (reg_addr
[m
].scalar_in_vmx_p
)
2181 fprintf (stderr
, "%*s Upper=y", spaces
, "");
2185 spaces
+= strlen (" Upper=y");
2187 if (rs6000_vector_unit
[m
] != VECTOR_NONE
2188 || rs6000_vector_mem
[m
] != VECTOR_NONE
)
2190 fprintf (stderr
, "%*s vector: arith=%-10s mem=%s",
2192 rs6000_debug_vector_unit (rs6000_vector_unit
[m
]),
2193 rs6000_debug_vector_unit (rs6000_vector_mem
[m
]));
2196 fputs ("\n", stderr
);
2199 #define DEBUG_FMT_ID "%-32s= "
2200 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2201 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2202 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2204 /* Print various interesting information with -mdebug=reg. */
2206 rs6000_debug_reg_global (void)
2208 static const char *const tf
[2] = { "false", "true" };
2209 const char *nl
= (const char *)0;
2212 char costly_num
[20];
2214 char flags_buffer
[40];
2215 const char *costly_str
;
2216 const char *nop_str
;
2217 const char *trace_str
;
2218 const char *abi_str
;
2219 const char *cmodel_str
;
2220 struct cl_target_option cl_opts
;
2222 /* Modes we want tieable information on. */
2223 static const machine_mode print_tieable_modes
[] = {
2262 /* Virtual regs we are interested in. */
2263 const static struct {
2264 int regno
; /* register number. */
2265 const char *name
; /* register name. */
2266 } virtual_regs
[] = {
2267 { STACK_POINTER_REGNUM
, "stack pointer:" },
2268 { TOC_REGNUM
, "toc: " },
2269 { STATIC_CHAIN_REGNUM
, "static chain: " },
2270 { RS6000_PIC_OFFSET_TABLE_REGNUM
, "pic offset: " },
2271 { HARD_FRAME_POINTER_REGNUM
, "hard frame: " },
2272 { ARG_POINTER_REGNUM
, "arg pointer: " },
2273 { FRAME_POINTER_REGNUM
, "frame pointer:" },
2274 { FIRST_PSEUDO_REGISTER
, "first pseudo: " },
2275 { FIRST_VIRTUAL_REGISTER
, "first virtual:" },
2276 { VIRTUAL_INCOMING_ARGS_REGNUM
, "incoming_args:" },
2277 { VIRTUAL_STACK_VARS_REGNUM
, "stack_vars: " },
2278 { VIRTUAL_STACK_DYNAMIC_REGNUM
, "stack_dynamic:" },
2279 { VIRTUAL_OUTGOING_ARGS_REGNUM
, "outgoing_args:" },
2280 { VIRTUAL_CFA_REGNUM
, "cfa (frame): " },
2281 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM
, "stack boundry:" },
2282 { LAST_VIRTUAL_REGISTER
, "last virtual: " },
2285 fputs ("\nHard register information:\n", stderr
);
2286 rs6000_debug_reg_print (FIRST_GPR_REGNO
, LAST_GPR_REGNO
, "gr");
2287 rs6000_debug_reg_print (FIRST_FPR_REGNO
, LAST_FPR_REGNO
, "fp");
2288 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO
,
2291 rs6000_debug_reg_print (LR_REGNO
, LR_REGNO
, "lr");
2292 rs6000_debug_reg_print (CTR_REGNO
, CTR_REGNO
, "ctr");
2293 rs6000_debug_reg_print (CR0_REGNO
, CR7_REGNO
, "cr");
2294 rs6000_debug_reg_print (CA_REGNO
, CA_REGNO
, "ca");
2295 rs6000_debug_reg_print (VRSAVE_REGNO
, VRSAVE_REGNO
, "vrsave");
2296 rs6000_debug_reg_print (VSCR_REGNO
, VSCR_REGNO
, "vscr");
2298 fputs ("\nVirtual/stack/frame registers:\n", stderr
);
2299 for (v
= 0; v
< ARRAY_SIZE (virtual_regs
); v
++)
2300 fprintf (stderr
, "%s regno = %3d\n", virtual_regs
[v
].name
, virtual_regs
[v
].regno
);
2304 "d reg_class = %s\n"
2305 "v reg_class = %s\n"
2306 "wa reg_class = %s\n"
2307 "we reg_class = %s\n"
2308 "wr reg_class = %s\n"
2309 "wx reg_class = %s\n"
2310 "wA reg_class = %s\n"
2312 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_d
]],
2313 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_v
]],
2314 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wa
]],
2315 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_we
]],
2316 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wr
]],
2317 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wx
]],
2318 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wA
]]);
2321 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2322 rs6000_debug_print_mode (m
);
2324 fputs ("\n", stderr
);
2326 for (m1
= 0; m1
< ARRAY_SIZE (print_tieable_modes
); m1
++)
2328 machine_mode mode1
= print_tieable_modes
[m1
];
2329 bool first_time
= true;
2331 nl
= (const char *)0;
2332 for (m2
= 0; m2
< ARRAY_SIZE (print_tieable_modes
); m2
++)
2334 machine_mode mode2
= print_tieable_modes
[m2
];
2335 if (mode1
!= mode2
&& rs6000_modes_tieable_p (mode1
, mode2
))
2339 fprintf (stderr
, "Tieable modes %s:", GET_MODE_NAME (mode1
));
2344 fprintf (stderr
, " %s", GET_MODE_NAME (mode2
));
2349 fputs ("\n", stderr
);
2355 if (rs6000_recip_control
)
2357 fprintf (stderr
, "\nReciprocal mask = 0x%x\n", rs6000_recip_control
);
2359 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2360 if (rs6000_recip_bits
[m
])
2363 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2365 (RS6000_RECIP_AUTO_RE_P (m
)
2367 : (RS6000_RECIP_HAVE_RE_P (m
) ? "have" : "none")),
2368 (RS6000_RECIP_AUTO_RSQRTE_P (m
)
2370 : (RS6000_RECIP_HAVE_RSQRTE_P (m
) ? "have" : "none")));
2373 fputs ("\n", stderr
);
2376 if (rs6000_cpu_index
>= 0)
2378 const char *name
= processor_target_table
[rs6000_cpu_index
].name
;
2380 = processor_target_table
[rs6000_cpu_index
].target_enable
;
2382 sprintf (flags_buffer
, "-mcpu=%s flags", name
);
2383 rs6000_print_isa_options (stderr
, 0, flags_buffer
, flags
);
2386 fprintf (stderr
, DEBUG_FMT_S
, "cpu", "<none>");
2388 if (rs6000_tune_index
>= 0)
2390 const char *name
= processor_target_table
[rs6000_tune_index
].name
;
2392 = processor_target_table
[rs6000_tune_index
].target_enable
;
2394 sprintf (flags_buffer
, "-mtune=%s flags", name
);
2395 rs6000_print_isa_options (stderr
, 0, flags_buffer
, flags
);
2398 fprintf (stderr
, DEBUG_FMT_S
, "tune", "<none>");
2400 cl_target_option_save (&cl_opts
, &global_options
, &global_options_set
);
2401 rs6000_print_isa_options (stderr
, 0, "rs6000_isa_flags",
2404 rs6000_print_isa_options (stderr
, 0, "rs6000_isa_flags_explicit",
2405 rs6000_isa_flags_explicit
);
2407 rs6000_print_builtin_options (stderr
, 0, "rs6000_builtin_mask",
2408 rs6000_builtin_mask
);
2410 rs6000_print_isa_options (stderr
, 0, "TARGET_DEFAULT", TARGET_DEFAULT
);
2412 fprintf (stderr
, DEBUG_FMT_S
, "--with-cpu default",
2413 OPTION_TARGET_CPU_DEFAULT
? OPTION_TARGET_CPU_DEFAULT
: "<none>");
2415 switch (rs6000_sched_costly_dep
)
2417 case max_dep_latency
:
2418 costly_str
= "max_dep_latency";
2422 costly_str
= "no_dep_costly";
2425 case all_deps_costly
:
2426 costly_str
= "all_deps_costly";
2429 case true_store_to_load_dep_costly
:
2430 costly_str
= "true_store_to_load_dep_costly";
2433 case store_to_load_dep_costly
:
2434 costly_str
= "store_to_load_dep_costly";
2438 costly_str
= costly_num
;
2439 sprintf (costly_num
, "%d", (int)rs6000_sched_costly_dep
);
2443 fprintf (stderr
, DEBUG_FMT_S
, "sched_costly_dep", costly_str
);
2445 switch (rs6000_sched_insert_nops
)
2447 case sched_finish_regroup_exact
:
2448 nop_str
= "sched_finish_regroup_exact";
2451 case sched_finish_pad_groups
:
2452 nop_str
= "sched_finish_pad_groups";
2455 case sched_finish_none
:
2456 nop_str
= "sched_finish_none";
2461 sprintf (nop_num
, "%d", (int)rs6000_sched_insert_nops
);
2465 fprintf (stderr
, DEBUG_FMT_S
, "sched_insert_nops", nop_str
);
2467 switch (rs6000_sdata
)
2474 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "data");
2478 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "sysv");
2482 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "eabi");
2487 switch (rs6000_traceback
)
2489 case traceback_default
: trace_str
= "default"; break;
2490 case traceback_none
: trace_str
= "none"; break;
2491 case traceback_part
: trace_str
= "part"; break;
2492 case traceback_full
: trace_str
= "full"; break;
2493 default: trace_str
= "unknown"; break;
2496 fprintf (stderr
, DEBUG_FMT_S
, "traceback", trace_str
);
2498 switch (rs6000_current_cmodel
)
2500 case CMODEL_SMALL
: cmodel_str
= "small"; break;
2501 case CMODEL_MEDIUM
: cmodel_str
= "medium"; break;
2502 case CMODEL_LARGE
: cmodel_str
= "large"; break;
2503 default: cmodel_str
= "unknown"; break;
2506 fprintf (stderr
, DEBUG_FMT_S
, "cmodel", cmodel_str
);
2508 switch (rs6000_current_abi
)
2510 case ABI_NONE
: abi_str
= "none"; break;
2511 case ABI_AIX
: abi_str
= "aix"; break;
2512 case ABI_ELFv2
: abi_str
= "ELFv2"; break;
2513 case ABI_V4
: abi_str
= "V4"; break;
2514 case ABI_DARWIN
: abi_str
= "darwin"; break;
2515 default: abi_str
= "unknown"; break;
2518 fprintf (stderr
, DEBUG_FMT_S
, "abi", abi_str
);
2520 if (rs6000_altivec_abi
)
2521 fprintf (stderr
, DEBUG_FMT_S
, "altivec_abi", "true");
2523 if (rs6000_aix_extabi
)
2524 fprintf (stderr
, DEBUG_FMT_S
, "AIX vec-extabi", "true");
2526 if (rs6000_darwin64_abi
)
2527 fprintf (stderr
, DEBUG_FMT_S
, "darwin64_abi", "true");
2529 fprintf (stderr
, DEBUG_FMT_S
, "soft_float",
2530 (TARGET_SOFT_FLOAT
? "true" : "false"));
2532 if (TARGET_LINK_STACK
)
2533 fprintf (stderr
, DEBUG_FMT_S
, "link_stack", "true");
2535 if (TARGET_P8_FUSION
)
2539 strcpy (options
, "power8");
2540 if (TARGET_P8_FUSION_SIGN
)
2541 strcat (options
, ", sign");
2543 fprintf (stderr
, DEBUG_FMT_S
, "fusion", options
);
2546 fprintf (stderr
, DEBUG_FMT_S
, "plt-format",
2547 TARGET_SECURE_PLT
? "secure" : "bss");
2548 fprintf (stderr
, DEBUG_FMT_S
, "struct-return",
2549 aix_struct_return
? "aix" : "sysv");
2550 fprintf (stderr
, DEBUG_FMT_S
, "always_hint", tf
[!!rs6000_always_hint
]);
2551 fprintf (stderr
, DEBUG_FMT_S
, "sched_groups", tf
[!!rs6000_sched_groups
]);
2552 fprintf (stderr
, DEBUG_FMT_S
, "align_branch",
2553 tf
[!!rs6000_align_branch_targets
]);
2554 fprintf (stderr
, DEBUG_FMT_D
, "tls_size", rs6000_tls_size
);
2555 fprintf (stderr
, DEBUG_FMT_D
, "long_double_size",
2556 rs6000_long_double_type_size
);
2557 if (rs6000_long_double_type_size
> 64)
2559 fprintf (stderr
, DEBUG_FMT_S
, "long double type",
2560 TARGET_IEEEQUAD
? "IEEE" : "IBM");
2561 fprintf (stderr
, DEBUG_FMT_S
, "default long double type",
2562 TARGET_IEEEQUAD_DEFAULT
? "IEEE" : "IBM");
2564 fprintf (stderr
, DEBUG_FMT_D
, "sched_restricted_insns_priority",
2565 (int)rs6000_sched_restricted_insns_priority
);
2566 fprintf (stderr
, DEBUG_FMT_D
, "Number of standard builtins",
2569 fprintf (stderr
, DEBUG_FMT_D
, "Enable float128 on VSX",
2570 (int)TARGET_FLOAT128_ENABLE_TYPE
);
2573 fprintf (stderr
, DEBUG_FMT_D
, "VSX easy 64-bit scalar element",
2574 (int)VECTOR_ELEMENT_SCALAR_64BIT
);
2576 if (TARGET_DIRECT_MOVE_128
)
2577 fprintf (stderr
, DEBUG_FMT_D
, "VSX easy 64-bit mfvsrld element",
2578 (int)VECTOR_ELEMENT_MFVSRLD_64BIT
);
/* Update the addr mask bits in reg_addr to help secondary reload and
   GO_IF_LEGITIMATE_ADDRESS support figure out the appropriate addressing to
   use.  */

static void
rs6000_setup_reg_addr_masks (void)
2589 ssize_t rc
, reg
, m
, nregs
;
2590 addr_mask_type any_addr_mask
, addr_mask
;
2592 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2594 machine_mode m2
= (machine_mode
) m
;
2595 bool complex_p
= false;
2596 bool small_int_p
= (m2
== QImode
|| m2
== HImode
|| m2
== SImode
);
2599 if (COMPLEX_MODE_P (m2
))
2602 m2
= GET_MODE_INNER (m2
);
2605 msize
= GET_MODE_SIZE (m2
);
2607 /* SDmode is special in that we want to access it only via REG+REG
2608 addressing on power7 and above, since we want to use the LFIWZX and
2609 STFIWZX instructions to load it. */
2610 bool indexed_only_p
= (m
== SDmode
&& TARGET_NO_SDMODE_STACK
);
2613 for (rc
= FIRST_RELOAD_REG_CLASS
; rc
<= LAST_RELOAD_REG_CLASS
; rc
++)
2616 reg
= reload_reg_map
[rc
].reg
;
2618 /* Can mode values go in the GPR/FPR/Altivec registers? */
2619 if (reg
>= 0 && rs6000_hard_regno_mode_ok_p
[m
][reg
])
2621 bool small_int_vsx_p
= (small_int_p
2622 && (rc
== RELOAD_REG_FPR
2623 || rc
== RELOAD_REG_VMX
));
2625 nregs
= rs6000_hard_regno_nregs
[m
][reg
];
2626 addr_mask
|= RELOAD_REG_VALID
;
2628 /* Indicate if the mode takes more than 1 physical register. If
2629 it takes a single register, indicate it can do REG+REG
2630 addressing. Small integers in VSX registers can only do
2631 REG+REG addressing. */
2632 if (small_int_vsx_p
)
2633 addr_mask
|= RELOAD_REG_INDEXED
;
2634 else if (nregs
> 1 || m
== BLKmode
|| complex_p
)
2635 addr_mask
|= RELOAD_REG_MULTIPLE
;
2637 addr_mask
|= RELOAD_REG_INDEXED
;
2639 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2640 addressing. If we allow scalars into Altivec registers,
2641 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2643 For VSX systems, we don't allow update addressing for
2644 DFmode/SFmode if those registers can go in both the
2645 traditional floating point registers and Altivec registers.
2646 The load/store instructions for the Altivec registers do not
2647 have update forms. If we allowed update addressing, it seems
2648 to break IV-OPT code using floating point if the index type is
2649 int instead of long (PR target/81550 and target/84042). */
2652 && (rc
== RELOAD_REG_GPR
|| rc
== RELOAD_REG_FPR
)
2654 && !VECTOR_MODE_P (m2
)
2655 && !VECTOR_ALIGNMENT_P (m2
)
2657 && (m
!= E_DFmode
|| !TARGET_VSX
)
2658 && (m
!= E_SFmode
|| !TARGET_P8_VECTOR
)
2659 && !small_int_vsx_p
)
2661 addr_mask
|= RELOAD_REG_PRE_INCDEC
;
2663 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2664 we don't allow PRE_MODIFY for some multi-register
2669 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2673 if (TARGET_POWERPC64
)
2674 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2679 if (TARGET_HARD_FLOAT
)
2680 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2686 /* GPR and FPR registers can do REG+OFFSET addressing, except
2687 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2688 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2689 if ((addr_mask
!= 0) && !indexed_only_p
2691 && (rc
== RELOAD_REG_GPR
2692 || ((msize
== 8 || m2
== SFmode
)
2693 && (rc
== RELOAD_REG_FPR
2694 || (rc
== RELOAD_REG_VMX
&& TARGET_P9_VECTOR
)))))
2695 addr_mask
|= RELOAD_REG_OFFSET
;
2697 /* IBM 128-bit can do REG+OFFSET addressing. */
2698 else if ((addr_mask
!= 0) && !indexed_only_p
2699 && FLOAT128_IBM_P (m
))
2700 addr_mask
|= RELOAD_REG_OFFSET
;
	  /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2703 instructions are enabled. The offset for 128-bit VSX registers is
2704 only 12-bits. While GPRs can handle the full offset range, VSX
2705 registers can only handle the restricted range. */
2706 else if ((addr_mask
!= 0) && !indexed_only_p
2707 && msize
== 16 && TARGET_P9_VECTOR
2708 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2
)
2709 || (m2
== TImode
&& TARGET_VSX
)))
2711 addr_mask
|= RELOAD_REG_OFFSET
;
2712 if (rc
== RELOAD_REG_FPR
|| rc
== RELOAD_REG_VMX
)
2713 addr_mask
|= RELOAD_REG_QUAD_OFFSET
;
2716 /* Vector pairs can do both indexed and offset loads if the
2717 instructions are enabled, otherwise they can only do offset loads
2718 since it will be broken into two vector moves. Vector quads can
2719 only do offset loads. */
2720 else if ((addr_mask
!= 0) && TARGET_MMA
2721 && (m2
== OOmode
|| m2
== XOmode
))
2723 addr_mask
|= RELOAD_REG_OFFSET
;
2724 if (rc
== RELOAD_REG_FPR
|| rc
== RELOAD_REG_VMX
)
2726 addr_mask
|= RELOAD_REG_QUAD_OFFSET
;
2728 addr_mask
|= RELOAD_REG_INDEXED
;
2732 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2733 addressing on 128-bit types. */
2734 if (rc
== RELOAD_REG_VMX
&& msize
== 16
2735 && (addr_mask
& RELOAD_REG_VALID
) != 0)
2736 addr_mask
|= RELOAD_REG_AND_M16
;
2738 reg_addr
[m
].addr_mask
[rc
] = addr_mask
;
2739 any_addr_mask
|= addr_mask
;
2742 reg_addr
[m
].addr_mask
[RELOAD_REG_ANY
] = any_addr_mask
;
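
  /* As a rough example of the result, on a power9 (TARGET_P9_VECTOR) target
     DFmode in the FPR reload class typically ends up with RELOAD_REG_VALID,
     RELOAD_REG_INDEXED and RELOAD_REG_OFFSET set, while OOmode gets at least
     the offset forms since a vector pair may be split into two vector
     moves.  */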
2747 /* Initialize the various global tables that are based on register size. */
2749 rs6000_init_hard_regno_mode_ok (bool global_init_p
)
2755 /* Precalculate REGNO_REG_CLASS. */
2756 rs6000_regno_regclass
[0] = GENERAL_REGS
;
2757 for (r
= 1; r
< 32; ++r
)
2758 rs6000_regno_regclass
[r
] = BASE_REGS
;
2760 for (r
= 32; r
< 64; ++r
)
2761 rs6000_regno_regclass
[r
] = FLOAT_REGS
;
2763 for (r
= 64; HARD_REGISTER_NUM_P (r
); ++r
)
2764 rs6000_regno_regclass
[r
] = NO_REGS
;
2766 for (r
= FIRST_ALTIVEC_REGNO
; r
<= LAST_ALTIVEC_REGNO
; ++r
)
2767 rs6000_regno_regclass
[r
] = ALTIVEC_REGS
;
2769 rs6000_regno_regclass
[CR0_REGNO
] = CR0_REGS
;
2770 for (r
= CR1_REGNO
; r
<= CR7_REGNO
; ++r
)
2771 rs6000_regno_regclass
[r
] = CR_REGS
;
2773 rs6000_regno_regclass
[LR_REGNO
] = LINK_REGS
;
2774 rs6000_regno_regclass
[CTR_REGNO
] = CTR_REGS
;
2775 rs6000_regno_regclass
[CA_REGNO
] = NO_REGS
;
2776 rs6000_regno_regclass
[VRSAVE_REGNO
] = VRSAVE_REGS
;
2777 rs6000_regno_regclass
[VSCR_REGNO
] = VRSAVE_REGS
;
2778 rs6000_regno_regclass
[ARG_POINTER_REGNUM
] = BASE_REGS
;
2779 rs6000_regno_regclass
[FRAME_POINTER_REGNUM
] = BASE_REGS
;
2781 /* Precalculate register class to simpler reload register class. We don't
2782 need all of the register classes that are combinations of different
2783 classes, just the simple ones that have constraint letters. */
2784 for (c
= 0; c
< N_REG_CLASSES
; c
++)
2785 reg_class_to_reg_type
[c
] = NO_REG_TYPE
;
2787 reg_class_to_reg_type
[(int)GENERAL_REGS
] = GPR_REG_TYPE
;
2788 reg_class_to_reg_type
[(int)BASE_REGS
] = GPR_REG_TYPE
;
2789 reg_class_to_reg_type
[(int)VSX_REGS
] = VSX_REG_TYPE
;
2790 reg_class_to_reg_type
[(int)VRSAVE_REGS
] = SPR_REG_TYPE
;
2791 reg_class_to_reg_type
[(int)VSCR_REGS
] = SPR_REG_TYPE
;
2792 reg_class_to_reg_type
[(int)LINK_REGS
] = SPR_REG_TYPE
;
2793 reg_class_to_reg_type
[(int)CTR_REGS
] = SPR_REG_TYPE
;
2794 reg_class_to_reg_type
[(int)LINK_OR_CTR_REGS
] = SPR_REG_TYPE
;
2795 reg_class_to_reg_type
[(int)CR_REGS
] = CR_REG_TYPE
;
2796 reg_class_to_reg_type
[(int)CR0_REGS
] = CR_REG_TYPE
;
2800 reg_class_to_reg_type
[(int)FLOAT_REGS
] = VSX_REG_TYPE
;
2801 reg_class_to_reg_type
[(int)ALTIVEC_REGS
] = VSX_REG_TYPE
;
2805 reg_class_to_reg_type
[(int)FLOAT_REGS
] = FPR_REG_TYPE
;
2806 reg_class_to_reg_type
[(int)ALTIVEC_REGS
] = ALTIVEC_REG_TYPE
;
2809 /* Precalculate the valid memory formats as well as the vector information,
2810 this must be set up before the rs6000_hard_regno_nregs_internal calls
2812 gcc_assert ((int)VECTOR_NONE
== 0);
2813 memset ((void *) &rs6000_vector_unit
[0], '\0', sizeof (rs6000_vector_unit
));
2814 memset ((void *) &rs6000_vector_mem
[0], '\0', sizeof (rs6000_vector_mem
));
2816 gcc_assert ((int)CODE_FOR_nothing
== 0);
2817 memset ((void *) ®_addr
[0], '\0', sizeof (reg_addr
));
2819 gcc_assert ((int)NO_REGS
== 0);
2820 memset ((void *) &rs6000_constraints
[0], '\0', sizeof (rs6000_constraints
));
  /* The VSX hardware allows native alignment for vectors, but
     TARGET_VSX_ALIGN_128 controls whether the compiler believes it can use
     native alignment or still uses 128-bit alignment.  */
2824 if (TARGET_VSX
&& !TARGET_VSX_ALIGN_128
)
2835 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2836 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2837 if (TARGET_FLOAT128_TYPE
)
2839 rs6000_vector_mem
[KFmode
] = VECTOR_VSX
;
2840 rs6000_vector_align
[KFmode
] = 128;
2842 if (FLOAT128_IEEE_P (TFmode
))
2844 rs6000_vector_mem
[TFmode
] = VECTOR_VSX
;
2845 rs6000_vector_align
[TFmode
] = 128;
2849 /* V2DF mode, VSX only. */
2852 rs6000_vector_unit
[V2DFmode
] = VECTOR_VSX
;
2853 rs6000_vector_mem
[V2DFmode
] = VECTOR_VSX
;
2854 rs6000_vector_align
[V2DFmode
] = align64
;
2857 /* V4SF mode, either VSX or Altivec. */
2860 rs6000_vector_unit
[V4SFmode
] = VECTOR_VSX
;
2861 rs6000_vector_mem
[V4SFmode
] = VECTOR_VSX
;
2862 rs6000_vector_align
[V4SFmode
] = align32
;
2864 else if (TARGET_ALTIVEC
)
2866 rs6000_vector_unit
[V4SFmode
] = VECTOR_ALTIVEC
;
2867 rs6000_vector_mem
[V4SFmode
] = VECTOR_ALTIVEC
;
2868 rs6000_vector_align
[V4SFmode
] = align32
;
2871 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2875 rs6000_vector_unit
[V4SImode
] = VECTOR_ALTIVEC
;
2876 rs6000_vector_unit
[V8HImode
] = VECTOR_ALTIVEC
;
2877 rs6000_vector_unit
[V16QImode
] = VECTOR_ALTIVEC
;
2878 rs6000_vector_align
[V4SImode
] = align32
;
2879 rs6000_vector_align
[V8HImode
] = align32
;
2880 rs6000_vector_align
[V16QImode
] = align32
;
2884 rs6000_vector_mem
[V4SImode
] = VECTOR_VSX
;
2885 rs6000_vector_mem
[V8HImode
] = VECTOR_VSX
;
2886 rs6000_vector_mem
[V16QImode
] = VECTOR_VSX
;
2890 rs6000_vector_mem
[V4SImode
] = VECTOR_ALTIVEC
;
2891 rs6000_vector_mem
[V8HImode
] = VECTOR_ALTIVEC
;
2892 rs6000_vector_mem
[V16QImode
] = VECTOR_ALTIVEC
;
2896 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2897 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2900 rs6000_vector_mem
[V2DImode
] = VECTOR_VSX
;
2901 rs6000_vector_unit
[V2DImode
]
2902 = (TARGET_P8_VECTOR
) ? VECTOR_P8_VECTOR
: VECTOR_NONE
;
2903 rs6000_vector_align
[V2DImode
] = align64
;
2905 rs6000_vector_mem
[V1TImode
] = VECTOR_VSX
;
2906 rs6000_vector_unit
[V1TImode
]
2907 = (TARGET_P8_VECTOR
) ? VECTOR_P8_VECTOR
: VECTOR_NONE
;
2908 rs6000_vector_align
[V1TImode
] = 128;
2911 /* DFmode, see if we want to use the VSX unit. Memory is handled
2912 differently, so don't set rs6000_vector_mem. */
2915 rs6000_vector_unit
[DFmode
] = VECTOR_VSX
;
2916 rs6000_vector_align
[DFmode
] = 64;
2919 /* SFmode, see if we want to use the VSX unit. */
2920 if (TARGET_P8_VECTOR
)
2922 rs6000_vector_unit
[SFmode
] = VECTOR_VSX
;
2923 rs6000_vector_align
[SFmode
] = 32;
2926 /* Allow TImode in VSX register and set the VSX memory macros. */
2929 rs6000_vector_mem
[TImode
] = VECTOR_VSX
;
2930 rs6000_vector_align
[TImode
] = align64
;
2933 /* Add support for vector pairs and vector quad registers. */
2936 rs6000_vector_unit
[OOmode
] = VECTOR_NONE
;
2937 rs6000_vector_mem
[OOmode
] = VECTOR_VSX
;
2938 rs6000_vector_align
[OOmode
] = 256;
2940 rs6000_vector_unit
[XOmode
] = VECTOR_NONE
;
2941 rs6000_vector_mem
[XOmode
] = VECTOR_VSX
;
2942 rs6000_vector_align
[XOmode
] = 512;
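
  /* The alignments follow directly from the mode sizes: OOmode (a vector
     pair) is 32 bytes, hence 256-bit alignment, and XOmode (a vector quad
     used for MMA accumulators) is 64 bytes, hence 512-bit alignment.  */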
2945 /* Register class constraints for the constraints that depend on compile
2946 switches. When the VSX code was added, different constraints were added
2947 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2948 of the VSX registers are used. The register classes for scalar floating
2949 point types is set, based on whether we allow that type into the upper
2950 (Altivec) registers. GCC has register classes to target the Altivec
2951 registers for load/store operations, to select using a VSX memory
2952 operation instead of the traditional floating point operation. The
2955 d - Register class to use with traditional DFmode instructions.
2956 v - Altivec register.
2957 wa - Any VSX register.
2958 wc - Reserved to represent individual CR bits (used in LLVM).
2959 wn - always NO_REGS.
2960 wr - GPR if 64-bit mode is permitted.
2961 wx - Float register if we can do 32-bit int stores. */
2963 if (TARGET_HARD_FLOAT
)
2964 rs6000_constraints
[RS6000_CONSTRAINT_d
] = FLOAT_REGS
;
2966 rs6000_constraints
[RS6000_CONSTRAINT_v
] = ALTIVEC_REGS
;
2968 rs6000_constraints
[RS6000_CONSTRAINT_wa
] = VSX_REGS
;
2970 if (TARGET_POWERPC64
)
2972 rs6000_constraints
[RS6000_CONSTRAINT_wr
] = GENERAL_REGS
;
2973 rs6000_constraints
[RS6000_CONSTRAINT_wA
] = BASE_REGS
;
2977 rs6000_constraints
[RS6000_CONSTRAINT_wx
] = FLOAT_REGS
; /* DImode */
2979 /* Support for new direct moves (ISA 3.0 + 64bit). */
2980 if (TARGET_DIRECT_MOVE_128
)
2981 rs6000_constraints
[RS6000_CONSTRAINT_we
] = VSX_REGS
;
2983 /* Set up the reload helper and direct move functions. */
2984 if (TARGET_VSX
|| TARGET_ALTIVEC
)
2988 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_di_store
;
2989 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_di_load
;
2990 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_di_store
;
2991 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_di_load
;
2992 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_di_store
;
2993 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_di_load
;
2994 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_di_store
;
2995 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_di_load
;
2996 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_di_store
;
2997 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_di_load
;
2998 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_di_store
;
2999 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_di_load
;
3000 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_di_store
;
3001 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_di_load
;
3002 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_di_store
;
3003 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_di_load
;
3004 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_di_store
;
3005 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_di_load
;
3006 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_di_store
;
3007 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_di_load
;
3009 if (FLOAT128_VECTOR_P (KFmode
))
3011 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_di_store
;
3012 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_di_load
;
3015 if (FLOAT128_VECTOR_P (TFmode
))
3017 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_di_store
;
3018 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_di_load
;
	  /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
	     available.  */
3023 if (TARGET_NO_SDMODE_STACK
)
3025 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_di_store
;
3026 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_di_load
;
3031 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_di_store
;
3032 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_di_load
;
3035 if (TARGET_DIRECT_MOVE
&& !TARGET_DIRECT_MOVE_128
)
3037 reg_addr
[TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxti
;
3038 reg_addr
[V1TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv1ti
;
3039 reg_addr
[V2DFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2df
;
3040 reg_addr
[V2DImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2di
;
3041 reg_addr
[V4SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4sf
;
3042 reg_addr
[V4SImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4si
;
3043 reg_addr
[V8HImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv8hi
;
3044 reg_addr
[V16QImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv16qi
;
3045 reg_addr
[SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxsf
;
3047 reg_addr
[TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprti
;
3048 reg_addr
[V1TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv1ti
;
3049 reg_addr
[V2DFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2df
;
3050 reg_addr
[V2DImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2di
;
3051 reg_addr
[V4SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4sf
;
3052 reg_addr
[V4SImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4si
;
3053 reg_addr
[V8HImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv8hi
;
3054 reg_addr
[V16QImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv16qi
;
3055 reg_addr
[SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprsf
;
3057 if (FLOAT128_VECTOR_P (KFmode
))
3059 reg_addr
[KFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxkf
;
3060 reg_addr
[KFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprkf
;
3063 if (FLOAT128_VECTOR_P (TFmode
))
3065 reg_addr
[TFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxtf
;
3066 reg_addr
[TFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprtf
;
3071 reg_addr
[OOmode
].reload_store
= CODE_FOR_reload_oo_di_store
;
3072 reg_addr
[OOmode
].reload_load
= CODE_FOR_reload_oo_di_load
;
3073 reg_addr
[XOmode
].reload_store
= CODE_FOR_reload_xo_di_store
;
3074 reg_addr
[XOmode
].reload_load
= CODE_FOR_reload_xo_di_load
;
3080 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_si_store
;
3081 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_si_load
;
3082 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_si_store
;
3083 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_si_load
;
3084 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_si_store
;
3085 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_si_load
;
3086 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_si_store
;
3087 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_si_load
;
3088 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_si_store
;
3089 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_si_load
;
3090 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_si_store
;
3091 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_si_load
;
3092 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_si_store
;
3093 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_si_load
;
3094 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_si_store
;
3095 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_si_load
;
3096 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_si_store
;
3097 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_si_load
;
3098 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_si_store
;
3099 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_si_load
;
3101 if (FLOAT128_VECTOR_P (KFmode
))
3103 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_si_store
;
3104 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_si_load
;
3107 if (FLOAT128_IEEE_P (TFmode
))
3109 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_si_store
;
3110 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_si_load
;
	  /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
	     available.  */
3115 if (TARGET_NO_SDMODE_STACK
)
3117 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_si_store
;
3118 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_si_load
;
3123 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_si_store
;
3124 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_si_load
;
3127 if (TARGET_DIRECT_MOVE
)
3129 reg_addr
[DImode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdi
;
3130 reg_addr
[DDmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdd
;
3131 reg_addr
[DFmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdf
;
3135 reg_addr
[DFmode
].scalar_in_vmx_p
= true;
3136 reg_addr
[DImode
].scalar_in_vmx_p
= true;
3138 if (TARGET_P8_VECTOR
)
3140 reg_addr
[SFmode
].scalar_in_vmx_p
= true;
3141 reg_addr
[SImode
].scalar_in_vmx_p
= true;
3143 if (TARGET_P9_VECTOR
)
3145 reg_addr
[HImode
].scalar_in_vmx_p
= true;
3146 reg_addr
[QImode
].scalar_in_vmx_p
= true;
3151 /* Precalculate HARD_REGNO_NREGS. */
3152 for (r
= 0; HARD_REGISTER_NUM_P (r
); ++r
)
3153 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3154 rs6000_hard_regno_nregs
[m
][r
]
3155 = rs6000_hard_regno_nregs_internal (r
, (machine_mode
) m
);
3157 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3158 for (r
= 0; HARD_REGISTER_NUM_P (r
); ++r
)
3159 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3160 rs6000_hard_regno_mode_ok_p
[m
][r
]
3161 = rs6000_hard_regno_mode_ok_uncached (r
, (machine_mode
) m
);
3163 /* Precalculate CLASS_MAX_NREGS sizes. */
3164 for (c
= 0; c
< LIM_REG_CLASSES
; ++c
)
3168 if (TARGET_VSX
&& VSX_REG_CLASS_P (c
))
3169 reg_size
= UNITS_PER_VSX_WORD
;
3171 else if (c
== ALTIVEC_REGS
)
3172 reg_size
= UNITS_PER_ALTIVEC_WORD
;
3174 else if (c
== FLOAT_REGS
)
3175 reg_size
= UNITS_PER_FP_WORD
;
3178 reg_size
= UNITS_PER_WORD
;
3180 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3182 machine_mode m2
= (machine_mode
)m
;
3183 int reg_size2
= reg_size
;
3185 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3187 if (TARGET_VSX
&& VSX_REG_CLASS_P (c
) && FLOAT128_2REG_P (m
))
3188 reg_size2
= UNITS_PER_FP_WORD
;
3190 rs6000_class_max_nregs
[m
][c
]
3191 = (GET_MODE_SIZE (m2
) + reg_size2
- 1) / reg_size2
;
  /* Calculate which modes to automatically generate code to use the
     reciprocal divide and square root instructions.  In the future, possibly
     automatically generate the instructions even if the user did not specify
     -mrecip.  The older machines' double precision reciprocal sqrt estimate
     is not accurate enough.  */
3200 memset (rs6000_recip_bits
, 0, sizeof (rs6000_recip_bits
));
3202 rs6000_recip_bits
[SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3204 rs6000_recip_bits
[DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3205 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3206 rs6000_recip_bits
[V4SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3207 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3208 rs6000_recip_bits
[V2DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3210 if (TARGET_FRSQRTES
)
3211 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3213 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3214 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3215 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3216 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3217 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3219 if (rs6000_recip_control
)
3221 if (!flag_finite_math_only
)
3222 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3224 if (flag_trapping_math
)
3225 warning (0, "%qs requires %qs or %qs", "-mrecip",
3226 "-fno-trapping-math", "-ffast-math");
3227 if (!flag_reciprocal_math
)
3228 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3230 if (flag_finite_math_only
&& !flag_trapping_math
&& flag_reciprocal_math
)
3232 if (RS6000_RECIP_HAVE_RE_P (SFmode
)
3233 && (rs6000_recip_control
& RECIP_SF_DIV
) != 0)
3234 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3236 if (RS6000_RECIP_HAVE_RE_P (DFmode
)
3237 && (rs6000_recip_control
& RECIP_DF_DIV
) != 0)
3238 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3240 if (RS6000_RECIP_HAVE_RE_P (V4SFmode
)
3241 && (rs6000_recip_control
& RECIP_V4SF_DIV
) != 0)
3242 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3244 if (RS6000_RECIP_HAVE_RE_P (V2DFmode
)
3245 && (rs6000_recip_control
& RECIP_V2DF_DIV
) != 0)
3246 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3248 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode
)
3249 && (rs6000_recip_control
& RECIP_SF_RSQRT
) != 0)
3250 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3252 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode
)
3253 && (rs6000_recip_control
& RECIP_DF_RSQRT
) != 0)
3254 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3256 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode
)
3257 && (rs6000_recip_control
& RECIP_V4SF_RSQRT
) != 0)
3258 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3260 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode
)
3261 && (rs6000_recip_control
& RECIP_V2DF_RSQRT
) != 0)
3262 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
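
  /* Putting the two pieces together: with -mrecip and -ffast-math on a VSX
     target, RS6000_RECIP_HAVE_RE_P (V2DFmode) is typically true and
     RECIP_V2DF_DIV is in rs6000_recip_control, so the AUTO_RE bit above is
     set and the expanders will use the reciprocal estimate for V2DFmode
     divides.  */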
  /* Update the addr mask bits in reg_addr to help secondary reload and
     GO_IF_LEGITIMATE_ADDRESS support figure out the appropriate addressing to
     use.  */
3269 rs6000_setup_reg_addr_masks ();
3271 if (global_init_p
|| TARGET_DEBUG_TARGET
)
3273 if (TARGET_DEBUG_REG
)
3274 rs6000_debug_reg_global ();
3276 if (TARGET_DEBUG_COST
|| TARGET_DEBUG_REG
)
3278 "SImode variable mult cost = %d\n"
3279 "SImode constant mult cost = %d\n"
3280 "SImode short constant mult cost = %d\n"
	     "DImode multiplication cost = %d\n"
3282 "SImode division cost = %d\n"
3283 "DImode division cost = %d\n"
3284 "Simple fp operation cost = %d\n"
3285 "DFmode multiplication cost = %d\n"
3286 "SFmode division cost = %d\n"
3287 "DFmode division cost = %d\n"
3288 "cache line size = %d\n"
3289 "l1 cache size = %d\n"
3290 "l2 cache size = %d\n"
3291 "simultaneous prefetches = %d\n"
3294 rs6000_cost
->mulsi_const
,
3295 rs6000_cost
->mulsi_const9
,
3303 rs6000_cost
->cache_line_size
,
3304 rs6000_cost
->l1_cache_size
,
3305 rs6000_cost
->l2_cache_size
,
3306 rs6000_cost
->simultaneous_prefetches
);
3311 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3314 darwin_rs6000_override_options (void)
  /* The Darwin ABI always includes AltiVec, can't be (validly) turned
     off.  */
3318 rs6000_altivec_abi
= 1;
3319 TARGET_ALTIVEC_VRSAVE
= 1;
3320 rs6000_current_abi
= ABI_DARWIN
;
3322 if (DEFAULT_ABI
== ABI_DARWIN
3324 darwin_one_byte_bool
= 1;
3326 if (TARGET_64BIT
&& ! TARGET_POWERPC64
)
3328 rs6000_isa_flags
|= OPTION_MASK_POWERPC64
;
3329 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
  /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
     optimisation, and will not work with the most generic case (where the
     symbol is undefined external, but there is no symbol stub).  */
3336 rs6000_default_long_calls
= 0;
3338 /* ld_classic is (so far) still used for kernel (static) code, and supports
3339 the JBSR longcall / branch islands. */
3342 rs6000_default_long_calls
= 1;
3344 /* Allow a kext author to do -mkernel -mhard-float. */
3345 if (! (rs6000_isa_flags_explicit
& OPTION_MASK_SOFT_FLOAT
))
3346 rs6000_isa_flags
|= OPTION_MASK_SOFT_FLOAT
;
  /* Make -m64 imply -maltivec.  Darwin's 64-bit ABI includes
     AltiVec.  */
3351 if (!flag_mkernel
&& !flag_apple_kext
3353 && ! (rs6000_isa_flags_explicit
& OPTION_MASK_ALTIVEC
))
3354 rs6000_isa_flags
|= OPTION_MASK_ALTIVEC
;
3356 /* Unless the user (not the configurer) has explicitly overridden
3357 it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
3358 G4 unless targeting the kernel. */
3361 && strverscmp (darwin_macosx_version_min
, "10.5") >= 0
3362 && ! (rs6000_isa_flags_explicit
& OPTION_MASK_ALTIVEC
)
3363 && ! OPTION_SET_P (rs6000_cpu_index
))
3365 rs6000_isa_flags
|= OPTION_MASK_ALTIVEC
;
3370 /* If not otherwise specified by a target, make 'long double' equivalent to
3373 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3374 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3377 /* Return the builtin mask of the various options used that could affect which
3378 builtins were used. In the past we used target_flags, but we've run out of
3379 bits, and some options are no longer in target_flags. */
3382 rs6000_builtin_mask_calculate (void)
3384 return (((TARGET_ALTIVEC
) ? OPTION_MASK_ALTIVEC
: 0)
3385 | ((TARGET_CMPB
) ? OPTION_MASK_CMPB
: 0)
3386 | ((TARGET_VSX
) ? OPTION_MASK_VSX
: 0)
3387 | ((TARGET_FRE
) ? OPTION_MASK_POPCNTB
: 0)
3388 | ((TARGET_FRES
) ? OPTION_MASK_PPC_GFXOPT
: 0)
3389 | ((TARGET_FRSQRTE
) ? OPTION_MASK_PPC_GFXOPT
: 0)
3390 | ((TARGET_FRSQRTES
) ? OPTION_MASK_POPCNTB
: 0)
3391 | ((TARGET_POPCNTD
) ? OPTION_MASK_POPCNTD
: 0)
3392 | ((rs6000_cpu
== PROCESSOR_CELL
) ? OPTION_MASK_FPRND
: 0)
3393 | ((TARGET_P8_VECTOR
) ? OPTION_MASK_P8_VECTOR
: 0)
3394 | ((TARGET_P9_VECTOR
) ? OPTION_MASK_P9_VECTOR
: 0)
3395 | ((TARGET_P9_MISC
) ? OPTION_MASK_P9_MISC
: 0)
3396 | ((TARGET_MODULO
) ? OPTION_MASK_MODULO
: 0)
3397 | ((TARGET_64BIT
) ? MASK_64BIT
: 0)
3398 | ((TARGET_POWERPC64
) ? MASK_POWERPC64
: 0)
3399 | ((TARGET_CRYPTO
) ? OPTION_MASK_CRYPTO
: 0)
3400 | ((TARGET_HTM
) ? OPTION_MASK_HTM
: 0)
3401 | ((TARGET_DFP
) ? OPTION_MASK_DFP
: 0)
3402 | ((TARGET_HARD_FLOAT
) ? OPTION_MASK_SOFT_FLOAT
: 0)
3403 | ((TARGET_LONG_DOUBLE_128
3404 && TARGET_HARD_FLOAT
3405 && !TARGET_IEEEQUAD
) ? OPTION_MASK_MULTIPLE
: 0)
3406 | ((TARGET_FLOAT128_TYPE
) ? OPTION_MASK_FLOAT128_KEYWORD
: 0)
3407 | ((TARGET_FLOAT128_HW
) ? OPTION_MASK_FLOAT128_HW
: 0)
3408 | ((TARGET_MMA
) ? OPTION_MASK_MMA
: 0)
3409 | ((TARGET_POWER10
) ? OPTION_MASK_POWER10
: 0));
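
/* For instance, a 64-bit compilation targeting a VSX CPU with hardware float
   would typically return a mask containing at least OPTION_MASK_ALTIVEC,
   OPTION_MASK_VSX, MASK_64BIT, MASK_POWERPC64 and OPTION_MASK_SOFT_FLOAT
   (which, per the test above, is set here when hard float is in use).  */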
3412 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3413 to clobber the XER[CA] bit because clobbering that bit without telling
3414 the compiler worked just fine with versions of GCC before GCC 5, and
3415 breaking a lot of older code in ways that are hard to track down is
3416 not such a great idea. */
3419 rs6000_md_asm_adjust (vec
<rtx
> & /*outputs*/, vec
<rtx
> & /*inputs*/,
3420 vec
<machine_mode
> & /*input_modes*/,
3421 vec
<const char *> & /*constraints*/, vec
<rtx
> &clobbers
,
3422 HARD_REG_SET
&clobbered_regs
, location_t
/*loc*/)
3424 clobbers
.safe_push (gen_rtx_REG (SImode
, CA_REGNO
));
3425 SET_HARD_REG_BIT (clobbered_regs
, CA_REGNO
);
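
/* A minimal sketch of the kind of asm this protects, assuming the usual
   carry-based "normalize to 0/1" idiom:

     long x, y;
     asm ("addic %0,%1,-1\n\tsubfe %0,%0,%1" : "=&r" (x) : "r" (y));

   The addic/subfe pair modifies XER[CA] without declaring it, so the
   implicit clobber added above keeps such code working.  */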
3429 /* This target function is similar to the hook TARGET_OPTION_OVERRIDE
3430 but is called when the optimize level is changed via an attribute or
3431 pragma or when it is reset at the end of the code affected by the
3432 attribute or pragma. It is not called at the beginning of compilation
3433 when TARGET_OPTION_OVERRIDE is called so if you want to perform these
3434 actions then, you should have TARGET_OPTION_OVERRIDE call
3435 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
3438 rs6000_override_options_after_change (void)
3440 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3441 turns -frename-registers on. */
3442 if ((OPTION_SET_P (flag_unroll_loops
) && flag_unroll_loops
)
3443 || (OPTION_SET_P (flag_unroll_all_loops
)
3444 && flag_unroll_all_loops
))
3446 if (!OPTION_SET_P (unroll_only_small_loops
))
3447 unroll_only_small_loops
= 0;
3448 if (!OPTION_SET_P (flag_rename_registers
))
3449 flag_rename_registers
= 1;
3450 if (!OPTION_SET_P (flag_cunroll_grow_size
))
3451 flag_cunroll_grow_size
= 1;
3453 else if (!OPTION_SET_P (flag_cunroll_grow_size
))
3454 flag_cunroll_grow_size
= flag_peel_loops
|| optimize
>= 3;
3456 /* If we are inserting ROP-protect instructions, disable shrink wrap. */
3457 if (rs6000_rop_protect
)
3458 flag_shrink_wrap
= 0;
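
/* For illustration, a change such as

     #pragma GCC optimize ("unroll-loops")

   (or the equivalent optimize attribute on a function) re-enters this hook,
   which is when the -munroll-only-small-loops and -frename-registers
   defaults above get re-evaluated.  */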
3461 #ifdef TARGET_USES_LINUX64_OPT
3463 rs6000_linux64_override_options ()
3465 if (!OPTION_SET_P (rs6000_alignment_flags
))
3466 rs6000_alignment_flags
= MASK_ALIGN_NATURAL
;
3467 if (rs6000_isa_flags
& OPTION_MASK_64BIT
)
3469 if (DEFAULT_ABI
!= ABI_AIX
)
3471 rs6000_current_abi
= ABI_AIX
;
3472 error (INVALID_64BIT
, "call");
3474 dot_symbols
= !strcmp (rs6000_abi_name
, "aixdesc");
3475 if (ELFv2_ABI_CHECK
)
3477 rs6000_current_abi
= ABI_ELFv2
;
3479 error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
3481 if (rs6000_isa_flags
& OPTION_MASK_RELOCATABLE
)
3483 rs6000_isa_flags
&= ~OPTION_MASK_RELOCATABLE
;
3484 error (INVALID_64BIT
, "relocatable");
3486 if (rs6000_isa_flags
& OPTION_MASK_EABI
)
3488 rs6000_isa_flags
&= ~OPTION_MASK_EABI
;
3489 error (INVALID_64BIT
, "eabi");
3491 if (TARGET_PROTOTYPE
)
3493 target_prototype
= 0;
3494 error (INVALID_64BIT
, "prototype");
3496 if ((rs6000_isa_flags
& OPTION_MASK_POWERPC64
) == 0)
3498 rs6000_isa_flags
|= OPTION_MASK_POWERPC64
;
3499 error ("%<-m64%> requires a PowerPC64 cpu");
3501 if (!OPTION_SET_P (rs6000_current_cmodel
))
3502 SET_CMODEL (CMODEL_MEDIUM
);
3503 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MINIMAL_TOC
) != 0)
3505 if (OPTION_SET_P (rs6000_current_cmodel
)
3506 && rs6000_current_cmodel
!= CMODEL_SMALL
)
3507 error ("%<-mcmodel%> incompatible with other toc options");
3508 if (TARGET_MINIMAL_TOC
)
3509 SET_CMODEL (CMODEL_SMALL
);
3510 else if (TARGET_PCREL
3511 || (PCREL_SUPPORTED_BY_OS
3512 && (rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) == 0))
3513 /* Ignore -mno-minimal-toc. */
3516 SET_CMODEL (CMODEL_SMALL
);
3518 if (rs6000_current_cmodel
!= CMODEL_SMALL
)
3520 if (!OPTION_SET_P (TARGET_NO_FP_IN_TOC
))
3521 TARGET_NO_FP_IN_TOC
= rs6000_current_cmodel
== CMODEL_MEDIUM
;
3522 if (!OPTION_SET_P (TARGET_NO_SUM_IN_TOC
))
3523 TARGET_NO_SUM_IN_TOC
= 0;
3525 if (TARGET_PLTSEQ
&& DEFAULT_ABI
!= ABI_ELFv2
)
3527 if (OPTION_SET_P (rs6000_pltseq
))
3528 warning (0, "%qs unsupported for this ABI",
3530 rs6000_pltseq
= false;
3533 else if (TARGET_64BIT
)
3534 error (INVALID_32BIT
, "32");
3537 if (TARGET_PROFILE_KERNEL
)
3540 error (INVALID_32BIT
, "profile-kernel");
3542 if (OPTION_SET_P (rs6000_current_cmodel
))
3544 SET_CMODEL (CMODEL_SMALL
);
3545 error (INVALID_32BIT
, "cmodel");
3551 /* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
3552 This support is only in little endian GLIBC 2.32 or newer. */
3554 glibc_supports_ieee_128bit (void)
3557 if (OPTION_GLIBC
&& !BYTES_BIG_ENDIAN
3558 && ((TARGET_GLIBC_MAJOR
* 1000) + TARGET_GLIBC_MINOR
) >= 2032)
3560 #endif /* OPTION_GLIBC. */
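
/* Worked example of the version check: GLIBC 2.32 gives
   (2 * 1000) + 32 = 2032, which satisfies ">= 2032", while GLIBC 2.31 gives
   2031 and is rejected, matching the "2.32 or newer" requirement above.  */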
3565 /* Override command line options.
3567 Combine build-specific configuration information with options
3568 specified on the command line to set various state variables which
3569 influence code generation, optimization, and expansion of built-in
3570 functions. Assure that command-line configuration preferences are
3571 compatible with each other and with the build configuration; issue
3572 warnings while adjusting configuration or error messages while
3573 rejecting configuration.
3575 Upon entry to this function:
3577 This function is called once at the beginning of
3578 compilation, and then again at the start and end of compiling
3579 each section of code that has a different configuration, as
3580 indicated, for example, by adding the
3582 __attribute__((__target__("cpu=power9")))
3584 qualifier to a function definition or, for example, by bracketing
3587 #pragma GCC target("altivec")
3591 #pragma GCC reset_options
3593 directives. Parameter global_init_p is true for the initial
3594 invocation, which initializes global variables, and false for all
3595 subsequent invocations.
3598 Various global state information is assumed to be valid. This
3599 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3600 default CPU specified at build configure time, TARGET_DEFAULT,
3601 representing the default set of option flags for the default
3602 target, and OPTION_SET_P (rs6000_isa_flags), representing
3603 which options were requested on the command line.
3605 Upon return from this function:
3607 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3608 was set by name on the command line. Additionally, if certain
3609 attributes are automatically enabled or disabled by this function
3610 in order to assure compatibility between options and
3611 configuration, the flags associated with those attributes are
3612 also set. By setting these "explicit bits", we avoid the risk
3613 that other code might accidentally overwrite these particular
3614 attributes with "default values".
3616 The various bits of rs6000_isa_flags are set to indicate the
3617 target options that have been selected for the most current
3618 compilation efforts. This has the effect of also turning on the
3619 associated TARGET_XXX values since these are macros which are
3620 generally defined to test the corresponding bit of the
3621 rs6000_isa_flags variable.
3623 The variable rs6000_builtin_mask is set to represent the target
3624 options for the most current compilation efforts, consistent with
3625 the current contents of rs6000_isa_flags. This variable controls
3626 expansion of built-in functions.
3628 Various other global variables and fields of global structures
3629 (over 50 in all) are initialized to reflect the desired options
3630 for the most current compilation efforts. */
3633 rs6000_option_override_internal (bool global_init_p
)
3637 HOST_WIDE_INT set_masks
;
3638 HOST_WIDE_INT ignore_masks
;
3641 struct cl_target_option
*main_target_opt
3642 = ((global_init_p
|| target_option_default_node
== NULL
)
3643 ? NULL
: TREE_TARGET_OPTION (target_option_default_node
));
3645 /* Print defaults. */
3646 if ((TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
) && global_init_p
)
3647 rs6000_print_isa_options (stderr
, 0, "TARGET_DEFAULT", TARGET_DEFAULT
);
3649 /* Remember the explicit arguments. */
3651 rs6000_isa_flags_explicit
= OPTION_SET_P (rs6000_isa_flags
);
  /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
     library functions, so warn about it.  The flag may be useful for
     performance studies from time to time though, so don't disable it
     entirely.  */
  if (OPTION_SET_P (rs6000_alignment_flags)
      && rs6000_alignment_flags == MASK_ALIGN_POWER
      && DEFAULT_ABI == ABI_DARWIN
      && TARGET_64BIT)
    warning (0, "%qs is not supported for 64-bit Darwin;"
             " it is incompatible with the installed C and C++ libraries",
             "-malign-power");
  /* Numerous experiments show that IRA based loop pressure
     calculation works better for RTL loop invariant motion on targets
     with enough (>= 32) registers.  It is an expensive optimization.
     So it is on only for peak performance.  */
  if (optimize >= 3 && global_init_p
      && !OPTION_SET_P (flag_ira_loop_pressure))
    flag_ira_loop_pressure = 1;

  /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
     for tracebacks to be complete but not if any -fasynchronous-unwind-tables
     options were already specified.  */
  if (flag_sanitize & SANITIZE_USER_ADDRESS
      && !OPTION_SET_P (flag_asynchronous_unwind_tables))
    flag_asynchronous_unwind_tables = 1;

  /* -fvariable-expansion-in-unroller is a win for POWER whenever the
     loop unroller is active.  It is only checked during unrolling, so
     we can just set it on by default.  */
  if (!OPTION_SET_P (flag_variable_expansion_in_unroller))
    flag_variable_expansion_in_unroller = 1;
  /* Set the pointer size.  */
  if (TARGET_64BIT)
    {
      rs6000_pmode = DImode;
      rs6000_pointer_size = 64;
    }
  else
    {
      rs6000_pmode = SImode;
      rs6000_pointer_size = 32;
    }
  /* Some OSs don't support saving the high part of 64-bit registers on context
     switch.  Other OSs don't support saving Altivec registers.  On those OSs,
     we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
     if the user wants either, the user must explicitly specify them and we
     won't interfere with the user's specification.  */

  set_masks = POWERPC_MASKS;
#ifdef OS_MISSING_POWERPC64
  if (OS_MISSING_POWERPC64)
    set_masks &= ~OPTION_MASK_POWERPC64;
#endif
#ifdef OS_MISSING_ALTIVEC
  if (OS_MISSING_ALTIVEC)
    set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
                   | OTHER_VSX_VECTOR_MASKS);
#endif

  /* Don't override by the processor default if given explicitly.  */
  set_masks &= ~rs6000_isa_flags_explicit;
  /* Process the -mcpu=<xxx> and -mtune=<xxx> argument.  If the user changed
     the cpu in a target attribute or pragma, but did not specify a tuning
     option, use the cpu for the tuning option rather than the option specified
     with -mtune on the command line.  Process a '--with-cpu' configuration
     request as an implicit --cpu.  */
  if (rs6000_cpu_index >= 0)
    cpu_index = rs6000_cpu_index;
  else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
    cpu_index = main_target_opt->x_rs6000_cpu_index;
  else if (OPTION_TARGET_CPU_DEFAULT)
    cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
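  /* In other words: an explicit -mcpu=<xxx>, whether from the command line or
     from the target attribute/pragma currently being processed, takes
     precedence over the cpu recorded in target_option_default_node, which in
     turn takes precedence over the configure-time OPTION_TARGET_CPU_DEFAULT.
     For example, a compiler configured with --with-cpu=power9 but invoked
     with -mcpu=power10 ends up with the power10 entry of
     processor_target_table here.  */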
  /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
     compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
     with those from the cpu, except for options that were explicitly set.  If
     we don't have a cpu, do not override the target bits set in
     TARGET_DEFAULT.  */
  if (cpu_index >= 0)
    {
      rs6000_cpu_index = cpu_index;
      rs6000_isa_flags &= ~set_masks;
      rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
                           & set_masks);
    }
  else
    {
      /* If no -mcpu=<xxx>, inherit any default options that were cleared via
         POWERPC_MASKS.  Originally, TARGET_DEFAULT was used to initialize
         target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook.  When we
         switched to using rs6000_isa_flags, we need to do the initialization
         here.

         If there is a TARGET_DEFAULT, use that.  Otherwise fall back to using
         -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults.  */
      HOST_WIDE_INT flags;
      if (TARGET_DEFAULT)
        flags = TARGET_DEFAULT;
      else
        {
          /* PowerPC 64-bit LE requires at least ISA 2.07.  */
          const char *default_cpu = (!TARGET_POWERPC64
                                     ? "powerpc"
                                     : (BYTES_BIG_ENDIAN
                                        ? "powerpc64"
                                        : "powerpc64le"));
          int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
          flags = processor_target_table[default_cpu_index].target_enable;
        }

      rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
    }
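  /* The explicit-flag filtering above means, for example, that
     "-mcpu=power9 -mno-vsx" keeps VSX disabled: OPTION_MASK_VSX is recorded
     in rs6000_isa_flags_explicit, so it was removed from set_masks earlier
     and the cpu's target_enable bits cannot switch it back on.  */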
  if (rs6000_tune_index >= 0)
    tune_index = rs6000_tune_index;
  else if (cpu_index >= 0)
    rs6000_tune_index = tune_index = cpu_index;
  else
    {
      size_t i;
      enum processor_type tune_proc
        = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);

      tune_index = -1;
      for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
        if (processor_target_table[i].processor == tune_proc)
          {
            tune_index = i;
            break;
          }
    }

  if (cpu_index >= 0)
    rs6000_cpu = processor_target_table[cpu_index].processor;
  else
    rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;

  gcc_assert (tune_index >= 0);
  rs6000_tune = processor_target_table[tune_index].processor;
  if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
      || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
      || rs6000_cpu == PROCESSOR_PPCE5500)
    {
      if (TARGET_ALTIVEC)
        error ("AltiVec not supported in this target");
    }
  /* If we are optimizing big endian systems for space, use the load/store
     multiple instructions.  */
  if (BYTES_BIG_ENDIAN && optimize_size)
    rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;

  /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
     because the hardware doesn't support the instructions used in little
     endian mode, and causes an alignment trap.  The 750 does not cause an
     alignment trap (except when the target is unaligned).  */

  if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
    {
      rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
      if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
        warning (0, "%qs is not supported on little endian systems",
                 "-mmultiple");
    }

  /* If little-endian, default to -mstrict-align on older processors.
     Testing for direct_move matches power8 and later.  */
  if (!BYTES_BIG_ENDIAN
      && !(processor_target_table[tune_index].target_enable
           & OPTION_MASK_DIRECT_MOVE))
    rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
  /* Add some warnings for VSX.  */
  if (TARGET_VSX)
    {
      const char *msg = NULL;
      if (!TARGET_HARD_FLOAT)
        {
          if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
            msg = N_("%<-mvsx%> requires hardware floating point");
          else
            {
              rs6000_isa_flags &= ~ OPTION_MASK_VSX;
              rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
            }
        }
      else if (TARGET_AVOID_XFORM > 0)
        msg = N_("%<-mvsx%> needs indexed addressing");
      else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
                                   & OPTION_MASK_ALTIVEC))
        {
          if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
            msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
          else
            msg = N_("%<-mno-altivec%> disables vsx");
        }

      if (msg)
        {
          warning (0, msg);
          rs6000_isa_flags &= ~ OPTION_MASK_VSX;
          rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
        }
    }
  /* If hard-float/altivec/vsx were explicitly turned off then don't allow
     the -mcpu setting to enable options that conflict.  */
  if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
      && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
                                       | OPTION_MASK_ALTIVEC
                                       | OPTION_MASK_VSX)) != 0)
    rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
                           | OPTION_MASK_DIRECT_MOVE)
                          & ~rs6000_isa_flags_explicit);

  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
    rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
#ifdef XCOFF_DEBUGGING_INFO
  /* For AIX default to 64-bit DWARF.  */
  if (!OPTION_SET_P (dwarf_offset_size))
    dwarf_offset_size = POINTER_SIZE_UNITS;
#endif
3880 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3881 off all of the options that depend on those flags. */
3882 ignore_masks
= rs6000_disable_incompatible_switches ();
3884 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3885 unless the user explicitly used the -mno-<option> to disable the code. */
3886 if (TARGET_P9_VECTOR
|| TARGET_MODULO
|| TARGET_P9_MISC
)
3887 rs6000_isa_flags
|= (ISA_3_0_MASKS_SERVER
& ~ignore_masks
);
3888 else if (TARGET_P9_MINMAX
)
3892 if (cpu_index
== PROCESSOR_POWER9
)
3894 /* legacy behavior: allow -mcpu=power9 with certain
3895 capabilities explicitly disabled. */
3896 rs6000_isa_flags
|= (ISA_3_0_MASKS_SERVER
& ~ignore_masks
);
3899 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3900 "for <xxx> less than power9", "-mcpu");
3902 else if ((ISA_3_0_MASKS_SERVER
& rs6000_isa_flags_explicit
)
3903 != (ISA_3_0_MASKS_SERVER
& rs6000_isa_flags
3904 & rs6000_isa_flags_explicit
))
3905 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3906 were explicitly cleared. */
3907 error ("%qs incompatible with explicitly disabled options",
3910 rs6000_isa_flags
|= ISA_3_0_MASKS_SERVER
;
3912 else if (TARGET_P8_VECTOR
|| TARGET_DIRECT_MOVE
|| TARGET_CRYPTO
)
3913 rs6000_isa_flags
|= (ISA_2_7_MASKS_SERVER
& ~ignore_masks
);
3914 else if (TARGET_VSX
)
3915 rs6000_isa_flags
|= (ISA_2_6_MASKS_SERVER
& ~ignore_masks
);
3916 else if (TARGET_POPCNTD
)
3917 rs6000_isa_flags
|= (ISA_2_6_MASKS_EMBEDDED
& ~ignore_masks
);
3918 else if (TARGET_DFP
)
3919 rs6000_isa_flags
|= (ISA_2_5_MASKS_SERVER
& ~ignore_masks
);
3920 else if (TARGET_CMPB
)
3921 rs6000_isa_flags
|= (ISA_2_5_MASKS_EMBEDDED
& ~ignore_masks
);
3922 else if (TARGET_FPRND
)
3923 rs6000_isa_flags
|= (ISA_2_4_MASKS
& ~ignore_masks
);
3924 else if (TARGET_POPCNTB
)
3925 rs6000_isa_flags
|= (ISA_2_2_MASKS
& ~ignore_masks
);
3926 else if (TARGET_ALTIVEC
)
3927 rs6000_isa_flags
|= (OPTION_MASK_PPC_GFXOPT
& ~ignore_masks
);
3929 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3930 target attribute or pragma which automatically enables both options,
3931 unless the altivec ABI was set. This is set by default for 64-bit, but
3932 not for 32-bit. Don't move this before the above code using ignore_masks,
3933 since it can reset the cleared VSX/ALTIVEC flag again. */
3934 if (main_target_opt
&& !main_target_opt
->x_rs6000_altivec_abi
)
3935 rs6000_isa_flags
&= ~((OPTION_MASK_VSX
| OPTION_MASK_ALTIVEC
)
3936 & ~rs6000_isa_flags_explicit
);
3938 if (TARGET_CRYPTO
&& !TARGET_ALTIVEC
)
3940 if (rs6000_isa_flags_explicit
& OPTION_MASK_CRYPTO
)
3941 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3942 rs6000_isa_flags
&= ~OPTION_MASK_CRYPTO
;
3945 if (!TARGET_FPRND
&& TARGET_VSX
)
3947 if (rs6000_isa_flags_explicit
& OPTION_MASK_FPRND
)
3948 /* TARGET_VSX = 1 implies Power 7 and newer */
3949 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3950 rs6000_isa_flags
&= ~OPTION_MASK_FPRND
;
3953 if (TARGET_DIRECT_MOVE
&& !TARGET_VSX
)
3955 if (rs6000_isa_flags_explicit
& OPTION_MASK_DIRECT_MOVE
)
3956 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3957 rs6000_isa_flags
&= ~OPTION_MASK_DIRECT_MOVE
;
3960 if (TARGET_P8_VECTOR
&& !TARGET_ALTIVEC
)
3962 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
3963 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3964 rs6000_isa_flags
&= ~OPTION_MASK_P8_VECTOR
;
3967 if (TARGET_P8_VECTOR
&& !TARGET_VSX
)
3969 if ((rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
3970 && (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
))
3971 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3972 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
) == 0)
3974 rs6000_isa_flags
&= ~OPTION_MASK_P8_VECTOR
;
3975 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
3976 rs6000_isa_flags_explicit
|= OPTION_MASK_P8_VECTOR
;
3980 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3982 rs6000_isa_flags
|= OPTION_MASK_VSX
;
3983 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
3987 if (TARGET_DFP
&& !TARGET_HARD_FLOAT
)
3989 if (rs6000_isa_flags_explicit
& OPTION_MASK_DFP
)
3990 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3991 rs6000_isa_flags
&= ~OPTION_MASK_DFP
;
  /* The quad memory instructions only work in 64-bit mode.  In 32-bit mode,
     silently turn off quad memory mode.  */
3996 if ((TARGET_QUAD_MEMORY
|| TARGET_QUAD_MEMORY_ATOMIC
) && !TARGET_POWERPC64
)
3998 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY
) != 0)
3999 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
4001 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY_ATOMIC
) != 0)
4002 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
4004 rs6000_isa_flags
&= ~(OPTION_MASK_QUAD_MEMORY
4005 | OPTION_MASK_QUAD_MEMORY_ATOMIC
);
4008 /* Non-atomic quad memory load/store are disabled for little endian, since
4009 the words are reversed, but atomic operations can still be done by
4010 swapping the words. */
4011 if (TARGET_QUAD_MEMORY
&& !WORDS_BIG_ENDIAN
)
4013 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY
) != 0)
4014 warning (0, N_("%<-mquad-memory%> is not available in little endian "
4017 rs6000_isa_flags
&= ~OPTION_MASK_QUAD_MEMORY
;
  /* Assume that if the user asked for normal quad memory instructions, they
     want the atomic versions as well, unless they explicitly told us not to
     use quad word atomic instructions.  */
4023 if (TARGET_QUAD_MEMORY
4024 && !TARGET_QUAD_MEMORY_ATOMIC
4025 && ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY_ATOMIC
) == 0))
4026 rs6000_isa_flags
|= OPTION_MASK_QUAD_MEMORY_ATOMIC
;
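  /* E.g. "-mquad-memory" by itself behaves like
     "-mquad-memory -mquad-memory-atomic", while
     "-mquad-memory -mno-quad-memory-atomic" leaves the atomic form off.  */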
4028 /* If we can shrink-wrap the TOC register save separately, then use
4029 -msave-toc-indirect unless explicitly disabled. */
4030 if ((rs6000_isa_flags_explicit
& OPTION_MASK_SAVE_TOC_INDIRECT
) == 0
4031 && flag_shrink_wrap_separate
4032 && optimize_function_for_speed_p (cfun
))
4033 rs6000_isa_flags
|= OPTION_MASK_SAVE_TOC_INDIRECT
;
4035 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4036 generating power8 instructions. Power9 does not optimize power8 fusion
4038 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
))
4040 if (processor_target_table
[tune_index
].processor
== PROCESSOR_POWER8
)
4041 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION
;
4043 rs6000_isa_flags
&= ~OPTION_MASK_P8_FUSION
;
4046 /* Setting additional fusion flags turns on base fusion. */
4047 if (!TARGET_P8_FUSION
&& TARGET_P8_FUSION_SIGN
)
4049 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
)
4051 if (TARGET_P8_FUSION_SIGN
)
4052 error ("%qs requires %qs", "-mpower8-fusion-sign",
4055 rs6000_isa_flags
&= ~OPTION_MASK_P8_FUSION
;
4058 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION
;
4061 /* Power8 does not fuse sign extended loads with the addis. If we are
4062 optimizing at high levels for speed, convert a sign extended load into a
4063 zero extending load, and an explicit sign extension. */
4064 if (TARGET_P8_FUSION
4065 && !(rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION_SIGN
)
4066 && optimize_function_for_speed_p (cfun
)
4068 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION_SIGN
;
4070 /* ISA 3.0 vector instructions include ISA 2.07. */
4071 if (TARGET_P9_VECTOR
&& !TARGET_P8_VECTOR
)
4073 /* We prefer to not mention undocumented options in
4074 error messages. However, if users have managed to select
4075 power9-vector without selecting power8-vector, they
4076 already know about undocumented flags. */
4077 if ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
) &&
4078 (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
))
4079 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4080 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
) == 0)
4082 rs6000_isa_flags
&= ~OPTION_MASK_P9_VECTOR
;
4083 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
4084 rs6000_isa_flags_explicit
|= OPTION_MASK_P9_VECTOR
;
4088 /* OPTION_MASK_P9_VECTOR is explicit and
4089 OPTION_MASK_P8_VECTOR is not explicit. */
4090 rs6000_isa_flags
|= OPTION_MASK_P8_VECTOR
;
4091 rs6000_isa_flags_explicit
|= OPTION_MASK_P8_VECTOR
;
4095 /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
4096 support. If we only have ISA 2.06 support, and the user did not specify
4097 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4098 but we don't enable the full vectorization support */
4099 if (TARGET_ALLOW_MOVMISALIGN
== -1 && TARGET_P8_VECTOR
&& TARGET_DIRECT_MOVE
)
4100 TARGET_ALLOW_MOVMISALIGN
= 1;
4102 else if (TARGET_ALLOW_MOVMISALIGN
&& !TARGET_VSX
)
4104 if (TARGET_ALLOW_MOVMISALIGN
> 0
4105 && OPTION_SET_P (TARGET_ALLOW_MOVMISALIGN
))
4106 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4108 TARGET_ALLOW_MOVMISALIGN
= 0;
4111 /* Determine when unaligned vector accesses are permitted, and when
4112 they are preferred over masked Altivec loads. Note that if
4113 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4114 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4116 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4120 if (rs6000_isa_flags_explicit
& OPTION_MASK_EFFICIENT_UNALIGNED_VSX
)
4121 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4123 rs6000_isa_flags
&= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX
;
4126 else if (!TARGET_ALLOW_MOVMISALIGN
)
4128 if (rs6000_isa_flags_explicit
& OPTION_MASK_EFFICIENT_UNALIGNED_VSX
)
4129 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
4130 "-mallow-movmisalign");
4132 rs6000_isa_flags
&= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX
;
4136 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
))
4138 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4139 rs6000_isa_flags
|= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
;
4141 rs6000_isa_flags
&= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
;
4144 /* Use long double size to select the appropriate long double. We use
4145 TYPE_PRECISION to differentiate the 3 different long double types. We map
4146 128 into the precision used for TFmode. */
4147 int default_long_double_size
= (RS6000_DEFAULT_LONG_DOUBLE_SIZE
== 64
4149 : FLOAT_PRECISION_TFmode
);
4151 /* Set long double size before the IEEE 128-bit tests. */
4152 if (!OPTION_SET_P (rs6000_long_double_type_size
))
4154 if (main_target_opt
!= NULL
4155 && (main_target_opt
->x_rs6000_long_double_type_size
4156 != default_long_double_size
))
4157 error ("target attribute or pragma changes %<long double%> size");
4159 rs6000_long_double_type_size
= default_long_double_size
;
4161 else if (rs6000_long_double_type_size
== FLOAT_PRECISION_TFmode
)
4162 ; /* The option value can be seen when cl_target_option_restore is called. */
4163 else if (rs6000_long_double_type_size
== 128)
4164 rs6000_long_double_type_size
= FLOAT_PRECISION_TFmode
;
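  /* To illustrate the mapping above: -mlong-double-64 selects a 64-bit long
     double, while -mlong-double-128 selects one of the two 128-bit formats
     (IBM double-double or IEEE 128-bit), with the choice between those made
     by the -mabi={ibmlongdouble,ieeelongdouble} handling below; internally
     the 128-bit case is tracked as FLOAT_PRECISION_TFmode.  */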
4166 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4167 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4168 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4169 those systems will not pick up this default. Warn if the user changes the
4170 default unless -Wno-psabi. */
4171 if (!OPTION_SET_P (rs6000_ieeequad
))
4172 rs6000_ieeequad
= TARGET_IEEEQUAD_DEFAULT
;
4174 else if (TARGET_LONG_DOUBLE_128
)
4176 if (global_options
.x_rs6000_ieeequad
4177 && (!TARGET_POPCNTD
|| !TARGET_VSX
))
4178 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4180 if (rs6000_ieeequad
!= TARGET_IEEEQUAD_DEFAULT
)
4182 /* Determine if the user can change the default long double type at
4183 compilation time. You need GLIBC 2.32 or newer to be able to
4184 change the long double type. Only issue one warning. */
4185 static bool warned_change_long_double
;
4187 if (!warned_change_long_double
&& !glibc_supports_ieee_128bit ())
4189 warned_change_long_double
= true;
4190 if (TARGET_IEEEQUAD
)
4191 warning (OPT_Wpsabi
, "Using IEEE extended precision "
4194 warning (OPT_Wpsabi
, "Using IBM extended precision "
  /* Enable the default support for IEEE 128-bit floating point on Linux VSX
     systems.  In GCC 7, we would enable the IEEE 128-bit floating point
     infrastructure (-mfloat128-type) but not enable the actual __float128 type
     unless the user used the explicit -mfloat128.  In GCC 8, we enable both
     the keyword as well as the type.  */
4205 TARGET_FLOAT128_TYPE
= TARGET_FLOAT128_ENABLE_TYPE
&& TARGET_VSX
;
4207 /* IEEE 128-bit floating point requires VSX support. */
4208 if (TARGET_FLOAT128_KEYWORD
)
4212 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) != 0)
4213 error ("%qs requires VSX support", "-mfloat128");
4215 TARGET_FLOAT128_TYPE
= 0;
4216 rs6000_isa_flags
&= ~(OPTION_MASK_FLOAT128_KEYWORD
4217 | OPTION_MASK_FLOAT128_HW
);
4219 else if (!TARGET_FLOAT128_TYPE
)
4221 TARGET_FLOAT128_TYPE
= 1;
4222 warning (0, "The %<-mfloat128%> option may not be fully supported");
4226 /* Enable the __float128 keyword under Linux by default. */
4227 if (TARGET_FLOAT128_TYPE
&& !TARGET_FLOAT128_KEYWORD
4228 && (rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) == 0)
4229 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_KEYWORD
;
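  /* Illustrative sketch (assuming a Linux VSX target where the keyword is
     enabled by default as described above): a definition such as

       __float128 twice (__float128 x) { return x + x; }

     is then expected to compile without an explicit -mfloat128.  */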
  /* If we are supporting the float128 type and have full ISA 3.0 support,
     enable -mfloat128-hardware by default.  However, don't enable the
     __float128 keyword if it was explicitly turned off.  64-bit mode is needed
     because sometimes the compiler wants to put things in an integer
     container, and if we don't have __int128 support, it is impossible.  */
4236 if (TARGET_FLOAT128_TYPE
&& !TARGET_FLOAT128_HW
&& TARGET_64BIT
4237 && (rs6000_isa_flags
& ISA_3_0_MASKS_IEEE
) == ISA_3_0_MASKS_IEEE
4238 && !(rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
))
4239 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_HW
;
4241 if (TARGET_FLOAT128_HW
4242 && (rs6000_isa_flags
& ISA_3_0_MASKS_IEEE
) != ISA_3_0_MASKS_IEEE
)
4244 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4245 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4247 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4250 if (TARGET_FLOAT128_HW
&& !TARGET_64BIT
)
4252 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4253 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4255 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4258 /* Enable -mprefixed by default on power10 systems. */
4259 if (TARGET_POWER10
&& (rs6000_isa_flags_explicit
& OPTION_MASK_PREFIXED
) == 0)
4260 rs6000_isa_flags
|= OPTION_MASK_PREFIXED
;
4262 /* -mprefixed requires -mcpu=power10 (or later). */
4263 else if (TARGET_PREFIXED
&& !TARGET_POWER10
)
4265 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PREFIXED
) != 0)
4266 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4268 rs6000_isa_flags
&= ~OPTION_MASK_PREFIXED
;
4271 /* -mpcrel requires prefixed load/store addressing. */
4272 if (TARGET_PCREL
&& !TARGET_PREFIXED
)
4274 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) != 0)
4275 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4277 rs6000_isa_flags
&= ~OPTION_MASK_PCREL
;
4280 /* Print the options after updating the defaults. */
4281 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4282 rs6000_print_isa_options (stderr
, 0, "after defaults", rs6000_isa_flags
);
4284 /* E500mc does "better" if we inline more aggressively. Respect the
4285 user's opinion, though. */
4286 if (rs6000_block_move_inline_limit
== 0
4287 && (rs6000_tune
== PROCESSOR_PPCE500MC
4288 || rs6000_tune
== PROCESSOR_PPCE500MC64
4289 || rs6000_tune
== PROCESSOR_PPCE5500
4290 || rs6000_tune
== PROCESSOR_PPCE6500
))
4291 rs6000_block_move_inline_limit
= 128;
4293 /* store_one_arg depends on expand_block_move to handle at least the
4294 size of reg_parm_stack_space. */
4295 if (rs6000_block_move_inline_limit
< (TARGET_POWERPC64
? 64 : 32))
4296 rs6000_block_move_inline_limit
= (TARGET_POWERPC64
? 64 : 32);
4300 /* If the appropriate debug option is enabled, replace the target hooks
4301 with debug versions that call the real version and then prints
4302 debugging information. */
4303 if (TARGET_DEBUG_COST
)
4305 targetm
.rtx_costs
= rs6000_debug_rtx_costs
;
4306 targetm
.address_cost
= rs6000_debug_address_cost
;
4307 targetm
.sched
.adjust_cost
= rs6000_debug_adjust_cost
;
4310 if (TARGET_DEBUG_ADDR
)
4312 targetm
.legitimate_address_p
= rs6000_debug_legitimate_address_p
;
4313 targetm
.legitimize_address
= rs6000_debug_legitimize_address
;
4314 rs6000_secondary_reload_class_ptr
4315 = rs6000_debug_secondary_reload_class
;
4316 targetm
.secondary_memory_needed
4317 = rs6000_debug_secondary_memory_needed
;
4318 targetm
.can_change_mode_class
4319 = rs6000_debug_can_change_mode_class
;
4320 rs6000_preferred_reload_class_ptr
4321 = rs6000_debug_preferred_reload_class
;
4322 rs6000_mode_dependent_address_ptr
4323 = rs6000_debug_mode_dependent_address
;
4326 if (rs6000_veclibabi_name
)
4328 if (strcmp (rs6000_veclibabi_name
, "mass") == 0)
4329 rs6000_veclib_handler
= rs6000_builtin_vectorized_libmass
;
4332 error ("unknown vectorization library ABI type in "
4333 "%<-mveclibabi=%s%>", rs6000_veclibabi_name
);
4339 /* Enable Altivec ABI for AIX -maltivec. */
4341 && (TARGET_ALTIVEC
|| TARGET_VSX
)
4342 && !OPTION_SET_P (rs6000_altivec_abi
))
4344 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_altivec_abi
)
4345 error ("target attribute or pragma changes AltiVec ABI");
4347 rs6000_altivec_abi
= 1;
4350 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4351 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4352 be explicitly overridden in either case. */
4355 if (!OPTION_SET_P (rs6000_altivec_abi
)
4356 && (TARGET_64BIT
|| TARGET_ALTIVEC
|| TARGET_VSX
))
4358 if (main_target_opt
!= NULL
&&
4359 !main_target_opt
->x_rs6000_altivec_abi
)
4360 error ("target attribute or pragma changes AltiVec ABI");
4362 rs6000_altivec_abi
= 1;
4366 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4367 So far, the only darwin64 targets are also MACH-O. */
4369 && DEFAULT_ABI
== ABI_DARWIN
4372 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_darwin64_abi
)
4373 error ("target attribute or pragma changes darwin64 ABI");
4376 rs6000_darwin64_abi
= 1;
4377 /* Default to natural alignment, for better performance. */
4378 rs6000_alignment_flags
= MASK_ALIGN_NATURAL
;
4382 /* Place FP constants in the constant pool instead of TOC
4383 if section anchors enabled. */
4384 if (flag_section_anchors
4385 && !OPTION_SET_P (TARGET_NO_FP_IN_TOC
))
4386 TARGET_NO_FP_IN_TOC
= 1;
4388 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4389 rs6000_print_isa_options (stderr
, 0, "before subtarget", rs6000_isa_flags
);
4391 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4392 SUBTARGET_OVERRIDE_OPTIONS
;
4394 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4395 SUBSUBTARGET_OVERRIDE_OPTIONS
;
4397 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4398 SUB3TARGET_OVERRIDE_OPTIONS
;
4401 /* If the ABI has support for PC-relative relocations, enable it by default.
4402 This test depends on the sub-target tests above setting the code model to
4403 medium for ELF v2 systems. */
4404 if (PCREL_SUPPORTED_BY_OS
4405 && (rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) == 0)
4406 rs6000_isa_flags
|= OPTION_MASK_PCREL
;
4408 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4409 after the subtarget override options are done. */
4410 else if (TARGET_PCREL
&& TARGET_CMODEL
!= CMODEL_MEDIUM
)
4412 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) != 0)
4413 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4415 rs6000_isa_flags
&= ~OPTION_MASK_PCREL
;
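  /* Taken together with the checks above: -mpcrel needs -mprefixed,
     -mprefixed needs -mcpu=power10 (or later), and -mpcrel additionally needs
     -mcmodel=medium, so PC-relative addressing is in practice only enabled by
     default on power10 ELFv2 configurations using the medium code model.  */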
4418 /* Enable -mmma by default on power10 systems. */
4419 if (TARGET_POWER10
&& (rs6000_isa_flags_explicit
& OPTION_MASK_MMA
) == 0)
4420 rs6000_isa_flags
|= OPTION_MASK_MMA
;
4423 && (rs6000_isa_flags_explicit
& OPTION_MASK_P10_FUSION
) == 0)
4424 rs6000_isa_flags
|= OPTION_MASK_P10_FUSION
;
4426 /* Turn off vector pair/mma options on non-power10 systems. */
4427 else if (!TARGET_POWER10
&& TARGET_MMA
)
4429 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MMA
) != 0)
4430 error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4432 rs6000_isa_flags
&= ~OPTION_MASK_MMA
;
4435 /* MMA requires SIMD support as ISA 3.1 claims and our implementation
4436 such as "*movoo" uses vector pair access which use VSX registers.
4437 So make MMA require VSX support here. */
4438 if (TARGET_MMA
&& !TARGET_VSX
)
4440 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MMA
) != 0)
4441 error ("%qs requires %qs", "-mmma", "-mvsx");
4442 rs6000_isa_flags
&= ~OPTION_MASK_MMA
;
4445 if (!TARGET_PCREL
&& TARGET_PCREL_OPT
)
4446 rs6000_isa_flags
&= ~OPTION_MASK_PCREL_OPT
;
4448 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4449 rs6000_print_isa_options (stderr
, 0, "after subtarget", rs6000_isa_flags
);
4451 rs6000_always_hint
= (rs6000_tune
!= PROCESSOR_POWER4
4452 && rs6000_tune
!= PROCESSOR_POWER5
4453 && rs6000_tune
!= PROCESSOR_POWER6
4454 && rs6000_tune
!= PROCESSOR_POWER7
4455 && rs6000_tune
!= PROCESSOR_POWER8
4456 && rs6000_tune
!= PROCESSOR_POWER9
4457 && rs6000_tune
!= PROCESSOR_POWER10
4458 && rs6000_tune
!= PROCESSOR_PPCA2
4459 && rs6000_tune
!= PROCESSOR_CELL
4460 && rs6000_tune
!= PROCESSOR_PPC476
);
4461 rs6000_sched_groups
= (rs6000_tune
== PROCESSOR_POWER4
4462 || rs6000_tune
== PROCESSOR_POWER5
4463 || rs6000_tune
== PROCESSOR_POWER7
4464 || rs6000_tune
== PROCESSOR_POWER8
);
4465 rs6000_align_branch_targets
= (rs6000_tune
== PROCESSOR_POWER4
4466 || rs6000_tune
== PROCESSOR_POWER5
4467 || rs6000_tune
== PROCESSOR_POWER6
4468 || rs6000_tune
== PROCESSOR_POWER7
4469 || rs6000_tune
== PROCESSOR_POWER8
4470 || rs6000_tune
== PROCESSOR_POWER9
4471 || rs6000_tune
== PROCESSOR_POWER10
4472 || rs6000_tune
== PROCESSOR_PPCE500MC
4473 || rs6000_tune
== PROCESSOR_PPCE500MC64
4474 || rs6000_tune
== PROCESSOR_PPCE5500
4475 || rs6000_tune
== PROCESSOR_PPCE6500
);
4477 /* Allow debug switches to override the above settings. These are set to -1
4478 in rs6000.opt to indicate the user hasn't directly set the switch. */
4479 if (TARGET_ALWAYS_HINT
>= 0)
4480 rs6000_always_hint
= TARGET_ALWAYS_HINT
;
4482 if (TARGET_SCHED_GROUPS
>= 0)
4483 rs6000_sched_groups
= TARGET_SCHED_GROUPS
;
4485 if (TARGET_ALIGN_BRANCH_TARGETS
>= 0)
4486 rs6000_align_branch_targets
= TARGET_ALIGN_BRANCH_TARGETS
;
4488 rs6000_sched_restricted_insns_priority
4489 = (rs6000_sched_groups
? 1 : 0);
4491 /* Handle -msched-costly-dep option. */
4492 rs6000_sched_costly_dep
4493 = (rs6000_sched_groups
? true_store_to_load_dep_costly
: no_dep_costly
);
4495 if (rs6000_sched_costly_dep_str
)
4497 if (! strcmp (rs6000_sched_costly_dep_str
, "no"))
4498 rs6000_sched_costly_dep
= no_dep_costly
;
4499 else if (! strcmp (rs6000_sched_costly_dep_str
, "all"))
4500 rs6000_sched_costly_dep
= all_deps_costly
;
4501 else if (! strcmp (rs6000_sched_costly_dep_str
, "true_store_to_load"))
4502 rs6000_sched_costly_dep
= true_store_to_load_dep_costly
;
4503 else if (! strcmp (rs6000_sched_costly_dep_str
, "store_to_load"))
4504 rs6000_sched_costly_dep
= store_to_load_dep_costly
;
4506 rs6000_sched_costly_dep
= ((enum rs6000_dependence_cost
)
4507 atoi (rs6000_sched_costly_dep_str
));
4510 /* Handle -minsert-sched-nops option. */
4511 rs6000_sched_insert_nops
4512 = (rs6000_sched_groups
? sched_finish_regroup_exact
: sched_finish_none
);
4514 if (rs6000_sched_insert_nops_str
)
4516 if (! strcmp (rs6000_sched_insert_nops_str
, "no"))
4517 rs6000_sched_insert_nops
= sched_finish_none
;
4518 else if (! strcmp (rs6000_sched_insert_nops_str
, "pad"))
4519 rs6000_sched_insert_nops
= sched_finish_pad_groups
;
4520 else if (! strcmp (rs6000_sched_insert_nops_str
, "regroup_exact"))
4521 rs6000_sched_insert_nops
= sched_finish_regroup_exact
;
4523 rs6000_sched_insert_nops
= ((enum rs6000_nop_insertion
)
4524 atoi (rs6000_sched_insert_nops_str
));
4527 /* Handle stack protector */
4528 if (!OPTION_SET_P (rs6000_stack_protector_guard
))
4529 #ifdef TARGET_THREAD_SSP_OFFSET
4530 rs6000_stack_protector_guard
= SSP_TLS
;
4532 rs6000_stack_protector_guard
= SSP_GLOBAL
;
4535 #ifdef TARGET_THREAD_SSP_OFFSET
4536 rs6000_stack_protector_guard_offset
= TARGET_THREAD_SSP_OFFSET
;
4537 rs6000_stack_protector_guard_reg
= TARGET_64BIT
? 13 : 2;
4540 if (OPTION_SET_P (rs6000_stack_protector_guard_offset_str
))
4543 const char *str
= rs6000_stack_protector_guard_offset_str
;
4546 long offset
= strtol (str
, &endp
, 0);
4547 if (!*str
|| *endp
|| errno
)
4548 error ("%qs is not a valid number in %qs", str
,
4549 "-mstack-protector-guard-offset=");
4551 if (!IN_RANGE (offset
, -0x8000, 0x7fff)
4552 || (TARGET_64BIT
&& (offset
& 3)))
4553 error ("%qs is not a valid offset in %qs", str
,
4554 "-mstack-protector-guard-offset=");
4556 rs6000_stack_protector_guard_offset
= offset
;
4559 if (OPTION_SET_P (rs6000_stack_protector_guard_reg_str
))
4561 const char *str
= rs6000_stack_protector_guard_reg_str
;
4562 int reg
= decode_reg_name (str
);
4564 if (!IN_RANGE (reg
, 1, 31))
4565 error ("%qs is not a valid base register in %qs", str
,
4566 "-mstack-protector-guard-reg=");
4568 rs6000_stack_protector_guard_reg
= reg
;
4571 if (rs6000_stack_protector_guard
== SSP_TLS
4572 && !IN_RANGE (rs6000_stack_protector_guard_reg
, 1, 31))
4573 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
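  /* A hypothetical invocation selecting the TLS-based guard explicitly might
     look like:

       -mstack-protector-guard=tls -mstack-protector-guard-reg=r13
       -mstack-protector-guard-offset=0x28

     (register and offset values are illustrative only); the offset must fit
     in a signed 16-bit value, must be a multiple of 4 for -m64, and the
     register must be a valid base register, as checked above.  */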
4577 #ifdef TARGET_REGNAMES
4578 /* If the user desires alternate register names, copy in the
4579 alternate names now. */
4580 if (TARGET_REGNAMES
)
4581 memcpy (rs6000_reg_names
, alt_reg_names
, sizeof (rs6000_reg_names
));
4584 /* Set aix_struct_return last, after the ABI is determined.
4585 If -maix-struct-return or -msvr4-struct-return was explicitly
4586 used, don't override with the ABI default. */
4587 if (!OPTION_SET_P (aix_struct_return
))
4588 aix_struct_return
= (DEFAULT_ABI
!= ABI_V4
|| DRAFT_V4_STRUCT_RET
);
4591 /* IBM XL compiler defaults to unsigned bitfields. */
4592 if (TARGET_XL_COMPAT
)
4593 flag_signed_bitfields
= 0;
4596 if (TARGET_LONG_DOUBLE_128
&& !TARGET_IEEEQUAD
)
4597 REAL_MODE_FORMAT (TFmode
) = &ibm_extended_format
;
4599 ASM_GENERATE_INTERNAL_LABEL (toc_label_name
, "LCTOC", 1);
4601 /* We can only guarantee the availability of DI pseudo-ops when
4602 assembling for 64-bit targets. */
4605 targetm
.asm_out
.aligned_op
.di
= NULL
;
4606 targetm
.asm_out
.unaligned_op
.di
= NULL
;
4610 /* Set branch target alignment, if not optimizing for size. */
4613 /* Cell wants to be aligned 8byte for dual issue. Titan wants to be
4614 aligned 8byte to avoid misprediction by the branch predictor. */
4615 if (rs6000_tune
== PROCESSOR_TITAN
4616 || rs6000_tune
== PROCESSOR_CELL
)
4618 if (flag_align_functions
&& !str_align_functions
)
4619 str_align_functions
= "8";
4620 if (flag_align_jumps
&& !str_align_jumps
)
4621 str_align_jumps
= "8";
4622 if (flag_align_loops
&& !str_align_loops
)
4623 str_align_loops
= "8";
4625 if (rs6000_align_branch_targets
)
4627 if (flag_align_functions
&& !str_align_functions
)
4628 str_align_functions
= "16";
4629 if (flag_align_jumps
&& !str_align_jumps
)
4630 str_align_jumps
= "16";
4631 if (flag_align_loops
&& !str_align_loops
)
4633 can_override_loop_align
= 1;
4634 str_align_loops
= "16";
4639 /* Arrange to save and restore machine status around nested functions. */
4640 init_machine_status
= rs6000_init_machine_status
;
4642 /* We should always be splitting complex arguments, but we can't break
4643 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4644 if (DEFAULT_ABI
== ABI_V4
|| DEFAULT_ABI
== ABI_DARWIN
)
4645 targetm
.calls
.split_complex_arg
= NULL
;
4647 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4648 if (DEFAULT_ABI
== ABI_AIX
)
4649 targetm
.calls
.custom_function_descriptors
= 0;
4652 /* Initialize rs6000_cost with the appropriate target costs. */
4654 rs6000_cost
= TARGET_POWERPC64
? &size64_cost
: &size32_cost
;
4656 switch (rs6000_tune
)
4658 case PROCESSOR_RS64A
:
4659 rs6000_cost
= &rs64a_cost
;
4662 case PROCESSOR_MPCCORE
:
4663 rs6000_cost
= &mpccore_cost
;
4666 case PROCESSOR_PPC403
:
4667 rs6000_cost
= &ppc403_cost
;
4670 case PROCESSOR_PPC405
:
4671 rs6000_cost
= &ppc405_cost
;
4674 case PROCESSOR_PPC440
:
4675 rs6000_cost
= &ppc440_cost
;
4678 case PROCESSOR_PPC476
:
4679 rs6000_cost
= &ppc476_cost
;
4682 case PROCESSOR_PPC601
:
4683 rs6000_cost
= &ppc601_cost
;
4686 case PROCESSOR_PPC603
:
4687 rs6000_cost
= &ppc603_cost
;
4690 case PROCESSOR_PPC604
:
4691 rs6000_cost
= &ppc604_cost
;
4694 case PROCESSOR_PPC604e
:
4695 rs6000_cost
= &ppc604e_cost
;
4698 case PROCESSOR_PPC620
:
4699 rs6000_cost
= &ppc620_cost
;
4702 case PROCESSOR_PPC630
:
4703 rs6000_cost
= &ppc630_cost
;
4706 case PROCESSOR_CELL
:
4707 rs6000_cost
= &ppccell_cost
;
4710 case PROCESSOR_PPC750
:
4711 case PROCESSOR_PPC7400
:
4712 rs6000_cost
= &ppc750_cost
;
4715 case PROCESSOR_PPC7450
:
4716 rs6000_cost
= &ppc7450_cost
;
4719 case PROCESSOR_PPC8540
:
4720 case PROCESSOR_PPC8548
:
4721 rs6000_cost
= &ppc8540_cost
;
4724 case PROCESSOR_PPCE300C2
:
4725 case PROCESSOR_PPCE300C3
:
4726 rs6000_cost
= &ppce300c2c3_cost
;
4729 case PROCESSOR_PPCE500MC
:
4730 rs6000_cost
= &ppce500mc_cost
;
4733 case PROCESSOR_PPCE500MC64
:
4734 rs6000_cost
= &ppce500mc64_cost
;
4737 case PROCESSOR_PPCE5500
:
4738 rs6000_cost
= &ppce5500_cost
;
4741 case PROCESSOR_PPCE6500
:
4742 rs6000_cost
= &ppce6500_cost
;
4745 case PROCESSOR_TITAN
:
4746 rs6000_cost
= &titan_cost
;
4749 case PROCESSOR_POWER4
:
4750 case PROCESSOR_POWER5
:
4751 rs6000_cost
= &power4_cost
;
4754 case PROCESSOR_POWER6
:
4755 rs6000_cost
= &power6_cost
;
4758 case PROCESSOR_POWER7
:
4759 rs6000_cost
= &power7_cost
;
4762 case PROCESSOR_POWER8
:
4763 rs6000_cost
= &power8_cost
;
4766 case PROCESSOR_POWER9
:
4767 rs6000_cost
= &power9_cost
;
4770 case PROCESSOR_POWER10
:
4771 rs6000_cost
= &power10_cost
;
4774 case PROCESSOR_PPCA2
:
4775 rs6000_cost
= &ppca2_cost
;
4784 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4785 param_simultaneous_prefetches
,
4786 rs6000_cost
->simultaneous_prefetches
);
4787 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4788 param_l1_cache_size
,
4789 rs6000_cost
->l1_cache_size
);
4790 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4791 param_l1_cache_line_size
,
4792 rs6000_cost
->cache_line_size
);
4793 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4794 param_l2_cache_size
,
4795 rs6000_cost
->l2_cache_size
);
4797 /* Increase loop peeling limits based on performance analysis. */
4798 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4799 param_max_peeled_insns
, 400);
4800 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4801 param_max_completely_peeled_insns
, 400);
4803 /* The lxvl/stxvl instructions don't perform well before Power10. */
4805 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4806 param_vect_partial_vector_usage
, 1);
4808 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4809 param_vect_partial_vector_usage
, 0);
4811 /* Use the 'model' -fsched-pressure algorithm by default. */
4812 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4813 param_sched_pressure_algorithm
,
4814 SCHED_PRESSURE_MODEL
);
4816 /* If using typedef char *va_list, signal that
4817 __builtin_va_start (&ap, 0) can be optimized to
4818 ap = __builtin_next_arg (0). */
4819 if (DEFAULT_ABI
!= ABI_V4
)
4820 targetm
.expand_builtin_va_start
= NULL
;
4823 rs6000_override_options_after_change ();
4825 /* If not explicitly specified via option, decide whether to generate indexed
4826 load/store instructions. A value of -1 indicates that the
4827 initial value of this variable has not been overwritten. During
4828 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4829 if (TARGET_AVOID_XFORM
== -1)
4830 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4831 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4832 need indexed accesses and the type used is the scalar type of the element
4833 being loaded or stored. */
4834 TARGET_AVOID_XFORM
= (rs6000_tune
== PROCESSOR_POWER6
&& TARGET_CMPB
4835 && !TARGET_ALTIVEC
);
4837 /* Set the -mrecip options. */
4838 if (rs6000_recip_name
)
4840 char *p
= ASTRDUP (rs6000_recip_name
);
4842 unsigned int mask
, i
;
4845 while ((q
= strtok (p
, ",")) != NULL
)
4856 if (!strcmp (q
, "default"))
4857 mask
= ((TARGET_RECIP_PRECISION
)
4858 ? RECIP_HIGH_PRECISION
: RECIP_LOW_PRECISION
);
4861 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
4862 if (!strcmp (q
, recip_options
[i
].string
))
4864 mask
= recip_options
[i
].mask
;
4868 if (i
== ARRAY_SIZE (recip_options
))
4870 error ("unknown option for %<%s=%s%>", "-mrecip", q
);
4878 rs6000_recip_control
&= ~mask
;
4880 rs6000_recip_control
|= mask
;
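      /* The accepted -mrecip= syntax is a comma-separated list of the
         keywords in recip_options, each optionally prefixed with '!' to clear
         the corresponding bits instead of setting them; e.g. (illustrative)
         "-mrecip=rsqrtf,!div" enables the single-precision reciprocal
         square-root estimate while disabling the reciprocal-divide
         estimates.  */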
4884 /* Set the builtin mask of the various options used that could affect which
4885 builtins were used. In the past we used target_flags, but we've run out
4886 of bits, and some options are no longer in target_flags. */
4887 rs6000_builtin_mask
= rs6000_builtin_mask_calculate ();
4888 if (TARGET_DEBUG_BUILTIN
|| TARGET_DEBUG_TARGET
)
4889 rs6000_print_builtin_options (stderr
, 0, "builtin mask",
4890 rs6000_builtin_mask
);
4892 /* Initialize all of the registers. */
4893 rs6000_init_hard_regno_mode_ok (global_init_p
);
4895 /* Save the initial options in case the user does function specific options */
4897 target_option_default_node
= target_option_current_node
4898 = build_target_option_node (&global_options
, &global_options_set
);
4900 /* If not explicitly specified via option, decide whether to generate the
4901 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4902 if (TARGET_LINK_STACK
== -1)
4903 SET_TARGET_LINK_STACK (rs6000_tune
== PROCESSOR_PPC476
&& flag_pic
);
4905 /* Deprecate use of -mno-speculate-indirect-jumps. */
4906 if (!rs6000_speculate_indirect_jumps
)
4907 warning (0, "%qs is deprecated and not recommended in any circumstances",
4908 "-mno-speculate-indirect-jumps");
4913 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4914 define the target cpu type. */
4917 rs6000_option_override (void)
4919 (void) rs6000_option_override_internal (true);
4923 /* Implement LOOP_ALIGN. */
4925 rs6000_loop_align (rtx label
)
4930 /* Don't override loop alignment if -falign-loops was specified. */
4931 if (!can_override_loop_align
)
4934 bb
= BLOCK_FOR_INSN (label
);
4935 ninsns
= num_loop_insns(bb
->loop_father
);
4937 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4938 if (ninsns
> 4 && ninsns
<= 8
4939 && (rs6000_tune
== PROCESSOR_POWER4
4940 || rs6000_tune
== PROCESSOR_POWER5
4941 || rs6000_tune
== PROCESSOR_POWER6
4942 || rs6000_tune
== PROCESSOR_POWER7
4943 || rs6000_tune
== PROCESSOR_POWER8
))
4944 return align_flags (5);
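  /* align_flags (5) requests 2**5 == 32-byte alignment, i.e. one icache
     sector on these processors, so an aligned 5-8 insn loop fits in a single
     sector.  */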
/* Return true iff a data reference of TYPE can reach vector alignment (16)
   after applying N iterations.  This routine does not determine how many
   iterations are required to reach the desired alignment.  */
4954 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED
, bool is_packed
)
4961 if (rs6000_alignment_flags
== MASK_ALIGN_NATURAL
)
4964 if (rs6000_alignment_flags
== MASK_ALIGN_POWER
)
4974 /* Assuming that all other types are naturally aligned. CHECKME! */
4979 /* Return true if the vector misalignment factor is supported by the
4982 rs6000_builtin_support_vector_misalignment (machine_mode mode
,
4989 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4992 /* Return if movmisalign pattern is not supported for this mode. */
4993 if (optab_handler (movmisalign_optab
, mode
) == CODE_FOR_nothing
)
4996 if (misalignment
== -1)
4998 /* Misalignment factor is unknown at compile time but we know
4999 it's word aligned. */
5000 if (rs6000_vector_alignment_reachable (type
, is_packed
))
5002 int element_size
= TREE_INT_CST_LOW (TYPE_SIZE (type
));
5004 if (element_size
== 64 || element_size
== 32)
5011 /* VSX supports word-aligned vector. */
5012 if (misalignment
% 4 == 0)
5018 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5020 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
5021 tree vectype
, int misalign
)
5026 switch (type_of_cost
)
5034 case cond_branch_not_taken
:
5038 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5042 /* Power7 has only one permute unit, make it a bit expensive. */
5043 if (TARGET_VSX
&& rs6000_tune
== PROCESSOR_POWER7
)
5048 case vec_promote_demote
:
5049 /* Power7 has only one permute/pack unit, make it a bit expensive. */
5050 if (TARGET_VSX
&& rs6000_tune
== PROCESSOR_POWER7
)
5055 case cond_branch_taken
:
5058 case unaligned_load
:
5059 case vector_gather_load
:
5060 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5061 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5064 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
5066 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
5067 /* See PR102767, consider V1TI to keep consistency. */
5068 if (elements
== 2 || elements
== 1)
5069 /* Double word aligned. */
5077 /* Double word aligned. */
5081 /* Unknown misalignment. */
5094 /* Misaligned loads are not supported. */
5097 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5100 case unaligned_store
:
5101 case vector_scatter_store
:
5102 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5105 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
5107 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
5108 /* See PR102767, consider V1TI to keep consistency. */
5109 if (elements
== 2 || elements
== 1)
5110 /* Double word aligned. */
5118 /* Double word aligned. */
5122 /* Unknown misalignment. */
5135 /* Misaligned stores are not supported. */
5141 /* This is a rough approximation assuming non-constant elements
5142 constructed into a vector via element insertion. FIXME:
5143 vec_construct is not granular enough for uniformly good
5144 decisions. If the initialization is a splat, this is
5145 cheaper than we estimate. Improve this someday. */
5146 elem_type
= TREE_TYPE (vectype
);
5147 /* 32-bit vectors loaded into registers are stored as double
5148 precision, so we need 2 permutes, 2 converts, and 1 merge
5149 to construct a vector of short floats from them. */
5150 if (SCALAR_FLOAT_TYPE_P (elem_type
)
5151 && TYPE_PRECISION (elem_type
) == 32)
5153 /* On POWER9, integer vector types are built up in GPRs and then
5154 use a direct move (2 cycles). For POWER8 this is even worse,
5155 as we need two direct moves and a merge, and the direct moves
5157 else if (INTEGRAL_TYPE_P (elem_type
))
5159 if (TARGET_P9_VECTOR
)
5160 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 2;
5162 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 5;
5165 /* V2DFmode doesn't need a direct move. */
5173 /* Implement targetm.vectorize.preferred_simd_mode. */
5176 rs6000_preferred_simd_mode (scalar_mode mode
)
5178 opt_machine_mode vmode
= mode_for_vector (mode
, 16 / GET_MODE_SIZE (mode
));
5180 if (vmode
.exists () && !VECTOR_MEM_NONE_P (vmode
.require ()))
5181 return vmode
.require ();
5186 class rs6000_cost_data
: public vector_costs
5189 using vector_costs::vector_costs
;
5191 unsigned int add_stmt_cost (int count
, vect_cost_for_stmt kind
,
5192 stmt_vec_info stmt_info
, slp_tree
, tree vectype
,
5194 vect_cost_model_location where
) override
;
5195 void finish_cost (const vector_costs
*) override
;
5198 void update_target_cost_per_stmt (vect_cost_for_stmt
, stmt_vec_info
,
5199 vect_cost_model_location
, unsigned int);
5200 void density_test (loop_vec_info
);
5201 void adjust_vect_cost_per_loop (loop_vec_info
);
5203 /* Total number of vectorized stmts (loop only). */
5204 unsigned m_nstmts
= 0;
5205 /* Total number of loads (loop only). */
5206 unsigned m_nloads
= 0;
5207 /* Possible extra penalized cost on vector construction (loop only). */
5208 unsigned m_extra_ctor_cost
= 0;
5209 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5210 instruction is needed by the vectorization. */
5211 bool m_vect_nonmem
= false;
5214 /* Test for likely overcommitment of vector hardware resources. If a
5215 loop iteration is relatively large, and too large a percentage of
5216 instructions in the loop are vectorized, the cost model may not
5217 adequately reflect delays from unavailable vector resources.
5218 Penalize the loop body cost for this case. */
5221 rs6000_cost_data::density_test (loop_vec_info loop_vinfo
)
5223 /* This density test only cares about the cost of vector version of the
5224 loop, so immediately return if we are passed costing for the scalar
5225 version (namely computing single scalar iteration cost). */
5226 if (m_costing_for_scalar
)
5229 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5230 basic_block
*bbs
= get_loop_body (loop
);
5231 int nbbs
= loop
->num_nodes
;
5232 int vec_cost
= m_costs
[vect_body
], not_vec_cost
= 0;
5234 for (int i
= 0; i
< nbbs
; i
++)
5236 basic_block bb
= bbs
[i
];
5237 gimple_stmt_iterator gsi
;
5239 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
5241 gimple
*stmt
= gsi_stmt (gsi
);
5242 if (is_gimple_debug (stmt
))
5245 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (stmt
);
5247 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5248 && !STMT_VINFO_IN_PATTERN_P (stmt_info
))
5254 int density_pct
= (vec_cost
* 100) / (vec_cost
+ not_vec_cost
);
5256 if (density_pct
> rs6000_density_pct_threshold
5257 && vec_cost
+ not_vec_cost
> rs6000_density_size_threshold
)
5259 m_costs
[vect_body
] = vec_cost
* (100 + rs6000_density_penalty
) / 100;
5260 if (dump_enabled_p ())
5261 dump_printf_loc (MSG_NOTE
, vect_location
,
5262 "density %d%%, cost %d exceeds threshold, penalizing "
5263 "loop body cost by %u%%\n", density_pct
,
5264 vec_cost
+ not_vec_cost
, rs6000_density_penalty
);
5267 /* Check whether we need to penalize the body cost to account
5268 for excess strided or elementwise loads. */
5269 if (m_extra_ctor_cost
> 0)
5271 gcc_assert (m_nloads
<= m_nstmts
);
5272 unsigned int load_pct
= (m_nloads
* 100) / m_nstmts
;
5274 /* It's likely to be bounded by latency and execution resources
5275 from many scalar loads which are strided or elementwise loads
5276 into a vector if both conditions below are found:
5277 1. there are many loads, it's easy to result in a long wait
5279 2. load has a big proportion of all vectorized statements,
5280 it's not easy to schedule other statements to spread among
5282 One typical case is the innermost loop of the hotspot of SPEC2017
5283 503.bwaves_r without loop interchange. */
5284 if (m_nloads
> (unsigned int) rs6000_density_load_num_threshold
5285 && load_pct
> (unsigned int) rs6000_density_load_pct_threshold
)
5287 m_costs
[vect_body
] += m_extra_ctor_cost
;
5288 if (dump_enabled_p ())
5289 dump_printf_loc (MSG_NOTE
, vect_location
,
5290 "Found %u loads and "
5291 "load pct. %u%% exceed "
5293 "penalizing loop body "
5294 "cost by extra cost %u "
5302 /* Implement targetm.vectorize.create_costs. */
5304 static vector_costs
*
5305 rs6000_vectorize_create_costs (vec_info
*vinfo
, bool costing_for_scalar
)
5307 return new rs6000_cost_data (vinfo
, costing_for_scalar
);
5310 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5311 For some statement, we would like to further fine-grain tweak the cost on
5312 top of rs6000_builtin_vectorization_cost handling which doesn't have any
5313 information on statement operation codes etc. One typical case here is
5314 COND_EXPR, it takes the same cost to simple FXU instruction when evaluating
5315 for scalar cost, but it should be priced more whatever transformed to either
5316 compare + branch or compare + isel instructions. */
5319 rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind
,
5320 struct _stmt_vec_info
*stmt_info
)
5322 if (kind
== scalar_stmt
&& stmt_info
&& stmt_info
->stmt
5323 && gimple_code (stmt_info
->stmt
) == GIMPLE_ASSIGN
)
5325 tree_code subcode
= gimple_assign_rhs_code (stmt_info
->stmt
);
5326 if (subcode
== COND_EXPR
)
5333 /* Helper function for add_stmt_cost. Check each statement cost
5334 entry, gather information and update the target_cost fields
5337 rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind
,
5338 stmt_vec_info stmt_info
,
5339 vect_cost_model_location where
,
5340 unsigned int orig_count
)
5343 /* Check whether we're doing something other than just a copy loop.
5344 Not all such loops may be profitably vectorized; see
5345 rs6000_finish_cost. */
5346 if (kind
== vec_to_scalar
5348 || kind
== vec_promote_demote
5349 || kind
== vec_construct
5350 || kind
== scalar_to_vec
5351 || (where
== vect_body
&& kind
== vector_stmt
))
5352 m_vect_nonmem
= true;
5354 /* Gather some information when we are costing the vectorized instruction
5355 for the statements located in a loop body. */
5356 if (!m_costing_for_scalar
5357 && is_a
<loop_vec_info
> (m_vinfo
)
5358 && where
== vect_body
)
5360 m_nstmts
+= orig_count
;
5362 if (kind
== scalar_load
|| kind
== vector_load
5363 || kind
== unaligned_load
|| kind
== vector_gather_load
)
5364 m_nloads
+= orig_count
;
5366 /* Power processors do not currently have instructions for strided
5367 and elementwise loads, and instead we must generate multiple
5368 scalar loads. This leads to undercounting of the cost. We
5369 account for this by scaling the construction cost by the number
5370 of elements involved, and saving this as extra cost that we may
5371 or may not need to apply. When finalizing the cost of the loop,
5372 the extra penalty is applied when the load density heuristics
5374 if (kind
== vec_construct
&& stmt_info
5375 && STMT_VINFO_TYPE (stmt_info
) == load_vec_info_type
5376 && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) == VMAT_ELEMENTWISE
5377 || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) == VMAT_STRIDED_SLP
))
5379 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5380 unsigned int nunits
= vect_nunits_for_cost (vectype
);
          /* As PR103702 shows, it's possible that the vectorizer wants to
             cost only one unit here; there is no need to penalize that case,
             so simply return early.  */
          if (nunits == 1)
            return;
          /* The i386 port adopts nunits * stmt_cost as the penalized cost
             for this kind of penalization; we used to follow it but
             found it could result in an unreliable body cost especially
             for V16QI/V8HI modes.  To make it better, we choose this
             new heuristic: for each scalar load, we use 2 as the penalized
             cost for the case with 2 nunits and use 1 for the other
             cases.  It's without much supporting theory, mainly
             concluded from the broad performance evaluations on Power8,
             Power9 and Power10.  One possibly related point is that:
             vector construction for more units would use more insns,
             so it has more chances to schedule them better (even run in
             parallel when enough units are available at that time), so
             it seems reasonable not to penalize that much for them.  */
          unsigned int adjusted_cost = (nunits == 2) ? 2 : 1;
          unsigned int extra_cost = nunits * adjusted_cost;
          m_extra_ctor_cost += extra_cost;
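          /* Worked example of the heuristic: a two-element construct (e.g.
             V2DI) has nunits == 2, giving extra_cost = 2 * 2 = 4, while a
             sixteen-element construct (e.g. V16QI) has nunits == 16, giving
             extra_cost = 16 * 1 = 16.  */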
5407 rs6000_cost_data::add_stmt_cost (int count
, vect_cost_for_stmt kind
,
5408 stmt_vec_info stmt_info
, slp_tree
,
5409 tree vectype
, int misalign
,
5410 vect_cost_model_location where
)
5412 unsigned retval
= 0;
5414 if (flag_vect_cost_model
)
5416 int stmt_cost
= rs6000_builtin_vectorization_cost (kind
, vectype
,
5418 stmt_cost
+= rs6000_adjust_vect_cost_per_stmt (kind
, stmt_info
);
5419 /* Statements in an inner loop relative to the loop being
5420 vectorized are weighted more heavily. The value here is
5421 arbitrary and could potentially be improved with analysis. */
5422 unsigned int orig_count
= count
;
5423 retval
= adjust_cost_for_freq (stmt_info
, where
, count
* stmt_cost
);
5424 m_costs
[where
] += retval
;
5426 update_target_cost_per_stmt (kind
, stmt_info
, where
, orig_count
);
5432 /* For some target specific vectorization cost which can't be handled per stmt,
5433 we check the requisite conditions and adjust the vectorization cost
5434 accordingly if satisfied. One typical example is to model shift cost for
5435 vector with length by counting number of required lengths under condition
5436 LOOP_VINFO_FULLY_WITH_LENGTH_P. */
5439 rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo
)
5441 if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
))
5443 rgroup_controls
*rgc
;
5444 unsigned int num_vectors_m1
;
5445 unsigned int shift_cnt
= 0;
5446 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo
), num_vectors_m1
, rgc
)
5448 /* Each length needs one shift to fill into bits 0-7. */
5449 shift_cnt
+= num_vectors_m1
+ 1;
5451 add_stmt_cost (shift_cnt
, scalar_stmt
, NULL
, NULL
,
5452 NULL_TREE
, 0, vect_body
);
5457 rs6000_cost_data::finish_cost (const vector_costs
*scalar_costs
)
5459 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (m_vinfo
))
5461 adjust_vect_cost_per_loop (loop_vinfo
);
5462 density_test (loop_vinfo
);
5464 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5465 that require versioning for any reason. The vectorization is at
5466 best a wash inside the loop, and the versioning checks make
5467 profitability highly unlikely and potentially quite harmful. */
      if (!m_vect_nonmem
	  && LOOP_VINFO_VECT_FACTOR (loop_vinfo) == 2
	  && LOOP_REQUIRES_VERSIONING (loop_vinfo))
	m_costs[vect_body] += 10000;
    }

  vector_costs::finish_cost (scalar_costs);
}
/* Implement targetm.loop_unroll_adjust.  */

static unsigned
rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
{
  if (unroll_only_small_loops)
    {
      /* TODO: These are hardcoded values right now.  We probably should use
	 a PARAM here.  */
      if (loop->ninsns <= 6)
	return MIN (4, nunroll);
      if (loop->ninsns <= 10)
	return MIN (2, nunroll);

      return 0;
    }

  return nunroll;
}
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.

   Implement targetm.vectorize.builtin_vectorized_function.  */

static tree
rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
				    tree type_in)
{
  machine_mode in_mode, out_mode;
  int in_n, out_n;

  if (TARGET_DEBUG_BUILTIN)
    fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
	     combined_fn_name (combined_fn (fn)),
	     GET_MODE_NAME (TYPE_MODE (type_out)),
	     GET_MODE_NAME (TYPE_MODE (type_in)));

  /* TODO: Should this be gcc_assert?  */
  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  switch (fn)
    {
    CASE_CFN_COPYSIGN:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[RS6000_BIF_CPSGNDP];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_CPSGNSP];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_COPYSIGN_V4SF];
      break;
    CASE_CFN_CEIL:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[RS6000_BIF_XVRDPIP];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_XVRSPIP];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_VRFIP];
      break;
    CASE_CFN_FLOOR:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[RS6000_BIF_XVRDPIM];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_XVRSPIM];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_VRFIM];
      break;
    CASE_CFN_FMA:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[RS6000_BIF_XVMADDDP];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_XVMADDSP];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_VMADDFP];
      break;
    CASE_CFN_TRUNC:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[RS6000_BIF_XVRDPIZ];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_XVRSPIZ];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_VRFIZ];
      break;
    CASE_CFN_NEARBYINT:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && flag_unsafe_math_optimizations
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[RS6000_BIF_XVRDPI];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && flag_unsafe_math_optimizations
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_XVRSPI];
      break;
    CASE_CFN_RINT:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && !flag_trapping_math
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[RS6000_BIF_XVRDPIC];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && !flag_trapping_math
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_XVRSPIC];
      break;
    default:
      break;
    }

  /* Generate calls to libmass if appropriate.  */
  if (rs6000_veclib_handler)
    return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);

  return NULL_TREE;
}
/* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
   library with vectorized intrinsics.  */

static tree
rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
				   tree type_in)
{
  char name[32];
  const char *suffix = NULL;
  tree fntype, new_fndecl, bdecl = NULL_TREE;
  int n_args = 1;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;
  /* Libmass is suitable for unsafe math only as it does not correctly support
     parts of IEEE with the required precision such as denormals.  Only support
     it if we have VSX to use the simd d2 or f4 functions.
     XXX: Add variable length support.  */
  if (!flag_unsafe_math_optimizations || !TARGET_VSX)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;
  if (el_mode == DFmode && n == 2)
    {
      bdecl = mathfn_built_in (double_type_node, fn);
      suffix = "d2";	/* pow -> powd2 */
    }
  else if (el_mode == SFmode && n == 4)
    {
      bdecl = mathfn_built_in (float_type_node, fn);
      suffix = "4";	/* powf -> powf4 */
    }
  else
    return NULL_TREE;

  gcc_assert (suffix != NULL);
  bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));

  strcpy (name, bname + strlen ("__builtin_"));
  strcat (name, suffix);
  if (n_args == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else if (n_args == 2)
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);
  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
/* Default CPU string for rs6000*_file_start functions.  */
static const char *rs6000_default_cpu;

#ifdef USING_ELFOS_H
const char *rs6000_machine;
5749 rs6000_machine_from_flags (void)
5752 if (rs6000_cpu
== PROCESSOR_PPCE300C2
|| rs6000_cpu
== PROCESSOR_PPCE300C3
)
5754 if (rs6000_cpu
== PROCESSOR_PPC8540
|| rs6000_cpu
== PROCESSOR_PPC8548
)
5756 if (rs6000_cpu
== PROCESSOR_PPCE500MC
)
5758 if (rs6000_cpu
== PROCESSOR_PPCE500MC64
)
5760 if (rs6000_cpu
== PROCESSOR_PPCE5500
)
5762 if (rs6000_cpu
== PROCESSOR_PPCE6500
)
5766 if (rs6000_cpu
== PROCESSOR_PPC403
)
5768 if (rs6000_cpu
== PROCESSOR_PPC405
)
5770 if (rs6000_cpu
== PROCESSOR_PPC440
)
5772 if (rs6000_cpu
== PROCESSOR_PPC476
)
5776 if (rs6000_cpu
== PROCESSOR_PPCA2
)
5780 if (rs6000_cpu
== PROCESSOR_CELL
)
5784 if (rs6000_cpu
== PROCESSOR_TITAN
)
5787 /* 500 series and 800 series */
5788 if (rs6000_cpu
== PROCESSOR_MPCCORE
)
  /* This (and ppc64 below) are disabled here (for now at least) because
     PROCESSOR_POWERPC, PROCESSOR_POWERPC64, and PROCESSOR_COMMON
     are #define'd as some of these.  Untangling that is a job for later.  */
5796 /* 600 series and 700 series, "classic" */
5797 if (rs6000_cpu
== PROCESSOR_PPC601
|| rs6000_cpu
== PROCESSOR_PPC603
5798 || rs6000_cpu
== PROCESSOR_PPC604
|| rs6000_cpu
== PROCESSOR_PPC604e
5799 || rs6000_cpu
== PROCESSOR_PPC750
)
5803 /* Classic with AltiVec, "G4" */
5804 if (rs6000_cpu
== PROCESSOR_PPC7400
|| rs6000_cpu
== PROCESSOR_PPC7450
)
5808 /* The older 64-bit CPUs */
5809 if (rs6000_cpu
== PROCESSOR_PPC620
|| rs6000_cpu
== PROCESSOR_PPC630
5810 || rs6000_cpu
== PROCESSOR_RS64A
)
5814 HOST_WIDE_INT flags
= rs6000_isa_flags
;
5816 /* Disable the flags that should never influence the .machine selection. */
5817 flags
&= ~(OPTION_MASK_PPC_GFXOPT
| OPTION_MASK_PPC_GPOPT
| OPTION_MASK_ISEL
);
5819 if ((flags
& (ISA_3_1_MASKS_SERVER
& ~ISA_3_0_MASKS_SERVER
)) != 0)
5821 if ((flags
& (ISA_3_0_MASKS_SERVER
& ~ISA_2_7_MASKS_SERVER
)) != 0)
5823 if ((flags
& (ISA_2_7_MASKS_SERVER
& ~ISA_2_6_MASKS_SERVER
)) != 0)
5825 if ((flags
& (ISA_2_6_MASKS_SERVER
& ~ISA_2_5_MASKS_SERVER
)) != 0)
5827 if ((flags
& (ISA_2_5_MASKS_SERVER
& ~ISA_2_4_MASKS
)) != 0)
5829 if ((flags
& (ISA_2_4_MASKS
& ~ISA_2_1_MASKS
)) != 0)
5831 if ((flags
& ISA_2_1_MASKS
) != 0)
5833 if ((flags
& OPTION_MASK_POWERPC64
) != 0)
5839 emit_asm_machine (void)
5841 fprintf (asm_out_file
, "\t.machine %s\n", rs6000_machine
);
5845 /* Do anything needed at the start of the asm file. */
5848 rs6000_file_start (void)
5851 const char *start
= buffer
;
5852 FILE *file
= asm_out_file
;
5854 rs6000_default_cpu
= TARGET_CPU_DEFAULT
;
5856 default_file_start ();
5858 if (flag_verbose_asm
)
5860 sprintf (buffer
, "\n%s rs6000/powerpc options:", ASM_COMMENT_START
);
5862 if (rs6000_default_cpu
!= 0 && rs6000_default_cpu
[0] != '\0')
5864 fprintf (file
, "%s --with-cpu=%s", start
, rs6000_default_cpu
);
5868 if (OPTION_SET_P (rs6000_cpu_index
))
5870 fprintf (file
, "%s -mcpu=%s", start
,
5871 processor_target_table
[rs6000_cpu_index
].name
);
5875 if (OPTION_SET_P (rs6000_tune_index
))
5877 fprintf (file
, "%s -mtune=%s", start
,
5878 processor_target_table
[rs6000_tune_index
].name
);
5882 if (PPC405_ERRATUM77
)
5884 fprintf (file
, "%s PPC405CR_ERRATUM77", start
);
5888 #ifdef USING_ELFOS_H
5889 switch (rs6000_sdata
)
5891 case SDATA_NONE
: fprintf (file
, "%s -msdata=none", start
); start
= ""; break;
5892 case SDATA_DATA
: fprintf (file
, "%s -msdata=data", start
); start
= ""; break;
5893 case SDATA_SYSV
: fprintf (file
, "%s -msdata=sysv", start
); start
= ""; break;
5894 case SDATA_EABI
: fprintf (file
, "%s -msdata=eabi", start
); start
= ""; break;
5897 if (rs6000_sdata
&& g_switch_value
)
5899 fprintf (file
, "%s -G %d", start
,
5909 #ifdef USING_ELFOS_H
5910 rs6000_machine
= rs6000_machine_from_flags ();
5911 emit_asm_machine ();
5914 if (DEFAULT_ABI
== ABI_ELFv2
)
5915 fprintf (file
, "\t.abiversion 2\n");
5919 /* Return nonzero if this function is known to have a null epilogue. */
5922 direct_return (void)
5924 if (reload_completed
)
5926 rs6000_stack_t
*info
= rs6000_stack_info ();
5928 if (info
->first_gp_reg_save
== 32
5929 && info
->first_fp_reg_save
== 64
5930 && info
->first_altivec_reg_save
== LAST_ALTIVEC_REGNO
+ 1
5931 && ! info
->lr_save_p
5932 && ! info
->cr_save_p
5933 && info
->vrsave_size
== 0
/* Helper for num_insns_constant.  Calculate the number of instructions
   needed to load VALUE into a single gpr using combinations of addi, addis,
   ori, oris, sldi and rldimi instructions.  */
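/* For example, without prefixed (paddi) support, 0x12345678 takes an
   addis/ori pair (2 insns), while a 64-bit constant such as
   0x1234567800000000 takes 3: build the high 32 bits, then shift them
   into place with sldi.  */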
static int
num_insns_constant_gpr (HOST_WIDE_INT value)
{
  /* signed constant loadable with addi */
  if (SIGNED_INTEGER_16BIT_P (value))
    return 1;

  /* constant loadable with addis */
  else if ((value & 0xffff) == 0
	   && (value >> 31 == -1 || value >> 31 == 0))
    return 1;

  /* PADDI can support up to 34 bit signed integers.  */
  else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
    return 1;

  else if (TARGET_POWERPC64)
    {
      HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
      HOST_WIDE_INT high = value >> 31;

      if (high == 0 || high == -1)
	return 2;

      high >>= 1;

      if (low == 0 || low == high)
	return num_insns_constant_gpr (high) + 1;
      else if (high == 0)
	return num_insns_constant_gpr (low) + 1;
      else
	return (num_insns_constant_gpr (high)
		+ num_insns_constant_gpr (low) + 1);
    }

  else
    return 2;
}
5984 /* Helper for num_insns_constant. Allow constants formed by the
5985 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5986 and handle modes that require multiple gprs. */
5989 num_insns_constant_multi (HOST_WIDE_INT value
, machine_mode mode
)
5991 int nregs
= (GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
5995 HOST_WIDE_INT low
= sext_hwi (value
, BITS_PER_WORD
);
5996 int insns
= num_insns_constant_gpr (low
);
5998 /* We won't get more than 2 from num_insns_constant_gpr
5999 except when TARGET_POWERPC64 and mode is DImode or
6000 wider, so the register mode must be DImode. */
6001 && rs6000_is_valid_and_mask (GEN_INT (low
), DImode
))
6004 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
6005 it all at once would be UB. */
6006 value
>>= (BITS_PER_WORD
- 1);
/* Return the number of instructions it takes to form a constant in as
   many gprs as are needed for MODE.  */
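/* For example, a DImode constant is costed as a single gpr, while a
   128-bit TFmode/KFmode constant below is split into two 64-bit halves,
   each costed as a DImode constant.  */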
6016 num_insns_constant (rtx op
, machine_mode mode
)
6020 switch (GET_CODE (op
))
6026 case CONST_WIDE_INT
:
6029 for (int i
= 0; i
< CONST_WIDE_INT_NUNITS (op
); i
++)
6030 insns
+= num_insns_constant_multi (CONST_WIDE_INT_ELT (op
, i
),
6037 const struct real_value
*rv
= CONST_DOUBLE_REAL_VALUE (op
);
6039 if (mode
== SFmode
|| mode
== SDmode
)
6044 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv
, l
);
6046 REAL_VALUE_TO_TARGET_SINGLE (*rv
, l
);
6047 /* See the first define_split in rs6000.md handling a
6048 const_double_operand. */
6052 else if (mode
== DFmode
|| mode
== DDmode
)
6057 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv
, l
);
6059 REAL_VALUE_TO_TARGET_DOUBLE (*rv
, l
);
6061 /* See the second (32-bit) and third (64-bit) define_split
6062 in rs6000.md handling a const_double_operand. */
6063 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 0 : 1] << 32;
6064 val
|= l
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffffUL
;
6067 else if (mode
== TFmode
|| mode
== TDmode
6068 || mode
== KFmode
|| mode
== IFmode
)
6074 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv
, l
);
6076 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv
, l
);
6078 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 0 : 3] << 32;
6079 val
|= l
[WORDS_BIG_ENDIAN
? 1 : 2] & 0xffffffffUL
;
6080 insns
= num_insns_constant_multi (val
, DImode
);
6081 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 2 : 1] << 32;
6082 val
|= l
[WORDS_BIG_ENDIAN
? 3 : 0] & 0xffffffffUL
;
6083 insns
+= num_insns_constant_multi (val
, DImode
);
6095 return num_insns_constant_multi (val
, mode
);
/* Interpret element ELT of the CONST_VECTOR OP as an integer value.
   If the mode of OP is MODE_VECTOR_INT, this simply returns the
   corresponding element of the vector, but for V4SFmode, the
   corresponding "float" is interpreted as an SImode integer.  */

static HOST_WIDE_INT
const_vector_elt_as_int (rtx op, unsigned int elt)
{
  rtx tmp;

  /* We can't handle V2DImode and V2DFmode vector constants here yet.  */
  gcc_assert (GET_MODE (op) != V2DImode
	      && GET_MODE (op) != V2DFmode);

  tmp = CONST_VECTOR_ELT (op, elt);
  if (GET_MODE (op) == V4SFmode)
    tmp = gen_lowpart (SImode, tmp);
  return INTVAL (tmp);
}
/* Return true if OP can be synthesized with a particular vspltisb, vspltish
   or vspltisw instruction.  OP is a CONST_VECTOR.  Which instruction is used
   depends on STEP and COPIES, one of which will be 1.  If COPIES > 1,
   all items are set to the same value and contain COPIES replicas of the
   vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
   operand and the others are set to the value of the operand's msb.  */
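/* For example, the V8HImode constant { 5, 5, 5, 5, 5, 5, 5, 5 } matches
   with STEP 1 and COPIES 1 (a plain vspltish 5), while the V4SImode
   constant { 0x00050005, 0x00050005, 0x00050005, 0x00050005 } matches
   with COPIES 2, since each 32-bit element holds two replicas of the
   16-bit splat operand.  */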
6126 vspltis_constant (rtx op
, unsigned step
, unsigned copies
)
6128 machine_mode mode
= GET_MODE (op
);
6129 machine_mode inner
= GET_MODE_INNER (mode
);
6137 HOST_WIDE_INT splat_val
;
6138 HOST_WIDE_INT msb_val
;
6140 if (mode
== V2DImode
|| mode
== V2DFmode
|| mode
== V1TImode
)
6143 nunits
= GET_MODE_NUNITS (mode
);
6144 bitsize
= GET_MODE_BITSIZE (inner
);
6145 mask
= GET_MODE_MASK (inner
);
6147 val
= const_vector_elt_as_int (op
, BYTES_BIG_ENDIAN
? nunits
- 1 : 0);
6149 msb_val
= val
>= 0 ? 0 : -1;
6151 if (val
== 0 && step
> 1)
6153 /* Special case for loading most significant bit with step > 1.
6154 In that case, match 0s in all but step-1s elements, where match
6156 for (i
= 1; i
< nunits
; ++i
)
6158 unsigned elt
= BYTES_BIG_ENDIAN
? nunits
- 1 - i
: i
;
6159 HOST_WIDE_INT elt_val
= const_vector_elt_as_int (op
, elt
);
6160 if ((i
& (step
- 1)) == step
- 1)
6162 if (!EASY_VECTOR_MSB (elt_val
, inner
))
6172 /* Construct the value to be splatted, if possible. If not, return 0. */
6173 for (i
= 2; i
<= copies
; i
*= 2)
6175 HOST_WIDE_INT small_val
;
6177 small_val
= splat_val
>> bitsize
;
6179 if (splat_val
!= ((HOST_WIDE_INT
)
6180 ((unsigned HOST_WIDE_INT
) small_val
<< bitsize
)
6181 | (small_val
& mask
)))
6183 splat_val
= small_val
;
6184 inner
= smallest_int_mode_for_size (bitsize
);
6187 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6188 if (EASY_VECTOR_15 (splat_val
))
6191 /* Also check if we can splat, and then add the result to itself. Do so if
6192 the value is positive, of if the splat instruction is using OP's mode;
6193 for splat_val < 0, the splat and the add should use the same mode. */
6194 else if (EASY_VECTOR_15_ADD_SELF (splat_val
)
6195 && (splat_val
>= 0 || (step
== 1 && copies
== 1)))
6198 /* Also check if are loading up the most significant bit which can be done by
6199 loading up -1 and shifting the value left by -1. Only do this for
6200 step 1 here, for larger steps it is done earlier. */
6201 else if (EASY_VECTOR_MSB (splat_val
, inner
) && step
== 1)
6207 /* Check if VAL is present in every STEP-th element, and the
6208 other elements are filled with its most significant bit. */
6209 for (i
= 1; i
< nunits
; ++i
)
6211 HOST_WIDE_INT desired_val
;
6212 unsigned elt
= BYTES_BIG_ENDIAN
? nunits
- 1 - i
: i
;
6213 if ((i
& (step
- 1)) == 0)
6216 desired_val
= msb_val
;
6218 if (desired_val
!= const_vector_elt_as_int (op
, elt
))
/* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
   instruction, filling in the bottom elements with 0 or -1.

   Return 0 if the constant cannot be generated with VSLDOI.  Return positive
   for the number of zero bytes to shift in, or negative for the number of
   0xff bytes to shift in.

   OP is a CONST_VECTOR.  */
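/* For example, on a big-endian target the V4SImode constant
   { 12, 0, 0, 0 } can be formed by splatting 12 into every element and
   then shifting in 12 zero bytes, so the return value here is 12.  */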
6235 vspltis_shifted (rtx op
)
6237 machine_mode mode
= GET_MODE (op
);
6238 machine_mode inner
= GET_MODE_INNER (mode
);
6246 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
)
6249 /* We need to create pseudo registers to do the shift, so don't recognize
6250 shift vector constants after reload. Don't match it even before RA
6251 after split1 is done, because there won't be further splitting pass
6252 before RA to do the splitting. */
6253 if (!can_create_pseudo_p ()
6254 || (cfun
->curr_properties
& PROP_rtl_split_insns
))
6257 nunits
= GET_MODE_NUNITS (mode
);
6258 mask
= GET_MODE_MASK (inner
);
6260 val
= const_vector_elt_as_int (op
, BYTES_BIG_ENDIAN
? 0 : nunits
- 1);
6262 /* Check if the value can really be the operand of a vspltis[bhw]. */
6263 if (EASY_VECTOR_15 (val
))
6266 /* Also check if we are loading up the most significant bit which can be done
6267 by loading up -1 and shifting the value left by -1. */
6268 else if (EASY_VECTOR_MSB (val
, inner
))
6274 /* Check if VAL is present in every STEP-th element until we find elements
6275 that are 0 or all 1 bits. */
6276 for (i
= 1; i
< nunits
; ++i
)
6278 unsigned elt
= BYTES_BIG_ENDIAN
? i
: nunits
- 1 - i
;
6279 HOST_WIDE_INT elt_val
= const_vector_elt_as_int (op
, elt
);
6281 /* If the value isn't the splat value, check for the remaining elements
6287 for (j
= i
+1; j
< nunits
; ++j
)
6289 unsigned elt2
= BYTES_BIG_ENDIAN
? j
: nunits
- 1 - j
;
6290 if (const_vector_elt_as_int (op
, elt2
) != 0)
6294 return (nunits
- i
) * GET_MODE_SIZE (inner
);
6297 else if ((elt_val
& mask
) == mask
)
6299 for (j
= i
+1; j
< nunits
; ++j
)
6301 unsigned elt2
= BYTES_BIG_ENDIAN
? j
: nunits
- 1 - j
;
6302 if ((const_vector_elt_as_int (op
, elt2
) & mask
) != mask
)
6306 return -((nunits
- i
) * GET_MODE_SIZE (inner
));
6314 /* If all elements are equal, we don't need to do VSLDOI. */
/* Return non-zero (the element mode byte size) if OP is of the given MODE
   and can be synthesized with a vspltisb, vspltish or vspltisw.  */
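/* For example, a V4SImode constant that can be loaded with a single
   vspltisw returns 4, one that needs a vspltish returns 2, and a
   constant that cannot be synthesized at all returns 0.  */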
6323 easy_altivec_constant (rtx op
, machine_mode mode
)
6325 unsigned step
, copies
;
6327 if (mode
== VOIDmode
)
6328 mode
= GET_MODE (op
);
6329 else if (mode
!= GET_MODE (op
))
6332 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6334 if (mode
== V2DFmode
)
6335 return zero_constant (op
, mode
) ? 8 : 0;
6337 else if (mode
== V2DImode
)
6339 if (!CONST_INT_P (CONST_VECTOR_ELT (op
, 0))
6340 || !CONST_INT_P (CONST_VECTOR_ELT (op
, 1)))
6343 if (zero_constant (op
, mode
))
6346 if (INTVAL (CONST_VECTOR_ELT (op
, 0)) == -1
6347 && INTVAL (CONST_VECTOR_ELT (op
, 1)) == -1)
6353 /* V1TImode is a special container for TImode. Ignore for now. */
6354 else if (mode
== V1TImode
)
6357 /* Start with a vspltisw. */
6358 step
= GET_MODE_NUNITS (mode
) / 4;
6361 if (vspltis_constant (op
, step
, copies
))
6364 /* Then try with a vspltish. */
6370 if (vspltis_constant (op
, step
, copies
))
6373 /* And finally a vspltisb. */
6379 if (vspltis_constant (op
, step
, copies
))
6382 if (vspltis_shifted (op
) != 0)
6383 return GET_MODE_SIZE (GET_MODE_INNER (mode
));
6388 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6389 result is OP. Abort if it is not possible. */
6392 gen_easy_altivec_constant (rtx op
)
6394 machine_mode mode
= GET_MODE (op
);
6395 int nunits
= GET_MODE_NUNITS (mode
);
6396 rtx val
= CONST_VECTOR_ELT (op
, BYTES_BIG_ENDIAN
? nunits
- 1 : 0);
6397 unsigned step
= nunits
/ 4;
6398 unsigned copies
= 1;
6400 /* Start with a vspltisw. */
6401 if (vspltis_constant (op
, step
, copies
))
6402 return gen_rtx_VEC_DUPLICATE (V4SImode
, gen_lowpart (SImode
, val
));
6404 /* Then try with a vspltish. */
6410 if (vspltis_constant (op
, step
, copies
))
6411 return gen_rtx_VEC_DUPLICATE (V8HImode
, gen_lowpart (HImode
, val
));
6413 /* And finally a vspltisb. */
6419 if (vspltis_constant (op
, step
, copies
))
6420 return gen_rtx_VEC_DUPLICATE (V16QImode
, gen_lowpart (QImode
, val
));
/* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
   instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).

   Return the number of instructions needed (1 or 2) into the address pointed
   to by NUM_INSNS_PTR.

   Return the constant that is being split via CONSTANT_PTR.  */
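/* For example, splatting the byte 20 across V16QImode needs just a single
   xxspltib (1 insn), while splatting it across V4SImode needs the xxspltib
   followed by a vextsb2w sign extension (2 insns).  */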
6434 xxspltib_constant_p (rtx op
,
6439 size_t nunits
= GET_MODE_NUNITS (mode
);
6441 HOST_WIDE_INT value
;
6444 /* Set the returned values to out of bound values. */
6445 *num_insns_ptr
= -1;
6446 *constant_ptr
= 256;
6448 if (!TARGET_P9_VECTOR
)
6451 if (mode
== VOIDmode
)
6452 mode
= GET_MODE (op
);
6454 else if (mode
!= GET_MODE (op
) && GET_MODE (op
) != VOIDmode
)
6457 /* Handle (vec_duplicate <constant>). */
6458 if (GET_CODE (op
) == VEC_DUPLICATE
)
6460 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6461 && mode
!= V2DImode
)
6464 element
= XEXP (op
, 0);
6465 if (!CONST_INT_P (element
))
6468 value
= INTVAL (element
);
6469 if (!IN_RANGE (value
, -128, 127))
6473 /* Handle (const_vector [...]). */
6474 else if (GET_CODE (op
) == CONST_VECTOR
)
6476 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6477 && mode
!= V2DImode
)
6480 element
= CONST_VECTOR_ELT (op
, 0);
6481 if (!CONST_INT_P (element
))
6484 value
= INTVAL (element
);
6485 if (!IN_RANGE (value
, -128, 127))
6488 for (i
= 1; i
< nunits
; i
++)
6490 element
= CONST_VECTOR_ELT (op
, i
);
6491 if (!CONST_INT_P (element
))
6494 if (value
!= INTVAL (element
))
6499 /* Handle integer constants being loaded into the upper part of the VSX
6500 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6501 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLITIB. */
6502 else if (CONST_INT_P (op
))
6504 if (!SCALAR_INT_MODE_P (mode
))
6507 value
= INTVAL (op
);
6508 if (!IN_RANGE (value
, -128, 127))
6511 if (!IN_RANGE (value
, -1, 0))
6513 if (!(reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_VALID
))
6516 if (EASY_VECTOR_15 (value
))
6524 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6525 sign extend. Special case 0/-1 to allow getting any VSX register instead
6526 of an Altivec register. */
6527 if ((mode
== V4SImode
|| mode
== V8HImode
) && !IN_RANGE (value
, -1, 0)
6528 && EASY_VECTOR_15 (value
))
6531 /* Return # of instructions and the constant byte for XXSPLTIB. */
6532 if (mode
== V16QImode
)
6535 else if (IN_RANGE (value
, -1, 0))
6538 /* Do not generate XXSPLTIB and a sign extend operation if we can generate a
6539 single XXSPLTIW or XXSPLTIDP instruction. */
6540 else if (vsx_prefixed_constant (op
, mode
))
6543 /* Return XXSPLITB followed by a sign extend operation to convert the
6544 constant to V8HImode or V4SImode. */
6548 *constant_ptr
= (int) value
;
6553 output_vec_const_move (rtx
*operands
)
6561 mode
= GET_MODE (dest
);
6565 bool dest_vmx_p
= ALTIVEC_REGNO_P (REGNO (dest
));
6566 int xxspltib_value
= 256;
6569 if (zero_constant (vec
, mode
))
6571 if (TARGET_P9_VECTOR
)
6572 return "xxspltib %x0,0";
6574 else if (dest_vmx_p
)
6575 return "vspltisw %0,0";
6578 return "xxlxor %x0,%x0,%x0";
6581 if (all_ones_constant (vec
, mode
))
6583 if (TARGET_P9_VECTOR
)
6584 return "xxspltib %x0,255";
6586 else if (dest_vmx_p
)
6587 return "vspltisw %0,-1";
6589 else if (TARGET_P8_VECTOR
)
6590 return "xxlorc %x0,%x0,%x0";
6596 vec_const_128bit_type vsx_const
;
6597 if (TARGET_POWER10
&& vec_const_128bit_to_bytes (vec
, mode
, &vsx_const
))
6599 unsigned imm
= constant_generates_lxvkq (&vsx_const
);
6602 operands
[2] = GEN_INT (imm
);
6603 return "lxvkq %x0,%2";
6606 imm
= constant_generates_xxspltiw (&vsx_const
);
6609 operands
[2] = GEN_INT (imm
);
6610 return "xxspltiw %x0,%2";
6613 imm
= constant_generates_xxspltidp (&vsx_const
);
6616 operands
[2] = GEN_INT (imm
);
6617 return "xxspltidp %x0,%2";
6621 if (TARGET_P9_VECTOR
6622 && xxspltib_constant_p (vec
, mode
, &num_insns
, &xxspltib_value
))
6626 operands
[2] = GEN_INT (xxspltib_value
& 0xff);
6627 return "xxspltib %x0,%2";
6638 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest
)));
6639 if (zero_constant (vec
, mode
))
6640 return "vspltisw %0,0";
6642 if (all_ones_constant (vec
, mode
))
6643 return "vspltisw %0,-1";
6645 /* Do we need to construct a value using VSLDOI? */
6646 shift
= vspltis_shifted (vec
);
6650 splat_vec
= gen_easy_altivec_constant (vec
);
6651 gcc_assert (GET_CODE (splat_vec
) == VEC_DUPLICATE
);
6652 operands
[1] = XEXP (splat_vec
, 0);
6653 if (!EASY_VECTOR_15 (INTVAL (operands
[1])))
6656 switch (GET_MODE (splat_vec
))
6659 return "vspltisw %0,%1";
6662 return "vspltish %0,%1";
6665 return "vspltisb %0,%1";
/* Initialize vector TARGET to VALS.  */

void
rs6000_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  unsigned int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  rtx x, mem;
  int i;
6688 for (i
= 0; i
< n_elts
; ++i
)
6690 x
= XVECEXP (vals
, 0, i
);
6691 if (!(CONST_SCALAR_INT_P (x
) || CONST_DOUBLE_P (x
) || CONST_FIXED_P (x
)))
6692 ++n_var
, one_var
= i
;
6693 else if (x
!= CONST0_RTX (inner_mode
))
6694 all_const_zero
= false;
6696 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
6702 rtx const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
6703 bool int_vector_p
= (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
);
6704 if ((int_vector_p
|| TARGET_VSX
) && all_const_zero
)
6706 /* Zero register. */
6707 emit_move_insn (target
, CONST0_RTX (mode
));
6710 else if (int_vector_p
&& easy_vector_constant (const_vec
, mode
))
6712 /* Splat immediate. */
6713 emit_insn (gen_rtx_SET (target
, const_vec
));
6718 /* Load from constant pool. */
6719 emit_move_insn (target
, const_vec
);
6724 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6725 if (VECTOR_MEM_VSX_P (mode
) && (mode
== V2DFmode
|| mode
== V2DImode
))
6729 size_t num_elements
= all_same
? 1 : 2;
6730 for (i
= 0; i
< num_elements
; i
++)
6732 op
[i
] = XVECEXP (vals
, 0, i
);
6733 /* Just in case there is a SUBREG with a smaller mode, do a
6735 if (GET_MODE (op
[i
]) != inner_mode
)
6737 rtx tmp
= gen_reg_rtx (inner_mode
);
6738 convert_move (tmp
, op
[i
], 0);
6741 /* Allow load with splat double word. */
6742 else if (MEM_P (op
[i
]))
6745 op
[i
] = force_reg (inner_mode
, op
[i
]);
6747 else if (!REG_P (op
[i
]))
6748 op
[i
] = force_reg (inner_mode
, op
[i
]);
6753 if (mode
== V2DFmode
)
6754 emit_insn (gen_vsx_splat_v2df (target
, op
[0]));
6756 emit_insn (gen_vsx_splat_v2di (target
, op
[0]));
6760 if (mode
== V2DFmode
)
6761 emit_insn (gen_vsx_concat_v2df (target
, op
[0], op
[1]));
6763 emit_insn (gen_vsx_concat_v2di (target
, op
[0], op
[1]));
6768 /* Special case initializing vector int if we are on 64-bit systems with
6769 direct move or we have the ISA 3.0 instructions. */
6770 if (mode
== V4SImode
&& VECTOR_MEM_VSX_P (V4SImode
)
6771 && TARGET_DIRECT_MOVE_64BIT
)
6775 rtx element0
= XVECEXP (vals
, 0, 0);
6776 if (MEM_P (element0
))
6777 element0
= rs6000_force_indexed_or_indirect_mem (element0
);
6779 element0
= force_reg (SImode
, element0
);
6781 if (TARGET_P9_VECTOR
)
6782 emit_insn (gen_vsx_splat_v4si (target
, element0
));
6785 rtx tmp
= gen_reg_rtx (DImode
);
6786 emit_insn (gen_zero_extendsidi2 (tmp
, element0
));
6787 emit_insn (gen_vsx_splat_v4si_di (target
, tmp
));
6796 for (i
= 0; i
< 4; i
++)
6797 elements
[i
] = force_reg (SImode
, XVECEXP (vals
, 0, i
));
6799 emit_insn (gen_vsx_init_v4si (target
, elements
[0], elements
[1],
6800 elements
[2], elements
[3]));
6805 /* With single precision floating point on VSX, know that internally single
6806 precision is actually represented as a double, and either make 2 V2DF
6807 vectors, and convert these vectors to single precision, or do one
6808 conversion, and splat the result to the other elements. */
6809 if (mode
== V4SFmode
&& VECTOR_MEM_VSX_P (V4SFmode
))
6813 rtx element0
= XVECEXP (vals
, 0, 0);
6815 if (TARGET_P9_VECTOR
)
6817 if (MEM_P (element0
))
6818 element0
= rs6000_force_indexed_or_indirect_mem (element0
);
6820 emit_insn (gen_vsx_splat_v4sf (target
, element0
));
6825 rtx freg
= gen_reg_rtx (V4SFmode
);
6826 rtx sreg
= force_reg (SFmode
, element0
);
6827 rtx cvt
= (TARGET_XSCVDPSPN
6828 ? gen_vsx_xscvdpspn_scalar (freg
, sreg
)
6829 : gen_vsx_xscvdpsp_scalar (freg
, sreg
));
6832 emit_insn (gen_vsx_xxspltw_v4sf_direct (target
, freg
,
6838 if (TARGET_P8_VECTOR
&& TARGET_POWERPC64
)
6844 for (i
= 0; i
< 4; i
++)
6846 tmp_si
[i
] = gen_reg_rtx (SImode
);
6847 tmp_di
[i
] = gen_reg_rtx (DImode
);
6848 mrg_di
[i
] = gen_reg_rtx (DImode
);
6849 tmp_sf
[i
] = force_reg (SFmode
, XVECEXP (vals
, 0, i
));
6850 emit_insn (gen_movsi_from_sf (tmp_si
[i
], tmp_sf
[i
]));
6851 emit_insn (gen_zero_extendsidi2 (tmp_di
[i
], tmp_si
[i
]));
6854 if (!BYTES_BIG_ENDIAN
)
6856 std::swap (tmp_di
[0], tmp_di
[1]);
6857 std::swap (tmp_di
[2], tmp_di
[3]);
6860 emit_insn (gen_ashldi3 (mrg_di
[0], tmp_di
[0], GEN_INT (32)));
6861 emit_insn (gen_iordi3 (mrg_di
[1], mrg_di
[0], tmp_di
[1]));
6862 emit_insn (gen_ashldi3 (mrg_di
[2], tmp_di
[2], GEN_INT (32)));
6863 emit_insn (gen_iordi3 (mrg_di
[3], mrg_di
[2], tmp_di
[3]));
6865 rtx tmp_v2di
= gen_reg_rtx (V2DImode
);
6866 emit_insn (gen_vsx_concat_v2di (tmp_v2di
, mrg_di
[1], mrg_di
[3]));
6867 emit_move_insn (target
, gen_lowpart (V4SFmode
, tmp_v2di
));
6871 rtx dbl_even
= gen_reg_rtx (V2DFmode
);
6872 rtx dbl_odd
= gen_reg_rtx (V2DFmode
);
6873 rtx flt_even
= gen_reg_rtx (V4SFmode
);
6874 rtx flt_odd
= gen_reg_rtx (V4SFmode
);
6875 rtx op0
= force_reg (SFmode
, XVECEXP (vals
, 0, 0));
6876 rtx op1
= force_reg (SFmode
, XVECEXP (vals
, 0, 1));
6877 rtx op2
= force_reg (SFmode
, XVECEXP (vals
, 0, 2));
6878 rtx op3
= force_reg (SFmode
, XVECEXP (vals
, 0, 3));
6880 emit_insn (gen_vsx_concat_v2sf (dbl_even
, op0
, op1
));
6881 emit_insn (gen_vsx_concat_v2sf (dbl_odd
, op2
, op3
));
6882 emit_insn (gen_vsx_xvcvdpsp (flt_even
, dbl_even
));
6883 emit_insn (gen_vsx_xvcvdpsp (flt_odd
, dbl_odd
));
6884 rs6000_expand_extract_even (target
, flt_even
, flt_odd
);
6890 /* Special case initializing vector short/char that are splats if we are on
6891 64-bit systems with direct move. */
6892 if (all_same
&& TARGET_DIRECT_MOVE_64BIT
6893 && (mode
== V16QImode
|| mode
== V8HImode
))
6895 rtx op0
= XVECEXP (vals
, 0, 0);
6896 rtx di_tmp
= gen_reg_rtx (DImode
);
6899 op0
= force_reg (GET_MODE_INNER (mode
), op0
);
6901 if (mode
== V16QImode
)
6903 emit_insn (gen_zero_extendqidi2 (di_tmp
, op0
));
6904 emit_insn (gen_vsx_vspltb_di (target
, di_tmp
));
6908 if (mode
== V8HImode
)
6910 emit_insn (gen_zero_extendhidi2 (di_tmp
, op0
));
6911 emit_insn (gen_vsx_vsplth_di (target
, di_tmp
));
6916 /* Store value to stack temp. Load vector element. Splat. However, splat
6917 of 64-bit items is not supported on Altivec. */
6918 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
6920 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
6921 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0),
6922 XVECEXP (vals
, 0, 0));
6923 x
= gen_rtx_UNSPEC (VOIDmode
,
6924 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
6925 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
6927 gen_rtx_SET (target
, mem
),
6929 x
= gen_rtx_VEC_SELECT (inner_mode
, target
,
6930 gen_rtx_PARALLEL (VOIDmode
,
6931 gen_rtvec (1, const0_rtx
)));
6932 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
6936 /* One field is non-constant. Load constant then overwrite
6940 rtx copy
= copy_rtx (vals
);
6942 /* Load constant part of vector, substitute neighboring value for
6944 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
6945 rs6000_expand_vector_init (target
, copy
);
6947 /* Insert variable. */
6948 rs6000_expand_vector_set (target
, XVECEXP (vals
, 0, one_var
),
6953 if (TARGET_DIRECT_MOVE
&& (mode
== V16QImode
|| mode
== V8HImode
))
6956 /* Force the values into word_mode registers. */
6957 for (i
= 0; i
< n_elts
; i
++)
6959 rtx tmp
= force_reg (inner_mode
, XVECEXP (vals
, 0, i
));
6960 machine_mode tmode
= TARGET_POWERPC64
? DImode
: SImode
;
6961 op
[i
] = simplify_gen_subreg (tmode
, tmp
, inner_mode
, 0);
6964 /* Take unsigned char big endianness on 64bit as example for below
6965 construction, the input values are: A, B, C, D, ..., O, P. */
6967 if (TARGET_DIRECT_MOVE_128
)
6969 /* Move to VSX register with vec_concat, each has 2 values.
6970 eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
6971 vr1[1] = { xxxxxxxC, xxxxxxxD };
6973 vr1[7] = { xxxxxxxO, xxxxxxxP }; */
6975 for (i
= 0; i
< n_elts
/ 2; i
++)
6977 vr1
[i
] = gen_reg_rtx (V2DImode
);
6978 emit_insn (gen_vsx_concat_v2di (vr1
[i
], op
[i
* 2],
6982 /* Pack vectors with 2 values into vectors with 4 values.
6983 eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
6984 vr2[1] = { xxxExxxF, xxxGxxxH };
6985 vr2[1] = { xxxIxxxJ, xxxKxxxL };
6986 vr2[3] = { xxxMxxxN, xxxOxxxP }; */
6988 for (i
= 0; i
< n_elts
/ 4; i
++)
6990 vr2
[i
] = gen_reg_rtx (V4SImode
);
6991 emit_insn (gen_altivec_vpkudum (vr2
[i
], vr1
[i
* 2],
6995 /* Pack vectors with 4 values into vectors with 8 values.
6996 eg: vr3[0] = { xAxBxCxD, xExFxGxH };
6997 vr3[1] = { xIxJxKxL, xMxNxOxP }; */
6999 for (i
= 0; i
< n_elts
/ 8; i
++)
7001 vr3
[i
] = gen_reg_rtx (V8HImode
);
7002 emit_insn (gen_altivec_vpkuwum (vr3
[i
], vr2
[i
* 2],
7006 /* If it's V8HImode, it's done and return it. */
7007 if (mode
== V8HImode
)
7009 emit_insn (gen_rtx_SET (target
, vr3
[0]));
7013 /* Pack vectors with 8 values into 16 values. */
7014 rtx res
= gen_reg_rtx (V16QImode
);
7015 emit_insn (gen_altivec_vpkuhum (res
, vr3
[0], vr3
[1]));
7016 emit_insn (gen_rtx_SET (target
, res
));
7020 rtx (*merge_v16qi
) (rtx
, rtx
, rtx
) = NULL
;
7021 rtx (*merge_v8hi
) (rtx
, rtx
, rtx
) = NULL
;
7022 rtx (*merge_v4si
) (rtx
, rtx
, rtx
) = NULL
;
7025 /* Set up some common gen routines and values. */
7026 if (BYTES_BIG_ENDIAN
)
7028 if (mode
== V16QImode
)
7030 merge_v16qi
= gen_altivec_vmrghb
;
7031 merge_v8hi
= gen_altivec_vmrglh
;
7034 merge_v8hi
= gen_altivec_vmrghh
;
7036 merge_v4si
= gen_altivec_vmrglw
;
7037 perm_idx
= GEN_INT (3);
7041 if (mode
== V16QImode
)
7043 merge_v16qi
= gen_altivec_vmrglb
;
7044 merge_v8hi
= gen_altivec_vmrghh
;
7047 merge_v8hi
= gen_altivec_vmrglh
;
7049 merge_v4si
= gen_altivec_vmrghw
;
7050 perm_idx
= GEN_INT (0);
7053 /* Move to VSX register with direct move.
7054 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
7055 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
7057 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
7059 for (i
= 0; i
< n_elts
; i
++)
7061 vr_qi
[i
] = gen_reg_rtx (V16QImode
);
7062 if (TARGET_POWERPC64
)
7063 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi
[i
], op
[i
]));
7065 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi
[i
], op
[i
]));
7068 /* Merge/move to vector short.
7069 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
7070 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
7072 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
7074 for (i
= 0; i
< 8; i
++)
7077 if (mode
== V16QImode
)
7079 tmp
= gen_reg_rtx (V16QImode
);
7080 emit_insn (merge_v16qi (tmp
, vr_qi
[2 * i
], vr_qi
[2 * i
+ 1]));
7082 vr_hi
[i
] = gen_reg_rtx (V8HImode
);
7083 emit_move_insn (vr_hi
[i
], gen_lowpart (V8HImode
, tmp
));
7086 /* Merge vector short to vector int.
7087 eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
7088 vr_si[1] = { xxxxxxxx, xxxxEFGH };
7090 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
7092 for (i
= 0; i
< 4; i
++)
7094 rtx tmp
= gen_reg_rtx (V8HImode
);
7095 emit_insn (merge_v8hi (tmp
, vr_hi
[2 * i
], vr_hi
[2 * i
+ 1]));
7096 vr_si
[i
] = gen_reg_rtx (V4SImode
);
7097 emit_move_insn (vr_si
[i
], gen_lowpart (V4SImode
, tmp
));
7100 /* Merge vector int to vector long.
7101 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
7102 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
7104 for (i
= 0; i
< 2; i
++)
7106 rtx tmp
= gen_reg_rtx (V4SImode
);
7107 emit_insn (merge_v4si (tmp
, vr_si
[2 * i
], vr_si
[2 * i
+ 1]));
7108 vr_di
[i
] = gen_reg_rtx (V2DImode
);
7109 emit_move_insn (vr_di
[i
], gen_lowpart (V2DImode
, tmp
));
7112 rtx res
= gen_reg_rtx (V2DImode
);
7113 emit_insn (gen_vsx_xxpermdi_v2di (res
, vr_di
[0], vr_di
[1], perm_idx
));
7114 emit_insn (gen_rtx_SET (target
, gen_lowpart (mode
, res
)));
7120 /* Construct the vector in memory one field at a time
7121 and load the whole vector. */
7122 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
7123 for (i
= 0; i
< n_elts
; i
++)
7124 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
7125 i
* GET_MODE_SIZE (inner_mode
)),
7126 XVECEXP (vals
, 0, i
));
7127 emit_move_insn (target
, mem
);
/* Insert VAL into element IDX of TARGET.  VAL has the same size as a vector
   element, and IDX is variable and counted in vector-element-size units, for
   Power9 and above.  */
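/* Roughly, the sequence below computes a byte rotation amount from IDX,
   rotates TARGET with a vperm driven by the lvsr control vector so that the
   element to be replaced lands in element 0, overwrites element 0 with VAL,
   and then rotates back with the lvsl-based control vector.  */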
7134 rs6000_expand_vector_set_var_p9 (rtx target
, rtx val
, rtx idx
)
7136 machine_mode mode
= GET_MODE (target
);
7138 gcc_assert (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (idx
));
7140 machine_mode inner_mode
= GET_MODE (val
);
7142 int width
= GET_MODE_SIZE (inner_mode
);
7144 gcc_assert (width
>= 1 && width
<= 8);
7146 int shift
= exact_log2 (width
);
7148 machine_mode idx_mode
= GET_MODE (idx
);
7150 machine_mode shift_mode
;
7151 rtx (*gen_ashl
)(rtx
, rtx
, rtx
);
7152 rtx (*gen_lvsl
)(rtx
, rtx
);
7153 rtx (*gen_lvsr
)(rtx
, rtx
);
7155 if (TARGET_POWERPC64
)
7157 shift_mode
= DImode
;
7158 gen_ashl
= gen_ashldi3
;
7159 gen_lvsl
= gen_altivec_lvsl_reg_di
;
7160 gen_lvsr
= gen_altivec_lvsr_reg_di
;
7164 shift_mode
= SImode
;
7165 gen_ashl
= gen_ashlsi3
;
7166 gen_lvsl
= gen_altivec_lvsl_reg_si
;
7167 gen_lvsr
= gen_altivec_lvsr_reg_si
;
7169 /* Generate the IDX for permute shift, width is the vector element size.
7170 idx = idx * width. */
7171 rtx tmp
= gen_reg_rtx (shift_mode
);
7172 idx
= convert_modes (shift_mode
, idx_mode
, idx
, 1);
7174 emit_insn (gen_ashl (tmp
, idx
, GEN_INT (shift
)));
7176 /* lvsr v1,0,idx. */
7177 rtx pcvr
= gen_reg_rtx (V16QImode
);
7178 emit_insn (gen_lvsr (pcvr
, tmp
));
7180 /* lvsl v2,0,idx. */
7181 rtx pcvl
= gen_reg_rtx (V16QImode
);
7182 emit_insn (gen_lvsl (pcvl
, tmp
));
7184 rtx sub_target
= simplify_gen_subreg (V16QImode
, target
, mode
, 0);
7187 = gen_altivec_vperm_v8hiv16qi (sub_target
, sub_target
, sub_target
, pcvr
);
7190 rs6000_expand_vector_set (target
, val
, const0_rtx
);
7193 = gen_altivec_vperm_v8hiv16qi (sub_target
, sub_target
, sub_target
, pcvl
);
7197 /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
7198 is variable and also counts by vector element size for p7 & p8. */
7201 rs6000_expand_vector_set_var_p7 (rtx target
, rtx val
, rtx idx
)
7203 machine_mode mode
= GET_MODE (target
);
7205 gcc_assert (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (idx
));
7207 machine_mode inner_mode
= GET_MODE (val
);
7208 HOST_WIDE_INT mode_mask
= GET_MODE_MASK (inner_mode
);
7210 int width
= GET_MODE_SIZE (inner_mode
);
7211 gcc_assert (width
>= 1 && width
<= 4);
7213 int shift
= exact_log2 (width
);
7215 machine_mode idx_mode
= GET_MODE (idx
);
7217 machine_mode shift_mode
;
7218 rtx (*gen_ashl
)(rtx
, rtx
, rtx
);
7219 rtx (*gen_add
)(rtx
, rtx
, rtx
);
7220 rtx (*gen_sub
)(rtx
, rtx
, rtx
);
7221 rtx (*gen_lvsl
)(rtx
, rtx
);
7223 if (TARGET_POWERPC64
)
7225 shift_mode
= DImode
;
7226 gen_ashl
= gen_ashldi3
;
7227 gen_add
= gen_adddi3
;
7228 gen_sub
= gen_subdi3
;
7229 gen_lvsl
= gen_altivec_lvsl_reg_di
;
7233 shift_mode
= SImode
;
7234 gen_ashl
= gen_ashlsi3
;
7235 gen_add
= gen_addsi3
;
7236 gen_sub
= gen_subsi3
;
7237 gen_lvsl
= gen_altivec_lvsl_reg_si
;
7240 /* idx = idx * width. */
7241 rtx tmp
= gen_reg_rtx (shift_mode
);
7242 idx
= convert_modes (shift_mode
, idx_mode
, idx
, 1);
7244 emit_insn (gen_ashl (tmp
, idx
, GEN_INT (shift
)));
7246 /* For LE: idx = idx + 8. */
7247 if (!BYTES_BIG_ENDIAN
)
7248 emit_insn (gen_add (tmp
, tmp
, GEN_INT (8)));
7250 emit_insn (gen_sub (tmp
, GEN_INT (24 - width
), tmp
));
7253 DImode: 0xffffffffffffffff0000000000000000
7254 SImode: 0x00000000ffffffff0000000000000000
7255 HImode: 0x000000000000ffff0000000000000000.
7256 QImode: 0x00000000000000ff0000000000000000. */
7257 rtx mask
= gen_reg_rtx (V16QImode
);
7258 rtx mask_v2di
= gen_reg_rtx (V2DImode
);
7259 rtvec v
= rtvec_alloc (2);
7260 if (!BYTES_BIG_ENDIAN
)
7262 RTVEC_ELT (v
, 0) = gen_rtx_CONST_INT (DImode
, 0);
7263 RTVEC_ELT (v
, 1) = gen_rtx_CONST_INT (DImode
, mode_mask
);
7267 RTVEC_ELT (v
, 0) = gen_rtx_CONST_INT (DImode
, mode_mask
);
7268 RTVEC_ELT (v
, 1) = gen_rtx_CONST_INT (DImode
, 0);
7270 emit_insn (gen_vec_initv2didi (mask_v2di
, gen_rtx_PARALLEL (V2DImode
, v
)));
7271 rtx sub_mask
= simplify_gen_subreg (V16QImode
, mask_v2di
, V2DImode
, 0);
7272 emit_insn (gen_rtx_SET (mask
, sub_mask
));
7274 /* mtvsrd[wz] f0,tmp_val. */
7275 rtx tmp_val
= gen_reg_rtx (SImode
);
7276 if (inner_mode
== E_SFmode
)
7277 if (TARGET_DIRECT_MOVE_64BIT
)
7278 emit_insn (gen_movsi_from_sf (tmp_val
, val
));
7281 rtx stack
= rs6000_allocate_stack_temp (SFmode
, false, true);
7282 emit_insn (gen_movsf_hardfloat (stack
, val
));
7283 rtx stack2
= copy_rtx (stack
);
7284 PUT_MODE (stack2
, SImode
);
7285 emit_move_insn (tmp_val
, stack2
);
7288 tmp_val
= force_reg (SImode
, val
);
7290 rtx val_v16qi
= gen_reg_rtx (V16QImode
);
7291 rtx val_v2di
= gen_reg_rtx (V2DImode
);
7292 rtvec vec_val
= rtvec_alloc (2);
7293 if (!BYTES_BIG_ENDIAN
)
7295 RTVEC_ELT (vec_val
, 0) = gen_rtx_CONST_INT (DImode
, 0);
7296 RTVEC_ELT (vec_val
, 1) = tmp_val
;
7300 RTVEC_ELT (vec_val
, 0) = tmp_val
;
7301 RTVEC_ELT (vec_val
, 1) = gen_rtx_CONST_INT (DImode
, 0);
7304 gen_vec_initv2didi (val_v2di
, gen_rtx_PARALLEL (V2DImode
, vec_val
)));
7305 rtx sub_val
= simplify_gen_subreg (V16QImode
, val_v2di
, V2DImode
, 0);
7306 emit_insn (gen_rtx_SET (val_v16qi
, sub_val
));
7308 /* lvsl 13,0,idx. */
7309 rtx pcv
= gen_reg_rtx (V16QImode
);
7310 emit_insn (gen_lvsl (pcv
, tmp
));
7312 /* vperm 1,1,1,13. */
7313 /* vperm 0,0,0,13. */
7314 rtx val_perm
= gen_reg_rtx (V16QImode
);
7315 rtx mask_perm
= gen_reg_rtx (V16QImode
);
7316 emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm
, val_v16qi
, val_v16qi
, pcv
));
7317 emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm
, mask
, mask
, pcv
));
7319 rtx target_v16qi
= simplify_gen_subreg (V16QImode
, target
, mode
, 0);
7321 /* xxsel 34,34,32,33. */
7323 gen_vector_select_v16qi (target_v16qi
, target_v16qi
, val_perm
, mask_perm
));
7326 /* Set field ELT_RTX of TARGET to VAL. */
7329 rs6000_expand_vector_set (rtx target
, rtx val
, rtx elt_rtx
)
7331 machine_mode mode
= GET_MODE (target
);
7332 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7333 rtx reg
= gen_reg_rtx (mode
);
7335 int width
= GET_MODE_SIZE (inner_mode
);
7338 val
= force_reg (GET_MODE (val
), val
);
7340 if (VECTOR_MEM_VSX_P (mode
))
7342 if (!CONST_INT_P (elt_rtx
))
7344 /* For V2DI/V2DF, could leverage the P9 version to generate xxpermdi
7345 when elt_rtx is variable. */
7346 if ((TARGET_P9_VECTOR
&& TARGET_POWERPC64
) || width
== 8)
7348 rs6000_expand_vector_set_var_p9 (target
, val
, elt_rtx
);
7351 else if (TARGET_VSX
)
7353 rs6000_expand_vector_set_var_p7 (target
, val
, elt_rtx
);
7357 gcc_assert (CONST_INT_P (elt_rtx
));
7360 rtx insn
= NULL_RTX
;
7362 if (mode
== V2DFmode
)
7363 insn
= gen_vsx_set_v2df (target
, target
, val
, elt_rtx
);
7365 else if (mode
== V2DImode
)
7366 insn
= gen_vsx_set_v2di (target
, target
, val
, elt_rtx
);
7368 else if (TARGET_P9_VECTOR
&& TARGET_POWERPC64
)
7370 if (mode
== V4SImode
)
7371 insn
= gen_vsx_set_v4si_p9 (target
, target
, val
, elt_rtx
);
7372 else if (mode
== V8HImode
)
7373 insn
= gen_vsx_set_v8hi_p9 (target
, target
, val
, elt_rtx
);
7374 else if (mode
== V16QImode
)
7375 insn
= gen_vsx_set_v16qi_p9 (target
, target
, val
, elt_rtx
);
7376 else if (mode
== V4SFmode
)
7377 insn
= gen_vsx_set_v4sf_p9 (target
, target
, val
, elt_rtx
);
7387 /* Simplify setting single element vectors like V1TImode. */
7388 if (GET_MODE_SIZE (mode
) == GET_MODE_SIZE (inner_mode
)
7389 && INTVAL (elt_rtx
) == 0)
7391 emit_move_insn (target
, gen_lowpart (mode
, val
));
7395 /* Load single variable value. */
7396 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
7397 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0), val
);
7398 x
= gen_rtx_UNSPEC (VOIDmode
,
7399 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
7400 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
7402 gen_rtx_SET (reg
, mem
),
7405 /* Linear sequence. */
7406 mask
= gen_rtx_PARALLEL (V16QImode
, rtvec_alloc (16));
7407 for (i
= 0; i
< 16; ++i
)
7408 XVECEXP (mask
, 0, i
) = GEN_INT (i
);
7410 /* Set permute mask to insert element into target. */
7411 for (i
= 0; i
< width
; ++i
)
7412 XVECEXP (mask
, 0, INTVAL (elt_rtx
) * width
+ i
) = GEN_INT (i
+ 0x10);
7413 x
= gen_rtx_CONST_VECTOR (V16QImode
, XVEC (mask
, 0));
7415 if (BYTES_BIG_ENDIAN
)
7416 x
= gen_rtx_UNSPEC (mode
,
7417 gen_rtvec (3, target
, reg
,
7418 force_reg (V16QImode
, x
)),
7422 if (TARGET_P9_VECTOR
)
7423 x
= gen_rtx_UNSPEC (mode
,
7424 gen_rtvec (3, reg
, target
,
7425 force_reg (V16QImode
, x
)),
7429 /* Invert selector. We prefer to generate VNAND on P8 so
7430 that future fusion opportunities can kick in, but must
7431 generate VNOR elsewhere. */
7432 rtx notx
= gen_rtx_NOT (V16QImode
, force_reg (V16QImode
, x
));
7433 rtx iorx
= (TARGET_P8_VECTOR
7434 ? gen_rtx_IOR (V16QImode
, notx
, notx
)
7435 : gen_rtx_AND (V16QImode
, notx
, notx
));
7436 rtx tmp
= gen_reg_rtx (V16QImode
);
7437 emit_insn (gen_rtx_SET (tmp
, iorx
));
7439 /* Permute with operands reversed and adjusted selector. */
7440 x
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, reg
, target
, tmp
),
7445 emit_insn (gen_rtx_SET (target
, x
));
7448 /* Extract field ELT from VEC into TARGET. */
7451 rs6000_expand_vector_extract (rtx target
, rtx vec
, rtx elt
)
7453 machine_mode mode
= GET_MODE (vec
);
7454 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7457 if (VECTOR_MEM_VSX_P (mode
) && CONST_INT_P (elt
))
7464 emit_move_insn (target
, gen_lowpart (TImode
, vec
));
7467 emit_insn (gen_vsx_extract_v2df (target
, vec
, elt
));
7470 emit_insn (gen_vsx_extract_v2di (target
, vec
, elt
));
7473 emit_insn (gen_vsx_extract_v4sf (target
, vec
, elt
));
7476 if (TARGET_DIRECT_MOVE_64BIT
)
7478 emit_insn (gen_vsx_extract_v16qi (target
, vec
, elt
));
7484 if (TARGET_DIRECT_MOVE_64BIT
)
7486 emit_insn (gen_vsx_extract_v8hi (target
, vec
, elt
));
7492 if (TARGET_DIRECT_MOVE_64BIT
)
7494 emit_insn (gen_vsx_extract_v4si (target
, vec
, elt
));
7500 else if (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (elt
)
7501 && TARGET_DIRECT_MOVE_64BIT
)
7503 if (GET_MODE (elt
) != DImode
)
7505 rtx tmp
= gen_reg_rtx (DImode
);
7506 convert_move (tmp
, elt
, 0);
7509 else if (!REG_P (elt
))
7510 elt
= force_reg (DImode
, elt
);
7515 emit_move_insn (target
, gen_lowpart (TImode
, vec
));
7519 emit_insn (gen_vsx_extract_v2df_var (target
, vec
, elt
));
7523 emit_insn (gen_vsx_extract_v2di_var (target
, vec
, elt
));
7527 emit_insn (gen_vsx_extract_v4sf_var (target
, vec
, elt
));
7531 emit_insn (gen_vsx_extract_v4si_var (target
, vec
, elt
));
7535 emit_insn (gen_vsx_extract_v8hi_var (target
, vec
, elt
));
7539 emit_insn (gen_vsx_extract_v16qi_var (target
, vec
, elt
));
7547 /* Allocate mode-sized buffer. */
7548 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
7550 emit_move_insn (mem
, vec
);
7551 if (CONST_INT_P (elt
))
7553 int modulo_elt
= INTVAL (elt
) % GET_MODE_NUNITS (mode
);
7555 /* Add offset to field within buffer matching vector element. */
7556 mem
= adjust_address_nv (mem
, inner_mode
,
7557 modulo_elt
* GET_MODE_SIZE (inner_mode
));
7558 emit_move_insn (target
, adjust_address_nv (mem
, inner_mode
, 0));
7562 unsigned int ele_size
= GET_MODE_SIZE (inner_mode
);
7563 rtx num_ele_m1
= GEN_INT (GET_MODE_NUNITS (mode
) - 1);
7564 rtx new_addr
= gen_reg_rtx (Pmode
);
7566 elt
= gen_rtx_AND (Pmode
, elt
, num_ele_m1
);
7568 elt
= gen_rtx_MULT (Pmode
, elt
, GEN_INT (ele_size
));
7569 new_addr
= gen_rtx_PLUS (Pmode
, XEXP (mem
, 0), elt
);
7570 new_addr
= change_address (mem
, inner_mode
, new_addr
);
7571 emit_move_insn (target
, new_addr
);
/* Return the offset within a memory object (MEM) of a vector type to a given
   element within the vector (ELEMENT) with an element size (SCALAR_SIZE).  If
   the element is constant, we return a constant integer.

   Otherwise, we use a base register temporary to calculate the offset after
   masking it to fit within the bounds of the vector and scaling it.  The
   masking is required by the 64-bit ELF version 2 ABI for the vec_extract
   built-in function.  */
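/* For example, element 3 of a V4SImode vector (SCALAR_SIZE 4) yields the
   constant offset 12, while a variable ELEMENT is computed into BASE_TMP
   as (ELEMENT & 3) << 2.  */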
7585 get_vector_offset (rtx mem
, rtx element
, rtx base_tmp
, unsigned scalar_size
)
7587 if (CONST_INT_P (element
))
7588 return GEN_INT (INTVAL (element
) * scalar_size
);
7590 /* All insns should use the 'Q' constraint (address is a single register) if
7591 the element number is not a constant. */
7592 gcc_assert (satisfies_constraint_Q (mem
));
7594 /* Mask the element to make sure the element number is between 0 and the
7595 maximum number of elements - 1 so that we don't generate an address
7596 outside the vector. */
7597 rtx num_ele_m1
= GEN_INT (GET_MODE_NUNITS (GET_MODE (mem
)) - 1);
7598 rtx and_op
= gen_rtx_AND (Pmode
, element
, num_ele_m1
);
7599 emit_insn (gen_rtx_SET (base_tmp
, and_op
));
7601 /* Shift the element to get the byte offset from the element number. */
7602 int shift
= exact_log2 (scalar_size
);
7603 gcc_assert (shift
>= 0);
7607 rtx shift_op
= gen_rtx_ASHIFT (Pmode
, base_tmp
, GEN_INT (shift
));
7608 emit_insn (gen_rtx_SET (base_tmp
, shift_op
));
7614 /* Helper function update PC-relative addresses when we are adjusting a memory
7615 address (ADDR) to a vector to point to a scalar field within the vector with
7616 a constant offset (ELEMENT_OFFSET). If the address is not valid, we can
7617 use the base register temporary (BASE_TMP) to form the address. */
7620 adjust_vec_address_pcrel (rtx addr
, rtx element_offset
, rtx base_tmp
)
7622 rtx new_addr
= NULL
;
7624 gcc_assert (CONST_INT_P (element_offset
));
7626 if (GET_CODE (addr
) == CONST
)
7627 addr
= XEXP (addr
, 0);
7629 if (GET_CODE (addr
) == PLUS
)
7631 rtx op0
= XEXP (addr
, 0);
7632 rtx op1
= XEXP (addr
, 1);
7634 if (CONST_INT_P (op1
))
7636 HOST_WIDE_INT offset
7637 = INTVAL (XEXP (addr
, 1)) + INTVAL (element_offset
);
7644 rtx plus
= gen_rtx_PLUS (Pmode
, op0
, GEN_INT (offset
));
7645 new_addr
= gen_rtx_CONST (Pmode
, plus
);
7651 emit_move_insn (base_tmp
, addr
);
7652 new_addr
= gen_rtx_PLUS (Pmode
, base_tmp
, element_offset
);
7656 else if (SYMBOL_REF_P (addr
) || LABEL_REF_P (addr
))
7658 rtx plus
= gen_rtx_PLUS (Pmode
, addr
, element_offset
);
7659 new_addr
= gen_rtx_CONST (Pmode
, plus
);
7668 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
   within the vector (ELEMENT) with a mode (SCALAR_MODE).  Use a base register
   temporary (BASE_TMP) to fixup the address.  Return the new memory address
   that is valid for reads or writes to a given register (SCALAR_REG).

   This function is expected to be called after reload is completed when we are
   splitting insns.  The temporary BASE_TMP might be set multiple times with
   this code.  */

rtx
rs6000_adjust_vec_address (rtx scalar_reg,
			   rtx mem,
			   rtx element,
			   rtx base_tmp,
			   machine_mode scalar_mode)
{
  unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
  rtx addr = XEXP (mem, 0);
  rtx new_addr;

  gcc_assert (!reg_mentioned_p (base_tmp, addr));
  gcc_assert (!reg_mentioned_p (base_tmp, element));

  /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
  gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);

  /* Calculate what we need to add to the address to get the element
     address.  */
  rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);

  /* Create the new address pointing to the element within the vector.  If we
     are adding 0, we don't have to change the address.  */
  if (element_offset == const0_rtx)
    new_addr = addr;

  /* A simple indirect address can be converted into a reg + offset
     address.  */
  else if (REG_P (addr) || SUBREG_P (addr))
    new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);

  /* For references to local static variables, fold a constant offset into the
     address.  */
  else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
    new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);

  /* Optimize D-FORM addresses with constant offset with a constant element, to
     include the element offset in the address directly.  */
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);

      gcc_assert (REG_P (op0) || SUBREG_P (op0));
      if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
	{
	  /* op0 should never be r0, because r0+offset is not valid.  But it
	     doesn't hurt to make sure it is not r0.  */
	  gcc_assert (reg_or_subregno (op0) != 0);

	  /* D-FORM address with constant element number.  */
	  HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
	  rtx offset_rtx = GEN_INT (offset);
	  new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
	}
      else
	{
	  /* If we don't have a D-FORM address with a constant element number,
	     add the two elements in the current address.  Then add the offset.

	     Previously, we tried to add the offset to OP1 and change the
	     address to an X-FORM format adding OP0 and BASE_TMP, but it became
	     complicated because we had to verify that op1 was not GPR0 and we
	     had a constant element offset (due to the way ADDI is defined).
	     By doing the add of OP0 and OP1 first, and then adding in the
	     offset, it has the benefit that if D-FORM instructions are
	     allowed, the offset is part of the memory access to the vector
	     element.  */
	  emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
	}
    }

  else
    {
      emit_move_insn (base_tmp, addr);
      new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
    }

  /* If the address isn't valid, move the address into the temporary base
     register.  Some reasons it could not be valid include:

     The address offset overflowed the 16 or 34 bit offset size;
     We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
     We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
     Only X_FORM loads can be done, and the address is D_FORM.  */

  enum insn_form iform
    = address_to_insn_form (new_addr, scalar_mode,
			    reg_to_non_prefixed (scalar_reg, scalar_mode));

  if (iform == INSN_FORM_BAD)
    {
      emit_move_insn (base_tmp, new_addr);
      new_addr = base_tmp;
    }

  return change_address (mem, scalar_mode, new_addr);
}
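
/* Illustrative example of the adjustment above (not from the original
   sources): on a big-endian target, extracting element 3 of a V4SImode
   vector stored at (plus (reg r9) (const_int 16)) gives an element_offset of
   3 * 4 = 12, so the D-FORM path folds the two constants and the returned
   MEM uses the address (plus (reg r9) (const_int 28)) in SImode.  */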
/* Split a variable vec_extract operation into the component instructions.  */

void
rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
			      rtx tmp_altivec)
{
  machine_mode mode = GET_MODE (src);
  machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
  unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
  int byte_shift = exact_log2 (scalar_size);

  gcc_assert (byte_shift >= 0);

  /* If we are given a memory address, optimize to load just the element.  We
     don't have to adjust the vector element number on little endian
     systems.  */
  if (MEM_P (src))
    emit_move_insn (dest,
		    rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
					       scalar_mode));

  else if (REG_P (src) || SUBREG_P (src))
    {
      int num_elements = GET_MODE_NUNITS (mode);
      int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
      int bit_shift = 7 - exact_log2 (num_elements);
      rtx element2;
      unsigned int dest_regno = reg_or_subregno (dest);
      unsigned int src_regno = reg_or_subregno (src);
      unsigned int element_regno = reg_or_subregno (element);

      gcc_assert (REG_P (tmp_gpr));

      /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
	 a general purpose register.  */
      if (TARGET_P9_VECTOR
	  && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
	  && INT_REGNO_P (dest_regno)
	  && ALTIVEC_REGNO_P (src_regno)
	  && INT_REGNO_P (element_regno))
	{
	  rtx dest_si = gen_rtx_REG (SImode, dest_regno);
	  rtx element_si = gen_rtx_REG (SImode, element_regno);

	  if (mode == V16QImode)
	    emit_insn (BYTES_BIG_ENDIAN
		       ? gen_vextublx (dest_si, element_si, src)
		       : gen_vextubrx (dest_si, element_si, src));

	  else if (mode == V8HImode)
	    {
	      rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
	      emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
	      emit_insn (BYTES_BIG_ENDIAN
			 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
			 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
	    }

	  else
	    {
	      rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
	      emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
	      emit_insn (BYTES_BIG_ENDIAN
			 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
			 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
	    }

	  return;
	}

      gcc_assert (REG_P (tmp_altivec));

      /* For little endian, adjust element ordering.  For V2DI/V2DF, we can use
	 an XOR, otherwise we need to subtract.  The shift amount is so VSLO
	 will shift the element into the upper position (adding 3 to convert a
	 byte shift into a bit shift).  */
      if (scalar_size == 8)
	{
	  if (!BYTES_BIG_ENDIAN)
	    {
	      emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
	      element2 = tmp_gpr;
	    }
	  else
	    element2 = element;

	  /* Generate RLDIC directly to shift left 6 bits and retrieve 1
	     bit.  */
	  emit_insn (gen_rtx_SET (tmp_gpr,
				  gen_rtx_AND (DImode,
					       gen_rtx_ASHIFT (DImode,
							       element2,
							       GEN_INT (6)),
					       GEN_INT (64))));
	}
      else
	{
	  if (!BYTES_BIG_ENDIAN)
	    {
	      rtx num_ele_m1 = GEN_INT (num_elements - 1);

	      emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
	      emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
	      element2 = tmp_gpr;
	    }
	  else
	    element2 = element;

	  emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
	}

      /* Get the value into the lower byte of the Altivec register where VSLO
	 expects it.  */
      if (TARGET_P9_VECTOR)
	emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
      else if (can_create_pseudo_p ())
	emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
      else
	{
	  rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
	  emit_move_insn (tmp_di, tmp_gpr);
	  emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
	}

      /* Do the VSLO to get the value into the final location.  */
      switch (mode)
	{
	case E_V2DFmode:
	  emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
	  return;

	case E_V2DImode:
	  emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
	  return;

	case E_V4SFmode:
	  {
	    rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
	    rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
	    rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
	    emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
					  tmp_altivec));

	    emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
	    return;
	  }

	case E_V4SImode:
	case E_V8HImode:
	case E_V16QImode:
	  {
	    rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
	    rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
	    rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
	    emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
					  tmp_altivec));
	    emit_move_insn (tmp_gpr_di, tmp_altivec_di);
	    emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
				    GEN_INT (64 - bits_in_element)));
	    return;
	  }

	default:
	  gcc_unreachable ();
	}
    }

  else
    gcc_unreachable ();
}
/* Return alignment of TYPE.  Existing alignment is ALIGN.  HOW
   selects whether the alignment is abi mandated, optional, or
   both abi and optional alignment.  */

unsigned int
rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
{
  if (how != align_opt)
    {
      if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
	align = 128;
    }

  if (how != align_abi)
    {
      if (TREE_CODE (type) == ARRAY_TYPE
	  && TYPE_MODE (TREE_TYPE (type)) == QImode)
	{
	  if (align < BITS_PER_WORD)
	    align = BITS_PER_WORD;
	}
    }

  return align;
}
/* Implement TARGET_SLOW_UNALIGNED_ACCESS.  Altivec vector memory
   instructions simply ignore the low bits; VSX memory instructions
   are aligned to 4 or 8 bytes.  */

static bool
rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
{
  return (STRICT_ALIGNMENT
	  || (!TARGET_EFFICIENT_UNALIGNED_VSX
	      && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
		  || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
		      && (int) align < VECTOR_ALIGN (mode)))));
}
/* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints.  */

unsigned int
rs6000_special_adjust_field_align (tree type, unsigned int computed)
{
  if (computed <= 32 || TYPE_PACKED (type))
    return computed;

  /* Strip initial arrays.  */
  while (TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  /* If RECORD or UNION, recursively find the first field.  */
  while (AGGREGATE_TYPE_P (type))
    {
      tree field = TYPE_FIELDS (type);

      /* Skip all non field decls.  */
      while (field != NULL
	     && (TREE_CODE (field) != FIELD_DECL
		 || DECL_FIELD_ABI_IGNORED (field)))
	field = DECL_CHAIN (field);

      if (! field)
	break;

      /* A packed field does not contribute any extra alignment.  */
      if (DECL_PACKED (field))
	return computed;

      type = TREE_TYPE (field);

      /* Strip arrays.  */
      while (TREE_CODE (type) == ARRAY_TYPE)
	type = TREE_TYPE (type);
    }

  if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
      && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
    computed = MIN (computed, 32);

  return computed;
}
/* AIX increases natural record alignment to doubleword if the innermost first
   field is an FP double while the FP fields remain word aligned.
   Only called if TYPE initially is a RECORD or UNION.  */

unsigned int
rs6000_special_round_type_align (tree type, unsigned int computed,
				 unsigned int specified)
{
  unsigned int align = MAX (computed, specified);

  if (TYPE_PACKED (type) || align >= 64)
    return align;

  /* If RECORD or UNION, recursively find the first field.  */
  do
    {
      tree field = TYPE_FIELDS (type);

      /* Skip all non field decls.  */
      while (field != NULL
	     && (TREE_CODE (field) != FIELD_DECL
		 || DECL_FIELD_ABI_IGNORED (field)))
	field = DECL_CHAIN (field);

      if (! field)
	break;

      /* A packed field does not contribute any extra alignment.  */
      if (DECL_PACKED (field))
	return align;

      type = TREE_TYPE (field);

      /* Strip arrays.  */
      while (TREE_CODE (type) == ARRAY_TYPE)
	type = TREE_TYPE (type);
    } while (AGGREGATE_TYPE_P (type));

  if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
      && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
    align = MAX (align, 64);

  return align;
}
/* Darwin increases record alignment to the natural alignment of
   the first field.  */

unsigned int
darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
					unsigned int specified)
{
  unsigned int align = MAX (computed, specified);

  if (TYPE_PACKED (type))
    return align;

  /* Find the first field, looking down into aggregates.  */
  do {
    tree field = TYPE_FIELDS (type);
    /* Skip all non field decls.  */
    while (field != NULL
	   && (TREE_CODE (field) != FIELD_DECL
	       || DECL_FIELD_ABI_IGNORED (field)))
      field = DECL_CHAIN (field);
    if (! field)
      break;
    /* A packed field does not contribute any extra alignment.  */
    if (DECL_PACKED (field))
      return align;
    type = TREE_TYPE (field);
    while (TREE_CODE (type) == ARRAY_TYPE)
      type = TREE_TYPE (type);
  } while (AGGREGATE_TYPE_P (type));

  if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
    align = MAX (align, TYPE_ALIGN (type));

  return align;
}
/* Return 1 for an operand in small memory on V.4/eabi.  */

int
small_data_operand (rtx op ATTRIBUTE_UNUSED,
		    machine_mode mode ATTRIBUTE_UNUSED)
{
#if TARGET_ELF
  rtx sym_ref;

  if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
    return 0;

  if (DEFAULT_ABI != ABI_V4)
    return 0;

  if (SYMBOL_REF_P (op))
    sym_ref = op;

  else if (GET_CODE (op) != CONST
	   || GET_CODE (XEXP (op, 0)) != PLUS
	   || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
	   || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
    return 0;

  else
    {
      rtx sum = XEXP (op, 0);
      HOST_WIDE_INT summand;

      /* We have to be careful here, because it is the referenced address
	 that must be 32k from _SDA_BASE_, not just the symbol.  */
      summand = INTVAL (XEXP (sum, 1));
      if (summand < 0 || summand > g_switch_value)
	return 0;

      sym_ref = XEXP (sum, 0);
    }

  return SYMBOL_REF_SMALL_P (sym_ref);
#else
  return 0;
#endif
}
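
/* Illustrative case for the predicate above: with small data enabled and
   -G 8 (so g_switch_value is 8), a (symbol_ref "x") marked
   SYMBOL_REF_SMALL_P is accepted, and so is
   (const (plus (symbol_ref "x") (const_int 4))), whereas an offset of 12 is
   rejected because the referenced address, not just the symbol, must stay
   within range of _SDA_BASE_.  */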
/* Return true if either operand is a general purpose register.  */

bool
gpr_or_gpr_p (rtx op0, rtx op1)
{
  return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
	  || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
}

/* Return true if this is a move direct operation between GPR registers and
   floating point/VSX registers.  */

bool
direct_move_p (rtx op0, rtx op1)
{
  if (!REG_P (op0) || !REG_P (op1))
    return false;

  if (!TARGET_DIRECT_MOVE)
    return false;

  int regno0 = REGNO (op0);
  int regno1 = REGNO (op1);
  if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
    return false;

  if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
    return true;

  if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
    return true;

  return false;
}
/* Return true if the ADDR is an acceptable address for a quad memory
   operation of mode MODE (either LQ/STQ for general purpose registers, or
   LXV/STXV for vector registers under ISA 3.0.  GPR_P is true if this address
   is intended for LQ/STQ.  If it is false, the address is intended for the ISA
   3.0 LXV/STXV instruction.  */

bool
quad_address_p (rtx addr, machine_mode mode, bool strict)
{
  rtx op0, op1;

  if (GET_MODE_SIZE (mode) < 16)
    return false;

  if (legitimate_indirect_address_p (addr, strict))
    return true;

  if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
    return false;

  /* Is this a valid prefixed address?  If the bottom four bits of the offset
     are non-zero, we could use a prefixed instruction (which does not have the
     DQ-form constraint that the traditional instruction had) instead of
     forcing the unaligned offset to a GPR.  */
  if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
    return true;

  if (GET_CODE (addr) != PLUS)
    return false;

  op0 = XEXP (addr, 0);
  if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
    return false;

  op1 = XEXP (addr, 1);
  if (!CONST_INT_P (op1))
    return false;

  return quad_address_offset_p (INTVAL (op1));
}
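
/* Sketch of what the predicate above accepts for a non-prefixed LXV/STXV
   (assuming quad_address_offset_p requires a 16-bit signed offset whose low
   four bits are clear): (reg r4) and (plus (reg r4) (const_int 48)) pass,
   while (plus (reg r4) (const_int 20)) does not, because 20 is not a
   multiple of 16; such an offset is only usable via a prefixed instruction
   or after moving the address into a GPR.  */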
/* Return true if this is a load or store quad operation.  This function does
   not handle the atomic quad memory instructions.  */

bool
quad_load_store_p (rtx op0, rtx op1)
{
  bool ret;

  if (!TARGET_QUAD_MEMORY)
    ret = false;

  else if (REG_P (op0) && MEM_P (op1))
    ret = (quad_int_reg_operand (op0, GET_MODE (op0))
	   && quad_memory_operand (op1, GET_MODE (op1))
	   && !reg_overlap_mentioned_p (op0, op1));

  else if (MEM_P (op0) && REG_P (op1))
    ret = (quad_memory_operand (op0, GET_MODE (op0))
	   && quad_int_reg_operand (op1, GET_MODE (op1)));

  else
    ret = false;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n========== quad_load_store, return %s\n",
	       ret ? "true" : "false");
      debug_rtx (gen_rtx_SET (op0, op1));
    }

  return ret;
}
/* Given an address, return a constant offset term if one exists.  */

rtx
address_offset (rtx op)
{
  if (GET_CODE (op) == PRE_INC
      || GET_CODE (op) == PRE_DEC)
    op = XEXP (op, 0);
  else if (GET_CODE (op) == PRE_MODIFY
	   || GET_CODE (op) == LO_SUM)
    op = XEXP (op, 1);

  if (GET_CODE (op) == CONST)
    op = XEXP (op, 0);

  if (GET_CODE (op) == PLUS)
    op = XEXP (op, 1);

  if (CONST_INT_P (op))
    return op;

  return NULL_RTX;
}
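
/* For example, address_offset returns (const_int 8) both for
   (plus (reg r3) (const_int 8)) and for
   (pre_modify (reg r1) (plus (reg r1) (const_int 8))), and returns NULL_RTX
   when no constant term is present, e.g. for (plus (reg r3) (reg r4)).  */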
8296 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
8297 the mode. If we can't find (or don't know) the alignment of the symbol
8298 we assume (optimistically) that it's sufficiently aligned [??? maybe we
8299 should be pessimistic]. Offsets are validated in the same way as for
8302 darwin_rs6000_legitimate_lo_sum_const_p (rtx x
, machine_mode mode
)
8304 /* We should not get here with this. */
8305 gcc_checking_assert (! mode_supports_dq_form (mode
));
8307 if (GET_CODE (x
) == CONST
)
8310 /* If we are building PIC code, then any symbol must be wrapped in an
8311 UNSPEC_MACHOPIC_OFFSET so that it will get the picbase subtracted. */
8312 bool machopic_offs_p
= false;
8313 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_MACHOPIC_OFFSET
)
8315 x
= XVECEXP (x
, 0, 0);
8316 machopic_offs_p
= true;
8320 unsigned HOST_WIDE_INT offset
= 0;
8322 if (GET_CODE (x
) == PLUS
)
8325 if (! SYMBOL_REF_P (sym
))
8327 if (!CONST_INT_P (XEXP (x
, 1)))
8329 offset
= INTVAL (XEXP (x
, 1));
8331 else if (SYMBOL_REF_P (x
))
8333 else if (CONST_INT_P (x
))
8334 offset
= INTVAL (x
);
8335 else if (GET_CODE (x
) == LABEL_REF
)
8336 offset
= 0; // We assume code labels are Pmode aligned
8338 return false; // not sure what we have here.
8340 /* If we don't know the alignment of the thing to which the symbol refers,
8341 we assume optimistically it is "enough".
8342 ??? maybe we should be pessimistic instead. */
8347 tree decl
= SYMBOL_REF_DECL (sym
);
8348 /* As noted above, PIC code cannot use a bare SYMBOL_REF. */
8349 if (TARGET_MACHO
&& flag_pic
&& !machopic_offs_p
)
8352 if (MACHO_SYMBOL_INDIRECTION_P (sym
))
8353 /* The decl in an indirection symbol is the original one, which might
8354 be less aligned than the indirection. Our indirections are always
8359 if (decl
&& DECL_ALIGN (decl
))
8360 align
= DECL_ALIGN_UNIT (decl
);
8363 unsigned int extra
= 0;
8369 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8371 if (VECTOR_MEM_VSX_P (mode
))
8374 if (!TARGET_POWERPC64
)
8376 else if ((offset
& 3) || (align
& 3))
8387 if (!TARGET_POWERPC64
)
8389 else if ((offset
& 3) || (align
& 3))
8397 /* We only care if the access(es) would cause a change to the high part. */
8398 offset
= ((offset
& 0xffff) ^ 0x8000) - 0x8000;
8399 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
/* Return true if the MEM operand is a memory operand suitable for use
   with a (full width, possibly multiple) gpr load/store.  On
   powerpc64 this means the offset must be divisible by 4.
   Implements 'Y' constraint.

   Accept direct, indexed, offset, lo_sum and tocref.  Since this is
   a constraint function we know the operand has satisfied a suitable
   memory predicate.

   Offsetting a lo_sum should not be allowed, except where we know by
   alignment that a 32k boundary is not crossed.  Note that by
   "offsetting" here we mean a further offset to access parts of the
   MEM.  It's fine to have a lo_sum where the inner address is offset
   from a sym, since the same sym+offset will appear in the high part
   of the address calculation.  */

bool
mem_operand_gpr (rtx op, machine_mode mode)
{
  unsigned HOST_WIDE_INT offset;
  int extra;
  rtx addr = XEXP (op, 0);

  /* PR85755: Allow PRE_INC and PRE_DEC addresses.  */
  if (TARGET_UPDATE
      && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
      && mode_supports_pre_incdec_p (mode)
      && legitimate_indirect_address_p (XEXP (addr, 0), false))
    return true;

  /* Allow prefixed instructions if supported.  If the bottom two bits of the
     offset are non-zero, we could use a prefixed instruction (which does not
     have the DS-form constraint that the traditional instruction had) instead
     of forcing the unaligned offset to a GPR.  */
  if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
    return true;

  /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
     really OK.  Doing this early avoids teaching all the other machinery
     about them.  */
  if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
    return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);

  /* Only allow offsettable addresses.  See PRs 83969 and 84279.  */
  if (!rs6000_offsettable_memref_p (op, mode, false))
    return false;

  op = address_offset (addr);
  if (op == NULL_RTX)
    return true;

  offset = INTVAL (op);
  if (TARGET_POWERPC64 && (offset & 3) != 0)
    return false;

  extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
  if (extra < 0)
    extra = 0;

  if (GET_CODE (addr) == LO_SUM)
    /* For lo_sum addresses, we must allow any offset except one that
       causes a wrap, so test only the low 16 bits.  */
    offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;

  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
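
/* Worked example of the offset canonicalisation used above: the expression
   ((offset & 0xffff) ^ 0x8000) - 0x8000 sign-extends the low 16 bits of
   OFFSET.  For offset = 0x18345 the low half is 0x8345, the XOR gives
   0x0345, and subtracting 0x8000 yields -0x7cbb, which is the value the
   lo_sum part of the address actually contributes.  */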
/* As above, but for DS-FORM VSX insns.  Unlike mem_operand_gpr,
   enforce an offset divisible by 4 even for 32-bit.  */

bool
mem_operand_ds_form (rtx op, machine_mode mode)
{
  unsigned HOST_WIDE_INT offset;
  int extra;
  rtx addr = XEXP (op, 0);

  /* Allow prefixed instructions if supported.  If the bottom two bits of the
     offset are non-zero, we could use a prefixed instruction (which does not
     have the DS-form constraint that the traditional instruction had) instead
     of forcing the unaligned offset to a GPR.  */
  if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
    return true;

  if (!offsettable_address_p (false, mode, addr))
    return false;

  op = address_offset (addr);
  if (op == NULL_RTX)
    return true;

  offset = INTVAL (op);
  if ((offset & 3) != 0)
    return false;

  extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
  if (extra < 0)
    extra = 0;

  if (GET_CODE (addr) == LO_SUM)
    /* For lo_sum addresses, we must allow any offset except one that
       causes a wrap, so test only the low 16 bits.  */
    offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;

  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
8509 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8512 reg_offset_addressing_ok_p (machine_mode mode
)
8520 if (FLOAT128_IBM_P (TFmode
))
8523 /* If TFmode is IEEE 128-bit, treat it like a vector. */
8535 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8536 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8537 a vector mode, if we want to use the VSX registers to move it around,
8538 we need to restrict ourselves to reg+reg addressing. Similarly for
8539 IEEE 128-bit floating point that is passed in a single vector
8541 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
))
8542 return mode_supports_dq_form (mode
);
8545 /* The vector pair/quad types support offset addressing if the
8546 underlying vectors support offset addressing. */
8552 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8553 addressing for the LFIWZX and STFIWX instructions. */
8554 if (TARGET_NO_SDMODE_STACK
)
bool
virtual_stack_registers_memory_p (rtx op)
{
  int regnum;

  if (REG_P (op))
    regnum = REGNO (op);

  else if (GET_CODE (op) == PLUS
	   && REG_P (XEXP (op, 0))
	   && CONST_INT_P (XEXP (op, 1)))
    regnum = REGNO (XEXP (op, 0));

  else
    return false;

  return (regnum >= FIRST_VIRTUAL_REGISTER
	  && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
}
/* Return true if a MODE sized memory accesses to OP plus OFFSET
   is known to not straddle a 32k boundary.  This function is used
   to determine whether -mcmodel=medium code can use TOC pointer
   relative addressing for OP.  This means the alignment of the TOC
   pointer must also be taken into account, and unfortunately that is
   only 8 bytes.  */

#ifndef POWERPC64_TOC_POINTER_ALIGNMENT
#define POWERPC64_TOC_POINTER_ALIGNMENT 8
#endif
8597 offsettable_ok_by_alignment (rtx op
, HOST_WIDE_INT offset
,
8601 unsigned HOST_WIDE_INT dsize
, dalign
, lsb
, mask
;
8603 if (!SYMBOL_REF_P (op
))
8606 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8608 if (mode_supports_dq_form (mode
))
8611 dsize
= GET_MODE_SIZE (mode
);
8612 decl
= SYMBOL_REF_DECL (op
);
8618 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8619 replacing memory addresses with an anchor plus offset. We
8620 could find the decl by rummaging around in the block->objects
8621 VEC for the given offset but that seems like too much work. */
8622 dalign
= BITS_PER_UNIT
;
8623 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op
)
8624 && SYMBOL_REF_ANCHOR_P (op
)
8625 && SYMBOL_REF_BLOCK (op
) != NULL
)
8627 struct object_block
*block
= SYMBOL_REF_BLOCK (op
);
8629 dalign
= block
->alignment
;
8630 offset
+= SYMBOL_REF_BLOCK_OFFSET (op
);
8632 else if (CONSTANT_POOL_ADDRESS_P (op
))
8634 /* It would be nice to have get_pool_align().. */
8635 machine_mode cmode
= get_pool_mode (op
);
8637 dalign
= GET_MODE_ALIGNMENT (cmode
);
8640 else if (DECL_P (decl
))
8642 dalign
= DECL_ALIGN (decl
);
8646 /* Allow BLKmode when the entire object is known to not
8647 cross a 32k boundary. */
8648 if (!DECL_SIZE_UNIT (decl
))
8651 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl
)))
8654 dsize
= tree_to_uhwi (DECL_SIZE_UNIT (decl
));
8658 dalign
/= BITS_PER_UNIT
;
8659 if (dalign
> POWERPC64_TOC_POINTER_ALIGNMENT
)
8660 dalign
= POWERPC64_TOC_POINTER_ALIGNMENT
;
8661 return dalign
>= dsize
;
8667 /* Find how many bits of the alignment we know for this access. */
8668 dalign
/= BITS_PER_UNIT
;
8669 if (dalign
> POWERPC64_TOC_POINTER_ALIGNMENT
)
8670 dalign
= POWERPC64_TOC_POINTER_ALIGNMENT
;
8672 lsb
= offset
& -offset
;
8676 return dalign
>= dsize
;
static bool
constant_pool_expr_p (rtx op)
{
  rtx base, offset;

  split_const (op, &base, &offset);
  return (SYMBOL_REF_P (base)
	  && CONSTANT_POOL_ADDRESS_P (base)
	  && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
}
/* Create a TOC reference for symbol_ref SYMBOL.  If LARGETOC_REG is non-null,
   use that as the register to put the HIGH value into if register allocation
   is already done.  */

rtx
create_TOC_reference (rtx symbol, rtx largetoc_reg)
{
  rtx tocrel, tocreg, hi;

  gcc_assert (TARGET_TOC);

  if (TARGET_DEBUG_ADDR)
    {
      if (SYMBOL_REF_P (symbol))
	fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
		 XSTR (symbol, 0));
      else
	fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
		 GET_RTX_NAME (GET_CODE (symbol)));
      debug_rtx (symbol);
    }

  if (!can_create_pseudo_p ())
    df_set_regs_ever_live (TOC_REGISTER, true);

  tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
  tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
  if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
    return tocrel;

  hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
  if (largetoc_reg != NULL)
    {
      emit_move_insn (largetoc_reg, hi);
      hi = largetoc_reg;
    }
  return gen_rtx_LO_SUM (Pmode, hi, tocrel);
}
/* These are only used to pass through from print_operand/print_operand_address
   to rs6000_output_addr_const_extra over the intervening function
   output_addr_const which is not target code.  */
static const_rtx tocrel_base_oac, tocrel_offset_oac;

/* Return true if OP is a toc pointer relative address (the output
   of create_TOC_reference).  If STRICT, do not match non-split
   -mcmodel=large/medium toc pointer relative addresses.  If the pointers
   are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
   TOCREL_OFFSET_RET respectively.  */

bool
toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
		     const_rtx *tocrel_offset_ret)
{
  if (!TARGET_TOC)
    return false;

  if (TARGET_CMODEL != CMODEL_SMALL)
    {
      /* When strict ensure we have everything tidy.  */
      if (strict
	  && !(GET_CODE (op) == LO_SUM
	       && REG_P (XEXP (op, 0))
	       && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
	return false;

      /* When not strict, allow non-split TOC addresses and also allow
	 (lo_sum (high ..)) TOC addresses created during reload.  */
      if (GET_CODE (op) == LO_SUM)
	op = XEXP (op, 1);
    }

  const_rtx tocrel_base = op;
  const_rtx tocrel_offset = const0_rtx;

  if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
    {
      tocrel_base = XEXP (op, 0);
      tocrel_offset = XEXP (op, 1);
    }

  if (tocrel_base_ret)
    *tocrel_base_ret = tocrel_base;
  if (tocrel_offset_ret)
    *tocrel_offset_ret = tocrel_offset;

  return (GET_CODE (tocrel_base) == UNSPEC
	  && XINT (tocrel_base, 1) == UNSPEC_TOCREL
	  && REG_P (XVECEXP (tocrel_base, 0, 1))
	  && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
}
/* Return true if X is a constant pool address, and also for cmodel=medium
   if X is a toc-relative address known to be offsettable within MODE.  */

bool
legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
				    bool strict)
{
  const_rtx tocrel_base, tocrel_offset;
  return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
	  && (TARGET_CMODEL != CMODEL_MEDIUM
	      || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
	      || mode == QImode
	      || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
					      INTVAL (tocrel_offset), mode)));
}

static bool
legitimate_small_data_p (machine_mode mode, rtx x)
{
  return (DEFAULT_ABI == ABI_V4
	  && !flag_pic && !TARGET_TOC
	  && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
	  && small_data_operand (x, mode));
}
8810 rs6000_legitimate_offset_address_p (machine_mode mode
, rtx x
,
8811 bool strict
, bool worst_case
)
8813 unsigned HOST_WIDE_INT offset
;
8816 if (GET_CODE (x
) != PLUS
)
8818 if (!REG_P (XEXP (x
, 0)))
8820 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), strict
))
8822 if (mode_supports_dq_form (mode
))
8823 return quad_address_p (x
, mode
, strict
);
8824 if (!reg_offset_addressing_ok_p (mode
))
8825 return virtual_stack_registers_memory_p (x
);
8826 if (legitimate_constant_pool_address_p (x
, mode
, strict
|| lra_in_progress
))
8828 if (!CONST_INT_P (XEXP (x
, 1)))
8831 offset
= INTVAL (XEXP (x
, 1));
8838 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8840 if (VECTOR_MEM_VSX_P (mode
))
8845 if (!TARGET_POWERPC64
)
8847 else if (offset
& 3)
8860 if (!TARGET_POWERPC64
)
8862 else if (offset
& 3)
8870 if (TARGET_PREFIXED
)
8871 return SIGNED_34BIT_OFFSET_EXTRA_P (offset
, extra
);
8873 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
bool
legitimate_indexed_address_p (rtx x, int strict)
{
  rtx op0, op1;

  if (GET_CODE (x) != PLUS)
    return false;

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  return (REG_P (op0) && REG_P (op1)
	  && ((INT_REG_OK_FOR_BASE_P (op0, strict)
	       && INT_REG_OK_FOR_INDEX_P (op1, strict))
	      || (INT_REG_OK_FOR_BASE_P (op1, strict)
		  && INT_REG_OK_FOR_INDEX_P (op0, strict))));
}

bool
avoiding_indexed_address_p (machine_mode mode)
{
  unsigned int msize = GET_MODE_SIZE (mode);

  /* Avoid indexed addressing for modes that have non-indexed load/store
     instruction forms.  On power10, vector pairs have an indexed
     form, but vector quads don't.  */
  if (msize > 16)
    return msize != 32;

  return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
}

bool
legitimate_indirect_address_p (rtx x, int strict)
{
  return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
}

bool
macho_lo_sum_memory_operand (rtx x, machine_mode mode)
{
  if (!TARGET_MACHO || !flag_pic
      || mode != SImode || !MEM_P (x))
    return false;
  x = XEXP (x, 0);

  if (GET_CODE (x) != LO_SUM)
    return false;
  if (!REG_P (XEXP (x, 0)))
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
    return false;
  x = XEXP (x, 1);

  return CONSTANT_P (x);
}
8934 legitimate_lo_sum_address_p (machine_mode mode
, rtx x
, int strict
)
8936 if (GET_CODE (x
) != LO_SUM
)
8938 if (!REG_P (XEXP (x
, 0)))
8940 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), strict
))
8942 /* quad word addresses are restricted, and we can't use LO_SUM. */
8943 if (mode_supports_dq_form (mode
))
8951 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
)
8953 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8954 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8955 recognizes some LO_SUM addresses as valid although this
8956 function says opposite. In most cases, LRA through different
8957 transformations can generate correct code for address reloads.
8958 It cannot manage only some LO_SUM cases. So we need to add
8959 code here saying that some addresses are still valid. */
8960 large_toc_ok
= (lra_in_progress
&& TARGET_CMODEL
!= CMODEL_SMALL
8961 && small_toc_ref (x
, VOIDmode
));
8962 if (TARGET_TOC
&& ! large_toc_ok
)
8964 if (GET_MODE_NUNITS (mode
) != 1)
8966 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
8967 && !(/* ??? Assume floating point reg based on mode? */
8968 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
)))
8971 return CONSTANT_P (x
) || large_toc_ok
;
8973 else if (TARGET_MACHO
)
8975 if (GET_MODE_NUNITS (mode
) != 1)
8977 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
8978 && !(/* see above */
8979 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
)))
8982 if (MACHO_DYNAMIC_NO_PIC_P
|| !flag_pic
)
8983 return CONSTANT_P (x
);
8985 /* Macho-O PIC code from here. */
8986 if (GET_CODE (x
) == CONST
)
8989 /* SYMBOL_REFs need to be wrapped in an UNSPEC_MACHOPIC_OFFSET. */
8990 if (SYMBOL_REF_P (x
))
8993 /* So this is OK if the wrapped object is const. */
8994 if (GET_CODE (x
) == UNSPEC
8995 && XINT (x
, 1) == UNSPEC_MACHOPIC_OFFSET
)
8996 return CONSTANT_P (XVECEXP (x
, 0, 0));
8997 return CONSTANT_P (x
);
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This is used from only one place: `memory_address' in explow.cc.

   OLDX is the address as it was before break_out_memory_refs was
   called.  In some cases it is useful to look at this to decide what
   needs to be done.

   It is always safe for this function to do nothing.  It exists to
   recognize opportunities to optimize the output.

   On RS/6000, first check for the sum of a register with a constant
   integer that is out of range.  If so, generate code to add the
   constant with the low-order 16 bits masked to the register and force
   this result into another register (this can be done with `cau').
   Then generate an address of REG+(CONST&0xffff), allowing for the
   possibility of bit 16 being a one.

   Then check for the sum of a register and something not constant, try to
   load the other things into a register and return the sum.  */
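
/* Worked example of the split described above: for the address
   (plus (reg r30) (const_int 0x12345)) the low part is
   ((0x2345 ^ 0x8000) - 0x8000) = 0x2345 and the high part is 0x10000, so we
   add 0x10000 into a new register and return REG+0x2345.  For an offset of
   0x18345 the low part becomes -0x7cbb and the high part 0x20000.  */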
static rtx
rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			   machine_mode mode)
{
  unsigned int extra;
9030 if (!reg_offset_addressing_ok_p (mode
)
9031 || mode_supports_dq_form (mode
))
9033 if (virtual_stack_registers_memory_p (x
))
9036 /* In theory we should not be seeing addresses of the form reg+0,
9037 but just in case it is generated, optimize it away. */
9038 if (GET_CODE (x
) == PLUS
&& XEXP (x
, 1) == const0_rtx
)
9039 return force_reg (Pmode
, XEXP (x
, 0));
9041 /* For TImode with load/store quad, restrict addresses to just a single
9042 pointer, so it works with both GPRs and VSX registers. */
9043 /* Make sure both operands are registers. */
9044 else if (GET_CODE (x
) == PLUS
9045 && (mode
!= TImode
|| !TARGET_VSX
))
9046 return gen_rtx_PLUS (Pmode
,
9047 force_reg (Pmode
, XEXP (x
, 0)),
9048 force_reg (Pmode
, XEXP (x
, 1)));
9050 return force_reg (Pmode
, x
);
9052 if (SYMBOL_REF_P (x
) && !TARGET_MACHO
)
9054 enum tls_model model
= SYMBOL_REF_TLS_MODEL (x
);
9056 return rs6000_legitimize_tls_address (x
, model
);
9068 /* As in legitimate_offset_address_p we do not assume
9069 worst-case. The mode here is just a hint as to the registers
9070 used. A TImode is usually in gprs, but may actually be in
9071 fprs. Leave worst-case scenario for reload to handle via
9072 insn constraints. PTImode is only GPRs. */
9079 if (GET_CODE (x
) == PLUS
9080 && REG_P (XEXP (x
, 0))
9081 && CONST_INT_P (XEXP (x
, 1))
9082 && ((unsigned HOST_WIDE_INT
) (INTVAL (XEXP (x
, 1)) + 0x8000)
9083 >= 0x10000 - extra
))
9085 HOST_WIDE_INT high_int
, low_int
;
9087 low_int
= ((INTVAL (XEXP (x
, 1)) & 0xffff) ^ 0x8000) - 0x8000;
9088 if (low_int
>= 0x8000 - extra
)
9090 high_int
= INTVAL (XEXP (x
, 1)) - low_int
;
9091 sum
= force_operand (gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
9092 gen_int_mode (high_int
, Pmode
)), 0);
9093 return plus_constant (Pmode
, sum
, low_int
);
9095 else if (GET_CODE (x
) == PLUS
9096 && REG_P (XEXP (x
, 0))
9097 && !CONST_INT_P (XEXP (x
, 1))
9098 && GET_MODE_NUNITS (mode
) == 1
9099 && (GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
9100 || (/* ??? Assume floating point reg based on mode? */
9101 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
)))
9102 && !avoiding_indexed_address_p (mode
))
9104 return gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
9105 force_reg (Pmode
, force_operand (XEXP (x
, 1), 0)));
9107 else if ((TARGET_ELF
9109 || !MACHO_DYNAMIC_NO_PIC_P
9113 && TARGET_NO_TOC_OR_PCREL
9116 && !CONST_WIDE_INT_P (x
)
9117 && !CONST_DOUBLE_P (x
)
9119 && GET_MODE_NUNITS (mode
) == 1
9120 && (GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
9121 || (/* ??? Assume floating point reg based on mode? */
9122 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
))))
9124 rtx reg
= gen_reg_rtx (Pmode
);
9126 emit_insn (gen_elf_high (reg
, x
));
9128 emit_insn (gen_macho_high (Pmode
, reg
, x
));
9129 return gen_rtx_LO_SUM (Pmode
, reg
, x
);
9133 && constant_pool_expr_p (x
)
9134 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x
), Pmode
))
9135 return create_TOC_reference (x
, NULL_RTX
);
9140 /* Debug version of rs6000_legitimize_address. */
9142 rs6000_debug_legitimize_address (rtx x
, rtx oldx
, machine_mode mode
)
9148 ret
= rs6000_legitimize_address (x
, oldx
, mode
);
9149 insns
= get_insns ();
9155 "\nrs6000_legitimize_address: mode %s, old code %s, "
9156 "new code %s, modified\n",
9157 GET_MODE_NAME (mode
), GET_RTX_NAME (GET_CODE (x
)),
9158 GET_RTX_NAME (GET_CODE (ret
)));
9160 fprintf (stderr
, "Original address:\n");
9163 fprintf (stderr
, "oldx:\n");
9166 fprintf (stderr
, "New address:\n");
9171 fprintf (stderr
, "Insns added:\n");
9172 debug_rtx_list (insns
, 20);
9178 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9179 GET_MODE_NAME (mode
), GET_RTX_NAME (GET_CODE (x
)));
9190 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9191 We need to emit DTP-relative relocations. */
9193 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
9195 rs6000_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
9200 fputs ("\t.long\t", file
);
9203 fputs (DOUBLE_INT_ASM_OP
, file
);
9208 output_addr_const (file
, x
);
9210 fputs ("@dtprel+0x8000", file
);
9213 /* Return true if X is a symbol that refers to real (rather than emulated)
9217 rs6000_real_tls_symbol_ref_p (rtx x
)
9219 return (SYMBOL_REF_P (x
)
9220 && SYMBOL_REF_TLS_MODEL (x
) >= TLS_MODEL_REAL
);
9223 /* In the name of slightly smaller debug output, and to cater to
9224 general assembler lossage, recognize various UNSPEC sequences
9225 and turn them back into a direct symbol reference. */
9228 rs6000_delegitimize_address (rtx orig_x
)
9232 /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion. It
9233 encodes loading up the high part of the address of a TOC reference along
9234 with a load of a GPR using the same base register used for the load. We
9235 return the original SYMBOL_REF.
9237 (set (reg:INT1 <reg>
9238 (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR)))
9240 UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass. These
9241 UNSPECs include the external SYMBOL_REF along with the value being loaded.
9242 We return the original SYMBOL_REF.
9244 (parallel [(set (reg:DI <base-reg>)
9245 (unspec:DI [(symbol_ref <symbol>)
9246 (const_int <marker>)]
9247 UNSPEC_PCREL_OPT_LD_ADDR))
9248 (set (reg:DI <load-reg>)
9249 (unspec:DI [(const_int 0)]
9250 UNSPEC_PCREL_OPT_LD_DATA))])
9252 UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the
9253 GPR being loaded is the same as the GPR used to hold the external address.
9255 (set (reg:DI <base-reg>)
9256 (unspec:DI [(symbol_ref <symbol>)
9257 (const_int <marker>)]
9258 UNSPEC_PCREL_OPT_LD_SAME_REG))
9260 UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This
9261 UNSPEC include the external SYMBOL_REF along with the value being loaded.
9262 We return the original SYMBOL_REF.
9264 (parallel [(set (reg:DI <base-reg>)
9265 (unspec:DI [(symbol_ref <symbol>)
9266 (const_int <marker>)]
9267 UNSPEC_PCREL_OPT_ST_ADDR))
9268 (use (reg <store-reg>))]) */
9270 if (GET_CODE (orig_x
) == UNSPEC
)
9271 switch (XINT (orig_x
, 1))
9273 case UNSPEC_FUSION_GPR
:
9274 case UNSPEC_PCREL_OPT_LD_ADDR
:
9275 case UNSPEC_PCREL_OPT_LD_SAME_REG
:
9276 case UNSPEC_PCREL_OPT_ST_ADDR
:
9277 orig_x
= XVECEXP (orig_x
, 0, 0);
9284 orig_x
= delegitimize_mem_from_attrs (orig_x
);
9291 if (TARGET_CMODEL
!= CMODEL_SMALL
&& GET_CODE (y
) == LO_SUM
)
9295 if (GET_CODE (y
) == PLUS
9296 && GET_MODE (y
) == Pmode
9297 && CONST_INT_P (XEXP (y
, 1)))
9299 offset
= XEXP (y
, 1);
9303 if (GET_CODE (y
) == UNSPEC
&& XINT (y
, 1) == UNSPEC_TOCREL
)
9305 y
= XVECEXP (y
, 0, 0);
9308 /* Do not associate thread-local symbols with the original
9309 constant pool symbol. */
9312 && CONSTANT_POOL_ADDRESS_P (y
)
9313 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y
)))
9317 if (offset
!= NULL_RTX
)
9318 y
= gen_rtx_PLUS (Pmode
, y
, offset
);
9319 if (!MEM_P (orig_x
))
9322 return replace_equiv_address_nv (orig_x
, y
);
9326 && GET_CODE (orig_x
) == LO_SUM
9327 && GET_CODE (XEXP (orig_x
, 1)) == CONST
)
9329 y
= XEXP (XEXP (orig_x
, 1), 0);
9330 if (GET_CODE (y
) == UNSPEC
&& XINT (y
, 1) == UNSPEC_MACHOPIC_OFFSET
)
9331 return XVECEXP (y
, 0, 0);
9337 /* Return true if X shouldn't be emitted into the debug info.
9338 The linker doesn't like .toc section references from
9339 .debug_* sections, so reject .toc section symbols. */
9342 rs6000_const_not_ok_for_debug_p (rtx x
)
9344 if (GET_CODE (x
) == UNSPEC
)
9346 if (SYMBOL_REF_P (x
)
9347 && CONSTANT_POOL_ADDRESS_P (x
))
9349 rtx c
= get_pool_constant (x
);
9350 machine_mode cmode
= get_pool_mode (x
);
9351 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c
, cmode
))
9358 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9361 rs6000_legitimate_combined_insn (rtx_insn
*insn
)
9363 int icode
= INSN_CODE (insn
);
9365 /* Reject creating doloop insns. Combine should not be allowed
9366 to create these for a number of reasons:
9367 1) In a nested loop, if combine creates one of these in an
9368 outer loop and the register allocator happens to allocate ctr
9369 to the outer loop insn, then the inner loop can't use ctr.
9370 Inner loops ought to be more highly optimized.
9371 2) Combine often wants to create one of these from what was
9372 originally a three insn sequence, first combining the three
9373 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9374 allocated ctr, the splitter takes use back to the three insn
9375 sequence. It's better to stop combine at the two insn
9377 3) Faced with not being able to allocate ctr for ctrsi/crtdi
9378 insns, the register allocator sometimes uses floating point
9379 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9380 jump insn and output reloads are not implemented for jumps,
9381 the ctrsi/ctrdi splitters need to handle all possible cases.
9382 That's a pain, and it gets to be seriously difficult when a
9383 splitter that runs after reload needs memory to transfer from
9384 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9385 for the difficult case. It's better to not create problems
9386 in the first place. */
9387 if (icode
!= CODE_FOR_nothing
9388 && (icode
== CODE_FOR_bdz_si
9389 || icode
== CODE_FOR_bdz_di
9390 || icode
== CODE_FOR_bdnz_si
9391 || icode
== CODE_FOR_bdnz_di
9392 || icode
== CODE_FOR_bdztf_si
9393 || icode
== CODE_FOR_bdztf_di
9394 || icode
== CODE_FOR_bdnztf_si
9395 || icode
== CODE_FOR_bdnztf_di
))
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx rs6000_tls_symbol;
static rtx
rs6000_tls_get_addr (void)
{
  if (!rs6000_tls_symbol)
    rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");

  return rs6000_tls_symbol;
}

/* Construct the SYMBOL_REF for TLS GOT references.  */

static GTY(()) rtx rs6000_got_symbol;
rtx
rs6000_got_sym (void)
{
  if (!rs6000_got_symbol)
    {
      rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
      SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
      SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
    }

  return rs6000_got_symbol;
}
9429 /* AIX Thread-Local Address support. */
9432 rs6000_legitimize_tls_address_aix (rtx addr
, enum tls_model model
)
9434 rtx sym
, mem
, tocref
, tlsreg
, tmpreg
, dest
;
9438 /* Place addr into TOC constant pool. */
9439 sym
= force_const_mem (GET_MODE (addr
), addr
);
9441 /* Output the TOC entry and create the MEM referencing the value. */
9442 if (constant_pool_expr_p (XEXP (sym
, 0))
9443 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym
, 0)), Pmode
))
9445 tocref
= create_TOC_reference (XEXP (sym
, 0), NULL_RTX
);
9446 mem
= gen_const_mem (Pmode
, tocref
);
9447 set_mem_alias_set (mem
, get_TOC_alias_set ());
9452 /* Use global-dynamic for local-dynamic. */
9453 if (model
== TLS_MODEL_GLOBAL_DYNAMIC
9454 || model
== TLS_MODEL_LOCAL_DYNAMIC
)
9456 /* Create new TOC reference for @m symbol. */
9457 name
= XSTR (XVECEXP (XEXP (mem
, 0), 0, 0), 0);
9458 tlsname
= XALLOCAVEC (char, strlen (name
) + 1);
9459 strcpy (tlsname
, "*LCM");
9460 strcat (tlsname
, name
+ 3);
9461 rtx modaddr
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (tlsname
));
9462 SYMBOL_REF_FLAGS (modaddr
) |= SYMBOL_FLAG_LOCAL
;
9463 tocref
= create_TOC_reference (modaddr
, NULL_RTX
);
9464 rtx modmem
= gen_const_mem (Pmode
, tocref
);
9465 set_mem_alias_set (modmem
, get_TOC_alias_set ());
9467 rtx modreg
= gen_reg_rtx (Pmode
);
9468 emit_insn (gen_rtx_SET (modreg
, modmem
));
9470 tmpreg
= gen_reg_rtx (Pmode
);
9471 emit_insn (gen_rtx_SET (tmpreg
, mem
));
9473 dest
= gen_reg_rtx (Pmode
);
9475 emit_insn (gen_tls_get_addrsi (dest
, modreg
, tmpreg
));
9477 emit_insn (gen_tls_get_addrdi (dest
, modreg
, tmpreg
));
9480 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9481 else if (TARGET_32BIT
)
9483 tlsreg
= gen_reg_rtx (SImode
);
9484 emit_insn (gen_tls_get_tpointer (tlsreg
));
9488 tlsreg
= gen_rtx_REG (DImode
, 13);
9489 xcoff_tls_exec_model_detected
= true;
9492 /* Load the TOC value into temporary register. */
9493 tmpreg
= gen_reg_rtx (Pmode
);
9494 emit_insn (gen_rtx_SET (tmpreg
, mem
));
9495 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
9496 gen_rtx_MINUS (Pmode
, addr
, tlsreg
));
9498 /* Add TOC symbol value to TLS pointer. */
9499 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tmpreg
, tlsreg
));
9504 /* Passes the tls arg value for global dynamic and local dynamic
9505 emit_library_call_value in rs6000_legitimize_tls_address to
9506 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
9507 marker relocs put on __tls_get_addr calls. */
9508 static rtx global_tlsarg
;
9510 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9511 this (thread-local) address. */
9514 rs6000_legitimize_tls_address (rtx addr
, enum tls_model model
)
9519 return rs6000_legitimize_tls_address_aix (addr
, model
);
9521 dest
= gen_reg_rtx (Pmode
);
9522 if (model
== TLS_MODEL_LOCAL_EXEC
9523 && (rs6000_tls_size
== 16 || rs6000_pcrel_p ()))
9529 tlsreg
= gen_rtx_REG (Pmode
, 13);
9530 insn
= gen_tls_tprel_64 (dest
, tlsreg
, addr
);
9534 tlsreg
= gen_rtx_REG (Pmode
, 2);
9535 insn
= gen_tls_tprel_32 (dest
, tlsreg
, addr
);
9539 else if (model
== TLS_MODEL_LOCAL_EXEC
&& rs6000_tls_size
== 32)
9543 tmp
= gen_reg_rtx (Pmode
);
9546 tlsreg
= gen_rtx_REG (Pmode
, 13);
9547 insn
= gen_tls_tprel_ha_64 (tmp
, tlsreg
, addr
);
9551 tlsreg
= gen_rtx_REG (Pmode
, 2);
9552 insn
= gen_tls_tprel_ha_32 (tmp
, tlsreg
, addr
);
9556 insn
= gen_tls_tprel_lo_64 (dest
, tmp
, addr
);
9558 insn
= gen_tls_tprel_lo_32 (dest
, tmp
, addr
);
9563 rtx got
, tga
, tmp1
, tmp2
;
9565 /* We currently use relocations like @got@tlsgd for tls, which
9566 means the linker will handle allocation of tls entries, placing
9567 them in the .got section. So use a pointer to the .got section,
9568 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9569 or to secondary GOT sections used by 32-bit -fPIC. */
9570 if (rs6000_pcrel_p ())
9572 else if (TARGET_64BIT
)
9573 got
= gen_rtx_REG (Pmode
, 2);
9577 got
= gen_rtx_REG (Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
9580 rtx gsym
= rs6000_got_sym ();
9581 got
= gen_reg_rtx (Pmode
);
9583 rs6000_emit_move (got
, gsym
, Pmode
);
9588 tmp1
= gen_reg_rtx (Pmode
);
9589 tmp2
= gen_reg_rtx (Pmode
);
9590 mem
= gen_const_mem (Pmode
, tmp1
);
9591 lab
= gen_label_rtx ();
9592 emit_insn (gen_load_toc_v4_PIC_1b (gsym
, lab
));
9593 emit_move_insn (tmp1
, gen_rtx_REG (Pmode
, LR_REGNO
));
9594 if (TARGET_LINK_STACK
)
9595 emit_insn (gen_addsi3 (tmp1
, tmp1
, GEN_INT (4)));
9596 emit_move_insn (tmp2
, mem
);
9597 rtx_insn
*last
= emit_insn (gen_addsi3 (got
, tmp1
, tmp2
));
9598 set_unique_reg_note (last
, REG_EQUAL
, gsym
);
9603 if (model
== TLS_MODEL_GLOBAL_DYNAMIC
)
9605 rtx arg
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, addr
, got
),
9607 tga
= rs6000_tls_get_addr ();
9608 rtx argreg
= gen_rtx_REG (Pmode
, 3);
9609 emit_insn (gen_rtx_SET (argreg
, arg
));
9610 global_tlsarg
= arg
;
9611 emit_library_call_value (tga
, dest
, LCT_CONST
, Pmode
, argreg
, Pmode
);
9612 global_tlsarg
= NULL_RTX
;
9614 /* Make a note so that the result of this call can be CSEd. */
9615 rtvec vec
= gen_rtvec (1, copy_rtx (arg
));
9616 rtx uns
= gen_rtx_UNSPEC (Pmode
, vec
, UNSPEC_TLS_GET_ADDR
);
9617 set_unique_reg_note (get_last_insn (), REG_EQUAL
, uns
);
9619 else if (model
== TLS_MODEL_LOCAL_DYNAMIC
)
9621 rtx arg
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, got
), UNSPEC_TLSLD
);
9622 tga
= rs6000_tls_get_addr ();
9623 tmp1
= gen_reg_rtx (Pmode
);
9624 rtx argreg
= gen_rtx_REG (Pmode
, 3);
9625 emit_insn (gen_rtx_SET (argreg
, arg
));
9626 global_tlsarg
= arg
;
9627 emit_library_call_value (tga
, tmp1
, LCT_CONST
, Pmode
, argreg
, Pmode
);
9628 global_tlsarg
= NULL_RTX
;
9630 /* Make a note so that the result of this call can be CSEd. */
9631 rtvec vec
= gen_rtvec (1, copy_rtx (arg
));
9632 rtx uns
= gen_rtx_UNSPEC (Pmode
, vec
, UNSPEC_TLS_GET_ADDR
);
9633 set_unique_reg_note (get_last_insn (), REG_EQUAL
, uns
);
9635 if (rs6000_tls_size
== 16 || rs6000_pcrel_p ())
9638 insn
= gen_tls_dtprel_64 (dest
, tmp1
, addr
);
9640 insn
= gen_tls_dtprel_32 (dest
, tmp1
, addr
);
9642 else if (rs6000_tls_size
== 32)
9644 tmp2
= gen_reg_rtx (Pmode
);
9646 insn
= gen_tls_dtprel_ha_64 (tmp2
, tmp1
, addr
);
9648 insn
= gen_tls_dtprel_ha_32 (tmp2
, tmp1
, addr
);
9651 insn
= gen_tls_dtprel_lo_64 (dest
, tmp2
, addr
);
9653 insn
= gen_tls_dtprel_lo_32 (dest
, tmp2
, addr
);
9657 tmp2
= gen_reg_rtx (Pmode
);
9659 insn
= gen_tls_got_dtprel_64 (tmp2
, got
, addr
);
9661 insn
= gen_tls_got_dtprel_32 (tmp2
, got
, addr
);
9663 insn
= gen_rtx_SET (dest
, gen_rtx_PLUS (Pmode
, tmp2
, tmp1
));
9669 /* IE, or 64-bit offset LE. */
9670 tmp2
= gen_reg_rtx (Pmode
);
9672 insn
= gen_tls_got_tprel_64 (tmp2
, got
, addr
);
9674 insn
= gen_tls_got_tprel_32 (tmp2
, got
, addr
);
9676 if (rs6000_pcrel_p ())
9679 insn
= gen_tls_tls_pcrel_64 (dest
, tmp2
, addr
);
9681 insn
= gen_tls_tls_pcrel_32 (dest
, tmp2
, addr
);
9683 else if (TARGET_64BIT
)
9684 insn
= gen_tls_tls_64 (dest
, tmp2
, addr
);
9686 insn
= gen_tls_tls_32 (dest
, tmp2
, addr
);
/* Only create the global variable for the stack protect guard if we are using
   the global flavor of that guard.  */
static tree
rs6000_init_stack_protect_guard (void)
{
  if (rs6000_stack_protector_guard == SSP_GLOBAL)
    return default_stack_protect_guard ();

  return NULL_TREE;
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  if (GET_CODE (x) == HIGH
      && GET_CODE (XEXP (x, 0)) == UNSPEC)
    return true;

  /* A TLS symbol in the TOC cannot contain a sum.  */
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
      && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
    return true;

  /* Allow AIX TOC TLS symbols in the constant pool,
     but not ELF TLS symbols.  */
  return TARGET_ELF && tls_referenced_p (x);
}

/* Return true iff the given SYMBOL_REF refers to a constant pool entry
   that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
   can be addressed relative to the toc pointer.  */

static bool
use_toc_relative_ref (rtx sym, machine_mode mode)
{
  return ((constant_pool_expr_p (sym)
	   && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
					       get_pool_mode (sym)))
	  || (TARGET_CMODEL == CMODEL_MEDIUM
	      && SYMBOL_REF_LOCAL_P (sym)
	      && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
}
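
/* For example (illustrative, assuming the 8-byte default for
   POWERPC64_TOC_POINTER_ALIGNMENT): under -mcmodel=medium a local symbol
   accessed in SImode or DImode can use a toc-relative reference, while a
   16-byte vector access falls back to the constant-pool test because
   GET_MODE_SIZE (V4SImode) exceeds 8.  */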
9741 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9742 that is a valid memory address for an instruction.
9743 The MODE argument is the machine mode for the MEM expression
9744 that wants to use this address.
9746 On the RS/6000, there are four valid address: a SYMBOL_REF that
9747 refers to a constant pool entry of an address (or the sum of it
9748 plus a constant), a short (16-bit signed) constant plus a register,
9749 the sum of two registers, or a register indirect, possibly with an
9750 auto-increment. For DFmode, DDmode and DImode with a constant plus
9751 register, we must ensure that both words are addressable or PowerPC64
9752 with offset word aligned.
9754 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9755 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9756 because adjacent memory cells are accessed by adding word-sized offsets
9757 during assembly output. */
9759 rs6000_legitimate_address_p (machine_mode mode
, rtx x
, bool reg_ok_strict
)
9761 bool reg_offset_p
= reg_offset_addressing_ok_p (mode
);
9762 bool quad_offset_p
= mode_supports_dq_form (mode
);
9764 if (TARGET_ELF
&& RS6000_SYMBOL_REF_TLS_P (x
))
9767 /* Handle unaligned altivec lvx/stvx type addresses. */
9768 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
)
9769 && GET_CODE (x
) == AND
9770 && CONST_INT_P (XEXP (x
, 1))
9771 && INTVAL (XEXP (x
, 1)) == -16)
9774 return (legitimate_indirect_address_p (x
, reg_ok_strict
)
9775 || legitimate_indexed_address_p (x
, reg_ok_strict
)
9776 || virtual_stack_registers_memory_p (x
));
9779 if (legitimate_indirect_address_p (x
, reg_ok_strict
))
9782 && (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == PRE_DEC
)
9783 && mode_supports_pre_incdec_p (mode
)
9784 && legitimate_indirect_address_p (XEXP (x
, 0), reg_ok_strict
))
9787 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
9788 if (address_is_prefixed (x
, mode
, NON_PREFIXED_DEFAULT
))
9791 /* Handle restricted vector d-form offsets in ISA 3.0. */
9794 if (quad_address_p (x
, mode
, reg_ok_strict
))
9797 else if (virtual_stack_registers_memory_p (x
))
9800 else if (reg_offset_p
)
9802 if (legitimate_small_data_p (mode
, x
))
9804 if (legitimate_constant_pool_address_p (x
, mode
,
9805 reg_ok_strict
|| lra_in_progress
))
9809 /* For TImode, if we have TImode in VSX registers, only allow register
9810 indirect addresses. This will allow the values to go in either GPRs
9811 or VSX registers without reloading. The vector types would tend to
9812 go into VSX registers, so we allow REG+REG, while TImode seems
9813 somewhat split, in that some uses are GPR based, and some VSX based. */
9814 /* FIXME: We could loosen this by changing the following to
9815 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9816 but currently we cannot allow REG+REG addressing for TImode. See
9817 PR72827 for complete details on how this ends up hoodwinking DSE. */
9818 if (mode
== TImode
&& TARGET_VSX
)
9820 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9823 && GET_CODE (x
) == PLUS
9824 && REG_P (XEXP (x
, 0))
9825 && (XEXP (x
, 0) == virtual_stack_vars_rtx
9826 || XEXP (x
, 0) == arg_pointer_rtx
)
9827 && CONST_INT_P (XEXP (x
, 1)))
9829 if (rs6000_legitimate_offset_address_p (mode
, x
, reg_ok_strict
, false))
9831 if (!FLOAT128_2REG_P (mode
)
9832 && (TARGET_HARD_FLOAT
9834 || (mode
!= DFmode
&& mode
!= DDmode
))
9835 && (TARGET_POWERPC64
|| mode
!= DImode
)
9836 && (mode
!= TImode
|| VECTOR_MEM_VSX_P (TImode
))
9838 && !avoiding_indexed_address_p (mode
)
9839 && legitimate_indexed_address_p (x
, reg_ok_strict
))
9841 if (TARGET_UPDATE
&& GET_CODE (x
) == PRE_MODIFY
9842 && mode_supports_pre_modify_p (mode
)
9843 && legitimate_indirect_address_p (XEXP (x
, 0), reg_ok_strict
)
9844 && (rs6000_legitimate_offset_address_p (mode
, XEXP (x
, 1),
9845 reg_ok_strict
, false)
9846 || (!avoiding_indexed_address_p (mode
)
9847 && legitimate_indexed_address_p (XEXP (x
, 1), reg_ok_strict
)))
9848 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
9850 /* There is no prefixed version of the load/store with update. */
9851 rtx addr
= XEXP (x
, 1);
9852 return !address_is_prefixed (addr
, mode
, NON_PREFIXED_DEFAULT
);
9854 if (reg_offset_p
&& !quad_offset_p
9855 && legitimate_lo_sum_address_p (mode
, x
, reg_ok_strict
))
9860 /* Debug version of rs6000_legitimate_address_p. */
9862 rs6000_debug_legitimate_address_p (machine_mode mode
, rtx x
,
9865 bool ret
= rs6000_legitimate_address_p (mode
, x
, reg_ok_strict
);
9867 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9868 "strict = %d, reload = %s, code = %s\n",
9869 ret
? "true" : "false",
9870 GET_MODE_NAME (mode
),
9872 (reload_completed
? "after" : "before"),
9873 GET_RTX_NAME (GET_CODE (x
)));
9879 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9882 rs6000_mode_dependent_address_p (const_rtx addr
,
9883 addr_space_t as ATTRIBUTE_UNUSED
)
9885 return rs6000_mode_dependent_address_ptr (addr
);
9888 /* Go to LABEL if ADDR (a legitimate address expression)
9889 has an effect that depends on the machine mode it is used for.
9891 On the RS/6000 this is true of all integral offsets (since AltiVec
9892 and VSX modes don't allow them) or is a pre-increment or decrement.
9894 ??? Except that due to conceptual problems in offsettable_address_p
9895 we can't really report the problems of integral offsets. So leave
9896 this assuming that the adjustable offset must be valid for the
9897 sub-words of a TFmode operand, which is what we had before. */
9900 rs6000_mode_dependent_address (const_rtx addr
)
9902 switch (GET_CODE (addr
))
9905 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9906 is considered a legitimate address before reload, so there
9907 are no offset restrictions in that case. Note that this
9908 condition is safe in strict mode because any address involving
9909 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9910 been rejected as illegitimate. */
9911 if (XEXP (addr
, 0) != virtual_stack_vars_rtx
9912 && XEXP (addr
, 0) != arg_pointer_rtx
9913 && CONST_INT_P (XEXP (addr
, 1)))
9915 HOST_WIDE_INT val
= INTVAL (XEXP (addr
, 1));
9916 HOST_WIDE_INT extra
= TARGET_POWERPC64
? 8 : 12;
9917 if (TARGET_PREFIXED
)
9918 return !SIGNED_34BIT_OFFSET_EXTRA_P (val
, extra
);
9920 return !SIGNED_16BIT_OFFSET_EXTRA_P (val
, extra
);
9925 /* Anything in the constant pool is sufficiently aligned that
9926 all bytes have the same high part address. */
9927 return !legitimate_constant_pool_address_p (addr
, QImode
, false);
9929 /* Auto-increment cases are now treated generically in recog.cc. */
9931 return TARGET_UPDATE
;
9933 /* AND is only allowed in Altivec loads. */
9944 /* Debug version of rs6000_mode_dependent_address. */
9946 rs6000_debug_mode_dependent_address (const_rtx addr
)
9948 bool ret
= rs6000_mode_dependent_address (addr
);
9950 fprintf (stderr
, "\nrs6000_mode_dependent_address: ret = %s\n",
9951 ret
? "true" : "false");
9957 /* Implement FIND_BASE_TERM. */
9960 rs6000_find_base_term (rtx op
)
9965 if (GET_CODE (base
) == CONST
)
9966 base
= XEXP (base
, 0);
9967 if (GET_CODE (base
) == PLUS
)
9968 base
= XEXP (base
, 0);
9969 if (GET_CODE (base
) == UNSPEC
)
9970 switch (XINT (base
, 1))
9973 case UNSPEC_MACHOPIC_OFFSET
:
9974 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9975 for aliasing purposes. */
9976 return XVECEXP (base
, 0, 0);
/* More elaborate version of recog's offsettable_memref_p predicate
   that works around the ??? note of rs6000_mode_dependent_address.
   In particular it accepts

     (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))

   in 32-bit mode, that the recog predicate rejects.  */

bool
rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
{
  bool worst_case;

  if (!MEM_P (op))
    return false;

  /* First mimic offsettable_memref_p.  */
  if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
    return true;

  /* offsettable_address_p invokes rs6000_mode_dependent_address, but
     the latter predicate knows nothing about the mode of the memory
     reference and, therefore, assumes that it is the largest supported
     mode (TFmode).  As a consequence, legitimate offsettable memory
     references are rejected.  rs6000_legitimate_offset_address_p contains
     the correct logic for the PLUS case of rs6000_mode_dependent_address,
     at least with a little bit of help here given that we know the
     actual registers used.  */
  worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
                || GET_MODE_SIZE (reg_mode) == 4);
  return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
                                             strict, worst_case);
}
/* Determine the reassociation width to be used in reassociate_bb.
   This takes into account how many parallel operations we
   can actually do of a given type, and also the latency.

     int  add/sub     6/cycle
     vect add/sub/mul 2/cycle
     fp   add/sub/mul 2/cycle  */

static int
rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
                            machine_mode mode)
{
  switch (rs6000_tune)
    {
    case PROCESSOR_POWER8:
    case PROCESSOR_POWER9:
    case PROCESSOR_POWER10:
      if (DECIMAL_FLOAT_MODE_P (mode))
        return 1;
      if (VECTOR_MODE_P (mode))
        return 4;
      if (INTEGRAL_MODE_P (mode))
        return 1;
      if (FLOAT_MODE_P (mode))
        return 4;
      break;
    default:
      break;
    }

  return 1;
}
/* Change register usage conditional on target flags.  */
static void
rs6000_conditional_register_usage (void)
{
  int i;

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_conditional_register_usage called\n");

  /* 64-bit AIX and Linux reserve GPR13 for thread-private data.  */
  if (TARGET_64BIT)
    fixed_regs[13] = call_used_regs[13] = 1;

  /* Conditionally disable FPRs.  */
  if (TARGET_SOFT_FLOAT)
    for (i = 32; i < 64; i++)
      fixed_regs[i] = call_used_regs[i] = 1;

  /* The TOC register is not killed across calls in a way that is
     visible to the compiler.  */
  if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    call_used_regs[2] = 0;

  if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
      = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
      = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  if (TARGET_TOC && TARGET_MINIMAL_TOC)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  if (!TARGET_ALTIVEC && !TARGET_VSX)
    {
      for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;
      call_used_regs[VRSAVE_REGNO] = 1;
    }

  if (TARGET_ALTIVEC || TARGET_VSX)
    global_regs[VSCR_REGNO] = 1;

  if (TARGET_ALTIVEC_ABI)
    {
      for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
        call_used_regs[i] = 1;

      /* AIX reserves VR20:31 in non-extended ABI mode.  */
      if (TARGET_XCOFF && !rs6000_aix_extabi)
        for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
          fixed_regs[i] = call_used_regs[i] = 1;
    }
}
/* Output insns to set DEST equal to the constant SOURCE as a series of
   lis, ori and shl instructions and return TRUE.  */

bool
rs6000_emit_set_const (rtx dest, rtx source)
{
  machine_mode mode = GET_MODE (dest);
  rtx temp, set;
  rtx_insn *insn;
  HOST_WIDE_INT c;

  gcc_checking_assert (CONST_INT_P (source));
  c = INTVAL (source);
  switch (mode)
    {
    case E_QImode:
    case E_HImode:
      emit_insn (gen_rtx_SET (dest, source));
      return true;

    case E_SImode:
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);

      emit_insn (gen_rtx_SET (copy_rtx (temp),
                              GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
      emit_insn (gen_rtx_SET (dest,
                              gen_rtx_IOR (SImode, copy_rtx (temp),
                                           GEN_INT (c & 0xffff))));
      break;

    case E_DImode:
      if (!TARGET_POWERPC64)
        {
          rtx hi, lo;

          hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
                                      DImode);
          lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
                                      DImode);
          emit_move_insn (hi, GEN_INT (c >> 32));
          c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
          emit_move_insn (lo, GEN_INT (c));
        }
      else
        rs6000_emit_set_long_const (dest, c);
      break;

    default:
      gcc_unreachable ();
    }

  insn = get_last_insn ();
  set = single_set (insn);
  if (! CONSTANT_P (SET_SRC (set)))
    set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));

  return true;
}
/* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
   Output insns to set DEST equal to the constant C as a series of
   lis, ori and shl instructions.  */
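/* As an illustration (not part of the original comment), a constant that
   needs all four 16-bit groups, such as 0x123456789abcdef0, is typically
   built from the most significant group down, roughly:

       lis   rD,0x1234        # ud4 into the upper halfword
       ori   rD,rD,0x5678     # or in ud3
       sldi  rD,rD,32         # shift the upper 32 bits into place
       oris  rD,rD,0x9abc     # or in ud2
       ori   rD,rD,0xdef0     # or in ud1

   The shorter cases below handle constants where some groups are zero,
   are a sign extension of the lower groups, or repeat.  */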
static void
rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
{
  rtx temp;
  HOST_WIDE_INT ud1, ud2, ud3, ud4;

  ud1 = c & 0xffff;
  c = c >> 16;
  ud2 = c & 0xffff;
  c = c >> 16;
  ud3 = c & 0xffff;
  c = c >> 16;
  ud4 = c & 0xffff;

  if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
      || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
    emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));

  else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
           || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
    {
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);

      emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
                      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
      if (ud1 != 0)
        emit_move_insn (dest,
                        gen_rtx_IOR (DImode, copy_rtx (temp),
                                     GEN_INT (ud1)));
    }
  else if (ud3 == 0 && ud4 == 0)
    {
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);

      gcc_assert (ud2 & 0x8000);
      emit_move_insn (copy_rtx (temp),
                      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
      if (ud1 != 0)
        emit_move_insn (copy_rtx (temp),
                        gen_rtx_IOR (DImode, copy_rtx (temp),
                                     GEN_INT (ud1)));
      emit_move_insn (dest,
                      gen_rtx_ZERO_EXTEND (DImode,
                                           gen_lowpart (SImode,
                                                        copy_rtx (temp))));
    }
  else if (ud1 == ud3 && ud2 == ud4)
    {
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
      HOST_WIDE_INT num = (ud2 << 16) | ud1;
      rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000);
      rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
      rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
      emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
    }
  else if ((ud4 == 0xffff && (ud3 & 0x8000))
           || (ud4 == 0 && ! (ud3 & 0x8000)))
    {
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);

      emit_move_insn (copy_rtx (temp),
                      GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
      if (ud2 != 0)
        emit_move_insn (copy_rtx (temp),
                        gen_rtx_IOR (DImode, copy_rtx (temp),
                                     GEN_INT (ud2)));
      emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
                      gen_rtx_ASHIFT (DImode, copy_rtx (temp),
                                      GEN_INT (16)));
      if (ud1 != 0)
        emit_move_insn (dest,
                        gen_rtx_IOR (DImode, copy_rtx (temp),
                                     GEN_INT (ud1)));
    }
  else
    {
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);

      emit_move_insn (copy_rtx (temp),
                      GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
      if (ud3 != 0)
        emit_move_insn (copy_rtx (temp),
                        gen_rtx_IOR (DImode, copy_rtx (temp),
                                     GEN_INT (ud3)));

      emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
                      gen_rtx_ASHIFT (DImode, copy_rtx (temp),
                                      GEN_INT (32)));
      if (ud2 != 0)
        emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
                        gen_rtx_IOR (DImode, copy_rtx (temp),
                                     GEN_INT (ud2 << 16)));
      if (ud1 != 0)
        emit_move_insn (dest,
                        gen_rtx_IOR (DImode, copy_rtx (temp),
                                     GEN_INT (ud1)));
    }
}
/* Helper for the following.  Get rid of [r+r] memory refs
   in cases where it won't work (TImode, TFmode, TDmode, PTImode).  */

static void
rs6000_eliminate_indexed_memrefs (rtx operands[2])
{
  if (MEM_P (operands[0])
      && !REG_P (XEXP (operands[0], 0))
      && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
                                               GET_MODE (operands[0]), false))
    operands[0]
      = replace_equiv_address (operands[0],
                               copy_addr_to_reg (XEXP (operands[0], 0)));

  if (MEM_P (operands[1])
      && !REG_P (XEXP (operands[1], 0))
      && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
                                               GET_MODE (operands[1]), false))
    operands[1]
      = replace_equiv_address (operands[1],
                               copy_addr_to_reg (XEXP (operands[1], 0)));
}
/* Generate a vector of constants to permute MODE for a little-endian
   storage operation by swapping the two halves of a vector.  */
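/* For example (illustrative only): for V4SImode, which has four subparts,
   the loops below produce the selector { 2, 3, 0, 1 }, i.e. the two
   doubleword halves of the vector are exchanged; for V2DImode the selector
   is { 1, 0 }.  */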
rtvec
rs6000_const_vec (machine_mode mode)
{
  int i, subparts;
  rtvec v;

  switch (mode)
    {
    case E_V1TImode:
      subparts = 1;
      break;
    case E_V2DFmode:
    case E_V2DImode:
      subparts = 2;
      break;
    case E_V4SFmode:
    case E_V4SImode:
      subparts = 4;
      break;
    case E_V8HImode:
      subparts = 8;
      break;
    case E_V16QImode:
      subparts = 16;
      break;
    default:
      gcc_unreachable ();
    }

  v = rtvec_alloc (subparts);

  for (i = 0; i < subparts / 2; ++i)
    RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
  for (i = subparts / 2; i < subparts; ++i)
    RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);

  return v;
}
/* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
   store operation.  */

static void
rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
{
  gcc_assert (!altivec_indexed_or_indirect_operand (dest, mode));
  gcc_assert (!altivec_indexed_or_indirect_operand (source, mode));

  /* Scalar permutations are easier to express in integer modes rather than
     floating-point modes, so cast them here.  We use V1TImode instead
     of TImode to ensure that the values don't go through GPRs.  */
  if (FLOAT128_VECTOR_P (mode))
    {
      dest = gen_lowpart (V1TImode, dest);
      source = gen_lowpart (V1TImode, source);
      mode = V1TImode;
    }

  /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
     scalar element.  */
  if (mode == TImode || mode == V1TImode)
    emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
                                                  GEN_INT (64))));
  else
    {
      rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
      emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
    }
}
/* Emit a little-endian load from vector memory location SOURCE to VSX
   register DEST in mode MODE.  The load is done with two permuting
   insns that represent an lxvd2x and xxpermdi.  */
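/* Illustrative sketch (assumed, not taken from the original comment): for a
   V4SImode load the two permuting insns emitted below correspond roughly to

       lxvd2x   vsD,0,rA            # doubleword-swapped load
       xxpermdi vsD,vsD,vsD,2       # swap the two doublewords back

   so the value that lands in the register matches the element order the
   rest of the backend expects.  */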
static void
rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
{
  /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
     V1TImode).  */
  if (mode == TImode || mode == V1TImode)
    {
      mode = V2DImode;
      dest = gen_lowpart (V2DImode, dest);
      source = adjust_address (source, V2DImode, 0);
    }

  rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
  rs6000_emit_le_vsx_permute (tmp, source, mode);
  rs6000_emit_le_vsx_permute (dest, tmp, mode);
}
/* Emit a little-endian store to vector memory location DEST from VSX
   register SOURCE in mode MODE.  The store is done with two permuting
   insns that represent an xxpermdi and an stxvd2x.  */

static void
rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
{
  /* This should never be called after LRA.  */
  gcc_assert (can_create_pseudo_p ());

  /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
     V1TImode).  */
  if (mode == TImode || mode == V1TImode)
    {
      mode = V2DImode;
      dest = adjust_address (dest, V2DImode, 0);
      source = gen_lowpart (V2DImode, source);
    }

  rtx tmp = gen_reg_rtx_and_attrs (source);
  rs6000_emit_le_vsx_permute (tmp, source, mode);
  rs6000_emit_le_vsx_permute (dest, tmp, mode);
}
/* Emit a sequence representing a little-endian VSX load or store,
   moving data from SOURCE to DEST in mode MODE.  This is done
   separately from rs6000_emit_move to ensure it is called only
   during expand.  LE VSX loads and stores introduced later are
   handled with a split.  The expand-time RTL generation allows
   us to optimize away redundant pairs of register-permutes.  */

void
rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
{
  gcc_assert (!BYTES_BIG_ENDIAN
              && VECTOR_MEM_VSX_P (mode)
              && !TARGET_P9_VECTOR
              && !gpr_or_gpr_p (dest, source)
              && (MEM_P (source) ^ MEM_P (dest)));

  if (MEM_P (source))
    {
      gcc_assert (REG_P (dest) || SUBREG_P (dest));
      rs6000_emit_le_vsx_load (dest, source, mode);
    }
  else
    {
      if (!REG_P (source))
        source = force_reg (mode, source);
      rs6000_emit_le_vsx_store (dest, source, mode);
    }
}
/* Return whether a SFmode or SImode move can be done without converting one
   mode to another.  This arises when we have:

        (SUBREG:SF (REG:SI ...))
        (SUBREG:SI (REG:SF ...))

   and one of the values is in a floating point/vector register, where SFmode
   scalars are stored in DFmode format.  */

bool
valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
{
  if (TARGET_ALLOW_SF_SUBREG)
    return true;

  if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
    return true;

  if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
    return true;

  /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))).  */
  if (SUBREG_P (dest))
    {
      rtx dest_subreg = SUBREG_REG (dest);
      rtx src_subreg = SUBREG_REG (src);
      return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
    }

  return false;
}
/* Helper function to change moves with:

        (SUBREG:SF (REG:SI)) and
        (SUBREG:SI (REG:SF))

   into separate UNSPEC insns.  In the PowerPC architecture, scalar SFmode
   values are stored as DFmode values in the VSX registers.  We need to
   convert the bits before we can use a direct move or operate on the bits
   in the vector register as an integer type.

   Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))).  */
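/* For example (illustrative only), a move such as

       (set (reg:SI 123) (subreg:SI (reg:SF 124) 0))

   is rewritten below into the movsi_from_sf pattern, which converts the
   DFmode-formatted SFmode value in the VSX register into its 32-bit
   in-memory image before handing it to the integer side; the reverse
   direction uses movsf_from_si.  */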
static bool
rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
{
  if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
      && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
      && SUBREG_P (source) && sf_subreg_operand (source, mode))
    {
      rtx inner_source = SUBREG_REG (source);
      machine_mode inner_mode = GET_MODE (inner_source);

      if (mode == SImode && inner_mode == SFmode)
        {
          emit_insn (gen_movsi_from_sf (dest, inner_source));
          return true;
        }

      if (mode == SFmode && inner_mode == SImode)
        {
          emit_insn (gen_movsf_from_si (dest, inner_source));
          return true;
        }
    }

  return false;
}
10509 /* Emit a move from SOURCE to DEST in mode MODE. */
10511 rs6000_emit_move (rtx dest
, rtx source
, machine_mode mode
)
10514 operands
[0] = dest
;
10515 operands
[1] = source
;
10517 if (TARGET_DEBUG_ADDR
)
10520 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10521 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10522 GET_MODE_NAME (mode
),
10525 can_create_pseudo_p ());
10527 fprintf (stderr
, "source:\n");
10528 debug_rtx (source
);
10531 /* Check that we get CONST_WIDE_INT only when we should. */
10532 if (CONST_WIDE_INT_P (operands
[1])
10533 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
10534 gcc_unreachable ();
10536 #ifdef HAVE_AS_GNU_ATTRIBUTE
10537 /* If we use a long double type, set the flags in .gnu_attribute that say
10538 what the long double type is. This is to allow the linker's warning
10539 message for the wrong long double to be useful, even if the function does
10540 not do a call (for example, doing a 128-bit add on power9 if the long
10541 double type is IEEE 128-bit. Do not set this if __ibm128 or __floa128 are
10542 used if they aren't the default long dobule type. */
10543 if (rs6000_gnu_attr
&& (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
|| TARGET_64BIT
))
10545 if (TARGET_LONG_DOUBLE_128
&& (mode
== TFmode
|| mode
== TCmode
))
10546 rs6000_passes_float
= rs6000_passes_long_double
= true;
10548 else if (!TARGET_LONG_DOUBLE_128
&& (mode
== DFmode
|| mode
== DCmode
))
10549 rs6000_passes_float
= rs6000_passes_long_double
= true;
10553 /* See if we need to special case SImode/SFmode SUBREG moves. */
10554 if ((mode
== SImode
|| mode
== SFmode
) && SUBREG_P (source
)
10555 && rs6000_emit_move_si_sf_subreg (dest
, source
, mode
))
10558 /* Check if GCC is setting up a block move that will end up using FP
10559 registers as temporaries. We must make sure this is acceptable. */
10560 if (MEM_P (operands
[0])
10561 && MEM_P (operands
[1])
10563 && (rs6000_slow_unaligned_access (DImode
, MEM_ALIGN (operands
[0]))
10564 || rs6000_slow_unaligned_access (DImode
, MEM_ALIGN (operands
[1])))
10565 && ! (rs6000_slow_unaligned_access (SImode
,
10566 (MEM_ALIGN (operands
[0]) > 32
10567 ? 32 : MEM_ALIGN (operands
[0])))
10568 || rs6000_slow_unaligned_access (SImode
,
10569 (MEM_ALIGN (operands
[1]) > 32
10570 ? 32 : MEM_ALIGN (operands
[1]))))
10571 && ! MEM_VOLATILE_P (operands
[0])
10572 && ! MEM_VOLATILE_P (operands
[1]))
10574 emit_move_insn (adjust_address (operands
[0], SImode
, 0),
10575 adjust_address (operands
[1], SImode
, 0));
10576 emit_move_insn (adjust_address (copy_rtx (operands
[0]), SImode
, 4),
10577 adjust_address (copy_rtx (operands
[1]), SImode
, 4));
10581 if (can_create_pseudo_p () && MEM_P (operands
[0])
10582 && !gpc_reg_operand (operands
[1], mode
))
10583 operands
[1] = force_reg (mode
, operands
[1]);
10585 /* Recognize the case where operand[1] is a reference to thread-local
10586 data and load its address to a register. */
10587 if (tls_referenced_p (operands
[1]))
10589 enum tls_model model
;
10590 rtx tmp
= operands
[1];
10593 if (GET_CODE (tmp
) == CONST
&& GET_CODE (XEXP (tmp
, 0)) == PLUS
)
10595 addend
= XEXP (XEXP (tmp
, 0), 1);
10596 tmp
= XEXP (XEXP (tmp
, 0), 0);
10599 gcc_assert (SYMBOL_REF_P (tmp
));
10600 model
= SYMBOL_REF_TLS_MODEL (tmp
);
10601 gcc_assert (model
!= 0);
10603 tmp
= rs6000_legitimize_tls_address (tmp
, model
);
10606 tmp
= gen_rtx_PLUS (mode
, tmp
, addend
);
10607 tmp
= force_operand (tmp
, operands
[0]);
10612 /* 128-bit constant floating-point values on Darwin should really be loaded
10613 as two parts. However, this premature splitting is a problem when DFmode
10614 values can go into Altivec registers. */
10615 if (TARGET_MACHO
&& CONST_DOUBLE_P (operands
[1]) && FLOAT128_IBM_P (mode
)
10616 && !reg_addr
[DFmode
].scalar_in_vmx_p
)
10618 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
, 0),
10619 simplify_gen_subreg (DFmode
, operands
[1], mode
, 0),
10621 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
,
10622 GET_MODE_SIZE (DFmode
)),
10623 simplify_gen_subreg (DFmode
, operands
[1], mode
,
10624 GET_MODE_SIZE (DFmode
)),
10629 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10630 p1:SD) if p1 is not of floating point class and p0 is spilled as
10631 we can have no analogous movsd_store for this. */
10632 if (lra_in_progress
&& mode
== DDmode
10633 && REG_P (operands
[0]) && !HARD_REGISTER_P (operands
[0])
10634 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
10635 && SUBREG_P (operands
[1]) && REG_P (SUBREG_REG (operands
[1]))
10636 && GET_MODE (SUBREG_REG (operands
[1])) == SDmode
)
10639 int regno
= REGNO (SUBREG_REG (operands
[1]));
10641 if (!HARD_REGISTER_NUM_P (regno
))
10643 cl
= reg_preferred_class (regno
);
10644 regno
= reg_renumber
[regno
];
10646 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][1];
10648 if (regno
>= 0 && ! FP_REGNO_P (regno
))
10651 operands
[0] = gen_lowpart_SUBREG (SDmode
, operands
[0]);
10652 operands
[1] = SUBREG_REG (operands
[1]);
10655 if (lra_in_progress
10657 && REG_P (operands
[0]) && !HARD_REGISTER_P (operands
[0])
10658 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
10659 && (REG_P (operands
[1])
10660 || (SUBREG_P (operands
[1]) && REG_P (SUBREG_REG (operands
[1])))))
10662 int regno
= reg_or_subregno (operands
[1]);
10665 if (!HARD_REGISTER_NUM_P (regno
))
10667 cl
= reg_preferred_class (regno
);
10668 gcc_assert (cl
!= NO_REGS
);
10669 regno
= reg_renumber
[regno
];
10671 regno
= ira_class_hard_regs
[cl
][0];
10673 if (FP_REGNO_P (regno
))
10675 if (GET_MODE (operands
[0]) != DDmode
)
10676 operands
[0] = gen_rtx_SUBREG (DDmode
, operands
[0], 0);
10677 emit_insn (gen_movsd_store (operands
[0], operands
[1]));
10679 else if (INT_REGNO_P (regno
))
10680 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
10685 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10686 p:DD)) if p0 is not of floating point class and p1 is spilled as
10687 we can have no analogous movsd_load for this. */
10688 if (lra_in_progress
&& mode
== DDmode
10689 && SUBREG_P (operands
[0]) && REG_P (SUBREG_REG (operands
[0]))
10690 && GET_MODE (SUBREG_REG (operands
[0])) == SDmode
10691 && REG_P (operands
[1]) && !HARD_REGISTER_P (operands
[1])
10692 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
10695 int regno
= REGNO (SUBREG_REG (operands
[0]));
10697 if (!HARD_REGISTER_NUM_P (regno
))
10699 cl
= reg_preferred_class (regno
);
10700 regno
= reg_renumber
[regno
];
10702 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][0];
10704 if (regno
>= 0 && ! FP_REGNO_P (regno
))
10707 operands
[0] = SUBREG_REG (operands
[0]);
10708 operands
[1] = gen_lowpart_SUBREG (SDmode
, operands
[1]);
10711 if (lra_in_progress
10713 && (REG_P (operands
[0])
10714 || (SUBREG_P (operands
[0]) && REG_P (SUBREG_REG (operands
[0]))))
10715 && REG_P (operands
[1]) && !HARD_REGISTER_P (operands
[1])
10716 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
10718 int regno
= reg_or_subregno (operands
[0]);
10721 if (!HARD_REGISTER_NUM_P (regno
))
10723 cl
= reg_preferred_class (regno
);
10724 gcc_assert (cl
!= NO_REGS
);
10725 regno
= reg_renumber
[regno
];
10727 regno
= ira_class_hard_regs
[cl
][0];
10729 if (FP_REGNO_P (regno
))
10731 if (GET_MODE (operands
[1]) != DDmode
)
10732 operands
[1] = gen_rtx_SUBREG (DDmode
, operands
[1], 0);
10733 emit_insn (gen_movsd_load (operands
[0], operands
[1]));
10735 else if (INT_REGNO_P (regno
))
10736 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
10742 /* FIXME: In the long term, this switch statement should go away
10743 and be replaced by a sequence of tests based on things like
10749 if (CONSTANT_P (operands
[1])
10750 && !CONST_INT_P (operands
[1]))
10751 operands
[1] = force_const_mem (mode
, operands
[1]);
10758 if (FLOAT128_2REG_P (mode
))
10759 rs6000_eliminate_indexed_memrefs (operands
);
10766 if (CONSTANT_P (operands
[1])
10767 && ! easy_fp_constant (operands
[1], mode
))
10768 operands
[1] = force_const_mem (mode
, operands
[1]);
10778 if (CONSTANT_P (operands
[1])
10779 && !easy_vector_constant (operands
[1], mode
))
10780 operands
[1] = force_const_mem (mode
, operands
[1]);
10785 if (CONST_INT_P (operands
[1]) && INTVAL (operands
[1]) != 0)
10786 error ("%qs is an opaque type, and you cannot set it to other values",
10787 (mode
== OOmode
) ? "__vector_pair" : "__vector_quad");
10792 /* Use default pattern for address of ELF small data */
10795 && DEFAULT_ABI
== ABI_V4
10796 && (SYMBOL_REF_P (operands
[1])
10797 || GET_CODE (operands
[1]) == CONST
)
10798 && small_data_operand (operands
[1], mode
))
10800 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
10804 /* Use the default pattern for loading up PC-relative addresses. */
10805 if (TARGET_PCREL
&& mode
== Pmode
10806 && pcrel_local_or_external_address (operands
[1], Pmode
))
10808 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
10812 if (DEFAULT_ABI
== ABI_V4
10813 && mode
== Pmode
&& mode
== SImode
10814 && flag_pic
== 1 && got_operand (operands
[1], mode
))
10816 emit_insn (gen_movsi_got (operands
[0], operands
[1]));
10820 if ((TARGET_ELF
|| DEFAULT_ABI
== ABI_DARWIN
)
10821 && TARGET_NO_TOC_OR_PCREL
10824 && CONSTANT_P (operands
[1])
10825 && GET_CODE (operands
[1]) != HIGH
10826 && !CONST_INT_P (operands
[1]))
10828 rtx target
= (!can_create_pseudo_p ()
10830 : gen_reg_rtx (mode
));
10832 /* If this is a function address on -mcall-aixdesc,
10833 convert it to the address of the descriptor. */
10834 if (DEFAULT_ABI
== ABI_AIX
10835 && SYMBOL_REF_P (operands
[1])
10836 && XSTR (operands
[1], 0)[0] == '.')
10838 const char *name
= XSTR (operands
[1], 0);
10840 while (*name
== '.')
10842 new_ref
= gen_rtx_SYMBOL_REF (Pmode
, name
);
10843 CONSTANT_POOL_ADDRESS_P (new_ref
)
10844 = CONSTANT_POOL_ADDRESS_P (operands
[1]);
10845 SYMBOL_REF_FLAGS (new_ref
) = SYMBOL_REF_FLAGS (operands
[1]);
10846 SYMBOL_REF_USED (new_ref
) = SYMBOL_REF_USED (operands
[1]);
10847 SYMBOL_REF_DATA (new_ref
) = SYMBOL_REF_DATA (operands
[1]);
10848 operands
[1] = new_ref
;
10851 if (DEFAULT_ABI
== ABI_DARWIN
)
10854 /* This is not PIC code, but could require the subset of
10855 indirections used by mdynamic-no-pic. */
10856 if (MACHO_DYNAMIC_NO_PIC_P
)
10858 /* Take care of any required data indirection. */
10859 operands
[1] = rs6000_machopic_legitimize_pic_address (
10860 operands
[1], mode
, operands
[0]);
10861 if (operands
[0] != operands
[1])
10862 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
10866 emit_insn (gen_macho_high (Pmode
, target
, operands
[1]));
10867 emit_insn (gen_macho_low (Pmode
, operands
[0],
10868 target
, operands
[1]));
10872 emit_insn (gen_elf_high (target
, operands
[1]));
10873 emit_insn (gen_elf_low (operands
[0], target
, operands
[1]));
10877 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10878 and we have put it in the TOC, we just need to make a TOC-relative
10879 reference to it. */
10881 && SYMBOL_REF_P (operands
[1])
10882 && use_toc_relative_ref (operands
[1], mode
))
10883 operands
[1] = create_TOC_reference (operands
[1], operands
[0]);
10884 else if (mode
== Pmode
10885 && CONSTANT_P (operands
[1])
10886 && GET_CODE (operands
[1]) != HIGH
10887 && ((REG_P (operands
[0])
10888 && FP_REGNO_P (REGNO (operands
[0])))
10889 || !CONST_INT_P (operands
[1])
10890 || (num_insns_constant (operands
[1], mode
)
10891 > (TARGET_CMODEL
!= CMODEL_SMALL
? 3 : 2)))
10892 && !toc_relative_expr_p (operands
[1], false, NULL
, NULL
)
10893 && (TARGET_CMODEL
== CMODEL_SMALL
10894 || can_create_pseudo_p ()
10895 || (REG_P (operands
[0])
10896 && INT_REG_OK_FOR_BASE_P (operands
[0], true))))
10900 /* Darwin uses a special PIC legitimizer. */
10901 if (DEFAULT_ABI
== ABI_DARWIN
&& MACHOPIC_INDIRECT
)
10904 rs6000_machopic_legitimize_pic_address (operands
[1], mode
,
10906 if (operands
[0] != operands
[1])
10907 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
10912 /* If we are to limit the number of things we put in the TOC and
10913 this is a symbol plus a constant we can add in one insn,
10914 just put the symbol in the TOC and add the constant. */
10915 if (GET_CODE (operands
[1]) == CONST
10916 && TARGET_NO_SUM_IN_TOC
10917 && GET_CODE (XEXP (operands
[1], 0)) == PLUS
10918 && add_operand (XEXP (XEXP (operands
[1], 0), 1), mode
)
10919 && (GET_CODE (XEXP (XEXP (operands
[1], 0), 0)) == LABEL_REF
10920 || SYMBOL_REF_P (XEXP (XEXP (operands
[1], 0), 0)))
10921 && ! side_effects_p (operands
[0]))
10924 force_const_mem (mode
, XEXP (XEXP (operands
[1], 0), 0));
10925 rtx other
= XEXP (XEXP (operands
[1], 0), 1);
10927 sym
= force_reg (mode
, sym
);
10928 emit_insn (gen_add3_insn (operands
[0], sym
, other
));
10932 operands
[1] = force_const_mem (mode
, operands
[1]);
10935 && SYMBOL_REF_P (XEXP (operands
[1], 0))
10936 && use_toc_relative_ref (XEXP (operands
[1], 0), mode
))
10938 rtx tocref
= create_TOC_reference (XEXP (operands
[1], 0),
10940 operands
[1] = gen_const_mem (mode
, tocref
);
10941 set_mem_alias_set (operands
[1], get_TOC_alias_set ());
10947 if (!VECTOR_MEM_VSX_P (TImode
))
10948 rs6000_eliminate_indexed_memrefs (operands
);
10952 rs6000_eliminate_indexed_memrefs (operands
);
10956 fatal_insn ("bad move", gen_rtx_SET (dest
, source
));
10959 /* Above, we may have called force_const_mem which may have returned
10960 an invalid address. If we can, fix this up; otherwise, reload will
10961 have to deal with it. */
10962 if (MEM_P (operands
[1]))
10963 operands
[1] = validize_mem (operands
[1]);
10965 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
10969 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
10971 init_float128_ibm (machine_mode mode
)
10973 if (!TARGET_XL_COMPAT
)
10975 set_optab_libfunc (add_optab
, mode
, "__gcc_qadd");
10976 set_optab_libfunc (sub_optab
, mode
, "__gcc_qsub");
10977 set_optab_libfunc (smul_optab
, mode
, "__gcc_qmul");
10978 set_optab_libfunc (sdiv_optab
, mode
, "__gcc_qdiv");
10980 set_optab_libfunc (neg_optab
, mode
, "__gcc_qneg");
10981 set_optab_libfunc (eq_optab
, mode
, "__gcc_qeq");
10982 set_optab_libfunc (ne_optab
, mode
, "__gcc_qne");
10983 set_optab_libfunc (gt_optab
, mode
, "__gcc_qgt");
10984 set_optab_libfunc (ge_optab
, mode
, "__gcc_qge");
10985 set_optab_libfunc (lt_optab
, mode
, "__gcc_qlt");
10986 set_optab_libfunc (le_optab
, mode
, "__gcc_qle");
10987 set_optab_libfunc (unord_optab
, mode
, "__gcc_qunord");
10989 set_conv_libfunc (sext_optab
, mode
, SFmode
, "__gcc_stoq");
10990 set_conv_libfunc (sext_optab
, mode
, DFmode
, "__gcc_dtoq");
10991 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "__gcc_qtos");
10992 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "__gcc_qtod");
10993 set_conv_libfunc (sfix_optab
, SImode
, mode
, "__gcc_qtoi");
10994 set_conv_libfunc (ufix_optab
, SImode
, mode
, "__gcc_qtou");
10995 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "__gcc_itoq");
10996 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "__gcc_utoq");
11000 set_optab_libfunc (add_optab
, mode
, "_xlqadd");
11001 set_optab_libfunc (sub_optab
, mode
, "_xlqsub");
11002 set_optab_libfunc (smul_optab
, mode
, "_xlqmul");
11003 set_optab_libfunc (sdiv_optab
, mode
, "_xlqdiv");
11006 /* Add various conversions for IFmode to use the traditional TFmode
11008 if (mode
== IFmode
)
11010 set_conv_libfunc (sext_optab
, mode
, SDmode
, "__dpd_extendsdtf");
11011 set_conv_libfunc (sext_optab
, mode
, DDmode
, "__dpd_extendddtf");
11012 set_conv_libfunc (trunc_optab
, mode
, TDmode
, "__dpd_trunctdtf");
11013 set_conv_libfunc (trunc_optab
, SDmode
, mode
, "__dpd_trunctfsd");
11014 set_conv_libfunc (trunc_optab
, DDmode
, mode
, "__dpd_trunctfdd");
11015 set_conv_libfunc (sext_optab
, TDmode
, mode
, "__dpd_extendtftd");
11017 set_conv_libfunc (sfix_optab
, DImode
, mode
, "__fixtfdi");
11018 set_conv_libfunc (ufix_optab
, DImode
, mode
, "__fixunstfdi");
11020 set_conv_libfunc (sfloat_optab
, mode
, DImode
, "__floatditf");
11021 set_conv_libfunc (ufloat_optab
, mode
, DImode
, "__floatunditf");
11023 if (TARGET_POWERPC64
)
11025 set_conv_libfunc (sfix_optab
, TImode
, mode
, "__fixtfti");
11026 set_conv_libfunc (ufix_optab
, TImode
, mode
, "__fixunstfti");
11027 set_conv_libfunc (sfloat_optab
, mode
, TImode
, "__floattitf");
11028 set_conv_libfunc (ufloat_optab
, mode
, TImode
, "__floatuntitf");
11033 /* Create a decl for either complex long double multiply or complex long double
11034 divide when long double is IEEE 128-bit floating point. We can't use
11035 __multc3 and __divtc3 because the original long double using IBM extended
11036 double used those names. The complex multiply/divide functions are encoded
11037 as builtin functions with a complex result and 4 scalar inputs. */
11040 create_complex_muldiv (const char *name
, built_in_function fncode
, tree fntype
)
11042 tree fndecl
= add_builtin_function (name
, fntype
, fncode
, BUILT_IN_NORMAL
,
11045 set_builtin_decl (fncode
, fndecl
, true);
11047 if (TARGET_DEBUG_BUILTIN
)
11048 fprintf (stderr
, "create complex %s, fncode: %d\n", name
, (int) fncode
);
11053 /* Set up IEEE 128-bit floating point routines. Use different names if the
11054 arguments can be passed in a vector register. The historical PowerPC
11055 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
11056 continue to use that if we aren't using vector registers to pass IEEE
11057 128-bit floating point. */
11060 init_float128_ieee (machine_mode mode
)
11062 if (FLOAT128_VECTOR_P (mode
))
11064 set_optab_libfunc (add_optab
, mode
, "__addkf3");
11065 set_optab_libfunc (sub_optab
, mode
, "__subkf3");
11066 set_optab_libfunc (neg_optab
, mode
, "__negkf2");
11067 set_optab_libfunc (smul_optab
, mode
, "__mulkf3");
11068 set_optab_libfunc (sdiv_optab
, mode
, "__divkf3");
11069 set_optab_libfunc (sqrt_optab
, mode
, "__sqrtkf2");
11070 set_optab_libfunc (abs_optab
, mode
, "__abskf2");
11071 set_optab_libfunc (powi_optab
, mode
, "__powikf2");
11073 set_optab_libfunc (eq_optab
, mode
, "__eqkf2");
11074 set_optab_libfunc (ne_optab
, mode
, "__nekf2");
11075 set_optab_libfunc (gt_optab
, mode
, "__gtkf2");
11076 set_optab_libfunc (ge_optab
, mode
, "__gekf2");
11077 set_optab_libfunc (lt_optab
, mode
, "__ltkf2");
11078 set_optab_libfunc (le_optab
, mode
, "__lekf2");
11079 set_optab_libfunc (unord_optab
, mode
, "__unordkf2");
11081 set_conv_libfunc (sext_optab
, mode
, SFmode
, "__extendsfkf2");
11082 set_conv_libfunc (sext_optab
, mode
, DFmode
, "__extenddfkf2");
11083 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "__trunckfsf2");
11084 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "__trunckfdf2");
11086 set_conv_libfunc (sext_optab
, mode
, IFmode
, "__trunctfkf2");
11087 if (mode
!= TFmode
&& FLOAT128_IBM_P (TFmode
))
11088 set_conv_libfunc (sext_optab
, mode
, TFmode
, "__trunctfkf2");
11090 set_conv_libfunc (trunc_optab
, IFmode
, mode
, "__extendkftf2");
11091 if (mode
!= TFmode
&& FLOAT128_IBM_P (TFmode
))
11092 set_conv_libfunc (trunc_optab
, TFmode
, mode
, "__extendkftf2");
11094 set_conv_libfunc (sext_optab
, mode
, SDmode
, "__dpd_extendsdkf");
11095 set_conv_libfunc (sext_optab
, mode
, DDmode
, "__dpd_extendddkf");
11096 set_conv_libfunc (trunc_optab
, mode
, TDmode
, "__dpd_trunctdkf");
11097 set_conv_libfunc (trunc_optab
, SDmode
, mode
, "__dpd_trunckfsd");
11098 set_conv_libfunc (trunc_optab
, DDmode
, mode
, "__dpd_trunckfdd");
11099 set_conv_libfunc (sext_optab
, TDmode
, mode
, "__dpd_extendkftd");
11101 set_conv_libfunc (sfix_optab
, SImode
, mode
, "__fixkfsi");
11102 set_conv_libfunc (ufix_optab
, SImode
, mode
, "__fixunskfsi");
11103 set_conv_libfunc (sfix_optab
, DImode
, mode
, "__fixkfdi");
11104 set_conv_libfunc (ufix_optab
, DImode
, mode
, "__fixunskfdi");
11106 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "__floatsikf");
11107 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "__floatunsikf");
11108 set_conv_libfunc (sfloat_optab
, mode
, DImode
, "__floatdikf");
11109 set_conv_libfunc (ufloat_optab
, mode
, DImode
, "__floatundikf");
11111 if (TARGET_POWERPC64
)
11113 set_conv_libfunc (sfix_optab
, TImode
, mode
, "__fixkfti_sw");
11114 set_conv_libfunc (ufix_optab
, TImode
, mode
, "__fixunskfti_sw");
11115 set_conv_libfunc (sfloat_optab
, mode
, TImode
, "__floattikf_sw");
11116 set_conv_libfunc (ufloat_optab
, mode
, TImode
, "__floatuntikf_sw");
11122 set_optab_libfunc (add_optab
, mode
, "_q_add");
11123 set_optab_libfunc (sub_optab
, mode
, "_q_sub");
11124 set_optab_libfunc (neg_optab
, mode
, "_q_neg");
11125 set_optab_libfunc (smul_optab
, mode
, "_q_mul");
11126 set_optab_libfunc (sdiv_optab
, mode
, "_q_div");
11127 if (TARGET_PPC_GPOPT
)
11128 set_optab_libfunc (sqrt_optab
, mode
, "_q_sqrt");
11130 set_optab_libfunc (eq_optab
, mode
, "_q_feq");
11131 set_optab_libfunc (ne_optab
, mode
, "_q_fne");
11132 set_optab_libfunc (gt_optab
, mode
, "_q_fgt");
11133 set_optab_libfunc (ge_optab
, mode
, "_q_fge");
11134 set_optab_libfunc (lt_optab
, mode
, "_q_flt");
11135 set_optab_libfunc (le_optab
, mode
, "_q_fle");
11137 set_conv_libfunc (sext_optab
, mode
, SFmode
, "_q_stoq");
11138 set_conv_libfunc (sext_optab
, mode
, DFmode
, "_q_dtoq");
11139 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "_q_qtos");
11140 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "_q_qtod");
11141 set_conv_libfunc (sfix_optab
, SImode
, mode
, "_q_qtoi");
11142 set_conv_libfunc (ufix_optab
, SImode
, mode
, "_q_qtou");
11143 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "_q_itoq");
11144 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "_q_utoq");
11149 rs6000_init_libfuncs (void)
11151 /* __float128 support. */
11152 if (TARGET_FLOAT128_TYPE
)
11153 init_float128_ieee (KFmode
);
11155 /* __ibm128 support. */
11157 init_float128_ibm (IFmode
);
11159 /* AIX/Darwin/64-bit Linux quad floating point routines. */
11160 if (TARGET_LONG_DOUBLE_128
)
11162 if (!TARGET_IEEEQUAD
)
11163 init_float128_ibm (TFmode
);
11165 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
11167 init_float128_ieee (TFmode
);
11170 /* Set up to call __mulkc3 and __divkc3 when long double uses the IEEE
11171 128-bit encoding. We cannot use the same name (__mulkc3 or __divkc3 for
11172 both IEEE long double and for explicit _Float128/__float128) because
11173 c_builtin_function will complain if we create two built-in functions with
11174 the same name. Instead we use an alias name for the case when long double
11175 uses the IEEE 128-bit encoding. Libgcc will create a weak alias reference
11178 We need to only execute this once. If we have clone or target attributes,
11179 this will be called a second time. We need to create the built-in
11180 function only once. */
11181 static bool complex_muldiv_init_p
= false;
11183 if (TARGET_FLOAT128_TYPE
&& TARGET_IEEEQUAD
&& TARGET_LONG_DOUBLE_128
11184 && !complex_muldiv_init_p
)
11186 complex_muldiv_init_p
= true;
11188 tree fntype
= build_function_type_list (complex_long_double_type_node
,
11189 long_double_type_node
,
11190 long_double_type_node
,
11191 long_double_type_node
,
11192 long_double_type_node
,
11195 /* Create complex multiply. */
11196 built_in_function mul_fncode
=
11197 (built_in_function
) (BUILT_IN_COMPLEX_MUL_MIN
+ TCmode
11198 - MIN_MODE_COMPLEX_FLOAT
);
11200 create_complex_muldiv ("__multc3_ieee128", mul_fncode
, fntype
);
11202 /* Create complex divide. */
11203 built_in_function div_fncode
=
11204 (built_in_function
) (BUILT_IN_COMPLEX_DIV_MIN
+ TCmode
11205 - MIN_MODE_COMPLEX_FLOAT
);
11207 create_complex_muldiv ("__divtc3_ieee128", div_fncode
, fntype
);
/* Emit a potentially record-form instruction, setting DST from SRC.
   If DOT is 0, that is all; otherwise, set CCREG to the result of the
   signed comparison of DST with zero.  If DOT is 1, the generated RTL
   doesn't care about the DST result; if DOT is 2, it does.  If CCREG
   is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
   a separate COMPARE.  */
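/* For example (illustrative only): with SRC an AND of two GPRs, DOT == 1
   and CCREG being CR0, the PARALLEL emitted below becomes a single record
   form "and. rD,rA,rB"; with CCREG being some other CR field it instead
   becomes a plain "and" followed by a separate compare against zero.  */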
void
rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
{
  if (dot == 0)
    {
      emit_move_insn (dst, src);
      return;
    }

  if (cc_reg_not_cr0_operand (ccreg, CCmode))
    {
      emit_move_insn (dst, src);
      emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
      return;
    }

  rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
  if (dot == 1)
    {
      rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
    }
  else
    {
      rtx set = gen_rtx_SET (dst, src);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
    }
}
/* A validation routine: say whether CODE, a condition code, and MODE
   match.  The other alternatives either don't make sense or should
   never be generated.  */

void
validate_condition_mode (enum rtx_code code, machine_mode mode)
{
  gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
               || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
              && GET_MODE_CLASS (mode) == MODE_CC);

  /* These don't make sense.  */
  gcc_assert ((code != GT && code != LT && code != GE && code != LE)
              || mode != CCUNSmode);

  gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
              || mode == CCUNSmode);

  gcc_assert (mode == CCFPmode
              || (code != ORDERED && code != UNORDERED
                  && code != UNEQ && code != LTGT
                  && code != UNGT && code != UNLT
                  && code != UNGE && code != UNLE));

  /* These are invalid; the information is not there.  */
  gcc_assert (mode != CCEQmode || code == EQ || code == NE);
}
/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
   rldicl, rldicr, or rldic instruction in mode MODE.  If so, if E is
   not zero, store there the bit offset (counted from the right) where
   the single stretch of 1 bits begins; and similarly for B, the bit
   offset where it ends.  */
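/* For example (illustrative only): for MASK 0x00ffff00 in SImode the single
   run of 1 bits occupies bits 8..23 counted from the right, so *E is set
   to 8 and *B to 23; a value with more than one run of ones, such as
   0x00ff00ff, is rejected.  */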
static bool
rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
{
  unsigned HOST_WIDE_INT val = INTVAL (mask);
  unsigned HOST_WIDE_INT bit;
  int nb, ne;
  int n = GET_MODE_PRECISION (mode);

  if (mode != DImode && mode != SImode)
    return false;

  if (INTVAL (mask) >= 0)
    {
      bit = val & -val;
      ne = exact_log2 (bit);
      nb = exact_log2 (val + bit);
    }
  else if (val + 1 == 0)
    {
      nb = n;
      ne = 0;
    }
  else if (val & 1)
    {
      val = ~val;
      bit = val & -val;
      nb = exact_log2 (bit);
      ne = exact_log2 (val + bit);
    }
  else
    {
      bit = val & -val;
      ne = exact_log2 (bit);
      if (val + bit == 0)
        nb = n;
      else
        nb = 0;
    }

  nb--;

  if (nb < 0 || ne < 0 || nb >= n || ne >= n)
    return false;

  if (b)
    *b = nb;
  if (e)
    *e = ne;

  return true;
}

bool
rs6000_is_valid_rotate_dot_mask (rtx mask, machine_mode mode)
{
  int nb, ne;

  return rs6000_is_valid_mask (mask, &nb, &ne, mode) && nb >= ne && ne > 0;
}
/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
   or rldicr instruction, to implement an AND with it in mode MODE.  */
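/* For example (illustrative only): in DImode the mask 0x00000000ffffffff
   starts at bit 0 (ne == 0), so the AND can be a single
   "rldicl rD,rS,0,32"; 0xffffffff00000000 ends at bit 63 (nb == 63), so it
   can be "rldicr rD,rS,0,31"; a middle mask such as 0x0000ffff00000000 is
   not a single rl* AND and has to go through the two-insn path handled
   elsewhere.  */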
bool
rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
    return false;

  /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
     does not wrap.  */
  if (mode == DImode)
    return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));

  /* For SImode, rlwinm can do everything.  */
  if (mode == SImode)
    return (nb < 32 && ne < 32);

  return false;
}
11365 /* Return the instruction template for an AND with mask in mode MODE, with
11366 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11369 rs6000_insn_for_and_mask (machine_mode mode
, rtx
*operands
, bool dot
)
11373 if (!rs6000_is_valid_mask (operands
[2], &nb
, &ne
, mode
))
11374 gcc_unreachable ();
11376 if (mode
== DImode
&& ne
== 0)
11378 operands
[3] = GEN_INT (63 - nb
);
11380 return "rldicl. %0,%1,0,%3";
11381 return "rldicl %0,%1,0,%3";
11384 if (mode
== DImode
&& nb
== 63)
11386 operands
[3] = GEN_INT (63 - ne
);
11388 return "rldicr. %0,%1,0,%3";
11389 return "rldicr %0,%1,0,%3";
11392 if (nb
< 32 && ne
< 32)
11394 operands
[3] = GEN_INT (31 - nb
);
11395 operands
[4] = GEN_INT (31 - ne
);
11397 return "rlwinm. %0,%1,0,%3,%4";
11398 return "rlwinm %0,%1,0,%3,%4";
11401 gcc_unreachable ();
11404 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
11405 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
11406 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
11409 rs6000_is_valid_shift_mask (rtx mask
, rtx shift
, machine_mode mode
)
11413 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
11416 int n
= GET_MODE_PRECISION (mode
);
11419 if (CONST_INT_P (XEXP (shift
, 1)))
11421 sh
= INTVAL (XEXP (shift
, 1));
11422 if (sh
< 0 || sh
>= n
)
11426 rtx_code code
= GET_CODE (shift
);
11428 /* Convert any shift by 0 to a rotate, to simplify below code. */
11432 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11433 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
11435 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
11441 /* DImode rotates need rld*. */
11442 if (mode
== DImode
&& code
== ROTATE
)
11443 return (nb
== 63 || ne
== 0 || ne
== sh
);
11445 /* SImode rotates need rlw*. */
11446 if (mode
== SImode
&& code
== ROTATE
)
11447 return (nb
< 32 && ne
< 32 && sh
< 32);
11449 /* Wrap-around masks are only okay for rotates. */
11453 /* Variable shifts are only okay for rotates. */
11457 /* Don't allow ASHIFT if the mask is wrong for that. */
11458 if (code
== ASHIFT
&& ne
< sh
)
11461 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
11462 if the mask is wrong for that. */
11463 if (nb
< 32 && ne
< 32 && sh
< 32
11464 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
11467 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
11468 if the mask is wrong for that. */
11469 if (code
== LSHIFTRT
)
11471 if (nb
== 63 || ne
== 0 || ne
== sh
)
11472 return !(code
== LSHIFTRT
&& nb
>= sh
);
11477 /* Return the instruction template for a shift with mask in mode MODE, with
11478 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11481 rs6000_insn_for_shift_mask (machine_mode mode
, rtx
*operands
, bool dot
)
11485 if (!rs6000_is_valid_mask (operands
[3], &nb
, &ne
, mode
))
11486 gcc_unreachable ();
11488 if (mode
== DImode
&& ne
== 0)
11490 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
11491 operands
[2] = GEN_INT (64 - INTVAL (operands
[2]));
11492 operands
[3] = GEN_INT (63 - nb
);
11494 return "rld%I2cl. %0,%1,%2,%3";
11495 return "rld%I2cl %0,%1,%2,%3";
11498 if (mode
== DImode
&& nb
== 63)
11500 operands
[3] = GEN_INT (63 - ne
);
11502 return "rld%I2cr. %0,%1,%2,%3";
11503 return "rld%I2cr %0,%1,%2,%3";
11507 && GET_CODE (operands
[4]) != LSHIFTRT
11508 && CONST_INT_P (operands
[2])
11509 && ne
== INTVAL (operands
[2]))
11511 operands
[3] = GEN_INT (63 - nb
);
11513 return "rld%I2c. %0,%1,%2,%3";
11514 return "rld%I2c %0,%1,%2,%3";
11517 if (nb
< 32 && ne
< 32)
11519 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
11520 operands
[2] = GEN_INT (32 - INTVAL (operands
[2]));
11521 operands
[3] = GEN_INT (31 - nb
);
11522 operands
[4] = GEN_INT (31 - ne
);
11523 /* This insn can also be a 64-bit rotate with mask that really makes
11524 it just a shift right (with mask); the %h below are to adjust for
11525 that situation (shift count is >= 32 in that case). */
11527 return "rlw%I2nm. %0,%1,%h2,%3,%4";
11528 return "rlw%I2nm %0,%1,%h2,%3,%4";
11531 gcc_unreachable ();
11534 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
11535 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
11536 ASHIFT, or LSHIFTRT) in mode MODE. */
11539 rs6000_is_valid_insert_mask (rtx mask
, rtx shift
, machine_mode mode
)
11543 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
11546 int n
= GET_MODE_PRECISION (mode
);
11548 int sh
= INTVAL (XEXP (shift
, 1));
11549 if (sh
< 0 || sh
>= n
)
11552 rtx_code code
= GET_CODE (shift
);
11554 /* Convert any shift by 0 to a rotate, to simplify below code. */
11558 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11559 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
11561 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
11567 /* DImode rotates need rldimi. */
11568 if (mode
== DImode
&& code
== ROTATE
)
11571 /* SImode rotates need rlwimi. */
11572 if (mode
== SImode
&& code
== ROTATE
)
11573 return (nb
< 32 && ne
< 32 && sh
< 32);
11575 /* Wrap-around masks are only okay for rotates. */
11579 /* Don't allow ASHIFT if the mask is wrong for that. */
11580 if (code
== ASHIFT
&& ne
< sh
)
11583 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
11584 if the mask is wrong for that. */
11585 if (nb
< 32 && ne
< 32 && sh
< 32
11586 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
11589 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
11590 if the mask is wrong for that. */
11591 if (code
== LSHIFTRT
)
11594 return !(code
== LSHIFTRT
&& nb
>= sh
);
11599 /* Return the instruction template for an insert with mask in mode MODE, with
11600 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11603 rs6000_insn_for_insert_mask (machine_mode mode
, rtx
*operands
, bool dot
)
11607 if (!rs6000_is_valid_mask (operands
[3], &nb
, &ne
, mode
))
11608 gcc_unreachable ();
11610 /* Prefer rldimi because rlwimi is cracked. */
11611 if (TARGET_POWERPC64
11612 && (!dot
|| mode
== DImode
)
11613 && GET_CODE (operands
[4]) != LSHIFTRT
11614 && ne
== INTVAL (operands
[2]))
11616 operands
[3] = GEN_INT (63 - nb
);
11618 return "rldimi. %0,%1,%2,%3";
11619 return "rldimi %0,%1,%2,%3";
11622 if (nb
< 32 && ne
< 32)
11624 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
11625 operands
[2] = GEN_INT (32 - INTVAL (operands
[2]));
11626 operands
[3] = GEN_INT (31 - nb
);
11627 operands
[4] = GEN_INT (31 - ne
);
11629 return "rlwimi. %0,%1,%2,%3,%4";
11630 return "rlwimi %0,%1,%2,%3,%4";
11633 gcc_unreachable ();
/* Return whether an AND with C (a CONST_INT) in mode MODE can be done
   using two machine instructions.  */

bool
rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
{
  /* There are two kinds of AND we can handle with two insns:
     1) those we can do with two rl* insn;
     2) ori[s];xori[s].

     We do not handle that last case yet.  */

  /* If there is just one stretch of ones, we can do it.  */
  if (rs6000_is_valid_mask (c, NULL, NULL, mode))
    return true;

  /* Otherwise, fill in the lowest "hole"; if we can do the result with
     one insn, we can do the whole thing with two.  */
  unsigned HOST_WIDE_INT val = INTVAL (c);
  unsigned HOST_WIDE_INT bit1 = val & -val;
  unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
  unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
  unsigned HOST_WIDE_INT bit3 = val1 & -val1;

  return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
}
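/* Worked example (illustrative, not from the original comment): for
   C == 0xff0ff0 the lowest set bit is bit1 == 0x10, the lowest zero bit of
   the hole above it is bit2 == 0x1000, and the first one bit above that
   hole is bit3 == 0x10000.  Filling the hole gives
   0xff0ff0 + 0x10000 - 0x1000 == 0xfffff0, a single run of ones that one
   rlwinm can handle, so the original AND is doable in two instructions.  */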
11662 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
11663 If EXPAND is true, split rotate-and-mask instructions we generate to
11664 their constituent parts as well (this is used during expand); if DOT
11665 is 1, make the last insn a record-form instruction clobbering the
11666 destination GPR and setting the CC reg (from operands[3]); if 2, set
11667 that GPR as well as the CC reg. */
11670 rs6000_emit_2insn_and (machine_mode mode
, rtx
*operands
, bool expand
, int dot
)
11672 gcc_assert (!(expand
&& dot
));
11674 unsigned HOST_WIDE_INT val
= INTVAL (operands
[2]);
11676 /* If it is one stretch of ones, it is DImode; shift left, mask, then
11677 shift right. This generates better code than doing the masks without
11678 shifts, or shifting first right and then left. */
11680 if (rs6000_is_valid_mask (operands
[2], &nb
, &ne
, mode
) && nb
>= ne
)
11682 gcc_assert (mode
== DImode
);
11684 int shift
= 63 - nb
;
11687 rtx tmp1
= gen_reg_rtx (DImode
);
11688 rtx tmp2
= gen_reg_rtx (DImode
);
11689 emit_insn (gen_ashldi3 (tmp1
, operands
[1], GEN_INT (shift
)));
11690 emit_insn (gen_anddi3 (tmp2
, tmp1
, GEN_INT (val
<< shift
)));
11691 emit_insn (gen_lshrdi3 (operands
[0], tmp2
, GEN_INT (shift
)));
11695 rtx tmp
= gen_rtx_ASHIFT (mode
, operands
[1], GEN_INT (shift
));
11696 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (val
<< shift
));
11697 emit_move_insn (operands
[0], tmp
);
11698 tmp
= gen_rtx_LSHIFTRT (mode
, operands
[0], GEN_INT (shift
));
11699 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
11704 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
11705 that does the rest. */
11706 unsigned HOST_WIDE_INT bit1
= val
& -val
;
11707 unsigned HOST_WIDE_INT bit2
= (val
+ bit1
) & ~val
;
11708 unsigned HOST_WIDE_INT val1
= (val
+ bit1
) & val
;
11709 unsigned HOST_WIDE_INT bit3
= val1
& -val1
;
11711 unsigned HOST_WIDE_INT mask1
= -bit3
+ bit2
- 1;
11712 unsigned HOST_WIDE_INT mask2
= val
+ bit3
- bit2
;
11714 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2
), mode
));
11716 /* Two "no-rotate"-and-mask instructions, for SImode. */
11717 if (rs6000_is_valid_and_mask (GEN_INT (mask1
), mode
))
11719 gcc_assert (mode
== SImode
);
11721 rtx reg
= expand
? gen_reg_rtx (mode
) : operands
[0];
11722 rtx tmp
= gen_rtx_AND (mode
, operands
[1], GEN_INT (mask1
));
11723 emit_move_insn (reg
, tmp
);
11724 tmp
= gen_rtx_AND (mode
, reg
, GEN_INT (mask2
));
11725 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
11729 gcc_assert (mode
== DImode
);
11731 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
11732 insns; we have to do the first in SImode, because it wraps. */
11733 if (mask2
<= 0xffffffff
11734 && rs6000_is_valid_and_mask (GEN_INT (mask1
), SImode
))
11736 rtx reg
= expand
? gen_reg_rtx (mode
) : operands
[0];
11737 rtx tmp
= gen_rtx_AND (SImode
, gen_lowpart (SImode
, operands
[1]),
11739 rtx reg_low
= gen_lowpart (SImode
, reg
);
11740 emit_move_insn (reg_low
, tmp
);
11741 tmp
= gen_rtx_AND (mode
, reg
, GEN_INT (mask2
));
11742 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
11746 /* Two rld* insns: rotate, clear the hole in the middle (which now is
11747 at the top end), rotate back and clear the other hole. */
11748 int right
= exact_log2 (bit3
);
11749 int left
= 64 - right
;
11751 /* Rotate the mask too. */
11752 mask1
= (mask1
>> right
) | ((bit2
- 1) << left
);
11756 rtx tmp1
= gen_reg_rtx (DImode
);
11757 rtx tmp2
= gen_reg_rtx (DImode
);
11758 rtx tmp3
= gen_reg_rtx (DImode
);
11759 emit_insn (gen_rotldi3 (tmp1
, operands
[1], GEN_INT (left
)));
11760 emit_insn (gen_anddi3 (tmp2
, tmp1
, GEN_INT (mask1
)));
11761 emit_insn (gen_rotldi3 (tmp3
, tmp2
, GEN_INT (right
)));
11762 emit_insn (gen_anddi3 (operands
[0], tmp3
, GEN_INT (mask2
)));
11766 rtx tmp
= gen_rtx_ROTATE (mode
, operands
[1], GEN_INT (left
));
11767 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (mask1
));
11768 emit_move_insn (operands
[0], tmp
);
11769 tmp
= gen_rtx_ROTATE (mode
, operands
[0], GEN_INT (right
));
11770 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (mask2
));
11771 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
/* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
   for lfq and stfq insns iff the registers are hard registers.  */

int
registers_ok_for_quad_peep (rtx reg1, rtx reg2)
{
  /* We might have been passed a SUBREG.  */
  if (!REG_P (reg1) || !REG_P (reg2))
    return 0;

  /* We might have been passed non floating point registers.  */
  if (!FP_REGNO_P (REGNO (reg1))
      || !FP_REGNO_P (REGNO (reg2)))
    return 0;

  return (REGNO (reg1) == REGNO (reg2) - 1);
}
/* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
   addr1 and addr2 must be in consecutive memory locations
   (addr2 == addr1 + 8).  */
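/* For example (illustrative only):
   mem1 == (mem:DF (plus:DI (reg:DI 3) (const_int 8))) paired with
   mem2 == (mem:DF (plus:DI (reg:DI 3) (const_int 16))) qualifies, since
   both use the same base register and the second offset is exactly 8
   larger; a pair using different base registers, or with the offsets in
   the wrong order, does not.  */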
int
mems_ok_for_quad_peep (rtx mem1, rtx mem2)
{
  rtx addr1, addr2;
  unsigned int reg1, reg2;
  int offset1, offset2;

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
    return 0;

  addr1 = XEXP (mem1, 0);
  addr2 = XEXP (mem2, 0);

  /* Extract an offset (if used) from the first addr.  */
  if (GET_CODE (addr1) == PLUS)
    {
      /* If not a REG, return zero.  */
      if (!REG_P (XEXP (addr1, 0)))
        return 0;
      else
        {
          reg1 = REGNO (XEXP (addr1, 0));
          /* The offset must be constant!  */
          if (!CONST_INT_P (XEXP (addr1, 1)))
            return 0;
          offset1 = INTVAL (XEXP (addr1, 1));
        }
    }
  else if (!REG_P (addr1))
    return 0;
  else
    {
      reg1 = REGNO (addr1);
      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
      offset1 = 0;
    }

  /* And now for the second addr.  */
  if (GET_CODE (addr2) == PLUS)
    {
      /* If not a REG, return zero.  */
      if (!REG_P (XEXP (addr2, 0)))
        return 0;
      else
        {
          reg2 = REGNO (XEXP (addr2, 0));
          /* The offset must be constant.  */
          if (!CONST_INT_P (XEXP (addr2, 1)))
            return 0;
          offset2 = INTVAL (XEXP (addr2, 1));
        }
    }
  else if (!REG_P (addr2))
    return 0;
  else
    {
      reg2 = REGNO (addr2);
      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
      offset2 = 0;
    }

  /* Both of these must have the same base register.  */
  if (reg1 != reg2)
    return 0;

  /* The offset for the second addr must be 8 more than the first addr.  */
  if (offset2 != offset1 + 8)
    return 0;

  /* All the tests passed.  addr1 and addr2 are valid for lfq or stfq
     insns.  */
  return 1;
}
11872 /* Implement TARGET_SECONDARY_RELOAD_NEEDED_MODE. For SDmode values we
11873 need to use DDmode, in all other cases we can use the same mode. */
11874 static machine_mode
11875 rs6000_secondary_memory_needed_mode (machine_mode mode
)
11877 if (lra_in_progress
&& mode
== SDmode
)
11882 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
11883 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
11884 only work on the traditional altivec registers, note if an altivec register
11887 static enum rs6000_reg_type
11888 register_to_reg_type (rtx reg
, bool *is_altivec
)
11890 HOST_WIDE_INT regno
;
11891 enum reg_class rclass
;
11893 if (SUBREG_P (reg
))
11894 reg
= SUBREG_REG (reg
);
11897 return NO_REG_TYPE
;
11899 regno
= REGNO (reg
);
11900 if (!HARD_REGISTER_NUM_P (regno
))
11902 if (!lra_in_progress
&& !reload_completed
)
11903 return PSEUDO_REG_TYPE
;
11905 regno
= true_regnum (reg
);
11906 if (regno
< 0 || !HARD_REGISTER_NUM_P (regno
))
11907 return PSEUDO_REG_TYPE
;
11910 gcc_assert (regno
>= 0);
11912 if (is_altivec
&& ALTIVEC_REGNO_P (regno
))
11913 *is_altivec
= true;
11915 rclass
= rs6000_regno_regclass
[regno
];
11916 return reg_class_to_reg_type
[(int)rclass
];
11919 /* Helper function to return the cost of adding a TOC entry address. */
11922 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask
)
11926 if (TARGET_CMODEL
!= CMODEL_SMALL
)
11927 ret
= ((addr_mask
& RELOAD_REG_OFFSET
) == 0) ? 1 : 2;
11930 ret
= (TARGET_MINIMAL_TOC
) ? 6 : 3;
/* Helper function for rs6000_secondary_reload to determine whether the memory
   address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
   needs reloading.  Return negative if the memory is not handled by the memory
   helper functions and to try a different reload method, 0 if no additional
   instructions are needed, and positive to give the extra cost for the
   memory.  */

static int
rs6000_secondary_reload_memory (rtx addr,
                                enum reg_class rclass,
                                machine_mode mode)
{
  int extra_cost = 0;
  rtx reg, and_arg, plus_arg0, plus_arg1;
  addr_mask_type addr_mask;
  const char *type = NULL;
  const char *fail_msg = NULL;

  if (GPR_REG_CLASS_P (rclass))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];

  else if (rclass == FLOAT_REGS)
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];

  else if (rclass == ALTIVEC_REGS)
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];

  /* For the combined VSX_REGS, turn off Altivec AND -16.  */
  else if (rclass == VSX_REGS)
    addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
                 & ~RELOAD_REG_AND_M16);

  /* If the register allocator hasn't made up its mind yet on the register
     class to use, settle on defaults to use.  */
  else if (rclass == NO_REGS)
    {
      addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
                   & ~RELOAD_REG_AND_M16);

      if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
        addr_mask &= ~(RELOAD_REG_INDEXED
                       | RELOAD_REG_PRE_INCDEC
                       | RELOAD_REG_PRE_MODIFY);
    }

  else
    addr_mask = 0;

  /* If the register isn't valid in this register class, just return now.  */
  if ((addr_mask & RELOAD_REG_VALID) == 0)
    {
      if (TARGET_DEBUG_ADDR)
        fprintf (stderr,
                 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
                 "not valid in class\n",
                 GET_MODE_NAME (mode), reg_class_names[rclass]);

      return -1;
    }

  switch (GET_CODE (addr))
    {
      /* Does the register class support auto update forms for this mode?  We
         don't need a scratch register, since the powerpc only supports
         PRE_INC, PRE_DEC, and PRE_MODIFY.  */
    case PRE_INC:
    case PRE_DEC:
      reg = XEXP (addr, 0);
      if (!base_reg_operand (addr, GET_MODE (reg)))
        {
          fail_msg = "no base register #1";
          extra_cost = -1;
        }

      else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
        {
          extra_cost = 1;
          type = "update";
        }
      break;

    case PRE_MODIFY:
      reg = XEXP (addr, 0);
      plus_arg1 = XEXP (addr, 1);
      if (!base_reg_operand (reg, GET_MODE (reg))
          || GET_CODE (plus_arg1) != PLUS
          || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
        {
          fail_msg = "bad PRE_MODIFY";
          extra_cost = -1;
        }

      else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
        {
          extra_cost = 1;
          type = "update";
        }
      break;

      /* Do we need to simulate AND -16 to clear the bottom address bits used
         in VMX load/stores?  Only allow the AND for vector sizes.  */
    case AND:
      and_arg = XEXP (addr, 0);
      if (GET_MODE_SIZE (mode) != 16
          || !CONST_INT_P (XEXP (addr, 1))
          || INTVAL (XEXP (addr, 1)) != -16)
        {
          fail_msg = "bad Altivec AND #1";
          extra_cost = -1;
        }

      if (rclass != ALTIVEC_REGS)
        {
          if (legitimate_indirect_address_p (and_arg, false))
            extra_cost = 1;

          else if (legitimate_indexed_address_p (and_arg, false))
            extra_cost = 2;

          else
            {
              fail_msg = "bad Altivec AND #2";
              extra_cost = -1;
            }

          type = "and";
        }
      break;

      /* If this is an indirect address, make sure it is a base register.  */
    case REG:
    case SUBREG:
      if (!legitimate_indirect_address_p (addr, false))
        {
          extra_cost = 1;
          type = "move";
        }
      break;

      /* If this is an indexed address, make sure the register class can handle
         indexed addresses for this mode.  */
    case PLUS:
      plus_arg0 = XEXP (addr, 0);
      plus_arg1 = XEXP (addr, 1);

      /* (plus (plus (reg) (constant)) (constant)) is generated during
         push_reload processing, so handle it now.  */
      if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
        {
          if ((addr_mask & RELOAD_REG_OFFSET) == 0)
            {
              extra_cost = 1;
              type = "offset";
            }
        }

      /* (plus (plus (reg) (constant)) (reg)) is also generated during
         push_reload processing, so handle it now.  */
      else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
        {
          if ((addr_mask & RELOAD_REG_INDEXED) == 0)
            {
              extra_cost = 1;
              type = "indexed #2";
            }
        }

      else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
        {
          fail_msg = "no base register #2";
          extra_cost = -1;
        }

      else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
        {
          if ((addr_mask & RELOAD_REG_INDEXED) == 0
              || !legitimate_indexed_address_p (addr, false))
            {
              extra_cost = 1;
              type = "indexed";
            }
        }

      else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
               && CONST_INT_P (plus_arg1))
        {
          if (!quad_address_offset_p (INTVAL (plus_arg1)))
            {
              extra_cost = 1;
              type = "vector d-form offset";
            }
        }

      /* Make sure the register class can handle offset addresses.  */
      else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
        {
          if ((addr_mask & RELOAD_REG_OFFSET) == 0)
            {
              extra_cost = 1;
              type = "offset #2";
            }
        }

      else
        {
          fail_msg = "bad PLUS";
          extra_cost = -1;
        }

      break;

    case LO_SUM:
      /* Quad offsets are restricted and can't handle normal addresses.  */
      if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
        {
          extra_cost = -1;
          type = "vector d-form lo_sum";
        }

      else if (!legitimate_lo_sum_address_p (mode, addr, false))
        {
          fail_msg = "bad LO_SUM";
          extra_cost = -1;
        }

      else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
        {
          extra_cost = 1;
          type = "lo_sum";
        }
      break;

      /* Static addresses need to create a TOC entry.  */
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
        {
          extra_cost = -1;
          type = "vector d-form lo_sum #2";
        }

      else
        {
          type = "address";
          extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
        }
      break;

      /* TOC references look like offsetable memory.  */
    case UNSPEC:
      if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
        {
          fail_msg = "bad UNSPEC";
          extra_cost = -1;
        }

      else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
        {
          extra_cost = -1;
          type = "vector d-form lo_sum #3";
        }

      else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
        {
          extra_cost = 1;
          type = "toc reference";
        }
      break;

    default:
      fail_msg = "bad address";
      extra_cost = -1;
      break;
    }

  if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
    {
      if (extra_cost < 0)
        fprintf (stderr,
                 "rs6000_secondary_reload_memory error: mode = %s, "
                 "class = %s, addr_mask = '%s', %s\n",
                 GET_MODE_NAME (mode),
                 reg_class_names[rclass],
                 rs6000_debug_addr_mask (addr_mask, false),
                 (fail_msg != NULL) ? fail_msg : "<bad address>");

      else
        fprintf (stderr,
                 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
                 "addr_mask = '%s', extra cost = %d, %s\n",
                 GET_MODE_NAME (mode),
                 reg_class_names[rclass],
                 rs6000_debug_addr_mask (addr_mask, false),
                 extra_cost,
                 (type) ? type : "<none>");

      debug_rtx (addr);
    }

  return extra_cost;
}
/* Helper function for rs6000_secondary_reload to return true if a move to a
   different register class is really a simple move.  */

static bool
rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
                                     enum rs6000_reg_type from_type,
                                     machine_mode mode)
{
  int size = GET_MODE_SIZE (mode);

  /* Add support for various direct moves available.  In this function, we only
     look at cases where we don't need any extra registers, and one or more
     simple move insns are issued.  Originally small integers are not allowed
     in FPR/VSX registers.  Single precision binary floating is not a simple
     move because we need to convert to the single precision memory layout.
     The 4-byte SDmode can be moved.  TDmode values are disallowed since they
     need special direct move handling, which we do not support yet.  */
  if (TARGET_DIRECT_MOVE
      && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
          || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
    {
      if (TARGET_POWERPC64)
        {
          /* ISA 2.07: MTVSRD or MVFVSRD.  */
          if (size == 8)
            return true;

          /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD.  */
          if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
            return true;
        }

      /* ISA 2.07: MTVSRWZ or MFVSRWZ.  */
      if (TARGET_P8_VECTOR)
        {
          if (mode == SImode)
            return true;

          if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
            return true;
        }

      /* ISA 2.07: MTVSRWZ or MFVSRWZ.  */
      if (mode == SDmode)
        return true;
    }

  /* Move to/from SPR.  */
  else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
           && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
               || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
    return true;

  return false;
}
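
/* For instance (editorial note): on a 64-bit ISA 2.07 target a DImode value
   moves between a GPR and a VSX register with a single mtvsrd/mfvsrd, and an
   SImode value with mtvsrwz/mfvsrwz, so both count as simple moves here; an
   SFmode GPR<->VSX move does not, because the value also has to be converted
   to or from the single precision memory layout.  */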
/* Direct move helper function for rs6000_secondary_reload, handle all of the
   special direct moves that involve allocating an extra register, return the
   insn code of the helper function if there is such a function or
   CODE_FOR_nothing if not.  */

static bool
rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
                                     enum rs6000_reg_type from_type,
                                     machine_mode mode,
                                     secondary_reload_info *sri,
                                     bool altivec_p)
{
  bool ret = false;
  enum insn_code icode = CODE_FOR_nothing;
  int cost = 0;
  int size = GET_MODE_SIZE (mode);

  if (TARGET_POWERPC64 && size == 16)
    {
      /* Handle moving 128-bit values from GPRs to VSX registers on
         ISA 2.07 (power8, power9) when running in 64-bit mode using
         XXPERMDI to glue the two 64-bit values back together.  */
      if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
        {
          cost = 3;                     /* 2 mtvsrd's, 1 xxpermdi.  */
          icode = reg_addr[mode].reload_vsx_gpr;
        }

      /* Handle moving 128-bit values from VSX registers to GPRs on
         ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to
         the bottom 64-bit value.  */
      else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
        {
          cost = 3;                     /* 2 mfvsrd's, 1 xxpermdi.  */
          icode = reg_addr[mode].reload_gpr_vsx;
        }
    }

  else if (TARGET_POWERPC64 && mode == SFmode)
    {
      if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
        {
          cost = 3;                     /* xscvdpspn, mfvsrd, and.  */
          icode = reg_addr[mode].reload_gpr_vsx;
        }

      else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
        {
          cost = 2;                     /* mtvsrz, xscvspdpn.  */
          icode = reg_addr[mode].reload_vsx_gpr;
        }
    }

  else if (!TARGET_POWERPC64 && size == 8)
    {
      /* Handle moving 64-bit values from GPRs to floating point registers on
         ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
         32-bit values back together.  Altivec register classes must be handled
         specially since a different instruction is used, and the secondary
         reload support requires a single instruction class in the scratch
         register constraint.  However, right now TFmode is not allowed in
         Altivec registers, so the pattern will never match.  */
      if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
        {
          cost = 3;                     /* 2 mtvsrwz's, 1 fmrgow.  */
          icode = reg_addr[mode].reload_fpr_gpr;
        }
    }

  if (icode != CODE_FOR_nothing)
    {
      ret = true;
      sri->icode = icode;
      sri->extra_cost = cost;
    }

  return ret;
}
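
/* Illustrative case (editorial): moving a TImode value from a GPR pair into a
   VSX register on a 64-bit power8/power9 target cannot be done in one insn,
   so the code above reports the mode's reload_vsx_gpr pattern with an extra
   cost of 3 (two mtvsrd plus one xxpermdi) and lets reload allocate the
   temporary that pattern requires.  */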
/* Return whether a move between two register classes can be done either
   directly (simple move) or via a pattern that uses a single extra temporary
   (using ISA 2.07's direct move in this case).  */

static bool
rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
                              enum rs6000_reg_type from_type,
                              machine_mode mode,
                              secondary_reload_info *sri,
                              bool altivec_p)
{
  /* Fall back to load/store reloads if either type is not a register.  */
  if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
    return false;

  /* If we haven't allocated registers yet, assume the move can be done for the
     standard register types.  */
  if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
      || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
      || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
    return true;

  /* Moves to the same set of registers are a simple move for non-specialized
     registers.  */
  if (to_type == from_type && IS_STD_REG_TYPE (to_type))
    return true;

  /* Check whether a simple move can be done directly.  */
  if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
    {
      if (sri)
        {
          sri->icode = CODE_FOR_nothing;
          sri->extra_cost = 0;
        }
      return true;
    }

  /* Now check if we can do it in a few steps.  */
  return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
                                              altivec_p);
}
12423 /* Inform reload about cases where moving X with a mode MODE to a register in
12424 RCLASS requires an extra scratch or immediate register. Return the class
12425 needed for the immediate register.
12427 For VSX and Altivec, we may need a register to convert sp+offset into
12430 For misaligned 64-bit gpr loads and stores we need a register to
12431 convert an offset address to indirect. */
12434 rs6000_secondary_reload (bool in_p
,
12436 reg_class_t rclass_i
,
12438 secondary_reload_info
*sri
)
12440 enum reg_class rclass
= (enum reg_class
) rclass_i
;
12441 reg_class_t ret
= ALL_REGS
;
12442 enum insn_code icode
;
12443 bool default_p
= false;
12444 bool done_p
= false;
12446 /* Allow subreg of memory before/during reload. */
12447 bool memory_p
= (MEM_P (x
)
12448 || (!reload_completed
&& SUBREG_P (x
)
12449 && MEM_P (SUBREG_REG (x
))));
12451 sri
->icode
= CODE_FOR_nothing
;
12452 sri
->t_icode
= CODE_FOR_nothing
;
12453 sri
->extra_cost
= 0;
12455 ? reg_addr
[mode
].reload_load
12456 : reg_addr
[mode
].reload_store
);
12458 if (REG_P (x
) || register_operand (x
, mode
))
12460 enum rs6000_reg_type to_type
= reg_class_to_reg_type
[(int)rclass
];
12461 bool altivec_p
= (rclass
== ALTIVEC_REGS
);
12462 enum rs6000_reg_type from_type
= register_to_reg_type (x
, &altivec_p
);
12465 std::swap (to_type
, from_type
);
12467 /* Can we do a direct move of some sort? */
12468 if (rs6000_secondary_reload_move (to_type
, from_type
, mode
, sri
,
12471 icode
= (enum insn_code
)sri
->icode
;
12478 /* Make sure 0.0 is not reloaded or forced into memory. */
12479 if (x
== CONST0_RTX (mode
) && VSX_REG_CLASS_P (rclass
))
12486 /* If this is a scalar floating point value and we want to load it into the
12487 traditional Altivec registers, do it via a move via a traditional floating
12488 point register, unless we have D-form addressing. Also make sure that
12489 non-zero constants use a FPR. */
12490 if (!done_p
&& reg_addr
[mode
].scalar_in_vmx_p
12491 && !mode_supports_vmx_dform (mode
)
12492 && (rclass
== VSX_REGS
|| rclass
== ALTIVEC_REGS
)
12493 && (memory_p
|| CONST_DOUBLE_P (x
)))
12500 /* Handle reload of load/stores if we have reload helper functions. */
12501 if (!done_p
&& icode
!= CODE_FOR_nothing
&& memory_p
)
12503 int extra_cost
= rs6000_secondary_reload_memory (XEXP (x
, 0), rclass
,
12506 if (extra_cost
>= 0)
12510 if (extra_cost
> 0)
12512 sri
->extra_cost
= extra_cost
;
12513 sri
->icode
= icode
;
12518 /* Handle unaligned loads and stores of integer registers. */
12519 if (!done_p
&& TARGET_POWERPC64
12520 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
12522 && GET_MODE_SIZE (GET_MODE (x
)) >= UNITS_PER_WORD
)
12524 rtx addr
= XEXP (x
, 0);
12525 rtx off
= address_offset (addr
);
12527 if (off
!= NULL_RTX
)
12529 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
12530 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
12532 /* We need a secondary reload when our legitimate_address_p
12533 says the address is good (as otherwise the entire address
12534 will be reloaded), and the offset is not a multiple of
12535 four or we have an address wrap. Address wrap will only
12536 occur for LO_SUMs since legitimate_offset_address_p
12537 rejects addresses for 16-byte mems that will wrap. */
12538 if (GET_CODE (addr
) == LO_SUM
12539 ? (1 /* legitimate_address_p allows any offset for lo_sum */
12540 && ((offset
& 3) != 0
12541 || ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
))
12542 : (offset
+ 0x8000 < 0x10000 - extra
/* legitimate_address_p */
12543 && (offset
& 3) != 0))
12545 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
12547 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_load
12548 : CODE_FOR_reload_di_load
);
12550 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_store
12551 : CODE_FOR_reload_di_store
);
12552 sri
->extra_cost
= 2;
12563 if (!done_p
&& !TARGET_POWERPC64
12564 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
12566 && GET_MODE_SIZE (GET_MODE (x
)) > UNITS_PER_WORD
)
12568 rtx addr
= XEXP (x
, 0);
12569 rtx off
= address_offset (addr
);
12571 if (off
!= NULL_RTX
)
12573 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
12574 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
12576 /* We need a secondary reload when our legitimate_address_p
12577 says the address is good (as otherwise the entire address
12578 will be reloaded), and we have a wrap.
12580 legitimate_lo_sum_address_p allows LO_SUM addresses to
12581 have any offset so test for wrap in the low 16 bits.
12583 legitimate_offset_address_p checks for the range
12584 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
12585 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
12586 [0x7ff4,0x7fff] respectively, so test for the
12587 intersection of these ranges, [0x7ffc,0x7fff] and
12588 [0x7ff4,0x7ff7] respectively.
12590 Note that the address we see here may have been
12591 manipulated by legitimize_reload_address. */
12592 if (GET_CODE (addr
) == LO_SUM
12593 ? ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
12594 : offset
- (0x8000 - extra
) < UNITS_PER_WORD
)
12597 sri
->icode
= CODE_FOR_reload_si_load
;
12599 sri
->icode
= CODE_FOR_reload_si_store
;
12600 sri
->extra_cost
= 2;
12615 ret
= default_secondary_reload (in_p
, x
, rclass
, mode
, sri
);
12617 gcc_assert (ret
!= ALL_REGS
);
12619 if (TARGET_DEBUG_ADDR
)
12622 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
12624 reg_class_names
[ret
],
12625 in_p
? "true" : "false",
12626 reg_class_names
[rclass
],
12627 GET_MODE_NAME (mode
));
12629 if (reload_completed
)
12630 fputs (", after reload", stderr
);
12633 fputs (", done_p not set", stderr
);
12636 fputs (", default secondary reload", stderr
);
12638 if (sri
->icode
!= CODE_FOR_nothing
)
12639 fprintf (stderr
, ", reload func = %s, extra cost = %d",
12640 insn_data
[sri
->icode
].name
, sri
->extra_cost
);
12642 else if (sri
->extra_cost
> 0)
12643 fprintf (stderr
, ", extra cost = %d", sri
->extra_cost
);
12645 fputs ("\n", stderr
);
/* Better tracing for rs6000_secondary_reload_inner.  */

static void
rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
                               bool store_p)
{
  rtx set, clobber;

  gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);

  fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
           store_p ? "store" : "load");

  if (store_p)
    set = gen_rtx_SET (mem, reg);
  else
    set = gen_rtx_SET (reg, mem);

  clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
  debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
}

static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
  ATTRIBUTE_NORETURN;

static void
rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
                              bool store_p)
{
  rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
  gcc_unreachable ();
}
12685 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
12686 reload helper functions. These were identified in
12687 rs6000_secondary_reload_memory, and if reload decided to use the secondary
12688 reload, it calls the insns:
12689 reload_<RELOAD:mode>_<P:mptrsize>_store
12690 reload_<RELOAD:mode>_<P:mptrsize>_load
12692 which in turn calls this function, to do whatever is necessary to create
12693 valid addresses. */
12696 rs6000_secondary_reload_inner (rtx reg
, rtx mem
, rtx scratch
, bool store_p
)
12698 int regno
= true_regnum (reg
);
12699 machine_mode mode
= GET_MODE (reg
);
12700 addr_mask_type addr_mask
;
12703 rtx op_reg
, op0
, op1
;
12708 if (regno
< 0 || !HARD_REGISTER_NUM_P (regno
) || !MEM_P (mem
)
12709 || !base_reg_operand (scratch
, GET_MODE (scratch
)))
12710 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12712 if (IN_RANGE (regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
))
12713 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_GPR
];
12715 else if (IN_RANGE (regno
, FIRST_FPR_REGNO
, LAST_FPR_REGNO
))
12716 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
];
12718 else if (IN_RANGE (regno
, FIRST_ALTIVEC_REGNO
, LAST_ALTIVEC_REGNO
))
12719 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
];
12722 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12724 /* Make sure the mode is valid in this register class. */
12725 if ((addr_mask
& RELOAD_REG_VALID
) == 0)
12726 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12728 if (TARGET_DEBUG_ADDR
)
12729 rs6000_secondary_reload_trace (__LINE__
, reg
, mem
, scratch
, store_p
);
12731 new_addr
= addr
= XEXP (mem
, 0);
12732 switch (GET_CODE (addr
))
12734 /* Does the register class support auto update forms for this mode? If
12735 not, do the update now. We don't need a scratch register, since the
12736 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
12739 op_reg
= XEXP (addr
, 0);
12740 if (!base_reg_operand (op_reg
, Pmode
))
12741 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12743 if ((addr_mask
& RELOAD_REG_PRE_INCDEC
) == 0)
12745 int delta
= GET_MODE_SIZE (mode
);
12746 if (GET_CODE (addr
) == PRE_DEC
)
12748 emit_insn (gen_add2_insn (op_reg
, GEN_INT (delta
)));
12754 op0
= XEXP (addr
, 0);
12755 op1
= XEXP (addr
, 1);
12756 if (!base_reg_operand (op0
, Pmode
)
12757 || GET_CODE (op1
) != PLUS
12758 || !rtx_equal_p (op0
, XEXP (op1
, 0)))
12759 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12761 if ((addr_mask
& RELOAD_REG_PRE_MODIFY
) == 0)
12763 emit_insn (gen_rtx_SET (op0
, op1
));
12768 /* Do we need to simulate AND -16 to clear the bottom address bits used
12769 in VMX load/stores? */
12771 op0
= XEXP (addr
, 0);
12772 op1
= XEXP (addr
, 1);
12773 if ((addr_mask
& RELOAD_REG_AND_M16
) == 0)
12775 if (REG_P (op0
) || SUBREG_P (op0
))
12778 else if (GET_CODE (op1
) == PLUS
)
12780 emit_insn (gen_rtx_SET (scratch
, op1
));
12785 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12787 and_op
= gen_rtx_AND (GET_MODE (scratch
), op_reg
, op1
);
12788 cc_clobber
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (CCmode
));
12789 rv
= gen_rtvec (2, gen_rtx_SET (scratch
, and_op
), cc_clobber
);
12790 emit_insn (gen_rtx_PARALLEL (VOIDmode
, rv
));
12791 new_addr
= scratch
;
12795 /* If this is an indirect address, make sure it is a base register. */
12798 if (!base_reg_operand (addr
, GET_MODE (addr
)))
12800 emit_insn (gen_rtx_SET (scratch
, addr
));
12801 new_addr
= scratch
;
12805 /* If this is an indexed address, make sure the register class can handle
12806 indexed addresses for this mode. */
12808 op0
= XEXP (addr
, 0);
12809 op1
= XEXP (addr
, 1);
12810 if (!base_reg_operand (op0
, Pmode
))
12811 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12813 else if (int_reg_operand (op1
, Pmode
))
12815 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
12817 emit_insn (gen_rtx_SET (scratch
, addr
));
12818 new_addr
= scratch
;
12822 else if (mode_supports_dq_form (mode
) && CONST_INT_P (op1
))
12824 if (((addr_mask
& RELOAD_REG_QUAD_OFFSET
) == 0)
12825 || !quad_address_p (addr
, mode
, false))
12827 emit_insn (gen_rtx_SET (scratch
, addr
));
12828 new_addr
= scratch
;
12832 /* Make sure the register class can handle offset addresses. */
12833 else if (rs6000_legitimate_offset_address_p (mode
, addr
, false, true))
12835 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12837 emit_insn (gen_rtx_SET (scratch
, addr
));
12838 new_addr
= scratch
;
12843 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12848 op0
= XEXP (addr
, 0);
12849 op1
= XEXP (addr
, 1);
12850 if (!base_reg_operand (op0
, Pmode
))
12851 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12853 else if (int_reg_operand (op1
, Pmode
))
12855 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
12857 emit_insn (gen_rtx_SET (scratch
, addr
));
12858 new_addr
= scratch
;
12862 /* Quad offsets are restricted and can't handle normal addresses. */
12863 else if (mode_supports_dq_form (mode
))
12865 emit_insn (gen_rtx_SET (scratch
, addr
));
12866 new_addr
= scratch
;
12869 /* Make sure the register class can handle offset addresses. */
12870 else if (legitimate_lo_sum_address_p (mode
, addr
, false))
12872 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12874 emit_insn (gen_rtx_SET (scratch
, addr
));
12875 new_addr
= scratch
;
12880 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12887 rs6000_emit_move (scratch
, addr
, Pmode
);
12888 new_addr
= scratch
;
12892 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12895 /* Adjust the address if it changed. */
12896 if (addr
!= new_addr
)
12898 mem
= replace_equiv_address_nv (mem
, new_addr
);
12899 if (TARGET_DEBUG_ADDR
)
12900 fprintf (stderr
, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
12903 /* Now create the move. */
12905 emit_insn (gen_rtx_SET (mem
, reg
));
12907 emit_insn (gen_rtx_SET (reg
, mem
));
/* Convert reloads involving 64-bit gprs and misaligned offset
   addressing, or multiple 32-bit gprs and offsets that are too large,
   to use indirect addressing.  */

void
rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
{
  int regno = true_regnum (reg);
  enum reg_class rclass;
  rtx addr;
  rtx scratch_or_premodify = scratch;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
               store_p ? "store" : "load");
      fprintf (stderr, "reg:\n");
      debug_rtx (reg);
      fprintf (stderr, "mem:\n");
      debug_rtx (mem);
      fprintf (stderr, "scratch:\n");
      debug_rtx (scratch);
    }

  gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
  gcc_assert (MEM_P (mem));
  rclass = REGNO_REG_CLASS (regno);
  gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
  addr = XEXP (mem, 0);

  if (GET_CODE (addr) == PRE_MODIFY)
    {
      gcc_assert (REG_P (XEXP (addr, 0))
                  && GET_CODE (XEXP (addr, 1)) == PLUS
                  && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
      scratch_or_premodify = XEXP (addr, 0);
      addr = XEXP (addr, 1);
    }
  gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);

  rs6000_emit_move (scratch_or_premodify, addr, Pmode);

  mem = replace_equiv_address_nv (mem, scratch_or_premodify);

  /* Now create the move.  */
  if (store_p)
    emit_insn (gen_rtx_SET (mem, reg));
  else
    emit_insn (gen_rtx_SET (reg, mem));
}
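
/* Example (editorial): a 64-bit load from (mem:DI (plus (reg 9) (const_int 5)))
   cannot use ld directly because the displacement is not a multiple of 4, so
   this routine copies the whole address into the scratch register and rewrites
   the access as (mem:DI (reg scratch)), i.e. plain indirect addressing.  */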
12965 /* Given an rtx X being reloaded into a reg required to be
12966 in class CLASS, return the class of reg to actually use.
12967 In general this is just CLASS; but on some machines
12968 in some cases it is preferable to use a more restrictive class.
12970 On the RS/6000, we have to return NO_REGS when we want to reload a
12971 floating-point CONST_DOUBLE to force it to be copied to memory.
12973 We also don't want to reload integer values into floating-point
12974 registers if we can at all help it. In fact, this can
12975 cause reload to die, if it tries to generate a reload of CTR
12976 into a FP register and discovers it doesn't have the memory location
12979 ??? Would it be a good idea to have reload do the converse, that is
12980 try to reload floating modes into FP registers if possible?
12983 static enum reg_class
12984 rs6000_preferred_reload_class (rtx x
, enum reg_class rclass
)
12986 machine_mode mode
= GET_MODE (x
);
12987 bool is_constant
= CONSTANT_P (x
);
12989 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
12990 reload class for it. */
12991 if ((rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
12992 && (reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_VALID
) == 0)
12995 if ((rclass
== FLOAT_REGS
|| rclass
== VSX_REGS
)
12996 && (reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
] & RELOAD_REG_VALID
) == 0)
12999 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
13000 the reloading of address expressions using PLUS into floating point
13002 if (TARGET_VSX
&& VSX_REG_CLASS_P (rclass
) && GET_CODE (x
) != PLUS
)
13006 /* Zero is always allowed in all VSX registers. */
13007 if (x
== CONST0_RTX (mode
))
13010 /* If this is a vector constant that can be formed with a few Altivec
13011 instructions, we want altivec registers. */
13012 if (GET_CODE (x
) == CONST_VECTOR
&& easy_vector_constant (x
, mode
))
13013 return ALTIVEC_REGS
;
13015 /* If this is an integer constant that can easily be loaded into
13016 vector registers, allow it. */
13017 if (CONST_INT_P (x
))
13019 HOST_WIDE_INT value
= INTVAL (x
);
13021 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
13022 2.06 can generate it in the Altivec registers with
13026 if (TARGET_P8_VECTOR
)
13028 else if (rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
13029 return ALTIVEC_REGS
;
13034 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
13035 a sign extend in the Altivec registers. */
13036 if (IN_RANGE (value
, -128, 127) && TARGET_P9_VECTOR
13037 && (rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
))
13038 return ALTIVEC_REGS
;
13041 /* Force constant to memory. */
13045 /* D-form addressing can easily reload the value. */
13046 if (mode_supports_vmx_dform (mode
)
13047 || mode_supports_dq_form (mode
))
13050 /* If this is a scalar floating point value and we don't have D-form
13051 addressing, prefer the traditional floating point registers so that we
13052 can use D-form (register+offset) addressing. */
13053 if (rclass
== VSX_REGS
13054 && (mode
== SFmode
|| GET_MODE_SIZE (mode
) == 8))
13057 /* Prefer the Altivec registers if Altivec is handling the vector
13058 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
13060 if (VECTOR_UNIT_ALTIVEC_P (mode
) || VECTOR_MEM_ALTIVEC_P (mode
)
13061 || mode
== V1TImode
)
13062 return ALTIVEC_REGS
;
13067 if (is_constant
|| GET_CODE (x
) == PLUS
)
13069 if (reg_class_subset_p (GENERAL_REGS
, rclass
))
13070 return GENERAL_REGS
;
13071 if (reg_class_subset_p (BASE_REGS
, rclass
))
13076 /* For the vector pair and vector quad modes, prefer their natural register
13077 (VSX or FPR) rather than GPR registers. For other integer types, prefer
13078 the GPR registers. */
13079 if (rclass
== GEN_OR_FLOAT_REGS
)
13081 if (mode
== OOmode
)
13084 if (mode
== XOmode
)
13087 if (GET_MODE_CLASS (mode
) == MODE_INT
)
13088 return GENERAL_REGS
;
13094 /* Debug version of rs6000_preferred_reload_class. */
13095 static enum reg_class
13096 rs6000_debug_preferred_reload_class (rtx x
, enum reg_class rclass
)
13098 enum reg_class ret
= rs6000_preferred_reload_class (x
, rclass
);
13101 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
13103 reg_class_names
[ret
], reg_class_names
[rclass
],
13104 GET_MODE_NAME (GET_MODE (x
)));
/* If we are copying between FP or AltiVec registers and anything else, we need
   a memory location.  The exception is when we are targeting ppc64 and the
   move to/from fpr to gpr instructions are available.  Also, under VSX, you
   can copy vector registers from the FP register set to the Altivec register
   set and vice versa.  */

static bool
rs6000_secondary_memory_needed (machine_mode mode,
                                reg_class_t from_class,
                                reg_class_t to_class)
{
  enum rs6000_reg_type from_type, to_type;
  bool altivec_p = ((from_class == ALTIVEC_REGS)
                    || (to_class == ALTIVEC_REGS));

  /* If a simple/direct move is available, we don't need secondary memory.  */
  from_type = reg_class_to_reg_type[(int)from_class];
  to_type = reg_class_to_reg_type[(int)to_class];

  if (rs6000_secondary_reload_move (to_type, from_type, mode,
                                    (secondary_reload_info *)0, altivec_p))
    return false;

  /* If we have a floating point or vector register class, we need to use
     memory to transfer the data.  */
  if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
    return true;

  return false;
}

/* Debug version of rs6000_secondary_memory_needed.  */
static bool
rs6000_debug_secondary_memory_needed (machine_mode mode,
                                      reg_class_t from_class,
                                      reg_class_t to_class)
{
  bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);

  fprintf (stderr,
           "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
           "to_class = %s, mode = %s\n",
           ret ? "true" : "false",
           reg_class_names[from_class],
           reg_class_names[to_class],
           GET_MODE_NAME (mode));

  return ret;
}
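
/* For example (editorial): without direct moves, copying a DImode value
   between a GPR and an FPR has to bounce through a stack slot, so this hook
   returns true; on power8, where mtvsrd/mfvsrd make it a simple move, the
   hook returns false and no secondary memory is allocated.  */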
13160 /* Return the register class of a scratch register needed to copy IN into
13161 or out of a register in RCLASS in MODE. If it can be done directly,
13162 NO_REGS is returned. */
13164 static enum reg_class
13165 rs6000_secondary_reload_class (enum reg_class rclass
, machine_mode mode
,
13170 if (TARGET_ELF
|| (DEFAULT_ABI
== ABI_DARWIN
13172 && MACHOPIC_INDIRECT
13176 /* We cannot copy a symbolic operand directly into anything
13177 other than BASE_REGS for TARGET_ELF. So indicate that a
13178 register from BASE_REGS is needed as an intermediate
13181 On Darwin, pic addresses require a load from memory, which
13182 needs a base register. */
13183 if (rclass
!= BASE_REGS
13184 && (SYMBOL_REF_P (in
)
13185 || GET_CODE (in
) == HIGH
13186 || GET_CODE (in
) == LABEL_REF
13187 || GET_CODE (in
) == CONST
))
13193 regno
= REGNO (in
);
13194 if (!HARD_REGISTER_NUM_P (regno
))
13196 regno
= true_regnum (in
);
13197 if (!HARD_REGISTER_NUM_P (regno
))
13201 else if (SUBREG_P (in
))
13203 regno
= true_regnum (in
);
13204 if (!HARD_REGISTER_NUM_P (regno
))
13210 /* If we have VSX register moves, prefer moving scalar values between
13211 Altivec registers and GPR by going via an FPR (and then via memory)
13212 instead of reloading the secondary memory address for Altivec moves. */
13214 && GET_MODE_SIZE (mode
) < 16
13215 && !mode_supports_vmx_dform (mode
)
13216 && (((rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
)
13217 && (regno
>= 0 && ALTIVEC_REGNO_P (regno
)))
13218 || ((rclass
== VSX_REGS
|| rclass
== ALTIVEC_REGS
)
13219 && (regno
>= 0 && INT_REGNO_P (regno
)))))
13222 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
13224 if (rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
13225 || (regno
>= 0 && INT_REGNO_P (regno
)))
13228 /* Constants, memory, and VSX registers can go into VSX registers (both the
13229 traditional floating point and the altivec registers). */
13230 if (rclass
== VSX_REGS
13231 && (regno
== -1 || VSX_REGNO_P (regno
)))
13234 /* Constants, memory, and FP registers can go into FP registers. */
13235 if ((regno
== -1 || FP_REGNO_P (regno
))
13236 && (rclass
== FLOAT_REGS
|| rclass
== GEN_OR_FLOAT_REGS
))
13237 return (mode
!= SDmode
|| lra_in_progress
) ? NO_REGS
: GENERAL_REGS
;
13239 /* Memory, and AltiVec registers can go into AltiVec registers. */
13240 if ((regno
== -1 || ALTIVEC_REGNO_P (regno
))
13241 && rclass
== ALTIVEC_REGS
)
13244 /* We can copy among the CR registers. */
13245 if ((rclass
== CR_REGS
|| rclass
== CR0_REGS
)
13246 && regno
>= 0 && CR_REGNO_P (regno
))
13249 /* Otherwise, we need GENERAL_REGS. */
13250 return GENERAL_REGS
;
13253 /* Debug version of rs6000_secondary_reload_class. */
13254 static enum reg_class
13255 rs6000_debug_secondary_reload_class (enum reg_class rclass
,
13256 machine_mode mode
, rtx in
)
13258 enum reg_class ret
= rs6000_secondary_reload_class (rclass
, mode
, in
);
13260 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
13261 "mode = %s, input rtx:\n",
13262 reg_class_names
[ret
], reg_class_names
[rclass
],
13263 GET_MODE_NAME (mode
));
13269 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
13272 rs6000_can_change_mode_class (machine_mode from
,
13274 reg_class_t rclass
)
13276 unsigned from_size
= GET_MODE_SIZE (from
);
13277 unsigned to_size
= GET_MODE_SIZE (to
);
13279 if (from_size
!= to_size
)
13281 enum reg_class xclass
= (TARGET_VSX
) ? VSX_REGS
: FLOAT_REGS
;
13283 if (reg_classes_intersect_p (xclass
, rclass
))
13285 unsigned to_nregs
= hard_regno_nregs (FIRST_FPR_REGNO
, to
);
13286 unsigned from_nregs
= hard_regno_nregs (FIRST_FPR_REGNO
, from
);
13287 bool to_float128_vector_p
= FLOAT128_VECTOR_P (to
);
13288 bool from_float128_vector_p
= FLOAT128_VECTOR_P (from
);
13290 /* Don't allow 64-bit types to overlap with 128-bit types that take a
13291 single register under VSX because the scalar part of the register
13292 is in the upper 64-bits, and not the lower 64-bits. Types like
13293 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
13294 IEEE floating point can't overlap, and neither can small
13297 if (to_float128_vector_p
&& from_float128_vector_p
)
13300 else if (to_float128_vector_p
|| from_float128_vector_p
)
13303 /* TDmode in floating-mode registers must always go into a register
13304 pair with the most significant word in the even-numbered register
13305 to match ISA requirements. In little-endian mode, this does not
13306 match subreg numbering, so we cannot allow subregs. */
13307 if (!BYTES_BIG_ENDIAN
&& (to
== TDmode
|| from
== TDmode
))
13310 /* Allow SD<->DD changes, since SDmode values are stored in
13311 the low half of the DDmode, just like target-independent
13312 code expects. We need to allow at least SD->DD since
13313 rs6000_secondary_memory_needed_mode asks for that change
13314 to be made for SD reloads. */
13315 if ((to
== DDmode
&& from
== SDmode
)
13316 || (to
== SDmode
&& from
== DDmode
))
13319 if (from_size
< 8 || to_size
< 8)
13322 if (from_size
== 8 && (8 * to_nregs
) != to_size
)
13325 if (to_size
== 8 && (8 * from_nregs
) != from_size
)
13334 /* Since the VSX register set includes traditional floating point registers
13335 and altivec registers, just check for the size being different instead of
13336 trying to check whether the modes are vector modes. Otherwise it won't
13337 allow say DF and DI to change classes. For types like TFmode and TDmode
13338 that take 2 64-bit registers, rather than a single 128-bit register, don't
13339 allow subregs of those types to other 128 bit types. */
13340 if (TARGET_VSX
&& VSX_REG_CLASS_P (rclass
))
13342 unsigned num_regs
= (from_size
+ 15) / 16;
13343 if (hard_regno_nregs (FIRST_FPR_REGNO
, to
) > num_regs
13344 || hard_regno_nregs (FIRST_FPR_REGNO
, from
) > num_regs
)
13347 return (from_size
== 8 || from_size
== 16);
13350 if (TARGET_ALTIVEC
&& rclass
== ALTIVEC_REGS
13351 && (ALTIVEC_VECTOR_MODE (from
) + ALTIVEC_VECTOR_MODE (to
)) == 1)
13357 /* Debug version of rs6000_can_change_mode_class. */
13359 rs6000_debug_can_change_mode_class (machine_mode from
,
13361 reg_class_t rclass
)
13363 bool ret
= rs6000_can_change_mode_class (from
, to
, rclass
);
13366 "rs6000_can_change_mode_class, return %s, from = %s, "
13367 "to = %s, rclass = %s\n",
13368 ret
? "true" : "false",
13369 GET_MODE_NAME (from
), GET_MODE_NAME (to
),
13370 reg_class_names
[rclass
]);
13375 /* Return a string to do a move operation of 128 bits of data. */
13378 rs6000_output_move_128bit (rtx operands
[])
13380 rtx dest
= operands
[0];
13381 rtx src
= operands
[1];
13382 machine_mode mode
= GET_MODE (dest
);
13385 bool dest_gpr_p
, dest_fp_p
, dest_vmx_p
, dest_vsx_p
;
13386 bool src_gpr_p
, src_fp_p
, src_vmx_p
, src_vsx_p
;
13390 dest_regno
= REGNO (dest
);
13391 dest_gpr_p
= INT_REGNO_P (dest_regno
);
13392 dest_fp_p
= FP_REGNO_P (dest_regno
);
13393 dest_vmx_p
= ALTIVEC_REGNO_P (dest_regno
);
13394 dest_vsx_p
= dest_fp_p
| dest_vmx_p
;
13399 dest_gpr_p
= dest_fp_p
= dest_vmx_p
= dest_vsx_p
= false;
13404 src_regno
= REGNO (src
);
13405 src_gpr_p
= INT_REGNO_P (src_regno
);
13406 src_fp_p
= FP_REGNO_P (src_regno
);
13407 src_vmx_p
= ALTIVEC_REGNO_P (src_regno
);
13408 src_vsx_p
= src_fp_p
| src_vmx_p
;
13413 src_gpr_p
= src_fp_p
= src_vmx_p
= src_vsx_p
= false;
13416 /* Register moves. */
13417 if (dest_regno
>= 0 && src_regno
>= 0)
13424 if (TARGET_DIRECT_MOVE_128
&& src_vsx_p
)
13425 return (WORDS_BIG_ENDIAN
13426 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
13427 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
13429 else if (TARGET_VSX
&& TARGET_DIRECT_MOVE
&& src_vsx_p
)
13433 else if (TARGET_VSX
&& dest_vsx_p
)
13436 return "xxlor %x0,%x1,%x1";
13438 else if (TARGET_DIRECT_MOVE_128
&& src_gpr_p
)
13439 return (WORDS_BIG_ENDIAN
13440 ? "mtvsrdd %x0,%1,%L1"
13441 : "mtvsrdd %x0,%L1,%1");
13443 else if (TARGET_DIRECT_MOVE
&& src_gpr_p
)
13447 else if (TARGET_ALTIVEC
&& dest_vmx_p
&& src_vmx_p
)
13448 return "vor %0,%1,%1";
13450 else if (dest_fp_p
&& src_fp_p
)
13455 else if (dest_regno
>= 0 && MEM_P (src
))
13459 if (TARGET_QUAD_MEMORY
&& quad_load_store_p (dest
, src
))
13465 else if (TARGET_ALTIVEC
&& dest_vmx_p
13466 && altivec_indexed_or_indirect_operand (src
, mode
))
13467 return "lvx %0,%y1";
13469 else if (TARGET_VSX
&& dest_vsx_p
)
13471 if (mode_supports_dq_form (mode
)
13472 && quad_address_p (XEXP (src
, 0), mode
, true))
13473 return "lxv %x0,%1";
13475 else if (TARGET_P9_VECTOR
)
13476 return "lxvx %x0,%y1";
13478 else if (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
13479 return "lxvw4x %x0,%y1";
13482 return "lxvd2x %x0,%y1";
13485 else if (TARGET_ALTIVEC
&& dest_vmx_p
)
13486 return "lvx %0,%y1";
13488 else if (dest_fp_p
)
13493 else if (src_regno
>= 0 && MEM_P (dest
))
13497 if (TARGET_QUAD_MEMORY
&& quad_load_store_p (dest
, src
))
13498 return "stq %1,%0";
13503 else if (TARGET_ALTIVEC
&& src_vmx_p
13504 && altivec_indexed_or_indirect_operand (dest
, mode
))
13505 return "stvx %1,%y0";
13507 else if (TARGET_VSX
&& src_vsx_p
)
13509 if (mode_supports_dq_form (mode
)
13510 && quad_address_p (XEXP (dest
, 0), mode
, true))
13511 return "stxv %x1,%0";
13513 else if (TARGET_P9_VECTOR
)
13514 return "stxvx %x1,%y0";
13516 else if (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
13517 return "stxvw4x %x1,%y0";
13520 return "stxvd2x %x1,%y0";
13523 else if (TARGET_ALTIVEC
&& src_vmx_p
)
13524 return "stvx %1,%y0";
13531 else if (dest_regno
>= 0
13532 && (CONST_INT_P (src
)
13533 || CONST_WIDE_INT_P (src
)
13534 || CONST_DOUBLE_P (src
)
13535 || GET_CODE (src
) == CONST_VECTOR
))
13540 else if ((dest_vmx_p
&& TARGET_ALTIVEC
)
13541 || (dest_vsx_p
&& TARGET_VSX
))
13542 return output_vec_const_move (operands
);
13545 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest
, src
));
/* Validate a 128-bit move.  */
bool
rs6000_move_128bit_ok_p (rtx operands[])
{
  machine_mode mode = GET_MODE (operands[0]);
  return (gpc_reg_operand (operands[0], mode)
          || gpc_reg_operand (operands[1], mode));
}

/* Return true if a 128-bit move needs to be split.  */
bool
rs6000_split_128bit_ok_p (rtx operands[])
{
  if (!reload_completed)
    return false;

  if (!gpr_or_gpr_p (operands[0], operands[1]))
    return false;

  if (quad_load_store_p (operands[0], operands[1]))
    return false;

  return true;
}
13574 /* Given a comparison operation, return the bit number in CCR to test. We
13575 know this is a valid comparison.
13577 SCC_P is 1 if this is for an scc. That means that %D will have been
13578 used instead of %C, so the bits will be in different places.
13580 Return -1 if OP isn't a valid comparison for some reason. */
13583 ccr_bit (rtx op
, int scc_p
)
13585 enum rtx_code code
= GET_CODE (op
);
13586 machine_mode cc_mode
;
13591 if (!COMPARISON_P (op
))
13594 reg
= XEXP (op
, 0);
13596 if (!REG_P (reg
) || !CR_REGNO_P (REGNO (reg
)))
13599 cc_mode
= GET_MODE (reg
);
13600 cc_regnum
= REGNO (reg
);
13601 base_bit
= 4 * (cc_regnum
- CR0_REGNO
);
13603 validate_condition_mode (code
, cc_mode
);
13605 /* When generating a sCOND operation, only positive conditions are
13624 return scc_p
? base_bit
+ 3 : base_bit
+ 2;
13626 return base_bit
+ 2;
13627 case GT
: case GTU
: case UNLE
:
13628 return base_bit
+ 1;
13629 case LT
: case LTU
: case UNGE
:
13631 case ORDERED
: case UNORDERED
:
13632 return base_bit
+ 3;
13635 /* If scc, we will have done a cror to put the bit in the
13636 unordered position. So test that bit. For integer, this is ! LT
13637 unless this is an scc insn. */
13638 return scc_p
? base_bit
+ 3 : base_bit
;
13641 return scc_p
? base_bit
+ 3 : base_bit
+ 1;
/* Return the GOT register.  */

rtx
rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
{
  /* The second flow pass currently (June 1999) can't update
     regs_ever_live without disturbing other parts of the compiler, so
     update it here to make the prolog/epilogue code happy.  */
  if (!can_create_pseudo_p ()
      && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
    df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);

  crtl->uses_pic_offset_table = 1;

  return pic_offset_table_rtx;
}
13665 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
13667 /* Write out a function code label. */
13670 rs6000_output_function_entry (FILE *file
, const char *fname
)
13672 if (fname
[0] != '.')
13674 switch (DEFAULT_ABI
)
13677 gcc_unreachable ();
13683 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "L.");
13693 RS6000_OUTPUT_BASENAME (file
, fname
);
13696 /* Print an operand. Recognize special options, documented below. */
13699 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
13700 only introduced by the linker, when applying the sda21
13702 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
13703 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
13705 #define SMALL_DATA_RELOC "sda21"
13706 #define SMALL_DATA_REG 0
13710 print_operand (FILE *file
, rtx x
, int code
)
13713 unsigned HOST_WIDE_INT uval
;
13717 /* %a is output_address. */
13719 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
13723 /* Write the MMA accumulator number associated with VSX register X. */
13724 if (!REG_P (x
) || !FP_REGNO_P (REGNO (x
)) || (REGNO (x
) % 4) != 0)
13725 output_operand_lossage ("invalid %%A value");
13727 fprintf (file
, "%d", (REGNO (x
) - FIRST_FPR_REGNO
) / 4);
13731 /* Like 'J' but get to the GT bit only. */
13732 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13734 output_operand_lossage ("invalid %%D value");
13738 /* Bit 1 is GT bit. */
13739 i
= 4 * (REGNO (x
) - CR0_REGNO
) + 1;
13741 /* Add one for shift count in rlinm for scc. */
13742 fprintf (file
, "%d", i
+ 1);
13746 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
13749 output_operand_lossage ("invalid %%e value");
13754 if ((uval
& 0xffff) == 0 && uval
!= 0)
13759 /* X is a CR register. Print the number of the EQ bit of the CR */
13760 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13761 output_operand_lossage ("invalid %%E value");
13763 fprintf (file
, "%d", 4 * (REGNO (x
) - CR0_REGNO
) + 2);
13767 /* X is a CR register. Print the shift count needed to move it
13768 to the high-order four bits. */
13769 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13770 output_operand_lossage ("invalid %%f value");
13772 fprintf (file
, "%d", 4 * (REGNO (x
) - CR0_REGNO
));
13776 /* Similar, but print the count for the rotate in the opposite
13778 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13779 output_operand_lossage ("invalid %%F value");
13781 fprintf (file
, "%d", 32 - 4 * (REGNO (x
) - CR0_REGNO
));
13785 /* X is a constant integer. If it is negative, print "m",
13786 otherwise print "z". This is to make an aze or ame insn. */
13787 if (!CONST_INT_P (x
))
13788 output_operand_lossage ("invalid %%G value");
13789 else if (INTVAL (x
) >= 0)
13796 /* If constant, output low-order five bits. Otherwise, write
13799 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 31);
13801 print_operand (file
, x
, 0);
13805 /* If constant, output low-order six bits. Otherwise, write
13808 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 63);
13810 print_operand (file
, x
, 0);
13814 /* Print `i' if this is a constant, else nothing. */
13820 /* Write the bit number in CCR for jump. */
13821 i
= ccr_bit (x
, 0);
13823 output_operand_lossage ("invalid %%j code");
13825 fprintf (file
, "%d", i
);
13829 /* Similar, but add one for shift count in rlinm for scc and pass
13830 scc flag to `ccr_bit'. */
13831 i
= ccr_bit (x
, 1);
13833 output_operand_lossage ("invalid %%J code");
13835 /* If we want bit 31, write a shift count of zero, not 32. */
13836 fprintf (file
, "%d", i
== 31 ? 0 : i
+ 1);
13840 /* X must be a constant. Write the 1's complement of the
13843 output_operand_lossage ("invalid %%k value");
13845 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ~ INTVAL (x
));
13849 /* X must be a symbolic constant on ELF. Write an
13850 expression suitable for an 'addi' that adds in the low 16
13851 bits of the MEM. */
13852 if (GET_CODE (x
) == CONST
)
13854 if (GET_CODE (XEXP (x
, 0)) != PLUS
13855 || (!SYMBOL_REF_P (XEXP (XEXP (x
, 0), 0))
13856 && GET_CODE (XEXP (XEXP (x
, 0), 0)) != LABEL_REF
)
13857 || !CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
13858 output_operand_lossage ("invalid %%K value");
13860 print_operand_address (file
, x
);
13861 fputs ("@l", file
);
13864 /* %l is output_asm_label. */
13867 /* Write second word of DImode or DFmode reference. Works on register
13868 or non-indexed memory only. */
13870 fputs (reg_names
[REGNO (x
) + 1], file
);
13871 else if (MEM_P (x
))
13873 machine_mode mode
= GET_MODE (x
);
13874 /* Handle possible auto-increment. Since it is pre-increment and
13875 we have already done it, we can just use an offset of word. */
13876 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
13877 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
13878 output_address (mode
, plus_constant (Pmode
, XEXP (XEXP (x
, 0), 0),
13880 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
13881 output_address (mode
, plus_constant (Pmode
, XEXP (XEXP (x
, 0), 0),
13884 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
,
13888 if (small_data_operand (x
, GET_MODE (x
)))
13889 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
13890 reg_names
[SMALL_DATA_REG
]);
13894 case 'N': /* Unused */
13895 /* Write the number of elements in the vector times 4. */
13896 if (GET_CODE (x
) != PARALLEL
)
13897 output_operand_lossage ("invalid %%N value");
13899 fprintf (file
, "%d", XVECLEN (x
, 0) * 4);
13902 case 'O': /* Unused */
13903 /* Similar, but subtract 1 first. */
13904 if (GET_CODE (x
) != PARALLEL
)
13905 output_operand_lossage ("invalid %%O value");
13907 fprintf (file
, "%d", (XVECLEN (x
, 0) - 1) * 4);
13911 /* X is a CONST_INT that is a power of two. Output the logarithm. */
13914 || (i
= exact_log2 (INTVAL (x
))) < 0)
13915 output_operand_lossage ("invalid %%p value");
13917 fprintf (file
, "%d", i
);
13921 /* The operand must be an indirect memory reference. The result
13922 is the register name. */
13923 if (!MEM_P (x
) || !REG_P (XEXP (x
, 0))
13924 || REGNO (XEXP (x
, 0)) >= 32)
13925 output_operand_lossage ("invalid %%P value");
13927 fputs (reg_names
[REGNO (XEXP (x
, 0))], file
);
13931 /* This outputs the logical code corresponding to a boolean
13932 expression. The expression may have one or both operands
13933 negated (if one, only the first one). For condition register
13934 logical operations, it will also treat the negated
13935 CR codes as NOTs, but not handle NOTs of them. */
13937 const char *const *t
= 0;
13939 enum rtx_code code
= GET_CODE (x
);
13940 static const char * const tbl
[3][3] = {
13941 { "and", "andc", "nor" },
13942 { "or", "orc", "nand" },
13943 { "xor", "eqv", "xor" } };
13947 else if (code
== IOR
)
13949 else if (code
== XOR
)
13952 output_operand_lossage ("invalid %%q value");
13954 if (GET_CODE (XEXP (x
, 0)) != NOT
)
13958 if (GET_CODE (XEXP (x
, 1)) == NOT
)
13969 if (! TARGET_MFCRF
)
13975 /* X is a CR register. Print the mask for `mtcrf'. */
13976 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13977 output_operand_lossage ("invalid %%R value");
13979 fprintf (file
, "%d", 128 >> (REGNO (x
) - CR0_REGNO
));
13983 /* Low 5 bits of 32 - value */
13985 output_operand_lossage ("invalid %%s value");
13987 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, (32 - INTVAL (x
)) & 31);
13991 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
13992 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13994 output_operand_lossage ("invalid %%t value");
13998 /* Bit 3 is OV bit. */
13999 i
= 4 * (REGNO (x
) - CR0_REGNO
) + 3;
14001 /* If we want bit 31, write a shift count of zero, not 32. */
14002 fprintf (file
, "%d", i
== 31 ? 0 : i
+ 1);
14006 /* Print the symbolic name of a branch target register. */
14007 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
14008 x
= XVECEXP (x
, 0, 0);
14009 if (!REG_P (x
) || (REGNO (x
) != LR_REGNO
14010 && REGNO (x
) != CTR_REGNO
))
14011 output_operand_lossage ("invalid %%T value");
14012 else if (REGNO (x
) == LR_REGNO
)
14013 fputs ("lr", file
);
14015 fputs ("ctr", file
);
14019 /* High-order or low-order 16 bits of constant, whichever is non-zero,
14020 for use in unsigned operand. */
14023 output_operand_lossage ("invalid %%u value");
14028 if ((uval
& 0xffff) == 0)
14031 fprintf (file
, HOST_WIDE_INT_PRINT_HEX
, uval
& 0xffff);
14035 /* High-order 16 bits of constant for use in signed operand. */
14037 output_operand_lossage ("invalid %%v value");
14039 fprintf (file
, HOST_WIDE_INT_PRINT_HEX
,
14040 (INTVAL (x
) >> 16) & 0xffff);
14044 /* Print `u' if this has an auto-increment or auto-decrement. */
14046 && (GET_CODE (XEXP (x
, 0)) == PRE_INC
14047 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
14048 || GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
))
14053 /* Print the trap code for this operand. */
14054 switch (GET_CODE (x
))
14057 fputs ("eq", file
); /* 4 */
14060 fputs ("ne", file
); /* 24 */
14063 fputs ("lt", file
); /* 16 */
14066 fputs ("le", file
); /* 20 */
14069 fputs ("gt", file
); /* 8 */
14072 fputs ("ge", file
); /* 12 */
14075 fputs ("llt", file
); /* 2 */
14078 fputs ("lle", file
); /* 6 */
14081 fputs ("lgt", file
); /* 1 */
14084 fputs ("lge", file
); /* 5 */
14087 output_operand_lossage ("invalid %%V value");
14092 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
14095 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
14096 ((INTVAL (x
) & 0xffff) ^ 0x8000) - 0x8000);
14098 print_operand (file
, x
, 0);
14102 /* X is a FPR or Altivec register used in a VSX context. */
14103 if (!REG_P (x
) || !VSX_REGNO_P (REGNO (x
)))
14104 output_operand_lossage ("invalid %%x value");
14107 int reg
= REGNO (x
);
14108 int vsx_reg
= (FP_REGNO_P (reg
)
14110 : reg
- FIRST_ALTIVEC_REGNO
+ 32);
14112 #ifdef TARGET_REGNAMES
14113 if (TARGET_REGNAMES
)
14114 fprintf (file
, "%%vs%d", vsx_reg
);
14117 fprintf (file
, "%d", vsx_reg
);
14123 && (legitimate_indexed_address_p (XEXP (x
, 0), 0)
14124 || (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
14125 && legitimate_indexed_address_p (XEXP (XEXP (x
, 0), 1), 0))))
14130 /* Like 'L', for third word of TImode/PTImode */
14132 fputs (reg_names
[REGNO (x
) + 2], file
);
14133 else if (MEM_P (x
))
14135 machine_mode mode
= GET_MODE (x
);
14136 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
14137 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14138 output_address (mode
, plus_constant (Pmode
,
14139 XEXP (XEXP (x
, 0), 0), 8));
14140 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14141 output_address (mode
, plus_constant (Pmode
,
14142 XEXP (XEXP (x
, 0), 0), 8));
14144 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
, 8), 0));
14145 if (small_data_operand (x
, GET_MODE (x
)))
14146 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
14147 reg_names
[SMALL_DATA_REG
]);
14152 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
14153 x
= XVECEXP (x
, 0, 1);
14154 /* X is a SYMBOL_REF. Write out the name preceded by a
14155 period and without any trailing data in brackets. Used for function
14156 names. If we are configured for System V (or the embedded ABI) on
14157 the PowerPC, do not emit the period, since those systems do not use
14158 TOCs and the like. */
14159 if (!SYMBOL_REF_P (x
))
14161 output_operand_lossage ("invalid %%z value");
14165 /* For macho, check to see if we need a stub. */
14168 const char *name
= XSTR (x
, 0);
14170 if (darwin_symbol_stubs
14171 && MACHOPIC_INDIRECT
14172 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
14173 name
= machopic_indirection_name (x
, /*stub_p=*/true);
14175 assemble_name (file
, name
);
14177 else if (!DOT_SYMBOLS
)
14178 assemble_name (file
, XSTR (x
, 0));
14180 rs6000_output_function_entry (file
, XSTR (x
, 0));
14184 /* Like 'L', for last word of TImode/PTImode. */
14186 fputs (reg_names
[REGNO (x
) + 3], file
);
14187 else if (MEM_P (x
))
14189 machine_mode mode
= GET_MODE (x
);
14190 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
14191 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14192 output_address (mode
, plus_constant (Pmode
,
14193 XEXP (XEXP (x
, 0), 0), 12));
14194 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14195 output_address (mode
, plus_constant (Pmode
,
14196 XEXP (XEXP (x
, 0), 0), 12));
14198 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
, 12), 0));
14199 if (small_data_operand (x
, GET_MODE (x
)))
14200 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
14201 reg_names
[SMALL_DATA_REG
]);
14205 /* Print AltiVec memory operand. */
14210 gcc_assert (MEM_P (x
));
14214 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x
))
14215 && GET_CODE (tmp
) == AND
14216 && CONST_INT_P (XEXP (tmp
, 1))
14217 && INTVAL (XEXP (tmp
, 1)) == -16)
14218 tmp
= XEXP (tmp
, 0);
14219 else if (VECTOR_MEM_VSX_P (GET_MODE (x
))
14220 && GET_CODE (tmp
) == PRE_MODIFY
)
14221 tmp
= XEXP (tmp
, 1);
14223 fprintf (file
, "0,%s", reg_names
[REGNO (tmp
)]);
14226 if (GET_CODE (tmp
) != PLUS
14227 || !REG_P (XEXP (tmp
, 0))
14228 || !REG_P (XEXP (tmp
, 1)))
14230 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
14234 if (REGNO (XEXP (tmp
, 0)) == 0)
14235 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (tmp
, 1)) ],
14236 reg_names
[ REGNO (XEXP (tmp
, 0)) ]);
14238 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (tmp
, 0)) ],
14239 reg_names
[ REGNO (XEXP (tmp
, 1)) ]);
14246 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
14247 else if (MEM_P (x
))
14249 /* We need to handle PRE_INC and PRE_DEC here, since we need to
14250 know the width from the mode. */
14251 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
)
14252 fprintf (file
, "%d(%s)", GET_MODE_SIZE (GET_MODE (x
)),
14253 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
14254 else if (GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14255 fprintf (file
, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x
)),
14256 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
14257 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14258 output_address (GET_MODE (x
), XEXP (XEXP (x
, 0), 1));
14260 output_address (GET_MODE (x
), XEXP (x
, 0));
14262 else if (toc_relative_expr_p (x
, false,
14263 &tocrel_base_oac
, &tocrel_offset_oac
))
14264 /* This hack along with a corresponding hack in
14265 rs6000_output_addr_const_extra arranges to output addends
14266 where the assembler expects to find them. eg.
14267 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
14268 without this hack would be output as "x@toc+4". We
14270 output_addr_const (file
, CONST_CAST_RTX (tocrel_base_oac
));
14271 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLSGD
)
14272 output_addr_const (file
, XVECEXP (x
, 0, 0));
14273 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
14274 output_addr_const (file
, XVECEXP (x
, 0, 1));
14276 output_addr_const (file
, x
);
14280 if (const char *name
= get_some_local_dynamic_name ())
14281 assemble_name (file
, name
);
14283 output_operand_lossage ("'%%&' used without any "
14284 "local dynamic TLS references");
14288 output_operand_lossage ("invalid %%xn code");
/* Print the address of an operand.  */
print_operand_address (FILE *file, rtx x)
  fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
  /* Is it a PC-relative address?  */
  else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
      HOST_WIDE_INT offset;
      if (GET_CODE (x) == CONST)
      if (GET_CODE (x) == PLUS)
        offset = INTVAL (XEXP (x, 1));
      output_addr_const (file, x);
      fprintf (file, "%+" PRId64, offset);
      if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
        fprintf (file, "@got");
      fprintf (file, "@pcrel");
  else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
           || GET_CODE (x) == LABEL_REF)
      output_addr_const (file, x);
      if (small_data_operand (x, GET_MODE (x)))
        fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
                 reg_names[SMALL_DATA_REG]);
      gcc_assert (!TARGET_TOC);
  else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
           && REG_P (XEXP (x, 1)))
      if (REGNO (XEXP (x, 0)) == 0)
        fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
                 reg_names[ REGNO (XEXP (x, 0)) ]);
        fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
                 reg_names[ REGNO (XEXP (x, 1)) ]);
  else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
           && CONST_INT_P (XEXP (x, 1)))
    fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
             INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
  else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
           && CONSTANT_P (XEXP (x, 1)))
      fprintf (file, "lo16(");
      output_addr_const (file, XEXP (x, 1));
      fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
  else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
           && CONSTANT_P (XEXP (x, 1)))
      output_addr_const (file, XEXP (x, 1));
      fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
  else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
      /* This hack along with a corresponding hack in
         rs6000_output_addr_const_extra arranges to output addends
         where the assembler expects to find them.  eg.
         .       (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
         without this hack would be output as "x@toc+8@l(9)".  We
         want "x+8@toc@l(9)".  */
      output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
      if (GET_CODE (x) == LO_SUM)
        fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
        fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
    output_addr_const (file, x);
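/* Illustrative examples (added for clarity, not from the original source)
   of the address forms printed above, assuming the default numeric register
   names used by the ELF/SVR4 targets:

       (reg 9)                          ->  "0(9)"
       (plus (reg 9) (const_int 16))    ->  "16(9)"
       (plus (reg 9) (reg 10))          ->  "9,10"
       (plus (reg 0) (reg 10))          ->  "10,0"  (r0 cannot be the base)

   The Darwin path prints the LO_SUM form as "lo16(sym)(reg)", while the
   ELF path prints it as "sym@l(reg)".  */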
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */
rs6000_output_addr_const_extra (FILE *file, rtx x)
  if (GET_CODE (x) == UNSPEC)
    switch (XINT (x, 1))
      case UNSPEC_TOCREL:
        gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
                             && REG_P (XVECEXP (x, 0, 1))
                             && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
        output_addr_const (file, XVECEXP (x, 0, 0));
        if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
            if (INTVAL (tocrel_offset_oac) >= 0)
              fprintf (file, "+");
            output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
        if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
            assemble_name (file, toc_label_name);
        else if (TARGET_ELF)
          fputs ("@toc", file);

      case UNSPEC_MACHOPIC_OFFSET:
        output_addr_const (file, XVECEXP (x, 0, 0));
        machopic_output_function_base_name (file);
/* Target hook for assembling integer objects.  The PowerPC version has
   to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
   is defined.  It also needs to handle DI-mode objects on 64-bit ...  */
rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
#ifdef RELOCATABLE_NEEDS_FIXUP
  /* Special handling for SI values.  */
  if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
      static int recurse = 0;
      /* For -mrelocatable, we mark all addresses that need to be fixed up in
         the .fixup section.  Since the TOC section is already relocated, we
         don't need to mark it here.  We used to skip the text section, but it
         should never be valid for relocated addresses to be placed in the
         text section.  */
      if (DEFAULT_ABI == ABI_V4
          && (TARGET_RELOCATABLE || flag_pic > 1)
          && in_section != toc_section
          && !CONST_SCALAR_INT_P (x)
          ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
          ASM_OUTPUT_LABEL (asm_out_file, buf);
          fprintf (asm_out_file, "\t.long\t(");
          output_addr_const (asm_out_file, x);
          fprintf (asm_out_file, ")@fixup\n");
          fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
          ASM_OUTPUT_ALIGN (asm_out_file, 2);
          fprintf (asm_out_file, "\t.long\t");
          assemble_name (asm_out_file, buf);
          fprintf (asm_out_file, "\n\t.previous\n");
      /* Remove initial .'s to turn a -mcall-aixdesc function
         address into the address of the descriptor, not the function
         itself.  */
      else if (SYMBOL_REF_P (x)
               && XSTR (x, 0)[0] == '.'
               && DEFAULT_ABI == ABI_AIX)
          const char *name = XSTR (x, 0);
          while (*name == '.')
          fprintf (asm_out_file, "\t.long\t%s\n", name);
#endif /* RELOCATABLE_NEEDS_FIXUP */
  return default_assemble_integer (x, size, aligned_p);
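/* Roughly what the .fixup path above emits for a relocatable SImode pointer
   to "sym" (sketch added for illustration, not part of the original source;
   <n> stands for the running fixuplabelno):

	.LCP<n>:
		.long	(sym)@fixup
		.section ".fixup","aw"
		.align	2
		.long	.LCP<n>
		.previous
   */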
/* Return a template string for assembly to emit when making an
   external call.  FUNOP is the call mem argument operand number.  */
static const char *
rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
  /* -Wformat-overflow workaround, without which gcc thinks that %u
     might produce 10 digits.  */
  gcc_assert (funop <= MAX_RECOG_OPERANDS);
  if (GET_CODE (operands[funop + 1]) == UNSPEC)
      if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
        sprintf (arg, "(%%%u@tlsgd)", funop + 1);
      else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
        sprintf (arg, "(%%&@tlsld)");
  /* The magic 32768 offset here corresponds to the offset of
     r30 in .got2, as given by LCTOC1.  See sysv4.h:toc_section.  */
  sprintf (z, "%%z%u%s", funop,
           (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
  static char str[32];  /* 1 spare */
  if (rs6000_pcrel_p ())
    sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
             sibcall ? "" : "\n\tnop");
  else if (DEFAULT_ABI == ABI_V4)
    sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
             flag_pic ? "@plt" : "");
  /* If/when we remove the mlongcall opt, we can share the AIX/ELGv2 case. */
  else if (DEFAULT_ABI == ABI_DARWIN)
      /* The cookie is in operand func+2.  */
      gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
      int cookie = INTVAL (operands[funop + 2]);
      if (cookie & CALL_LONG)
          tree funname = get_identifier (XSTR (operands[funop], 0));
          tree labelname = get_prev_label (funname);
          gcc_checking_assert (labelname && !sibcall);
          /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
             instruction will reach 'foo', otherwise link as 'bl L42'".
             "L42" should be a 'branch island', that will do a far jump to
             'foo'.  Branch islands are generated in
             macho_branch_islands().  */
          sprintf (str, "jbsr %%z%u,%.10s", funop,
                   IDENTIFIER_POINTER (labelname));
      /* Same as AIX or ELFv2, except to keep backwards compat, no nop ...  */
      sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
    gcc_unreachable ();

rs6000_call_template (rtx *operands, unsigned int funop)
  return rs6000_call_template_1 (operands, funop, false);

rs6000_sibcall_template (rtx *operands, unsigned int funop)
  return rs6000_call_template_1 (operands, funop, true);
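/* Worked examples (added, not in the original source) of the templates
   built above for a call whose %z operand resolves to "foo":

     pcrel (Power10):             "bl foo@notoc"
     AIX / ELFv2 external call:   "bl foo" "\n\tnop"   (the nop is the
                                  linker's TOC-restore slot)
     SysV with -fPIC:             "bl foo@plt"

   Sibling calls drop the "l" suffix and the trailing nop.  */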
14568 /* As above, for indirect calls. */
14570 static const char *
14571 rs6000_indirect_call_template_1 (rtx
*operands
, unsigned int funop
,
14574 /* -Wformat-overflow workaround, without which gcc thinks that %u
14575 might produce 10 digits. Note that -Wformat-overflow will not
14576 currently warn here for str[], so do not rely on a warning to
14577 ensure str[] is correctly sized. */
14578 gcc_assert (funop
<= MAX_RECOG_OPERANDS
);
14580 /* Currently, funop is either 0 or 1. The maximum string is always
14581 a !speculate 64-bit __tls_get_addr call.
14584 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14585 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14587 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14588 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14595 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14596 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14598 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14599 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14606 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14607 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14609 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14610 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14617 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14618 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
14620 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14621 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
14625 static char str
[160]; /* 8 spare */
14627 const char *ptrload
= TARGET_64BIT
? "d" : "wz";
14629 if (DEFAULT_ABI
== ABI_AIX
)
14632 ptrload
, funop
+ 3);
14634 /* We don't need the extra code to stop indirect call speculation if
14636 bool speculate
= (TARGET_MACHO
14637 || rs6000_speculate_indirect_jumps
14638 || (REG_P (operands
[funop
])
14639 && REGNO (operands
[funop
]) == LR_REGNO
));
14641 if (TARGET_PLTSEQ
&& GET_CODE (operands
[funop
]) == UNSPEC
)
14643 const char *rel64
= TARGET_64BIT
? "64" : "";
14646 if (GET_CODE (operands
[funop
+ 1]) == UNSPEC
)
14648 if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSGD
)
14649 sprintf (tls
, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
14651 else if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSLD
)
14652 sprintf (tls
, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
14656 const char *notoc
= rs6000_pcrel_p () ? "_NOTOC" : "";
14657 const char *addend
= (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
14658 && flag_pic
== 2 ? "+32768" : "");
14662 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
14663 tls
, rel64
, notoc
, funop
, addend
);
14664 s
+= sprintf (s
, "crset 2\n\t");
14667 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
14668 tls
, rel64
, notoc
, funop
, addend
);
14670 else if (!speculate
)
14671 s
+= sprintf (s
, "crset 2\n\t");
14673 if (rs6000_pcrel_p ())
14676 sprintf (s
, "b%%T%ul", funop
);
14678 sprintf (s
, "beq%%T%ul-", funop
);
14680 else if (DEFAULT_ABI
== ABI_AIX
)
14686 funop
, ptrload
, funop
+ 4);
14691 funop
, ptrload
, funop
+ 4);
14693 else if (DEFAULT_ABI
== ABI_ELFv2
)
14699 funop
, ptrload
, funop
+ 3);
14704 funop
, ptrload
, funop
+ 3);
14711 funop
, sibcall
? "" : "l");
14715 funop
, sibcall
? "" : "l", sibcall
? "\n\tb $" : "");
14721 rs6000_indirect_call_template (rtx
*operands
, unsigned int funop
)
14723 return rs6000_indirect_call_template_1 (operands
, funop
, false);
14727 rs6000_indirect_sibcall_template (rtx
*operands
, unsigned int funop
)
14729 return rs6000_indirect_call_template_1 (operands
, funop
, true);
14733 /* Output indirect call insns. WHICH identifies the type of sequence. */
14735 rs6000_pltseq_template (rtx
*operands
, int which
)
14737 const char *rel64
= TARGET_64BIT
? "64" : "";
14740 if (GET_CODE (operands
[3]) == UNSPEC
)
14742 char off
= which
== RS6000_PLTSEQ_PLT_PCREL34
? '8' : '4';
14743 if (XINT (operands
[3], 1) == UNSPEC_TLSGD
)
14744 sprintf (tls
, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
14746 else if (XINT (operands
[3], 1) == UNSPEC_TLSLD
)
14747 sprintf (tls
, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
14751 gcc_assert (DEFAULT_ABI
== ABI_ELFv2
|| DEFAULT_ABI
== ABI_V4
);
14752 static char str
[96]; /* 10 spare */
14753 char off
= WORDS_BIG_ENDIAN
? '2' : '4';
14754 const char *addend
= (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
14755 && flag_pic
== 2 ? "+32768" : "");
14758 case RS6000_PLTSEQ_TOCSAVE
:
14761 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
14762 TARGET_64BIT
? "d 2,24(1)" : "w 2,12(1)",
14765 case RS6000_PLTSEQ_PLT16_HA
:
14766 if (DEFAULT_ABI
== ABI_V4
&& !flag_pic
)
14769 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
14773 "addis %%0,%%1,0\n\t"
14774 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
14775 tls
, off
, rel64
, addend
);
14777 case RS6000_PLTSEQ_PLT16_LO
:
14779 "l%s %%0,0(%%1)\n\t"
14780 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
14781 TARGET_64BIT
? "d" : "wz",
14782 tls
, off
, rel64
, TARGET_64BIT
? "_DS" : "", addend
);
14784 case RS6000_PLTSEQ_MTCTR
:
14787 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
14788 tls
, rel64
, addend
);
14790 case RS6000_PLTSEQ_PLT_PCREL34
:
14792 "pl%s %%0,0(0),1\n\t"
14793 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
14794 TARGET_64BIT
? "d" : "wz",
14798 gcc_unreachable ();
#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
/* Emit an assembler directive to set symbol visibility for DECL to
   VISIBILITY_TYPE.  */
rs6000_assemble_visibility (tree decl, int vis)
  /* Functions need to have their entry point symbol visibility set as
     well as their descriptor symbol visibility.  */
  if (DEFAULT_ABI == ABI_AIX
      && TREE_CODE (decl) == FUNCTION_DECL)
      static const char * const visibility_types[] = {
        NULL, "protected", "hidden", "internal"
      const char *name, *type;
      name = ((* targetm.strip_name_encoding)
              (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
      type = visibility_types[vis];
      fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
      fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
    default_assemble_visibility (decl, vis);
/* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
   entry.  If RECORD_P is true and the target supports named sections,
   the location of the NOPs will be recorded in a special object section
   called "__patchable_function_entries".  This routine may be called
   twice per function to put NOPs before and after the function entry.  */
rs6000_print_patchable_function_entry (FILE *file,
                                       unsigned HOST_WIDE_INT patch_area_size,
  unsigned int flags = SECTION_WRITE | SECTION_RELRO;
  /* When .opd section is emitted, the function symbol
     default_print_patchable_function_entry_1 is emitted into the .opd section
     while the patchable area is emitted into the function section.
     Don't use SECTION_LINK_ORDER in that case.  */
  if (!(TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
      && HAVE_GAS_SECTION_LINK_ORDER)
    flags |= SECTION_LINK_ORDER;
  default_print_patchable_function_entry_1 (file, patch_area_size, record_p,
rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
  /* Reversal of FP compares takes care -- an ordered compare
     becomes an unordered compare and vice versa.  */
  if (mode == CCFPmode
      && (!flag_finite_math_only
          || code == UNLT || code == UNLE || code == UNGT || code == UNGE
          || code == UNEQ || code == LTGT))
    return reverse_condition_maybe_unordered (code);
  return reverse_condition (code);
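/* Example added for clarity (not part of the original source): reversing GE
   in CCFPmode yields UNLT rather than LT, so the reversed test is still the
   exact complement of the original even when an operand is a NaN --
   (a >= b) is false for NaNs while (a UNLT b) is true for them.  */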
14876 /* Generate a compare for CODE. Return a brand-new rtx that
14877 represents the result of the compare. */
14880 rs6000_generate_compare (rtx cmp
, machine_mode mode
)
14882 machine_mode comp_mode
;
14883 rtx compare_result
;
14884 enum rtx_code code
= GET_CODE (cmp
);
14885 rtx op0
= XEXP (cmp
, 0);
14886 rtx op1
= XEXP (cmp
, 1);
14888 if (!TARGET_FLOAT128_HW
&& FLOAT128_VECTOR_P (mode
))
14889 comp_mode
= CCmode
;
14890 else if (FLOAT_MODE_P (mode
))
14891 comp_mode
= CCFPmode
;
14892 else if (code
== GTU
|| code
== LTU
14893 || code
== GEU
|| code
== LEU
)
14894 comp_mode
= CCUNSmode
;
14895 else if ((code
== EQ
|| code
== NE
)
14896 && unsigned_reg_p (op0
)
14897 && (unsigned_reg_p (op1
)
14898 || (CONST_INT_P (op1
) && INTVAL (op1
) != 0)))
14899 /* These are unsigned values, perhaps there will be a later
14900 ordering compare that can be shared with this one. */
14901 comp_mode
= CCUNSmode
;
14903 comp_mode
= CCmode
;
14905 /* If we have an unsigned compare, make sure we don't have a signed value as
14907 if (comp_mode
== CCUNSmode
&& CONST_INT_P (op1
)
14908 && INTVAL (op1
) < 0)
14910 op0
= copy_rtx_if_shared (op0
);
14911 op1
= force_reg (GET_MODE (op0
), op1
);
14912 cmp
= gen_rtx_fmt_ee (code
, GET_MODE (cmp
), op0
, op1
);
14915 /* First, the compare. */
14916 compare_result
= gen_reg_rtx (comp_mode
);
14918 /* IEEE 128-bit support in VSX registers when we do not have hardware
14920 if (!TARGET_FLOAT128_HW
&& FLOAT128_VECTOR_P (mode
))
14922 rtx libfunc
= NULL_RTX
;
14923 bool check_nan
= false;
14930 libfunc
= optab_libfunc (eq_optab
, mode
);
14935 libfunc
= optab_libfunc (ge_optab
, mode
);
14940 libfunc
= optab_libfunc (le_optab
, mode
);
14945 libfunc
= optab_libfunc (unord_optab
, mode
);
14946 code
= (code
== UNORDERED
) ? NE
: EQ
;
14952 libfunc
= optab_libfunc (ge_optab
, mode
);
14953 code
= (code
== UNGE
) ? GE
: GT
;
14959 libfunc
= optab_libfunc (le_optab
, mode
);
14960 code
= (code
== UNLE
) ? LE
: LT
;
14966 libfunc
= optab_libfunc (eq_optab
, mode
);
14967 code
= (code
== UNEQ
) ? EQ
: NE
;
14971 gcc_unreachable ();
14974 gcc_assert (libfunc
);
14977 dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
14978 SImode
, op0
, mode
, op1
, mode
);
14980 /* The library signals an exception for signalling NaNs, so we need to
14981 handle isgreater, etc. by first checking isordered. */
14984 rtx ne_rtx
, normal_dest
, unord_dest
;
14985 rtx unord_func
= optab_libfunc (unord_optab
, mode
);
14986 rtx join_label
= gen_label_rtx ();
14987 rtx join_ref
= gen_rtx_LABEL_REF (VOIDmode
, join_label
);
14988 rtx unord_cmp
= gen_reg_rtx (comp_mode
);
14991 /* Test for either value being a NaN. */
14992 gcc_assert (unord_func
);
14993 unord_dest
= emit_library_call_value (unord_func
, NULL_RTX
, LCT_CONST
,
14994 SImode
, op0
, mode
, op1
, mode
);
14996 /* Set value (0) if either value is a NaN, and jump to the join
14998 dest
= gen_reg_rtx (SImode
);
14999 emit_move_insn (dest
, const1_rtx
);
15000 emit_insn (gen_rtx_SET (unord_cmp
,
15001 gen_rtx_COMPARE (comp_mode
, unord_dest
,
15004 ne_rtx
= gen_rtx_NE (comp_mode
, unord_cmp
, const0_rtx
);
15005 emit_jump_insn (gen_rtx_SET (pc_rtx
,
15006 gen_rtx_IF_THEN_ELSE (VOIDmode
, ne_rtx
,
15010 /* Do the normal comparison, knowing that the values are not
15012 normal_dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
15013 SImode
, op0
, mode
, op1
, mode
);
15015 emit_insn (gen_cstoresi4 (dest
,
15016 gen_rtx_fmt_ee (code
, SImode
, normal_dest
,
15018 normal_dest
, const0_rtx
));
15020 /* Join NaN and non-Nan paths. Compare dest against 0. */
15021 emit_label (join_label
);
15025 emit_insn (gen_rtx_SET (compare_result
,
15026 gen_rtx_COMPARE (comp_mode
, dest
, const0_rtx
)));
15031 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
15032 CLOBBERs to match cmptf_internal2 pattern. */
15033 if (comp_mode
== CCFPmode
&& TARGET_XL_COMPAT
15034 && FLOAT128_IBM_P (GET_MODE (op0
))
15035 && TARGET_HARD_FLOAT
)
15036 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
15038 gen_rtx_SET (compare_result
,
15039 gen_rtx_COMPARE (comp_mode
, op0
, op1
)),
15040 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15041 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15042 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15043 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15044 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15045 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15046 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15047 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15048 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (Pmode
)))));
15049 else if (GET_CODE (op1
) == UNSPEC
15050 && XINT (op1
, 1) == UNSPEC_SP_TEST
)
15052 rtx op1b
= XVECEXP (op1
, 0, 0);
15053 comp_mode
= CCEQmode
;
15054 compare_result
= gen_reg_rtx (CCEQmode
);
15056 emit_insn (gen_stack_protect_testdi (compare_result
, op0
, op1b
));
15058 emit_insn (gen_stack_protect_testsi (compare_result
, op0
, op1b
));
15061 emit_insn (gen_rtx_SET (compare_result
,
15062 gen_rtx_COMPARE (comp_mode
, op0
, op1
)));
15065 validate_condition_mode (code
, GET_MODE (compare_result
));
15067 return gen_rtx_fmt_ee (code
, VOIDmode
, compare_result
, const0_rtx
);
/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
  machine_mode mode1 = TYPE_MODE (type1);
  machine_mode mode2 = TYPE_MODE (type2);
  /* For complex modes, use the inner type.  */
  if (COMPLEX_MODE_P (mode1))
    mode1 = GET_MODE_INNER (mode1);
  if (COMPLEX_MODE_P (mode2))
    mode2 = GET_MODE_INNER (mode2);
  /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
     double to intermix unless -mfloat128-convert.  */
  if (mode1 == mode2)
  if (!TARGET_FLOAT128_CVT)
      if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
          || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
        return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
15106 /* Expand floating point conversion to/from __float128 and __ibm128. */
15109 rs6000_expand_float128_convert (rtx dest
, rtx src
, bool unsigned_p
)
15111 machine_mode dest_mode
= GET_MODE (dest
);
15112 machine_mode src_mode
= GET_MODE (src
);
15113 convert_optab cvt
= unknown_optab
;
15114 bool do_move
= false;
15115 rtx libfunc
= NULL_RTX
;
15117 typedef rtx (*rtx_2func_t
) (rtx
, rtx
);
15118 rtx_2func_t hw_convert
= (rtx_2func_t
)0;
15122 rtx_2func_t from_df
;
15123 rtx_2func_t from_sf
;
15124 rtx_2func_t from_si_sign
;
15125 rtx_2func_t from_si_uns
;
15126 rtx_2func_t from_di_sign
;
15127 rtx_2func_t from_di_uns
;
15130 rtx_2func_t to_si_sign
;
15131 rtx_2func_t to_si_uns
;
15132 rtx_2func_t to_di_sign
;
15133 rtx_2func_t to_di_uns
;
15134 } hw_conversions
[2] = {
15135 /* convertions to/from KFmode */
15137 gen_extenddfkf2_hw
, /* KFmode <- DFmode. */
15138 gen_extendsfkf2_hw
, /* KFmode <- SFmode. */
15139 gen_float_kfsi2_hw
, /* KFmode <- SImode (signed). */
15140 gen_floatuns_kfsi2_hw
, /* KFmode <- SImode (unsigned). */
15141 gen_float_kfdi2_hw
, /* KFmode <- DImode (signed). */
15142 gen_floatuns_kfdi2_hw
, /* KFmode <- DImode (unsigned). */
15143 gen_trunckfdf2_hw
, /* DFmode <- KFmode. */
15144 gen_trunckfsf2_hw
, /* SFmode <- KFmode. */
15145 gen_fix_kfsi2_hw
, /* SImode <- KFmode (signed). */
15146 gen_fixuns_kfsi2_hw
, /* SImode <- KFmode (unsigned). */
15147 gen_fix_kfdi2_hw
, /* DImode <- KFmode (signed). */
15148 gen_fixuns_kfdi2_hw
, /* DImode <- KFmode (unsigned). */
15151 /* convertions to/from TFmode */
15153 gen_extenddftf2_hw
, /* TFmode <- DFmode. */
15154 gen_extendsftf2_hw
, /* TFmode <- SFmode. */
15155 gen_float_tfsi2_hw
, /* TFmode <- SImode (signed). */
15156 gen_floatuns_tfsi2_hw
, /* TFmode <- SImode (unsigned). */
15157 gen_float_tfdi2_hw
, /* TFmode <- DImode (signed). */
15158 gen_floatuns_tfdi2_hw
, /* TFmode <- DImode (unsigned). */
15159 gen_trunctfdf2_hw
, /* DFmode <- TFmode. */
15160 gen_trunctfsf2_hw
, /* SFmode <- TFmode. */
15161 gen_fix_tfsi2_hw
, /* SImode <- TFmode (signed). */
15162 gen_fixuns_tfsi2_hw
, /* SImode <- TFmode (unsigned). */
15163 gen_fix_tfdi2_hw
, /* DImode <- TFmode (signed). */
15164 gen_fixuns_tfdi2_hw
, /* DImode <- TFmode (unsigned). */
15168 if (dest_mode
== src_mode
)
15169 gcc_unreachable ();
15171 /* Eliminate memory operations. */
15173 src
= force_reg (src_mode
, src
);
15177 rtx tmp
= gen_reg_rtx (dest_mode
);
15178 rs6000_expand_float128_convert (tmp
, src
, unsigned_p
);
15179 rs6000_emit_move (dest
, tmp
, dest_mode
);
15183 /* Convert to IEEE 128-bit floating point. */
15184 if (FLOAT128_IEEE_P (dest_mode
))
15186 if (dest_mode
== KFmode
)
15188 else if (dest_mode
== TFmode
)
15191 gcc_unreachable ();
15197 hw_convert
= hw_conversions
[kf_or_tf
].from_df
;
15202 hw_convert
= hw_conversions
[kf_or_tf
].from_sf
;
15208 if (FLOAT128_IBM_P (src_mode
))
15217 cvt
= ufloat_optab
;
15218 hw_convert
= hw_conversions
[kf_or_tf
].from_si_uns
;
15222 cvt
= sfloat_optab
;
15223 hw_convert
= hw_conversions
[kf_or_tf
].from_si_sign
;
15230 cvt
= ufloat_optab
;
15231 hw_convert
= hw_conversions
[kf_or_tf
].from_di_uns
;
15235 cvt
= sfloat_optab
;
15236 hw_convert
= hw_conversions
[kf_or_tf
].from_di_sign
;
15241 gcc_unreachable ();
15245 /* Convert from IEEE 128-bit floating point. */
15246 else if (FLOAT128_IEEE_P (src_mode
))
15248 if (src_mode
== KFmode
)
15250 else if (src_mode
== TFmode
)
15253 gcc_unreachable ();
15259 hw_convert
= hw_conversions
[kf_or_tf
].to_df
;
15264 hw_convert
= hw_conversions
[kf_or_tf
].to_sf
;
15270 if (FLOAT128_IBM_P (dest_mode
))
15280 hw_convert
= hw_conversions
[kf_or_tf
].to_si_uns
;
15285 hw_convert
= hw_conversions
[kf_or_tf
].to_si_sign
;
15293 hw_convert
= hw_conversions
[kf_or_tf
].to_di_uns
;
15298 hw_convert
= hw_conversions
[kf_or_tf
].to_di_sign
;
15303 gcc_unreachable ();
15307 /* Both IBM format. */
15308 else if (FLOAT128_IBM_P (dest_mode
) && FLOAT128_IBM_P (src_mode
))
15312 gcc_unreachable ();
15314 /* Handle conversion between TFmode/KFmode/IFmode. */
15316 emit_insn (gen_rtx_SET (dest
, gen_rtx_FLOAT_EXTEND (dest_mode
, src
)));
15318 /* Handle conversion if we have hardware support. */
15319 else if (TARGET_FLOAT128_HW
&& hw_convert
)
15320 emit_insn ((hw_convert
) (dest
, src
));
15322 /* Call an external function to do the conversion. */
15323 else if (cvt
!= unknown_optab
)
15325 libfunc
= convert_optab_libfunc (cvt
, dest_mode
, src_mode
);
15326 gcc_assert (libfunc
!= NULL_RTX
);
15328 dest2
= emit_library_call_value (libfunc
, dest
, LCT_CONST
, dest_mode
,
15331 gcc_assert (dest2
!= NULL_RTX
);
15332 if (!rtx_equal_p (dest
, dest2
))
15333 emit_move_insn (dest
, dest2
);
15337 gcc_unreachable ();
/* Emit RTL that sets a register to zero if OP1 and OP2 are equal.  SCRATCH
   can be used as that dest register.  Return the dest register.  */
rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
  if (op2 == const0_rtx)
  if (GET_CODE (scratch) == SCRATCH)
    scratch = gen_reg_rtx (mode);
  if (logical_operand (op2, mode))
    emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
    emit_insn (gen_rtx_SET (scratch,
                            gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
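/* Illustration (added, not in the original source): the value left in
   SCRATCH is either op1 ^ op2 (when op2 fits the mask forms accepted by
   logical_operand) or op1 + (-op2); both are zero exactly when op1 == op2,
   which is what the caller's EQ/NE test against zero consumes.  */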
/* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
   requires this.  The result is mode MODE.  */
rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
  if (code == LTGT || code == LE || code == UNLT)
    cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
  if (code == LTGT || code == GE || code == UNGT)
    cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
  if (code == LE || code == GE || code == UNEQ)
    cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
  if (code == UNLT || code == UNGT || code == UNEQ)
    cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
  gcc_assert (n == 2);
  rtx cc = gen_reg_rtx (CCEQmode);
  rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
  emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
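/* For reference (comment added, not in the original source), the CR bit
   pairs OR'ed together above are:

     LTGT = LT | GT     LE = LT | EQ      GE = GT | EQ
     UNLT = LT | UN     UNGT = GT | UN    UNEQ = EQ | UN

   so each of these six FP conditions needs exactly one cror.  */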
15390 rs6000_emit_sCOND (machine_mode mode
, rtx operands
[])
15392 rtx condition_rtx
= rs6000_generate_compare (operands
[1], mode
);
15393 rtx_code cond_code
= GET_CODE (condition_rtx
);
15395 if (FLOAT_MODE_P (mode
) && HONOR_NANS (mode
)
15396 && !(FLOAT128_VECTOR_P (mode
) && !TARGET_FLOAT128_HW
))
15398 else if (cond_code
== NE
15399 || cond_code
== GE
|| cond_code
== LE
15400 || cond_code
== GEU
|| cond_code
== LEU
15401 || cond_code
== ORDERED
|| cond_code
== UNGE
|| cond_code
== UNLE
)
15403 rtx not_result
= gen_reg_rtx (CCEQmode
);
15404 rtx not_op
, rev_cond_rtx
;
15405 machine_mode cc_mode
;
15407 cc_mode
= GET_MODE (XEXP (condition_rtx
, 0));
15409 rev_cond_rtx
= gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode
, cond_code
),
15410 SImode
, XEXP (condition_rtx
, 0), const0_rtx
);
15411 not_op
= gen_rtx_COMPARE (CCEQmode
, rev_cond_rtx
, const0_rtx
);
15412 emit_insn (gen_rtx_SET (not_result
, not_op
));
15413 condition_rtx
= gen_rtx_EQ (VOIDmode
, not_result
, const0_rtx
);
15416 machine_mode op_mode
= GET_MODE (XEXP (operands
[1], 0));
15417 if (op_mode
== VOIDmode
)
15418 op_mode
= GET_MODE (XEXP (operands
[1], 1));
15420 if (TARGET_POWERPC64
&& (op_mode
== DImode
|| FLOAT_MODE_P (mode
)))
15422 PUT_MODE (condition_rtx
, DImode
);
15423 convert_move (operands
[0], condition_rtx
, 0);
15427 PUT_MODE (condition_rtx
, SImode
);
15428 emit_insn (gen_rtx_SET (operands
[0], condition_rtx
));
/* Emit a branch of kind CODE to location LOC.  */
rs6000_emit_cbranch (machine_mode mode, rtx operands[])
  rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
  rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
  rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
  emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
15443 /* Return the string to output a conditional branch to LABEL, which is
15444 the operand template of the label, or NULL if the branch is really a
15445 conditional return.
15447 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
15448 condition code register and its mode specifies what kind of
15449 comparison we made.
15451 REVERSED is nonzero if we should reverse the sense of the comparison.
15453 INSN is the insn. */
15456 output_cbranch (rtx op
, const char *label
, int reversed
, rtx_insn
*insn
)
15458 static char string
[64];
15459 enum rtx_code code
= GET_CODE (op
);
15460 rtx cc_reg
= XEXP (op
, 0);
15461 machine_mode mode
= GET_MODE (cc_reg
);
15462 int cc_regno
= REGNO (cc_reg
) - CR0_REGNO
;
15463 int need_longbranch
= label
!= NULL
&& get_attr_length (insn
) == 8;
15464 int really_reversed
= reversed
^ need_longbranch
;
15470 validate_condition_mode (code
, mode
);
15472 /* Work out which way this really branches. We could use
15473 reverse_condition_maybe_unordered here always but this
15474 makes the resulting assembler clearer. */
15475 if (really_reversed
)
15477 /* Reversal of FP compares takes care -- an ordered compare
15478 becomes an unordered compare and vice versa. */
15479 if (mode
== CCFPmode
)
15480 code
= reverse_condition_maybe_unordered (code
);
15482 code
= reverse_condition (code
);
15487 /* Not all of these are actually distinct opcodes, but
15488 we distinguish them for clarity of the resulting assembler. */
15489 case NE
: case LTGT
:
15490 ccode
= "ne"; break;
15491 case EQ
: case UNEQ
:
15492 ccode
= "eq"; break;
15494 ccode
= "ge"; break;
15495 case GT
: case GTU
: case UNGT
:
15496 ccode
= "gt"; break;
15498 ccode
= "le"; break;
15499 case LT
: case LTU
: case UNLT
:
15500 ccode
= "lt"; break;
15501 case UNORDERED
: ccode
= "un"; break;
15502 case ORDERED
: ccode
= "nu"; break;
15503 case UNGE
: ccode
= "nl"; break;
15504 case UNLE
: ccode
= "ng"; break;
15506 gcc_unreachable ();
15509 /* Maybe we have a guess as to how likely the branch is. */
15511 note
= find_reg_note (insn
, REG_BR_PROB
, NULL_RTX
);
15512 if (note
!= NULL_RTX
)
15514 /* PROB is the difference from 50%. */
15515 int prob
= profile_probability::from_reg_br_prob_note (XINT (note
, 0))
15516 .to_reg_br_prob_base () - REG_BR_PROB_BASE
/ 2;
15518 /* Only hint for highly probable/improbable branches on newer cpus when
15519 we have real profile data, as static prediction overrides processor
15520 dynamic prediction. For older cpus we may as well always hint, but
15521 assume not taken for branches that are very close to 50% as a
15522 mispredicted taken branch is more expensive than a
15523 mispredicted not-taken branch. */
15524 if (rs6000_always_hint
15525 || (abs (prob
) > REG_BR_PROB_BASE
/ 100 * 48
15526 && (profile_status_for_fn (cfun
) != PROFILE_GUESSED
)
15527 && br_prob_note_reliable_p (note
)))
15529 if (abs (prob
) > REG_BR_PROB_BASE
/ 20
15530 && ((prob
> 0) ^ need_longbranch
))
15538 s
+= sprintf (s
, "b%slr%s ", ccode
, pred
);
15540 s
+= sprintf (s
, "b%s%s ", ccode
, pred
);
15542 /* We need to escape any '%' characters in the reg_names string.
15543 Assume they'd only be the first character.... */
15544 if (reg_names
[cc_regno
+ CR0_REGNO
][0] == '%')
15546 s
+= sprintf (s
, "%s", reg_names
[cc_regno
+ CR0_REGNO
]);
15550 /* If the branch distance was too far, we may have to use an
15551 unconditional branch to go the distance. */
15552 if (need_longbranch
)
15553 s
+= sprintf (s
, ",$+8\n\tb %s", label
);
15555 s
+= sprintf (s
, ",%s", label
);
15561 /* Return insn for VSX or Altivec comparisons. */
15564 rs6000_emit_vector_compare_inner (enum rtx_code code
, rtx op0
, rtx op1
)
15567 machine_mode mode
= GET_MODE (op0
);
15575 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
15586 mask
= gen_reg_rtx (mode
);
15587 emit_insn (gen_rtx_SET (mask
, gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
15594 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
15595 DMODE is expected destination mode. This is a recursive function. */
15598 rs6000_emit_vector_compare (enum rtx_code rcode
,
15600 machine_mode dmode
)
15603 bool swap_operands
= false;
15604 bool try_again
= false;
15606 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode
));
15607 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
15609 /* See if the comparison works as is. */
15610 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
);
15618 swap_operands
= true;
15623 swap_operands
= true;
15631 /* Invert condition and try again.
15632 e.g., A != B becomes ~(A==B). */
15634 enum rtx_code rev_code
;
15635 enum insn_code nor_code
;
15638 rev_code
= reverse_condition_maybe_unordered (rcode
);
15639 if (rev_code
== UNKNOWN
)
15642 nor_code
= optab_handler (one_cmpl_optab
, dmode
);
15643 if (nor_code
== CODE_FOR_nothing
)
15646 mask2
= rs6000_emit_vector_compare (rev_code
, op0
, op1
, dmode
);
15650 mask
= gen_reg_rtx (dmode
);
15651 emit_insn (GEN_FCN (nor_code
) (mask
, mask2
));
15659 /* Try GT/GTU/LT/LTU OR EQ */
15662 enum insn_code ior_code
;
15663 enum rtx_code new_code
;
15684 gcc_unreachable ();
15687 ior_code
= optab_handler (ior_optab
, dmode
);
15688 if (ior_code
== CODE_FOR_nothing
)
15691 c_rtx
= rs6000_emit_vector_compare (new_code
, op0
, op1
, dmode
);
15695 eq_rtx
= rs6000_emit_vector_compare (EQ
, op0
, op1
, dmode
);
15699 mask
= gen_reg_rtx (dmode
);
15700 emit_insn (GEN_FCN (ior_code
) (mask
, c_rtx
, eq_rtx
));
15711 std::swap (op0
, op1
);
15713 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
);
15718 /* You only get two chances. */
15722 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
15723 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
15724 operands for the relation operation COND. */
15727 rs6000_emit_vector_cond_expr (rtx dest
, rtx op_true
, rtx op_false
,
15728 rtx cond
, rtx cc_op0
, rtx cc_op1
)
15730 machine_mode dest_mode
= GET_MODE (dest
);
15731 machine_mode mask_mode
= GET_MODE (cc_op0
);
15732 enum rtx_code rcode
= GET_CODE (cond
);
15734 bool invert_move
= false;
15736 if (VECTOR_UNIT_NONE_P (dest_mode
))
15739 gcc_assert (GET_MODE_SIZE (dest_mode
) == GET_MODE_SIZE (mask_mode
)
15740 && GET_MODE_NUNITS (dest_mode
) == GET_MODE_NUNITS (mask_mode
));
15744 /* Swap operands if we can, and fall back to doing the operation as
15745 specified, and doing a NOR to invert the test. */
15751 /* Invert condition and try again.
15752 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
15753 invert_move
= true;
15754 rcode
= reverse_condition_maybe_unordered (rcode
);
15755 if (rcode
== UNKNOWN
)
15761 if (GET_MODE_CLASS (mask_mode
) == MODE_VECTOR_INT
)
15763 /* Invert condition to avoid compound test. */
15764 invert_move
= true;
15765 rcode
= reverse_condition (rcode
);
15774 /* Invert condition to avoid compound test if necessary. */
15775 if (rcode
== GEU
|| rcode
== LEU
)
15777 invert_move
= true;
15778 rcode
= reverse_condition (rcode
);
15786 /* Get the vector mask for the given relational operations. */
15787 mask
= rs6000_emit_vector_compare (rcode
, cc_op0
, cc_op1
, mask_mode
);
15792 if (mask_mode
!= dest_mode
)
15793 mask
= simplify_gen_subreg (dest_mode
, mask
, mask_mode
, 0);
15796 std::swap (op_true
, op_false
);
15798 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
15799 if (GET_MODE_CLASS (dest_mode
) == MODE_VECTOR_INT
15800 && (GET_CODE (op_true
) == CONST_VECTOR
15801 || GET_CODE (op_false
) == CONST_VECTOR
))
15803 rtx constant_0
= CONST0_RTX (dest_mode
);
15804 rtx constant_m1
= CONSTM1_RTX (dest_mode
);
15806 if (op_true
== constant_m1
&& op_false
== constant_0
)
15808 emit_move_insn (dest
, mask
);
15812 else if (op_true
== constant_0
&& op_false
== constant_m1
)
15814 emit_insn (gen_rtx_SET (dest
, gen_rtx_NOT (dest_mode
, mask
)));
15818 /* If we can't use the vector comparison directly, perhaps we can use
15819 the mask for the true or false fields, instead of loading up a
15821 if (op_true
== constant_m1
)
15824 if (op_false
== constant_0
)
15828 if (!REG_P (op_true
) && !SUBREG_P (op_true
))
15829 op_true
= force_reg (dest_mode
, op_true
);
15831 if (!REG_P (op_false
) && !SUBREG_P (op_false
))
15832 op_false
= force_reg (dest_mode
, op_false
);
15834 rtx tmp
= gen_rtx_IOR (dest_mode
,
15835 gen_rtx_AND (dest_mode
, gen_rtx_NOT (dest_mode
, mask
),
15837 gen_rtx_AND (dest_mode
, mask
, op_true
));
15838 emit_insn (gen_rtx_SET (dest
, tmp
));
/* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to emit a
   maximum or minimum with "C" semantics.

   Unless you use -ffast-math, you can't use these instructions to replace
   conditions that implicitly reverse the condition because the comparison
   might generate a NaN or signed zero.

   I.e. the following can be replaced all of the time
	ret = (op1 >  op2) ? op1 : op2	; generate xsmaxcdp
	ret = (op1 >= op2) ? op1 : op2	; generate xsmaxcdp
	ret = (op1 <  op2) ? op1 : op2	; generate xsmincdp
	ret = (op1 <= op2) ? op1 : op2	; generate xsmincdp

   The following can be replaced only if -ffast-math is used:
	ret = (op1 <  op2) ? op2 : op1	; generate xsmaxcdp
	ret = (op1 <= op2) ? op2 : op1	; generate xsmaxcdp
	ret = (op1 >  op2) ? op2 : op1	; generate xsmincdp
	ret = (op1 >= op2) ? op2 : op1	; generate xsmincdp

   Move TRUE_COND to DEST if OP of the operands of the last comparison is
   nonzero/true, FALSE_COND if it is zero/false.

   Return false if we can't generate the appropriate minimum or maximum, and
   true if we did generate the minimum or maximum.  */
15868 rs6000_maybe_emit_maxc_minc (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
15870 enum rtx_code code
= GET_CODE (op
);
15871 rtx op0
= XEXP (op
, 0);
15872 rtx op1
= XEXP (op
, 1);
15873 machine_mode compare_mode
= GET_MODE (op0
);
15874 machine_mode result_mode
= GET_MODE (dest
);
15876 if (result_mode
!= compare_mode
)
15879 /* See the comments of this function, it simply expects GE/GT/LE/LT in
15880 the checks, but for the reversible equivalent UNLT/UNLE/UNGT/UNGE,
15881 we need to do the reversions first to make the following checks
15882 support fewer cases, like:
15884 (a UNLT b) ? op1 : op2 => (a >= b) ? op2 : op1;
15885 (a UNLE b) ? op1 : op2 => (a > b) ? op2 : op1;
15886 (a UNGT b) ? op1 : op2 => (a <= b) ? op2 : op1;
15887 (a UNGE b) ? op1 : op2 => (a < b) ? op2 : op1;
15889 By the way, if we see these UNLT/UNLE/UNGT/UNGE it's guaranteed
15890 that we have 4-way condition codes (LT/GT/EQ/UN), so we do not
15891 have to check for fast-math or the like. */
15892 if (code
== UNGE
|| code
== UNGT
|| code
== UNLE
|| code
== UNLT
)
15894 code
= reverse_condition_maybe_unordered (code
);
15895 std::swap (true_cond
, false_cond
);
15899 if (code
== GE
|| code
== GT
)
15901 else if (code
== LE
|| code
== LT
)
15906 if (rtx_equal_p (op0
, true_cond
) && rtx_equal_p (op1
, false_cond
))
15909 /* Only when NaNs and signed-zeros are not in effect, smax could be
15910 used for `op0 < op1 ? op1 : op0`, and smin could be used for
15911 `op0 > op1 ? op1 : op0`. */
15912 else if (rtx_equal_p (op1
, true_cond
) && rtx_equal_p (op0
, false_cond
)
15913 && !HONOR_NANS (compare_mode
) && !HONOR_SIGNED_ZEROS (compare_mode
))
15919 rs6000_emit_minmax (dest
, max_p
? SMAX
: SMIN
, op0
, op1
);
15923 /* Possibly emit a floating point conditional move by generating a compare that
15924 sets a mask instruction and a XXSEL select instruction.
15926 Move TRUE_COND to DEST if OP of the operands of the last comparison is
15927 nonzero/true, FALSE_COND if it is zero/false.
15929 Return false if the operation cannot be generated, and true if we could
15930 generate the instruction. */
15933 rs6000_maybe_emit_fp_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
15935 enum rtx_code code
= GET_CODE (op
);
15936 rtx op0
= XEXP (op
, 0);
15937 rtx op1
= XEXP (op
, 1);
15938 machine_mode compare_mode
= GET_MODE (op0
);
15939 machine_mode result_mode
= GET_MODE (dest
);
15944 if (!can_create_pseudo_p ())
15947 /* We allow the comparison to be either SFmode/DFmode and the true/false
15948 condition to be either SFmode/DFmode. I.e. we allow:
15953 r = (a == b) ? c : d;
15960 r = (a == b) ? c : d;
15962 but we don't allow intermixing the IEEE 128-bit floating point types with
15963 the 32/64-bit scalar types. */
15965 if (!(compare_mode
== result_mode
15966 || (compare_mode
== SFmode
&& result_mode
== DFmode
)
15967 || (compare_mode
== DFmode
&& result_mode
== SFmode
)))
15980 code
= swap_condition (code
);
15981 std::swap (op0
, op1
);
15988 /* Generate: [(parallel [(set (dest)
15989 (if_then_else (op (cmp1) (cmp2))
15992 (clobber (scratch))])]. */
15994 compare_rtx
= gen_rtx_fmt_ee (code
, CCFPmode
, op0
, op1
);
15995 cmove_rtx
= gen_rtx_SET (dest
,
15996 gen_rtx_IF_THEN_ELSE (result_mode
,
16001 clobber_rtx
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (V2DImode
));
16002 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
16003 gen_rtvec (2, cmove_rtx
, clobber_rtx
)));
/* Helper function to return true if the target has instructions to do a
   compare and set mask instruction that can be used with XXSEL to implement a
   conditional move.  It is also assumed that such a target also supports the
   "C" minimum and maximum instructions.  */
have_compare_and_set_mask (machine_mode mode)
    return TARGET_P9_MINMAX;
    return TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode);
16033 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
16034 operands of the last comparison is nonzero/true, FALSE_COND if it
16035 is zero/false. Return 0 if the hardware has no such operation. */
16038 rs6000_emit_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
16040 enum rtx_code code
= GET_CODE (op
);
16041 rtx op0
= XEXP (op
, 0);
16042 rtx op1
= XEXP (op
, 1);
16043 machine_mode compare_mode
= GET_MODE (op0
);
16044 machine_mode result_mode
= GET_MODE (dest
);
16046 bool is_against_zero
;
16048 /* These modes should always match. */
16049 if (GET_MODE (op1
) != compare_mode
16050 /* In the isel case however, we can use a compare immediate, so
16051 op1 may be a small constant. */
16052 && (!TARGET_ISEL
|| !short_cint_operand (op1
, VOIDmode
)))
16054 if (GET_MODE (true_cond
) != result_mode
)
16056 if (GET_MODE (false_cond
) != result_mode
)
16059 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
16061 if (have_compare_and_set_mask (compare_mode
)
16062 && have_compare_and_set_mask (result_mode
))
16064 if (rs6000_maybe_emit_maxc_minc (dest
, op
, true_cond
, false_cond
))
16067 if (rs6000_maybe_emit_fp_cmove (dest
, op
, true_cond
, false_cond
))
16071 /* Don't allow using floating point comparisons for integer results for
16073 if (FLOAT_MODE_P (compare_mode
) && !FLOAT_MODE_P (result_mode
))
16076 /* First, work out if the hardware can do this at all, or
16077 if it's too slow.... */
16078 if (!FLOAT_MODE_P (compare_mode
))
16081 return rs6000_emit_int_cmove (dest
, op
, true_cond
, false_cond
);
16085 is_against_zero
= op1
== CONST0_RTX (compare_mode
);
16087 /* A floating-point subtract might overflow, underflow, or produce
16088 an inexact result, thus changing the floating-point flags, so it
16089 can't be generated if we care about that. It's safe if one side
16090 of the construct is zero, since then no subtract will be
16092 if (SCALAR_FLOAT_MODE_P (compare_mode
)
16093 && flag_trapping_math
&& ! is_against_zero
)
16096 /* Eliminate half of the comparisons by switching operands, this
16097 makes the remaining code simpler. */
16098 if (code
== UNLT
|| code
== UNGT
|| code
== UNORDERED
|| code
== NE
16099 || code
== LTGT
|| code
== LT
|| code
== UNLE
)
16101 code
= reverse_condition_maybe_unordered (code
);
16103 true_cond
= false_cond
;
16107 /* UNEQ and LTGT take four instructions for a comparison with zero,
16108 it'll probably be faster to use a branch here too. */
16109 if (code
== UNEQ
&& HONOR_NANS (compare_mode
))
16112 /* We're going to try to implement comparisons by performing
16113 a subtract, then comparing against zero. Unfortunately,
16114 Inf - Inf is NaN which is not zero, and so if we don't
16115 know that the operand is finite and the comparison
16116 would treat EQ different to UNORDERED, we can't do it. */
16117 if (HONOR_INFINITIES (compare_mode
)
16118 && code
!= GT
&& code
!= UNGE
16119 && (!CONST_DOUBLE_P (op1
)
16120 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1
)))
16121 /* Constructs of the form (a OP b ? a : b) are safe. */
16122 && ((! rtx_equal_p (op0
, false_cond
) && ! rtx_equal_p (op1
, false_cond
))
16123 || (! rtx_equal_p (op0
, true_cond
)
16124 && ! rtx_equal_p (op1
, true_cond
))))
16127 /* At this point we know we can use fsel. */
16129 /* Don't allow compare_mode other than SFmode or DFmode, for others there
16130 is no fsel instruction. */
16131 if (compare_mode
!= SFmode
&& compare_mode
!= DFmode
)
16134 /* Reduce the comparison to a comparison against zero. */
16135 if (! is_against_zero
)
16137 temp
= gen_reg_rtx (compare_mode
);
16138 emit_insn (gen_rtx_SET (temp
, gen_rtx_MINUS (compare_mode
, op0
, op1
)));
16140 op1
= CONST0_RTX (compare_mode
);
16143 /* If we don't care about NaNs we can reduce some of the comparisons
16144 down to faster ones. */
16145 if (! HONOR_NANS (compare_mode
))
16151 true_cond
= false_cond
;
16164 /* Now, reduce everything down to a GE. */
16171 temp
= gen_reg_rtx (compare_mode
);
16172 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
16177 temp
= gen_reg_rtx (compare_mode
);
16178 emit_insn (gen_rtx_SET (temp
, gen_rtx_ABS (compare_mode
, op0
)));
16183 temp
= gen_reg_rtx (compare_mode
);
16184 emit_insn (gen_rtx_SET (temp
,
16185 gen_rtx_NEG (compare_mode
,
16186 gen_rtx_ABS (compare_mode
, op0
))));
16191 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
16192 temp
= gen_reg_rtx (result_mode
);
16193 emit_insn (gen_rtx_SET (temp
,
16194 gen_rtx_IF_THEN_ELSE (result_mode
,
16195 gen_rtx_GE (VOIDmode
,
16197 true_cond
, false_cond
)));
16198 false_cond
= true_cond
;
16201 temp
= gen_reg_rtx (compare_mode
);
16202 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
16207 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
16208 temp
= gen_reg_rtx (result_mode
);
16209 emit_insn (gen_rtx_SET (temp
,
16210 gen_rtx_IF_THEN_ELSE (result_mode
,
16211 gen_rtx_GE (VOIDmode
,
16213 true_cond
, false_cond
)));
16214 true_cond
= false_cond
;
16217 temp
= gen_reg_rtx (compare_mode
);
16218 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
16223 gcc_unreachable ();
16226 emit_insn (gen_rtx_SET (dest
,
16227 gen_rtx_IF_THEN_ELSE (result_mode
,
16228 gen_rtx_GE (VOIDmode
,
16230 true_cond
, false_cond
)));
/* Same as above, but for ints (isel).  */
rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
  rtx condition_rtx, cr;
  machine_mode mode = GET_MODE (dest);
  enum rtx_code cond_code;
  rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
  if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
  /* PR104335: We now need to expect CC-mode "comparisons"
     coming from ifcvt.  The following code expects proper
     comparisons so better abort here.  */
  if (GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_CC)
  /* We still have to do the compare, because isel doesn't do a
     compare, it just looks at the CRx bits set by a previous compare
     instruction.  */
  condition_rtx = rs6000_generate_compare (op, mode);
  cond_code = GET_CODE (condition_rtx);
  cr = XEXP (condition_rtx, 0);
  signedp = GET_MODE (cr) == CCmode;
  isel_func = (mode == SImode
               ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
               : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
    case LT: case GT: case LTU: case GTU: case EQ:
      /* isel handles these directly.  */
      /* We need to swap the sense of the comparison.  */
      std::swap (false_cond, true_cond);
      PUT_CODE (condition_rtx, reverse_condition (cond_code));
  false_cond = force_reg (mode, false_cond);
  if (true_cond != const0_rtx)
    true_cond = force_reg (mode, true_cond);
  emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
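/* Note added for illustration (not in the original source): isel can only
   test the LT, GT, LTU, GTU and EQ senses of a CR field directly, so a
   request such as "dest = (a >= b) ? x : y" is handled above by reversing
   the condition and swapping the arms, i.e. "dest = (a < b) ? y : x".  */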
rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
  machine_mode mode = GET_MODE (op0);
  /* VSX/altivec have direct min/max insns.  */
  if ((code == SMAX || code == SMIN)
      && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
          || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))
          || (TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode))))
    emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
  if (code == SMAX || code == SMIN)
  if (code == SMAX || code == UMAX)
    target = emit_conditional_move (dest, { c, op0, op1, mode },
                                    op0, op1, mode, 0);
    target = emit_conditional_move (dest, { c, op0, op1, mode },
                                    op1, op0, mode, 0);
  gcc_assert (target);
  if (target != dest)
    emit_move_insn (dest, target);
/* A subroutine of the atomic operation splitters.  Jump to LABEL if
   COND is true.  Mark the jump as unlikely to be taken.  */
emit_unlikely_jump (rtx cond, rtx label)
  rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
  rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
  add_reg_br_prob_note (insn, profile_probability::very_unlikely ());

/* A subroutine of the atomic operation splitters.  Emit a load-locked
   instruction in MODE.  For QI/HImode, possibly use a pattern that includes
   the zero_extend operation.  */
emit_load_locked (machine_mode mode, rtx reg, rtx mem)
  rtx (*fn) (rtx, rtx) = NULL;
      fn = gen_load_lockedqi;
      fn = gen_load_lockedhi;
      if (GET_MODE (mem) == QImode)
        fn = gen_load_lockedqi_si;
      else if (GET_MODE (mem) == HImode)
        fn = gen_load_lockedhi_si;
        fn = gen_load_lockedsi;
      fn = gen_load_lockeddi;
      fn = gen_load_lockedti;
      gcc_unreachable ();
  emit_insn (fn (reg, mem));
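/* For orientation (comment added, not in the original source): the
   gen_load_locked* patterns chosen above correspond to the PowerPC
   load-and-reserve instructions lbarx (QImode), lharx (HImode), lwarx
   (SImode), ldarx (DImode) and lqarx (TImode); the *_si variants load the
   narrow value zero-extended into an SImode register.  */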
/* A subroutine of the atomic operation splitters.  Emit a store-conditional
   instruction in MODE.  */
emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
  rtx (*fn) (rtx, rtx, rtx) = NULL;
      fn = gen_store_conditionalqi;
      fn = gen_store_conditionalhi;
      fn = gen_store_conditionalsi;
      fn = gen_store_conditionaldi;
      fn = gen_store_conditionalti;
      gcc_unreachable ();
  /* Emit sync before stwcx. to address PPC405 Erratum.  */
  if (PPC405_ERRATUM77)
    emit_insn (gen_hwsync ());
  emit_insn (fn (res, mem, val));
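/* For orientation (comment added, not in the original source): the
   gen_store_conditional* patterns correspond to stbcx., sthcx., stwcx.,
   stdcx. and stqcx.; RES receives the CR0 result the surrounding retry
   loop tests to see whether the reservation was still held.  */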
/* Expand barriers before and after a load_locked/store_cond sequence.  */
rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
  rtx addr = XEXP (mem, 0);
  if (!legitimate_indirect_address_p (addr, reload_completed)
      && !legitimate_indexed_address_p (addr, reload_completed))
      addr = force_reg (Pmode, addr);
      mem = replace_equiv_address_nv (mem, addr);
    case MEMMODEL_RELAXED:
    case MEMMODEL_CONSUME:
    case MEMMODEL_ACQUIRE:
    case MEMMODEL_RELEASE:
    case MEMMODEL_ACQ_REL:
      emit_insn (gen_lwsync ());
    case MEMMODEL_SEQ_CST:
      emit_insn (gen_hwsync ());
      gcc_unreachable ();

rs6000_post_atomic_barrier (enum memmodel model)
    case MEMMODEL_RELAXED:
    case MEMMODEL_CONSUME:
    case MEMMODEL_RELEASE:
    case MEMMODEL_ACQUIRE:
    case MEMMODEL_ACQ_REL:
    case MEMMODEL_SEQ_CST:
      emit_insn (gen_isync ());
      gcc_unreachable ();
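/* Summary added for clarity (not part of the original source): the pre
   barrier emits lwsync for release and acq_rel, a full hwsync (sync) for
   seq_cst, and nothing for relaxed, consume or acquire; the post barrier
   emits isync for acquire, acq_rel and seq_cst.  Together they give the
   usual sync/lwsync ... larx/stcx. ... isync mapping of the C11 models.  */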
/* A subroutine of the various atomic expanders.  For sub-word operations,
   we must adjust things to operate on SImode.  Given the original MEM,
   return a new aligned memory.  Also build and return the quantities by
   which to shift and mask.  */
rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
  rtx addr, align, shift, mask, mem;
  HOST_WIDE_INT shift_mask;
  machine_mode mode = GET_MODE (orig_mem);
  /* For smaller modes, we have to implement this via SImode.  */
  shift_mask = (mode == QImode ? 0x18 : 0x10);
  addr = XEXP (orig_mem, 0);
  addr = force_reg (GET_MODE (addr), addr);
  /* Aligned memory containing subword.  Generate a new memory.  We
     do not want any of the existing MEM_ATTR data, as we're now
     accessing memory outside the original object.  */
  align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
  mem = gen_rtx_MEM (SImode, align);
  MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
  if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
    set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
  /* Shift amount for subword relative to aligned word.  */
  shift = gen_reg_rtx (SImode);
  addr = gen_lowpart (SImode, addr);
  rtx tmp = gen_reg_rtx (SImode);
  emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
  emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
  if (BYTES_BIG_ENDIAN)
    shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
                                 shift, 1, OPTAB_LIB_WIDEN);
  /* Mask for insertion.  */
  mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
                              shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
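/* Worked example added for clarity (not in the original source): for a
   QImode access at address 0x1003, ALIGN is 0x1000 and, on a little-endian
   target, SHIFT is ((0x1003 << 3) & 0x18) = 24, so MASK is 0xff << 24 and
   the byte occupies the top eight bits of the aligned SImode word.  On a
   big-endian target the final XOR with 0x18 gives SHIFT = 0 instead, which
   is where that byte sits in big-endian byte order.  */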
/* A subroutine of the various atomic expanders.  For sub-word operands,
   combine OLDVAL and NEWVAL via MASK.  Returns a new pseudo.  */
rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
  x = gen_reg_rtx (SImode);
  emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
                                          gen_rtx_NOT (SImode, mask),
  x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);

/* A subroutine of the various atomic expanders.  For sub-word operands,
   extract WIDE to NARROW via SHIFT.  */
rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
  wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
                              wide, 1, OPTAB_LIB_WIDEN);
  emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
/* Expand an atomic compare and swap operation.  */

void
rs6000_expand_atomic_compare_and_swap (rtx operands[])
{
  rtx boolval, retval, mem, oldval, newval, cond;
  rtx label1, label2, x, mask, shift;
  machine_mode mode, orig_mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;

  boolval = operands[0];
  retval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (INTVAL (operands[5]) != 0);
  mod_s = memmodel_base (INTVAL (operands[6]));
  mod_f = memmodel_base (INTVAL (operands[7]));
  orig_mode = mode = GET_MODE (mem);

  mask = shift = NULL_RTX;
  if (mode == QImode || mode == HImode)
    {
      /* Before power8, we didn't have access to lbarx/lharx, so generate a
	 lwarx and shift/mask operations.  With power8, we need to do the
	 comparison in SImode, but the store is still done in QI/HImode.  */
      oldval = convert_modes (SImode, mode, oldval, 1);

      if (!TARGET_SYNC_HI_QI)
	{
	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

	  /* Shift and mask OLDVAL into position with the word.  */
	  oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
					NULL_RTX, 1, OPTAB_LIB_WIDEN);

	  /* Shift and mask NEWVAL into position within the word.  */
	  newval = convert_modes (SImode, mode, newval, 1);
	  newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
					NULL_RTX, 1, OPTAB_LIB_WIDEN);
	}

      /* Prepare to adjust the return value.  */
      retval = gen_reg_rtx (SImode);
      mode = SImode;
    }
  else if (reg_overlap_mentioned_p (retval, oldval))
    oldval = copy_to_reg (oldval);

  if (mode != TImode && !reg_or_short_operand (oldval, mode))
    oldval = copy_to_mode_reg (mode, oldval);

  if (reg_overlap_mentioned_p (retval, newval))
    newval = copy_to_reg (newval);

  mem = rs6000_pre_atomic_barrier (mem, mod_s);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
      emit_label (XEXP (label1, 0));
    }
  label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());

  emit_load_locked (mode, retval, mem);

  x = retval;
  if (mask)
    x = expand_simple_binop (SImode, AND, retval, mask,
			     NULL_RTX, 1, OPTAB_LIB_WIDEN);

  cond = gen_reg_rtx (CCmode);
  /* If we have TImode, synthesize a comparison.  */
  if (mode != TImode)
    x = gen_rtx_COMPARE (CCmode, x, oldval);
  else
    {
      rtx xor1_result = gen_reg_rtx (DImode);
      rtx xor2_result = gen_reg_rtx (DImode);
      rtx or_result = gen_reg_rtx (DImode);
      rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
      rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
      rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
      rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);

      emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
      emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
      emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
      x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
    }

  emit_insn (gen_rtx_SET (cond, x));

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label2);

  x = newval;
  if (mask)
    x = rs6000_mask_atomic_subword (retval, newval, mask);

  emit_store_conditional (orig_mode, cond, mem, x);

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      emit_unlikely_jump (x, label1);
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));

  rs6000_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));

  if (shift)
    rs6000_finish_atomic_subword (operands[1], retval, shift);
  else if (mode != GET_MODE (operands[1]))
    convert_move (operands[1], retval, 1);

  /* In all cases, CR0 contains EQ on success, and NE on failure.  */
  x = gen_rtx_EQ (SImode, cond, const0_rtx);
  emit_insn (gen_rtx_SET (boolval, x));
}
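/* Editor's note: illustrative sketch only, not part of the original file.
   The expander above implements user-level compare-and-swap such as the
   hypothetical example below.  For a HImode operand on a pre-power8 target
   the loop operates on the containing word via the shift/mask helpers; on
   power8 and later lharx/sthcx. are used directly and only the comparison
   is widened to SImode.  */
#if 0
static _Bool
example_cas_u16 (unsigned short *p, unsigned short expected,
		 unsigned short desired)
{
  /* Success/failure is reported through CR0 (EQ on success, NE on failure),
     which the expander copies into the boolean result operand.  */
  return __atomic_compare_exchange_n (p, &expected, desired, /*weak=*/0,
				      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}
#endif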
/* Expand an atomic exchange operation.  */

void
rs6000_expand_atomic_exchange (rtx operands[])
{
  rtx retval, mem, val, cond;
  machine_mode mode;
  enum memmodel model;
  rtx label, x, mask, shift;

  retval = operands[0];
  mem = operands[1];
  val = operands[2];
  model = memmodel_base (INTVAL (operands[3]));
  mode = GET_MODE (mem);

  mask = shift = NULL_RTX;
  if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
    {
      mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

      /* Shift and mask VAL into position with the word.  */
      val = convert_modes (SImode, mode, val, 1);
      val = expand_simple_binop (SImode, ASHIFT, val, shift,
				 NULL_RTX, 1, OPTAB_LIB_WIDEN);

      /* Prepare to adjust the return value.  */
      retval = gen_reg_rtx (SImode);
      mode = SImode;
    }

  mem = rs6000_pre_atomic_barrier (mem, model);

  label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
  emit_label (XEXP (label, 0));

  emit_load_locked (mode, retval, mem);

  x = val;
  if (mask)
    x = rs6000_mask_atomic_subword (retval, val, mask);

  cond = gen_reg_rtx (CCmode);
  emit_store_conditional (mode, cond, mem, x);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label);

  rs6000_post_atomic_barrier (model);

  if (shift)
    rs6000_finish_atomic_subword (operands[0], retval, shift);
}
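/* Editor's note: illustrative sketch only, not part of the original file.
   A hypothetical user-level form of what rs6000_expand_atomic_exchange
   emits; for QImode without lbarx the exchange is performed on the aligned
   word and the old byte is extracted afterwards with the shift returned by
   rs6000_adjust_atomic_subword.  */
#if 0
static unsigned char
example_exchange_u8 (unsigned char *p, unsigned char newval)
{
  return __atomic_exchange_n (p, newval, __ATOMIC_ACQ_REL);
}
#endif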
/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
   to perform.  MEM is the memory on which to operate.  VAL is the second
   operand of the binary operator.  BEFORE and AFTER are optional locations to
   return the value of MEM either before or after the operation.  MODEL_RTX
   is a CONST_INT containing the memory model to use.  */

void
rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
			 rtx orig_before, rtx orig_after, rtx model_rtx)
{
  enum memmodel model = memmodel_base (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode store_mode = mode;
  rtx label, x, cond, mask, shift;
  rtx before = orig_before, after = orig_after;

  mask = shift = NULL_RTX;
  /* On power8, we want to use SImode for the operation.  On previous systems,
     use the operation in a subword and shift/mask to get the proper byte or
     halfword.  */
  if (mode == QImode || mode == HImode)
    {
      if (TARGET_SYNC_HI_QI)
	{
	  val = convert_modes (SImode, mode, val, 1);

	  /* Prepare to adjust the return value.  */
	  before = gen_reg_rtx (SImode);
	  if (after)
	    after = gen_reg_rtx (SImode);
	  mode = SImode;
	}
      else
	{
	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

	  /* Shift and mask VAL into position with the word.  */
	  val = convert_modes (SImode, mode, val, 1);
	  val = expand_simple_binop (SImode, ASHIFT, val, shift,
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);

	  switch (code)
	    {
	    case IOR:
	    case XOR:
	      /* We've already zero-extended VAL.  That is sufficient to
		 make certain that it does not affect other bits.  */
	      mask = NULL;
	      break;

	    case AND:
	      /* If we make certain that all of the other bits in VAL are
		 set, that will be sufficient to not affect other bits.  */
	      x = gen_rtx_NOT (SImode, mask);
	      x = gen_rtx_IOR (SImode, x, val);
	      emit_insn (gen_rtx_SET (val, x));
	      mask = NULL;
	      break;

	    case NOT:
	    case PLUS:
	    case MINUS:
	      /* These will all affect bits outside the field and need
		 adjustment via MASK within the loop.  */
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  /* Prepare to adjust the return value.  */
	  before = gen_reg_rtx (SImode);
	  if (after)
	    after = gen_reg_rtx (SImode);
	  store_mode = mode = SImode;
	}
    }

  mem = rs6000_pre_atomic_barrier (mem, model);

  label = gen_label_rtx ();
  emit_label (label);
  label = gen_rtx_LABEL_REF (VOIDmode, label);

  if (before == NULL_RTX)
    before = gen_reg_rtx (mode);

  emit_load_locked (mode, before, mem);

  if (code == NOT)
    {
      x = expand_simple_binop (mode, AND, before, val,
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      after = expand_simple_unop (mode, NOT, x, after, 1);
    }
  else
    after = expand_simple_binop (mode, code, before, val,
				 after, 1, OPTAB_LIB_WIDEN);

  x = after;
  if (mask)
    {
      x = expand_simple_binop (SImode, AND, after, mask,
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      x = rs6000_mask_atomic_subword (before, x, mask);
    }
  else if (store_mode != mode)
    x = convert_modes (store_mode, mode, x, 1);

  cond = gen_reg_rtx (CCmode);
  emit_store_conditional (store_mode, cond, mem, x);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label);

  rs6000_post_atomic_barrier (model);

  if (shift)
    {
      /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
	 then do the calculations in a SImode register.  */
      if (orig_before)
	rs6000_finish_atomic_subword (orig_before, before, shift);
      if (orig_after)
	rs6000_finish_atomic_subword (orig_after, after, shift);
    }
  else if (store_mode != mode)
    {
      /* QImode/HImode on machines with lbarx/lharx where we do the native
	 operation and then do the calculations in a SImode register.  */
      if (orig_before)
	convert_move (orig_before, before, 1);
      if (orig_after)
	convert_move (orig_after, after, 1);
    }
  else if (orig_after && after != orig_after)
    emit_move_insn (orig_after, after);
}
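/* Editor's note: illustrative sketch only, not part of the original file.
   Hypothetical user-level forms of the fetch-and-operate patterns handled
   above.  For IOR/XOR the zero-extended value cannot disturb neighbouring
   bytes, so no extra masking is needed inside the loop; for AND the value is
   first ORed with the complement of the field mask for the same reason.  */
#if 0
static unsigned char
example_fetch_or_u8 (unsigned char *p, unsigned char bits)
{
  return __atomic_fetch_or (p, bits, __ATOMIC_SEQ_CST);
}

static unsigned char
example_fetch_and_u8 (unsigned char *p, unsigned char bits)
{
  return __atomic_fetch_and (p, bits, __ATOMIC_SEQ_CST);
}
#endif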
static GTY(()) alias_set_type TOC_alias_set = -1;

alias_set_type
get_TOC_alias_set (void)
{
  if (TOC_alias_set == -1)
    TOC_alias_set = new_alias_set ();
  return TOC_alias_set;
}

/* The mode the ABI uses for a word.  This is not the same as word_mode
   for -m32 -mpowerpc64.  This is used to implement various target hooks.  */

static scalar_int_mode
rs6000_abi_word_mode (void)
{
  return TARGET_32BIT ? SImode : DImode;
}

/* Implement the TARGET_OFFLOAD_OPTIONS hook.  */

static char *
rs6000_offload_options (void)
{
  if (TARGET_64BIT)
    return xstrdup ("-foffload-abi=lp64");
  else
    return xstrdup ("-foffload-abi=ilp32");
}
/* A quick summary of the various types of 'constant-pool tables'
   under PowerPC:

   Target	Flags		Name		One table per
   AIX		(none)		AIX TOC		object file
   AIX		-mfull-toc	AIX TOC		object file
   AIX		-mminimal-toc	AIX minimal TOC	translation unit
   SVR4/EABI	(none)		SVR4 SDATA	object file
   SVR4/EABI	-fpic		SVR4 pic	object file
   SVR4/EABI	-fPIC		SVR4 PIC	translation unit
   SVR4/EABI	-mrelocatable	EABI TOC	function
   SVR4/EABI	-maix		AIX TOC		object file
   SVR4/EABI	-maix -mminimal-toc
				AIX minimal TOC	translation unit

   Name			Reg.	Set by	entries	contains:
				made by	addrs?	fp?	sum?

   AIX TOC		2	crt0	as	Y	option	option
   AIX minimal TOC	30	prolog	gcc	Y	Y	option
   SVR4 SDATA		13	crt0	gcc	N	Y	N
   SVR4 pic		30	prolog	ld	Y	not yet	N
   SVR4 PIC		30	prolog	gcc	Y	option	option
   EABI TOC		30	prolog	gcc	Y	option	option  */
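/* Editor's note: illustrative example, not part of the original file.  On a
   64-bit ELF target with -mminimal-toc, output_toc below emits entries of
   roughly this shape for a DFmode constant and a symbolic address (the
   label numbers are hypothetical):

	.LC5:
		.quad	0x3ff0000000000000
	.LC6:
		.quad	some_symbol+16

   On AIX/XCOFF the same entries use the ".tc FD_..._..[TC]," and
   ".tc symbol[TC],symbol" forms instead, and the ".set .LCn,.LCm" path in
   output_toc is used when a duplicate constant is detected.  */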
16915 /* Hash functions for the hash table. */
16918 rs6000_hash_constant (rtx k
)
16920 enum rtx_code code
= GET_CODE (k
);
16921 machine_mode mode
= GET_MODE (k
);
16922 unsigned result
= (code
<< 3) ^ mode
;
16923 const char *format
;
16926 format
= GET_RTX_FORMAT (code
);
16927 flen
= strlen (format
);
16933 return result
* 1231 + (unsigned) INSN_UID (XEXP (k
, 0));
16935 case CONST_WIDE_INT
:
16938 flen
= CONST_WIDE_INT_NUNITS (k
);
16939 for (i
= 0; i
< flen
; i
++)
16940 result
= result
* 613 + CONST_WIDE_INT_ELT (k
, i
);
16945 return real_hash (CONST_DOUBLE_REAL_VALUE (k
)) * result
;
16955 for (; fidx
< flen
; fidx
++)
16956 switch (format
[fidx
])
16961 const char *str
= XSTR (k
, fidx
);
16962 len
= strlen (str
);
16963 result
= result
* 613 + len
;
16964 for (i
= 0; i
< len
; i
++)
16965 result
= result
* 613 + (unsigned) str
[i
];
16970 result
= result
* 1231 + rs6000_hash_constant (XEXP (k
, fidx
));
16974 result
= result
* 613 + (unsigned) XINT (k
, fidx
);
16977 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT
))
16978 result
= result
* 613 + (unsigned) XWINT (k
, fidx
);
16982 for (i
= 0; i
< sizeof (HOST_WIDE_INT
) / sizeof (unsigned); i
++)
16983 result
= result
* 613 + (unsigned) (XWINT (k
, fidx
)
16990 gcc_unreachable ();
hashval_t
toc_hasher::hash (toc_hash_struct *thc)
{
  return rs6000_hash_constant (thc->key) ^ thc->key_mode;
}

/* Compare H1 and H2 for equivalence.  */

bool
toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
{
  rtx r1 = h1->key;
  rtx r2 = h2->key;

  if (h1->key_mode != h2->key_mode)
    return 0;

  return rtx_equal_p (r1, r2);
}
17016 /* These are the names given by the C++ front-end to vtables, and
17017 vtable-like objects. Ideally, this logic should not be here;
17018 instead, there should be some programmatic way of inquiring as
17019 to whether or not an object is a vtable. */
17021 #define VTABLE_NAME_P(NAME) \
17022 (startswith (name, "_vt.") \
17023 || startswith (name, "_ZTV") \
17024 || startswith (name, "_ZTT") \
17025 || startswith (name, "_ZTI") \
17026 || startswith (name, "_ZTC"))
#ifdef NO_DOLLAR_IN_LABEL
/* Return a GGC-allocated character string translating dollar signs in
   input NAME to underscores.  Used by XCOFF ASM_OUTPUT_LABELREF.  */

const char *
rs6000_xcoff_strip_dollar (const char *name)
{
  char *strip, *p;
  const char *q;
  size_t len;

  q = (const char *) strchr (name, '$');

  if (q == 0 || q == name)
    return name;

  len = strlen (name);
  strip = XALLOCAVEC (char, len + 1);
  strcpy (strip, name);
  p = strip + (q - name);
  while (p)
    {
      *p = '_';
      p = strchr (p + 1, '$');
    }

  return ggc_alloc_string (strip, len);
}
#endif
void
rs6000_output_symbol_ref (FILE *file, rtx x)
{
  const char *name = XSTR (x, 0);

  /* Currently C++ toc references to vtables can be emitted before it
     is decided whether the vtable is public or private.  If this is
     the case, then the linker will eventually complain that there is
     a reference to an unknown section.  Thus, for vtables only,
     we emit the TOC reference to reference the identifier and not the
     symbol.  */
  if (VTABLE_NAME_P (name))
    RS6000_OUTPUT_BASENAME (file, name);
  else
    assemble_name (file, name);
}
17077 /* Output a TOC entry. We derive the entry name from what is being
17081 output_toc (FILE *file
, rtx x
, int labelno
, machine_mode mode
)
17084 const char *name
= buf
;
17086 HOST_WIDE_INT offset
= 0;
17088 gcc_assert (!TARGET_NO_TOC_OR_PCREL
);
17090 /* When the linker won't eliminate them, don't output duplicate
17091 TOC entries (this happens on AIX if there is any kind of TOC,
17092 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
17094 if (TARGET_TOC
&& GET_CODE (x
) != LABEL_REF
)
17096 struct toc_hash_struct
*h
;
17098 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
17099 time because GGC is not initialized at that point. */
17100 if (toc_hash_table
== NULL
)
17101 toc_hash_table
= hash_table
<toc_hasher
>::create_ggc (1021);
17103 h
= ggc_alloc
<toc_hash_struct
> ();
17105 h
->key_mode
= mode
;
17106 h
->labelno
= labelno
;
17108 toc_hash_struct
**found
= toc_hash_table
->find_slot (h
, INSERT
);
17109 if (*found
== NULL
)
17111 else /* This is indeed a duplicate.
17112 Set this label equal to that label. */
17114 fputs ("\t.set ", file
);
17115 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LC");
17116 fprintf (file
, "%d,", labelno
);
17117 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LC");
17118 fprintf (file
, "%d\n", ((*found
)->labelno
));
17121 if (TARGET_XCOFF
&& SYMBOL_REF_P (x
)
17122 && (SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_GLOBAL_DYNAMIC
17123 || SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
))
17125 fputs ("\t.set ", file
);
17126 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LCM");
17127 fprintf (file
, "%d,", labelno
);
17128 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LCM");
17129 fprintf (file
, "%d\n", ((*found
)->labelno
));
17136 /* If we're going to put a double constant in the TOC, make sure it's
17137 aligned properly when strict alignment is on. */
17138 if ((CONST_DOUBLE_P (x
) || CONST_WIDE_INT_P (x
))
17139 && STRICT_ALIGNMENT
17140 && GET_MODE_BITSIZE (mode
) >= 64
17141 && ! (TARGET_NO_FP_IN_TOC
&& ! TARGET_MINIMAL_TOC
)) {
17142 ASM_OUTPUT_ALIGN (file
, 3);
17145 (*targetm
.asm_out
.internal_label
) (file
, "LC", labelno
);
17147 /* Handle FP constants specially. Note that if we have a minimal
17148 TOC, things we put here aren't actually in the TOC, so we can allow
17150 if (CONST_DOUBLE_P (x
)
17151 && (GET_MODE (x
) == TFmode
|| GET_MODE (x
) == TDmode
17152 || GET_MODE (x
) == IFmode
|| GET_MODE (x
) == KFmode
))
17156 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
17157 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17159 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17163 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17164 fputs (DOUBLE_INT_ASM_OP
, file
);
17166 fprintf (file
, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17167 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
17168 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
17169 fprintf (file
, "0x%lx%08lx,0x%lx%08lx\n",
17170 k
[WORDS_BIG_ENDIAN
? 0 : 1] & 0xffffffff,
17171 k
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffff,
17172 k
[WORDS_BIG_ENDIAN
? 2 : 3] & 0xffffffff,
17173 k
[WORDS_BIG_ENDIAN
? 3 : 2] & 0xffffffff);
17178 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17179 fputs ("\t.long ", file
);
17181 fprintf (file
, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17182 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
17183 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
17184 fprintf (file
, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17185 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
17186 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
17190 else if (CONST_DOUBLE_P (x
)
17191 && (GET_MODE (x
) == DFmode
|| GET_MODE (x
) == DDmode
))
17195 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
17196 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17198 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17202 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17203 fputs (DOUBLE_INT_ASM_OP
, file
);
17205 fprintf (file
, "\t.tc FD_%lx_%lx[TC],",
17206 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17207 fprintf (file
, "0x%lx%08lx\n",
17208 k
[WORDS_BIG_ENDIAN
? 0 : 1] & 0xffffffff,
17209 k
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffff);
17214 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17215 fputs ("\t.long ", file
);
17217 fprintf (file
, "\t.tc FD_%lx_%lx[TC],",
17218 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17219 fprintf (file
, "0x%lx,0x%lx\n",
17220 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17224 else if (CONST_DOUBLE_P (x
)
17225 && (GET_MODE (x
) == SFmode
|| GET_MODE (x
) == SDmode
))
17229 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
17230 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x
), l
);
17232 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
17236 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17237 fputs (DOUBLE_INT_ASM_OP
, file
);
17239 fprintf (file
, "\t.tc FS_%lx[TC],", l
& 0xffffffff);
17240 if (WORDS_BIG_ENDIAN
)
17241 fprintf (file
, "0x%lx00000000\n", l
& 0xffffffff);
17243 fprintf (file
, "0x%lx\n", l
& 0xffffffff);
17248 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17249 fputs ("\t.long ", file
);
17251 fprintf (file
, "\t.tc FS_%lx[TC],", l
& 0xffffffff);
17252 fprintf (file
, "0x%lx\n", l
& 0xffffffff);
17256 else if (GET_MODE (x
) == VOIDmode
&& CONST_INT_P (x
))
17258 unsigned HOST_WIDE_INT low
;
17259 HOST_WIDE_INT high
;
17261 low
= INTVAL (x
) & 0xffffffff;
17262 high
= (HOST_WIDE_INT
) INTVAL (x
) >> 32;
17264 /* TOC entries are always Pmode-sized, so when big-endian
17265 smaller integer constants in the TOC need to be padded.
17266 (This is still a win over putting the constants in
17267 a separate constant pool, because then we'd have
17268 to have both a TOC entry _and_ the actual constant.)
17270 For a 32-bit target, CONST_INT values are loaded and shifted
17271 entirely within `low' and can be stored in one TOC entry. */
17273 /* It would be easy to make this work, but it doesn't now. */
17274 gcc_assert (!TARGET_64BIT
|| POINTER_SIZE
>= GET_MODE_BITSIZE (mode
));
17276 if (WORDS_BIG_ENDIAN
&& POINTER_SIZE
> GET_MODE_BITSIZE (mode
))
17279 low
<<= POINTER_SIZE
- GET_MODE_BITSIZE (mode
);
17280 high
= (HOST_WIDE_INT
) low
>> 32;
17286 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17287 fputs (DOUBLE_INT_ASM_OP
, file
);
17289 fprintf (file
, "\t.tc ID_%lx_%lx[TC],",
17290 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17291 fprintf (file
, "0x%lx%08lx\n",
17292 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17297 if (POINTER_SIZE
< GET_MODE_BITSIZE (mode
))
17299 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17300 fputs ("\t.long ", file
);
17302 fprintf (file
, "\t.tc ID_%lx_%lx[TC],",
17303 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17304 fprintf (file
, "0x%lx,0x%lx\n",
17305 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17309 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17310 fputs ("\t.long ", file
);
17312 fprintf (file
, "\t.tc IS_%lx[TC],", (long) low
& 0xffffffff);
17313 fprintf (file
, "0x%lx\n", (long) low
& 0xffffffff);
17319 if (GET_CODE (x
) == CONST
)
17321 gcc_assert (GET_CODE (XEXP (x
, 0)) == PLUS
17322 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)));
17324 base
= XEXP (XEXP (x
, 0), 0);
17325 offset
= INTVAL (XEXP (XEXP (x
, 0), 1));
17328 switch (GET_CODE (base
))
17331 name
= XSTR (base
, 0);
17335 ASM_GENERATE_INTERNAL_LABEL (buf
, "L",
17336 CODE_LABEL_NUMBER (XEXP (base
, 0)));
17340 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (base
));
17344 gcc_unreachable ();
17347 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17348 fputs (TARGET_32BIT
? "\t.long " : DOUBLE_INT_ASM_OP
, file
);
17351 fputs ("\t.tc ", file
);
17352 RS6000_OUTPUT_BASENAME (file
, name
);
17355 fprintf (file
, ".N" HOST_WIDE_INT_PRINT_UNSIGNED
, - offset
);
17357 fprintf (file
, ".P" HOST_WIDE_INT_PRINT_UNSIGNED
, offset
);
17359 /* Mark large TOC symbols on AIX with [TE] so they are mapped
17360 after other TOC symbols, reducing overflow of small TOC access
17361 to [TC] symbols. */
17362 fputs (TARGET_XCOFF
&& TARGET_CMODEL
!= CMODEL_SMALL
17363 ? "[TE]," : "[TC],", file
);
17366 /* Currently C++ toc references to vtables can be emitted before it
17367 is decided whether the vtable is public or private. If this is
17368 the case, then the linker will eventually complain that there is
17369 a TOC reference to an unknown section. Thus, for vtables only,
17370 we emit the TOC reference to reference the symbol and not the
17372 if (VTABLE_NAME_P (name
))
17374 RS6000_OUTPUT_BASENAME (file
, name
);
17376 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, offset
);
17377 else if (offset
> 0)
17378 fprintf (file
, "+" HOST_WIDE_INT_PRINT_DEC
, offset
);
17381 output_addr_const (file
, x
);
17384 if (TARGET_XCOFF
&& SYMBOL_REF_P (base
))
17386 switch (SYMBOL_REF_TLS_MODEL (base
))
17390 case TLS_MODEL_LOCAL_EXEC
:
17391 fputs ("@le", file
);
17393 case TLS_MODEL_INITIAL_EXEC
:
17394 fputs ("@ie", file
);
17396 /* Use global-dynamic for local-dynamic. */
17397 case TLS_MODEL_GLOBAL_DYNAMIC
:
17398 case TLS_MODEL_LOCAL_DYNAMIC
:
17400 (*targetm
.asm_out
.internal_label
) (file
, "LCM", labelno
);
17401 fputs ("\t.tc .", file
);
17402 RS6000_OUTPUT_BASENAME (file
, name
);
17403 fputs ("[TC],", file
);
17404 output_addr_const (file
, x
);
17405 fputs ("@m", file
);
17408 gcc_unreachable ();
17416 /* Output an assembler pseudo-op to write an ASCII string of N characters
17417 starting at P to FILE.
17419 On the RS/6000, we have to do this using the .byte operation and
17420 write out special characters outside the quoted string.
17421 Also, the assembler is broken; very long strings are truncated,
17422 so we must artificially break them up early. */
17425 output_ascii (FILE *file
, const char *p
, int n
)
17428 int i
, count_string
;
17429 const char *for_string
= "\t.byte \"";
17430 const char *for_decimal
= "\t.byte ";
17431 const char *to_close
= NULL
;
17434 for (i
= 0; i
< n
; i
++)
17437 if (c
>= ' ' && c
< 0177)
17440 fputs (for_string
, file
);
17443 /* Write two quotes to get one. */
17451 for_decimal
= "\"\n\t.byte ";
17455 if (count_string
>= 512)
17457 fputs (to_close
, file
);
17459 for_string
= "\t.byte \"";
17460 for_decimal
= "\t.byte ";
17468 fputs (for_decimal
, file
);
17469 fprintf (file
, "%d", c
);
17471 for_string
= "\n\t.byte \"";
17472 for_decimal
= ", ";
17478 /* Now close the string if we have written one. Then end the line. */
17480 fputs (to_close
, file
);
17483 /* Generate a unique section name for FILENAME for a section type
17484 represented by SECTION_DESC. Output goes into BUF.
17486 SECTION_DESC can be any string, as long as it is different for each
17487 possible section type.
17489 We name the section in the same manner as xlc. The name begins with an
17490 underscore followed by the filename (after stripping any leading directory
17491 names) with the last period replaced by the string SECTION_DESC. If
17492 FILENAME does not contain a period, SECTION_DESC is appended to the end of
17496 rs6000_gen_section_name (char **buf
, const char *filename
,
17497 const char *section_desc
)
17499 const char *q
, *after_last_slash
, *last_period
= 0;
17503 after_last_slash
= filename
;
17504 for (q
= filename
; *q
; q
++)
17507 after_last_slash
= q
+ 1;
17508 else if (*q
== '.')
17512 len
= strlen (after_last_slash
) + strlen (section_desc
) + 2;
17513 *buf
= (char *) xmalloc (len
);
17518 for (q
= after_last_slash
; *q
; q
++)
17520 if (q
== last_period
)
17522 strcpy (p
, section_desc
);
17523 p
+= strlen (section_desc
);
17527 else if (ISALNUM (*q
))
17531 if (last_period
== 0)
17532 strcpy (p
, section_desc
);
17537 /* Emit profile function. */
17540 output_profile_hook (int labelno ATTRIBUTE_UNUSED
)
17542 /* Non-standard profiling for kernels, which just saves LR then calls
17543 _mcount without worrying about arg saves. The idea is to change
17544 the function prologue as little as possible as it isn't easy to
17545 account for arg save/restore code added just for _mcount. */
17546 if (TARGET_PROFILE_KERNEL
)
17549 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
17551 #ifndef NO_PROFILE_COUNTERS
17552 # define NO_PROFILE_COUNTERS 0
17554 if (NO_PROFILE_COUNTERS
)
17555 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
17556 LCT_NORMAL
, VOIDmode
);
17560 const char *label_name
;
17563 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
17564 label_name
= ggc_strdup ((*targetm
.strip_name_encoding
) (buf
));
17565 fun
= gen_rtx_SYMBOL_REF (Pmode
, label_name
);
17567 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
17568 LCT_NORMAL
, VOIDmode
, fun
, Pmode
);
17571 else if (DEFAULT_ABI
== ABI_DARWIN
)
17573 const char *mcount_name
= RS6000_MCOUNT
;
17574 int caller_addr_regno
= LR_REGNO
;
17576 /* Be conservative and always set this, at least for now. */
17577 crtl
->uses_pic_offset_table
= 1;
17580 /* For PIC code, set up a stub and collect the caller's address
17581 from r0, which is where the prologue puts it. */
17582 if (MACHOPIC_INDIRECT
17583 && crtl
->uses_pic_offset_table
)
17584 caller_addr_regno
= 0;
17586 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, mcount_name
),
17587 LCT_NORMAL
, VOIDmode
,
17588 gen_rtx_REG (Pmode
, caller_addr_regno
), Pmode
);
17592 /* Write function profiler code. */
17595 output_function_profiler (FILE *file
, int labelno
)
17599 switch (DEFAULT_ABI
)
17602 gcc_unreachable ();
17607 warning (0, "no profiling of 64-bit code for this ABI");
17610 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
17611 fprintf (file
, "\tmflr %s\n", reg_names
[0]);
17612 if (NO_PROFILE_COUNTERS
)
17614 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17615 reg_names
[0], reg_names
[1]);
17617 else if (TARGET_SECURE_PLT
&& flag_pic
)
17619 if (TARGET_LINK_STACK
)
17622 get_ppc476_thunk_name (name
);
17623 asm_fprintf (file
, "\tbl %s\n", name
);
17626 asm_fprintf (file
, "\tbcl 20,31,1f\n1:\n");
17627 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17628 reg_names
[0], reg_names
[1]);
17629 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
17630 asm_fprintf (file
, "\taddis %s,%s,",
17631 reg_names
[12], reg_names
[12]);
17632 assemble_name (file
, buf
);
17633 asm_fprintf (file
, "-1b@ha\n\tla %s,", reg_names
[0]);
17634 assemble_name (file
, buf
);
17635 asm_fprintf (file
, "-1b@l(%s)\n", reg_names
[12]);
17637 else if (flag_pic
== 1)
17639 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file
);
17640 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17641 reg_names
[0], reg_names
[1]);
17642 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
17643 asm_fprintf (file
, "\tlwz %s,", reg_names
[0]);
17644 assemble_name (file
, buf
);
17645 asm_fprintf (file
, "@got(%s)\n", reg_names
[12]);
17647 else if (flag_pic
> 1)
17649 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17650 reg_names
[0], reg_names
[1]);
17651 /* Now, we need to get the address of the label. */
17652 if (TARGET_LINK_STACK
)
17655 get_ppc476_thunk_name (name
);
17656 asm_fprintf (file
, "\tbl %s\n\tb 1f\n\t.long ", name
);
17657 assemble_name (file
, buf
);
17658 fputs ("-.\n1:", file
);
17659 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
17660 asm_fprintf (file
, "\taddi %s,%s,4\n",
17661 reg_names
[11], reg_names
[11]);
17665 fputs ("\tbcl 20,31,1f\n\t.long ", file
);
17666 assemble_name (file
, buf
);
17667 fputs ("-.\n1:", file
);
17668 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
17670 asm_fprintf (file
, "\tlwz %s,0(%s)\n",
17671 reg_names
[0], reg_names
[11]);
17672 asm_fprintf (file
, "\tadd %s,%s,%s\n",
17673 reg_names
[0], reg_names
[0], reg_names
[11]);
17677 asm_fprintf (file
, "\tlis %s,", reg_names
[12]);
17678 assemble_name (file
, buf
);
17679 fputs ("@ha\n", file
);
17680 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17681 reg_names
[0], reg_names
[1]);
17682 asm_fprintf (file
, "\tla %s,", reg_names
[0]);
17683 assemble_name (file
, buf
);
17684 asm_fprintf (file
, "@l(%s)\n", reg_names
[12]);
17687 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
17688 fprintf (file
, "\tbl %s%s\n",
17689 RS6000_MCOUNT
, flag_pic
? "@plt" : "");
17695 /* Don't do anything, done in output_profile_hook (). */
/* The following variable value is the last issued insn.  */

static rtx_insn *last_scheduled_insn;

/* The following variable helps to balance issuing of load and
   store instructions.  */

static int load_store_pendulum;

/* The following variable helps pair divide insns during scheduling.  */
static int divide_cnt;
/* The following variable helps pair and alternate vector and vector load
   insns during scheduling.  */
static int vec_pairing;
17718 /* Power4 load update and store update instructions are cracked into a
17719 load or store and an integer insn which are executed in the same cycle.
17720 Branches have their own dispatch slot which does not count against the
17721 GCC issue rate, but it changes the program flow so there are no other
17722 instructions to issue in this cycle. */
17725 rs6000_variable_issue_1 (rtx_insn
*insn
, int more
)
17727 last_scheduled_insn
= insn
;
17728 if (GET_CODE (PATTERN (insn
)) == USE
17729 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
17731 cached_can_issue_more
= more
;
17732 return cached_can_issue_more
;
17735 if (insn_terminates_group_p (insn
, current_group
))
17737 cached_can_issue_more
= 0;
17738 return cached_can_issue_more
;
17741 /* If no reservation, but reach here */
17742 if (recog_memoized (insn
) < 0)
17745 if (rs6000_sched_groups
)
17747 if (is_microcoded_insn (insn
))
17748 cached_can_issue_more
= 0;
17749 else if (is_cracked_insn (insn
))
17750 cached_can_issue_more
= more
> 2 ? more
- 2 : 0;
17752 cached_can_issue_more
= more
- 1;
17754 return cached_can_issue_more
;
17757 if (rs6000_tune
== PROCESSOR_CELL
&& is_nonpipeline_insn (insn
))
17760 cached_can_issue_more
= more
- 1;
17761 return cached_can_issue_more
;
17765 rs6000_variable_issue (FILE *stream
, int verbose
, rtx_insn
*insn
, int more
)
17767 int r
= rs6000_variable_issue_1 (insn
, more
);
17769 fprintf (stream
, "// rs6000_variable_issue (more = %d) = %d\n", more
, r
);
17773 /* Adjust the cost of a scheduling dependency. Return the new cost of
17774 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
17777 rs6000_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
, int cost
,
17780 enum attr_type attr_type
;
17782 if (recog_memoized (insn
) < 0 || recog_memoized (dep_insn
) < 0)
17789 /* Data dependency; DEP_INSN writes a register that INSN reads
17790 some cycles later. */
17792 /* Separate a load from a narrower, dependent store. */
17793 if ((rs6000_sched_groups
|| rs6000_tune
== PROCESSOR_POWER9
17794 || rs6000_tune
== PROCESSOR_POWER10
)
17795 && GET_CODE (PATTERN (insn
)) == SET
17796 && GET_CODE (PATTERN (dep_insn
)) == SET
17797 && MEM_P (XEXP (PATTERN (insn
), 1))
17798 && MEM_P (XEXP (PATTERN (dep_insn
), 0))
17799 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn
), 1)))
17800 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn
), 0)))))
17803 attr_type
= get_attr_type (insn
);
17808 /* Tell the first scheduling pass about the latency between
17809 a mtctr and bctr (and mtlr and br/blr). The first
17810 scheduling pass will not know about this latency since
17811 the mtctr instruction, which has the latency associated
17812 to it, will be generated by reload. */
17815 /* Leave some extra cycles between a compare and its
17816 dependent branch, to inhibit expensive mispredicts. */
17817 if ((rs6000_tune
== PROCESSOR_PPC603
17818 || rs6000_tune
== PROCESSOR_PPC604
17819 || rs6000_tune
== PROCESSOR_PPC604e
17820 || rs6000_tune
== PROCESSOR_PPC620
17821 || rs6000_tune
== PROCESSOR_PPC630
17822 || rs6000_tune
== PROCESSOR_PPC750
17823 || rs6000_tune
== PROCESSOR_PPC7400
17824 || rs6000_tune
== PROCESSOR_PPC7450
17825 || rs6000_tune
== PROCESSOR_PPCE5500
17826 || rs6000_tune
== PROCESSOR_PPCE6500
17827 || rs6000_tune
== PROCESSOR_POWER4
17828 || rs6000_tune
== PROCESSOR_POWER5
17829 || rs6000_tune
== PROCESSOR_POWER7
17830 || rs6000_tune
== PROCESSOR_POWER8
17831 || rs6000_tune
== PROCESSOR_POWER9
17832 || rs6000_tune
== PROCESSOR_POWER10
17833 || rs6000_tune
== PROCESSOR_CELL
)
17834 && recog_memoized (dep_insn
)
17835 && (INSN_CODE (dep_insn
) >= 0))
17837 switch (get_attr_type (dep_insn
))
17840 case TYPE_FPCOMPARE
:
17841 case TYPE_CR_LOGICAL
:
17845 if (get_attr_dot (dep_insn
) == DOT_YES
)
17850 if (get_attr_dot (dep_insn
) == DOT_YES
17851 && get_attr_var_shift (dep_insn
) == VAR_SHIFT_NO
)
17862 if ((rs6000_tune
== PROCESSOR_POWER6
)
17863 && recog_memoized (dep_insn
)
17864 && (INSN_CODE (dep_insn
) >= 0))
17867 if (GET_CODE (PATTERN (insn
)) != SET
)
17868 /* If this happens, we have to extend this to schedule
17869 optimally. Return default for now. */
17872 /* Adjust the cost for the case where the value written
17873 by a fixed point operation is used as the address
17874 gen value on a store. */
17875 switch (get_attr_type (dep_insn
))
17880 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
17881 return get_attr_sign_extend (dep_insn
)
17882 == SIGN_EXTEND_YES
? 6 : 4;
17887 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
17888 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
17898 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
17906 if (get_attr_update (dep_insn
) == UPDATE_YES
17907 && ! rs6000_store_data_bypass_p (dep_insn
, insn
))
17913 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
17919 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
17920 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
17930 if ((rs6000_tune
== PROCESSOR_POWER6
)
17931 && recog_memoized (dep_insn
)
17932 && (INSN_CODE (dep_insn
) >= 0))
17935 /* Adjust the cost for the case where the value written
17936 by a fixed point instruction is used within the address
17937 gen portion of a subsequent load(u)(x) */
17938 switch (get_attr_type (dep_insn
))
17943 if (set_to_load_agen (dep_insn
, insn
))
17944 return get_attr_sign_extend (dep_insn
)
17945 == SIGN_EXTEND_YES
? 6 : 4;
17950 if (set_to_load_agen (dep_insn
, insn
))
17951 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
17961 if (set_to_load_agen (dep_insn
, insn
))
17969 if (get_attr_update (dep_insn
) == UPDATE_YES
17970 && set_to_load_agen (dep_insn
, insn
))
17976 if (set_to_load_agen (dep_insn
, insn
))
17982 if (set_to_load_agen (dep_insn
, insn
))
17983 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
17996 /* Fall out to return default cost. */
18000 case REG_DEP_OUTPUT
:
18001 /* Output dependency; DEP_INSN writes a register that INSN writes some
18003 if ((rs6000_tune
== PROCESSOR_POWER6
)
18004 && recog_memoized (dep_insn
)
18005 && (INSN_CODE (dep_insn
) >= 0))
18007 attr_type
= get_attr_type (insn
);
18012 case TYPE_FPSIMPLE
:
18013 if (get_attr_type (dep_insn
) == TYPE_FP
18014 || get_attr_type (dep_insn
) == TYPE_FPSIMPLE
)
18021 /* Fall through, no cost for output dependency. */
18025 /* Anti dependency; DEP_INSN reads a register that INSN writes some
18030 gcc_unreachable ();
18036 /* Debug version of rs6000_adjust_cost. */
18039 rs6000_debug_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
,
18040 int cost
, unsigned int dw
)
18042 int ret
= rs6000_adjust_cost (insn
, dep_type
, dep_insn
, cost
, dw
);
    default:		 dep = "unknown dependency"; break;
    case REG_DEP_TRUE:	 dep = "data dependency";    break;
    case REG_DEP_OUTPUT: dep = "output dependency";  break;
    case REG_DEP_ANTI:	 dep = "anti dependency";    break;
18057 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
18058 "%s, insn:\n", ret
, cost
, dep
);
18066 /* The function returns a true if INSN is microcoded.
18067 Return false otherwise. */
18070 is_microcoded_insn (rtx_insn
*insn
)
18072 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18073 || GET_CODE (PATTERN (insn
)) == USE
18074 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18077 if (rs6000_tune
== PROCESSOR_CELL
)
18078 return get_attr_cell_micro (insn
) == CELL_MICRO_ALWAYS
;
18080 if (rs6000_sched_groups
18081 && (rs6000_tune
== PROCESSOR_POWER4
|| rs6000_tune
== PROCESSOR_POWER5
))
18083 enum attr_type type
= get_attr_type (insn
);
18084 if ((type
== TYPE_LOAD
18085 && get_attr_update (insn
) == UPDATE_YES
18086 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
)
18087 || ((type
== TYPE_LOAD
|| type
== TYPE_STORE
)
18088 && get_attr_update (insn
) == UPDATE_YES
18089 && get_attr_indexed (insn
) == INDEXED_YES
)
18090 || type
== TYPE_MFCR
)
18097 /* The function returns true if INSN is cracked into 2 instructions
18098 by the processor (and therefore occupies 2 issue slots). */
18101 is_cracked_insn (rtx_insn
*insn
)
18103 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18104 || GET_CODE (PATTERN (insn
)) == USE
18105 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18108 if (rs6000_sched_groups
18109 && (rs6000_tune
== PROCESSOR_POWER4
|| rs6000_tune
== PROCESSOR_POWER5
))
18111 enum attr_type type
= get_attr_type (insn
);
18112 if ((type
== TYPE_LOAD
18113 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
18114 && get_attr_update (insn
) == UPDATE_NO
)
18115 || (type
== TYPE_LOAD
18116 && get_attr_sign_extend (insn
) == SIGN_EXTEND_NO
18117 && get_attr_update (insn
) == UPDATE_YES
18118 && get_attr_indexed (insn
) == INDEXED_NO
)
18119 || (type
== TYPE_STORE
18120 && get_attr_update (insn
) == UPDATE_YES
18121 && get_attr_indexed (insn
) == INDEXED_NO
)
18122 || ((type
== TYPE_FPLOAD
|| type
== TYPE_FPSTORE
)
18123 && get_attr_update (insn
) == UPDATE_YES
)
18124 || (type
== TYPE_CR_LOGICAL
18125 && get_attr_cr_logical_3op (insn
) == CR_LOGICAL_3OP_YES
)
18126 || (type
== TYPE_EXTS
18127 && get_attr_dot (insn
) == DOT_YES
)
18128 || (type
== TYPE_SHIFT
18129 && get_attr_dot (insn
) == DOT_YES
18130 && get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
18131 || (type
== TYPE_MUL
18132 && get_attr_dot (insn
) == DOT_YES
)
18133 || type
== TYPE_DIV
18134 || (type
== TYPE_INSERT
18135 && get_attr_size (insn
) == SIZE_32
))
18142 /* The function returns true if INSN can be issued only from
18143 the branch slot. */
18146 is_branch_slot_insn (rtx_insn
*insn
)
18148 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18149 || GET_CODE (PATTERN (insn
)) == USE
18150 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18153 if (rs6000_sched_groups
)
18155 enum attr_type type
= get_attr_type (insn
);
18156 if (type
== TYPE_BRANCH
|| type
== TYPE_JMPREG
)
18164 /* The function returns true if out_inst sets a value that is
18165 used in the address generation computation of in_insn */
18167 set_to_load_agen (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
18169 rtx out_set
, in_set
;
18171 /* For performance reasons, only handle the simple case where
18172 both loads are a single_set. */
18173 out_set
= single_set (out_insn
);
18176 in_set
= single_set (in_insn
);
18178 return reg_mentioned_p (SET_DEST (out_set
), SET_SRC (in_set
));
/* Try to determine base/offset/size parts of the given MEM.
   Return true if successful, false if all the values couldn't
   be determined.

   This function only looks for REG or REG+CONST address forms.
   REG+REG address form will return false.  */

static bool
get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
		  HOST_WIDE_INT *size)
{
  rtx addr_rtx;
  if (MEM_SIZE_KNOWN_P (mem))
    *size = MEM_SIZE (mem);
  else
    return false;

  addr_rtx = (XEXP (mem, 0));
  if (GET_CODE (addr_rtx) == PRE_MODIFY)
    addr_rtx = XEXP (addr_rtx, 1);

  *offset = 0;
  while (GET_CODE (addr_rtx) == PLUS
	 && CONST_INT_P (XEXP (addr_rtx, 1)))
    {
      *offset += INTVAL (XEXP (addr_rtx, 1));
      addr_rtx = XEXP (addr_rtx, 0);
    }
  if (!REG_P (addr_rtx))
    return false;

  *base = addr_rtx;
  return true;
}

/* If the target storage locations of arguments MEM1 and MEM2 are
   adjacent, then return the argument that has the lower address.
   Otherwise, return NULL_RTX.  */

static rtx
adjacent_mem_locations (rtx mem1, rtx mem2)
{
  rtx reg1, reg2;
  HOST_WIDE_INT off1, size1, off2, size2;

  if (MEM_SIZE_KNOWN_P (mem1)
      && MEM_SIZE_KNOWN_P (mem2)
      && get_memref_parts (mem1, &reg1, &off1, &size1)
      && get_memref_parts (mem2, &reg2, &off2, &size2)
      && REGNO (reg1) == REGNO (reg2))
    {
      if (off1 + size1 == off2)
	return mem1;
      else if (off2 + size2 == off1)
	return mem2;
    }

  return NULL_RTX;
}

/* This function returns true if it can be determined that the two MEM
   locations overlap by at least 1 byte based on base reg/offset/size.  */

static bool
mem_locations_overlap (rtx mem1, rtx mem2)
{
  rtx reg1, reg2;
  HOST_WIDE_INT off1, size1, off2, size2;

  if (get_memref_parts (mem1, &reg1, &off1, &size1)
      && get_memref_parts (mem2, &reg2, &off2, &size2))
    return ((REGNO (reg1) == REGNO (reg2))
	    && (((off1 <= off2) && (off1 + size1 > off2))
		|| ((off2 <= off1) && (off2 + size2 > off1))));

  return false;
}
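/* Editor's note: illustrative sketch only, not part of the original file.
   Once both references resolve to the same base register, the overlap test
   above is plain half-open interval intersection on [off, off + size); a
   hypothetical standalone version of the arithmetic:  */
#if 0
static int
example_ranges_overlap (long off1, long size1, long off2, long size2)
{
  return (off1 <= off2 && off1 + size1 > off2)
	 || (off2 <= off1 && off2 + size2 > off1);
}
#endif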
18262 /* A C statement (sans semicolon) to update the integer scheduling
18263 priority INSN_PRIORITY (INSN). Increase the priority to execute the
18264 INSN earlier, reduce the priority to execute INSN later. Do not
18265 define this macro if you do not need to adjust the scheduling
18266 priorities of insns. */
18269 rs6000_adjust_priority (rtx_insn
*insn ATTRIBUTE_UNUSED
, int priority
)
18271 rtx load_mem
, str_mem
;
18272 /* On machines (like the 750) which have asymmetric integer units,
18273 where one integer unit can do multiply and divides and the other
18274 can't, reduce the priority of multiply/divide so it is scheduled
18275 before other integer operations. */
18278 if (! INSN_P (insn
))
18281 if (GET_CODE (PATTERN (insn
)) == USE
)
18284 switch (rs6000_tune
) {
18285 case PROCESSOR_PPC750
:
18286 switch (get_attr_type (insn
))
18293 fprintf (stderr
, "priority was %#x (%d) before adjustment\n",
18294 priority
, priority
);
18295 if (priority
>= 0 && priority
< 0x01000000)
18302 if (insn_must_be_first_in_group (insn
)
18303 && reload_completed
18304 && current_sched_info
->sched_max_insns_priority
18305 && rs6000_sched_restricted_insns_priority
)
18308 /* Prioritize insns that can be dispatched only in the first
18310 if (rs6000_sched_restricted_insns_priority
== 1)
18311 /* Attach highest priority to insn. This means that in
18312 haifa-sched.cc:ready_sort(), dispatch-slot restriction considerations
18313 precede 'priority' (critical path) considerations. */
18314 return current_sched_info
->sched_max_insns_priority
;
18315 else if (rs6000_sched_restricted_insns_priority
== 2)
18316 /* Increase priority of insn by a minimal amount. This means that in
18317 haifa-sched.cc:ready_sort(), only 'priority' (critical path)
18318 considerations precede dispatch-slot restriction considerations. */
18319 return (priority
+ 1);
18322 if (rs6000_tune
== PROCESSOR_POWER6
18323 && ((load_store_pendulum
== -2 && is_load_insn (insn
, &load_mem
))
18324 || (load_store_pendulum
== 2 && is_store_insn (insn
, &str_mem
))))
18325 /* Attach highest priority to insn if the scheduler has just issued two
18326 stores and this instruction is a load, or two loads and this instruction
18327 is a store. Power6 wants loads and stores scheduled alternately
18329 return current_sched_info
->sched_max_insns_priority
;
18334 /* Return true if the instruction is nonpipelined on the Cell. */
18336 is_nonpipeline_insn (rtx_insn
*insn
)
18338 enum attr_type type
;
18339 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18340 || GET_CODE (PATTERN (insn
)) == USE
18341 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18344 type
= get_attr_type (insn
);
18345 if (type
== TYPE_MUL
18346 || type
== TYPE_DIV
18347 || type
== TYPE_SDIV
18348 || type
== TYPE_DDIV
18349 || type
== TYPE_SSQRT
18350 || type
== TYPE_DSQRT
18351 || type
== TYPE_MFCR
18352 || type
== TYPE_MFCRF
18353 || type
== TYPE_MFJMPR
)
18361 /* Return how many instructions the machine can issue per cycle. */
18364 rs6000_issue_rate (void)
18366 /* Unless scheduling for register pressure, use issue rate of 1 for
18367 first scheduling pass to decrease degradation. */
18368 if (!reload_completed
&& !flag_sched_pressure
)
18371 switch (rs6000_tune
) {
18372 case PROCESSOR_RS64A
:
18373 case PROCESSOR_PPC601
: /* ? */
18374 case PROCESSOR_PPC7450
:
18376 case PROCESSOR_PPC440
:
18377 case PROCESSOR_PPC603
:
18378 case PROCESSOR_PPC750
:
18379 case PROCESSOR_PPC7400
:
18380 case PROCESSOR_PPC8540
:
18381 case PROCESSOR_PPC8548
:
18382 case PROCESSOR_CELL
:
18383 case PROCESSOR_PPCE300C2
:
18384 case PROCESSOR_PPCE300C3
:
18385 case PROCESSOR_PPCE500MC
:
18386 case PROCESSOR_PPCE500MC64
:
18387 case PROCESSOR_PPCE5500
:
18388 case PROCESSOR_PPCE6500
:
18389 case PROCESSOR_TITAN
:
18391 case PROCESSOR_PPC476
:
18392 case PROCESSOR_PPC604
:
18393 case PROCESSOR_PPC604e
:
18394 case PROCESSOR_PPC620
:
18395 case PROCESSOR_PPC630
:
18397 case PROCESSOR_POWER4
:
18398 case PROCESSOR_POWER5
:
18399 case PROCESSOR_POWER6
:
18400 case PROCESSOR_POWER7
:
18402 case PROCESSOR_POWER8
:
18404 case PROCESSOR_POWER9
:
18406 case PROCESSOR_POWER10
:
18413 /* Return how many instructions to look ahead for better insn
18417 rs6000_use_sched_lookahead (void)
18419 switch (rs6000_tune
)
18421 case PROCESSOR_PPC8540
:
18422 case PROCESSOR_PPC8548
:
18425 case PROCESSOR_CELL
:
18426 return (reload_completed
? 8 : 0);
18433 /* We are choosing insn from the ready queue. Return zero if INSN can be
18436 rs6000_use_sched_lookahead_guard (rtx_insn
*insn
, int ready_index
)
18438 if (ready_index
== 0)
18441 if (rs6000_tune
!= PROCESSOR_CELL
)
18444 gcc_assert (insn
!= NULL_RTX
&& INSN_P (insn
));
18446 if (!reload_completed
18447 || is_nonpipeline_insn (insn
)
18448 || is_microcoded_insn (insn
))
18454 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18455 and return true. */
18458 find_mem_ref (rtx pat
, rtx
*mem_ref
)
18463 /* stack_tie does not produce any real memory traffic. */
18464 if (tie_operand (pat
, VOIDmode
))
18473 /* Recursively process the pattern. */
18474 fmt
= GET_RTX_FORMAT (GET_CODE (pat
));
18476 for (i
= GET_RTX_LENGTH (GET_CODE (pat
)) - 1; i
>= 0; i
--)
18480 if (find_mem_ref (XEXP (pat
, i
), mem_ref
))
18483 else if (fmt
[i
] == 'E')
18484 for (j
= XVECLEN (pat
, i
) - 1; j
>= 0; j
--)
18486 if (find_mem_ref (XVECEXP (pat
, i
, j
), mem_ref
))
/* Determine if PAT is a PATTERN of a load insn.  */

static bool
is_load_insn1 (rtx pat, rtx *load_mem)
{
  if (!pat || pat == NULL_RTX)
    return false;

  if (GET_CODE (pat) == SET)
    {
      if (REG_P (SET_DEST (pat)))
	return find_mem_ref (SET_SRC (pat), load_mem);
      else
	return false;
    }

  if (GET_CODE (pat) == PARALLEL)
    {
      int i;

      for (i = 0; i < XVECLEN (pat, 0); i++)
	if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
	  return true;
    }

  return false;
}

/* Determine if INSN loads from memory.  */

static bool
is_load_insn (rtx insn, rtx *load_mem)
{
  if (!insn || !INSN_P (insn))
    return false;

  if (CALL_P (insn))
    return false;

  return is_load_insn1 (PATTERN (insn), load_mem);
}

/* Determine if PAT is a PATTERN of a store insn.  */

static bool
is_store_insn1 (rtx pat, rtx *str_mem)
{
  if (!pat || pat == NULL_RTX)
    return false;

  if (GET_CODE (pat) == SET)
    {
      if (REG_P (SET_SRC (pat)) || SUBREG_P (SET_SRC (pat)))
	return find_mem_ref (SET_DEST (pat), str_mem);
      else
	return false;
    }

  if (GET_CODE (pat) == PARALLEL)
    {
      int i;

      for (i = 0; i < XVECLEN (pat, 0); i++)
	if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
	  return true;
    }

  return false;
}

/* Determine if INSN stores to memory.  */

static bool
is_store_insn (rtx insn, rtx *str_mem)
{
  if (!insn || !INSN_P (insn))
    return false;

  return is_store_insn1 (PATTERN (insn), str_mem);
}
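/* Editor's note: illustrative example, not part of the original file.  The
   two walkers above classify patterns such as

     (set (mem:SI (reg:DI 9)) (reg:SI 10))              ; store
     (set (reg:SI 10)
          (mem:SI (plus:DI (reg:DI 9) (const_int 4))))  ; load

   including the same SETs wrapped in a PARALLEL (for example
   store-with-update forms), and hand the MEM back through *LOAD_MEM or
   *STR_MEM for the scheduler hooks below.  */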
18575 /* Return whether TYPE is a Power9 pairable vector instruction type. */
18578 is_power9_pairable_vec_type (enum attr_type type
)
18582 case TYPE_VECSIMPLE
:
18583 case TYPE_VECCOMPLEX
:
18587 case TYPE_VECFLOAT
:
18589 case TYPE_VECDOUBLE
:
18597 /* Returns whether the dependence between INSN and NEXT is considered
18598 costly by the given target. */
18601 rs6000_is_costly_dependence (dep_t dep
, int cost
, int distance
)
18605 rtx load_mem
, str_mem
;
18607 /* If the flag is not enabled - no dependence is considered costly;
18608 allow all dependent insns in the same group.
18609 This is the most aggressive option. */
18610 if (rs6000_sched_costly_dep
== no_dep_costly
)
18613 /* If the flag is set to 1 - a dependence is always considered costly;
18614 do not allow dependent instructions in the same group.
18615 This is the most conservative option. */
18616 if (rs6000_sched_costly_dep
== all_deps_costly
)
18619 insn
= DEP_PRO (dep
);
18620 next
= DEP_CON (dep
);
18622 if (rs6000_sched_costly_dep
== store_to_load_dep_costly
18623 && is_load_insn (next
, &load_mem
)
18624 && is_store_insn (insn
, &str_mem
))
18625 /* Prevent load after store in the same group. */
18628 if (rs6000_sched_costly_dep
== true_store_to_load_dep_costly
18629 && is_load_insn (next
, &load_mem
)
18630 && is_store_insn (insn
, &str_mem
)
18631 && DEP_TYPE (dep
) == REG_DEP_TRUE
18632 && mem_locations_overlap(str_mem
, load_mem
))
18633 /* Prevent load after store in the same group if it is a true
18637 /* The flag is set to X; dependences with latency >= X are considered costly,
18638 and will not be scheduled in the same group. */
18639 if (rs6000_sched_costly_dep
<= max_dep_latency
18640 && ((cost
- distance
) >= (int)rs6000_sched_costly_dep
))
18646 /* Return the next insn after INSN that is found before TAIL is reached,
18647 skipping any "non-active" insns - insns that will not actually occupy
18648 an issue slot. Return NULL_RTX if such an insn is not found. */
18651 get_next_active_insn (rtx_insn
*insn
, rtx_insn
*tail
)
18653 if (insn
== NULL_RTX
|| insn
== tail
)
18658 insn
= NEXT_INSN (insn
);
18659 if (insn
== NULL_RTX
|| insn
== tail
)
18663 || JUMP_P (insn
) || JUMP_TABLE_DATA_P (insn
)
18664 || (NONJUMP_INSN_P (insn
)
18665 && GET_CODE (PATTERN (insn
)) != USE
18666 && GET_CODE (PATTERN (insn
)) != CLOBBER
18667 && INSN_CODE (insn
) != CODE_FOR_stack_tie
))
/* Move instruction at POS to the end of the READY list.  */

static void
move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
{
  int i;
  rtx_insn *tmp = ready[pos];

  for (i = pos; i < lastpos; i++)
    ready[i] = ready[i + 1];
  ready[lastpos] = tmp;
}
18687 /* Do Power6 specific sched_reorder2 reordering of ready list. */
18690 power6_sched_reorder2 (rtx_insn
**ready
, int lastpos
)
18692 /* For Power6, we need to handle some special cases to try and keep the
18693 store queue from overflowing and triggering expensive flushes.
18695 This code monitors how load and store instructions are being issued
18696 and skews the ready list one way or the other to increase the likelihood
18697 that a desired instruction is issued at the proper time.
18699 A couple of things are done. First, we maintain a "load_store_pendulum"
18700 to track the current state of load/store issue.
18702 - If the pendulum is at zero, then no loads or stores have been
18703 issued in the current cycle so we do nothing.
18705 - If the pendulum is 1, then a single load has been issued in this
18706 cycle and we attempt to locate another load in the ready list to
18709 - If the pendulum is -2, then two stores have already been
18710 issued in this cycle, so we increase the priority of the first load
18711 in the ready list to increase it's likelihood of being chosen first
18714 - If the pendulum is -1, then a single store has been issued in this
18715 cycle and we attempt to locate another store in the ready list to
18716 issue with it, preferring a store to an adjacent memory location to
18717 facilitate store pairing in the store queue.
18719 - If the pendulum is 2, then two loads have already been
18720 issued in this cycle, so we increase the priority of the first store
18721 in the ready list to increase it's likelihood of being chosen first
18724 - If the pendulum < -2 or > 2, then do nothing.
18726 Note: This code covers the most common scenarios. There exist non
18727 load/store instructions which make use of the LSU and which
18728 would need to be accounted for to strictly model the behavior
18729 of the machine. Those instructions are currently unaccounted
18730 for to help minimize compile time overhead of this code.
18733 rtx load_mem
, str_mem
;
18735 if (is_store_insn (last_scheduled_insn
, &str_mem
))
18736 /* Issuing a store, swing the load_store_pendulum to the left */
18737 load_store_pendulum
--;
18738 else if (is_load_insn (last_scheduled_insn
, &load_mem
))
18739 /* Issuing a load, swing the load_store_pendulum to the right */
18740 load_store_pendulum
++;
18742 return cached_can_issue_more
;
18744 /* If the pendulum is balanced, or there is only one instruction on
18745 the ready list, then all is well, so return. */
18746 if ((load_store_pendulum
== 0) || (lastpos
<= 0))
18747 return cached_can_issue_more
;
18749 if (load_store_pendulum
== 1)
18751 /* A load has been issued in this cycle. Scan the ready list
18752 for another load to issue with it */
18757 if (is_load_insn (ready
[pos
], &load_mem
))
18759 /* Found a load. Move it to the head of the ready list,
18760 and adjust it's priority so that it is more likely to
18762 move_to_end_of_ready (ready
, pos
, lastpos
);
18764 if (!sel_sched_p ()
18765 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
18766 INSN_PRIORITY (ready
[lastpos
])++;
18772 else if (load_store_pendulum
== -2)
18774 /* Two stores have been issued in this cycle. Increase the
18775 priority of the first load in the ready list to favor it for
18776 issuing in the next cycle. */
18781 if (is_load_insn (ready
[pos
], &load_mem
)
18783 && INSN_PRIORITY_KNOWN (ready
[pos
]))
18785 INSN_PRIORITY (ready
[pos
])++;
18787 /* Adjust the pendulum to account for the fact that a load
18788 was found and increased in priority. This is to prevent
18789 increasing the priority of multiple loads */
18790 load_store_pendulum
--;
18797 else if (load_store_pendulum
== -1)
18799 /* A store has been issued in this cycle. Scan the ready list for
18800 another store to issue with it, preferring a store to an adjacent
18802 int first_store_pos
= -1;
18808 if (is_store_insn (ready
[pos
], &str_mem
))
18811 /* Maintain the index of the first store found on the
18813 if (first_store_pos
== -1)
18814 first_store_pos
= pos
;
18816 if (is_store_insn (last_scheduled_insn
, &str_mem2
)
18817 && adjacent_mem_locations (str_mem
, str_mem2
))
18819 /* Found an adjacent store. Move it to the head of the
18820 ready list, and adjust it's priority so that it is
18821 more likely to stay there */
18822 move_to_end_of_ready (ready
, pos
, lastpos
);
18824 if (!sel_sched_p ()
18825 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
18826 INSN_PRIORITY (ready
[lastpos
])++;
18828 first_store_pos
= -1;
18836 if (first_store_pos
>= 0)
18838 /* An adjacent store wasn't found, but a non-adjacent store was,
18839 so move the non-adjacent store to the front of the ready
18840 list, and adjust its priority so that it is more likely to
18842 move_to_end_of_ready (ready
, first_store_pos
, lastpos
);
18843 if (!sel_sched_p ()
18844 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
18845 INSN_PRIORITY (ready
[lastpos
])++;
18848 else if (load_store_pendulum
== 2)
18850 /* Two loads have been issued in this cycle. Increase the priority
18851 of the first store in the ready list to favor it for issuing in
18857 if (is_store_insn (ready
[pos
], &str_mem
)
18859 && INSN_PRIORITY_KNOWN (ready
[pos
]))
18861 INSN_PRIORITY (ready
[pos
])++;
18863 /* Adjust the pendulum to account for the fact that a store
18864 was found and increased in priority. This is to prevent
18865 increasing the priority of multiple stores */
18866 load_store_pendulum
++;
18874 return cached_can_issue_more
;
18877 /* Do Power9 specific sched_reorder2 reordering of ready list. */
18880 power9_sched_reorder2 (rtx_insn
**ready
, int lastpos
)
18883 enum attr_type type
, type2
;
18885 type
= get_attr_type (last_scheduled_insn
);
18887 /* Try to issue fixed point divides back-to-back in pairs so they will be
18888 routed to separate execution units and execute in parallel. */
18889 if (type
== TYPE_DIV
&& divide_cnt
== 0)
18891 /* First divide has been scheduled. */
18894 /* Scan the ready list looking for another divide, if found move it
18895 to the end of the list so it is chosen next. */
18899 if (recog_memoized (ready
[pos
]) >= 0
18900 && get_attr_type (ready
[pos
]) == TYPE_DIV
)
18902 move_to_end_of_ready (ready
, pos
, lastpos
);
18910 /* Last insn was the 2nd divide or not a divide, reset the counter. */
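/* For example: after the first TYPE_DIV insn of a pair is issued, any other
   divide found in the ready list is moved to its end so it is chosen next,
   letting the two divides be routed to separate execution units and run in
   parallel; any other insn type resets the pairing counter.  */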
18913 /* The best dispatch throughput for vector and vector load insns can be
18914 achieved by interleaving a vector and vector load such that they'll
18915 dispatch to the same superslice. If this pairing cannot be achieved
18916 then it is best to pair vector insns together and vector load insns
18919 To aid in this pairing, vec_pairing maintains the current state with
18920 the following values:
18922 0 : Initial state, no vecload/vector pairing has been started.
18924 1 : A vecload or vector insn has been issued and a candidate for
18925 pairing has been found and moved to the end of the ready
18927 if (type
== TYPE_VECLOAD
)
18929 /* Issued a vecload. */
18930 if (vec_pairing
== 0)
18932 int vecload_pos
= -1;
18933 /* We issued a single vecload, look for a vector insn to pair it
18934 with. If one isn't found, try to pair another vecload. */
18938 if (recog_memoized (ready
[pos
]) >= 0)
18940 type2
= get_attr_type (ready
[pos
]);
18941 if (is_power9_pairable_vec_type (type2
))
18943 /* Found a vector insn to pair with, move it to the
18944 end of the ready list so it is scheduled next. */
18945 move_to_end_of_ready (ready
, pos
, lastpos
);
18947 return cached_can_issue_more
;
18949 else if (type2
== TYPE_VECLOAD
&& vecload_pos
== -1)
18950 /* Remember position of first vecload seen. */
18955 if (vecload_pos
>= 0)
18957 /* Didn't find a vector to pair with but did find a vecload,
18958 move it to the end of the ready list. */
18959 move_to_end_of_ready (ready
, vecload_pos
, lastpos
);
18961 return cached_can_issue_more
;
18965 else if (is_power9_pairable_vec_type (type
))
18967 /* Issued a vector operation. */
18968 if (vec_pairing
== 0)
18971 /* We issued a single vector insn, look for a vecload to pair it
18972 with. If one isn't found, try to pair another vector. */
18976 if (recog_memoized (ready
[pos
]) >= 0)
18978 type2
= get_attr_type (ready
[pos
]);
18979 if (type2
== TYPE_VECLOAD
)
18981 /* Found a vecload insn to pair with, move it to the
18982 end of the ready list so it is scheduled next. */
18983 move_to_end_of_ready (ready
, pos
, lastpos
);
18985 return cached_can_issue_more
;
18987 else if (is_power9_pairable_vec_type (type2
)
18989 /* Remember position of first vector insn seen. */
18996 /* Didn't find a vecload to pair with but did find a vector
18997 insn, move it to the end of the ready list. */
18998 move_to_end_of_ready (ready
, vec_pos
, lastpos
);
19000 return cached_can_issue_more
;
19005 /* We've either finished a vec/vecload pair, couldn't find an insn to
19006 continue the current pair, or the last insn had nothing to do with
19007 pairing. In any case, reset the state. */
19011 return cached_can_issue_more
;
/* Determine if INSN is a store to memory that can be fused with a similar
   adjacent store.  */

static bool
is_fusable_store (rtx_insn *insn, rtx *str_mem)
{
  /* Insn must be a non-prefixed base+disp form store.  */
  if (is_store_insn (insn, str_mem)
      && get_attr_prefixed (insn) == PREFIXED_NO
      && get_attr_update (insn) == UPDATE_NO
      && get_attr_indexed (insn) == INDEXED_NO)
    {
      /* Further restrictions by mode and size.  */
      if (!MEM_SIZE_KNOWN_P (*str_mem))
        return false;

      machine_mode mode = GET_MODE (*str_mem);
      HOST_WIDE_INT size = MEM_SIZE (*str_mem);

      if (INTEGRAL_MODE_P (mode))
        /* Must be word or dword size.  */
        return (size == 4 || size == 8);
      else if (FLOAT_MODE_P (mode))
        /* Must be dword size.  */
        return (size == 8);
19044 /* Do Power10 specific reordering of the ready list. */
19047 power10_sched_reorder (rtx_insn
**ready
, int lastpos
)
19051 /* Do store fusion during sched2 only. */
19052 if (!reload_completed
)
19053 return cached_can_issue_more
;
19055 /* If the prior insn finished off a store fusion pair then simply
19056 reset the counter and return, nothing more to do. */
19057 if (load_store_pendulum
!= 0)
19059 load_store_pendulum
= 0;
19060 return cached_can_issue_more
;
19063 /* Try to pair certain store insns to adjacent memory locations
19064 so that the hardware will fuse them to a single operation. */
19065 if (TARGET_P10_FUSION
&& is_fusable_store (last_scheduled_insn
, &mem1
))
19068 /* A fusable store was just scheduled. Scan the ready list for another
19069 store that it can fuse with. */
19074 /* GPR stores can be ascending or descending offsets, FPR/VSR stores
19075 must be ascending only. */
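	 Note: "ascending only" is enforced by the FLOAT_MODE_P arm below,
	 which requires adjacent_mem_locations (mem1, mem2) to return MEM1;
	 together with the comment above, this means the store just scheduled
	 is expected to be the lower-addressed half of the pair.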
          if (is_fusable_store (ready[pos], &mem2)
              && ((INTEGRAL_MODE_P (GET_MODE (mem1))
                   && adjacent_mem_locations (mem1, mem2))
                  || (FLOAT_MODE_P (GET_MODE (mem1))
                      && (adjacent_mem_locations (mem1, mem2) == mem1))))
            {
              /* Found a fusable store.  Move it to the end of the ready list
                 so it is scheduled next.  */
              move_to_end_of_ready (ready, pos, lastpos);
              load_store_pendulum = -1;
19093 return cached_can_issue_more
;
19096 /* We are about to begin issuing insns for this clock cycle. */
static int
rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
                      rtx_insn **ready ATTRIBUTE_UNUSED,
                      int *pn_ready ATTRIBUTE_UNUSED,
                      int clock_var ATTRIBUTE_UNUSED)
{
  int n_ready = *pn_ready;

  if (sched_verbose)
    fprintf (dump, "// rs6000_sched_reorder :\n");

  /* Reorder the ready list, if the second to last ready insn
     is a nonpipeline insn.  */
  if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
    {
      if (is_nonpipeline_insn (ready[n_ready - 1])
          && (recog_memoized (ready[n_ready - 2]) > 0))
        /* Simply swap first two insns.  */
        std::swap (ready[n_ready - 1], ready[n_ready - 2]);
    }

  if (rs6000_tune == PROCESSOR_POWER6)
    load_store_pendulum = 0;

  /* Do Power10 dependent reordering.  */
  if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
    power10_sched_reorder (ready, n_ready - 1);

  return rs6000_issue_rate ();
}
19129 /* Like rs6000_sched_reorder, but called after issuing each insn. */
static int
rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
                       int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
{
  if (sched_verbose)
    fprintf (dump, "// rs6000_sched_reorder2 :\n");

  /* Do Power6 dependent reordering if necessary.  */
  if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
    return power6_sched_reorder2 (ready, *pn_ready - 1);

  /* Do Power9 dependent reordering if necessary.  */
  if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
      && recog_memoized (last_scheduled_insn) >= 0)
    return power9_sched_reorder2 (ready, *pn_ready - 1);

  /* Do Power10 dependent reordering.  */
  if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
    return power10_sched_reorder (ready, *pn_ready - 1);

  return cached_can_issue_more;
}
19154 /* Return whether the presence of INSN causes a dispatch group termination
19155 of group WHICH_GROUP.
19157 If WHICH_GROUP == current_group, this function will return true if INSN
19158 causes the termination of the current group (i.e., the dispatch group to
19159 which INSN belongs). This means that INSN will be the last insn in the
19160 group it belongs to.
19162 If WHICH_GROUP == previous_group, this function will return true if INSN
19163 causes the termination of the previous group (i.e., the dispatch group that
19164 precedes the group to which INSN belongs). This means that INSN will be
19165 the first insn in the group it belongs to. */
19168 insn_terminates_group_p (rtx_insn
*insn
, enum group_termination which_group
)
19175 first
= insn_must_be_first_in_group (insn
);
19176 last
= insn_must_be_last_in_group (insn
);
19181 if (which_group
== current_group
)
19183 else if (which_group
== previous_group
)
19191 insn_must_be_first_in_group (rtx_insn
*insn
)
19193 enum attr_type type
;
19197 || DEBUG_INSN_P (insn
)
19198 || GET_CODE (PATTERN (insn
)) == USE
19199 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
19202 switch (rs6000_tune
)
19204 case PROCESSOR_POWER5
:
19205 if (is_cracked_insn (insn
))
19208 case PROCESSOR_POWER4
:
19209 if (is_microcoded_insn (insn
))
19212 if (!rs6000_sched_groups
)
19215 type
= get_attr_type (insn
);
19222 case TYPE_CR_LOGICAL
:
19235 case PROCESSOR_POWER6
:
19236 type
= get_attr_type (insn
);
19245 case TYPE_FPCOMPARE
:
19256 if (get_attr_dot (insn
) == DOT_NO
19257 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
19262 if (get_attr_size (insn
) == SIZE_32
)
19270 if (get_attr_update (insn
) == UPDATE_YES
)
19278 case PROCESSOR_POWER7
:
19279 type
= get_attr_type (insn
);
19283 case TYPE_CR_LOGICAL
:
19297 if (get_attr_dot (insn
) == DOT_YES
)
19302 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19303 || get_attr_update (insn
) == UPDATE_YES
)
19310 if (get_attr_update (insn
) == UPDATE_YES
)
19318 case PROCESSOR_POWER8
:
19319 type
= get_attr_type (insn
);
19323 case TYPE_CR_LOGICAL
:
19331 case TYPE_VECSTORE
:
19338 if (get_attr_dot (insn
) == DOT_YES
)
19343 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19344 || get_attr_update (insn
) == UPDATE_YES
)
19349 if (get_attr_update (insn
) == UPDATE_YES
19350 && get_attr_indexed (insn
) == INDEXED_YES
)
19366 insn_must_be_last_in_group (rtx_insn
*insn
)
19368 enum attr_type type
;
19372 || DEBUG_INSN_P (insn
)
19373 || GET_CODE (PATTERN (insn
)) == USE
19374 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
19377 switch (rs6000_tune
) {
19378 case PROCESSOR_POWER4
:
19379 case PROCESSOR_POWER5
:
19380 if (is_microcoded_insn (insn
))
19383 if (is_branch_slot_insn (insn
))
19387 case PROCESSOR_POWER6
:
19388 type
= get_attr_type (insn
);
19396 case TYPE_FPCOMPARE
:
19407 if (get_attr_dot (insn
) == DOT_NO
19408 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
19413 if (get_attr_size (insn
) == SIZE_32
)
19421 case PROCESSOR_POWER7
:
19422 type
= get_attr_type (insn
);
19432 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19433 && get_attr_update (insn
) == UPDATE_YES
)
19438 if (get_attr_update (insn
) == UPDATE_YES
19439 && get_attr_indexed (insn
) == INDEXED_YES
)
19447 case PROCESSOR_POWER8
:
19448 type
= get_attr_type (insn
);
19460 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19461 && get_attr_update (insn
) == UPDATE_YES
)
19466 if (get_attr_update (insn
) == UPDATE_YES
19467 && get_attr_indexed (insn
) == INDEXED_YES
)
19482 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
19483 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
19486 is_costly_group (rtx
*group_insns
, rtx next_insn
)
19489 int issue_rate
= rs6000_issue_rate ();
19491 for (i
= 0; i
< issue_rate
; i
++)
19493 sd_iterator_def sd_it
;
19495 rtx insn
= group_insns
[i
];
19500 FOR_EACH_DEP (insn
, SD_LIST_RES_FORW
, sd_it
, dep
)
19502 rtx next
= DEP_CON (dep
);
19504 if (next
== next_insn
19505 && rs6000_is_costly_dependence (dep
, dep_cost (dep
), 0))
19513 /* Utility of the function redefine_groups.
19514 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
19515 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
19516 to keep it "far" (in a separate group) from GROUP_INSNS, following
19517 one of the following schemes, depending on the value of the flag
19518 -minsert-sched-nops = X:
19519 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
19520 in order to force NEXT_INSN into a separate group.
19521 (2) X < sched_finish_regroup_exact: insert exactly X nops.
19522 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
19523 insertion (has a group just ended, how many vacant issue slots remain in the
19524 last group, and how many dispatch groups were encountered so far). */
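/* For example: with -minsert-sched-nops set to the number 2, a costly group
   is kept at distance by emitting exactly two nops before NEXT_INSN; with
   the regroup-exact scheme (scheme (1) above), nops are emitted until the
   current dispatch group is exhausted, so NEXT_INSN is forced to start a
   new group.  */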
19527 force_new_group (int sched_verbose
, FILE *dump
, rtx
*group_insns
,
19528 rtx_insn
*next_insn
, bool *group_end
, int can_issue_more
,
19533 int issue_rate
= rs6000_issue_rate ();
19534 bool end
= *group_end
;
19537 if (next_insn
== NULL_RTX
|| DEBUG_INSN_P (next_insn
))
19538 return can_issue_more
;
19540 if (rs6000_sched_insert_nops
> sched_finish_regroup_exact
)
19541 return can_issue_more
;
19543 force
= is_costly_group (group_insns
, next_insn
);
19545 return can_issue_more
;
19547 if (sched_verbose
> 6)
19548 fprintf (dump
,"force: group count = %d, can_issue_more = %d\n",
19549 *group_count
,can_issue_more
);
19551 if (rs6000_sched_insert_nops
== sched_finish_regroup_exact
)
19554 can_issue_more
= 0;
19556 /* Since only a branch can be issued in the last issue_slot, it is
19557 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
19558 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
19559 in this case the last nop will start a new group and the branch
19560 will be forced to the new group. */
19561 if (can_issue_more
&& !is_branch_slot_insn (next_insn
))
19564 /* Do we have a special group ending nop? */
19565 if (rs6000_tune
== PROCESSOR_POWER6
|| rs6000_tune
== PROCESSOR_POWER7
19566 || rs6000_tune
== PROCESSOR_POWER8
)
19568 nop
= gen_group_ending_nop ();
19569 emit_insn_before (nop
, next_insn
);
19570 can_issue_more
= 0;
19573 while (can_issue_more
> 0)
19576 emit_insn_before (nop
, next_insn
);
19584 if (rs6000_sched_insert_nops
< sched_finish_regroup_exact
)
19586 int n_nops
= rs6000_sched_insert_nops
;
19588 /* Nops can't be issued from the branch slot, so the effective
19589 issue_rate for nops is 'issue_rate - 1'. */
19590 if (can_issue_more
== 0)
19591 can_issue_more
= issue_rate
;
19593 if (can_issue_more
== 0)
19595 can_issue_more
= issue_rate
- 1;
19598 for (i
= 0; i
< issue_rate
; i
++)
19600 group_insns
[i
] = 0;
19607 emit_insn_before (nop
, next_insn
);
19608 if (can_issue_more
== issue_rate
- 1) /* new group begins */
19611 if (can_issue_more
== 0)
19613 can_issue_more
= issue_rate
- 1;
19616 for (i
= 0; i
< issue_rate
; i
++)
19618 group_insns
[i
] = 0;
19624 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
19627 /* Is next_insn going to start a new group? */
19630 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
19631 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
19632 || (can_issue_more
< issue_rate
&&
19633 insn_terminates_group_p (next_insn
, previous_group
)));
19634 if (*group_end
&& end
)
19637 if (sched_verbose
> 6)
19638 fprintf (dump
, "done force: group count = %d, can_issue_more = %d\n",
19639 *group_count
, can_issue_more
);
19640 return can_issue_more
;
19643 return can_issue_more
;
19646 /* This function tries to synch the dispatch groups that the compiler "sees"
19647 with the dispatch groups that the processor dispatcher is expected to
19648 form in practice. It tries to achieve this synchronization by forcing the
19649 estimated processor grouping on the compiler (as opposed to the function
19650 'pad_groups' which tries to force the scheduler's grouping on the processor).
19652 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
19653 examines the (estimated) dispatch groups that will be formed by the processor
19654 dispatcher. It marks these group boundaries to reflect the estimated
19655 processor grouping, overriding the grouping that the scheduler had marked.
19656 Depending on the value of the flag '-minsert-sched-nops' this function can
19657 force certain insns into separate groups or force a certain distance between
19658 them by inserting nops, for example, if there exists a "costly dependence"
19661 The function estimates the group boundaries that the processor will form as
19662 follows: It keeps track of how many vacant issue slots are available after
19663 each insn. A subsequent insn will start a new group if one of the following
19665 - no more vacant issue slots remain in the current dispatch group.
19666 - only the last issue slot, which is the branch slot, is vacant, but the next
19667 insn is not a branch.
19668 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
19669 which means that a cracked insn (which occupies two issue slots) can't be
19670 issued in this group.
19671 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
19672 start a new group. */
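/* For example: if only two issue slots of the current group remain (one of
   them the branch slot) and the next insn is a cracked insn needing two
   slots, it cannot be dispatched in this group, so it is treated as the
   start of a new group.  */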
19675 redefine_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
19678 rtx_insn
*insn
, *next_insn
;
19680 int can_issue_more
;
19683 int group_count
= 0;
19687 issue_rate
= rs6000_issue_rate ();
19688 group_insns
= XALLOCAVEC (rtx
, issue_rate
);
19689 for (i
= 0; i
< issue_rate
; i
++)
19691 group_insns
[i
] = 0;
19693 can_issue_more
= issue_rate
;
19695 insn
= get_next_active_insn (prev_head_insn
, tail
);
19698 while (insn
!= NULL_RTX
)
19700 slot
= (issue_rate
- can_issue_more
);
19701 group_insns
[slot
] = insn
;
19703 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
19704 if (insn_terminates_group_p (insn
, current_group
))
19705 can_issue_more
= 0;
19707 next_insn
= get_next_active_insn (insn
, tail
);
19708 if (next_insn
== NULL_RTX
)
19709 return group_count
+ 1;
19711 /* Is next_insn going to start a new group? */
19713 = (can_issue_more
== 0
19714 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
19715 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
19716 || (can_issue_more
< issue_rate
&&
19717 insn_terminates_group_p (next_insn
, previous_group
)));
19719 can_issue_more
= force_new_group (sched_verbose
, dump
, group_insns
,
19720 next_insn
, &group_end
, can_issue_more
,
19726 can_issue_more
= 0;
19727 for (i
= 0; i
< issue_rate
; i
++)
19729 group_insns
[i
] = 0;
19733 if (GET_MODE (next_insn
) == TImode
&& can_issue_more
)
19734 PUT_MODE (next_insn
, VOIDmode
);
19735 else if (!can_issue_more
&& GET_MODE (next_insn
) != TImode
)
19736 PUT_MODE (next_insn
, TImode
);
19739 if (can_issue_more
== 0)
19740 can_issue_more
= issue_rate
;
19743 return group_count
;
19746 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
19747 dispatch group boundaries that the scheduler had marked. Pad with nops
19748 any dispatch groups which have vacant issue slots, in order to force the
19749 scheduler's grouping on the processor dispatcher. The function
19750 returns the number of dispatch groups found. */
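/* For example: if the scheduler closed a group while two non-branch issue
   slots were still vacant, two nops are emitted so that the hardware
   dispatcher forms the same group boundary the scheduler assumed.  */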
19753 pad_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
19756 rtx_insn
*insn
, *next_insn
;
19759 int can_issue_more
;
19761 int group_count
= 0;
19763 /* Initialize issue_rate. */
19764 issue_rate
= rs6000_issue_rate ();
19765 can_issue_more
= issue_rate
;
19767 insn
= get_next_active_insn (prev_head_insn
, tail
);
19768 next_insn
= get_next_active_insn (insn
, tail
);
19770 while (insn
!= NULL_RTX
)
19773 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
19775 group_end
= (next_insn
== NULL_RTX
|| GET_MODE (next_insn
) == TImode
);
19777 if (next_insn
== NULL_RTX
)
19782 /* If the scheduler had marked group termination at this location
19783 (between insn and next_insn), and neither insn nor next_insn will
19784 force group termination, pad the group with nops to force group
19787 && (rs6000_sched_insert_nops
== sched_finish_pad_groups
)
19788 && !insn_terminates_group_p (insn
, current_group
)
19789 && !insn_terminates_group_p (next_insn
, previous_group
))
19791 if (!is_branch_slot_insn (next_insn
))
19794 while (can_issue_more
)
19797 emit_insn_before (nop
, next_insn
);
19802 can_issue_more
= issue_rate
;
19807 next_insn
= get_next_active_insn (insn
, tail
);
19810 return group_count
;
19813 /* We're beginning a new block. Initialize data structures as necessary. */
static void
rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
                   int sched_verbose ATTRIBUTE_UNUSED,
                   int max_ready ATTRIBUTE_UNUSED)
{
  last_scheduled_insn = NULL;
  load_store_pendulum = 0;
19826 /* The following function is called at the end of scheduling BB.
19827 After reload, it inserts nops to enforce insn group bundling. */
19830 rs6000_sched_finish (FILE *dump
, int sched_verbose
)
19835 fprintf (dump
, "=== Finishing schedule.\n");
19837 if (reload_completed
&& rs6000_sched_groups
)
19839 /* Do not run sched_finish hook when selective scheduling enabled. */
19840 if (sel_sched_p ())
19843 if (rs6000_sched_insert_nops
== sched_finish_none
)
19846 if (rs6000_sched_insert_nops
== sched_finish_pad_groups
)
19847 n_groups
= pad_groups (dump
, sched_verbose
,
19848 current_sched_info
->prev_head
,
19849 current_sched_info
->next_tail
);
19851 n_groups
= redefine_groups (dump
, sched_verbose
,
19852 current_sched_info
->prev_head
,
19853 current_sched_info
->next_tail
);
19855 if (sched_verbose
>= 6)
19857 fprintf (dump
, "ngroups = %d\n", n_groups
);
19858 print_rtl (dump
, current_sched_info
->prev_head
);
19859 fprintf (dump
, "Done finish_sched\n");
struct rs6000_sched_context
{
  short cached_can_issue_more;
  rtx_insn *last_scheduled_insn;
  int load_store_pendulum;
  int divide_cnt;
  int vec_pairing;
};

typedef struct rs6000_sched_context rs6000_sched_context_def;
typedef rs6000_sched_context_def *rs6000_sched_context_t;
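/* These context hooks allow the selective scheduler to save and restore the
   scheduling state above (issue count, last scheduled insn, load/store
   pendulum, and the Power9 divide/vector pairing counters) around its
   scheduling attempts.  */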
19876 /* Allocate store for new scheduling context. */
19878 rs6000_alloc_sched_context (void)
19880 return xmalloc (sizeof (rs6000_sched_context_def
));
/* If CLEAN_P is true, initialize _SC with clean data; otherwise initialize
   it from the current global scheduling context.  */

static void
rs6000_init_sched_context (void *_sc, bool clean_p)
{
  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;

  if (clean_p)
    {
      sc->cached_can_issue_more = 0;
      sc->last_scheduled_insn = NULL;
      sc->load_store_pendulum = 0;
      sc->divide_cnt = 0;
      sc->vec_pairing = 0;
    }
  else
    {
      sc->cached_can_issue_more = cached_can_issue_more;
      sc->last_scheduled_insn = last_scheduled_insn;
      sc->load_store_pendulum = load_store_pendulum;
      sc->divide_cnt = divide_cnt;
      sc->vec_pairing = vec_pairing;
    }
}
19908 /* Sets the global scheduling context to the one pointed to by _SC. */
static void
rs6000_set_sched_context (void *_sc)
{
  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;

  gcc_assert (sc != NULL);

  cached_can_issue_more = sc->cached_can_issue_more;
  last_scheduled_insn = sc->last_scheduled_insn;
  load_store_pendulum = sc->load_store_pendulum;
  divide_cnt = sc->divide_cnt;
  vec_pairing = sc->vec_pairing;
}
19925 rs6000_free_sched_context (void *_sc
)
19927 gcc_assert (_sc
!= NULL
);
19933 rs6000_sched_can_speculate_insn (rtx_insn
*insn
)
19935 switch (get_attr_type (insn
))
19950 /* Length in units of the trampoline for entering a nested function. */
19953 rs6000_trampoline_size (void)
19957 switch (DEFAULT_ABI
)
19960 gcc_unreachable ();
19963 ret
= (TARGET_32BIT
) ? 12 : 24;
19967 gcc_assert (!TARGET_32BIT
);
19973 ret
= (TARGET_32BIT
) ? 40 : 48;
19980 /* Emit RTL insns to initialize the variable parts of a trampoline.
19981 FNADDR is an RTX for the address of the function's pure code.
19982 CXT is an RTX for the static chain value for the function. */
19985 rs6000_trampoline_init (rtx m_tramp
, tree fndecl
, rtx cxt
)
19987 int regsize
= (TARGET_32BIT
) ? 4 : 8;
19988 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
19989 rtx ctx_reg
= force_reg (Pmode
, cxt
);
19990 rtx addr
= force_reg (Pmode
, XEXP (m_tramp
, 0));
19992 switch (DEFAULT_ABI
)
19995 gcc_unreachable ();
19997 /* Under AIX, just build the 3-word function descriptor. */
20000 rtx fnmem
, fn_reg
, toc_reg
;
20002 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS
)
20003 error ("you cannot take the address of a nested function if you use "
20004 "the %qs option", "-mno-pointers-to-nested-functions");
20006 fnmem
= gen_const_mem (Pmode
, force_reg (Pmode
, fnaddr
));
20007 fn_reg
= gen_reg_rtx (Pmode
);
20008 toc_reg
= gen_reg_rtx (Pmode
);
20010 /* Macro to shorten the code expansions below. */
20011 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
20013 m_tramp
= replace_equiv_address (m_tramp
, addr
);
20015 emit_move_insn (fn_reg
, MEM_PLUS (fnmem
, 0));
20016 emit_move_insn (toc_reg
, MEM_PLUS (fnmem
, regsize
));
20017 emit_move_insn (MEM_PLUS (m_tramp
, 0), fn_reg
);
20018 emit_move_insn (MEM_PLUS (m_tramp
, regsize
), toc_reg
);
20019 emit_move_insn (MEM_PLUS (m_tramp
, 2*regsize
), ctx_reg
);
20025 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
20029 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__trampoline_setup"),
20030 LCT_NORMAL
, VOIDmode
,
20032 GEN_INT (rs6000_trampoline_size ()), SImode
,
20040 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
20041 identifier as an argument, so the front end shouldn't look it up. */
20044 rs6000_attribute_takes_identifier_p (const_tree attr_id
)
20046 return is_attribute_p ("altivec", attr_id
);
20049 /* Handle the "altivec" attribute. The attribute may have
20050 arguments as follows:
20052 __attribute__((altivec(vector__)))
20053 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
20054 __attribute__((altivec(bool__))) (always followed by 'unsigned')
20056 and may appear more than once (e.g., 'vector bool char') in a
20057 given declaration. */
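/* For example: a source-level "vector unsigned short" normally reaches this
   handler as plain "unsigned short" carrying altivec(vector__), and the
   E_HImode case below rewrites it to unsigned_V8HI_type_node.  */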
20060 rs6000_handle_altivec_attribute (tree
*node
,
20061 tree name ATTRIBUTE_UNUSED
,
20063 int flags ATTRIBUTE_UNUSED
,
20064 bool *no_add_attrs
)
20066 tree type
= *node
, result
= NULL_TREE
;
20070 = ((args
&& TREE_CODE (args
) == TREE_LIST
&& TREE_VALUE (args
)
20071 && TREE_CODE (TREE_VALUE (args
)) == IDENTIFIER_NODE
)
20072 ? *IDENTIFIER_POINTER (TREE_VALUE (args
))
20075 while (POINTER_TYPE_P (type
)
20076 || TREE_CODE (type
) == FUNCTION_TYPE
20077 || TREE_CODE (type
) == METHOD_TYPE
20078 || TREE_CODE (type
) == ARRAY_TYPE
)
20079 type
= TREE_TYPE (type
);
20081 mode
= TYPE_MODE (type
);
20083 /* Check for invalid AltiVec type qualifiers. */
20084 if (type
== long_double_type_node
)
20085 error ("use of %<long double%> in AltiVec types is invalid");
20086 else if (type
== boolean_type_node
)
20087 error ("use of boolean types in AltiVec types is invalid");
20088 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
20089 error ("use of %<complex%> in AltiVec types is invalid");
20090 else if (DECIMAL_FLOAT_MODE_P (mode
))
20091 error ("use of decimal floating-point types in AltiVec types is invalid");
20092 else if (!TARGET_VSX
)
20094 if (type
== long_unsigned_type_node
|| type
== long_integer_type_node
)
20097 error ("use of %<long%> in AltiVec types is invalid for "
20098 "64-bit code without %qs", "-mvsx");
20099 else if (rs6000_warn_altivec_long
)
20100 warning (0, "use of %<long%> in AltiVec types is deprecated; "
20103 else if (type
== long_long_unsigned_type_node
20104 || type
== long_long_integer_type_node
)
20105 error ("use of %<long long%> in AltiVec types is invalid without %qs",
20107 else if (type
== double_type_node
)
20108 error ("use of %<double%> in AltiVec types is invalid without %qs",
20112 switch (altivec_type
)
20115 unsigned_p
= TYPE_UNSIGNED (type
);
20119 result
= (unsigned_p
? unsigned_V1TI_type_node
: V1TI_type_node
);
20122 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
20125 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
20128 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
20131 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
20133 case E_SFmode
: result
= V4SF_type_node
; break;
20134 case E_DFmode
: result
= V2DF_type_node
; break;
20135 /* If the user says 'vector int bool', we may be handed the 'bool'
20136 attribute _before_ the 'vector' attribute, and so select the
20137 proper type in the 'b' case below. */
20138 case E_V4SImode
: case E_V8HImode
: case E_V16QImode
: case E_V4SFmode
:
20139 case E_V2DImode
: case E_V2DFmode
:
20147 case E_TImode
: case E_V1TImode
: result
= bool_V1TI_type_node
; break;
20148 case E_DImode
: case E_V2DImode
: result
= bool_V2DI_type_node
; break;
20149 case E_SImode
: case E_V4SImode
: result
= bool_V4SI_type_node
; break;
20150 case E_HImode
: case E_V8HImode
: result
= bool_V8HI_type_node
; break;
20151 case E_QImode
: case E_V16QImode
: result
= bool_V16QI_type_node
;
20158 case E_V8HImode
: result
= pixel_V8HI_type_node
;
20164 /* Propagate qualifiers attached to the element type
20165 onto the vector type. */
20166 if (result
&& result
!= type
&& TYPE_QUALS (type
))
20167 result
= build_qualified_type (result
, TYPE_QUALS (type
));
20169 *no_add_attrs
= true; /* No need to hang on to the attribute. */
20172 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
20177 /* AltiVec defines five built-in scalar types that serve as vector
20178 elements; we must teach the compiler how to mangle them. The 128-bit
20179 floating point mangling is target-specific as well. MMA defines
20180 two built-in types to be used as opaque vector types. */
20182 static const char *
20183 rs6000_mangle_type (const_tree type
)
20185 type
= TYPE_MAIN_VARIANT (type
);
20187 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
20188 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
20189 && TREE_CODE (type
) != OPAQUE_TYPE
)
20192 if (type
== bool_char_type_node
) return "U6__boolc";
20193 if (type
== bool_short_type_node
) return "U6__bools";
20194 if (type
== pixel_type_node
) return "u7__pixel";
20195 if (type
== bool_int_type_node
) return "U6__booli";
20196 if (type
== bool_long_long_type_node
) return "U6__boolx";
20198 /* If long double uses the IBM 128-bit extended format, we need to
20199 distinguish between __ibm128 and long double. */
20200 if (type
== ibm128_float_type_node
&& TARGET_LONG_DOUBLE_128
20201 && !TARGET_IEEEQUAD
)
20202 return "u8__ibm128";
20204 if (SCALAR_FLOAT_TYPE_P (type
) && FLOAT128_IBM_P (TYPE_MODE (type
)))
20206 if (SCALAR_FLOAT_TYPE_P (type
) && FLOAT128_IEEE_P (TYPE_MODE (type
)))
20207 return "u9__ieee128";
20209 if (type
== vector_pair_type_node
)
20210 return "u13__vector_pair";
20211 if (type
== vector_quad_type_node
)
20212 return "u13__vector_quad";
20214 /* For all other types, use the default mangling. */
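/* For example: a parameter whose element type is bool_int_type_node
   contributes the "U6__booli" string above to its mangled name; the
   enclosing vector mangling itself comes from the generic mangler.  */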
20218 /* Handle a "longcall" or "shortcall" attribute; arguments as in
20219 struct attribute_spec.handler. */
20222 rs6000_handle_longcall_attribute (tree
*node
, tree name
,
20223 tree args ATTRIBUTE_UNUSED
,
20224 int flags ATTRIBUTE_UNUSED
,
20225 bool *no_add_attrs
)
20227 if (TREE_CODE (*node
) != FUNCTION_TYPE
20228 && TREE_CODE (*node
) != FIELD_DECL
20229 && TREE_CODE (*node
) != TYPE_DECL
)
20231 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
20233 *no_add_attrs
= true;
20239 /* Set longcall attributes on all functions declared when
20240 rs6000_default_long_calls is true. */
20242 rs6000_set_default_type_attributes (tree type
)
20244 if (rs6000_default_long_calls
20245 && (TREE_CODE (type
) == FUNCTION_TYPE
20246 || TREE_CODE (type
) == METHOD_TYPE
))
20247 TYPE_ATTRIBUTES (type
) = tree_cons (get_identifier ("longcall"),
20249 TYPE_ATTRIBUTES (type
));
20252 darwin_set_default_type_attributes (type
);
20256 /* Return a reference suitable for calling a function with the
20257 longcall attribute. */
20260 rs6000_longcall_ref (rtx call_ref
, rtx arg
)
20262 /* System V adds '.' to the internal name, so skip any leading '.' characters. */
20263 const char *call_name
= XSTR (call_ref
, 0);
20264 if (*call_name
== '.')
20266 while (*call_name
== '.')
20269 tree node
= get_identifier (call_name
);
20270 call_ref
= gen_rtx_SYMBOL_REF (VOIDmode
, IDENTIFIER_POINTER (node
));
20275 rtx base
= const0_rtx
;
20277 if (rs6000_pcrel_p ())
20279 rtx reg
= gen_rtx_REG (Pmode
, regno
);
20280 rtx u
= gen_rtx_UNSPEC_VOLATILE (Pmode
,
20281 gen_rtvec (3, base
, call_ref
, arg
),
20282 UNSPECV_PLT_PCREL
);
20283 emit_insn (gen_rtx_SET (reg
, u
));
20287 if (DEFAULT_ABI
== ABI_ELFv2
)
20288 base
= gen_rtx_REG (Pmode
, TOC_REGISTER
);
20292 base
= gen_rtx_REG (Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
20295 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
20296 may be used by a function global entry point. For SysV4, r11
20297 is used by __glink_PLTresolve lazy resolver entry. */
20298 rtx reg
= gen_rtx_REG (Pmode
, regno
);
20299 rtx hi
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (3, base
, call_ref
, arg
),
20301 rtx lo
= gen_rtx_UNSPEC_VOLATILE (Pmode
,
20302 gen_rtvec (3, reg
, call_ref
, arg
),
20304 emit_insn (gen_rtx_SET (reg
, hi
));
20305 emit_insn (gen_rtx_SET (reg
, lo
));
20309 return force_reg (Pmode
, call_ref
);
20312 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20313 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
20316 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20317 struct attribute_spec.handler. */
20319 rs6000_handle_struct_attribute (tree
*node
, tree name
,
20320 tree args ATTRIBUTE_UNUSED
,
20321 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
20324 if (DECL_P (*node
))
20326 if (TREE_CODE (*node
) == TYPE_DECL
)
20327 type
= &TREE_TYPE (*node
);
20332 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
20333 || TREE_CODE (*type
) == UNION_TYPE
)))
20335 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
20336 *no_add_attrs
= true;
20339 else if ((is_attribute_p ("ms_struct", name
)
20340 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
20341 || ((is_attribute_p ("gcc_struct", name
)
20342 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
20344 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
20346 *no_add_attrs
= true;
20353 rs6000_ms_bitfield_layout_p (const_tree record_type
)
20355 return (TARGET_USE_MS_BITFIELD_LAYOUT
&&
20356 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
20357 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
20360 #ifdef USING_ELFOS_H
20362 /* A get_unnamed_section callback, used for switching to toc_section. */
20365 rs6000_elf_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED
)
20367 if ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
20368 && TARGET_MINIMAL_TOC
)
20370 if (!toc_initialized
)
20372 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
20373 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20374 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "LCTOC", 0);
20375 fprintf (asm_out_file
, "\t.tc ");
20376 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1[TC],");
20377 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20378 fprintf (asm_out_file
, "\n");
20380 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20381 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20382 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20383 fprintf (asm_out_file
, " = .+32768\n");
20384 toc_initialized
= 1;
20387 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20389 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
20391 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
20392 if (!toc_initialized
)
20394 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20395 toc_initialized
= 1;
20400 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20401 if (!toc_initialized
)
20403 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20404 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20405 fprintf (asm_out_file
, " = .+32768\n");
20406 toc_initialized
= 1;
20411 /* Implement TARGET_ASM_INIT_SECTIONS. */
20414 rs6000_elf_asm_init_sections (void)
20417 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op
, NULL
);
20420 = get_unnamed_section (SECTION_WRITE
, output_section_asm_op
,
20421 SDATA2_SECTION_ASM_OP
);
20424 /* Implement TARGET_SELECT_RTX_SECTION. */
20427 rs6000_elf_select_rtx_section (machine_mode mode
, rtx x
,
20428 unsigned HOST_WIDE_INT align
)
20430 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
))
20431 return toc_section
;
20433 return default_elf_select_rtx_section (mode
, x
, align
);
20436 /* For a SYMBOL_REF, set generic flags and then perform some
20437 target-specific processing.
20439 When the AIX ABI is requested on a non-AIX system, replace the
20440 function name with the real name (with a leading .) rather than the
20441 function descriptor name. This saves a lot of overriding code to
20442 read the prefixes. */
20444 static void rs6000_elf_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
20446 rs6000_elf_encode_section_info (tree decl
, rtx rtl
, int first
)
20448 default_encode_section_info (decl
, rtl
, first
);
20451 && TREE_CODE (decl
) == FUNCTION_DECL
20453 && DEFAULT_ABI
== ABI_AIX
)
20455 rtx sym_ref
= XEXP (rtl
, 0);
20456 size_t len
= strlen (XSTR (sym_ref
, 0));
20457 char *str
= XALLOCAVEC (char, len
+ 2);
20459 memcpy (str
+ 1, XSTR (sym_ref
, 0), len
+ 1);
20460 XSTR (sym_ref
, 0) = ggc_alloc_string (str
, len
+ 1);
20465 compare_section_name (const char *section
, const char *templ
)
20469 len
= strlen (templ
);
20470 return (strncmp (section
, templ
, len
) == 0
20471 && (section
[len
] == 0 || section
[len
] == '.'));
20475 rs6000_elf_in_small_data_p (const_tree decl
)
20477 if (rs6000_sdata
== SDATA_NONE
)
20480 /* We want to merge strings, so we never consider them small data. */
20481 if (TREE_CODE (decl
) == STRING_CST
)
20484 /* Functions are never in the small data area. */
20485 if (TREE_CODE (decl
) == FUNCTION_DECL
)
20488 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_SECTION_NAME (decl
))
20490 const char *section
= DECL_SECTION_NAME (decl
);
20491 if (compare_section_name (section
, ".sdata")
20492 || compare_section_name (section
, ".sdata2")
20493 || compare_section_name (section
, ".gnu.linkonce.s")
20494 || compare_section_name (section
, ".sbss")
20495 || compare_section_name (section
, ".sbss2")
20496 || compare_section_name (section
, ".gnu.linkonce.sb")
20497 || strcmp (section
, ".PPC.EMB.sdata0") == 0
20498 || strcmp (section
, ".PPC.EMB.sbss0") == 0)
20503 /* If we are told not to put readonly data in sdata, then don't. */
20504 if (TREE_READONLY (decl
) && rs6000_sdata
!= SDATA_EABI
20505 && !rs6000_readonly_in_sdata
)
20508 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (decl
));
20511 && size
<= g_switch_value
20512 /* If it's not public, and we're not going to reference it there,
20513 there's no need to put it in the small data section. */
20514 && (rs6000_sdata
!= SDATA_DATA
|| TREE_PUBLIC (decl
)))
20521 #endif /* USING_ELFOS_H */
20523 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
static bool
rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
{
  return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
}

/* Do not place thread-local symbol refs in the object blocks.  */

static bool
rs6000_use_blocks_for_decl_p (const_tree decl)
{
  return !DECL_THREAD_LOCAL_P (decl);
}
20539 /* Return a REG that occurs in ADDR with coefficient 1.
20540 ADDR can be effectively incremented by incrementing REG.
20542 r0 is special and we must not select it as an address
20543 register by this routine since our caller will try to
20544 increment the returned register via an "la" instruction. */
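/* For example: for ADDR = (plus (reg 9) (const_int 16)) this returns
   register 9, which the caller can then advance with an "la 9,16(9)"
   style instruction.  */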
rtx
find_addr_reg (rtx addr)
{
  while (GET_CODE (addr) == PLUS)
    {
      if (REG_P (XEXP (addr, 0))
          && REGNO (XEXP (addr, 0)) != 0)
        addr = XEXP (addr, 0);
      else if (REG_P (XEXP (addr, 1))
               && REGNO (XEXP (addr, 1)) != 0)
        addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 0)))
        addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 1)))
        addr = XEXP (addr, 0);
      else
        gcc_unreachable ();
    }
  gcc_assert (REG_P (addr) && REGNO (addr) != 0);
  return addr;
}
20569 rs6000_fatal_bad_address (rtx op
)
20571 fatal_insn ("bad address", op
);
20576 vec
<branch_island
, va_gc
> *branch_islands
;
20578 /* Remember to generate a branch island for far calls to the given
20582 add_compiler_branch_island (tree label_name
, tree function_name
,
20585 branch_island bi
= {function_name
, label_name
, line_number
};
20586 vec_safe_push (branch_islands
, bi
);
20589 /* NO_PREVIOUS_DEF checks in the linked list whether the function name is
20590 already there or not. */
20593 no_previous_def (tree function_name
)
20598 FOR_EACH_VEC_SAFE_ELT (branch_islands
, ix
, bi
)
20599 if (function_name
== bi
->function_name
)
20604 /* GET_PREV_LABEL gets the label name from the previous definition of
20608 get_prev_label (tree function_name
)
20613 FOR_EACH_VEC_SAFE_ELT (branch_islands
, ix
, bi
)
20614 if (function_name
== bi
->function_name
)
20615 return bi
->label_name
;
20619 /* Generate external symbol indirection stubs (PIC and non-PIC). */
20622 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
20624 unsigned int length
;
20625 char *symbol_name
, *lazy_ptr_name
;
20626 char *local_label_0
;
20627 static unsigned label
= 0;
20629 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20630 symb
= (*targetm
.strip_name_encoding
) (symb
);
20632 length
= strlen (symb
);
20633 symbol_name
= XALLOCAVEC (char, length
+ 32);
20634 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
20636 lazy_ptr_name
= XALLOCAVEC (char, length
+ 32);
20637 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name
, symb
, length
);
20641 switch_to_section (darwin_sections
[machopic_picsymbol_stub1_section
]);
20642 fprintf (file
, "\t.align 5\n");
20644 fprintf (file
, "%s:\n", stub
);
20645 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
20648 local_label_0
= XALLOCAVEC (char, 16);
20649 sprintf (local_label_0
, "L%u$spb", label
);
20651 fprintf (file
, "\tmflr r0\n");
20652 fprintf (file
, "\tbcl 20,31,%s\n", local_label_0
);
20653 fprintf (file
, "%s:\n\tmflr r11\n", local_label_0
);
20654 fprintf (file
, "\taddis r11,r11,ha16(%s-%s)\n",
20655 lazy_ptr_name
, local_label_0
);
20656 fprintf (file
, "\tmtlr r0\n");
20657 fprintf (file
, "\t%s r12,lo16(%s-%s)(r11)\n",
20658 (TARGET_64BIT
? "ldu" : "lwzu"),
20659 lazy_ptr_name
, local_label_0
);
20660 fprintf (file
, "\tmtctr r12\n");
20661 fprintf (file
, "\tbctr\n");
20663 else /* mdynamic-no-pic or mkernel. */
20665 switch_to_section (darwin_sections
[machopic_symbol_stub1_section
]);
20666 fprintf (file
, "\t.align 4\n");
20668 fprintf (file
, "%s:\n", stub
);
20669 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
20671 fprintf (file
, "\tlis r11,ha16(%s)\n", lazy_ptr_name
);
20672 fprintf (file
, "\t%s r12,lo16(%s)(r11)\n",
20673 (TARGET_64BIT
? "ldu" : "lwzu"),
20675 fprintf (file
, "\tmtctr r12\n");
20676 fprintf (file
, "\tbctr\n");
20679 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
20680 fprintf (file
, "%s:\n", lazy_ptr_name
);
20681 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
20682 fprintf (file
, "%sdyld_stub_binding_helper\n",
20683 (TARGET_64BIT
? DOUBLE_INT_ASM_OP
: "\t.long\t"));
20686 /* Legitimize PIC addresses. If the address is already
20687 position-independent, we return ORIG. Newly generated
20688 position-independent addresses go into a reg. This is REG if non
20689 zero, otherwise we allocate register(s) as necessary. */
20691 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
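/* SMALL_INT is true when X fits in a signed 16-bit immediate, i.e. the
   offset can be folded directly into a D-form address or added with a
   single addi.  */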
20694 rs6000_machopic_legitimize_pic_address (rtx orig
, machine_mode mode
,
20699 if (reg
== NULL
&& !reload_completed
)
20700 reg
= gen_reg_rtx (Pmode
);
20702 if (GET_CODE (orig
) == CONST
)
20706 if (GET_CODE (XEXP (orig
, 0)) == PLUS
20707 && XEXP (XEXP (orig
, 0), 0) == pic_offset_table_rtx
)
20710 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
20712 /* Use a different reg for the intermediate value, as
20713 it will be marked UNCHANGING. */
20714 reg_temp
= !can_create_pseudo_p () ? reg
: gen_reg_rtx (Pmode
);
20715 base
= rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig
, 0), 0),
20718 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig
, 0), 1),
20721 if (CONST_INT_P (offset
))
20723 if (SMALL_INT (offset
))
20724 return plus_constant (Pmode
, base
, INTVAL (offset
));
20725 else if (!reload_completed
)
20726 offset
= force_reg (Pmode
, offset
);
20729 rtx mem
= force_const_mem (Pmode
, orig
);
20730 return machopic_legitimize_pic_address (mem
, Pmode
, reg
);
20733 return gen_rtx_PLUS (Pmode
, base
, offset
);
20736 /* Fall back on generic machopic code. */
20737 return machopic_legitimize_pic_address (orig
, mode
, reg
);
20740 /* Output a .machine directive for the Darwin assembler, and call
20741 the generic start_file routine. */
20744 rs6000_darwin_file_start (void)
20746 static const struct
20750 HOST_WIDE_INT if_set
;
20752 { "ppc64", "ppc64", MASK_64BIT
},
20753 { "970", "ppc970", OPTION_MASK_PPC_GPOPT
| OPTION_MASK_MFCRF \
20754 | MASK_POWERPC64
},
20755 { "power4", "ppc970", 0 },
20756 { "G5", "ppc970", 0 },
20757 { "7450", "ppc7450", 0 },
20758 { "7400", "ppc7400", OPTION_MASK_ALTIVEC
},
20759 { "G4", "ppc7400", 0 },
20760 { "750", "ppc750", 0 },
20761 { "740", "ppc750", 0 },
20762 { "G3", "ppc750", 0 },
20763 { "604e", "ppc604e", 0 },
20764 { "604", "ppc604", 0 },
20765 { "603e", "ppc603", 0 },
20766 { "603", "ppc603", 0 },
20767 { "601", "ppc601", 0 },
20768 { NULL
, "ppc", 0 } };
20769 const char *cpu_id
= "";
20772 rs6000_file_start ();
20773 darwin_file_start ();
20775 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
20777 if (rs6000_default_cpu
!= 0 && rs6000_default_cpu
[0] != '\0')
20778 cpu_id
= rs6000_default_cpu
;
20780 if (OPTION_SET_P (rs6000_cpu_index
))
20781 cpu_id
= processor_target_table
[rs6000_cpu_index
].name
;
20783 /* Look through the mapping array. Pick the first name that either
20784 matches the argument, has a bit set in IF_SET that is also set
20785 in the target flags, or has a NULL name. */
20788 while (mapping
[i
].arg
!= NULL
20789 && strcmp (mapping
[i
].arg
, cpu_id
) != 0
20790 && (mapping
[i
].if_set
& rs6000_isa_flags
) == 0)
20793 fprintf (asm_out_file
, "\t.machine %s\n", mapping
[i
].name
);
20796 #endif /* TARGET_MACHO */
20800 rs6000_elf_reloc_rw_mask (void)
20804 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
20810 /* Record an element in the table of global constructors. SYMBOL is
20811 a SYMBOL_REF of the function to be called; PRIORITY is a number
20812 between 0 and MAX_INIT_PRIORITY.
20814 This differs from default_named_section_asm_out_constructor in
20815 that we have special handling for -mrelocatable. */
20817 static void rs6000_elf_asm_out_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
20819 rs6000_elf_asm_out_constructor (rtx symbol
, int priority
)
20821 const char *section
= ".ctors";
20824 if (priority
!= DEFAULT_INIT_PRIORITY
)
20826 sprintf (buf
, ".ctors.%.5u",
20827 /* Invert the numbering so the linker puts us in the proper
20828 order; constructors are run from right to left, and the
20829 linker sorts in increasing order. */
20830 MAX_INIT_PRIORITY
- priority
);
20834 switch_to_section (get_section (section
, SECTION_WRITE
, NULL
));
20835 assemble_align (POINTER_SIZE
);
20837 if (DEFAULT_ABI
== ABI_V4
20838 && (TARGET_RELOCATABLE
|| flag_pic
> 1))
20840 fputs ("\t.long (", asm_out_file
);
20841 output_addr_const (asm_out_file
, symbol
);
20842 fputs (")@fixup\n", asm_out_file
);
20845 assemble_integer (symbol
, POINTER_SIZE
/ BITS_PER_UNIT
, POINTER_SIZE
, 1);
20848 static void rs6000_elf_asm_out_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
20850 rs6000_elf_asm_out_destructor (rtx symbol
, int priority
)
20852 const char *section
= ".dtors";
20855 if (priority
!= DEFAULT_INIT_PRIORITY
)
20857 sprintf (buf
, ".dtors.%.5u",
20858 /* Invert the numbering so the linker puts us in the proper
20859 order; constructors are run from right to left, and the
20860 linker sorts in increasing order. */
20861 MAX_INIT_PRIORITY
- priority
);
20865 switch_to_section (get_section (section
, SECTION_WRITE
, NULL
));
20866 assemble_align (POINTER_SIZE
);
20868 if (DEFAULT_ABI
== ABI_V4
20869 && (TARGET_RELOCATABLE
|| flag_pic
> 1))
20871 fputs ("\t.long (", asm_out_file
);
20872 output_addr_const (asm_out_file
, symbol
);
20873 fputs (")@fixup\n", asm_out_file
);
20876 assemble_integer (symbol
, POINTER_SIZE
/ BITS_PER_UNIT
, POINTER_SIZE
, 1);
20880 rs6000_elf_declare_function_name (FILE *file
, const char *name
, tree decl
)
20882 if (TARGET_64BIT
&& DEFAULT_ABI
!= ABI_ELFv2
)
20884 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file
);
20885 ASM_OUTPUT_LABEL (file
, name
);
20886 fputs (DOUBLE_INT_ASM_OP
, file
);
20887 rs6000_output_function_entry (file
, name
);
20888 fputs (",.TOC.@tocbase,0\n\t.previous\n", file
);
20891 fputs ("\t.size\t", file
);
20892 assemble_name (file
, name
);
20893 fputs (",24\n\t.type\t.", file
);
20894 assemble_name (file
, name
);
20895 fputs (",@function\n", file
);
20896 if (TREE_PUBLIC (decl
) && ! DECL_WEAK (decl
))
20898 fputs ("\t.globl\t.", file
);
20899 assemble_name (file
, name
);
20904 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
20905 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
20906 rs6000_output_function_entry (file
, name
);
20907 fputs (":\n", file
);
20912 if (DEFAULT_ABI
== ABI_V4
20913 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
20914 && !TARGET_SECURE_PLT
20915 && (!constant_pool_empty_p () || crtl
->profile
)
20916 && (uses_toc
= uses_TOC ()))
20921 switch_to_other_text_partition ();
20922 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
20924 fprintf (file
, "\t.long ");
20925 assemble_name (file
, toc_label_name
);
20928 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
20929 assemble_name (file
, buf
);
20932 switch_to_other_text_partition ();
20935 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
20936 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
20938 if (TARGET_CMODEL
== CMODEL_LARGE
20939 && rs6000_global_entry_point_prologue_needed_p ())
20943 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
20945 fprintf (file
, "\t.quad .TOC.-");
20946 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
20947 assemble_name (file
, buf
);
20951 if (DEFAULT_ABI
== ABI_AIX
)
20953 const char *desc_name
, *orig_name
;
20955 orig_name
= (*targetm
.strip_name_encoding
) (name
);
20956 desc_name
= orig_name
;
20957 while (*desc_name
== '.')
20960 if (TREE_PUBLIC (decl
))
20961 fprintf (file
, "\t.globl %s\n", desc_name
);
20963 fprintf (file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20964 fprintf (file
, "%s:\n", desc_name
);
20965 fprintf (file
, "\t.long %s\n", orig_name
);
20966 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file
);
20967 fputs ("\t.long 0\n", file
);
20968 fprintf (file
, "\t.previous\n");
20970 ASM_OUTPUT_LABEL (file
, name
);
20973 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED
;
20975 rs6000_elf_file_end (void)
20977 #ifdef HAVE_AS_GNU_ATTRIBUTE
20978 /* ??? The value emitted depends on options active at file end.
20979 Assume anyone using #pragma or attributes that might change
20980 options knows what they are doing. */
20981 if ((TARGET_64BIT
|| DEFAULT_ABI
== ABI_V4
)
20982 && rs6000_passes_float
)
20986 if (TARGET_HARD_FLOAT
)
20990 if (rs6000_passes_long_double
)
20992 if (!TARGET_LONG_DOUBLE_128
)
20994 else if (TARGET_IEEEQUAD
)
20999 fprintf (asm_out_file
, "\t.gnu_attribute 4, %d\n", fp
);
21001 if (TARGET_32BIT
&& DEFAULT_ABI
== ABI_V4
)
21003 if (rs6000_passes_vector
)
21004 fprintf (asm_out_file
, "\t.gnu_attribute 8, %d\n",
21005 (TARGET_ALTIVEC_ABI
? 2 : 1));
21006 if (rs6000_returns_struct
)
21007 fprintf (asm_out_file
, "\t.gnu_attribute 12, %d\n",
21008 aix_struct_return
? 2 : 1);
21011 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
21012 if (TARGET_32BIT
|| DEFAULT_ABI
== ABI_ELFv2
)
21013 file_end_indicate_exec_stack ();
21016 if (flag_split_stack
)
21017 file_end_indicate_split_stack ();
21021 /* We have expanded a CPU builtin, so we need to emit a reference to
21022 the special symbol that LIBC uses to declare it supports the
21023 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
21024 switch_to_section (data_section
);
21025 fprintf (asm_out_file
, "\t.align %u\n", TARGET_32BIT
? 2 : 3);
21026 fprintf (asm_out_file
, "\t%s %s\n",
21027 TARGET_32BIT
? ".long" : ".quad", tcb_verification_symbol
);
21034 #ifndef HAVE_XCOFF_DWARF_EXTRAS
21035 #define HAVE_XCOFF_DWARF_EXTRAS 0
21038 static enum unwind_info_type
21039 rs6000_xcoff_debug_unwind_info (void)
21045 rs6000_xcoff_asm_output_anchor (rtx symbol
)
21049 sprintf (buffer
, "$ + " HOST_WIDE_INT_PRINT_DEC
,
21050 SYMBOL_REF_BLOCK_OFFSET (symbol
));
21051 fprintf (asm_out_file
, "%s", SET_ASM_OP
);
21052 RS6000_OUTPUT_BASENAME (asm_out_file
, XSTR (symbol
, 0));
21053 fprintf (asm_out_file
, ",");
21054 RS6000_OUTPUT_BASENAME (asm_out_file
, buffer
);
21055 fprintf (asm_out_file
, "\n");
21059 rs6000_xcoff_asm_globalize_label (FILE *stream
, const char *name
)
21061 fputs (GLOBAL_ASM_OP
, stream
);
21062 RS6000_OUTPUT_BASENAME (stream
, name
);
21063 putc ('\n', stream
);
21066 /* A get_unnamed_decl callback, used for read-only sections. PTR
21067 points to the section string variable. */
21070 rs6000_xcoff_output_readonly_section_asm_op (const char *directive
)
21072 fprintf (asm_out_file
, "\t.csect %s[RO],%s\n",
21074 ? xcoff_private_rodata_section_name
21075 : xcoff_read_only_section_name
,
21076 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
21079 /* Likewise for read-write sections. */
21082 rs6000_xcoff_output_readwrite_section_asm_op (const char *)
21084 fprintf (asm_out_file
, "\t.csect %s[RW],%s\n",
21085 xcoff_private_data_section_name
,
21086 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
21090 rs6000_xcoff_output_tls_section_asm_op (const char *directive
)
21092 fprintf (asm_out_file
, "\t.csect %s[TL],%s\n",
21094 ? xcoff_private_data_section_name
21095 : xcoff_tls_data_section_name
,
21096 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
21099 /* A get_unnamed_section callback, used for switching to toc_section. */
21102 rs6000_xcoff_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED
)
21104 if (TARGET_MINIMAL_TOC
)
21106 /* toc_section is always selected at least once from
21107 rs6000_xcoff_file_start, so this is guaranteed to
21108 always be defined once and only once in each file. */
21109 if (!toc_initialized
)
21111 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file
);
21112 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file
);
21113 toc_initialized
= 1;
21115 fprintf (asm_out_file
, "\t.csect toc_table[RW]%s\n",
21116 (TARGET_32BIT
? "" : ",3"));
21119 fputs ("\t.toc\n", asm_out_file
);
21122 /* Implement TARGET_ASM_INIT_SECTIONS. */
21125 rs6000_xcoff_asm_init_sections (void)
21127 read_only_data_section
21128 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
21131 private_data_section
21132 = get_unnamed_section (SECTION_WRITE
,
21133 rs6000_xcoff_output_readwrite_section_asm_op
,
21136 read_only_private_data_section
21137 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
21141 = get_unnamed_section (SECTION_TLS
,
21142 rs6000_xcoff_output_tls_section_asm_op
,
21145 tls_private_data_section
21146 = get_unnamed_section (SECTION_TLS
,
21147 rs6000_xcoff_output_tls_section_asm_op
,
21151 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op
, NULL
);
21153 readonly_data_section
= read_only_data_section
;
21157 rs6000_xcoff_reloc_rw_mask (void)
static void
rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
				tree decl ATTRIBUTE_UNUSED)
{
  int smclass;
  static const char * const suffix[7]
    = { "PR", "RO", "RW", "BS", "TL", "UL", "XO" };

  if (flags & SECTION_EXCLUDE)
    smclass = 6;
  else if (flags & SECTION_DEBUG)
    {
      fprintf (asm_out_file, "\t.dwsect %s\n", name);
      return;
    }
  else if (flags & SECTION_CODE)
    smclass = 0;
  else if (flags & SECTION_TLS)
    {
      if (flags & SECTION_BSS)
	smclass = 5;
      else
	smclass = 4;
    }
  else if (flags & SECTION_WRITE)
    {
      if (flags & SECTION_BSS)
	smclass = 3;
      else
	smclass = 2;
    }
  else
    smclass = 1;

  fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
	   (flags & SECTION_CODE) ? "." : "",
	   name, suffix[smclass], flags & SECTION_ENTSIZE);
}
#define IN_NAMED_SECTION(DECL) \
  ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
   && DECL_SECTION_NAME (DECL) != NULL)

static section *
rs6000_xcoff_select_section (tree decl, int reloc,
			     unsigned HOST_WIDE_INT align)
{
  /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
     a unique section.  */
  if (align > BIGGEST_ALIGNMENT && VAR_OR_FUNCTION_DECL_P (decl))
    {
      resolve_unique_section (decl, reloc, true);
      if (IN_NAMED_SECTION (decl))
	return get_named_section (decl, NULL, reloc);
    }

  if (decl_readonly_section (decl, reloc))
    {
      if (TREE_PUBLIC (decl))
	return read_only_data_section;
      else
	return read_only_private_data_section;
    }
  else
    {
      if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
	{
	  if (bss_initializer_p (decl))
	    return tls_comm_section;
	  else if (TREE_PUBLIC (decl))
	    return tls_data_section;
	  else
	    return tls_private_data_section;
	}

      if (TREE_PUBLIC (decl))
	return data_section;
      else
	return private_data_section;
    }
}
21247 rs6000_xcoff_unique_section (tree decl
, int reloc ATTRIBUTE_UNUSED
)
21251 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
21252 name
= (*targetm
.strip_name_encoding
) (name
);
21253 set_decl_section_name (decl
, name
);
21256 /* Select section for constant in constant pool.
21258 On RS/6000, all constants are in the private read-only data area.
21259 However, if this is being placed in the TOC it must be output as a
21263 rs6000_xcoff_select_rtx_section (machine_mode mode
, rtx x
,
21264 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
21266 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
))
21267 return toc_section
;
21269 return read_only_private_data_section
;
/* Remove any trailing [DS] or the like from the symbol name.  */

static const char *
rs6000_xcoff_strip_name_encoding (const char *name)
{
  size_t len;

  if (*name == '*')
    name++;
  len = strlen (name);
  if (name[len - 1] == ']')
    return ggc_alloc_string (name, len - 4);
  else
    return name;
}
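/* Editor's note: a minimal standalone sketch (not part of GCC) of the
   mapping-class stripping performed by rs6000_xcoff_strip_name_encoding
   above: a trailing "[XX]" qualifier such as "[DS]" or "[RW]" is dropped.
   The helper name and the use of malloc are illustrative assumptions only;
   the real code allocates from the GC string pool.  */
#if 0
#include <stdlib.h>
#include <string.h>

static const char *
strip_mapping_class (const char *name)
{
  size_t len = strlen (name);
  if (len >= 4 && name[len - 1] == ']')
    {
      /* Keep everything but the four characters of "[XX]":
	 "foo[DS]" (7 chars) -> "foo" (3 chars).  */
      char *copy = (char *) malloc (len - 3);
      memcpy (copy, name, len - 4);
      copy[len - 4] = '\0';
      return copy;
    }
  return name;
}
#endif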
/* Section attributes.  AIX is always PIC.  */

static unsigned int
rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int align;
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (decl && DECL_P (decl) && VAR_P (decl) && bss_initializer_p (decl))
    flags |= SECTION_BSS;

  /* Align to at least UNIT size.  */
  if (!decl || !DECL_P (decl))
    align = MIN_UNITS_PER_WORD;
  /* Align code CSECT to at least 32 bytes.  */
  else if ((flags & SECTION_CODE) != 0)
    align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT), 32);
  else
    /* Increase alignment of large objects if not already stricter.  */
    align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
		 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
		 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);

  return flags | (exact_log2 (align) & SECTION_ENTSIZE);
}
/* Output at beginning of assembler file.

   Initialize the section names for the RS/6000 at this point.

   Specify filename, including full path, to assembler.

   We want to go into the TOC section so at least one .toc will be emitted.
   Also, in order to output proper .bs/.es pairs, we need at least one static
   [RW] section emitted.

   Finally, declare mcount when profiling to make the assembler happy.  */

static void
rs6000_xcoff_file_start (void)
{
  rs6000_gen_section_name (&xcoff_bss_section_name,
			   main_input_filename, ".bss_");
  rs6000_gen_section_name (&xcoff_private_data_section_name,
			   main_input_filename, ".rw_");
  rs6000_gen_section_name (&xcoff_private_rodata_section_name,
			   main_input_filename, ".rop_");
  rs6000_gen_section_name (&xcoff_read_only_section_name,
			   main_input_filename, ".ro_");
  rs6000_gen_section_name (&xcoff_tls_data_section_name,
			   main_input_filename, ".tls_");

  fputs ("\t.file\t", asm_out_file);
  output_quoted_string (asm_out_file, main_input_filename);
  fputc ('\n', asm_out_file);
  if (write_symbols != NO_DEBUG)
    switch_to_section (private_data_section);
  switch_to_section (toc_section);
  switch_to_section (text_section);
  if (profile_flag)
    fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
  rs6000_file_start ();
}
/* Output at end of assembler file.
   On the RS/6000, referencing data should automatically pull in text.  */

static void
rs6000_xcoff_file_end (void)
{
  switch_to_section (text_section);
  if (xcoff_tls_exec_model_detected)
    {
      /* Add a .ref to __tls_get_addr to force libpthread dependency.  */
      fputs ("\t.extern __tls_get_addr\n\t.ref __tls_get_addr\n", asm_out_file);
    }
  fputs ("_section_.text:\n", asm_out_file);
  switch_to_section (data_section);
  fputs (TARGET_32BIT
	 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
	 asm_out_file);
}
21371 struct declare_alias_data
21374 bool function_descriptor
;
21377 /* Declare alias N. A helper function for for_node_and_aliases. */
21380 rs6000_declare_alias (struct symtab_node
*n
, void *d
)
21382 struct declare_alias_data
*data
= (struct declare_alias_data
*)d
;
21383 /* Main symbol is output specially, because varasm machinery does part of
21384 the job for us - we do not need to declare .globl/lglobs and such. */
21385 if (!n
->alias
|| n
->weakref
)
21388 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n
->decl
)))
21391 /* Prevent assemble_alias from trying to use .set pseudo operation
21392 that does not behave as expected by the middle-end. */
21393 TREE_ASM_WRITTEN (n
->decl
) = true;
21395 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n
->decl
));
21396 char *buffer
= (char *) alloca (strlen (name
) + 2);
21398 int dollar_inside
= 0;
21400 strcpy (buffer
, name
);
21401 p
= strchr (buffer
, '$');
21405 p
= strchr (p
+ 1, '$');
21407 if (TREE_PUBLIC (n
->decl
))
21409 if (!RS6000_WEAK
|| !DECL_WEAK (n
->decl
))
21411 if (dollar_inside
) {
21412 if (data
->function_descriptor
)
21413 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21414 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21416 if (data
->function_descriptor
)
21418 fputs ("\t.globl .", data
->file
);
21419 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
21420 putc ('\n', data
->file
);
21422 fputs ("\t.globl ", data
->file
);
21423 assemble_name (data
->file
, buffer
);
21424 putc ('\n', data
->file
);
21426 #ifdef ASM_WEAKEN_DECL
21427 else if (DECL_WEAK (n
->decl
) && !data
->function_descriptor
)
21428 ASM_WEAKEN_DECL (data
->file
, n
->decl
, name
, NULL
);
21435 if (data
->function_descriptor
)
21436 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21437 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21439 if (data
->function_descriptor
)
21441 fputs ("\t.lglobl .", data
->file
);
21442 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
21443 putc ('\n', data
->file
);
21445 fputs ("\t.lglobl ", data
->file
);
21446 assemble_name (data
->file
, buffer
);
21447 putc ('\n', data
->file
);
21449 if (data
->function_descriptor
)
21450 putc ('.', data
->file
);
21451 ASM_OUTPUT_LABEL (data
->file
, buffer
);
#ifdef HAVE_GAS_HIDDEN
/* Helper function to calculate visibility of a DECL
   and return the value as a const string.  */

static const char *
rs6000_xcoff_visibility (tree decl)
{
  static const char * const visibility_types[] = {
    "", ",protected", ",hidden", ",internal"
  };

  enum symbol_visibility vis = DECL_VISIBILITY (decl);
  return visibility_types[vis];
}
#endif
21473 /* This macro produces the initial definition of a function name.
21474 On the RS/6000, we need to place an extra '.' in the function name and
21475 output the function descriptor.
21476 Dollar signs are converted to underscores.
21478 The csect for the function will have already been created when
21479 text_section was selected. We do have to go back to that csect, however.
21481 The third and fourth parameters to the .function pseudo-op (16 and 044)
21482 are placeholders which no longer have any use.
21484 Because AIX assembler's .set command has unexpected semantics, we output
21485 all aliases as alternative labels in front of the definition. */
21488 rs6000_xcoff_declare_function_name (FILE *file
, const char *name
, tree decl
)
21490 char *buffer
= (char *) alloca (strlen (name
) + 1);
21492 int dollar_inside
= 0;
21493 struct declare_alias_data data
= {file
, false};
21495 strcpy (buffer
, name
);
21496 p
= strchr (buffer
, '$');
21500 p
= strchr (p
+ 1, '$');
21502 if (TREE_PUBLIC (decl
))
21504 if (!RS6000_WEAK
|| !DECL_WEAK (decl
))
21506 if (dollar_inside
) {
21507 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21508 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21510 fputs ("\t.globl .", file
);
21511 RS6000_OUTPUT_BASENAME (file
, buffer
);
21512 #ifdef HAVE_GAS_HIDDEN
21513 fputs (rs6000_xcoff_visibility (decl
), file
);
21520 if (dollar_inside
) {
21521 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21522 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21524 fputs ("\t.lglobl .", file
);
21525 RS6000_OUTPUT_BASENAME (file
, buffer
);
21529 fputs ("\t.csect ", file
);
21530 assemble_name (file
, buffer
);
21531 fputs (TARGET_32BIT
? "\n" : ",3\n", file
);
21533 ASM_OUTPUT_LABEL (file
, buffer
);
21535 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
21537 fputs (TARGET_32BIT
? "\t.long ." : "\t.llong .", file
);
21538 RS6000_OUTPUT_BASENAME (file
, buffer
);
21539 fputs (", TOC[tc0], 0\n", file
);
21542 switch_to_section (function_section (decl
));
21544 ASM_OUTPUT_LABEL (file
, buffer
);
21546 data
.function_descriptor
= true;
21547 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
21549 if (!DECL_IGNORED_P (decl
))
21551 if (write_symbols
== DBX_DEBUG
|| write_symbols
== XCOFF_DEBUG
)
21552 xcoffout_declare_function (file
, decl
, buffer
);
21553 else if (dwarf_debuginfo_p ())
21555 name
= (*targetm
.strip_name_encoding
) (name
);
21556 fprintf (file
, "\t.function .%s,.%s,2,0\n", name
, name
);
/* Output assembly language to globalize a symbol from a DECL,
   possibly with visibility.  */

void
rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
  fputs (GLOBAL_ASM_OP, stream);
  assemble_name (stream, name);
#ifdef HAVE_GAS_HIDDEN
  fputs (rs6000_xcoff_visibility (decl), stream);
#endif
  putc ('\n', stream);
}
21578 /* Output assembly language to define a symbol as COMMON from a DECL,
21579 possibly with visibility. */
21582 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream
,
21583 tree decl ATTRIBUTE_UNUSED
,
21585 unsigned HOST_WIDE_INT size
,
21586 unsigned int align
)
21588 unsigned int align2
= 2;
21591 align
= DATA_ABI_ALIGNMENT (TREE_TYPE (decl
), DECL_ALIGN (decl
));
21594 align2
= floor_log2 (align
/ BITS_PER_UNIT
);
21598 if (! DECL_COMMON (decl
))
21600 /* Forget section. */
21603 /* Globalize TLS BSS. */
21604 if (TREE_PUBLIC (decl
) && DECL_THREAD_LOCAL_P (decl
))
21606 fputs (GLOBAL_ASM_OP
, stream
);
21607 assemble_name (stream
, name
);
21608 fputc ('\n', stream
);
21611 /* Switch to section and skip space. */
21612 fputs ("\t.csect ", stream
);
21613 assemble_name (stream
, name
);
21614 fprintf (stream
, ",%u\n", align2
);
21615 ASM_DECLARE_OBJECT_NAME (stream
, name
, decl
);
21616 ASM_OUTPUT_SKIP (stream
, size
? size
: 1);
21620 if (TREE_PUBLIC (decl
))
21623 "\t.comm %s," HOST_WIDE_INT_PRINT_UNSIGNED
",%u" ,
21624 name
, size
, align2
);
21626 #ifdef HAVE_GAS_HIDDEN
21628 fputs (rs6000_xcoff_visibility (decl
), stream
);
21630 putc ('\n', stream
);
21634 "\t.lcomm %s," HOST_WIDE_INT_PRINT_UNSIGNED
",%s,%u\n",
21635 (*targetm
.strip_name_encoding
) (name
), size
, name
, align2
);
/* This macro produces the initial definition of an object (variable) name.
   Because the AIX assembler's .set command has unexpected semantics, we
   output all aliases as alternative labels in front of the definition.  */

void
rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
{
  struct declare_alias_data data = {file, false};
  ASM_OUTPUT_LABEL (file, name);
  symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
							       &data, true);
}
/* Override the default 'SYMBOL-.' syntax with AIX-compatible 'SYMBOL-$'.  */

void
rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
{
  fputs (integer_asm_op (size, FALSE), file);
  assemble_name (file, label);
  fputs ("-$", file);
}
/* Output a symbol offset relative to the dbase for the current object.
   We use __gcc_unwind_dbase as an arbitrary base for dbase; it is
   embedded in all executables/libraries through
   libgcc/config/rs6000/crtdbase.S.  */

void
rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
{
  fputs (integer_asm_op (size, FALSE), file);
  assemble_name (file, label);
  fputs ("-__gcc_unwind_dbase", file);
}
21678 rs6000_xcoff_encode_section_info (tree decl
, rtx rtl
, int first
)
21682 const char *symname
;
21684 default_encode_section_info (decl
, rtl
, first
);
21686 /* Careful not to prod global register variables. */
21689 symbol
= XEXP (rtl
, 0);
21690 if (!SYMBOL_REF_P (symbol
))
21693 flags
= SYMBOL_REF_FLAGS (symbol
);
21695 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL_P (decl
))
21696 flags
&= ~SYMBOL_FLAG_HAS_BLOCK_INFO
;
21698 SYMBOL_REF_FLAGS (symbol
) = flags
;
21700 symname
= XSTR (symbol
, 0);
21702 /* Append CSECT mapping class, unless the symbol already is qualified.
21703 Aliases are implemented as labels, so the symbol name should not add
21704 a mapping class. */
21707 && VAR_OR_FUNCTION_DECL_P (decl
)
21708 && (symtab_node::get (decl
) == NULL
21709 || symtab_node::get (decl
)->alias
== 0)
21710 && symname
[strlen (symname
) - 1] != ']')
21712 const char *smclass
= NULL
;
21714 if (TREE_CODE (decl
) == FUNCTION_DECL
)
21716 else if (DECL_THREAD_LOCAL_P (decl
))
21718 if (bss_initializer_p (decl
))
21720 else if (flag_data_sections
)
21723 else if (DECL_EXTERNAL (decl
))
21725 else if (bss_initializer_p (decl
))
21727 else if (flag_data_sections
)
21729 /* This must exactly match the logic of select section. */
21730 if (decl_readonly_section (decl
, compute_reloc_for_var (decl
)))
21736 if (smclass
!= NULL
)
21738 char *newname
= XALLOCAVEC (char, strlen (symname
) + 5);
21740 strcpy (newname
, symname
);
21741 strcat (newname
, smclass
);
21742 XSTR (symbol
, 0) = ggc_strdup (newname
);
21746 #endif /* HAVE_AS_TLS */
21747 #endif /* TARGET_XCOFF */
21750 rs6000_asm_weaken_decl (FILE *stream
, tree decl
,
21751 const char *name
, const char *val
)
21753 fputs ("\t.weak\t", stream
);
21754 assemble_name (stream
, name
);
21755 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
21756 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
21758 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21760 fputs (rs6000_xcoff_visibility (decl
), stream
);
21762 fputs ("\n\t.weak\t.", stream
);
21763 RS6000_OUTPUT_BASENAME (stream
, name
);
21765 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21767 fputs (rs6000_xcoff_visibility (decl
), stream
);
21769 fputc ('\n', stream
);
21773 #ifdef ASM_OUTPUT_DEF
21774 ASM_OUTPUT_DEF (stream
, name
, val
);
21776 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
21777 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
21779 fputs ("\t.set\t.", stream
);
21780 RS6000_OUTPUT_BASENAME (stream
, name
);
21781 fputs (",.", stream
);
21782 RS6000_OUTPUT_BASENAME (stream
, val
);
21783 fputc ('\n', stream
);
/* Return true if INSN should not be copied.  */

static bool
rs6000_cannot_copy_insn_p (rtx_insn *insn)
{
  return recog_memoized (insn) >= 0
	 && get_attr_cannot_copy (insn);
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */
21803 rs6000_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
21804 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
21806 int code
= GET_CODE (x
);
21810 /* On the RS/6000, if it is valid in the insn, it is free. */
21812 if (((outer_code
== SET
21813 || outer_code
== PLUS
21814 || outer_code
== MINUS
)
21815 && (satisfies_constraint_I (x
)
21816 || satisfies_constraint_L (x
)))
21817 || (outer_code
== AND
21818 && (satisfies_constraint_K (x
)
21820 ? satisfies_constraint_L (x
)
21821 : satisfies_constraint_J (x
))))
21822 || ((outer_code
== IOR
|| outer_code
== XOR
)
21823 && (satisfies_constraint_K (x
)
21825 ? satisfies_constraint_L (x
)
21826 : satisfies_constraint_J (x
))))
21827 || outer_code
== ASHIFT
21828 || outer_code
== ASHIFTRT
21829 || outer_code
== LSHIFTRT
21830 || outer_code
== ROTATE
21831 || outer_code
== ROTATERT
21832 || outer_code
== ZERO_EXTRACT
21833 || (outer_code
== MULT
21834 && satisfies_constraint_I (x
))
21835 || ((outer_code
== DIV
|| outer_code
== UDIV
21836 || outer_code
== MOD
|| outer_code
== UMOD
)
21837 && exact_log2 (INTVAL (x
)) >= 0)
21838 || (outer_code
== COMPARE
21839 && (satisfies_constraint_I (x
)
21840 || satisfies_constraint_K (x
)))
21841 || ((outer_code
== EQ
|| outer_code
== NE
)
21842 && (satisfies_constraint_I (x
)
21843 || satisfies_constraint_K (x
)
21845 ? satisfies_constraint_L (x
)
21846 : satisfies_constraint_J (x
))))
21847 || (outer_code
== GTU
21848 && satisfies_constraint_I (x
))
21849 || (outer_code
== LTU
21850 && satisfies_constraint_P (x
)))
21855 else if ((outer_code
== PLUS
21856 && reg_or_add_cint_operand (x
, mode
))
21857 || (outer_code
== MINUS
21858 && reg_or_sub_cint_operand (x
, mode
))
21859 || ((outer_code
== SET
21860 || outer_code
== IOR
21861 || outer_code
== XOR
)
21863 & ~ (unsigned HOST_WIDE_INT
) 0xffffffff) == 0))
21865 *total
= COSTS_N_INSNS (1);
21871 case CONST_WIDE_INT
:
21875 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21879 /* When optimizing for size, MEM should be slightly more expensive
21880 than generating address, e.g., (plus (reg) (const)).
21881 L1 cache latency is about two instructions. */
21882 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21883 if (rs6000_slow_unaligned_access (mode
, MEM_ALIGN (x
)))
21884 *total
+= COSTS_N_INSNS (100);
21893 if (FLOAT_MODE_P (mode
))
21894 *total
= rs6000_cost
->fp
;
21896 *total
= COSTS_N_INSNS (1);
21900 if (CONST_INT_P (XEXP (x
, 1))
21901 && satisfies_constraint_I (XEXP (x
, 1)))
21903 if (INTVAL (XEXP (x
, 1)) >= -256
21904 && INTVAL (XEXP (x
, 1)) <= 255)
21905 *total
= rs6000_cost
->mulsi_const9
;
21907 *total
= rs6000_cost
->mulsi_const
;
21909 else if (mode
== SFmode
)
21910 *total
= rs6000_cost
->fp
;
21911 else if (FLOAT_MODE_P (mode
))
21912 *total
= rs6000_cost
->dmul
;
21913 else if (mode
== DImode
)
21914 *total
= rs6000_cost
->muldi
;
21916 *total
= rs6000_cost
->mulsi
;
21920 if (mode
== SFmode
)
21921 *total
= rs6000_cost
->fp
;
21923 *total
= rs6000_cost
->dmul
;
21928 if (FLOAT_MODE_P (mode
))
21930 *total
= mode
== DFmode
? rs6000_cost
->ddiv
21931 : rs6000_cost
->sdiv
;
21938 if (CONST_INT_P (XEXP (x
, 1))
21939 && exact_log2 (INTVAL (XEXP (x
, 1))) >= 0)
21941 if (code
== DIV
|| code
== MOD
)
21943 *total
= COSTS_N_INSNS (2);
21946 *total
= COSTS_N_INSNS (1);
21950 if (GET_MODE (XEXP (x
, 1)) == DImode
)
21951 *total
= rs6000_cost
->divdi
;
21953 *total
= rs6000_cost
->divsi
;
21955 /* Add in shift and subtract for MOD unless we have a mod instruction. */
21956 if (!TARGET_MODULO
&& (code
== MOD
|| code
== UMOD
))
21957 *total
+= COSTS_N_INSNS (2);
21961 *total
= COSTS_N_INSNS (TARGET_CTZ
? 1 : 4);
21965 *total
= COSTS_N_INSNS (4);
21969 *total
= COSTS_N_INSNS (TARGET_POPCNTD
? 1 : 6);
21973 *total
= COSTS_N_INSNS (TARGET_CMPB
? 2 : 6);
21977 if (outer_code
== AND
|| outer_code
== IOR
|| outer_code
== XOR
)
21980 *total
= COSTS_N_INSNS (1);
21984 if (CONST_INT_P (XEXP (x
, 1)))
21986 rtx left
= XEXP (x
, 0);
21987 rtx_code left_code
= GET_CODE (left
);
21989 /* rotate-and-mask: 1 insn. */
21990 if ((left_code
== ROTATE
21991 || left_code
== ASHIFT
21992 || left_code
== LSHIFTRT
)
21993 && rs6000_is_valid_shift_mask (XEXP (x
, 1), left
, mode
))
21995 *total
= rtx_cost (XEXP (left
, 0), mode
, left_code
, 0, speed
);
21996 if (!CONST_INT_P (XEXP (left
, 1)))
21997 *total
+= rtx_cost (XEXP (left
, 1), SImode
, left_code
, 1, speed
);
21998 *total
+= COSTS_N_INSNS (1);
22002 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
22003 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
22004 if (rs6000_is_valid_and_mask (XEXP (x
, 1), mode
)
22005 || (val
& 0xffff) == val
22006 || (val
& 0xffff0000) == val
22007 || ((val
& 0xffff) == 0 && mode
== SImode
))
22009 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
22010 *total
+= COSTS_N_INSNS (1);
22015 if (rs6000_is_valid_2insn_and (XEXP (x
, 1), mode
))
22017 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
22018 *total
+= COSTS_N_INSNS (2);
22023 *total
= COSTS_N_INSNS (1);
22028 *total
= COSTS_N_INSNS (1);
22034 *total
= COSTS_N_INSNS (1);
22038 /* The EXTSWSLI instruction is a combined instruction. Don't count both
22039 the sign extend and shift separately within the insn. */
22040 if (TARGET_EXTSWSLI
&& mode
== DImode
22041 && GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
22042 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
)
22053 /* Handle mul_highpart. */
22054 if (outer_code
== TRUNCATE
22055 && GET_CODE (XEXP (x
, 0)) == MULT
)
22057 if (mode
== DImode
)
22058 *total
= rs6000_cost
->muldi
;
22060 *total
= rs6000_cost
->mulsi
;
22063 else if (outer_code
== AND
)
22066 *total
= COSTS_N_INSNS (1);
22071 if (MEM_P (XEXP (x
, 0)))
22074 *total
= COSTS_N_INSNS (1);
22080 if (!FLOAT_MODE_P (mode
))
22082 *total
= COSTS_N_INSNS (1);
22088 case UNSIGNED_FLOAT
:
22091 case FLOAT_TRUNCATE
:
22092 *total
= rs6000_cost
->fp
;
22096 if (mode
== DFmode
)
22097 *total
= rs6000_cost
->sfdf_convert
;
22099 *total
= rs6000_cost
->fp
;
22106 *total
= COSTS_N_INSNS (1);
22109 else if (FLOAT_MODE_P (mode
) && TARGET_PPC_GFXOPT
&& TARGET_HARD_FLOAT
)
22111 *total
= rs6000_cost
->fp
;
22120 /* Carry bit requires mode == Pmode.
22121 NEG or PLUS already counted so only add one. */
22123 && (outer_code
== NEG
|| outer_code
== PLUS
))
22125 *total
= COSTS_N_INSNS (1);
22133 if (outer_code
== SET
)
22135 if (XEXP (x
, 1) == const0_rtx
)
22137 *total
= COSTS_N_INSNS (2);
22142 *total
= COSTS_N_INSNS (3);
22147 if (outer_code
== COMPARE
)
22155 if (XINT (x
, 1) == UNSPECV_MMA_XXSETACCZ
)
/* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost.  */

static bool
rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
			int opno, int *total, bool speed)
{
  bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);

  fprintf (stderr,
	   "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
	   "opno = %d, total = %d, speed = %s, x:\n",
	   ret ? "complete" : "scan inner",
	   GET_MODE_NAME (mode),
	   GET_RTX_NAME (outer_code),
	   opno,
	   *total,
	   speed ? "true" : "false");

  debug_rtx (x);

  return ret;
}
22193 rs6000_insn_cost (rtx_insn
*insn
, bool speed
)
22195 if (recog_memoized (insn
) < 0)
22198 /* If we are optimizing for size, just use the length. */
22200 return get_attr_length (insn
);
22202 /* Use the cost if provided. */
22203 int cost
= get_attr_cost (insn
);
22207 /* If the insn tells us how many insns there are, use that. Otherwise use
22208 the length/4. Adjust the insn length to remove the extra size that
22209 prefixed instructions take. */
22210 int n
= get_attr_num_insns (insn
);
22213 int length
= get_attr_length (insn
);
22214 if (get_attr_prefixed (insn
) == PREFIXED_YES
)
22217 ADJUST_INSN_LENGTH (insn
, adjust
);
22224 enum attr_type type
= get_attr_type (insn
);
22231 cost
= COSTS_N_INSNS (n
+ 1);
22235 switch (get_attr_size (insn
))
22238 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi_const9
;
22241 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi_const
;
22244 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi
;
22247 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->muldi
;
22250 gcc_unreachable ();
22254 switch (get_attr_size (insn
))
22257 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->divsi
;
22260 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->divdi
;
22263 gcc_unreachable ();
22268 cost
= n
* rs6000_cost
->fp
;
22271 cost
= n
* rs6000_cost
->dmul
;
22274 cost
= n
* rs6000_cost
->sdiv
;
22277 cost
= n
* rs6000_cost
->ddiv
;
22284 cost
= COSTS_N_INSNS (n
+ 2);
22288 cost
= COSTS_N_INSNS (n
);
/* Debug form of ADDRESS_COST that is selected if -mdebug=cost.  */

static int
rs6000_debug_address_cost (rtx x, machine_mode mode,
			   addr_space_t as, bool speed)
{
  int ret = TARGET_ADDRESS_COST (x, mode, as, speed);

  fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
	   ret, speed ? "true" : "false");
  debug_rtx (x);

  return ret;
}
22310 /* A C expression returning the cost of moving data from a register of class
22311 CLASS1 to one of CLASS2. */
22314 rs6000_register_move_cost (machine_mode mode
,
22315 reg_class_t from
, reg_class_t to
)
22318 reg_class_t rclass
;
22320 if (TARGET_DEBUG_COST
)
22323 /* If we have VSX, we can easily move between FPR or Altivec registers,
22324 otherwise we can only easily move within classes.
22325 Do this first so we give best-case answers for union classes
22326 containing both gprs and vsx regs. */
22327 HARD_REG_SET to_vsx
, from_vsx
;
22328 to_vsx
= reg_class_contents
[to
] & reg_class_contents
[VSX_REGS
];
22329 from_vsx
= reg_class_contents
[from
] & reg_class_contents
[VSX_REGS
];
22330 if (!hard_reg_set_empty_p (to_vsx
)
22331 && !hard_reg_set_empty_p (from_vsx
)
22333 || hard_reg_set_intersect_p (to_vsx
, from_vsx
)))
22335 int reg
= FIRST_FPR_REGNO
;
22337 || (TEST_HARD_REG_BIT (to_vsx
, FIRST_ALTIVEC_REGNO
)
22338 && TEST_HARD_REG_BIT (from_vsx
, FIRST_ALTIVEC_REGNO
)))
22339 reg
= FIRST_ALTIVEC_REGNO
;
22340 ret
= 2 * hard_regno_nregs (reg
, mode
);
22343 /* Moves from/to GENERAL_REGS. */
22344 else if ((rclass
= from
, reg_classes_intersect_p (to
, GENERAL_REGS
))
22345 || (rclass
= to
, reg_classes_intersect_p (from
, GENERAL_REGS
)))
22347 if (rclass
== FLOAT_REGS
|| rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
22349 if (TARGET_DIRECT_MOVE
)
22351 /* Keep the cost for direct moves above that for within
22352 a register class even if the actual processor cost is
22353 comparable. We do this because a direct move insn
22354 can't be a nop, whereas with ideal register
22355 allocation a move within the same class might turn
22356 out to be a nop. */
22357 if (rs6000_tune
== PROCESSOR_POWER9
22358 || rs6000_tune
== PROCESSOR_POWER10
)
22359 ret
= 3 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22361 ret
= 4 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22362 /* SFmode requires a conversion when moving between gprs
22364 if (mode
== SFmode
)
22368 ret
= (rs6000_memory_move_cost (mode
, rclass
, false)
22369 + rs6000_memory_move_cost (mode
, GENERAL_REGS
, false));
22372 /* It's more expensive to move CR_REGS than CR0_REGS because of the
22374 else if (rclass
== CR_REGS
)
22377 /* For those processors that have slow LR/CTR moves, make them more
22378 expensive than memory in order to bias spills to memory .*/
22379 else if ((rs6000_tune
== PROCESSOR_POWER6
22380 || rs6000_tune
== PROCESSOR_POWER7
22381 || rs6000_tune
== PROCESSOR_POWER8
22382 || rs6000_tune
== PROCESSOR_POWER9
)
22383 && reg_class_subset_p (rclass
, SPECIAL_REGS
))
22384 ret
= 6 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22387 /* A move will cost one instruction per GPR moved. */
22388 ret
= 2 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22391 /* Everything else has to go through GENERAL_REGS. */
22393 ret
= (rs6000_register_move_cost (mode
, GENERAL_REGS
, to
)
22394 + rs6000_register_move_cost (mode
, from
, GENERAL_REGS
));
22396 if (TARGET_DEBUG_COST
)
22398 if (dbg_cost_ctrl
== 1)
22400 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22401 ret
, GET_MODE_NAME (mode
), reg_class_names
[from
],
22402 reg_class_names
[to
]);
/* A C expression returning the cost of moving data of MODE from a register
   to or from memory.  */

static int
rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
			 bool in ATTRIBUTE_UNUSED)
{
  int ret;

  if (TARGET_DEBUG_COST)
    dbg_cost_ctrl++;

  if (reg_classes_intersect_p (rclass, GENERAL_REGS))
    ret = 4 * hard_regno_nregs (0, mode);
  else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
	    || reg_classes_intersect_p (rclass, VSX_REGS)))
    ret = 4 * hard_regno_nregs (32, mode);
  else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
    ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
  else
    ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);

  if (TARGET_DEBUG_COST)
    {
      if (dbg_cost_ctrl == 1)
	fprintf (stderr,
		 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
		 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
      dbg_cost_ctrl--;
    }

  return ret;
}
/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.

   The register allocator chooses GEN_OR_VSX_REGS for the allocno
   class if GENERAL_REGS and VSX_REGS cost is lower than the memory
   cost.  This happens a lot when TARGET_DIRECT_MOVE makes the register
   move cost between GENERAL_REGS and VSX_REGS low.

   It might seem reasonable to use a union class.  After all, if usage
   of vsr is low and gpr high, it might make sense to spill gpr to vsr
   rather than memory.  However, in cases where register pressure of
   both is high, like the cactus_adm spec test, allowing
   GEN_OR_VSX_REGS as the allocno class results in bad decisions in
   the first scheduling pass.  This is partly due to an allocno of
   GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
   class, which gives too high a pressure for GENERAL_REGS and too low
   for VSX_REGS.  So, force a choice of the subclass here.

   The best class is also the union if GENERAL_REGS and VSX_REGS have
   the same cost.  In that case we do use GEN_OR_VSX_REGS as the
   allocno class, since trying to narrow down the class by regno mode
   is prone to error.  For example, SImode is allowed in VSX regs and
   in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
   it would be wrong to choose an allocno of GENERAL_REGS based on
   SImode.  */

static reg_class_t
22469 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED
,
22470 reg_class_t allocno_class
,
22471 reg_class_t best_class
)
22473 switch (allocno_class
)
22475 case GEN_OR_VSX_REGS
:
22476 /* best_class must be a subset of allocno_class. */
22477 gcc_checking_assert (best_class
== GEN_OR_VSX_REGS
22478 || best_class
== GEN_OR_FLOAT_REGS
22479 || best_class
== VSX_REGS
22480 || best_class
== ALTIVEC_REGS
22481 || best_class
== FLOAT_REGS
22482 || best_class
== GENERAL_REGS
22483 || best_class
== BASE_REGS
);
22484 /* Use best_class but choose wider classes when copying from the
22485 wider class to best_class is cheap. This mimics IRA choice
22486 of allocno class. */
22487 if (best_class
== BASE_REGS
)
22488 return GENERAL_REGS
;
22489 if (TARGET_VSX
&& best_class
== FLOAT_REGS
)
22494 if (best_class
== ALTIVEC_REGS
)
22495 return ALTIVEC_REGS
;
22501 return allocno_class
;
/* Load up a constant.  If the mode is a vector mode, splat the value across
   all of the vector elements.  */

static rtx
rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
{
  rtx reg;

  if (mode == SFmode || mode == DFmode)
    {
      rtx d = const_double_from_real_value (dconst, mode);
      reg = force_reg (mode, d);
    }
  else if (mode == V4SFmode)
    {
      rtx d = const_double_from_real_value (dconst, SFmode);
      rtvec v = gen_rtvec (4, d, d, d, d);
      reg = gen_reg_rtx (mode);
      rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
    }
  else if (mode == V2DFmode)
    {
      rtx d = const_double_from_real_value (dconst, DFmode);
      rtvec v = gen_rtvec (2, d, d);
      reg = gen_reg_rtx (mode);
      rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
    }
  else
    gcc_unreachable ();

  return reg;
}
/* Generate an FMA instruction.  */

static void
rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
{
  machine_mode mode = GET_MODE (target);
  rtx dst;

  dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
  gcc_assert (dst != NULL);

  if (dst != target)
    emit_move_insn (target, dst);
}
/* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a).  */

static void
rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
{
  machine_mode mode = GET_MODE (dst);
  rtx r;

  /* This is a tad more complicated, since the fnma_optab is for
     a different expression: fma(-m1, m2, a), which is the same
     thing except in the case of signed zeros.

     Fortunately we know that if FMA is supported that FNMSUB is
     also supported in the ISA.  Just expand it directly.  */

  gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);

  r = gen_rtx_NEG (mode, a);
  r = gen_rtx_FMA (mode, m1, m2, r);
  r = gen_rtx_NEG (mode, r);
  emit_insn (gen_rtx_SET (dst, r));
}
/* Newton-Raphson approximation of floating point divide DST = N/D.  If NOTE_P,
   add a reg_note saying that this was a division.  Support both scalar and
   vector divide.  Assumes no trapping math and finite arguments.  */

void
rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
{
  machine_mode mode = GET_MODE (dst);
  rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
  int i;

  /* Low precision estimates guarantee 5 bits of accuracy.  High
     precision estimates guarantee 14 bits of accuracy.  SFmode
     requires 23 bits of accuracy.  DFmode requires 52 bits of
     accuracy.  Each pass at least doubles the accuracy, leading
     to the following.  */
  int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
  if (mode == DFmode || mode == V2DFmode)
    passes++;

  enum insn_code code = optab_handler (smul_optab, mode);
  insn_gen_fn gen_mul = GEN_FCN (code);

  gcc_assert (code != CODE_FOR_nothing);

  one = rs6000_load_constant_and_splat (mode, dconst1);

  /* x0 = 1./d estimate */
  x0 = gen_reg_rtx (mode);
  emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
					      UNSPEC_FRES)));

  /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i).  */
  if (passes > 1) {

    /* e0 = 1. - d * x0  */
    e0 = gen_reg_rtx (mode);
    rs6000_emit_nmsub (e0, d, x0, one);

    /* x1 = x0 + e0 * x0  */
    x1 = gen_reg_rtx (mode);
    rs6000_emit_madd (x1, e0, x0, x0);

    for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
	 ++i, xprev = xnext, eprev = enext) {

      /* enext = eprev * eprev  */
      enext = gen_reg_rtx (mode);
      emit_insn (gen_mul (enext, eprev, eprev));

      /* xnext = xprev + enext * xprev  */
      xnext = gen_reg_rtx (mode);
      rs6000_emit_madd (xnext, enext, xprev, xprev);
    }

  } else
    xprev = x0;

  /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i).  */

  /* u = n * xprev  */
  u = gen_reg_rtx (mode);
  emit_insn (gen_mul (u, n, xprev));

  /* v = n - (d * u)  */
  v = gen_reg_rtx (mode);
  rs6000_emit_nmsub (v, d, u, n);

  /* dst = (v * xprev) + u  */
  rs6000_emit_madd (dst, v, xprev, u);

  if (note_p)
    add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
}
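/* Editor's note: a minimal scalar sketch (not part of GCC) of the
   Newton-Raphson sequence rs6000_emit_swdiv emits above.  The hardware
   reciprocal estimate (fre) is replaced by an ordinary divide and the
   pass count is simplified; only the recurrences are illustrated.  */
#if 0
static double
swdiv_sketch (double n, double d)
{
  double x = 1.0 / d;		/* stand-in for the fre estimate x0 */

  double e = 1.0 - d * x;	/* e0 = 1 - d*x0	(fnmsub) */
  x = x + e * x;		/* x1 = x0 + e0*x0	(fmadd)  */
  /* Additional passes would square the error term:
     e = e * e;  x = x + e * x;  */

  double u = n * x;		/* u = n * x */
  double v = n - d * u;		/* v = n - d*u		(fnmsub) */
  return u + v * x;		/* dst = u + v*x	(fmadd)  */
}
#endif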
22650 /* Goldschmidt's Algorithm for single/double-precision floating point
22651 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
22654 rs6000_emit_swsqrt (rtx dst
, rtx src
, bool recip
)
22656 machine_mode mode
= GET_MODE (src
);
22657 rtx e
= gen_reg_rtx (mode
);
22658 rtx g
= gen_reg_rtx (mode
);
22659 rtx h
= gen_reg_rtx (mode
);
22661 /* Low precision estimates guarantee 5 bits of accuracy. High
22662 precision estimates guarantee 14 bits of accuracy. SFmode
22663 requires 23 bits of accuracy. DFmode requires 52 bits of
22664 accuracy. Each pass at least doubles the accuracy, leading
22665 to the following. */
22666 int passes
= (TARGET_RECIP_PRECISION
) ? 1 : 3;
22667 if (mode
== DFmode
|| mode
== V2DFmode
)
22672 enum insn_code code
= optab_handler (smul_optab
, mode
);
22673 insn_gen_fn gen_mul
= GEN_FCN (code
);
22675 gcc_assert (code
!= CODE_FOR_nothing
);
22677 mhalf
= rs6000_load_constant_and_splat (mode
, dconsthalf
);
22679 /* e = rsqrt estimate */
22680 emit_insn (gen_rtx_SET (e
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, src
),
22683 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
22686 rtx zero
= force_reg (mode
, CONST0_RTX (mode
));
22688 if (mode
== SFmode
)
22690 rtx target
= emit_conditional_move (e
, { GT
, src
, zero
, mode
},
22693 emit_move_insn (e
, target
);
22697 rtx cond
= gen_rtx_GT (VOIDmode
, e
, zero
);
22698 rs6000_emit_vector_cond_expr (e
, e
, zero
, cond
, src
, zero
);
22702 /* g = sqrt estimate. */
22703 emit_insn (gen_mul (g
, e
, src
));
22704 /* h = 1/(2*sqrt) estimate. */
22705 emit_insn (gen_mul (h
, e
, mhalf
));
22711 rtx t
= gen_reg_rtx (mode
);
22712 rs6000_emit_nmsub (t
, g
, h
, mhalf
);
22713 /* Apply correction directly to 1/rsqrt estimate. */
22714 rs6000_emit_madd (dst
, e
, t
, e
);
22718 for (i
= 0; i
< passes
; i
++)
22720 rtx t1
= gen_reg_rtx (mode
);
22721 rtx g1
= gen_reg_rtx (mode
);
22722 rtx h1
= gen_reg_rtx (mode
);
22724 rs6000_emit_nmsub (t1
, g
, h
, mhalf
);
22725 rs6000_emit_madd (g1
, g
, t1
, g
);
22726 rs6000_emit_madd (h1
, h
, t1
, h
);
22731 /* Multiply by 2 for 1/rsqrt. */
22732 emit_insn (gen_add3_insn (dst
, h
, h
));
22737 rtx t
= gen_reg_rtx (mode
);
22738 rs6000_emit_nmsub (t
, g
, h
, mhalf
);
22739 rs6000_emit_madd (dst
, g
, t
, g
);
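/* Editor's note: a minimal scalar sketch (not part of GCC) of the
   Goldschmidt iteration emitted by rs6000_emit_swsqrt above.  The
   frsqrte estimate is replaced by 1/sqrt, the zero filtering is omitted
   and the pass count is fixed at three; g converges to sqrt(src) and h
   to 1/(2*sqrt(src)), so 2*h gives the rsqrt result.  */
#if 0
#include <math.h>

static double
swsqrt_sketch (double src, int recip)
{
  double e = 1.0 / sqrt (src);	/* stand-in for the frsqrte estimate */
  double g = e * src;		/* sqrt estimate */
  double h = e * 0.5;		/* 1/(2*sqrt) estimate */

  for (int i = 0; i < 3; i++)
    {
      double t = 0.5 - g * h;	/* fnmsub: mhalf - g*h */
      g = g + t * g;		/* fmadd */
      h = h + t * h;		/* fmadd */
    }

  return recip ? 2.0 * h : g;
}
#endif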
/* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
   (Power7) targets.  DST is the target, and SRC is the argument operand.  */

void
rs6000_emit_popcount (rtx dst, rtx src)
{
  machine_mode mode = GET_MODE (dst);
  rtx tmp1, tmp2;

  /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can.  */
  if (TARGET_POPCNTD)
    {
      if (mode == SImode)
	emit_insn (gen_popcntdsi2 (dst, src));
      else
	emit_insn (gen_popcntddi2 (dst, src));
      return;
    }

  tmp1 = gen_reg_rtx (mode);

  if (mode == SImode)
    {
      emit_insn (gen_popcntbsi2 (tmp1, src));
      tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
			  NULL_RTX, 0);
      tmp2 = force_reg (SImode, tmp2);
      emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
    }
  else
    {
      emit_insn (gen_popcntbdi2 (tmp1, src));
      tmp2 = expand_mult (DImode, tmp1,
			  GEN_INT ((HOST_WIDE_INT)
				   0x01010101 << 32 | 0x01010101),
			  NULL_RTX, 0);
      tmp2 = force_reg (DImode, tmp2);
      emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
    }
}
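/* Editor's note: a standalone sketch (not part of GCC) of the popcntb
   fallback above, for the 32-bit case.  popcntb yields a per-byte
   population count; multiplying by 0x01010101 sums the four byte counts
   into the top byte, which the final shift extracts.  */
#if 0
#include <stdint.h>

static uint32_t
popcount32_sketch (uint32_t x)
{
  /* Per-byte popcount, as the popcntb instruction would produce.  */
  uint32_t b = 0;
  for (int i = 0; i < 4; i++)
    {
      uint32_t byte = (x >> (8 * i)) & 0xff;
      uint32_t cnt = 0;
      while (byte)
	{
	  cnt += byte & 1;
	  byte >>= 1;
	}
      b |= cnt << (8 * i);
    }

  /* Sum the byte counts into the most significant byte and extract.  */
  return (b * 0x01010101u) >> 24;
}
#endif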
22787 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
22788 target, and SRC is the argument operand. */
22791 rs6000_emit_parity (rtx dst
, rtx src
)
22793 machine_mode mode
= GET_MODE (dst
);
22796 tmp
= gen_reg_rtx (mode
);
22798 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
22801 if (mode
== SImode
)
22803 emit_insn (gen_popcntbsi2 (tmp
, src
));
22804 emit_insn (gen_paritysi2_cmpb (dst
, tmp
));
22808 emit_insn (gen_popcntbdi2 (tmp
, src
));
22809 emit_insn (gen_paritydi2_cmpb (dst
, tmp
));
22814 if (mode
== SImode
)
22816 /* Is mult+shift >= shift+xor+shift+xor? */
22817 if (rs6000_cost
->mulsi_const
>= COSTS_N_INSNS (3))
22819 rtx tmp1
, tmp2
, tmp3
, tmp4
;
22821 tmp1
= gen_reg_rtx (SImode
);
22822 emit_insn (gen_popcntbsi2 (tmp1
, src
));
22824 tmp2
= gen_reg_rtx (SImode
);
22825 emit_insn (gen_lshrsi3 (tmp2
, tmp1
, GEN_INT (16)));
22826 tmp3
= gen_reg_rtx (SImode
);
22827 emit_insn (gen_xorsi3 (tmp3
, tmp1
, tmp2
));
22829 tmp4
= gen_reg_rtx (SImode
);
22830 emit_insn (gen_lshrsi3 (tmp4
, tmp3
, GEN_INT (8)));
22831 emit_insn (gen_xorsi3 (tmp
, tmp3
, tmp4
));
22834 rs6000_emit_popcount (tmp
, src
);
22835 emit_insn (gen_andsi3 (dst
, tmp
, const1_rtx
));
22839 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
22840 if (rs6000_cost
->muldi
>= COSTS_N_INSNS (5))
22842 rtx tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
;
22844 tmp1
= gen_reg_rtx (DImode
);
22845 emit_insn (gen_popcntbdi2 (tmp1
, src
));
22847 tmp2
= gen_reg_rtx (DImode
);
22848 emit_insn (gen_lshrdi3 (tmp2
, tmp1
, GEN_INT (32)));
22849 tmp3
= gen_reg_rtx (DImode
);
22850 emit_insn (gen_xordi3 (tmp3
, tmp1
, tmp2
));
22852 tmp4
= gen_reg_rtx (DImode
);
22853 emit_insn (gen_lshrdi3 (tmp4
, tmp3
, GEN_INT (16)));
22854 tmp5
= gen_reg_rtx (DImode
);
22855 emit_insn (gen_xordi3 (tmp5
, tmp3
, tmp4
));
22857 tmp6
= gen_reg_rtx (DImode
);
22858 emit_insn (gen_lshrdi3 (tmp6
, tmp5
, GEN_INT (8)));
22859 emit_insn (gen_xordi3 (tmp
, tmp5
, tmp6
));
22862 rs6000_emit_popcount (tmp
, src
);
22863 emit_insn (gen_anddi3 (dst
, tmp
, const1_rtx
));
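/* Editor's note: a standalone sketch (not part of GCC) of the xor-fold
   parity fallback above, for the 32-bit case.  Starting from the popcntb
   per-byte counts, xor-folding halves the width each step, and the low
   bit of the low byte is the parity of the whole word.  */
#if 0
#include <stdint.h>

static uint32_t
parity32_sketch (uint32_t x)
{
  /* Per-byte popcount, as popcntbsi2 would produce.  */
  uint32_t b = 0;
  for (int i = 0; i < 4; i++)
    {
      uint32_t byte = (x >> (8 * i)) & 0xff, cnt = 0;
      while (byte)
	{
	  cnt += byte & 1;
	  byte >>= 1;
	}
      b |= cnt << (8 * i);
    }

  b ^= b >> 16;			/* fold the upper half onto the lower */
  b ^= b >> 8;			/* fold again to a single byte */
  return b & 1;			/* its low bit is the parity of x */
}
#endif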
22867 /* Expand an Altivec constant permutation for little endian mode.
22868 OP0 and OP1 are the input vectors and TARGET is the output vector.
22869 SEL specifies the constant permutation vector.
22871 There are two issues: First, the two input operands must be
22872 swapped so that together they form a double-wide array in LE
22873 order. Second, the vperm instruction has surprising behavior
22874 in LE mode: it interprets the elements of the source vectors
22875 in BE mode ("left to right") and interprets the elements of
22876 the destination vector in LE mode ("right to left"). To
22877 correct for this, we must subtract each element of the permute
22878 control vector from 31.
22880 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
22881 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
22882 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
22883 serve as the permute control vector. Then, in BE mode,
22887 places the desired result in vr9. However, in LE mode the
22888 vector contents will be
22890 vr10 = 00000003 00000002 00000001 00000000
22891 vr11 = 00000007 00000006 00000005 00000004
22893 The result of the vperm using the same permute control vector is
22895 vr9 = 05000000 07000000 01000000 03000000
22897 That is, the leftmost 4 bytes of vr10 are interpreted as the
22898 source for the rightmost 4 bytes of vr9, and so on.
22900 If we change the permute control vector to
   vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
22910 vr9 = 00000006 00000004 00000002 00000000. */
22913 altivec_expand_vec_perm_const_le (rtx target
, rtx op0
, rtx op1
,
22914 const vec_perm_indices
&sel
)
22918 rtx constv
, unspec
;
22920 /* Unpack and adjust the constant selector. */
22921 for (i
= 0; i
< 16; ++i
)
22923 unsigned int elt
= 31 - (sel
[i
] & 31);
22924 perm
[i
] = GEN_INT (elt
);
22927 /* Expand to a permute, swapping the inputs and using the
22928 adjusted selector. */
22930 op0
= force_reg (V16QImode
, op0
);
22932 op1
= force_reg (V16QImode
, op1
);
22934 constv
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, perm
));
22935 constv
= force_reg (V16QImode
, constv
);
22936 unspec
= gen_rtx_UNSPEC (V16QImode
, gen_rtvec (3, op1
, op0
, constv
),
22938 if (!REG_P (target
))
22940 rtx tmp
= gen_reg_rtx (V16QImode
);
22941 emit_move_insn (tmp
, unspec
);
22945 emit_move_insn (target
, unspec
);
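/* Editor's note: a standalone sketch (not part of GCC) of the selector
   adjustment performed above for little-endian vperm: each element
   becomes 31 - (element & 31), and the two source operands are swapped
   when the UNSPEC_VPERM is emitted.  For the "extract even words"
   selector from the comment, {0,1,2,3, 8,9,10,11, 16,17,18,19,
   24,25,26,27} becomes {31,30,29,28, 23,22,21,20, 15,14,13,12, 7,6,5,4}.  */
#if 0
static void
adjust_vperm_selector_for_le (unsigned char sel[16])
{
  for (int i = 0; i < 16; i++)
    sel[i] = 31 - (sel[i] & 31);
}
#endif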
22948 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
22949 permute control vector. But here it's not a constant, so we must
22950 generate a vector NAND or NOR to do the adjustment. */
22953 altivec_expand_vec_perm_le (rtx operands
[4])
22955 rtx notx
, iorx
, unspec
;
22956 rtx target
= operands
[0];
22957 rtx op0
= operands
[1];
22958 rtx op1
= operands
[2];
22959 rtx sel
= operands
[3];
22961 rtx norreg
= gen_reg_rtx (V16QImode
);
22962 machine_mode mode
= GET_MODE (target
);
22964 /* Get everything in regs so the pattern matches. */
22966 op0
= force_reg (mode
, op0
);
22968 op1
= force_reg (mode
, op1
);
22970 sel
= force_reg (V16QImode
, sel
);
22971 if (!REG_P (target
))
22972 tmp
= gen_reg_rtx (mode
);
22974 if (TARGET_P9_VECTOR
)
22976 unspec
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op0
, sel
),
22981 /* Invert the selector with a VNAND if available, else a VNOR.
22982 The VNAND is preferred for future fusion opportunities. */
22983 notx
= gen_rtx_NOT (V16QImode
, sel
);
22984 iorx
= (TARGET_P8_VECTOR
22985 ? gen_rtx_IOR (V16QImode
, notx
, notx
)
22986 : gen_rtx_AND (V16QImode
, notx
, notx
));
22987 emit_insn (gen_rtx_SET (norreg
, iorx
));
22989 /* Permute with operands reversed and adjusted selector. */
22990 unspec
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op0
, norreg
),
22994 /* Copy into target, possibly by way of a register. */
22995 if (!REG_P (target
))
22997 emit_move_insn (tmp
, unspec
);
23001 emit_move_insn (target
, unspec
);
23004 /* Expand an Altivec constant permutation. Return true if we match
23005 an efficient implementation; false to fall back to VPERM.
23007 OP0 and OP1 are the input vectors and TARGET is the output vector.
23008 SEL specifies the constant permutation vector. */
23011 altivec_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
,
23012 const vec_perm_indices
&sel
)
23014 struct altivec_perm_insn
{
23015 HOST_WIDE_INT mask
;
23016 enum insn_code impl
;
23017 unsigned char perm
[16];
23019 static const struct altivec_perm_insn patterns
[] = {
23020 {OPTION_MASK_ALTIVEC
,
23021 CODE_FOR_altivec_vpkuhum_direct
,
23022 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}},
23023 {OPTION_MASK_ALTIVEC
,
23024 CODE_FOR_altivec_vpkuwum_direct
,
23025 {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
23026 {OPTION_MASK_ALTIVEC
,
23027 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghb_direct
23028 : CODE_FOR_altivec_vmrglb_direct
,
23029 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
23030 {OPTION_MASK_ALTIVEC
,
23031 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghh_direct
23032 : CODE_FOR_altivec_vmrglh_direct
,
23033 {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
23034 {OPTION_MASK_ALTIVEC
,
23035 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghw_direct_v4si
23036 : CODE_FOR_altivec_vmrglw_direct_v4si
,
23037 {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
23038 {OPTION_MASK_ALTIVEC
,
23039 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglb_direct
23040 : CODE_FOR_altivec_vmrghb_direct
,
23041 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
23042 {OPTION_MASK_ALTIVEC
,
23043 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglh_direct
23044 : CODE_FOR_altivec_vmrghh_direct
,
23045 {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
23046 {OPTION_MASK_ALTIVEC
,
23047 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglw_direct_v4si
23048 : CODE_FOR_altivec_vmrghw_direct_v4si
,
23049 {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}},
23050 {OPTION_MASK_P8_VECTOR
,
23051 BYTES_BIG_ENDIAN
? CODE_FOR_p8_vmrgew_v4sf_direct
23052 : CODE_FOR_p8_vmrgow_v4sf_direct
,
23053 {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}},
23054 {OPTION_MASK_P8_VECTOR
,
23055 BYTES_BIG_ENDIAN
? CODE_FOR_p8_vmrgow_v4sf_direct
23056 : CODE_FOR_p8_vmrgew_v4sf_direct
,
23057 {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}},
23058 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
23059 {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}},
23060 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
23061 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
23062 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
23063 {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}},
23064 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
23065 {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}}};
23067 unsigned int i
, j
, elt
, which
;
23068 unsigned char perm
[16];
23072 /* Unpack the constant selector. */
23073 for (i
= which
= 0; i
< 16; ++i
)
23076 which
|= (elt
< 16 ? 1 : 2);
23080 /* Simplify the constant selector based on operands. */
23084 gcc_unreachable ();
23088 if (!rtx_equal_p (op0
, op1
))
23093 for (i
= 0; i
< 16; ++i
)
23105 /* Look for splat patterns. */
23110 for (i
= 0; i
< 16; ++i
)
23111 if (perm
[i
] != elt
)
23115 if (!BYTES_BIG_ENDIAN
)
23117 emit_insn (gen_altivec_vspltb_direct (target
, op0
, GEN_INT (elt
)));
23123 for (i
= 0; i
< 16; i
+= 2)
23124 if (perm
[i
] != elt
|| perm
[i
+ 1] != elt
+ 1)
23128 int field
= BYTES_BIG_ENDIAN
? elt
/ 2 : 7 - elt
/ 2;
23129 x
= gen_reg_rtx (V8HImode
);
23130 emit_insn (gen_altivec_vsplth_direct (x
, gen_lowpart (V8HImode
, op0
),
23132 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
23139 for (i
= 0; i
< 16; i
+= 4)
23141 || perm
[i
+ 1] != elt
+ 1
23142 || perm
[i
+ 2] != elt
+ 2
23143 || perm
[i
+ 3] != elt
+ 3)
23147 int field
= BYTES_BIG_ENDIAN
? elt
/ 4 : 3 - elt
/ 4;
23148 x
= gen_reg_rtx (V4SImode
);
23149 emit_insn (gen_altivec_vspltw_direct (x
, gen_lowpart (V4SImode
, op0
),
23151 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
23157 /* Look for merge and pack patterns. */
23158 for (j
= 0; j
< ARRAY_SIZE (patterns
); ++j
)
23162 if ((patterns
[j
].mask
& rs6000_isa_flags
) == 0)
23165 elt
= patterns
[j
].perm
[0];
23166 if (perm
[0] == elt
)
23168 else if (perm
[0] == elt
+ 16)
23172 for (i
= 1; i
< 16; ++i
)
23174 elt
= patterns
[j
].perm
[i
];
23176 elt
= (elt
>= 16 ? elt
- 16 : elt
+ 16);
23177 else if (one_vec
&& elt
>= 16)
23179 if (perm
[i
] != elt
)
23184 enum insn_code icode
= patterns
[j
].impl
;
23185 machine_mode omode
= insn_data
[icode
].operand
[0].mode
;
23186 machine_mode imode
= insn_data
[icode
].operand
[1].mode
;
23188 rtx perm_idx
= GEN_INT (0);
23189 if (icode
== CODE_FOR_vsx_xxpermdi_v16qi
)
23206 perm_idx
= GEN_INT (perm_val
);
23209 /* For little-endian, don't use vpkuwum and vpkuhum if the
23210 underlying vector type is not V4SI and V8HI, respectively.
23211 For example, using vpkuwum with a V8HI picks up the even
23212 halfwords (BE numbering) when the even halfwords (LE
23213 numbering) are what we need. */
23214 if (!BYTES_BIG_ENDIAN
23215 && icode
== CODE_FOR_altivec_vpkuwum_direct
23217 && GET_MODE (op0
) != V4SImode
)
23219 && GET_MODE (XEXP (op0
, 0)) != V4SImode
)))
23221 if (!BYTES_BIG_ENDIAN
23222 && icode
== CODE_FOR_altivec_vpkuhum_direct
23224 && GET_MODE (op0
) != V8HImode
)
23226 && GET_MODE (XEXP (op0
, 0)) != V8HImode
)))
23229 /* For little-endian, the two input operands must be swapped
23230 (or swapped back) to ensure proper right-to-left numbering
23232 if (swapped
^ !BYTES_BIG_ENDIAN
23233 && icode
!= CODE_FOR_vsx_xxpermdi_v16qi
)
23234 std::swap (op0
, op1
);
23235 if (imode
!= V16QImode
)
23237 op0
= gen_lowpart (imode
, op0
);
23238 op1
= gen_lowpart (imode
, op1
);
23240 if (omode
== V16QImode
)
23243 x
= gen_reg_rtx (omode
);
23244 if (icode
== CODE_FOR_vsx_xxpermdi_v16qi
)
23245 emit_insn (GEN_FCN (icode
) (x
, op0
, op1
, perm_idx
));
23247 emit_insn (GEN_FCN (icode
) (x
, op0
, op1
));
23248 if (omode
!= V16QImode
)
23249 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
23254 if (!BYTES_BIG_ENDIAN
)
23256 altivec_expand_vec_perm_const_le (target
, op0
, op1
, sel
);
23263 /* Expand a VSX Permute Doubleword constant permutation.
23264 Return true if we match an efficient implementation. */
23267 rs6000_expand_vec_perm_const_1 (rtx target
, rtx op0
, rtx op1
,
23268 unsigned char perm0
, unsigned char perm1
)
23272 /* If both selectors come from the same operand, fold to single op. */
23273 if ((perm0
& 2) == (perm1
& 2))
23280 /* If both operands are equal, fold to simpler permutation. */
23281 if (rtx_equal_p (op0
, op1
))
23284 perm1
= (perm1
& 1) + 2;
23286 /* If the first selector comes from the second operand, swap. */
23287 else if (perm0
& 2)
23293 std::swap (op0
, op1
);
23295 /* If the second selector does not come from the second operand, fail. */
23296 else if ((perm1
& 2) == 0)
23300 if (target
!= NULL
)
23302 machine_mode vmode
, dmode
;
23305 vmode
= GET_MODE (target
);
23306 gcc_assert (GET_MODE_NUNITS (vmode
) == 2);
23307 dmode
= mode_for_vector (GET_MODE_INNER (vmode
), 4).require ();
23308 x
= gen_rtx_VEC_CONCAT (dmode
, op0
, op1
);
23309 v
= gen_rtvec (2, GEN_INT (perm0
), GEN_INT (perm1
));
23310 x
= gen_rtx_VEC_SELECT (vmode
, x
, gen_rtx_PARALLEL (VOIDmode
, v
));
23311 emit_insn (gen_rtx_SET (target
, x
));
23316 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
23319 rs6000_vectorize_vec_perm_const (machine_mode vmode
, machine_mode op_mode
,
23320 rtx target
, rtx op0
, rtx op1
,
23321 const vec_perm_indices
&sel
)
23323 if (vmode
!= op_mode
)
23326 bool testing_p
= !target
;
23328 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
23329 if (TARGET_ALTIVEC
&& testing_p
)
23334 rtx nop0
= force_reg (vmode
, op0
);
23340 op1
= force_reg (vmode
, op1
);
23342 /* Check for ps_merge* or xxpermdi insns. */
23343 if ((vmode
== V2DFmode
|| vmode
== V2DImode
) && VECTOR_MEM_VSX_P (vmode
))
23347 op0
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 1);
23348 op1
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 2);
23350 if (rs6000_expand_vec_perm_const_1 (target
, op0
, op1
, sel
[0], sel
[1]))
23354 if (TARGET_ALTIVEC
)
23356 /* Force the target-independent code to lower to V16QImode. */
23357 if (vmode
!= V16QImode
)
23359 if (altivec_expand_vec_perm_const (target
, op0
, op1
, sel
))
/* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
   OP0 and OP1 are the input vectors and TARGET is the output vector.
   PERM specifies the constant permutation vector.  */

static void
rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
			   machine_mode vmode, const vec_perm_builder &perm)
{
  rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
  if (x != target)
    emit_move_insn (target, x);
}
/* Expand an extract even operation.  */

void
rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
{
  machine_mode vmode = GET_MODE (target);
  unsigned i, nelt = GET_MODE_NUNITS (vmode);
  vec_perm_builder perm (nelt, nelt, 1);

  for (i = 0; i < nelt; i++)
    perm.quick_push (i * 2);

  rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
}
/* Expand a vector interleave operation.  */

void
rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
{
  machine_mode vmode = GET_MODE (target);
  unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
  vec_perm_builder perm (nelt, nelt, 1);

  high = (highp ? 0 : nelt / 2);
  for (i = 0; i < nelt / 2; i++)
    {
      perm.quick_push (i + high);
      perm.quick_push (i + nelt + high);
    }

  rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
}
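/* Editor's note: a standalone sketch (not part of GCC) of the selector
   rs6000_expand_interleave builds above.  For a 4-element vector,
   highp produces {0, 4, 1, 5} and !highp produces {2, 6, 3, 7}.  */
#if 0
static void
build_interleave_selector (unsigned nelt, int highp, unsigned *sel)
{
  unsigned high = highp ? 0 : nelt / 2;
  for (unsigned i = 0, j = 0; i < nelt / 2; i++)
    {
      sel[j++] = i + high;		/* element from op0 */
      sel[j++] = i + nelt + high;	/* matching element from op1 */
    }
}
#endif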
/* Scale a V2DF vector SRC by two to the SCALE and place in TGT.  */
void
rs6000_scale_v2df (rtx tgt, rtx src, int scale)
{
  HOST_WIDE_INT hwi_scale (scale);
  REAL_VALUE_TYPE r_pow;
  rtvec v = rtvec_alloc (2);
  rtx elt;
  rtx scale_vec = gen_reg_rtx (V2DFmode);
  (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
  elt = const_double_from_real_value (r_pow, DFmode);
  RTVEC_ELT (v, 0) = elt;
  RTVEC_ELT (v, 1) = elt;
  rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
  emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
}
23430 /* Return an RTX representing where to find the function value of a
23431 function returning MODE. */
23433 rs6000_complex_function_value (machine_mode mode
)
23435 unsigned int regno
;
23437 machine_mode inner
= GET_MODE_INNER (mode
);
23438 unsigned int inner_bytes
= GET_MODE_UNIT_SIZE (mode
);
23440 if (TARGET_FLOAT128_TYPE
23442 || (mode
== TCmode
&& TARGET_IEEEQUAD
)))
23443 regno
= ALTIVEC_ARG_RETURN
;
23445 else if (FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
23446 regno
= FP_ARG_RETURN
;
23450 regno
= GP_ARG_RETURN
;
23452 /* 32-bit is OK since it'll go in r3/r4. */
23453 if (TARGET_32BIT
&& inner_bytes
>= 4)
23454 return gen_rtx_REG (mode
, regno
);
23457 if (inner_bytes
>= 8)
23458 return gen_rtx_REG (mode
, regno
);
23460 r1
= gen_rtx_EXPR_LIST (inner
, gen_rtx_REG (inner
, regno
),
23462 r2
= gen_rtx_EXPR_LIST (inner
, gen_rtx_REG (inner
, regno
+ 1),
23463 GEN_INT (inner_bytes
));
23464 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, r1
, r2
));
/* Return an rtx describing a return value of MODE as a PARALLEL
   in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
   stride REG_STRIDE.  */

static rtx
rs6000_parallel_return (machine_mode mode,
			int n_elts, machine_mode elt_mode,
			unsigned int regno, unsigned int reg_stride)
{
  rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
  int i;

  for (i = 0; i < n_elts; i++)
    {
      rtx r = gen_rtx_REG (elt_mode, regno);
      rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
      XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
      regno += reg_stride;
    }

  return par;
}
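/* Editor's note: an illustrative sketch (not compiled, not part of GCC)
   of the RTL rs6000_parallel_return builds.  For two DFmode elements
   starting at FP_ARG_RETURN with stride 1, the result looks roughly like

     (parallel [(expr_list (reg:DF 33) (const_int 0))
		(expr_list (reg:DF 34) (const_int 8))])

   one EXPR_LIST per element, each carrying its byte offset.  */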
/* Target hook for TARGET_FUNCTION_VALUE.

   An integer value is in r3 and a floating-point value is in fp1,
   unless -msoft-float.  */

static rtx
rs6000_function_value (const_tree valtype,
		       const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
		       bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  unsigned int regno;
  machine_mode elt_mode;
  int n_elts;

  /* Special handling for structs in darwin64.  */
  if (TARGET_MACHO
      && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
    {
      CUMULATIVE_ARGS valcum;
      rtx valret;

      valcum.words = 0;
      valcum.fregno = FP_ARG_MIN_REG;
      valcum.vregno = ALTIVEC_ARG_MIN_REG;
      /* Do a trial code generation as if this were going to be passed as
	 an argument; if any part goes in memory, we return NULL.  */
      valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
      if (valret)
	return valret;
      /* Otherwise fall through to standard ABI rules.  */
    }

  mode = TYPE_MODE (valtype);

  /* The ELFv2 ABI returns homogeneous VFP aggregates in registers.  */
  if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
    {
      int first_reg, n_regs;

      if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
	{
	  /* _Decimal128 must use even/odd register pairs.  */
	  first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
	  n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
	}
      else
	{
	  first_reg = ALTIVEC_ARG_RETURN;
	  n_regs = 1;
	}

      return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
    }

  /* Some return value types need to be split in -mpowerpc64, 32bit ABI.  */
  if (TARGET_32BIT && TARGET_POWERPC64)
    switch (mode)
      {
      default:
	break;
      case E_DImode:
      case E_SCmode:
      case E_DCmode:
      case E_TCmode:
	int count = GET_MODE_SIZE (mode) / 4;
	return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
      }

  if ((INTEGRAL_TYPE_P (valtype)
       && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
      || POINTER_TYPE_P (valtype))
    mode = TARGET_32BIT ? SImode : DImode;

  if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
    /* _Decimal128 must use an even/odd register pair.  */
    regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
  else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
	   && !FLOAT128_VECTOR_P (mode))
    regno = FP_ARG_RETURN;
  else if (TREE_CODE (valtype) == COMPLEX_TYPE
	   && targetm.calls.split_complex_arg)
    return rs6000_complex_function_value (mode);
  /* VSX is a superset of Altivec and adds V2DImode/V2DFmode.  Since the same
     return register is used in both cases, and we won't see V2DImode/V2DFmode
     for pure altivec, combine the two cases.  */
  else if ((TREE_CODE (valtype) == VECTOR_TYPE || VECTOR_ALIGNMENT_P (mode))
	   && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
	   && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
    regno = ALTIVEC_ARG_RETURN;
  else
    regno = GP_ARG_RETURN;

  return gen_rtx_REG (mode, regno);
}
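
/* Illustrative note (not from the original source): under the ELFv2 ABI a
   homogeneous aggregate such as a struct of two doubles is returned via
   rs6000_parallel_return in consecutive FP registers (fp1, fp2), while a
   plain int comes back in r3 and a double in fp1.  */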
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
rs6000_libcall_value (machine_mode mode)
{
  unsigned int regno;

  /* A long long return value needs to be split in -mpowerpc64, 32bit ABI.  */
  if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
    return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);

  if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
    /* _Decimal128 must use an even/odd register pair.  */
    regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
  else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
    regno = FP_ARG_RETURN;
  /* VSX is a superset of Altivec and adds V2DImode/V2DFmode.  Since the same
     return register is used in both cases, and we won't see V2DImode/V2DFmode
     for pure altivec, combine the two cases.  */
  else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
	   && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
    regno = ALTIVEC_ARG_RETURN;
  else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
    return rs6000_complex_function_value (mode);
  else
    regno = GP_ARG_RETURN;

  return gen_rtx_REG (mode, regno);
}
/* Compute register pressure classes.  We implement the target hook to avoid
   IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
   lead to incorrect estimates of the number of available registers and
   therefore increased register pressure/spill.  */

static int
rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
{
  int n;

  n = 0;
  pressure_classes[n++] = GENERAL_REGS;
  if (TARGET_ALTIVEC)
    pressure_classes[n++] = ALTIVEC_REGS;
  if (TARGET_VSX)
    pressure_classes[n++] = VSX_REGS;
  else
    {
      if (TARGET_HARD_FLOAT)
	pressure_classes[n++] = FLOAT_REGS;
    }
  pressure_classes[n++] = CR_REGS;
  pressure_classes[n++] = SPECIAL_REGS;

  return n;
}
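
/* Illustrative note (not from the original source): on a typical VSX
   target this reports GENERAL_REGS, ALTIVEC_REGS, VSX_REGS, CR_REGS and
   SPECIAL_REGS as the pressure classes, so IRA never costs against a
   union class such as GEN_OR_FLOAT_REGS.  */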
/* Given FROM and TO register numbers, say whether this elimination is allowed.
   Frame pointer elimination is automatically handled.

   For the RS/6000, if frame pointer elimination is being done, we would like
   to convert ap into fp, not sp.

   We need r30 if -mminimal-toc was specified, and there are constant pool
   references.  */

static bool
rs6000_can_eliminate (const int from, const int to)
{
  return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
	  ? ! frame_pointer_needed
	  : from == RS6000_PIC_OFFSET_TABLE_REGNUM
	    ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
	      || constant_pool_empty_p ()
	    : true);
}
/* Define the offset between two registers, FROM to be eliminated and its
   replacement TO, at the start of a routine.  */

HOST_WIDE_INT
rs6000_initial_elimination_offset (int from, int to)
{
  rs6000_stack_t *info = rs6000_stack_info ();
  HOST_WIDE_INT offset;

  if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    offset = info->push_p ? 0 : -info->total_size;
  else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    {
      offset = info->push_p ? 0 : -info->total_size;
      if (FRAME_GROWS_DOWNWARD)
	offset += info->fixed_size + info->vars_size + info->parm_size;
    }
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = FRAME_GROWS_DOWNWARD
	     ? info->fixed_size + info->vars_size + info->parm_size
	     : 0;
  else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = info->total_size;
  else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    offset = info->push_p ? info->total_size : 0;
  else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
    offset = 0;
  else
    gcc_unreachable ();

  return offset;
}
/* Fill in sizes of registers used by unwinder.  */

static void
rs6000_init_dwarf_reg_sizes_extra (tree address)
{
  if (TARGET_MACHO && ! TARGET_ALTIVEC)
    {
      int i;
      machine_mode mode = TYPE_MODE (char_type_node);
      rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
      rtx mem = gen_rtx_MEM (BLKmode, addr);
      rtx value = gen_int_mode (16, mode);

      /* On Darwin, libgcc may be built to run on both G3 and G4/5.
	 The unwinder still needs to know the size of Altivec registers.  */

      for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
	{
	  int column = DWARF_REG_TO_UNWIND_COLUMN
		(DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
	  HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);

	  emit_move_insn (adjust_address (mem, mode, offset), value);
	}
    }
}
/* Map internal gcc register numbers to debug format register numbers.
   FORMAT specifies the type of debug register number to use:
     0 -- debug information, except for frame-related sections
     1 -- DWARF .debug_frame section
     2 -- DWARF .eh_frame section  */

unsigned int
rs6000_dbx_register_number (unsigned int regno, unsigned int format)
{
  /* On some platforms, we use the standard DWARF register
     numbering for .debug_info and .debug_frame.  */
  if ((format == 0 && dwarf_debuginfo_p ()) || format == 1)
    {
#ifdef RS6000_USE_DWARF_NUMBERING
      if (regno <= 31)
	return regno;
      if (FP_REGNO_P (regno))
	return regno - FIRST_FPR_REGNO + 32;
      if (ALTIVEC_REGNO_P (regno))
	return regno - FIRST_ALTIVEC_REGNO + 1124;
      if (regno == LR_REGNO)
	return 108;
      if (regno == CTR_REGNO)
	return 109;
      if (regno == CA_REGNO)
	return 101;  /* XER */
      /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
	 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
	 The actual code emitted saves the whole of CR, so we map CR2_REGNO
	 to the DWARF reg for CR.  */
      if (format == 1 && regno == CR2_REGNO)
	return 64;
      if (CR_REGNO_P (regno))
	return regno - CR0_REGNO + 86;
      if (regno == VRSAVE_REGNO)
	return 356;
      if (regno == VSCR_REGNO)
	return 67;

      /* These do not make much sense.  */
      if (regno == FRAME_POINTER_REGNUM)
	return 111;
      if (regno == ARG_POINTER_REGNUM)
	return 67;

      gcc_unreachable ();
#endif
    }

  /* We use the GCC 7 (and before) internal number for non-DWARF debug
     information, and also for .eh_frame.  */
  /* Translate the regnos to their numbers in GCC 7 (and before).  */
  if (regno <= 31)
    return regno;
  if (FP_REGNO_P (regno))
    return regno - FIRST_FPR_REGNO + 32;
  if (ALTIVEC_REGNO_P (regno))
    return regno - FIRST_ALTIVEC_REGNO + 77;
  if (regno == LR_REGNO)
    return 65;
  if (regno == CTR_REGNO)
    return 66;
  if (regno == CA_REGNO)
    return 76;  /* XER */
  if (CR_REGNO_P (regno))
    return regno - CR0_REGNO + 68;
  if (regno == VRSAVE_REGNO)
    return 109;
  if (regno == VSCR_REGNO)
    return 110;

  if (regno == FRAME_POINTER_REGNUM)
    return 111;
  if (regno == ARG_POINTER_REGNUM)
    return 67;

  gcc_unreachable ();
}
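
/* Illustrative note (not from the original source): with the standard
   DWARF numbering (format 0 when DWARF debug info is in use, or format 1),
   GPRs map to 0-31 and FPRs to 32-63, so e.g. f1 is reported as DWARF
   register 33, while format 2 (.eh_frame) keeps the historical GCC
   register numbers.  */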
/* Target hook eh_return_filter_mode.  */
static scalar_int_mode
rs6000_eh_return_filter_mode (void)
{
  return TARGET_32BIT ? SImode : word_mode;
}
/* Target hook for scalar_mode_supported_p.  */
static bool
rs6000_scalar_mode_supported_p (scalar_mode mode)
{
  /* -m32 does not support TImode.  This is the default, from
     default_scalar_mode_supported_p.  For -m32 -mpowerpc64 we want the
     same ABI as for -m32.  But default_scalar_mode_supported_p allows
     integer modes of precision 2 * BITS_PER_WORD, which matches TImode
     for -mpowerpc64.  */
  if (TARGET_32BIT && mode == TImode)
    return false;

  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (TARGET_FLOAT128_TYPE && mode == KFmode)
    return true;
  else if (TARGET_IBM128 && mode == IFmode)
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
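
/* Illustrative note (not from the original source): this is why __float128
   (KFmode) is accepted as a scalar type only when IEEE 128-bit support is
   enabled, and why TImode is rejected for -m32 even with -mpowerpc64.  */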
/* Target hook for libgcc_floating_mode_supported_p.  */

static bool
rs6000_libgcc_floating_mode_supported_p (scalar_float_mode mode)
{
  switch (mode)
    {
    case E_SFmode:
    case E_DFmode:
    case E_TFmode:
      return true;

      /* We only return true for KFmode if IEEE 128-bit types are supported.  */
    case E_KFmode:
      return TARGET_FLOAT128_TYPE;

    default:
      return false;
    }
}
/* Target hook for vector_mode_supported_p.  */
static bool
rs6000_vector_mode_supported_p (machine_mode mode)
{
  /* There is no vector form for IEEE 128-bit.  If we return true for IEEE
     128-bit, the compiler might try to widen IEEE 128-bit to IBM
     double-double.  */
  if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
    return true;

  else
    return false;
}
/* Target hook for floatn_mode.  */
static opt_scalar_float_mode
rs6000_floatn_mode (int n, bool extended)
{
  if (extended)
    {
      switch (n)
	{
	case 32:
	  return DFmode;

	case 64:
	  if (TARGET_FLOAT128_TYPE)
	    return KFmode;
	  else
	    return opt_scalar_float_mode ();

	case 128:
	  return opt_scalar_float_mode ();

	default:
	  /* Those are the only valid _FloatNx types.  */
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (n)
	{
	case 32:
	  return SFmode;

	case 64:
	  return DFmode;

	case 128:
	  if (TARGET_FLOAT128_TYPE)
	    return KFmode;
	  else
	    return opt_scalar_float_mode ();

	default:
	  return opt_scalar_float_mode ();
	}
    }
}
/* Target hook for c_mode_for_suffix.  */
static machine_mode
rs6000_c_mode_for_suffix (char suffix)
{
  if (TARGET_FLOAT128_TYPE)
    {
      if (suffix == 'q' || suffix == 'Q')
	return KFmode;

      /* At the moment, we are not defining a suffix for IBM extended double.
	 If/when the default for -mabi=ieeelongdouble is changed, and we want
	 to support __ibm128 constants in legacy library code, we may need to
	 re-evaluate this decision.  Currently, c-lex.cc only supports 'w' and
	 'q' as machine dependent suffixes.  The x86_64 port uses 'w' for
	 __float80 constants.  */
    }

  return VOIDmode;
}
/* Target hook for invalid_arg_for_unprototyped_fn.  */
static const char *
invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
{
  return (!rs6000_darwin64_abi
	  && typelist == 0
	  && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
	  && (funcdecl == NULL_TREE
	      || (TREE_CODE (funcdecl) == FUNCTION_DECL
		  && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
	 ? N_("AltiVec argument passed to unprototyped function")
	 : NULL;
}
/* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
   setup by using __stack_chk_fail_local hidden function instead of
   calling __stack_chk_fail directly.  Otherwise it is better to call
   __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
rs6000_stack_protect_fail (void)
{
  return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
	 ? default_hidden_stack_protect_fail ()
	 : default_external_stack_protect_fail ();
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
rs6000_asan_shadow_offset (void)
{
  return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
}
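
/* Illustrative note (not from the original source): the AddressSanitizer
   shadow memory therefore starts at 1 << 41 (0x20000000000) for 64-bit
   code and at 1 << 29 (0x20000000) for 32-bit code.  */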
/* Mask options that we want to support inside of attribute((target)) and
   #pragma GCC target operations.  Note, we do not include things like
   64/32-bit, endianness, hard/soft floating point, etc. that would have
   different calling sequences.  */

struct rs6000_opt_mask {
  const char *name;		/* option name */
  HOST_WIDE_INT mask;		/* mask to set */
  bool invert;			/* invert sense of mask */
  bool valid_target;		/* option is a target option */
};
23985 static struct rs6000_opt_mask
const rs6000_opt_masks
[] =
23987 { "altivec", OPTION_MASK_ALTIVEC
, false, true },
23988 { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
,
23990 { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR
,
23992 { "cmpb", OPTION_MASK_CMPB
, false, true },
23993 { "crypto", OPTION_MASK_CRYPTO
, false, true },
23994 { "direct-move", OPTION_MASK_DIRECT_MOVE
, false, true },
23995 { "dlmzb", OPTION_MASK_DLMZB
, false, true },
23996 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX
,
23998 { "float128", OPTION_MASK_FLOAT128_KEYWORD
, false, true },
23999 { "float128-hardware", OPTION_MASK_FLOAT128_HW
, false, true },
24000 { "fprnd", OPTION_MASK_FPRND
, false, true },
24001 { "power10", OPTION_MASK_POWER10
, false, true },
24002 { "hard-dfp", OPTION_MASK_DFP
, false, true },
24003 { "htm", OPTION_MASK_HTM
, false, true },
24004 { "isel", OPTION_MASK_ISEL
, false, true },
24005 { "mfcrf", OPTION_MASK_MFCRF
, false, true },
24006 { "mfpgpr", 0, false, true },
24007 { "mma", OPTION_MASK_MMA
, false, true },
24008 { "modulo", OPTION_MASK_MODULO
, false, true },
24009 { "mulhw", OPTION_MASK_MULHW
, false, true },
24010 { "multiple", OPTION_MASK_MULTIPLE
, false, true },
24011 { "pcrel", OPTION_MASK_PCREL
, false, true },
24012 { "pcrel-opt", OPTION_MASK_PCREL_OPT
, false, true },
24013 { "popcntb", OPTION_MASK_POPCNTB
, false, true },
24014 { "popcntd", OPTION_MASK_POPCNTD
, false, true },
24015 { "power8-fusion", OPTION_MASK_P8_FUSION
, false, true },
24016 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN
, false, true },
24017 { "power8-vector", OPTION_MASK_P8_VECTOR
, false, true },
24018 { "power9-minmax", OPTION_MASK_P9_MINMAX
, false, true },
24019 { "power9-misc", OPTION_MASK_P9_MISC
, false, true },
24020 { "power9-vector", OPTION_MASK_P9_VECTOR
, false, true },
24021 { "power10-fusion", OPTION_MASK_P10_FUSION
, false, true },
24022 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT
, false, true },
24023 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT
, false, true },
24024 { "prefixed", OPTION_MASK_PREFIXED
, false, true },
24025 { "quad-memory", OPTION_MASK_QUAD_MEMORY
, false, true },
24026 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC
, false, true },
24027 { "recip-precision", OPTION_MASK_RECIP_PRECISION
, false, true },
24028 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT
, false, true },
24029 { "string", 0, false, true },
24030 { "update", OPTION_MASK_NO_UPDATE
, true , true },
24031 { "vsx", OPTION_MASK_VSX
, false, true },
24032 #ifdef OPTION_MASK_64BIT
24034 { "aix64", OPTION_MASK_64BIT
, false, false },
24035 { "aix32", OPTION_MASK_64BIT
, true, false },
24037 { "64", OPTION_MASK_64BIT
, false, false },
24038 { "32", OPTION_MASK_64BIT
, true, false },
24041 #ifdef OPTION_MASK_EABI
24042 { "eabi", OPTION_MASK_EABI
, false, false },
24044 #ifdef OPTION_MASK_LITTLE_ENDIAN
24045 { "little", OPTION_MASK_LITTLE_ENDIAN
, false, false },
24046 { "big", OPTION_MASK_LITTLE_ENDIAN
, true, false },
24048 #ifdef OPTION_MASK_RELOCATABLE
24049 { "relocatable", OPTION_MASK_RELOCATABLE
, false, false },
24051 #ifdef OPTION_MASK_STRICT_ALIGN
24052 { "strict-align", OPTION_MASK_STRICT_ALIGN
, false, false },
24054 { "soft-float", OPTION_MASK_SOFT_FLOAT
, false, false },
24055 { "string", 0, false, false },
24058 /* Builtin mask mapping for printing the flags. */
24059 static struct rs6000_opt_mask
const rs6000_builtin_mask_names
[] =
24061 { "altivec", OPTION_MASK_ALTIVEC
, false, false },
24062 { "vsx", OPTION_MASK_VSX
, false, false },
24063 { "fre", OPTION_MASK_POPCNTB
, false, false },
24064 { "fres", OPTION_MASK_PPC_GFXOPT
, false, false },
24065 { "frsqrte", OPTION_MASK_PPC_GFXOPT
, false, false },
24066 { "frsqrtes", OPTION_MASK_POPCNTB
, false, false },
24067 { "popcntd", OPTION_MASK_POPCNTD
, false, false },
24068 { "cell", OPTION_MASK_FPRND
, false, false },
24069 { "power8-vector", OPTION_MASK_P8_VECTOR
, false, false },
24070 { "power9-vector", OPTION_MASK_P9_VECTOR
, false, false },
24071 { "power9-misc", OPTION_MASK_P9_MISC
, false, false },
24072 { "crypto", OPTION_MASK_CRYPTO
, false, false },
24073 { "htm", OPTION_MASK_HTM
, false, false },
24074 { "hard-dfp", OPTION_MASK_DFP
, false, false },
24075 { "hard-float", OPTION_MASK_SOFT_FLOAT
, false, false },
24076 { "long-double-128", OPTION_MASK_MULTIPLE
, false, false },
24077 { "powerpc64", MASK_POWERPC64
, false, false },
24078 { "float128", OPTION_MASK_FLOAT128_KEYWORD
, false, false },
24079 { "float128-hw", OPTION_MASK_FLOAT128_HW
,false, false },
24080 { "mma", OPTION_MASK_MMA
, false, false },
24081 { "power10", OPTION_MASK_POWER10
, false, false },
24084 /* Option variables that we want to support inside attribute((target)) and
24085 #pragma GCC target operations. */
24087 struct rs6000_opt_var
{
24088 const char *name
; /* option name */
24089 size_t global_offset
; /* offset of the option in global_options. */
24090 size_t target_offset
; /* offset of the option in target options. */
24093 static struct rs6000_opt_var
const rs6000_opt_vars
[] =
24096 offsetof (struct gcc_options
, x_TARGET_FRIZ
),
24097 offsetof (struct cl_target_option
, x_TARGET_FRIZ
), },
24098 { "avoid-indexed-addresses",
24099 offsetof (struct gcc_options
, x_TARGET_AVOID_XFORM
),
24100 offsetof (struct cl_target_option
, x_TARGET_AVOID_XFORM
) },
24102 offsetof (struct gcc_options
, x_rs6000_default_long_calls
),
24103 offsetof (struct cl_target_option
, x_rs6000_default_long_calls
), },
24104 { "optimize-swaps",
24105 offsetof (struct gcc_options
, x_rs6000_optimize_swaps
),
24106 offsetof (struct cl_target_option
, x_rs6000_optimize_swaps
), },
24107 { "allow-movmisalign",
24108 offsetof (struct gcc_options
, x_TARGET_ALLOW_MOVMISALIGN
),
24109 offsetof (struct cl_target_option
, x_TARGET_ALLOW_MOVMISALIGN
), },
24111 offsetof (struct gcc_options
, x_TARGET_SCHED_GROUPS
),
24112 offsetof (struct cl_target_option
, x_TARGET_SCHED_GROUPS
), },
24114 offsetof (struct gcc_options
, x_TARGET_ALWAYS_HINT
),
24115 offsetof (struct cl_target_option
, x_TARGET_ALWAYS_HINT
), },
24116 { "align-branch-targets",
24117 offsetof (struct gcc_options
, x_TARGET_ALIGN_BRANCH_TARGETS
),
24118 offsetof (struct cl_target_option
, x_TARGET_ALIGN_BRANCH_TARGETS
), },
24120 offsetof (struct gcc_options
, x_TARGET_SCHED_PROLOG
),
24121 offsetof (struct cl_target_option
, x_TARGET_SCHED_PROLOG
), },
24123 offsetof (struct gcc_options
, x_TARGET_SCHED_PROLOG
),
24124 offsetof (struct cl_target_option
, x_TARGET_SCHED_PROLOG
), },
24125 { "speculate-indirect-jumps",
24126 offsetof (struct gcc_options
, x_rs6000_speculate_indirect_jumps
),
24127 offsetof (struct cl_target_option
, x_rs6000_speculate_indirect_jumps
), },
24130 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
24131 parsing. Return true if there were no errors. */
24134 rs6000_inner_target_options (tree args
, bool attr_p
)
24138 if (args
== NULL_TREE
)
24141 else if (TREE_CODE (args
) == STRING_CST
)
24143 char *p
= ASTRDUP (TREE_STRING_POINTER (args
));
24146 while ((q
= strtok (p
, ",")) != NULL
)
24148 bool error_p
= false;
24149 bool not_valid_p
= false;
24150 const char *cpu_opt
= NULL
;
24153 if (startswith (q
, "cpu="))
24155 int cpu_index
= rs6000_cpu_name_lookup (q
+4);
24156 if (cpu_index
>= 0)
24157 rs6000_cpu_index
= cpu_index
;
24164 else if (startswith (q
, "tune="))
24166 int tune_index
= rs6000_cpu_name_lookup (q
+5);
24167 if (tune_index
>= 0)
24168 rs6000_tune_index
= tune_index
;
24178 bool invert
= false;
24182 if (startswith (r
, "no-"))
24188 for (i
= 0; i
< ARRAY_SIZE (rs6000_opt_masks
); i
++)
24189 if (strcmp (r
, rs6000_opt_masks
[i
].name
) == 0)
24191 HOST_WIDE_INT mask
= rs6000_opt_masks
[i
].mask
;
24193 if (!rs6000_opt_masks
[i
].valid_target
)
24194 not_valid_p
= true;
24198 rs6000_isa_flags_explicit
|= mask
;
24200 /* VSX needs altivec, so -mvsx automagically sets
24201 altivec and disables -mavoid-indexed-addresses. */
24204 if (mask
== OPTION_MASK_VSX
)
24206 mask
|= OPTION_MASK_ALTIVEC
;
24207 TARGET_AVOID_XFORM
= 0;
24211 if (rs6000_opt_masks
[i
].invert
)
24215 rs6000_isa_flags
&= ~mask
;
24217 rs6000_isa_flags
|= mask
;
24222 if (error_p
&& !not_valid_p
)
24224 for (i
= 0; i
< ARRAY_SIZE (rs6000_opt_vars
); i
++)
24225 if (strcmp (r
, rs6000_opt_vars
[i
].name
) == 0)
24227 size_t j
= rs6000_opt_vars
[i
].global_offset
;
24228 *((int *) ((char *)&global_options
+ j
)) = !invert
;
24230 not_valid_p
= false;
24238 const char *eprefix
, *esuffix
;
24243 eprefix
= "__attribute__((__target__(";
24248 eprefix
= "#pragma GCC target ";
24253 error ("invalid cpu %qs for %s%qs%s", cpu_opt
, eprefix
,
24255 else if (not_valid_p
)
24256 error ("%s%qs%s is not allowed", eprefix
, q
, esuffix
);
24258 error ("%s%qs%s is invalid", eprefix
, q
, esuffix
);
24263 else if (TREE_CODE (args
) == TREE_LIST
)
24267 tree value
= TREE_VALUE (args
);
24270 bool ret2
= rs6000_inner_target_options (value
, attr_p
);
24274 args
= TREE_CHAIN (args
);
24276 while (args
!= NULL_TREE
);
24281 error ("attribute %<target%> argument not a string");
24288 /* Print out the target options as a list for -mdebug=target. */
24291 rs6000_debug_target_options (tree args
, const char *prefix
)
24293 if (args
== NULL_TREE
)
24294 fprintf (stderr
, "%s<NULL>", prefix
);
24296 else if (TREE_CODE (args
) == STRING_CST
)
24298 char *p
= ASTRDUP (TREE_STRING_POINTER (args
));
24301 while ((q
= strtok (p
, ",")) != NULL
)
24304 fprintf (stderr
, "%s\"%s\"", prefix
, q
);
24309 else if (TREE_CODE (args
) == TREE_LIST
)
24313 tree value
= TREE_VALUE (args
);
24316 rs6000_debug_target_options (value
, prefix
);
24319 args
= TREE_CHAIN (args
);
24321 while (args
!= NULL_TREE
);
24325 gcc_unreachable ();
24331 /* Hook to validate attribute((target("..."))). */
24334 rs6000_valid_attribute_p (tree fndecl
,
24335 tree
ARG_UNUSED (name
),
24339 struct cl_target_option cur_target
;
24342 tree new_target
, new_optimize
;
24343 tree func_optimize
;
24345 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
24347 if (TARGET_DEBUG_TARGET
)
24349 tree tname
= DECL_NAME (fndecl
);
24350 fprintf (stderr
, "\n==================== rs6000_valid_attribute_p:\n");
24352 fprintf (stderr
, "function: %.*s\n",
24353 (int) IDENTIFIER_LENGTH (tname
),
24354 IDENTIFIER_POINTER (tname
));
24356 fprintf (stderr
, "function: unknown\n");
24358 fprintf (stderr
, "args:");
24359 rs6000_debug_target_options (args
, " ");
24360 fprintf (stderr
, "\n");
24363 fprintf (stderr
, "flags: 0x%x\n", flags
);
24365 fprintf (stderr
, "--------------------\n");
24368 /* attribute((target("default"))) does nothing, beyond
24369 affecting multi-versioning. */
24370 if (TREE_VALUE (args
)
24371 && TREE_CODE (TREE_VALUE (args
)) == STRING_CST
24372 && TREE_CHAIN (args
) == NULL_TREE
24373 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args
)), "default") == 0)
24376 old_optimize
= build_optimization_node (&global_options
,
24377 &global_options_set
);
24378 func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
24380 /* If the function changed the optimization levels as well as setting target
24381 options, start with the optimizations specified. */
24382 if (func_optimize
&& func_optimize
!= old_optimize
)
24383 cl_optimization_restore (&global_options
, &global_options_set
,
24384 TREE_OPTIMIZATION (func_optimize
));
24386 /* The target attributes may also change some optimization flags, so update
24387 the optimization options if necessary. */
24388 cl_target_option_save (&cur_target
, &global_options
, &global_options_set
);
24389 rs6000_cpu_index
= rs6000_tune_index
= -1;
24390 ret
= rs6000_inner_target_options (args
, true);
24392 /* Set up any additional state. */
24395 ret
= rs6000_option_override_internal (false);
24396 new_target
= build_target_option_node (&global_options
,
24397 &global_options_set
);
24402 new_optimize
= build_optimization_node (&global_options
,
24403 &global_options_set
);
24410 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
24412 if (old_optimize
!= new_optimize
)
24413 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
24416 cl_target_option_restore (&global_options
, &global_options_set
, &cur_target
);
24418 if (old_optimize
!= new_optimize
)
24419 cl_optimization_restore (&global_options
, &global_options_set
,
24420 TREE_OPTIMIZATION (old_optimize
));
24426 /* Hook to validate the current #pragma GCC target and set the state, and
24427 update the macros based on what was changed. If ARGS is NULL, then
24428 POP_TARGET is used to reset the options. */
24431 rs6000_pragma_target_parse (tree args
, tree pop_target
)
24433 tree prev_tree
= build_target_option_node (&global_options
,
24434 &global_options_set
);
24436 struct cl_target_option
*prev_opt
, *cur_opt
;
24437 HOST_WIDE_INT prev_flags
, cur_flags
, diff_flags
;
24438 HOST_WIDE_INT prev_bumask
, cur_bumask
, diff_bumask
;
24440 if (TARGET_DEBUG_TARGET
)
24442 fprintf (stderr
, "\n==================== rs6000_pragma_target_parse\n");
24443 fprintf (stderr
, "args:");
24444 rs6000_debug_target_options (args
, " ");
24445 fprintf (stderr
, "\n");
24449 fprintf (stderr
, "pop_target:\n");
24450 debug_tree (pop_target
);
24453 fprintf (stderr
, "pop_target: <NULL>\n");
24455 fprintf (stderr
, "--------------------\n");
24460 cur_tree
= ((pop_target
)
24462 : target_option_default_node
);
24463 cl_target_option_restore (&global_options
, &global_options_set
,
24464 TREE_TARGET_OPTION (cur_tree
));
24468 rs6000_cpu_index
= rs6000_tune_index
= -1;
24469 if (!rs6000_inner_target_options (args
, false)
24470 || !rs6000_option_override_internal (false)
24471 || (cur_tree
= build_target_option_node (&global_options
,
24472 &global_options_set
))
24475 if (TARGET_DEBUG_BUILTIN
|| TARGET_DEBUG_TARGET
)
24476 fprintf (stderr
, "invalid pragma\n");
24482 target_option_current_node
= cur_tree
;
24483 rs6000_activate_target_options (target_option_current_node
);
24485 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
24486 change the macros that are defined. */
24487 if (rs6000_target_modify_macros_ptr
)
24489 prev_opt
= TREE_TARGET_OPTION (prev_tree
);
24490 prev_bumask
= prev_opt
->x_rs6000_builtin_mask
;
24491 prev_flags
= prev_opt
->x_rs6000_isa_flags
;
24493 cur_opt
= TREE_TARGET_OPTION (cur_tree
);
24494 cur_flags
= cur_opt
->x_rs6000_isa_flags
;
24495 cur_bumask
= cur_opt
->x_rs6000_builtin_mask
;
24497 diff_bumask
= (prev_bumask
^ cur_bumask
);
24498 diff_flags
= (prev_flags
^ cur_flags
);
24500 if ((diff_flags
!= 0) || (diff_bumask
!= 0))
24502 /* Delete old macros. */
24503 rs6000_target_modify_macros_ptr (false,
24504 prev_flags
& diff_flags
,
24505 prev_bumask
& diff_bumask
);
24507 /* Define new macros. */
24508 rs6000_target_modify_macros_ptr (true,
24509 cur_flags
& diff_flags
,
24510 cur_bumask
& diff_bumask
);
24518 /* Remember the last target of rs6000_set_current_function. */
24519 static GTY(()) tree rs6000_previous_fndecl
;
24521 /* Restore target's globals from NEW_TREE and invalidate the
24522 rs6000_previous_fndecl cache. */
24525 rs6000_activate_target_options (tree new_tree
)
24527 cl_target_option_restore (&global_options
, &global_options_set
,
24528 TREE_TARGET_OPTION (new_tree
));
24529 if (TREE_TARGET_GLOBALS (new_tree
))
24530 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
24531 else if (new_tree
== target_option_default_node
)
24532 restore_target_globals (&default_target_globals
);
24534 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
24535 rs6000_previous_fndecl
= NULL_TREE
;
24538 /* Establish appropriate back-end context for processing the function
24539 FNDECL. The argument might be NULL to indicate processing at top
24540 level, outside of any function scope. */
24542 rs6000_set_current_function (tree fndecl
)
24544 if (TARGET_DEBUG_TARGET
)
24546 fprintf (stderr
, "\n==================== rs6000_set_current_function");
24549 fprintf (stderr
, ", fndecl %s (%p)",
24550 (DECL_NAME (fndecl
)
24551 ? IDENTIFIER_POINTER (DECL_NAME (fndecl
))
24552 : "<unknown>"), (void *)fndecl
);
24554 if (rs6000_previous_fndecl
)
24555 fprintf (stderr
, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl
);
24557 fprintf (stderr
, "\n");
24560 /* Only change the context if the function changes. This hook is called
24561 several times in the course of compiling a function, and we don't want to
24562 slow things down too much or call target_reinit when it isn't safe. */
24563 if (fndecl
== rs6000_previous_fndecl
)
24567 if (rs6000_previous_fndecl
== NULL_TREE
)
24568 old_tree
= target_option_current_node
;
24569 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl
))
24570 old_tree
= DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl
);
24572 old_tree
= target_option_default_node
;
24575 if (fndecl
== NULL_TREE
)
24577 if (old_tree
!= target_option_current_node
)
24578 new_tree
= target_option_current_node
;
24580 new_tree
= NULL_TREE
;
24584 new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
24585 if (new_tree
== NULL_TREE
)
24586 new_tree
= target_option_default_node
;
24589 if (TARGET_DEBUG_TARGET
)
24593 fprintf (stderr
, "\nnew fndecl target specific options:\n");
24594 debug_tree (new_tree
);
24599 fprintf (stderr
, "\nold fndecl target specific options:\n");
24600 debug_tree (old_tree
);
24603 if (old_tree
!= NULL_TREE
|| new_tree
!= NULL_TREE
)
24604 fprintf (stderr
, "--------------------\n");
24607 if (new_tree
&& old_tree
!= new_tree
)
24608 rs6000_activate_target_options (new_tree
);
24611 rs6000_previous_fndecl
= fndecl
;
24615 /* Save the current options */
24618 rs6000_function_specific_save (struct cl_target_option
*ptr
,
24619 struct gcc_options
*opts
,
24620 struct gcc_options */
* opts_set */
)
24622 ptr
->x_rs6000_isa_flags
= opts
->x_rs6000_isa_flags
;
24623 ptr
->x_rs6000_isa_flags_explicit
= opts
->x_rs6000_isa_flags_explicit
;
24626 /* Restore the current options */
24629 rs6000_function_specific_restore (struct gcc_options
*opts
,
24630 struct gcc_options */
* opts_set */
,
24631 struct cl_target_option
*ptr
)
24634 opts
->x_rs6000_isa_flags
= ptr
->x_rs6000_isa_flags
;
24635 opts
->x_rs6000_isa_flags_explicit
= ptr
->x_rs6000_isa_flags_explicit
;
24636 (void) rs6000_option_override_internal (false);
24639 /* Print the current options */
24642 rs6000_function_specific_print (FILE *file
, int indent
,
24643 struct cl_target_option
*ptr
)
24645 rs6000_print_isa_options (file
, indent
, "Isa options set",
24646 ptr
->x_rs6000_isa_flags
);
24648 rs6000_print_isa_options (file
, indent
, "Isa options explicit",
24649 ptr
->x_rs6000_isa_flags_explicit
);
24652 /* Helper function to print the current isa or misc options on a line. */
24655 rs6000_print_options_internal (FILE *file
,
24657 const char *string
,
24658 HOST_WIDE_INT flags
,
24659 const char *prefix
,
24660 const struct rs6000_opt_mask
*opts
,
24661 size_t num_elements
)
24664 size_t start_column
= 0;
24666 size_t max_column
= 120;
24667 size_t prefix_len
= strlen (prefix
);
24668 size_t comma_len
= 0;
24669 const char *comma
= "";
24672 start_column
+= fprintf (file
, "%*s", indent
, "");
24676 fprintf (stderr
, DEBUG_FMT_S
, string
, "<none>");
24680 start_column
+= fprintf (stderr
, DEBUG_FMT_WX
, string
, flags
);
24682 /* Print the various mask options. */
24683 cur_column
= start_column
;
24684 for (i
= 0; i
< num_elements
; i
++)
24686 bool invert
= opts
[i
].invert
;
24687 const char *name
= opts
[i
].name
;
24688 const char *no_str
= "";
24689 HOST_WIDE_INT mask
= opts
[i
].mask
;
24690 size_t len
= comma_len
+ prefix_len
+ strlen (name
);
24694 if ((flags
& mask
) == 0)
24697 len
+= strlen ("no-");
24705 if ((flags
& mask
) != 0)
24708 len
+= strlen ("no-");
24715 if (cur_column
> max_column
)
24717 fprintf (stderr
, ", \\\n%*s", (int)start_column
, "");
24718 cur_column
= start_column
+ len
;
24722 fprintf (file
, "%s%s%s%s", comma
, prefix
, no_str
, name
);
24724 comma_len
= strlen (", ");
24727 fputs ("\n", file
);
24730 /* Helper function to print the current isa options on a line. */
24733 rs6000_print_isa_options (FILE *file
, int indent
, const char *string
,
24734 HOST_WIDE_INT flags
)
24736 rs6000_print_options_internal (file
, indent
, string
, flags
, "-m",
24737 &rs6000_opt_masks
[0],
24738 ARRAY_SIZE (rs6000_opt_masks
));
24742 rs6000_print_builtin_options (FILE *file
, int indent
, const char *string
,
24743 HOST_WIDE_INT flags
)
24745 rs6000_print_options_internal (file
, indent
, string
, flags
, "",
24746 &rs6000_builtin_mask_names
[0],
24747 ARRAY_SIZE (rs6000_builtin_mask_names
));
24750 /* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06,
24751 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
24752 -mupper-regs-df, etc.).
24754 If the user used -mno-power8-vector, we need to turn off all of the implicit
24755 ISA 2.07 and 3.0 options that relate to the vector unit.
24757 If the user used -mno-power9-vector, we need to turn off all of the implicit
24758 ISA 3.0 options that relate to the vector unit.
24760 This function does not handle explicit options such as the user specifying
24761 -mdirect-move. These are handled in rs6000_option_override_internal, and
24762 the appropriate error is given if needed.
24764 We return a mask of all of the implicit options that should not be enabled
24767 static HOST_WIDE_INT
24768 rs6000_disable_incompatible_switches (void)
24770 HOST_WIDE_INT ignore_masks
= rs6000_isa_flags_explicit
;
24773 static const struct {
24774 const HOST_WIDE_INT no_flag
; /* flag explicitly turned off. */
24775 const HOST_WIDE_INT dep_flags
; /* flags that depend on this option. */
24776 const char *const name
; /* name of the switch. */
24778 { OPTION_MASK_P9_VECTOR
, OTHER_P9_VECTOR_MASKS
, "power9-vector" },
24779 { OPTION_MASK_P8_VECTOR
, OTHER_P8_VECTOR_MASKS
, "power8-vector" },
24780 { OPTION_MASK_VSX
, OTHER_VSX_VECTOR_MASKS
, "vsx" },
24781 { OPTION_MASK_ALTIVEC
, OTHER_ALTIVEC_MASKS
, "altivec" },
24784 for (i
= 0; i
< ARRAY_SIZE (flags
); i
++)
24786 HOST_WIDE_INT no_flag
= flags
[i
].no_flag
;
24788 if ((rs6000_isa_flags
& no_flag
) == 0
24789 && (rs6000_isa_flags_explicit
& no_flag
) != 0)
24791 HOST_WIDE_INT dep_flags
= flags
[i
].dep_flags
;
24792 HOST_WIDE_INT set_flags
= (rs6000_isa_flags_explicit
24798 for (j
= 0; j
< ARRAY_SIZE (rs6000_opt_masks
); j
++)
24799 if ((set_flags
& rs6000_opt_masks
[j
].mask
) != 0)
24801 set_flags
&= ~rs6000_opt_masks
[j
].mask
;
24802 error ("%<-mno-%s%> turns off %<-m%s%>",
24804 rs6000_opt_masks
[j
].name
);
24807 gcc_assert (!set_flags
);
24810 rs6000_isa_flags
&= ~dep_flags
;
24811 ignore_masks
|= no_flag
| dep_flags
;
24815 return ignore_masks
;
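
/* Illustrative note (not from the original source): given an explicit
   -mno-vsx, any explicitly requested dependent option such as
   -mpower8-vector is diagnosed with "-mno-vsx turns off -mpower8-vector",
   and all implicit dependent vector-unit flags are cleared and added to
   the returned ignore mask.  */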
/* Helper function for printing the function name when debugging.  */

static const char *
get_decl_name (tree fn)
{
  tree name;

  if (!fn)
    return "<null>";

  name = DECL_NAME (fn);
  if (!name)
    return "<no-name>";

  return IDENTIFIER_POINTER (name);
}
24836 /* Return the clone id of the target we are compiling code for in a target
24837 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
24838 the priority list for the target clones (ordered from lowest to
24842 rs6000_clone_priority (tree fndecl
)
24844 tree fn_opts
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
24845 HOST_WIDE_INT isa_masks
;
24846 int ret
= CLONE_DEFAULT
;
24847 tree attrs
= lookup_attribute ("target", DECL_ATTRIBUTES (fndecl
));
24848 const char *attrs_str
= NULL
;
24850 attrs
= TREE_VALUE (TREE_VALUE (attrs
));
24851 attrs_str
= TREE_STRING_POINTER (attrs
);
24853 /* Return priority zero for default function. Return the ISA needed for the
24854 function if it is not the default. */
24855 if (strcmp (attrs_str
, "default") != 0)
24857 if (fn_opts
== NULL_TREE
)
24858 fn_opts
= target_option_default_node
;
24860 if (!fn_opts
|| !TREE_TARGET_OPTION (fn_opts
))
24861 isa_masks
= rs6000_isa_flags
;
24863 isa_masks
= TREE_TARGET_OPTION (fn_opts
)->x_rs6000_isa_flags
;
24865 for (ret
= CLONE_MAX
- 1; ret
!= 0; ret
--)
24866 if ((rs6000_clone_map
[ret
].isa_mask
& isa_masks
) != 0)
24870 if (TARGET_DEBUG_TARGET
)
24871 fprintf (stderr
, "rs6000_get_function_version_priority (%s) => %d\n",
24872 get_decl_name (fndecl
), ret
);
24877 /* This compares the priority of target features in function DECL1 and DECL2.
24878 It returns positive value if DECL1 is higher priority, negative value if
24879 DECL2 is higher priority and 0 if they are the same. Note, priorities are
24880 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
24883 rs6000_compare_version_priority (tree decl1
, tree decl2
)
24885 int priority1
= rs6000_clone_priority (decl1
);
24886 int priority2
= rs6000_clone_priority (decl2
);
24887 int ret
= priority1
- priority2
;
24889 if (TARGET_DEBUG_TARGET
)
24890 fprintf (stderr
, "rs6000_compare_version_priority (%s, %s) => %d\n",
24891 get_decl_name (decl1
), get_decl_name (decl2
), ret
);
24896 /* Make a dispatcher declaration for the multi-versioned function DECL.
24897 Calls to DECL function will be replaced with calls to the dispatcher
24898 by the front-end. Returns the decl of the dispatcher function. */
24901 rs6000_get_function_versions_dispatcher (void *decl
)
24903 tree fn
= (tree
) decl
;
24904 struct cgraph_node
*node
= NULL
;
24905 struct cgraph_node
*default_node
= NULL
;
24906 struct cgraph_function_version_info
*node_v
= NULL
;
24907 struct cgraph_function_version_info
*first_v
= NULL
;
24909 tree dispatch_decl
= NULL
;
24911 struct cgraph_function_version_info
*default_version_info
= NULL
;
24912 gcc_assert (fn
!= NULL
&& DECL_FUNCTION_VERSIONED (fn
));
24914 if (TARGET_DEBUG_TARGET
)
24915 fprintf (stderr
, "rs6000_get_function_versions_dispatcher (%s)\n",
24916 get_decl_name (fn
));
24918 node
= cgraph_node::get (fn
);
24919 gcc_assert (node
!= NULL
);
24921 node_v
= node
->function_version ();
24922 gcc_assert (node_v
!= NULL
);
24924 if (node_v
->dispatcher_resolver
!= NULL
)
24925 return node_v
->dispatcher_resolver
;
24927 /* Find the default version and make it the first node. */
24929 /* Go to the beginning of the chain. */
24930 while (first_v
->prev
!= NULL
)
24931 first_v
= first_v
->prev
;
24933 default_version_info
= first_v
;
24934 while (default_version_info
!= NULL
)
24936 const tree decl2
= default_version_info
->this_node
->decl
;
24937 if (is_function_default_version (decl2
))
24939 default_version_info
= default_version_info
->next
;
24942 /* If there is no default node, just return NULL. */
24943 if (default_version_info
== NULL
)
24946 /* Make default info the first node. */
24947 if (first_v
!= default_version_info
)
24949 default_version_info
->prev
->next
= default_version_info
->next
;
24950 if (default_version_info
->next
)
24951 default_version_info
->next
->prev
= default_version_info
->prev
;
24952 first_v
->prev
= default_version_info
;
24953 default_version_info
->next
= first_v
;
24954 default_version_info
->prev
= NULL
;
24957 default_node
= default_version_info
->this_node
;
24959 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
24960 error_at (DECL_SOURCE_LOCATION (default_node
->decl
),
24961 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
24962 "exports hardware capability bits");
24965 if (targetm
.has_ifunc_p ())
24967 struct cgraph_function_version_info
*it_v
= NULL
;
24968 struct cgraph_node
*dispatcher_node
= NULL
;
24969 struct cgraph_function_version_info
*dispatcher_version_info
= NULL
;
24971 /* Right now, the dispatching is done via ifunc. */
24972 dispatch_decl
= make_dispatcher_decl (default_node
->decl
);
24974 dispatcher_node
= cgraph_node::get_create (dispatch_decl
);
24975 gcc_assert (dispatcher_node
!= NULL
);
24976 dispatcher_node
->dispatcher_function
= 1;
24977 dispatcher_version_info
24978 = dispatcher_node
->insert_new_function_version ();
24979 dispatcher_version_info
->next
= default_version_info
;
24980 dispatcher_node
->definition
= 1;
24982 /* Set the dispatcher for all the versions. */
24983 it_v
= default_version_info
;
24984 while (it_v
!= NULL
)
24986 it_v
->dispatcher_resolver
= dispatch_decl
;
24992 error_at (DECL_SOURCE_LOCATION (default_node
->decl
),
24993 "multiversioning needs %<ifunc%> which is not supported "
24998 return dispatch_decl
;
25001 /* Make the resolver function decl to dispatch the versions of a multi-
25002 versioned function, DEFAULT_DECL. Create an empty basic block in the
25003 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
25007 make_resolver_func (const tree default_decl
,
25008 const tree dispatch_decl
,
25009 basic_block
*empty_bb
)
25011 /* Make the resolver function static. The resolver function returns
25013 tree decl_name
= clone_function_name (default_decl
, "resolver");
25014 const char *resolver_name
= IDENTIFIER_POINTER (decl_name
);
25015 tree type
= build_function_type_list (ptr_type_node
, NULL_TREE
);
25016 tree decl
= build_fn_decl (resolver_name
, type
);
25017 SET_DECL_ASSEMBLER_NAME (decl
, decl_name
);
25019 DECL_NAME (decl
) = decl_name
;
25020 TREE_USED (decl
) = 1;
25021 DECL_ARTIFICIAL (decl
) = 1;
25022 DECL_IGNORED_P (decl
) = 0;
25023 TREE_PUBLIC (decl
) = 0;
25024 DECL_UNINLINABLE (decl
) = 1;
25026 /* Resolver is not external, body is generated. */
25027 DECL_EXTERNAL (decl
) = 0;
25028 DECL_EXTERNAL (dispatch_decl
) = 0;
25030 DECL_CONTEXT (decl
) = NULL_TREE
;
25031 DECL_INITIAL (decl
) = make_node (BLOCK
);
25032 DECL_STATIC_CONSTRUCTOR (decl
) = 0;
25034 if (DECL_COMDAT_GROUP (default_decl
)
25035 || TREE_PUBLIC (default_decl
))
25037 /* In this case, each translation unit with a call to this
25038 versioned function will put out a resolver. Ensure it
25039 is comdat to keep just one copy. */
25040 DECL_COMDAT (decl
) = 1;
25041 make_decl_one_only (decl
, DECL_ASSEMBLER_NAME (decl
));
25044 TREE_PUBLIC (dispatch_decl
) = 0;
25046 /* Build result decl and add to function_decl. */
25047 tree t
= build_decl (UNKNOWN_LOCATION
, RESULT_DECL
, NULL_TREE
, ptr_type_node
);
25048 DECL_CONTEXT (t
) = decl
;
25049 DECL_ARTIFICIAL (t
) = 1;
25050 DECL_IGNORED_P (t
) = 1;
25051 DECL_RESULT (decl
) = t
;
25053 gimplify_function_tree (decl
);
25054 push_cfun (DECL_STRUCT_FUNCTION (decl
));
25055 *empty_bb
= init_lowered_empty_function (decl
, false,
25056 profile_count::uninitialized ());
25058 cgraph_node::add_new_function (decl
, true);
25059 symtab
->call_cgraph_insertion_hooks (cgraph_node::get_create (decl
));
25063 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
25064 DECL_ATTRIBUTES (dispatch_decl
)
25065 = make_attribute ("ifunc", resolver_name
, DECL_ATTRIBUTES (dispatch_decl
));
25067 cgraph_node::create_same_body_alias (dispatch_decl
, decl
);
25072 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
25073 return a pointer to VERSION_DECL if we are running on a machine that
25074 supports the index CLONE_ISA hardware architecture bits. This function will
25075 be called during version dispatch to decide which function version to
25076 execute. It returns the basic block at the end, to which more conditions
25080 add_condition_to_bb (tree function_decl
, tree version_decl
,
25081 int clone_isa
, basic_block new_bb
)
25083 push_cfun (DECL_STRUCT_FUNCTION (function_decl
));
25085 gcc_assert (new_bb
!= NULL
);
25086 gimple_seq gseq
= bb_seq (new_bb
);
25089 tree convert_expr
= build1 (CONVERT_EXPR
, ptr_type_node
,
25090 build_fold_addr_expr (version_decl
));
25091 tree result_var
= create_tmp_var (ptr_type_node
);
25092 gimple
*convert_stmt
= gimple_build_assign (result_var
, convert_expr
);
25093 gimple
*return_stmt
= gimple_build_return (result_var
);
25095 if (clone_isa
== CLONE_DEFAULT
)
25097 gimple_seq_add_stmt (&gseq
, convert_stmt
);
25098 gimple_seq_add_stmt (&gseq
, return_stmt
);
25099 set_bb_seq (new_bb
, gseq
);
25100 gimple_set_bb (convert_stmt
, new_bb
);
25101 gimple_set_bb (return_stmt
, new_bb
);
25106 tree bool_zero
= build_int_cst (bool_int_type_node
, 0);
25107 tree cond_var
= create_tmp_var (bool_int_type_node
);
25108 tree predicate_decl
= rs6000_builtin_decls
[(int) RS6000_BIF_CPU_SUPPORTS
];
25109 const char *arg_str
= rs6000_clone_map
[clone_isa
].name
;
25110 tree predicate_arg
= build_string_literal (strlen (arg_str
) + 1, arg_str
);
25111 gimple
*call_cond_stmt
= gimple_build_call (predicate_decl
, 1, predicate_arg
);
25112 gimple_call_set_lhs (call_cond_stmt
, cond_var
);
25114 gimple_set_block (call_cond_stmt
, DECL_INITIAL (function_decl
));
25115 gimple_set_bb (call_cond_stmt
, new_bb
);
25116 gimple_seq_add_stmt (&gseq
, call_cond_stmt
);
25118 gimple
*if_else_stmt
= gimple_build_cond (NE_EXPR
, cond_var
, bool_zero
,
25119 NULL_TREE
, NULL_TREE
);
25120 gimple_set_block (if_else_stmt
, DECL_INITIAL (function_decl
));
25121 gimple_set_bb (if_else_stmt
, new_bb
);
25122 gimple_seq_add_stmt (&gseq
, if_else_stmt
);
25124 gimple_seq_add_stmt (&gseq
, convert_stmt
);
25125 gimple_seq_add_stmt (&gseq
, return_stmt
);
25126 set_bb_seq (new_bb
, gseq
);
25128 basic_block bb1
= new_bb
;
25129 edge e12
= split_block (bb1
, if_else_stmt
);
25130 basic_block bb2
= e12
->dest
;
25131 e12
->flags
&= ~EDGE_FALLTHRU
;
25132 e12
->flags
|= EDGE_TRUE_VALUE
;
25134 edge e23
= split_block (bb2
, return_stmt
);
25135 gimple_set_bb (convert_stmt
, bb2
);
25136 gimple_set_bb (return_stmt
, bb2
);
25138 basic_block bb3
= e23
->dest
;
25139 make_edge (bb1
, bb3
, EDGE_FALSE_VALUE
);
25142 make_edge (bb2
, EXIT_BLOCK_PTR_FOR_FN (cfun
), 0);
25148 /* This function generates the dispatch function for multi-versioned functions.
25149 DISPATCH_DECL is the function which will contain the dispatch logic.
25150 FNDECLS are the function choices for dispatch, and is a tree chain.
25151 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
25152 code is generated. */
25155 dispatch_function_versions (tree dispatch_decl
,
25157 basic_block
*empty_bb
)
25161 vec
<tree
> *fndecls
;
25162 tree clones
[CLONE_MAX
];
25164 if (TARGET_DEBUG_TARGET
)
25165 fputs ("dispatch_function_versions, top\n", stderr
);
25167 gcc_assert (dispatch_decl
!= NULL
25168 && fndecls_p
!= NULL
25169 && empty_bb
!= NULL
);
25171 /* fndecls_p is actually a vector. */
25172 fndecls
= static_cast<vec
<tree
> *> (fndecls_p
);
25174 /* At least one more version other than the default. */
25175 gcc_assert (fndecls
->length () >= 2);
25177 /* The first version in the vector is the default decl. */
25178 memset ((void *) clones
, '\0', sizeof (clones
));
25179 clones
[CLONE_DEFAULT
] = (*fndecls
)[0];
25181 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
25182 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
25183 __builtin_cpu_support ensures that the TOC fields are setup by requiring a
25184 recent glibc. If we ever need to call __builtin_cpu_init, we would need
25185 to insert the code here to do the call. */
25187 for (ix
= 1; fndecls
->iterate (ix
, &ele
); ++ix
)
25189 int priority
= rs6000_clone_priority (ele
);
25190 if (!clones
[priority
])
25191 clones
[priority
] = ele
;
25194 for (ix
= CLONE_MAX
- 1; ix
>= 0; ix
--)
25197 if (TARGET_DEBUG_TARGET
)
25198 fprintf (stderr
, "dispatch_function_versions, clone %d, %s\n",
25199 ix
, get_decl_name (clones
[ix
]));
25201 *empty_bb
= add_condition_to_bb (dispatch_decl
, clones
[ix
], ix
,
25208 /* Generate the dispatching code body to dispatch multi-versioned function
25209 DECL. The target hook is called to process the "target" attributes and
25210 provide the code to dispatch the right function at run-time. NODE points
25211 to the dispatcher decl whose body will be created. */
25214 rs6000_generate_version_dispatcher_body (void *node_p
)
25217 basic_block empty_bb
;
25218 struct cgraph_node
*node
= (cgraph_node
*) node_p
;
25219 struct cgraph_function_version_info
*ninfo
= node
->function_version ();
25221 if (ninfo
->dispatcher_resolver
)
25222 return ninfo
->dispatcher_resolver
;
25224 /* node is going to be an alias, so remove the finalized bit. */
25225 node
->definition
= false;
25227 /* The first version in the chain corresponds to the default version. */
25228 ninfo
->dispatcher_resolver
= resolver
25229 = make_resolver_func (ninfo
->next
->this_node
->decl
, node
->decl
, &empty_bb
);
25231 if (TARGET_DEBUG_TARGET
)
25232 fprintf (stderr
, "rs6000_get_function_versions_dispatcher, %s\n",
25233 get_decl_name (resolver
));
25235 push_cfun (DECL_STRUCT_FUNCTION (resolver
));
25236 auto_vec
<tree
, 2> fn_ver_vec
;
25238 for (struct cgraph_function_version_info
*vinfo
= ninfo
->next
;
25240 vinfo
= vinfo
->next
)
25242 struct cgraph_node
*version
= vinfo
->this_node
;
25243 /* Check for virtual functions here again, as by this time it should
25244 have been determined if this function needs a vtable index or
25245 not. This happens for methods in derived classes that override
25246 virtual methods in base classes but are not explicitly marked as
25248 if (DECL_VINDEX (version
->decl
))
25249 sorry ("Virtual function multiversioning not supported");
25251 fn_ver_vec
.safe_push (version
->decl
);
25254 dispatch_function_versions (resolver
, &fn_ver_vec
, &empty_bb
);
25255 cgraph_edge::rebuild_edges ();
25260 /* Hook to decide if we need to scan function gimple statements to
25261 collect target specific information for inlining, and update the
25262 corresponding RS6000_FN_TARGET_INFO_* bit in INFO if we are able
25263 to predict which ISA feature is used at this time. Return true
25264 if we need to scan, otherwise return false. */
25267 rs6000_need_ipa_fn_target_info (const_tree decl
,
25268 unsigned int &info ATTRIBUTE_UNUSED
)
25270 tree target
= DECL_FUNCTION_SPECIFIC_TARGET (decl
);
25272 target
= target_option_default_node
;
25273 struct cl_target_option
*opts
= TREE_TARGET_OPTION (target
);
25275 /* See PR102059, we only handle HTM for now, so will only do
25276 the consequent scannings when HTM feature enabled. */
25277 if (opts
->x_rs6000_isa_flags
& OPTION_MASK_HTM
)
25283 /* Hook to update target specific information INFO for inlining by
25284 checking the given STMT. Return false if we don't need to scan
25285 any more, otherwise return true. */
25288 rs6000_update_ipa_fn_target_info (unsigned int &info
, const gimple
*stmt
)
25290 /* Assume inline asm can use any instruction features. */
25291 if (gimple_code (stmt
) == GIMPLE_ASM
)
25293 /* Should set any bits we concerned, for now OPTION_MASK_HTM is
25294 the only bit we care about. */
25295 info
|= RS6000_FN_TARGET_INFO_HTM
;
25298 else if (gimple_code (stmt
) == GIMPLE_CALL
)
25300 tree fndecl
= gimple_call_fndecl (stmt
);
25301 if (fndecl
&& fndecl_built_in_p (fndecl
, BUILT_IN_MD
))
25303 enum rs6000_gen_builtins fcode
25304 = (enum rs6000_gen_builtins
) DECL_MD_FUNCTION_CODE (fndecl
);
25305 /* HTM bifs definitely exploit HTM insns. */
25306 if (bif_is_htm (rs6000_builtin_info
[fcode
]))
25308 info
|= RS6000_FN_TARGET_INFO_HTM
;
25317 /* Hook to determine if one function can safely inline another. */
25320 rs6000_can_inline_p (tree caller
, tree callee
)
25323 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
25324 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
25326 /* If the callee has no option attributes, then it is ok to inline. */
25332 HOST_WIDE_INT caller_isa
;
25333 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
25334 HOST_WIDE_INT callee_isa
= callee_opts
->x_rs6000_isa_flags
;
25335 HOST_WIDE_INT explicit_isa
= callee_opts
->x_rs6000_isa_flags_explicit
;
25337 /* If the caller has option attributes, then use them.
25338 Otherwise, use the command line options. */
25340 caller_isa
= TREE_TARGET_OPTION (caller_tree
)->x_rs6000_isa_flags
;
25342 caller_isa
= rs6000_isa_flags
;
25344 cgraph_node
*callee_node
= cgraph_node::get (callee
);
25345 if (ipa_fn_summaries
&& ipa_fn_summaries
->get (callee_node
) != NULL
)
25347 unsigned int info
= ipa_fn_summaries
->get (callee_node
)->target_info
;
25348 if ((info
& RS6000_FN_TARGET_INFO_HTM
) == 0)
25350 callee_isa
&= ~OPTION_MASK_HTM
;
25351 explicit_isa
&= ~OPTION_MASK_HTM
;
25355 /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
25357 callee_isa
&= ~(OPTION_MASK_P8_FUSION
| OPTION_MASK_P10_FUSION
);
25358 explicit_isa
&= ~(OPTION_MASK_P8_FUSION
| OPTION_MASK_P10_FUSION
);
25360 /* The callee's options must be a subset of the caller's options, i.e.
25361 a vsx function may inline an altivec function, but a no-vsx function
25362 must not inline a vsx function. However, for those options that the
25363 callee has explicitly enabled or disabled, then we must enforce that
25364 the callee's and caller's options match exactly; see PR70010. */
25365 if (((caller_isa
& callee_isa
) == callee_isa
)
25366 && (caller_isa
& explicit_isa
) == (callee_isa
& explicit_isa
))
25370 if (TARGET_DEBUG_TARGET
)
25371 fprintf (stderr
, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
25372 get_decl_name (caller
), get_decl_name (callee
),
25373 (ret
? "can" : "cannot"));
/* Allocate a stack temp and fix up the address so it meets the particular
   memory requirements (either offsettable or REG+REG addressing).  */
25382 rs6000_allocate_stack_temp (machine_mode mode
,
25383 bool offsettable_p
,
25386 rtx stack
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
25387 rtx addr
= XEXP (stack
, 0);
25388 int strict_p
= reload_completed
;
25390 if (!legitimate_indirect_address_p (addr
, strict_p
))
25393 && !rs6000_legitimate_offset_address_p (mode
, addr
, strict_p
, true))
25394 stack
= replace_equiv_address (stack
, copy_addr_to_reg (addr
));
25396 else if (reg_reg_p
&& !legitimate_indexed_address_p (addr
, strict_p
))
25397 stack
= replace_equiv_address (stack
, copy_addr_to_reg (addr
));
25403 /* Given a memory reference, if it is not a reg or reg+reg addressing,
25404 convert to such a form to deal with memory reference instructions
25405 like STFIWX and LDBRX that only take reg+reg addressing. */
25408 rs6000_force_indexed_or_indirect_mem (rtx x
)
25410 machine_mode mode
= GET_MODE (x
);
25412 gcc_assert (MEM_P (x
));
25413 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x
, mode
))
25415 rtx addr
= XEXP (x
, 0);
25416 if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
25418 rtx reg
= XEXP (addr
, 0);
25419 HOST_WIDE_INT size
= GET_MODE_SIZE (GET_MODE (x
));
25420 rtx size_rtx
= GEN_INT ((GET_CODE (addr
) == PRE_DEC
) ? -size
: size
);
25421 gcc_assert (REG_P (reg
));
25422 emit_insn (gen_add3_insn (reg
, reg
, size_rtx
));
25425 else if (GET_CODE (addr
) == PRE_MODIFY
)
25427 rtx reg
= XEXP (addr
, 0);
25428 rtx expr
= XEXP (addr
, 1);
25429 gcc_assert (REG_P (reg
));
25430 gcc_assert (GET_CODE (expr
) == PLUS
);
25431 emit_insn (gen_add3_insn (reg
, XEXP (expr
, 0), XEXP (expr
, 1)));
25435 if (GET_CODE (addr
) == PLUS
)
25437 rtx op0
= XEXP (addr
, 0);
25438 rtx op1
= XEXP (addr
, 1);
25439 op0
= force_reg (Pmode
, op0
);
25440 op1
= force_reg (Pmode
, op1
);
25441 x
= replace_equiv_address (x
, gen_rtx_PLUS (Pmode
, op0
, op1
));
25444 x
= replace_equiv_address (x
, force_reg (Pmode
, addr
));
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the RS/6000, all integer constants are acceptable, most won't be valid
   for particular insns, though.  Only easy FP constants are acceptable.  */

static bool
rs6000_legitimate_constant_p (machine_mode mode, rtx x)
{
  if (TARGET_ELF && tls_referenced_p (x))
    return false;

  if (CONST_DOUBLE_P (x))
    return easy_fp_constant (x, mode);

  if (GET_CODE (x) == CONST_VECTOR)
    return easy_vector_constant (x, mode);

  return true;
}
25471 /* Implement TARGET_PRECOMPUTE_TLS_P.
25473 On the AIX, TLS symbols are in the TOC, which is maintained in the
25474 constant pool. AIX TOC TLS symbols need to be pre-computed, but
25475 must be considered legitimate constants. */
25478 rs6000_aix_precompute_tls_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
25480 return tls_referenced_p (x
);
/* Return TRUE iff the sequence ending in LAST sets the static chain.  */

static bool
chain_already_loaded (rtx_insn *last)
{
  for (; last != NULL; last = PREV_INSN (last))
    {
      if (NONJUMP_INSN_P (last))
	{
	  rtx patt = PATTERN (last);

	  if (GET_CODE (patt) == SET)
	    {
	      rtx lhs = XEXP (patt, 0);

	      if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
		return true;
	    }
	}
    }
  return false;
}
25508 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
25511 rs6000_call_aix (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
25513 rtx func
= func_desc
;
25514 rtx toc_reg
= gen_rtx_REG (Pmode
, TOC_REGNUM
);
25515 rtx toc_load
= NULL_RTX
;
25516 rtx toc_restore
= NULL_RTX
;
25518 rtx abi_reg
= NULL_RTX
;
25522 bool is_pltseq_longcall
;
25525 tlsarg
= global_tlsarg
;
25527 /* Handle longcall attributes. */
25528 is_pltseq_longcall
= false;
25529 if ((INTVAL (cookie
) & CALL_LONG
) != 0
25530 && GET_CODE (func_desc
) == SYMBOL_REF
)
25532 func
= rs6000_longcall_ref (func_desc
, tlsarg
);
25534 is_pltseq_longcall
= true;
25537 /* Handle indirect calls. */
25538 if (!SYMBOL_REF_P (func
)
25539 || (DEFAULT_ABI
== ABI_AIX
&& !SYMBOL_REF_FUNCTION_P (func
)))
25541 if (!rs6000_pcrel_p ())
25543 /* Save the TOC into its reserved slot before the call,
25544 and prepare to restore it after the call. */
25545 rtx stack_toc_offset
= GEN_INT (RS6000_TOC_SAVE_SLOT
);
25546 rtx stack_toc_unspec
= gen_rtx_UNSPEC (Pmode
,
25547 gen_rtvec (1, stack_toc_offset
),
25549 toc_restore
= gen_rtx_SET (toc_reg
, stack_toc_unspec
);
25551 /* Can we optimize saving the TOC in the prologue or
25552 do we need to do it at every call? */
25553 if (TARGET_SAVE_TOC_INDIRECT
&& !cfun
->calls_alloca
)
25554 cfun
->machine
->save_toc_in_prologue
= true;
25557 rtx stack_ptr
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
25558 rtx stack_toc_mem
= gen_frame_mem (Pmode
,
25559 gen_rtx_PLUS (Pmode
, stack_ptr
,
25560 stack_toc_offset
));
25561 MEM_VOLATILE_P (stack_toc_mem
) = 1;
25562 if (is_pltseq_longcall
)
25564 rtvec v
= gen_rtvec (3, toc_reg
, func_desc
, tlsarg
);
25565 rtx mark_toc_reg
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25566 emit_insn (gen_rtx_SET (stack_toc_mem
, mark_toc_reg
));
25569 emit_move_insn (stack_toc_mem
, toc_reg
);
25573 if (DEFAULT_ABI
== ABI_ELFv2
)
25575 /* A function pointer in the ELFv2 ABI is just a plain address, but
25576 the ABI requires it to be loaded into r12 before the call. */
25577 func_addr
= gen_rtx_REG (Pmode
, 12);
25578 emit_move_insn (func_addr
, func
);
25579 abi_reg
= func_addr
;
25580 /* Indirect calls via CTR are strongly preferred over indirect
25581 calls via LR, so move the address there. Needed to mark
25582 this insn for linker plt sequence editing too. */
25583 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
25584 if (is_pltseq_longcall
)
25586 rtvec v
= gen_rtvec (3, abi_reg
, func_desc
, tlsarg
);
25587 rtx mark_func
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25588 emit_insn (gen_rtx_SET (func_addr
, mark_func
));
25589 v
= gen_rtvec (2, func_addr
, func_desc
);
25590 func_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25593 emit_move_insn (func_addr
, abi_reg
);
25597 /* A function pointer under AIX is a pointer to a data area whose
25598 first word contains the actual address of the function, whose
25599 second word contains a pointer to its TOC, and whose third word
25600 contains a value to place in the static chain register (r11).
25601 Note that if we load the static chain, our "trampoline" need
25602 not have any executable code. */
25604 /* Load up address of the actual function. */
25605 func
= force_reg (Pmode
, func
);
25606 func_addr
= gen_reg_rtx (Pmode
);
25607 emit_move_insn (func_addr
, gen_rtx_MEM (Pmode
, func
));
25609 /* Indirect calls via CTR are strongly preferred over indirect
25610 calls via LR, so move the address there. */
25611 rtx ctr_reg
= gen_rtx_REG (Pmode
, CTR_REGNO
);
25612 emit_move_insn (ctr_reg
, func_addr
);
25613 func_addr
= ctr_reg
;
25615 /* Prepare to load the TOC of the called function. Note that the
25616 TOC load must happen immediately before the actual call so
25617 that unwinding the TOC registers works correctly. See the
25618 comment in frob_update_context. */
25619 rtx func_toc_offset
= GEN_INT (GET_MODE_SIZE (Pmode
));
25620 rtx func_toc_mem
= gen_rtx_MEM (Pmode
,
25621 gen_rtx_PLUS (Pmode
, func
,
25623 toc_load
= gen_rtx_USE (VOIDmode
, func_toc_mem
);
25625 /* If we have a static chain, load it up. But, if the call was
25626 originally direct, the 3rd word has not been written since no
25627 trampoline has been built, so we ought not to load it, lest we
25628 override a static chain value. */
25629 if (!(GET_CODE (func_desc
) == SYMBOL_REF
25630 && SYMBOL_REF_FUNCTION_P (func_desc
))
25631 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
25632 && !chain_already_loaded (get_current_sequence ()->next
->last
))
25634 rtx sc_reg
= gen_rtx_REG (Pmode
, STATIC_CHAIN_REGNUM
);
25635 rtx func_sc_offset
= GEN_INT (2 * GET_MODE_SIZE (Pmode
));
25636 rtx func_sc_mem
= gen_rtx_MEM (Pmode
,
25637 gen_rtx_PLUS (Pmode
, func
,
25639 emit_move_insn (sc_reg
, func_sc_mem
);
25646 /* No TOC register needed for calls from PC-relative callers. */
25647 if (!rs6000_pcrel_p ())
25648 /* Direct calls use the TOC: for local calls, the callee will
25649 assume the TOC register is set; for non-local calls, the
25650 PLT stub needs the TOC register. */
25655 /* Create the call. */
25656 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
25657 if (value
!= NULL_RTX
)
25658 call
[0] = gen_rtx_SET (value
, call
[0]);
25659 call
[1] = gen_rtx_USE (VOIDmode
, cookie
);
25663 call
[n_call
++] = toc_load
;
25665 call
[n_call
++] = toc_restore
;
25667 call
[n_call
++] = gen_hard_reg_clobber (Pmode
, LR_REGNO
);
25669 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (n_call
, call
));
25670 insn
= emit_call_insn (insn
);
  /* Mention all registers defined by the ABI to hold information
     as uses in CALL_INSN_FUNCTION_USAGE.  */
  if (abi_reg)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
}
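/* Sketch of the code generated for an indirect call through an AIX/ELFv1
   function descriptor (illustrative only; register numbers and the TOC save
   offset, 40(r1) in the 64-bit layout, are examples):

	std   r2,40(r1)		# save the caller's TOC in its reserved slot
	ld    r0,0(r9)		# descriptor word 0: function entry address
	mtctr r0
	ld    r11,16(r9)	# word 2: static chain, when one is used
	ld    r2,8(r9)		# word 1: callee's TOC, loaded just before
	bctrl			#   the call so unwinding stays correct
	ld    r2,40(r1)		# restore the caller's TOC  */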
/* Expand code to perform a sibling call under the AIX or ELFv2 ABI.  */

void
rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
{
  rtx call[2];
  rtx insn;
  rtx r12 = NULL_RTX;
  rtx func_addr = func_desc;

  if (global_tlsarg)
    tlsarg = global_tlsarg;

  /* Handle longcall attributes.  */
  if (INTVAL (cookie) & CALL_LONG && SYMBOL_REF_P (func_desc))
    {
      /* PCREL can do a sibling call to a longcall function
	 because we don't need to restore the TOC register.  */
      gcc_assert (rs6000_pcrel_p ());
      func_desc = rs6000_longcall_ref (func_desc, tlsarg);
    }
  else
    gcc_assert (INTVAL (cookie) == 0);

  /* For ELFv2, r12 and CTR need to hold the function address
     for an indirect call.  */
  if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
    {
      r12 = gen_rtx_REG (Pmode, 12);
      emit_move_insn (r12, func_desc);
      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
      emit_move_insn (func_addr, r12);
    }

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);
  call[1] = simple_return_rtx;

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
  insn = emit_call_insn (insn);

  /* Note use of the TOC register.  */
  if (!rs6000_pcrel_p ())
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
	     gen_rtx_REG (Pmode, TOC_REGNUM));

  /* Note use of r12.  */
  if (r12)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
}
25732 /* Expand code to perform a call under the SYSV4 ABI. */
25735 rs6000_call_sysv (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
25737 rtx func
= func_desc
;
25741 rtx abi_reg
= NULL_RTX
;
25745 tlsarg
= global_tlsarg
;
25747 /* Handle longcall attributes. */
25748 if ((INTVAL (cookie
) & CALL_LONG
) != 0
25749 && GET_CODE (func_desc
) == SYMBOL_REF
)
25751 func
= rs6000_longcall_ref (func_desc
, tlsarg
);
25752 /* If the longcall was implemented as an inline PLT call using
25753 PLT unspecs then func will be REG:r11. If not, func will be
25754 a pseudo reg. The inline PLT call sequence supports lazy
25755 linking (and longcalls to functions in dlopen'd libraries).
25756 The other style of longcalls don't. The lazy linking entry
25757 to the dynamic symbol resolver requires r11 be the function
25758 address (as it is for linker generated PLT stubs). Ensure
25759 r11 stays valid to the bctrl by marking r11 used by the call. */
25764 /* Handle indirect calls. */
25765 if (GET_CODE (func
) != SYMBOL_REF
)
25767 func
= force_reg (Pmode
, func
);
25769 /* Indirect calls via CTR are strongly preferred over indirect
25770 calls via LR, so move the address there. That can't be left
25771 to reload because we want to mark every instruction in an
25772 inline PLT call sequence with a reloc, enabling the linker to
25773 edit the sequence back to a direct call when that makes sense. */
25774 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
25777 rtvec v
= gen_rtvec (3, func
, func_desc
, tlsarg
);
25778 rtx mark_func
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25779 emit_insn (gen_rtx_SET (func_addr
, mark_func
));
25780 v
= gen_rtvec (2, func_addr
, func_desc
);
25781 func_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25784 emit_move_insn (func_addr
, func
);
25789 /* Create the call. */
25790 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
25791 if (value
!= NULL_RTX
)
25792 call
[0] = gen_rtx_SET (value
, call
[0]);
25794 call
[1] = gen_rtx_USE (VOIDmode
, cookie
);
25796 if (TARGET_SECURE_PLT
25798 && GET_CODE (func_addr
) == SYMBOL_REF
25799 && !SYMBOL_REF_LOCAL_P (func_addr
))
25800 call
[n
++] = gen_rtx_USE (VOIDmode
, pic_offset_table_rtx
);
25802 call
[n
++] = gen_hard_reg_clobber (Pmode
, LR_REGNO
);
25804 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (n
, call
));
25805 insn
= emit_call_insn (insn
);
25807 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), abi_reg
);
25810 /* Expand code to perform a sibling call under the SysV4 ABI. */
25813 rs6000_sibcall_sysv (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
25815 rtx func
= func_desc
;
25819 rtx abi_reg
= NULL_RTX
;
25822 tlsarg
= global_tlsarg
;
25824 /* Handle longcall attributes. */
25825 if ((INTVAL (cookie
) & CALL_LONG
) != 0
25826 && GET_CODE (func_desc
) == SYMBOL_REF
)
25828 func
= rs6000_longcall_ref (func_desc
, tlsarg
);
25829 /* If the longcall was implemented as an inline PLT call using
25830 PLT unspecs then func will be REG:r11. If not, func will be
25831 a pseudo reg. The inline PLT call sequence supports lazy
25832 linking (and longcalls to functions in dlopen'd libraries).
25833 The other style of longcalls don't. The lazy linking entry
25834 to the dynamic symbol resolver requires r11 be the function
25835 address (as it is for linker generated PLT stubs). Ensure
25836 r11 stays valid to the bctr by marking r11 used by the call. */
25841 /* Handle indirect calls. */
25842 if (GET_CODE (func
) != SYMBOL_REF
)
25844 func
= force_reg (Pmode
, func
);
25846 /* Indirect sibcalls must go via CTR. That can't be left to
25847 reload because we want to mark every instruction in an inline
25848 PLT call sequence with a reloc, enabling the linker to edit
25849 the sequence back to a direct call when that makes sense. */
25850 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
25853 rtvec v
= gen_rtvec (3, func
, func_desc
, tlsarg
);
25854 rtx mark_func
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25855 emit_insn (gen_rtx_SET (func_addr
, mark_func
));
25856 v
= gen_rtvec (2, func_addr
, func_desc
);
25857 func_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25860 emit_move_insn (func_addr
, func
);
25865 /* Create the call. */
25866 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
25867 if (value
!= NULL_RTX
)
25868 call
[0] = gen_rtx_SET (value
, call
[0]);
25870 call
[1] = gen_rtx_USE (VOIDmode
, cookie
);
25871 call
[2] = simple_return_rtx
;
25873 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (3, call
));
25874 insn
= emit_call_insn (insn
);
25876 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), abi_reg
);
25881 /* Expand code to perform a call under the Darwin ABI.
25882 Modulo handling of mlongcall, this is much the same as sysv.
25883 if/when the longcall optimisation is removed, we could drop this
25884 code and use the sysv case (taking care to avoid the tls stuff).
25886 We can use this for sibcalls too, if needed. */
25889 rs6000_call_darwin_1 (rtx value
, rtx func_desc
, rtx tlsarg
,
25890 rtx cookie
, bool sibcall
)
25892 rtx func
= func_desc
;
25896 int cookie_val
= INTVAL (cookie
);
25897 bool make_island
= false;
25899 /* Handle longcall attributes, there are two cases for Darwin:
25900 1) Newer linkers are capable of synthesising any branch islands needed.
25901 2) We need a helper branch island synthesised by the compiler.
25902 The second case has mostly been retired and we don't use it for m64.
25903 In fact, it's is an optimisation, we could just indirect as sysv does..
25904 ... however, backwards compatibility for now.
25905 If we're going to use this, then we need to keep the CALL_LONG bit set,
25906 so that we can pick up the special insn form later. */
25907 if ((cookie_val
& CALL_LONG
) != 0
25908 && GET_CODE (func_desc
) == SYMBOL_REF
)
25910 /* FIXME: the longcall opt should not hang off this flag, it is most
25911 likely incorrect for kernel-mode code-generation. */
25912 if (darwin_symbol_stubs
&& TARGET_32BIT
)
25913 make_island
= true; /* Do nothing yet, retain the CALL_LONG flag. */
25916 /* The linker is capable of doing this, but the user explicitly
25917 asked for -mlongcall, so we'll do the 'normal' version. */
25918 func
= rs6000_longcall_ref (func_desc
, NULL_RTX
);
25919 cookie_val
&= ~CALL_LONG
; /* Handled, zap it. */
25923 /* Handle indirect calls. */
25924 if (GET_CODE (func
) != SYMBOL_REF
)
25926 func
= force_reg (Pmode
, func
);
25928 /* Indirect calls via CTR are strongly preferred over indirect
25929 calls via LR, and are required for indirect sibcalls, so move
25930 the address there. */
25931 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
25932 emit_move_insn (func_addr
, func
);
25937 /* Create the call. */
25938 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
25939 if (value
!= NULL_RTX
)
25940 call
[0] = gen_rtx_SET (value
, call
[0]);
25942 call
[1] = gen_rtx_USE (VOIDmode
, GEN_INT (cookie_val
));
25945 call
[2] = simple_return_rtx
;
25947 call
[2] = gen_hard_reg_clobber (Pmode
, LR_REGNO
);
25949 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (3, call
));
25950 insn
= emit_call_insn (insn
);
25951 /* Now we have the debug info in the insn, we can set up the branch island
25952 if we're using one. */
25955 tree funname
= get_identifier (XSTR (func_desc
, 0));
25957 if (no_previous_def (funname
))
25959 rtx label_rtx
= gen_label_rtx ();
25960 char *label_buf
, temp_buf
[256];
25961 ASM_GENERATE_INTERNAL_LABEL (temp_buf
, "L",
25962 CODE_LABEL_NUMBER (label_rtx
));
25963 label_buf
= temp_buf
[0] == '*' ? temp_buf
+ 1 : temp_buf
;
25964 tree labelname
= get_identifier (label_buf
);
25965 add_compiler_branch_island (labelname
, funname
,
25966 insn_line ((const rtx_insn
*)insn
));
25973 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED
, rtx func_desc ATTRIBUTE_UNUSED
,
25974 rtx tlsarg ATTRIBUTE_UNUSED
, rtx cookie ATTRIBUTE_UNUSED
)
25977 rs6000_call_darwin_1 (value
, func_desc
, tlsarg
, cookie
, false);
25985 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED
, rtx func_desc ATTRIBUTE_UNUSED
,
25986 rtx tlsarg ATTRIBUTE_UNUSED
, rtx cookie ATTRIBUTE_UNUSED
)
25989 rs6000_call_darwin_1 (value
, func_desc
, tlsarg
, cookie
, true);
/* Return whether we should generate PC-relative code for FNDECL.  */

bool
rs6000_fndecl_pcrel_p (const_tree fndecl)
{
  if (DEFAULT_ABI != ABI_ELFv2)
    return false;

  struct cl_target_option *opts = target_opts_for_fn (fndecl);

  return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
	  && TARGET_CMODEL == CMODEL_MEDIUM);
}

/* Return whether we should generate PC-relative code for *FN.  */

bool
rs6000_function_pcrel_p (struct function *fn)
{
  if (DEFAULT_ABI != ABI_ELFv2)
    return false;

  /* Optimize usual case.  */
  if (fn == cfun)
    return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
	    && TARGET_CMODEL == CMODEL_MEDIUM);

  return rs6000_fndecl_pcrel_p (fn->decl);
}

/* Return whether we should generate PC-relative code for the current
   function.  */

bool
rs6000_pcrel_p ()
{
  return (DEFAULT_ABI == ABI_ELFv2
	  && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
	  && TARGET_CMODEL == CMODEL_MEDIUM);
}
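/* For illustration (assumed example): with -mcpu=power10 -mcmodel=medium on
   ELFv2, a load of a global can be emitted PC-relatively as

	pld 9,x@got@pcrel(0),1

   whereas the equivalent TOC-based code needs the TOC pointer in r2:

	addis 9,2,.LC0@toc@ha
	ld    9,.LC0@toc@l(9)

   The predicates above decide which of the two styles the back end uses.  */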
26034 /* Given an address (ADDR), a mode (MODE), and what the format of the
26035 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
26036 for the address. */
26039 address_to_insn_form (rtx addr
,
26041 enum non_prefixed_form non_prefixed_format
)
26043 /* Single register is easy. */
26044 if (REG_P (addr
) || SUBREG_P (addr
))
26045 return INSN_FORM_BASE_REG
;
26047 /* If the non prefixed instruction format doesn't support offset addressing,
26048 make sure only indexed addressing is allowed.
26050 We special case SDmode so that the register allocator does not try to move
26051 SDmode through GPR registers, but instead uses the 32-bit integer load and
26052 store instructions for the floating point registers. */
26053 if (non_prefixed_format
== NON_PREFIXED_X
|| (mode
== SDmode
&& TARGET_DFP
))
26055 if (GET_CODE (addr
) != PLUS
)
26056 return INSN_FORM_BAD
;
26058 rtx op0
= XEXP (addr
, 0);
26059 rtx op1
= XEXP (addr
, 1);
26060 if (!REG_P (op0
) && !SUBREG_P (op0
))
26061 return INSN_FORM_BAD
;
26063 if (!REG_P (op1
) && !SUBREG_P (op1
))
26064 return INSN_FORM_BAD
;
26066 return INSN_FORM_X
;
26069 /* Deal with update forms. */
26070 if (GET_RTX_CLASS (GET_CODE (addr
)) == RTX_AUTOINC
)
26071 return INSN_FORM_UPDATE
;
26073 /* Handle PC-relative symbols and labels. Check for both local and
26074 external symbols. Assume labels are always local. TLS symbols
26075 are not PC-relative for rs6000. */
26078 if (LABEL_REF_P (addr
))
26079 return INSN_FORM_PCREL_LOCAL
;
26081 if (SYMBOL_REF_P (addr
) && !SYMBOL_REF_TLS_MODEL (addr
))
26083 if (!SYMBOL_REF_LOCAL_P (addr
))
26084 return INSN_FORM_PCREL_EXTERNAL
;
26086 return INSN_FORM_PCREL_LOCAL
;
26090 if (GET_CODE (addr
) == CONST
)
26091 addr
= XEXP (addr
, 0);
26093 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
26094 if (GET_CODE (addr
) == LO_SUM
)
26095 return INSN_FORM_LO_SUM
;
26097 /* Everything below must be an offset address of some form. */
26098 if (GET_CODE (addr
) != PLUS
)
26099 return INSN_FORM_BAD
;
26101 rtx op0
= XEXP (addr
, 0);
26102 rtx op1
= XEXP (addr
, 1);
26104 /* Check for indexed addresses. */
26105 if (REG_P (op1
) || SUBREG_P (op1
))
26107 if (REG_P (op0
) || SUBREG_P (op0
))
26108 return INSN_FORM_X
;
26110 return INSN_FORM_BAD
;
26113 if (!CONST_INT_P (op1
))
26114 return INSN_FORM_BAD
;
26116 HOST_WIDE_INT offset
= INTVAL (op1
);
26117 if (!SIGNED_INTEGER_34BIT_P (offset
))
26118 return INSN_FORM_BAD
;
26120 /* Check for local and external PC-relative addresses. Labels are always
26121 local. TLS symbols are not PC-relative for rs6000. */
26124 if (LABEL_REF_P (op0
))
26125 return INSN_FORM_PCREL_LOCAL
;
26127 if (SYMBOL_REF_P (op0
) && !SYMBOL_REF_TLS_MODEL (op0
))
26129 if (!SYMBOL_REF_LOCAL_P (op0
))
26130 return INSN_FORM_PCREL_EXTERNAL
;
26132 return INSN_FORM_PCREL_LOCAL
;
26136 /* If it isn't PC-relative, the address must use a base register. */
26137 if (!REG_P (op0
) && !SUBREG_P (op0
))
26138 return INSN_FORM_BAD
;
26140 /* Large offsets must be prefixed. */
26141 if (!SIGNED_INTEGER_16BIT_P (offset
))
26143 if (TARGET_PREFIXED
)
26144 return INSN_FORM_PREFIXED_NUMERIC
;
26146 return INSN_FORM_BAD
;
26149 /* We have a 16-bit offset, see what default instruction format to use. */
26150 if (non_prefixed_format
== NON_PREFIXED_DEFAULT
)
26152 unsigned size
= GET_MODE_SIZE (mode
);
26154 /* On 64-bit systems, assume 64-bit integers need to use DS form
26155 addresses (for LD/STD). VSX vectors need to use DQ form addresses
26156 (for LXV and STXV). TImode is problematical in that its normal usage
26157 is expected to be GPRs where it wants a DS instruction format, but if
26158 it goes into the vector registers, it wants a DQ instruction
26160 if (TARGET_POWERPC64
&& size
>= 8 && GET_MODE_CLASS (mode
) == MODE_INT
)
26161 non_prefixed_format
= NON_PREFIXED_DS
;
26163 else if (TARGET_VSX
&& size
>= 16
26164 && (VECTOR_MODE_P (mode
) || VECTOR_ALIGNMENT_P (mode
)))
26165 non_prefixed_format
= NON_PREFIXED_DQ
;
26168 non_prefixed_format
= NON_PREFIXED_D
;
26171 /* Classify the D/DS/DQ-form addresses. */
26172 switch (non_prefixed_format
)
26174 /* Instruction format D, all 16 bits are valid. */
26175 case NON_PREFIXED_D
:
26176 return INSN_FORM_D
;
26178 /* Instruction format DS, bottom 2 bits must be 0. */
26179 case NON_PREFIXED_DS
:
26180 if ((offset
& 3) == 0)
26181 return INSN_FORM_DS
;
26183 else if (TARGET_PREFIXED
)
26184 return INSN_FORM_PREFIXED_NUMERIC
;
26187 return INSN_FORM_BAD
;
26189 /* Instruction format DQ, bottom 4 bits must be 0. */
26190 case NON_PREFIXED_DQ
:
26191 if ((offset
& 15) == 0)
26192 return INSN_FORM_DQ
;
26194 else if (TARGET_PREFIXED
)
26195 return INSN_FORM_PREFIXED_NUMERIC
;
26198 return INSN_FORM_BAD
;
26204 return INSN_FORM_BAD
;
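/* Some illustrative classifications (assumed examples; -mpcrel where noted):
     (reg:DI 3)			     -> INSN_FORM_BASE_REG
     (plus (reg) (reg))		     -> INSN_FORM_X
     (plus (reg) (const_int 16))     -> INSN_FORM_D, _DS or _DQ, depending on
					the mode's default format
     (plus (reg) (const_int 100000)) -> INSN_FORM_PREFIXED_NUMERIC when
					prefixed insns are available
     (symbol_ref "x"), -mpcrel	     -> INSN_FORM_PCREL_EXTERNAL or
					INSN_FORM_PCREL_LOCAL  */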
/* Given address rtx ADDR for a load of MODE, is this legitimate for a
   non-prefixed D-form or X-form instruction?  NON_PREFIXED_FORMAT is
   given NON_PREFIXED_D or NON_PREFIXED_DS to indicate whether we want
   a D-form or DS-form instruction.  X-form and base_reg are always
   allowed.  */

bool
address_is_non_pfx_d_or_x (rtx addr, machine_mode mode,
			   enum non_prefixed_form non_prefixed_format)
{
  enum insn_form result_form;

  result_form = address_to_insn_form (addr, mode, non_prefixed_format);

  switch (non_prefixed_format)
    {
    case NON_PREFIXED_D:
      switch (result_form)
	{
	case INSN_FORM_X:
	case INSN_FORM_D:
	case INSN_FORM_DS:
	case INSN_FORM_BASE_REG:
	  return true;
	default:
	  break;
	}
      break;

    case NON_PREFIXED_DS:
      switch (result_form)
	{
	case INSN_FORM_X:
	case INSN_FORM_DS:
	case INSN_FORM_BASE_REG:
	  return true;
	default:
	  break;
	}
      break;

    default:
      break;
    }

  return false;
}
/* Return true if a REG with a given MODE that is loaded from or stored into
   a MEM location uses a non-prefixed D/DS/DQ-form address.  This is used to
   validate the load or store with the PCREL_OPT optimization to make sure it
   is an instruction that can be optimized.

   We need to specify the MODE separately from the REG to allow for loads that
   include zero/sign/float extension.  */

bool
pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem)
{
  /* If the instruction is indexed only like LFIWAX/LXSIWAX we cannot do the
     PCREL_OPT optimization.  */
  enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode);
  if (non_prefixed == NON_PREFIXED_X)
    return false;

  /* Check if this is a non-prefixed D/DS/DQ-form instruction.  */
  rtx addr = XEXP (mem, 0);
  enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed);
  return (iform == INSN_FORM_BASE_REG
	  || iform == INSN_FORM_D
	  || iform == INSN_FORM_DS
	  || iform == INSN_FORM_DQ);
}
/* Helper function to see if we're potentially looking at lfs/stfs.
   - PARALLEL containing a SET and a CLOBBER
   - stfs:
     - SET is from UNSPEC_SI_FROM_SF to MEM:SI
     - CLOBBER is a V4SF
   - lfs:
     - SET is from UNSPEC_SF_FROM_SI to REG:SF
     - CLOBBER is a DI.  */

static bool
is_lfs_stfs_insn (rtx_insn *insn)
{
  rtx pattern = PATTERN (insn);
  if (GET_CODE (pattern) != PARALLEL)
    return false;

  /* This should be a parallel with exactly one set and one clobber.  */
  if (XVECLEN (pattern, 0) != 2)
    return false;

  rtx set = XVECEXP (pattern, 0, 0);
  if (GET_CODE (set) != SET)
    return false;

  rtx clobber = XVECEXP (pattern, 0, 1);
  if (GET_CODE (clobber) != CLOBBER)
    return false;

  /* All we care is that the destination of the SET is a mem:SI,
     the source should be an UNSPEC_SI_FROM_SF, and the clobber
     should be a scratch:V4SF.  */

  rtx dest = SET_DEST (set);
  rtx src = SET_SRC (set);
  rtx scratch = SET_DEST (clobber);

  if (GET_CODE (src) != UNSPEC)
    return false;

  /* stfs case.  */
  if (XINT (src, 1) == UNSPEC_SI_FROM_SF
      && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
      && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
    return true;

  /* lfs case.  */
  if (XINT (src, 1) == UNSPEC_SF_FROM_SI
      && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
      && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
    return true;

  return false;
}
/* Helper function to take a REG and a MODE and turn it into the non-prefixed
   instruction format (D/DS/DQ) used for offset memory.  */

enum non_prefixed_form
reg_to_non_prefixed (rtx reg, machine_mode mode)
{
  /* If it isn't a register, use the defaults.  */
  if (!REG_P (reg) && !SUBREG_P (reg))
    return NON_PREFIXED_DEFAULT;

  unsigned int r = reg_or_subregno (reg);

  /* If we have a pseudo, use the default instruction format.  */
  if (!HARD_REGISTER_NUM_P (r))
    return NON_PREFIXED_DEFAULT;

  unsigned size = GET_MODE_SIZE (mode);

  /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
     128-bit floating point, and 128-bit integers.  Before power9, only
     indexed addressing was available for vectors.  */
  if (FP_REGNO_P (r))
    {
      if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
	return NON_PREFIXED_D;

      else if (size < 8)
	return NON_PREFIXED_X;

      else if (TARGET_VSX && size >= 16
	       && (VECTOR_MODE_P (mode)
		   || VECTOR_ALIGNMENT_P (mode)
		   || mode == TImode || mode == CTImode))
	return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;

      else
	return NON_PREFIXED_DEFAULT;
    }

  /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
     128-bit floating point, and 128-bit integers.  Before power9, only
     indexed addressing was available.  */
  else if (ALTIVEC_REGNO_P (r))
    {
      if (!TARGET_P9_VECTOR)
	return NON_PREFIXED_X;

      else if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
	return NON_PREFIXED_DS;

      else if (size < 8)
	return NON_PREFIXED_X;

      else if (TARGET_VSX && size >= 16
	       && (VECTOR_MODE_P (mode)
		   || VECTOR_ALIGNMENT_P (mode)
		   || mode == TImode || mode == CTImode))
	return NON_PREFIXED_DQ;

      else
	return NON_PREFIXED_DEFAULT;
    }

  /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
     otherwise.  Assume that any other register, such as LR, CRs, etc. will go
     through the GPR registers for memory operations.  */
  else if (TARGET_POWERPC64 && size >= 8)
    return NON_PREFIXED_DS;

  return NON_PREFIXED_D;
}
/* Whether a load instruction is a prefixed instruction.  This is called from
   the prefixed attribute processing.  */

bool
prefixed_load_p (rtx_insn *insn)
{
  /* Validate the insn to make sure it is a normal load insn.  */
  extract_insn_cached (insn);
  if (recog_data.n_operands < 2)
    return false;

  rtx reg = recog_data.operand[0];
  rtx mem = recog_data.operand[1];

  if (!REG_P (reg) && !SUBREG_P (reg))
    return false;

  if (!MEM_P (mem))
    return false;

  /* Prefixed load instructions do not support update or indexed forms.  */
  if (get_attr_indexed (insn) == INDEXED_YES
      || get_attr_update (insn) == UPDATE_YES)
    return false;

  /* LWA uses the DS format instead of the D format that LWZ uses.  */
  enum non_prefixed_form non_prefixed;
  machine_mode reg_mode = GET_MODE (reg);
  machine_mode mem_mode = GET_MODE (mem);

  if (mem_mode == SImode && reg_mode == DImode
      && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
    non_prefixed = NON_PREFIXED_DS;

  else
    non_prefixed = reg_to_non_prefixed (reg, mem_mode);

  if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
    return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
  else
    return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
}
/* Whether a store instruction is a prefixed instruction.  This is called from
   the prefixed attribute processing.  */

bool
prefixed_store_p (rtx_insn *insn)
{
  /* Validate the insn to make sure it is a normal store insn.  */
  extract_insn_cached (insn);
  if (recog_data.n_operands < 2)
    return false;

  rtx mem = recog_data.operand[0];
  rtx reg = recog_data.operand[1];

  if (!REG_P (reg) && !SUBREG_P (reg))
    return false;

  if (!MEM_P (mem))
    return false;

  /* Prefixed store instructions do not support update or indexed forms.  */
  if (get_attr_indexed (insn) == INDEXED_YES
      || get_attr_update (insn) == UPDATE_YES)
    return false;

  machine_mode mem_mode = GET_MODE (mem);
  rtx addr = XEXP (mem, 0);
  enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);

  /* Need to make sure we aren't looking at a stfs which doesn't look
     like the other things reg_to_non_prefixed/address_is_prefixed
     handle.  */
  if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
    return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
  else
    return address_is_prefixed (addr, mem_mode, non_prefixed);
}
/* Whether a load immediate or add instruction is a prefixed instruction.
   This is called from the prefixed attribute processing.  */

bool
prefixed_paddi_p (rtx_insn *insn)
{
  rtx set = single_set (insn);
  if (!set)
    return false;

  rtx dest = SET_DEST (set);
  rtx src = SET_SRC (set);

  if (!REG_P (dest) && !SUBREG_P (dest))
    return false;

  /* Is this a load immediate that can't be done with a simple ADDI or
     ADDIS?  */
  if (CONST_INT_P (src))
    return (satisfies_constraint_eI (src)
	    && !satisfies_constraint_I (src)
	    && !satisfies_constraint_L (src));

  /* Is this a PADDI instruction that can't be done with a simple ADDI or
     ADDIS?  */
  if (GET_CODE (src) == PLUS)
    {
      rtx op1 = XEXP (src, 1);

      return (CONST_INT_P (op1)
	      && satisfies_constraint_eI (op1)
	      && !satisfies_constraint_I (op1)
	      && !satisfies_constraint_L (op1));
    }

  /* If not, is it a load of a PC-relative address?  */
  if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
    return false;

  if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
    return false;

  enum insn_form iform = address_to_insn_form (src, Pmode,
					       NON_PREFIXED_DEFAULT);

  return (iform == INSN_FORM_PCREL_EXTERNAL
	  || iform == INSN_FORM_PCREL_LOCAL);
}
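/* Illustrative example: li/lis can only materialize a 16-bit signed value or
   a 16-bit value shifted left by 16 bits, so

	li  9,0x1234		# satisfies constraint I
	lis 9,0x1234		# 0x12340000, satisfies constraint L
	pli 9,0x12345678	# needs the prefixed form (constraint eI)

   and only the last case is reported as prefixed by this predicate.  */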
/* Whether the next instruction needs a 'p' prefix issued before the
   instruction is printed out.  */
static bool prepend_p_to_next_insn;

/* Define FINAL_PRESCAN_INSN if some processing needs to be done before
   outputting the assembler code.  On the PowerPC, we remember if the current
   insn is a prefixed insn where we need to emit a 'p' before the insn.

   In addition, if the insn is part of a PC-relative reference to an external
   label optimization, this is recorded also.  */

void
rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
{
  prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn)
			    == MAYBE_PREFIXED_YES
			    && get_attr_prefixed (insn) == PREFIXED_YES);
  return;
}

/* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
   We use it to emit a 'p' for prefixed insns that is set in
   FINAL_PRESCAN_INSN.  */

void
rs6000_asm_output_opcode (FILE *stream)
{
  if (prepend_p_to_next_insn)
    {
      fprintf (stream, "p");

      /* Reset the flag in the case where there are separate insn lines in the
	 sequence, so the 'p' is only emitted for the first line.  This shows
	 up when we are doing the PCREL_OPT optimization, in that the label
	 created with %r<n> would have a leading 'p' printed.  */
      prepend_p_to_next_insn = false;
    }

  return;
}
/* Emit the relocation to tie the next instruction to a previous instruction
   that loads up an external address.  This is used to do the PCREL_OPT
   optimization.  Note, the label is generated after the PLD of the got
   pc-relative address to allow for the assembler to insert NOPs before the
   PLD instruction.  The operand is a constant integer that is the label
   number.  */

void
output_pcrel_opt_reloc (rtx label_num)
{
  rtx operands[1] = { label_num };
  output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
		   operands);
}
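/* The PCREL_OPT sequence this supports looks roughly like (illustrative):

	pld 9,sym@got@pcrel(0),1
   .Lpcrel0:
	...
	.reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
	lwz 10,0(9)

   which lets the linker rewrite the GOT load plus dependent load into a
   single PC-relative access to SYM when that is possible.  */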
/* Adjust the length of an INSN.  LENGTH is the currently-computed length and
   should be adjusted to reflect any required changes.  This macro is used
   when there is some systematic length adjustment required that would be
   difficult to express in the length attribute.

   On the PowerPC, we use this to adjust the length of an instruction if one
   or more prefixed instructions are generated, using the attribute
   num_prefixed_insns.  A prefixed instruction is 8 bytes instead of 4, but
   the hardware requires that a prefixed instruction does not cross a 64-byte
   boundary.  This means the compiler has to assume the length of the first
   prefixed instruction is 12 bytes instead of 8 bytes.  Since the length is
   already set for the non-prefixed instruction, we just need to update for
   the difference.  */

int
rs6000_adjust_insn_length (rtx_insn *insn, int length)
{
  if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
    {
      rtx pattern = PATTERN (insn);
      if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
	  && get_attr_prefixed (insn) == PREFIXED_YES)
	{
	  int num_prefixed = get_attr_max_prefixed_insns (insn);
	  length += 4 * (num_prefixed + 1);
	}
    }

  return length;
}
26621 # define USE_HIDDEN_LINKONCE 1
26623 # define USE_HIDDEN_LINKONCE 0
26626 /* Fills in the label name that should be used for a 476 link stack thunk. */
26629 get_ppc476_thunk_name (char name
[32])
26631 gcc_assert (TARGET_LINK_STACK
);
26633 if (USE_HIDDEN_LINKONCE
)
26634 sprintf (name
, "__ppc476.get_thunk");
26636 ASM_GENERATE_INTERNAL_LABEL (name
, "LPPC476_", 0);
26639 /* This function emits the simple thunk routine that is used to preserve
26640 the link stack on the 476 cpu. */
26642 static void rs6000_code_end (void) ATTRIBUTE_UNUSED
;
26644 rs6000_code_end (void)
26649 if (!TARGET_LINK_STACK
)
26652 get_ppc476_thunk_name (name
);
26654 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
, get_identifier (name
),
26655 build_function_type_list (void_type_node
, NULL_TREE
));
26656 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
26657 NULL_TREE
, void_type_node
);
26658 TREE_PUBLIC (decl
) = 1;
26659 TREE_STATIC (decl
) = 1;
26662 if (USE_HIDDEN_LINKONCE
&& !TARGET_XCOFF
)
26664 cgraph_node::create (decl
)->set_comdat_group (DECL_ASSEMBLER_NAME (decl
));
26665 targetm
.asm_out
.unique_section (decl
, 0);
26666 switch_to_section (get_named_section (decl
, NULL
, 0));
26667 DECL_WEAK (decl
) = 1;
26668 ASM_WEAKEN_DECL (asm_out_file
, decl
, name
, 0);
26669 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
26670 targetm
.asm_out
.assemble_visibility (decl
, VISIBILITY_HIDDEN
);
26671 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
26676 switch_to_section (text_section
);
26677 ASM_OUTPUT_LABEL (asm_out_file
, name
);
26680 DECL_INITIAL (decl
) = make_node (BLOCK
);
26681 current_function_decl
= decl
;
26682 allocate_struct_function (decl
, false);
26683 init_function_start (decl
);
26684 first_function_block_is_cold
= false;
26685 /* Make sure unwind info is emitted for the thunk if needed. */
26686 final_start_function (emit_barrier (), asm_out_file
, 1);
26688 fputs ("\tblr\n", asm_out_file
);
26690 final_end_function ();
26691 init_insn_lengths ();
26692 free_after_compilation (cfun
);
26694 current_function_decl
= NULL
;
/* Add r30 to hard reg set if the prologue sets it up and it is not
   pic_offset_table_rtx.  */

static void
rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
{
  if (!TARGET_SINGLE_PIC_BASE
      && TARGET_TOC
      && TARGET_MINIMAL_TOC
      && !constant_pool_empty_p ())
    add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
  if (cfun->machine->split_stack_argp_used)
    add_to_hard_reg_set (&set->set, Pmode, 12);

  /* Make sure the hard reg set doesn't include r2, which was possibly added
     via PIC_OFFSET_TABLE_REGNUM.  */
  if (TARGET_TOC)
    remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
}
26718 /* Helper function for rs6000_split_logical to emit a logical instruction after
26719 spliting the operation to single GPR registers.
26721 DEST is the destination register.
26722 OP1 and OP2 are the input source registers.
26723 CODE is the base operation (AND, IOR, XOR, NOT).
26724 MODE is the machine mode.
26725 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26726 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26727 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26730 rs6000_split_logical_inner (rtx dest
,
26733 enum rtx_code code
,
26735 bool complement_final_p
,
26736 bool complement_op1_p
,
26737 bool complement_op2_p
)
26741 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
26742 if (op2
&& CONST_INT_P (op2
)
26743 && (mode
== SImode
|| (mode
== DImode
&& TARGET_POWERPC64
))
26744 && !complement_final_p
&& !complement_op1_p
&& !complement_op2_p
)
26746 HOST_WIDE_INT mask
= GET_MODE_MASK (mode
);
26747 HOST_WIDE_INT value
= INTVAL (op2
) & mask
;
26749 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
26754 emit_insn (gen_rtx_SET (dest
, const0_rtx
));
26758 else if (value
== mask
)
26760 if (!rtx_equal_p (dest
, op1
))
26761 emit_insn (gen_rtx_SET (dest
, op1
));
26766 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
26767 into separate ORI/ORIS or XORI/XORIS instrucitons. */
26768 else if (code
== IOR
|| code
== XOR
)
26772 if (!rtx_equal_p (dest
, op1
))
26773 emit_insn (gen_rtx_SET (dest
, op1
));
26779 if (code
== AND
&& mode
== SImode
26780 && !complement_final_p
&& !complement_op1_p
&& !complement_op2_p
)
26782 emit_insn (gen_andsi3 (dest
, op1
, op2
));
26786 if (complement_op1_p
)
26787 op1
= gen_rtx_NOT (mode
, op1
);
26789 if (complement_op2_p
)
26790 op2
= gen_rtx_NOT (mode
, op2
);
26792 /* For canonical RTL, if only one arm is inverted it is the first. */
26793 if (!complement_op1_p
&& complement_op2_p
)
26794 std::swap (op1
, op2
);
26796 bool_rtx
= ((code
== NOT
)
26797 ? gen_rtx_NOT (mode
, op1
)
26798 : gen_rtx_fmt_ee (code
, mode
, op1
, op2
));
26800 if (complement_final_p
)
26801 bool_rtx
= gen_rtx_NOT (mode
, bool_rtx
);
26803 emit_insn (gen_rtx_SET (dest
, bool_rtx
));
26806 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
26807 operations are split immediately during RTL generation to allow for more
26808 optimizations of the AND/IOR/XOR.
26810 OPERANDS is an array containing the destination and two input operands.
26811 CODE is the base operation (AND, IOR, XOR, NOT).
26812 MODE is the machine mode.
26813 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26814 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26815 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
26816 CLOBBER_REG is either NULL or a scratch register of type CC to allow
26817 formation of the AND instructions. */
26820 rs6000_split_logical_di (rtx operands
[3],
26821 enum rtx_code code
,
26822 bool complement_final_p
,
26823 bool complement_op1_p
,
26824 bool complement_op2_p
)
26826 const HOST_WIDE_INT lower_32bits
= HOST_WIDE_INT_C(0xffffffff);
26827 const HOST_WIDE_INT upper_32bits
= ~ lower_32bits
;
26828 const HOST_WIDE_INT sign_bit
= HOST_WIDE_INT_C(0x80000000);
26829 enum hi_lo
{ hi
= 0, lo
= 1 };
26830 rtx op0_hi_lo
[2], op1_hi_lo
[2], op2_hi_lo
[2];
26833 op0_hi_lo
[hi
] = gen_highpart (SImode
, operands
[0]);
26834 op1_hi_lo
[hi
] = gen_highpart (SImode
, operands
[1]);
26835 op0_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[0]);
26836 op1_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[1]);
26839 op2_hi_lo
[hi
] = op2_hi_lo
[lo
] = NULL_RTX
;
26842 if (!CONST_INT_P (operands
[2]))
26844 op2_hi_lo
[hi
] = gen_highpart_mode (SImode
, DImode
, operands
[2]);
26845 op2_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[2]);
26849 HOST_WIDE_INT value
= INTVAL (operands
[2]);
26850 HOST_WIDE_INT value_hi_lo
[2];
26852 gcc_assert (!complement_final_p
);
26853 gcc_assert (!complement_op1_p
);
26854 gcc_assert (!complement_op2_p
);
26856 value_hi_lo
[hi
] = value
>> 32;
26857 value_hi_lo
[lo
] = value
& lower_32bits
;
26859 for (i
= 0; i
< 2; i
++)
26861 HOST_WIDE_INT sub_value
= value_hi_lo
[i
];
26863 if (sub_value
& sign_bit
)
26864 sub_value
|= upper_32bits
;
26866 op2_hi_lo
[i
] = GEN_INT (sub_value
);
26868 /* If this is an AND instruction, check to see if we need to load
26869 the value in a register. */
26870 if (code
== AND
&& sub_value
!= -1 && sub_value
!= 0
26871 && !and_operand (op2_hi_lo
[i
], SImode
))
26872 op2_hi_lo
[i
] = force_reg (SImode
, op2_hi_lo
[i
]);
26877 for (i
= 0; i
< 2; i
++)
26879 /* Split large IOR/XOR operations. */
26880 if ((code
== IOR
|| code
== XOR
)
26881 && CONST_INT_P (op2_hi_lo
[i
])
26882 && !complement_final_p
26883 && !complement_op1_p
26884 && !complement_op2_p
26885 && !logical_const_operand (op2_hi_lo
[i
], SImode
))
26887 HOST_WIDE_INT value
= INTVAL (op2_hi_lo
[i
]);
26888 HOST_WIDE_INT hi_16bits
= value
& HOST_WIDE_INT_C(0xffff0000);
26889 HOST_WIDE_INT lo_16bits
= value
& HOST_WIDE_INT_C(0x0000ffff);
26890 rtx tmp
= gen_reg_rtx (SImode
);
26892 /* Make sure the constant is sign extended. */
26893 if ((hi_16bits
& sign_bit
) != 0)
26894 hi_16bits
|= upper_32bits
;
26896 rs6000_split_logical_inner (tmp
, op1_hi_lo
[i
], GEN_INT (hi_16bits
),
26897 code
, SImode
, false, false, false);
26899 rs6000_split_logical_inner (op0_hi_lo
[i
], tmp
, GEN_INT (lo_16bits
),
26900 code
, SImode
, false, false, false);
26903 rs6000_split_logical_inner (op0_hi_lo
[i
], op1_hi_lo
[i
], op2_hi_lo
[i
],
26904 code
, SImode
, complement_final_p
,
26905 complement_op1_p
, complement_op2_p
);
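/* Illustrative example: on a 32-bit target a DImode XOR with the constant
   0x123456789abcdef0 is split into two SImode halves, and each half that is
   not a single xori/xoris immediate is split again, e.g. (register numbers
   are examples only):

	xoris 3,3,0x1234
	xori  3,3,0x5678
	xoris 4,4,0x9abc
	xori  4,4,0xdef0  */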
26911 /* Split the insns that make up boolean operations operating on multiple GPR
26912 registers. The boolean MD patterns ensure that the inputs either are
26913 exactly the same as the output registers, or there is no overlap.
26915 OPERANDS is an array containing the destination and two input operands.
26916 CODE is the base operation (AND, IOR, XOR, NOT).
26917 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26918 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26919 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26922 rs6000_split_logical (rtx operands
[3],
26923 enum rtx_code code
,
26924 bool complement_final_p
,
26925 bool complement_op1_p
,
26926 bool complement_op2_p
)
26928 machine_mode mode
= GET_MODE (operands
[0]);
26929 machine_mode sub_mode
;
26931 int sub_size
, regno0
, regno1
, nregs
, i
;
26933 /* If this is DImode, use the specialized version that can run before
26934 register allocation. */
26935 if (mode
== DImode
&& !TARGET_POWERPC64
)
26937 rs6000_split_logical_di (operands
, code
, complement_final_p
,
26938 complement_op1_p
, complement_op2_p
);
26944 op2
= (code
== NOT
) ? NULL_RTX
: operands
[2];
26945 sub_mode
= (TARGET_POWERPC64
) ? DImode
: SImode
;
26946 sub_size
= GET_MODE_SIZE (sub_mode
);
26947 regno0
= REGNO (op0
);
26948 regno1
= REGNO (op1
);
26950 gcc_assert (reload_completed
);
26951 gcc_assert (IN_RANGE (regno0
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
26952 gcc_assert (IN_RANGE (regno1
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
26954 nregs
= rs6000_hard_regno_nregs
[(int)mode
][regno0
];
26955 gcc_assert (nregs
> 1);
26957 if (op2
&& REG_P (op2
))
26958 gcc_assert (IN_RANGE (REGNO (op2
), FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
26960 for (i
= 0; i
< nregs
; i
++)
26962 int offset
= i
* sub_size
;
26963 rtx sub_op0
= simplify_subreg (sub_mode
, op0
, mode
, offset
);
26964 rtx sub_op1
= simplify_subreg (sub_mode
, op1
, mode
, offset
);
26965 rtx sub_op2
= ((code
== NOT
)
26967 : simplify_subreg (sub_mode
, op2
, mode
, offset
));
26969 rs6000_split_logical_inner (sub_op0
, sub_op1
, sub_op2
, code
, sub_mode
,
26970 complement_final_p
, complement_op1_p
,
26977 /* Emit instructions to move SRC to DST. Called by splitters for
26978 multi-register moves. It will emit at most one instruction for
26979 each register that is accessed; that is, it won't emit li/lis pairs
26980 (or equivalent for 64-bit code). One of SRC or DST must be a hard
26984 rs6000_split_multireg_move (rtx dst
, rtx src
)
26986 /* The register number of the first register being moved. */
26988 /* The mode that is to be moved. */
26990 /* The mode that the move is being done in, and its size. */
26991 machine_mode reg_mode
;
26993 /* The number of registers that will be moved. */
26996 reg
= REG_P (dst
) ? REGNO (dst
) : REGNO (src
);
26997 mode
= GET_MODE (dst
);
26998 nregs
= hard_regno_nregs (reg
, mode
);
27000 /* If we have a vector quad register for MMA, and this is a load or store,
27001 see if we can use vector paired load/stores. */
27002 if (mode
== XOmode
&& TARGET_MMA
27003 && (MEM_P (dst
) || MEM_P (src
)))
27008 /* If we have a vector pair/quad mode, split it into two/four separate
27010 else if (mode
== OOmode
|| mode
== XOmode
)
27011 reg_mode
= V1TImode
;
27012 else if (FP_REGNO_P (reg
))
27013 reg_mode
= DECIMAL_FLOAT_MODE_P (mode
) ? DDmode
:
27014 (TARGET_HARD_FLOAT
? DFmode
: SFmode
);
27015 else if (ALTIVEC_REGNO_P (reg
))
27016 reg_mode
= V16QImode
;
27018 reg_mode
= word_mode
;
27019 reg_mode_size
= GET_MODE_SIZE (reg_mode
);
27021 gcc_assert (reg_mode_size
* nregs
== GET_MODE_SIZE (mode
));
27023 /* TDmode residing in FP registers is special, since the ISA requires that
27024 the lower-numbered word of a register pair is always the most significant
27025 word, even in little-endian mode. This does not match the usual subreg
27026 semantics, so we cannnot use simplify_gen_subreg in those cases. Access
27027 the appropriate constituent registers "by hand" in little-endian mode.
27029 Note we do not need to check for destructive overlap here since TDmode
27030 can only reside in even/odd register pairs. */
27031 if (FP_REGNO_P (reg
) && DECIMAL_FLOAT_MODE_P (mode
) && !BYTES_BIG_ENDIAN
)
27036 for (i
= 0; i
< nregs
; i
++)
27038 if (REG_P (src
) && FP_REGNO_P (REGNO (src
)))
27039 p_src
= gen_rtx_REG (reg_mode
, REGNO (src
) + nregs
- 1 - i
);
27041 p_src
= simplify_gen_subreg (reg_mode
, src
, mode
,
27042 i
* reg_mode_size
);
27044 if (REG_P (dst
) && FP_REGNO_P (REGNO (dst
)))
27045 p_dst
= gen_rtx_REG (reg_mode
, REGNO (dst
) + nregs
- 1 - i
);
27047 p_dst
= simplify_gen_subreg (reg_mode
, dst
, mode
,
27048 i
* reg_mode_size
);
27050 emit_insn (gen_rtx_SET (p_dst
, p_src
));
27056 /* The __vector_pair and __vector_quad modes are multi-register
27057 modes, so if we have to load or store the registers, we have to be
27058 careful to properly swap them if we're in little endian mode
27059 below. This means the last register gets the first memory
27060 location. We also need to be careful of using the right register
27061 numbers if we are splitting XO to OO. */
27062 if (mode
== OOmode
|| mode
== XOmode
)
27064 nregs
= hard_regno_nregs (reg
, mode
);
27065 int reg_mode_nregs
= hard_regno_nregs (reg
, reg_mode
);
27068 unsigned offset
= 0;
27069 unsigned size
= GET_MODE_SIZE (reg_mode
);
27071 /* If we are reading an accumulator register, we have to
27072 deprime it before we can access it. */
27074 && GET_MODE (src
) == XOmode
&& FP_REGNO_P (REGNO (src
)))
27075 emit_insn (gen_mma_xxmfacc (src
, src
));
27077 for (int i
= 0; i
< nregs
; i
+= reg_mode_nregs
)
27080 = WORDS_BIG_ENDIAN
? i
: (nregs
- reg_mode_nregs
- i
);
27081 rtx dst2
= adjust_address (dst
, reg_mode
, offset
);
27082 rtx src2
= gen_rtx_REG (reg_mode
, reg
+ subreg
);
27084 emit_insn (gen_rtx_SET (dst2
, src2
));
27092 unsigned offset
= 0;
27093 unsigned size
= GET_MODE_SIZE (reg_mode
);
27095 for (int i
= 0; i
< nregs
; i
+= reg_mode_nregs
)
27098 = WORDS_BIG_ENDIAN
? i
: (nregs
- reg_mode_nregs
- i
);
27099 rtx dst2
= gen_rtx_REG (reg_mode
, reg
+ subreg
);
27100 rtx src2
= adjust_address (src
, reg_mode
, offset
);
27102 emit_insn (gen_rtx_SET (dst2
, src2
));
27105 /* If we are writing an accumulator register, we have to
27106 prime it after we've written it. */
27108 && GET_MODE (dst
) == XOmode
&& FP_REGNO_P (REGNO (dst
)))
27109 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27114 if (GET_CODE (src
) == UNSPEC
27115 || GET_CODE (src
) == UNSPEC_VOLATILE
)
27117 gcc_assert (XINT (src
, 1) == UNSPEC_VSX_ASSEMBLE
27118 || XINT (src
, 1) == UNSPECV_MMA_ASSEMBLE
);
27119 gcc_assert (REG_P (dst
));
27120 if (GET_MODE (src
) == XOmode
)
27121 gcc_assert (FP_REGNO_P (REGNO (dst
)));
27122 if (GET_MODE (src
) == OOmode
)
27123 gcc_assert (VSX_REGNO_P (REGNO (dst
)));
27125 int nvecs
= XVECLEN (src
, 0);
27126 for (int i
= 0; i
< nvecs
; i
++)
27129 int regno
= reg
+ i
;
27131 if (WORDS_BIG_ENDIAN
)
27133 op
= XVECEXP (src
, 0, i
);
27135 /* If we are loading an even VSX register and the memory location
27136 is adjacent to the next register's memory location (if any),
27137 then we can load them both with one LXVP instruction. */
27138 if ((regno
& 1) == 0)
27140 rtx op2
= XVECEXP (src
, 0, i
+ 1);
27141 if (adjacent_mem_locations (op
, op2
) == op
)
27143 op
= adjust_address (op
, OOmode
, 0);
27144 /* Skip the next register, since we're going to
27145 load it together with this register. */
27152 op
= XVECEXP (src
, 0, nvecs
- i
- 1);
27154 /* If we are loading an even VSX register and the memory location
27155 is adjacent to the next register's memory location (if any),
27156 then we can load them both with one LXVP instruction. */
27157 if ((regno
& 1) == 0)
27159 rtx op2
= XVECEXP (src
, 0, nvecs
- i
- 2);
27160 if (adjacent_mem_locations (op2
, op
) == op2
)
27162 op
= adjust_address (op2
, OOmode
, 0);
27163 /* Skip the next register, since we're going to
27164 load it together with this register. */
27170 rtx dst_i
= gen_rtx_REG (GET_MODE (op
), regno
);
27171 emit_insn (gen_rtx_SET (dst_i
, op
));
27174 /* We are writing an accumulator register, so we have to
27175 prime it after we've written it. */
27176 if (GET_MODE (src
) == XOmode
)
27177 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27182 /* Register -> register moves can use common code. */
27185 if (REG_P (src
) && REG_P (dst
) && (REGNO (src
) < REGNO (dst
)))
27187 /* If we are reading an accumulator register, we have to
27188 deprime it before we can access it. */
27190 && GET_MODE (src
) == XOmode
&& FP_REGNO_P (REGNO (src
)))
27191 emit_insn (gen_mma_xxmfacc (src
, src
));
27193 /* Move register range backwards, if we might have destructive
27196 /* XO/OO are opaque so cannot use subregs. */
27197 if (mode
== OOmode
|| mode
== XOmode
)
27199 for (i
= nregs
- 1; i
>= 0; i
--)
27201 rtx dst_i
= gen_rtx_REG (reg_mode
, REGNO (dst
) + i
);
27202 rtx src_i
= gen_rtx_REG (reg_mode
, REGNO (src
) + i
);
27203 emit_insn (gen_rtx_SET (dst_i
, src_i
));
27208 for (i
= nregs
- 1; i
>= 0; i
--)
27209 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode
, dst
, mode
,
27210 i
* reg_mode_size
),
27211 simplify_gen_subreg (reg_mode
, src
, mode
,
27212 i
* reg_mode_size
)));
27215 /* If we are writing an accumulator register, we have to
27216 prime it after we've written it. */
27218 && GET_MODE (dst
) == XOmode
&& FP_REGNO_P (REGNO (dst
)))
27219 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27225 bool used_update
= false;
27226 rtx restore_basereg
= NULL_RTX
;
27228 if (MEM_P (src
) && INT_REGNO_P (reg
))
27232 if (GET_CODE (XEXP (src
, 0)) == PRE_INC
27233 || GET_CODE (XEXP (src
, 0)) == PRE_DEC
)
27236 breg
= XEXP (XEXP (src
, 0), 0);
27237 delta_rtx
= (GET_CODE (XEXP (src
, 0)) == PRE_INC
27238 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src
)))
27239 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src
))));
27240 emit_insn (gen_add3_insn (breg
, breg
, delta_rtx
));
27241 src
= replace_equiv_address (src
, breg
);
27243 else if (! rs6000_offsettable_memref_p (src
, reg_mode
, true))
27245 if (GET_CODE (XEXP (src
, 0)) == PRE_MODIFY
)
27247 rtx basereg
= XEXP (XEXP (src
, 0), 0);
27250 rtx ndst
= simplify_gen_subreg (reg_mode
, dst
, mode
, 0);
27251 emit_insn (gen_rtx_SET (ndst
,
27252 gen_rtx_MEM (reg_mode
,
27254 used_update
= true;
27257 emit_insn (gen_rtx_SET (basereg
,
27258 XEXP (XEXP (src
, 0), 1)));
27259 src
= replace_equiv_address (src
, basereg
);
27263 rtx basereg
= gen_rtx_REG (Pmode
, reg
);
27264 emit_insn (gen_rtx_SET (basereg
, XEXP (src
, 0)));
27265 src
	      src = replace_equiv_address (src, basereg);
	    }

	  breg = XEXP (src, 0);
	  if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
	    breg = XEXP (breg, 0);

	  /* If the base register we are using to address memory is
	     also a destination reg, then change that register last.  */
	  if (REG_P (breg)
	      && REGNO (breg) >= REGNO (dst)
	      && REGNO (breg) < REGNO (dst) + nregs)
	    j = REGNO (breg) - REGNO (dst);
	}
      else if (MEM_P (dst) && INT_REGNO_P (reg))
	{
	  if (GET_CODE (XEXP (dst, 0)) == PRE_INC
	      || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
	    {
	      breg = XEXP (XEXP (dst, 0), 0);
	      delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));

	      /* We have to update the breg before doing the store.
		 Use store with update, if available.  */
	      if (TARGET_UPDATE)
		{
		  rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
		  emit_insn (TARGET_32BIT
			     ? (TARGET_POWERPC64
				? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
				: gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
			     : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
		  used_update = true;
		}
	      else
		emit_insn (gen_add3_insn (breg, breg, delta_rtx));
	      dst = replace_equiv_address (dst, breg);
	    }
	  else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
		   && GET_CODE (XEXP (dst, 0)) != LO_SUM)
	    {
	      if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
		{
		  rtx basereg = XEXP (XEXP (dst, 0), 0);

		  /* We have to update the breg before doing the store.
		     Use store with update, if available.  */
		  if (TARGET_UPDATE)
		    {
		      rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
		      emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
							   XEXP (dst, 0)),
					      nsrc));
		      used_update = true;
		    }
		  else
		    emit_insn (gen_rtx_SET (basereg,
					    XEXP (XEXP (dst, 0), 1)));
		  dst = replace_equiv_address (dst, basereg);
		}
	      else
		{
		  rtx basereg = XEXP (XEXP (dst, 0), 0);
		  rtx offsetreg = XEXP (XEXP (dst, 0), 1);
		  gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
			      && REG_P (basereg)
			      && REG_P (offsetreg)
			      && REGNO (basereg) != REGNO (offsetreg));
		  if (REGNO (basereg) == 0)
		    {
		      rtx tmp = offsetreg;
		      offsetreg = basereg;
		      basereg = tmp;
		    }
		  emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
		  restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
		  dst = replace_equiv_address (dst, basereg);
		}
	    }
	  else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
	    gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
	}

      /* If we are reading an accumulator register, we have to
	 deprime it before we can access it.  */
      if (TARGET_MMA && REG_P (src)
	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
	emit_insn (gen_mma_xxmfacc (src, src));

      for (i = 0; i < nregs; i++)
	{
	  /* Calculate index to next subword.  */
	  j = (j + 1) & (nregs - 1);

	  /* If compiler already emitted move of first word by
	     store with update, no need to do anything.  */
	  if (j == 0 && used_update)
	    continue;

	  /* XO/OO are opaque so cannot use subregs.  */
	  if (mode == OOmode || mode == XOmode)
	    {
	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
	      emit_insn (gen_rtx_SET (dst_i, src_i));
	    }
	  else
	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
							 j * reg_mode_size),
				    simplify_gen_subreg (reg_mode, src, mode,
							 j * reg_mode_size)));
	}

      /* If we are writing an accumulator register, we have to
	 prime it after we've written it.  */
      if (TARGET_MMA && REG_P (dst)
	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
	emit_insn (gen_mma_xxmtacc (dst, dst));

      if (restore_basereg != NULL_RTX)
	emit_insn (restore_basereg);
    }
}
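/* Rough sketch of the effect of the accumulator handling above (illustrative
   only, not additional code from this file): when an XOmode value living in
   FP registers is copied, the four underlying 128-bit register moves are
   bracketed by a deprime of the source accumulator (xxmfacc) and a prime of
   the destination accumulator (xxmtacc), keeping the accumulator contents
   architecturally consistent.  */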
/* Return true if the peephole2 can combine a load involving a combination of
   an addis instruction and a load with an offset that can be fused together on
   a power8.  */

bool
fusion_gpr_load_p (rtx addis_reg,	/* register set via addis.  */
		   rtx addis_value,	/* addis value.  */
		   rtx target,		/* target register that is loaded.  */
		   rtx mem)		/* bottom part of the memory addr.  */
{
  rtx addr;
  rtx base_reg;

  /* Validate arguments.  */
  if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
    return false;

  if (!base_reg_operand (target, GET_MODE (target)))
    return false;

  if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
    return false;

  /* Allow sign/zero extension.  */
  if (GET_CODE (mem) == ZERO_EXTEND
      || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
    mem = XEXP (mem, 0);

  if (!MEM_P (mem))
    return false;

  if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
    return false;

  addr = XEXP (mem, 0);			/* either PLUS or LO_SUM.  */
  if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
    return false;

  /* Validate that the register used to load the high value is either the
     register being loaded, or we can safely replace its use.

     This function is only called from the peephole2 pass and we assume that
     there are 2 instructions in the peephole (addis and load), so we want to
     check if the target register was not used in the memory address and the
     register to hold the addis result is dead after the peephole.  */
  if (REGNO (addis_reg) != REGNO (target))
    {
      if (reg_mentioned_p (target, mem))
	return false;

      if (!peep2_reg_dead_p (2, addis_reg))
	return false;

      /* If the target register being loaded is the stack pointer, we must
	 avoid loading any other value into it, even temporarily.  */
      if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
	return false;
    }

  base_reg = XEXP (addr, 0);
  return REGNO (addis_reg) == REGNO (base_reg);
}
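/* Illustrative sketch (assumed shapes, not copied from rs6000.md) of the two
   insns the peephole2 hands to fusion_gpr_load_p in the medium code model:

	(set (reg:DI 9)
	     (high:DI (unspec:DI [(symbol_ref) (reg:DI 2)] UNSPEC_TOCREL)))
	(set (reg:DI 10)
	     (mem:DI (lo_sum:DI (reg:DI 9)
				(unspec:DI [(symbol_ref) (reg:DI 2)]
					   UNSPEC_TOCREL))))

   The checks above accept the pair only if the addis register is the loaded
   register itself, or else the loaded register does not appear in the address
   and the addis register is dead after the load.  */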
/* During the peephole2 pass, adjust and expand the insns for a load fusion
   sequence.  We adjust the addis register to use the target register.  If the
   load sign extends, we adjust the code to do the zero extending load, and an
   explicit sign extension later since the fusion only covers zero extending
   loads.

   The operands are:
	operands[0]	register set with addis (to be replaced with target)
	operands[1]	value set via addis
	operands[2]	target register being loaded
	operands[3]	D-form memory reference using operands[0].  */

void
expand_fusion_gpr_load (rtx *operands)
{
  rtx addis_value = operands[1];
  rtx target = operands[2];
  rtx orig_mem = operands[3];
  rtx new_addr, new_mem, orig_addr, offset;
  enum rtx_code plus_or_lo_sum;
  machine_mode target_mode = GET_MODE (target);
  machine_mode extend_mode = target_mode;
  machine_mode ptr_mode = Pmode;
  enum rtx_code extend = UNKNOWN;

  if (GET_CODE (orig_mem) == ZERO_EXTEND
      || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
    {
      extend = GET_CODE (orig_mem);
      orig_mem = XEXP (orig_mem, 0);
      target_mode = GET_MODE (orig_mem);
    }

  gcc_assert (MEM_P (orig_mem));

  orig_addr = XEXP (orig_mem, 0);
  plus_or_lo_sum = GET_CODE (orig_addr);
  gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);

  offset = XEXP (orig_addr, 1);
  new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
  new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);

  if (extend != UNKNOWN)
    new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);

  new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
			    UNSPEC_FUSION_GPR);
  emit_insn (gen_rtx_SET (target, new_mem));

  if (extend == SIGN_EXTEND)
    {
      int sub_off = ((BYTES_BIG_ENDIAN)
		     ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
		     : 0);
      rtx sign_reg
	= simplify_subreg (target_mode, target, extend_mode, sub_off);

      emit_insn (gen_rtx_SET (target,
			      gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
    }

  return;
}
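/* For example (illustrative only): a fused sign-extending SImode load into a
   DImode register is expanded above as a zero-extending fused load wrapped in
   UNSPEC_FUSION_GPR, followed by an explicit sign_extend of the loaded low
   part (emitted later as a separate extsw), because the fusion itself only
   covers the zero-extending form.  */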
/* Emit the addis instruction that will be part of a fused instruction
   sequence.  */

void
emit_fusion_addis (rtx target, rtx addis_value)
{
  rtx fuse_ops[10];
  const char *addis_str = NULL;

  /* Emit the addis instruction.  */
  fuse_ops[0] = target;
  if (satisfies_constraint_L (addis_value))
    {
      fuse_ops[1] = addis_value;
      addis_str = "lis %0,%v1";
    }

  else if (GET_CODE (addis_value) == PLUS)
    {
      rtx op0 = XEXP (addis_value, 0);
      rtx op1 = XEXP (addis_value, 1);

      if (REG_P (op0) && CONST_INT_P (op1)
	  && satisfies_constraint_L (op1))
	{
	  fuse_ops[1] = op0;
	  fuse_ops[2] = op1;
	  addis_str = "addis %0,%1,%v2";
	}
    }

  else if (GET_CODE (addis_value) == HIGH)
    {
      rtx value = XEXP (addis_value, 0);
      if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
	{
	  fuse_ops[1] = XVECEXP (value, 0, 0);	/* symbol ref.  */
	  fuse_ops[2] = XVECEXP (value, 0, 1);	/* TOC register.  */
	  if (TARGET_ELF)
	    addis_str = "addis %0,%2,%1@toc@ha";

	  else if (TARGET_XCOFF)
	    addis_str = "addis %0,%1@u(%2)";

	  else
	    gcc_unreachable ();
	}

      else if (GET_CODE (value) == PLUS)
	{
	  rtx op0 = XEXP (value, 0);
	  rtx op1 = XEXP (value, 1);

	  if (GET_CODE (op0) == UNSPEC
	      && XINT (op0, 1) == UNSPEC_TOCREL
	      && CONST_INT_P (op1))
	    {
	      fuse_ops[1] = XVECEXP (op0, 0, 0);	/* symbol ref.  */
	      fuse_ops[2] = XVECEXP (op0, 0, 1);	/* TOC register.  */
	      fuse_ops[3] = op1;
	      if (TARGET_ELF)
		addis_str = "addis %0,%2,%1+%3@toc@ha";

	      else if (TARGET_XCOFF)
		addis_str = "addis %0,%1+%3@u(%2)";

	      else
		gcc_unreachable ();
	    }
	}

      else if (satisfies_constraint_L (value))
	{
	  fuse_ops[1] = value;
	  addis_str = "lis %0,%v1";
	}

      else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
	{
	  fuse_ops[1] = value;
	  addis_str = "lis %0,%1@ha";
	}
    }

  if (!addis_str)
    fatal_insn ("Could not generate addis value for fusion", addis_value);

  output_asm_insn (addis_str, fuse_ops);
}
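/* For instance (illustrative operands), when ADDIS_VALUE is
   (high (unspec [(sym) (reg 2)] UNSPEC_TOCREL)) on an ELF target, the
   template chosen above prints something like

	addis 9,2,sym@toc@ha

   with %0 standing for the fused target register.  */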
/* Emit a D-form load or store instruction that is the second instruction
   of a fusion sequence.  */

static void
emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
{
  rtx fuse_ops[10];
  char insn_template[80];

  fuse_ops[0] = load_reg;
  fuse_ops[1] = addis_reg;

  if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
    {
      sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
      fuse_ops[2] = offset;
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (GET_CODE (offset) == UNSPEC
	   && XINT (offset, 1) == UNSPEC_TOCREL)
    {
      if (TARGET_ELF)
	sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);

      else if (TARGET_XCOFF)
	sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);

      else
	gcc_unreachable ();

      fuse_ops[2] = XVECEXP (offset, 0, 0);
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (GET_CODE (offset) == PLUS
	   && GET_CODE (XEXP (offset, 0)) == UNSPEC
	   && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
	   && CONST_INT_P (XEXP (offset, 1)))
    {
      rtx tocrel_unspec = XEXP (offset, 0);
      if (TARGET_ELF)
	sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);

      else if (TARGET_XCOFF)
	sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);

      else
	gcc_unreachable ();

      fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
      fuse_ops[3] = XEXP (offset, 1);
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
    {
      sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);

      fuse_ops[2] = offset;
      output_asm_insn (insn_template, fuse_ops);
    }

  else
    fatal_insn ("Unable to generate load/store offset for fusion", offset);

  return;
}
/* Given an address, convert it into the addis and load offset parts.  Addresses
   created during the peephole2 process look like:
	(lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
		(unspec [(...)] UNSPEC_TOCREL))  */

static void
fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
{
  rtx hi, lo;

  if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
    {
      hi = XEXP (addr, 0);
      lo = XEXP (addr, 1);
    }
  else
    gcc_unreachable ();

  *p_hi = hi;
  *p_lo = lo;
}
/* Return a string to fuse an addis instruction with a gpr load to the same
   register that we loaded up the addis instruction.  The address that is used
   is the logical address that was formed during peephole2:
	(lo_sum (high) (low-part))

   The code is complicated, so we call output_asm_insn directly, and just
   return "".  */

const char *
emit_fusion_gpr_load (rtx target, rtx mem)
{
  rtx addis_value;
  rtx addr;
  rtx load_offset;
  const char *load_str = NULL;
  machine_mode mode;

  if (GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  gcc_assert (REG_P (target) && MEM_P (mem));

  addr = XEXP (mem, 0);
  fusion_split_address (addr, &addis_value, &load_offset);

  /* Now emit the load instruction to the same register.  */
  mode = GET_MODE (mem);
  switch (mode)
    {
    case E_QImode:
      load_str = "lbz";
      break;

    case E_HImode:
      load_str = "lhz";
      break;

    case E_SImode:
    case E_SFmode:
      load_str = "lwz";
      break;

    case E_DImode:
    case E_DFmode:
      gcc_assert (TARGET_POWERPC64);
      load_str = "ld";
      break;

    default:
      fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
    }

  /* Emit the addis instruction.  */
  emit_fusion_addis (target, addis_value);

  /* Emit the D-form load instruction.  */
  emit_fusion_load (target, target, load_offset, load_str);

  return "";
}
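/* Putting emit_fusion_addis and emit_fusion_load together, a typical fused
   TOC-relative DImode load on ELF comes out as (illustrative only):

	addis 9,2,sym@toc@ha
	ld 9,sym@toc@l(9)

   where the addis destination, the load base and the loaded register are the
   same GPR.  */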
/* This is not inside an #ifdef RS6000_GLIBC_ATOMIC_FENV because gengtype
   ignores it then.  */
static GTY(()) tree atomic_hold_decl;
static GTY(()) tree atomic_clear_decl;
static GTY(()) tree atomic_update_decl;

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */
static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
	{
	  atomic_hold_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feholdexcept"),
			  build_function_type_list (void_type_node,
						    double_ptr_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_hold_decl) = 1;
	  DECL_EXTERNAL (atomic_hold_decl) = 1;
	}

      if (atomic_clear_decl == NULL_TREE)
	{
	  atomic_clear_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feclearexcept"),
			  build_function_type_list (void_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_clear_decl) = 1;
	  DECL_EXTERNAL (atomic_clear_decl) = 1;
	}

      tree const_double = build_qualified_type (double_type_node,
						TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
	{
	  atomic_update_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feupdateenv"),
			  build_function_type_list (void_type_node,
						    const_double_ptr,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_update_decl) = 1;
	  DECL_EXTERNAL (atomic_update_decl) = 1;
	}

      tree fenv_var = create_tmp_var_raw (double_type_node);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
			       build4 (TARGET_EXPR, double_type_node, fenv_var,
				       void_node, NULL_TREE, NULL_TREE));

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
				 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }

  tree mffs = rs6000_builtin_decls[RS6000_BIF_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BIF_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var)

     *fenv_var = __builtin_mffs ();
     double fenv_hold;
     *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
     __builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask
    = HOST_WIDE_INT_C (0xffffffff00000007);

  tree fenv_var = create_tmp_var_raw (double_type_node);

  tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
			   NULL_TREE, NULL_TREE);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			      build_int_cst (uint64_type_node,
					     hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
				     build_int_cst (unsigned_type_node, 0xff),
				     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

     double fenv_clear = __builtin_mffs ();
     *(uint64_t)&fenv_clear &= 0xffffffff00000000LL;
     __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT clear_exception_mask
    = HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
			    call_mffs, NULL_TREE, NULL_TREE);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
				    fenv_clean_llu,
				    build_int_cst (uint64_type_node,
						   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
				      build_int_cst (unsigned_type_node, 0xff),
				      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var)

     double old_fenv = __builtin_mffs ();
     double fenv_update;
     *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
				(*(uint64_t*)fenv_var & 0x1ff80fff);
     __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask
    = HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask
    = HOST_WIDE_INT_C (0x1ff80fff);

  tree old_fenv = create_tmp_var_raw (double_type_node);
  tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
			     call_mffs, NULL_TREE, NULL_TREE);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
			     build_int_cst (uint64_type_node,
					    update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			     build_int_cst (uint64_type_node,
					    new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
			      old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
				       build_int_cst (unsigned_type_node, 0xff),
				       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}
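/* Worked example of the feupdateenv masking above (values are illustrative):
   with a current FPSCR image of 0xfff8000092004000 and a saved environment of
   0xfff8000000000003,

	old & 0xffffffff1fffff00 = 0xfff8000012004000
	env & 0x000000001ff80fff = 0x0000000000000003
	merged image (ior)       = 0xfff8000012004003

   so, roughly, the accrued exception bits of the current state are kept while
   the enables and rounding mode come from the saved environment.  */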
void
rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DFmode);
  rtx_tmp1 = gen_reg_rtx (V2DFmode);

  /* The destination of the vmrgew instruction layout is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
     vmrgew instruction will be correct.  */
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
					   GEN_INT (0)));
      emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
					   GEN_INT (3)));
    }
  else
    {
      emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
      emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
    }

  rtx_tmp2 = gen_reg_rtx (V4SFmode);
  rtx_tmp3 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
  emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
  else
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
}
void
rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DImode);
  rtx_tmp1 = gen_reg_rtx (V2DImode);

  /* The destination of the vmrgew instruction layout is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
     vmrgew instruction will be correct.  */
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
    }
  else
    {
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
    }

  rtx_tmp2 = gen_reg_rtx (V4SFmode);
  rtx_tmp3 = gen_reg_rtx (V4SFmode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
    }

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
  else
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
}
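/* Sketch of the element ordering produced above (descriptive, assuming
   src1 = {a0, a1} and src2 = {b0, b1}): the xxpermdi step builds the pairs
   {a0, b0} and {a1, b1}, the conversions leave their results in the even word
   slots, and vmrgew interleaves them so that dst ends up as
   {f(a0), f(a1), f(b0), f(b1)} for either endianness.  */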
void
rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
			       rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DFmode);
  rtx_tmp1 = gen_reg_rtx (V2DFmode);

  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));

  rtx_tmp2 = gen_reg_rtx (V4SImode);
  rtx_tmp3 = gen_reg_rtx (V4SImode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
    }

  emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
}
/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
			  optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return (opt_type == OPTIMIZE_FOR_SPEED
	      && RS6000_RECIP_AUTO_RSQRTE_P (mode1));

    default:
      return true;
    }
}
/* Implement TARGET_CONSTANT_ALIGNMENT.  */

static HOST_WIDE_INT
rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST
      && (STRICT_ALIGNMENT || !optimize_size))
    return MAX (align, BITS_PER_WORD);
  return align;
}
/* Implement TARGET_STARTING_FRAME_OFFSET.  */

static HOST_WIDE_INT
rs6000_starting_frame_offset (void)
{
  if (FRAME_GROWS_DOWNWARD)
    return 0;
  return RS6000_STARTING_FRAME_OFFSET;
}
/* On 64-bit Linux and Freebsd systems, possibly switch the long double library
   function names from <foo>l to <foo>f128 if the default long double type is
   IEEE 128-bit.  Typically, with the C and C++ languages, the standard math.h
   include file switches the names on systems that support long double as IEEE
   128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
   In the future, glibc will export names like __ieee128_sinf128 and we can
   switch to using those instead of using sinf128, which pollutes the user's
   namespace.

   This will switch the names for Fortran math functions as well (which doesn't
   use math.h).  However, Fortran needs other changes to the compiler and
   library before you can switch the real*16 type at compile time.

   We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name.  We
   only do this transformation if the __float128 type is enabled.  This
   prevents us from doing the transformation on older 32-bit ports that might
   have enabled using IEEE 128-bit floating point as the default long double
   type.  */

static tree
rs6000_mangle_decl_assembler_name (tree decl, tree id)
{
  if (TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
      && TREE_CODE (decl) == FUNCTION_DECL
      && DECL_IS_UNDECLARED_BUILTIN (decl)
      && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
    {
      size_t len = IDENTIFIER_LENGTH (id);
      const char *name = IDENTIFIER_POINTER (id);
      char *newname = NULL;

      /* See if it is one of the built-in functions with an unusual name.  */
      switch (DECL_FUNCTION_CODE (decl))
	{
	case BUILT_IN_DREML:
	  newname = xstrdup ("__remainderieee128");
	  break;

	case BUILT_IN_GAMMAL:
	  newname = xstrdup ("__lgammaieee128");
	  break;

	case BUILT_IN_GAMMAL_R:
	case BUILT_IN_LGAMMAL_R:
	  newname = xstrdup ("__lgammaieee128_r");
	  break;

	case BUILT_IN_NEXTTOWARD:
	  newname = xstrdup ("__nexttoward_to_ieee128");
	  break;

	case BUILT_IN_NEXTTOWARDF:
	  newname = xstrdup ("__nexttowardf_to_ieee128");
	  break;

	case BUILT_IN_NEXTTOWARDL:
	  newname = xstrdup ("__nexttowardieee128");
	  break;

	case BUILT_IN_POW10L:
	  newname = xstrdup ("__exp10ieee128");
	  break;

	case BUILT_IN_SCALBL:
	  newname = xstrdup ("__scalbieee128");
	  break;

	case BUILT_IN_SIGNIFICANDL:
	  newname = xstrdup ("__significandieee128");
	  break;

	case BUILT_IN_SINCOSL:
	  newname = xstrdup ("__sincosieee128");
	  break;

	default:
	  break;
	}

      /* Update the __builtin_*printf and __builtin_*scanf functions.  */
      if (!newname)
	{
	  size_t printf_len = strlen ("printf");
	  size_t scanf_len = strlen ("scanf");
	  size_t printf_chk_len = strlen ("printf_chk");

	  if (len >= printf_len
	      && strcmp (name + len - printf_len, "printf") == 0)
	    newname = xasprintf ("__%sieee128", name);

	  else if (len >= scanf_len
		   && strcmp (name + len - scanf_len, "scanf") == 0)
	    newname = xasprintf ("__isoc99_%sieee128", name);

	  else if (len >= printf_chk_len
		   && strcmp (name + len - printf_chk_len, "printf_chk") == 0)
	    newname = xasprintf ("%sieee128", name);

	  else if (name[len - 1] == 'l')
	    {
	      bool uses_ieee128_p = false;
	      tree type = TREE_TYPE (decl);
	      machine_mode ret_mode = TYPE_MODE (type);

	      /* See if the function returns an IEEE 128-bit floating point type
		 or complex type.  */
	      if (ret_mode == TFmode || ret_mode == TCmode)
		uses_ieee128_p = true;
	      else
		{
		  function_args_iterator args_iter;
		  tree arg;

		  /* See if the function passes an IEEE 128-bit floating point
		     type or complex type.  */
		  FOREACH_FUNCTION_ARGS (type, arg, args_iter)
		    {
		      machine_mode arg_mode = TYPE_MODE (arg);
		      if (arg_mode == TFmode || arg_mode == TCmode)
			{
			  uses_ieee128_p = true;
			  break;
			}
		    }
		}

	      /* If we passed or returned an IEEE 128-bit floating point type,
		 change the name.  Use __<name>ieee128, instead of <name>l.  */
	      if (uses_ieee128_p)
		newname = xasprintf ("__%.*sieee128", (int)(len - 1), name);
	    }
	}

      if (newname)
	{
	  if (TARGET_DEBUG_BUILTIN)
	    fprintf (stderr, "Map %s => %s\n", name, newname);

	  id = get_identifier (newname);
	  free (newname);
	}
    }

  return id;
}
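/* A few example renamings produced by the code above when long double is IEEE
   128-bit (illustrative):

	printf       -> __printfieee128
	scanf        -> __isoc99_scanfieee128
	sinl         -> __sinieee128
	nexttowardl  -> __nexttowardieee128  */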
/* Predict whether the given loop in gimple will be transformed in the RTL
   doloop_optimize pass.  */

static bool
rs6000_predict_doloop_p (struct loop *loop)
{
  gcc_assert (loop);

  /* On rs6000, targetm.can_use_doloop_p is actually
     can_use_doloop_if_innermost.  Just ensure the loop is innermost.  */
  if (loop->inner != NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Predict doloop failure due to"
			    " loop nesting.\n");
      return false;
    }

  return true;
}
/* Implement TARGET_PREFERRED_DOLOOP_MODE.  */

static machine_mode
rs6000_preferred_doloop_mode (machine_mode)
{
  return word_mode;
}
/* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P.  */

static bool
rs6000_cannot_substitute_mem_equiv_p (rtx mem)
{
  gcc_assert (MEM_P (mem));

  /* curr_insn_transform()'s handling of subregs cannot handle altivec AND:
     type addresses, so don't allow MEMs with those address types to be
     substituted as an equivalent expression.  See PR93974 for details.  */
  if (GET_CODE (XEXP (mem, 0)) == AND)
    return true;

  return false;
}
/* Implement TARGET_INVALID_CONVERSION.  */

static const char *
rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
{
  /* Make sure we're working with the canonical types.  */
  if (TYPE_CANONICAL (fromtype) != NULL_TREE)
    fromtype = TYPE_CANONICAL (fromtype);
  if (TYPE_CANONICAL (totype) != NULL_TREE)
    totype = TYPE_CANONICAL (totype);

  machine_mode frommode = TYPE_MODE (fromtype);
  machine_mode tomode = TYPE_MODE (totype);

  if (frommode != tomode)
    {
      /* Do not allow conversions to/from XOmode and OOmode types.  */
      if (frommode == XOmode)
	return N_("invalid conversion from type %<__vector_quad%>");
      if (tomode == XOmode)
	return N_("invalid conversion to type %<__vector_quad%>");
      if (frommode == OOmode)
	return N_("invalid conversion from type %<__vector_pair%>");
      if (tomode == OOmode)
	return N_("invalid conversion to type %<__vector_pair%>");
    }
  else if (POINTER_TYPE_P (fromtype) && POINTER_TYPE_P (totype))
    {
      /* We really care about the modes of the base types.  */
      frommode = TYPE_MODE (TREE_TYPE (fromtype));
      tomode = TYPE_MODE (TREE_TYPE (totype));

      /* Do not allow conversions to/from XOmode and OOmode pointer
	 types, except to/from void pointers.  */
      if (frommode != tomode
	  && frommode != VOIDmode
	  && tomode != VOIDmode)
	{
	  if (frommode == XOmode)
	    return N_("invalid conversion from type %<__vector_quad *%>");
	  if (tomode == XOmode)
	    return N_("invalid conversion to type %<__vector_quad *%>");
	  if (frommode == OOmode)
	    return N_("invalid conversion from type %<__vector_pair *%>");
	  if (tomode == OOmode)
	    return N_("invalid conversion to type %<__vector_pair *%>");
	}
    }

  /* Conversion allowed.  */
  return NULL;
}
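/* For example (illustrative user-level code): casting a %<__vector_quad%>
   value to a 16-byte vector type is rejected with the first message above,
   while converting a %<__vector_quad *%> to %<void *%> and back is accepted
   because void pointers are explicitly exempted.  */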
/* Convert a SFmode constant to the integer bit pattern.  */

long
rs6000_const_f32_to_i32 (rtx operand)
{
  long value;
  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);

  gcc_assert (GET_MODE (operand) == SFmode);
  REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
  return value;
}
void
rs6000_emit_xxspltidp_v2df (rtx dst, long value)
{
  if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
    inform (input_location,
	    "the result for the xxspltidp instruction "
	    "is undefined for subnormal input values");
  emit_insn (gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
}
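/* Example values (standard IEEE single-precision encodings):
   rs6000_const_f32_to_i32 returns 0x3f800000 for 1.0f and 0xc0000000 for
   -2.0f.  A pattern such as 0x00000001 is subnormal, so passing it to
   rs6000_emit_xxspltidp_v2df triggers the informational note above.  */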
/* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC.  */

static bool
rs6000_gen_pic_addr_diff_vec (void)
{
  return rs6000_relative_jumptables;
}
void
rs6000_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t";
  char buf[100];

  fprintf (file, "%s", directive);
  ASM_GENERATE_INTERNAL_LABEL (buf, "L", value);
  assemble_name (file, buf);
  fprintf (file, "\n");
}
/* Copy an integer constant to the vector constant structure.  */

static void
constant_int_to_128bit_vector (rtx op,
			       machine_mode mode,
			       size_t byte_num,
			       vec_const_128bit_type *info)
{
  unsigned HOST_WIDE_INT uvalue = UINTVAL (op);
  unsigned bitsize = GET_MODE_BITSIZE (mode);

  for (int shift = bitsize - 8; shift >= 0; shift -= 8)
    info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
}
/* Copy a floating point constant to the vector constant structure.  */

static void
constant_fp_to_128bit_vector (rtx op,
			      machine_mode mode,
			      size_t byte_num,
			      vec_const_128bit_type *info)
{
  unsigned bitsize = GET_MODE_BITSIZE (mode);
  unsigned num_words = bitsize / 32;
  const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op);
  long real_words[VECTOR_128BIT_WORDS];

  /* Make sure we don't overflow the real_words array and that it is
     filled completely.  */
  gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0);

  real_to_target (real_words, rtype, mode);

  /* Iterate over each 32-bit word in the floating point constant.  The
     real_to_target function puts out words in target endian fashion.  We need
     to arrange the order so that the bytes are written in big endian order.  */
  for (unsigned num = 0; num < num_words; num++)
    {
      unsigned endian_num = (BYTES_BIG_ENDIAN
			     ? num
			     : num_words - 1 - num);

      unsigned uvalue = real_words[endian_num];
      for (int shift = 32 - 8; shift >= 0; shift -= 8)
	info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
    }

  /* Mark that this constant involves floating point.  */
  info->fp_constant_p = true;
}
/* Convert a vector constant OP with mode MODE to a vector 128-bit constant
   structure INFO.

   Break out the constant out to bytes, half words, words, and double words.
   Return true if we have successfully converted the constant.

   We handle CONST_INT, CONST_DOUBLE, CONST_VECTOR, and VEC_DUPLICATE of
   constants.  Integer and floating point scalar constants are splatted to fill
   out the rest of the vector.  */

bool
vec_const_128bit_to_bytes (rtx op,
			   machine_mode mode,
			   vec_const_128bit_type *info)
{
  /* Initialize the constant structure.  */
  memset ((void *)info, 0, sizeof (vec_const_128bit_type));

  /* Assume CONST_INTs are DImode.  */
  if (mode == VOIDmode)
    mode = CONST_INT_P (op) ? DImode : GET_MODE (op);

  if (mode == VOIDmode)
    return false;

  unsigned size = GET_MODE_SIZE (mode);
  bool splat_p = false;

  if (size > VECTOR_128BIT_BYTES)
    return false;

  /* Set up the bits.  */
  switch (GET_CODE (op))
    {
      /* Integer constants, default to double word.  */
    case CONST_INT:
      {
	constant_int_to_128bit_vector (op, mode, 0, info);
	splat_p = true;
	break;
      }

      /* Floating point constants.  */
    case CONST_DOUBLE:
      {
	/* Fail if the floating point constant is the wrong mode.  */
	if (GET_MODE (op) != mode)
	  return false;

	/* SFmode stored as scalars are stored in DFmode format.  */
	if (mode == SFmode)
	  {
	    mode = DFmode;
	    size = GET_MODE_SIZE (DFmode);
	  }

	constant_fp_to_128bit_vector (op, mode, 0, info);
	splat_p = true;
	break;
      }

      /* Vector constants, iterate over each element.  On little endian
	 systems, we have to reverse the element numbers.  */
    case CONST_VECTOR:
      {
	/* Fail if the vector constant is the wrong mode or size.  */
	if (GET_MODE (op) != mode
	    || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
	  return false;

	machine_mode ele_mode = GET_MODE_INNER (mode);
	size_t ele_size = GET_MODE_SIZE (ele_mode);
	size_t nunits = GET_MODE_NUNITS (mode);

	for (size_t num = 0; num < nunits; num++)
	  {
	    rtx ele = CONST_VECTOR_ELT (op, num);
	    size_t byte_num = (BYTES_BIG_ENDIAN
			       ? num
			       : nunits - 1 - num) * ele_size;

	    if (CONST_INT_P (ele))
	      constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
	    else if (CONST_DOUBLE_P (ele))
	      constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
	    else
	      return false;
	  }

	break;
      }

      /* Treat VEC_DUPLICATE of a constant just like a vector constant.
	 Since we are duplicating the element, we don't have to worry about
	 endian issues.  */
    case VEC_DUPLICATE:
      {
	/* Fail if the vector duplicate is the wrong mode or size.  */
	if (GET_MODE (op) != mode
	    || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
	  return false;

	machine_mode ele_mode = GET_MODE_INNER (mode);
	size_t ele_size = GET_MODE_SIZE (ele_mode);
	rtx ele = XEXP (op, 0);
	size_t nunits = GET_MODE_NUNITS (mode);

	if (!CONST_INT_P (ele) && !CONST_DOUBLE_P (ele))
	  return false;

	for (size_t num = 0; num < nunits; num++)
	  {
	    size_t byte_num = num * ele_size;

	    if (CONST_INT_P (ele))
	      constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
	    else
	      constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
	  }

	break;
      }

      /* Any thing else, just return failure.  */
    default:
      return false;
    }

  /* Splat the constant to fill 128 bits if desired.  */
  if (splat_p && size < VECTOR_128BIT_BYTES)
    {
      if ((VECTOR_128BIT_BYTES % size) != 0)
	return false;

      for (size_t offset = size;
	   offset < VECTOR_128BIT_BYTES;
	   offset += size)
	memcpy ((void *) &info->bytes[offset],
		(void *) &info->bytes[0],
		size);
    }

  /* Remember original size.  */
  info->original_size = size;

  /* Determine if the bytes are all the same.  */
  unsigned char first_byte = info->bytes[0];
  info->all_bytes_same = true;
  for (size_t i = 1; i < VECTOR_128BIT_BYTES; i++)
    if (first_byte != info->bytes[i])
      {
	info->all_bytes_same = false;
	break;
      }

  /* Pack half words together & determine if all of the half words are the
     same.  */
  for (size_t i = 0; i < VECTOR_128BIT_HALF_WORDS; i++)
    info->half_words[i] = ((info->bytes[i * 2] << 8)
			   | info->bytes[(i * 2) + 1]);

  unsigned short first_hword = info->half_words[0];
  info->all_half_words_same = true;
  for (size_t i = 1; i < VECTOR_128BIT_HALF_WORDS; i++)
    if (first_hword != info->half_words[i])
      {
	info->all_half_words_same = false;
	break;
      }

  /* Pack words together & determine if all of the words are the same.  */
  for (size_t i = 0; i < VECTOR_128BIT_WORDS; i++)
    info->words[i] = ((info->bytes[i * 4] << 24)
		      | (info->bytes[(i * 4) + 1] << 16)
		      | (info->bytes[(i * 4) + 2] << 8)
		      | info->bytes[(i * 4) + 3]);

  info->all_words_same
    = (info->words[0] == info->words[1]
       && info->words[0] == info->words[2]
       && info->words[0] == info->words[3]);

  /* Pack double words together & determine if all of the double words are the
     same.  */
  for (size_t i = 0; i < VECTOR_128BIT_DOUBLE_WORDS; i++)
    {
      unsigned HOST_WIDE_INT d_word = 0;
      for (size_t j = 0; j < 8; j++)
	d_word = (d_word << 8) | info->bytes[(i * 8) + j];

      info->double_words[i] = d_word;
    }

  info->all_double_words_same
    = (info->double_words[0] == info->double_words[1]);

  return true;
}
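/* Worked example (illustrative): for the DImode constant 0x1122334455667788,
   the CONST_INT case fills bytes 0..7 with 11 22 33 44 55 66 77 88 and the
   splat step copies them into bytes 8..15, so both double words equal
   0x1122334455667788, all_double_words_same is true and all_bytes_same is
   false.  */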
/* Determine if an IEEE 128-bit constant can be loaded with LXVKQ.  Return zero
   if the LXVKQ instruction cannot be used.  Otherwise return the immediate
   value to be used with the LXVKQ instruction.  */

static int
constant_generates_lxvkq (vec_const_128bit_type *vsx_const)
{
  /* The instruction is only supported when power10 code generation, IEEE
     128-bit floating point hardware and VSX registers are all available.  */
  if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
      || !TARGET_VSX)
    return 0;

  /* All of the constants that are generated by LXVKQ have the bottom 3 words
     equal to 0.  */
  if (vsx_const->words[1] != 0
      || vsx_const->words[2] != 0
      || vsx_const->words[3] != 0)
    return 0;

  /* See if we have a match for the first word.  */
  switch (vsx_const->words[0])
    {
    case 0x3FFF0000U: return 1;		/* IEEE 128-bit +1.0.  */
    case 0x40000000U: return 2;		/* IEEE 128-bit +2.0.  */
    case 0x40008000U: return 3;		/* IEEE 128-bit +3.0.  */
    case 0x40010000U: return 4;		/* IEEE 128-bit +4.0.  */
    case 0x40014000U: return 5;		/* IEEE 128-bit +5.0.  */
    case 0x40018000U: return 6;		/* IEEE 128-bit +6.0.  */
    case 0x4001C000U: return 7;		/* IEEE 128-bit +7.0.  */
    case 0x7FFF0000U: return 8;		/* IEEE 128-bit +Infinity.  */
    case 0x7FFF8000U: return 9;		/* IEEE 128-bit quiet NaN.  */
    case 0x80000000U: return 16;	/* IEEE 128-bit -0.0.  */
    case 0xBFFF0000U: return 17;	/* IEEE 128-bit -1.0.  */
    case 0xC0000000U: return 18;	/* IEEE 128-bit -2.0.  */
    case 0xC0008000U: return 19;	/* IEEE 128-bit -3.0.  */
    case 0xC0010000U: return 20;	/* IEEE 128-bit -4.0.  */
    case 0xC0014000U: return 21;	/* IEEE 128-bit -5.0.  */
    case 0xC0018000U: return 22;	/* IEEE 128-bit -6.0.  */
    case 0xC001C000U: return 23;	/* IEEE 128-bit -7.0.  */
    case 0xFFFF0000U: return 24;	/* IEEE 128-bit -Infinity.  */

      /* Anything else cannot be loaded.  */
    default:
      break;
    }

  return 0;
}
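/* For instance, IEEE 128-bit +1.0 is 0x3FFF0000 followed by three zero words,
   so the switch above returns 1, the immediate operand used with LXVKQ.  */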
/* Determine if a vector constant can be loaded with XXSPLTIW.  Return zero if
   the XXSPLTIW instruction cannot be used.  Otherwise return the immediate
   value to be used with the XXSPLTIW instruction.  */

static int
constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
{
  if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
    return 0;

  if (!vsx_const->all_words_same)
    return 0;

  /* If we can use XXSPLTIB, don't generate XXSPLTIW.  */
  if (vsx_const->all_bytes_same)
    return 0;

  /* See if we can use VSPLTISH or VSPLTISW.  */
  if (vsx_const->all_half_words_same)
    {
      unsigned short h_word = vsx_const->half_words[0];
      short sign_h_word = ((h_word & 0xffff) ^ 0x8000) - 0x8000;
      if (EASY_VECTOR_15 (sign_h_word))
	return 0;
    }

  unsigned int word = vsx_const->words[0];
  int sign_word = ((word & 0xffffffff) ^ 0x80000000) - 0x80000000;
  if (EASY_VECTOR_15 (sign_word))
    return 0;

  return vsx_const->words[0];
}
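/* Worked example (illustrative): a V4SI constant whose elements are all
   0x00010002 has all_words_same set but neither all_bytes_same nor
   all_half_words_same, and 0x00010002 is outside the VSPLTISW range, so the
   function returns 0x00010002 as the XXSPLTIW immediate.  */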
/* Determine if a vector constant can be loaded with XXSPLTIDP.  Return zero if
   the XXSPLTIDP instruction cannot be used.  Otherwise return the immediate
   value to be used with the XXSPLTIDP instruction.  */

static int
constant_generates_xxspltidp (vec_const_128bit_type *vsx_const)
{
  if (!TARGET_SPLAT_FLOAT_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
    return 0;

  /* Reject if the two 64-bit segments are not the same.  */
  if (!vsx_const->all_double_words_same)
    return 0;

  /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP.
     Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW).  */
  if (vsx_const->all_bytes_same
      || vsx_const->all_half_words_same
      || vsx_const->all_words_same)
    return 0;

  unsigned HOST_WIDE_INT value = vsx_const->double_words[0];

  /* Avoid values that look like DFmode NaN's, except for the normal NaN bit
     pattern and the signalling NaN bit pattern.  Recognize infinity and
     negative infinity.  */

  /* Bit representation of DFmode normal quiet NaN.  */
#define RS6000_CONST_DF_NAN	HOST_WIDE_INT_UC (0x7ff8000000000000)

  /* Bit representation of DFmode normal signaling NaN.  */
#define RS6000_CONST_DF_NANS	HOST_WIDE_INT_UC (0x7ff4000000000000)

  /* Bit representation of DFmode positive infinity.  */
#define RS6000_CONST_DF_INF	HOST_WIDE_INT_UC (0x7ff0000000000000)

  /* Bit representation of DFmode negative infinity.  */
#define RS6000_CONST_DF_NEG_INF	HOST_WIDE_INT_UC (0xfff0000000000000)

  if (value != RS6000_CONST_DF_NAN
      && value != RS6000_CONST_DF_NANS
      && value != RS6000_CONST_DF_INF
      && value != RS6000_CONST_DF_NEG_INF)
    {
      /* The IEEE 754 64-bit floating format has 1 bit for sign, 11 bits for
	 the exponent, and 52 bits for the mantissa (not counting the hidden
	 bit used for normal numbers).  NaN values have the exponent set to all
	 1 bits, and the mantissa non-zero (mantissa == 0 is infinity).  */

      int df_exponent = (value >> 52) & 0x7ff;
      unsigned HOST_WIDE_INT
	df_mantissa = value & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U);

      if (df_exponent == 0x7ff && df_mantissa != 0)	/* other NaNs.  */
	return 0;

      /* Avoid values that are DFmode subnormal values.  Subnormal numbers have
	 the exponent all 0 bits, and the mantissa non-zero.  If the value is
	 subnormal, then the hidden bit in the mantissa is not set.  */
      if (df_exponent == 0 && df_mantissa != 0)		/* subnormal.  */
	return 0;
    }

  /* Change the representation to DFmode constant.  */
  long df_words[2] = { vsx_const->words[0], vsx_const->words[1] };

  /* real_from_target takes the target words in target order.  */
  if (!BYTES_BIG_ENDIAN)
    std::swap (df_words[0], df_words[1]);

  REAL_VALUE_TYPE rv_type;
  real_from_target (&rv_type, df_words, DFmode);

  const REAL_VALUE_TYPE *rv = &rv_type;

  /* Validate that the number can be stored as a SFmode value.  */
  if (!exact_real_truncate (SFmode, rv))
    return 0;

  /* Validate that the number is not a SFmode subnormal value (exponent is 0,
     mantissa field is non-zero) which is undefined for the XXSPLTIDP
     instruction.  */
  long sf_value;
  real_to_target (&sf_value, rv, SFmode);

  /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent,
     and 23 bits for the mantissa.  Subnormal numbers have the exponent all
     0 bits, and the mantissa non-zero.  */
  long sf_exponent = (sf_value >> 23) & 0xFF;
  long sf_mantissa = sf_value & 0x7FFFFF;

  if (sf_exponent == 0 && sf_mantissa != 0)
    return 0;

  /* Return the immediate to be used.  */
  return sf_value;
}
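/* Worked example (illustrative): a V2DF constant of {2.0, 2.0} has both
   double words equal to 0x4000000000000000; it is not a NaN, an infinity or a
   subnormal, and it truncates exactly to the SFmode pattern 0x40000000, which
   is what gets returned as the XXSPLTIDP immediate.  */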
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"