// SPDX-License-Identifier: GPL-3.0-or-later
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2024 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "print-tree.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "sched-int.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "gimple-walk.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
#include "tm-constrs.h"
#include "target-globals.h"
#include "tree-vector-builder.h"
#include "tree-pass.h"
#include "symbol-summary.h"
#include "ipa-fnsummary.h"
#include "case-cfn-macros.h"
#include "rs6000-internal.h"

/* This file should be included last.  */
#include "target-def.h"
extern tree rs6000_builtin_mask_for_load (void);
extern tree rs6000_builtin_md_vectorized_function (tree, tree, tree);
extern tree rs6000_builtin_reciprocal (tree);
/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif
/* Don't enable PC-relative addressing if the target does not support it.  */
#ifndef PCREL_SUPPORTED_BY_OS
#define PCREL_SUPPORTED_BY_OS 0
#endif
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;
/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

/* Track use of r13 in 64bit AIX TLS.  */
static bool xcoff_tls_exec_model_detected = false;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;
#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif
/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;
static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

section *toc_section = 0;
/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask
{
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
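
/* For illustration (not in the upstream sources): with the bit values
   above, RECIP_ALL is 0x0ff, so on low precision machines
       RECIP_LOW_PRECISION == 0x0ff & ~(0x020 | 0x080) == 0x05f,
   i.e. every estimate except the scalar and vector double-precision
   reciprocal square roots.  */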
/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	 RECIP_ALL },
  { "none",	 RECIP_NONE },
  { "div",	 (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		  | RECIP_V2DF_DIV) },
  { "divf",	 (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	 (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	 (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		  | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	 (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	 (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
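
/* For illustration (not in the upstream sources): -mrecip=divd selects the
   "divd" entry above, setting RECIP_DF_DIV | RECIP_V2DF_DIV == 0x00a and
   thus enabling the divide estimate only for scalar and vector double.  */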
/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT		= 0,		/* default clone.  */
  CLONE_ISA_2_05,			/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,			/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,			/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,			/* ISA 3.0 (power9).  */
  CLONE_ISA_3_1,			/* ISA 3.1 (power10).  */
  CLONE_MAX
};
/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0,				"" },		/* Default options.  */
  { OPTION_MASK_CMPB,		"arch_2_05" },	/* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD,	"arch_2_06" },	/* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR,	"arch_2_07" },	/* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR,	"arch_3_00" },	/* ISA 3.0 (power9).  */
  { OPTION_MASK_POWER10,	"arch_3_1" },	/* ISA 3.1 (power10).  */
};
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p = false;

/* Pointer to function (in rs6000-c.cc) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.cc, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or go if legitimate
   address.  We only need to worry about GPR, FPR, and Altivec registers here,
   along with an ANY field that is the OR of the 3 register classes.  */
enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};
/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX
/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;	/* Register class name.  */
  int reg;		/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* Quad offset is limited.  */
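
/* For illustration (not in the upstream sources): a mode that is valid in
   GPRs with both reg+reg and reg+offset addressing but no auto-update
   forms would have
       addr_mask == (RELOAD_REG_VALID | RELOAD_REG_INDEXED
		     | RELOAD_REG_OFFSET) == 0x0d
   in its RELOAD_REG_GPR slot.  */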
/* Addressing information for each mode: the reload insn codes, and masks of
   the valid addressing forms for each register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
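
/* For illustration (not in the upstream sources): on an ISA 3.0 target the
   initialization code later in this file sets RELOAD_REG_QUAD_OFFSET for
   vector modes such as V2DFmode, so mode_supports_dq_form (V2DFmode)
   returning true corresponds to the DQ-form lxv/stxv instructions, whose
   displacement must be a multiple of 16.  */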
/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx in_set, out_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;

		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }

  return store_data_bypass_p (out_insn, in_insn);
}
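
/* For illustration (not in the upstream sources): an insn with two
   results, such as a load with update,

       (parallel [(set (reg:DI 3) (mem:DI (reg:DI 4)))
		  (set (reg:DI 4) (plus:DI (reg:DI 4) (const_int 8)))])

   has no single_set, so the loops above walk the PARALLEL elements
   directly and return false for anything other than a SET, CLOBBER or
   USE, instead of letting the generic store_data_bypass_p assert.  */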
/* Processor costs (relative to an add).  */
static const struct processor_costs *rs6000_cost;
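
/* For illustration (not in the upstream sources): COSTS_N_INSNS (N)
   expands to N * 4 in GCC, so the tables below are scaled in quarter-insn
   units relative to a single add; COSTS_N_INSNS (17) == 68, for
   example.  */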
/* Instruction size costs on 32bit processors.  */
static struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};
/* Instruction costs on RS64A processors.  */
static struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC403 processors.  */
static struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC440 processors.  */
static struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC601 processors.  */
static struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC604 processors.  */
static struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC620 processors.  */
static struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};
/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),	    /* mulsi_const */
  COSTS_N_INSNS (6/2),	    /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),	    /* divsi */
  COSTS_N_INSNS (70/2),	    /* divdi */
  COSTS_N_INSNS (10/2),	    /* fp */
  COSTS_N_INSNS (10/2),	    /* dmul */
  COSTS_N_INSNS (74/2),	    /* sdiv */
  COSTS_N_INSNS (74/2),	    /* ddiv */
  128,			    /* cache line size */
  0,			    /* SF->DF convert */
};
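
/* For illustration (not in the upstream sources): the N/2 arguments above
   use C integer division, so COSTS_N_INSNS (9/2)+2 evaluates to
   COSTS_N_INSNS (4)+2 == 18.  */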
/* Instruction costs on PPC750 and PPC7400 processors.  */
static struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC8540 processors.  */
static struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPCE500MC processors.  */
static struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPCE5500 processors.  */
static struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on AppliedMicro Titan processors.  */
static struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on POWER6 processors.  */
static struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};
/* Instruction costs on POWER8 processors.  */
static struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};
/* Instruction costs on POWER10/POWER11 processors.  */
static struct processor_costs power10_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (2),	/* fp */
  COSTS_N_INSNS (2),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (13),	/* ddiv */
  128,			/* cache line size */
  16,			/* prefetch streams */
  COSTS_N_INSNS (2),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool,
					       code_helper = ERROR_MARK);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, int * = nullptr);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
bool easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static tree get_prev_label (tree);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode, rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t, reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif
/* Table of valid machine attributes.  */

static const attribute_spec rs6000_gnu_attributes[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",  1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall", 0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
};

static const scoped_attribute_specs rs6000_gnu_attribute_table =
{
  "gnu", { rs6000_gnu_attributes }
};

static const scoped_attribute_specs *const rs6000_attribute_table[] =
{
  &rs6000_gnu_attribute_table
};
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif
/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif
#undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
#define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
  rs6000_print_patchable_function_entry

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
#define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_CREATE_COSTS
#define TARGET_VECTORIZE_CREATE_COSTS rs6000_vectorize_create_costs

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_TRANSLATE_MODE_ATTRIBUTE
#define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  rs6000_libgcc_floating_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE rs6000_floatn_mode

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
  rs6000_builtin_md_vectorized_function
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
#endif
/* Use a 32-bit anchor range.  This leads to sequences like:

	addis	tmp,anchor,high
	add	dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1612 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1613 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1614 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1615 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1617 #undef TARGET_BUILTIN_RECIPROCAL
1618 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1620 #undef TARGET_SECONDARY_RELOAD
1621 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1622 #undef TARGET_SECONDARY_MEMORY_NEEDED
1623 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1624 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1625 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1627 #undef TARGET_LEGITIMATE_ADDRESS_P
1628 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1630 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1631 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1633 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1634 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1636 #undef TARGET_CAN_ELIMINATE
1637 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1639 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1640 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1642 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1643 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1645 #undef TARGET_TRAMPOLINE_INIT
1646 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1648 #undef TARGET_FUNCTION_VALUE
1649 #define TARGET_FUNCTION_VALUE rs6000_function_value
1651 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1652 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1654 #undef TARGET_OPTION_SAVE
1655 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1657 #undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_PREDICT_DOLOOP_P
#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p

#undef TARGET_HAVE_COUNT_REG_DECR_P
#define TARGET_HAVE_COUNT_REG_DECR_P true

/* 1000000000 is infinite cost in IVOPTs.  */
#undef TARGET_DOLOOP_COST_FOR_GENERIC
#define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000

#undef TARGET_DOLOOP_COST_FOR_ADDRESS
#define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000

#undef TARGET_PREFERRED_DOLOOP_MODE
#define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS rs6000_offload_options

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  rs6000_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  rs6000_get_function_versions_dispatcher

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  rs6000_hard_regno_call_part_clobbered

#undef TARGET_SLOW_UNALIGNED_ACCESS
#define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment

#undef TARGET_STARTING_FRAME_OFFSET
#define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset

#undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
#define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true

#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name

#undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
#define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
  rs6000_cannot_substitute_mem_equiv_p

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION rs6000_invalid_conversion

#undef TARGET_NEED_IPA_FN_TARGET_INFO
#define TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info

#undef TARGET_UPDATE_IPA_FN_TARGET_INFO
#define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info

#undef TARGET_CONST_ANCHOR
#define TARGET_CONST_ANCHOR 0x8000
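
/* Illustrative note (editor's addition, not from the original source): a
   const anchor of 0x8000 lets cse derive nearby constants from one another
   when they differ by less than a signed 16-bit immediate.  For example,
   once 0x12345000 is in a register, 0x12346000 can be produced by a single
   addi of 0x1000 instead of a fresh lis/addi pair.  */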
/* Processor table.  */
struct rs6000_ptt
{
  const char *const name;		/* Canonical processor name.  */
  const enum processor_type processor;	/* Processor type enum value.  */
  const HOST_WIDE_INT target_enable;	/* Target flags to enable.  */
};

static struct rs6000_ptt const processor_target_table[] =
{
#define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
#include "rs6000-cpus.def"
#undef RS6000_CPU
};
/* Look up a processor name for -mcpu=xxx and -mtune=xxx.  Return -1 if the
   name is invalid.  */

static int
rs6000_cpu_name_lookup (const char *name)
{
  size_t i;

  if (name != NULL)
    {
      for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
	if (! strcmp (name, processor_target_table[i].name))
	  return (int)i;
    }

  return -1;
}
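
/* Illustrative usage (editor's note, not from the original source):
   rs6000_cpu_name_lookup ("power9") returns the index of the "power9"
   entry generated from rs6000-cpus.def; the -mcpu=/-mtune= handling later
   in this file stores that index in rs6000_cpu_index or rs6000_tune_index,
   while -1 signals an unrecognized processor name.  */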
/* Return number of consecutive hard regs needed starting at reg REGNO
   to hold something of mode MODE.
   This is ordinarily the length in words of a value of mode MODE
   but can be less for certain modes in special long registers.

   POWER and PowerPC GPRs hold 32 bits worth;
   PowerPC64 GPRs and FPRs hold 64 bits worth.  */

static int
rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
{
  unsigned HOST_WIDE_INT reg_size;

  /* 128-bit floating point usually takes 2 registers, unless it is IEEE
     128-bit floating point that can go in vector registers, which has VSX
     memory addressing.  */
  if (FP_REGNO_P (regno))
    reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
		? UNITS_PER_VSX_WORD
		: UNITS_PER_FP_WORD);

  else if (ALTIVEC_REGNO_P (regno))
    reg_size = UNITS_PER_ALTIVEC_WORD;

  else
    reg_size = UNITS_PER_WORD;

  return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
}
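
/* Worked example (editor's note, not from the original source): the return
   expression is a ceiling division.  A 16-byte TFmode value in 32-bit GPRs
   needs (16 + 4 - 1) / 4 = 4 registers; in FPRs it needs
   (16 + 8 - 1) / 8 = 2; and when an IEEE 128-bit mode can live in VSX
   registers, (16 + 16 - 1) / 16 = 1 register suffices.  */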
/* Value is 1 if hard register REGNO can hold a value of machine-mode
   MODE.  */

static int
rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
{
  int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;

  if (COMPLEX_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  /* Vector pair modes need even/odd VSX register pairs.  Only allow vector
     registers.  */
  if (mode == OOmode)
    return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);

  /* MMA accumulator modes need FPR registers divisible by 4.  */
  if (mode == XOmode)
    return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);

  /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
     register combinations, and use PTImode where we need to deal with quad
     word memory operations.  Don't allow quad words in the argument or frame
     pointer registers, just registers 0..31.  */
  if (mode == PTImode)
    return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
	    && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
	    && ((regno & 1) == 0));

  /* VSX registers that overlap the FPR registers are larger than for non-VSX
     implementations.  Don't allow an item to be split between a FP register
     and an Altivec register.  Allow TImode in all VSX registers if the user
     asked for it.  */
  if (TARGET_VSX && VSX_REGNO_P (regno)
      && (VECTOR_MEM_VSX_P (mode)
	  || VECTOR_ALIGNMENT_P (mode)
	  || reg_addr[mode].scalar_in_vmx_p
	  || mode == TImode
	  || (TARGET_VADDUQM && mode == V1TImode)))
    {
      if (FP_REGNO_P (regno))
	return FP_REGNO_P (last_regno);

      if (ALTIVEC_REGNO_P (regno))
	{
	  if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
	    return 0;

	  return ALTIVEC_REGNO_P (last_regno);
	}
    }

  /* The GPRs can hold any mode, but values bigger than one register
     cannot go past R31.  */
  if (INT_REGNO_P (regno))
    return INT_REGNO_P (last_regno);

  /* The float registers (except for VSX vector modes) can only hold floating
     modes and DImode.  */
  if (FP_REGNO_P (regno))
    {
      if (VECTOR_ALIGNMENT_P (mode))
	return 0;

      if (SCALAR_FLOAT_MODE_P (mode)
	  && (mode != TDmode || (regno % 2) == 0)
	  && FP_REGNO_P (last_regno))
	return 1;

      if (GET_MODE_CLASS (mode) == MODE_INT)
	{
	  if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
	    return 1;

	  if (TARGET_POPCNTD && mode == SImode)
	    return 1;

	  if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
	    return 1;
	}

      return 0;
    }

  /* The CR register can only hold CC modes.  */
  if (CR_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;

  if (CA_REGNO_P (regno))
    return mode == Pmode || mode == SImode;

  /* AltiVec modes can go only in AltiVec registers.  */
  if (ALTIVEC_REGNO_P (regno))
    return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
	    || mode == V1TImode);

  /* We cannot put non-VSX TImode or PTImode anywhere except general register
     and it must be able to fit within the register set.  */

  return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
}
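
/* Illustrative consequences (editor's note, not from the original source)
   of the checks above: TDmode is accepted only in even-numbered FPRs,
   PTImode only in even/odd GPR pairs within registers 0..31, and since
   last_regno is regno + nregs - 1, a multi-register GPR value is rejected
   whenever it would spill past r31.  */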
/* Implement TARGET_HARD_REGNO_NREGS.  */

static unsigned int
rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
{
  return rs6000_hard_regno_nregs[mode][regno];
}

/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  return rs6000_hard_regno_mode_ok_p[mode][regno];
}
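
/* Editor's note (not from the original source): both hooks are O(1) table
   lookups on purpose; the real work happens once per option change in
   rs6000_init_hard_regno_mode_ok below, which fills rs6000_hard_regno_nregs
   and rs6000_hard_regno_mode_ok_p for every (mode, regno) pair via the
   _internal/_uncached functions above.  */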
/* Implement TARGET_MODES_TIEABLE_P.

   PTImode cannot tie with other modes because PTImode is restricted to even
   GPR registers, and TImode can go in any GPR as well as VSX registers (PR
   ...).

   Similarly, don't allow OOmode (vector pair, restricted to even VSX
   registers) or XOmode (vector quad, restricted to FPR registers divisible
   by 4) to tie with other modes.

   Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
   128-bit floating point on VSX systems ties with other vectors.  */

static bool
rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
      || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
    return mode1 == mode2;

  if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
    return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
  if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
    return false;

  if (SCALAR_FLOAT_MODE_P (mode1))
    return SCALAR_FLOAT_MODE_P (mode2);
  if (SCALAR_FLOAT_MODE_P (mode2))
    return false;

  if (GET_MODE_CLASS (mode1) == MODE_CC)
    return GET_MODE_CLASS (mode2) == MODE_CC;
  if (GET_MODE_CLASS (mode2) == MODE_CC)
    return false;

  return true;
}
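
/* Illustrative examples (editor's note, not from the original source):
   V4SImode ties with V2DFmode (both are Altivec/VSX vector modes), SFmode
   ties with DFmode (both scalar float), but PTImode, OOmode, and XOmode
   tie only with themselves because of their even/quad register alignment
   restrictions.  */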
/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  */

static bool
rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
				       machine_mode mode)
{
  if (TARGET_32BIT
      && TARGET_POWERPC64
      && GET_MODE_SIZE (mode) > 4
      && INT_REGNO_P (regno))
    return true;

  if (TARGET_VSX
      && FP_REGNO_P (regno)
      && GET_MODE_SIZE (mode) > 8
      && !FLOAT128_2REG_P (mode))
    return true;

  return false;
}
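
/* Illustrative example (editor's note, not from the original source): with
   -m32 -mpowerpc64, a DImode value fits in one 64-bit GPR, but the 32-bit
   ABIs only preserve the low 32 bits of GPRs across calls, so the high
   half is partially clobbered.  Likewise, under VSX a 16-byte vector
   overlapping an FPR loses its upper 64 bits across a call, since only
   the traditional FPR (low) half is callee-saved.  */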
/* Print interesting facts about registers.  */
static void
rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
{
  int r, m;

  for (r = first_regno; r <= last_regno; ++r)
    {
      const char *comma = "";
      int len;

      if (first_regno == last_regno)
	fprintf (stderr, "%s:\t", reg_name);
      else
	fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);

      len = 8;
      for (m = 0; m < NUM_MACHINE_MODES; ++m)
	if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
	  {
	    if (len > 70)
	      {
		fprintf (stderr, ",\n\t");
		len = 8;
		comma = "";
	      }

	    if (rs6000_hard_regno_nregs[m][r] > 1)
	      len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
			      rs6000_hard_regno_nregs[m][r]);
	    else
	      len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));

	    comma = ", ";
	  }

      if (call_used_or_fixed_reg_p (r))
	{
	  if (len > 70)
	    {
	      fprintf (stderr, ",\n\t");
	      len = 8;
	      comma = "";
	    }

	  len += fprintf (stderr, "%s%s", comma, "call-used");
	  comma = ", ";
	}

      if (fixed_regs[r])
	{
	  if (len > 70)
	    {
	      fprintf (stderr, ",\n\t");
	      len = 8;
	      comma = "";
	    }

	  len += fprintf (stderr, "%s%s", comma, "fixed");
	  comma = ", ";
	}

      if (len > 70)
	{
	  fprintf (stderr, ",\n\t");
	  comma = "";
	}

      len += fprintf (stderr, "%sreg-class = %s", comma,
		      reg_class_names[(int)rs6000_regno_regclass[r]]);
      comma = ", ";

      if (len > 70)
	{
	  fprintf (stderr, ",\n\t");
	  comma = "";
	}

      fprintf (stderr, "%sregno = %d\n", comma, r);
    }
}
static const char *
rs6000_debug_vector_unit (enum rs6000_vector v)
{
  const char *ret;

  switch (v)
    {
    case VECTOR_NONE:	   ret = "none";      break;
    case VECTOR_ALTIVEC:   ret = "altivec";   break;
    case VECTOR_VSX:	   ret = "vsx";       break;
    case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
    default:		   ret = "unknown";   break;
    }

  return ret;
}
/* Inner function printing just the address mask for a particular reload
   register class.  */
DEBUG_FUNCTION char *
rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
{
  static char ret[8];
  char *p = ret;

  if ((mask & RELOAD_REG_VALID) != 0)
    *p++ = 'v';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_MULTIPLE) != 0)
    *p++ = 'm';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_INDEXED) != 0)
    *p++ = 'i';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
    *p++ = 'O';
  else if ((mask & RELOAD_REG_OFFSET) != 0)
    *p++ = 'o';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
    *p++ = '+';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
    *p++ = '+';
  else if (keep_spaces)
    *p++ = ' ';

  if ((mask & RELOAD_REG_AND_M16) != 0)
    *p++ = '&';
  else if (keep_spaces)
    *p++ = ' ';

  *p = '\0';

  return ret;
}
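
/* Illustrative decoding (editor's note, under the flag encoding sketched
   above): each flag contributes one column -- 'v' valid, 'm' multi-register,
   'i' indexed (reg+reg), 'O'/'o' quad/normal offset (reg+disp), '+' for the
   pre-increment and pre-modify slots, '&' the Altivec (reg & -16) form --
   so keep_spaces lets the per-class masks line up in the -mdebug=reg
   output.  */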
/* Print the address masks in a human readable fashion.  */
DEBUG_FUNCTION void
rs6000_debug_print_mode (ssize_t m)
{
  ssize_t rc;
  int spaces = 0;

  fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
  for (rc = 0; rc < N_RELOAD_REG; rc++)
    fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
	     rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));

  if ((reg_addr[m].reload_store != CODE_FOR_nothing)
      || (reg_addr[m].reload_load != CODE_FOR_nothing))
    {
      fprintf (stderr, "%*s  Reload=%c%c", spaces, "",
	       (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
	       (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
      spaces = 0;
    }
  else
    spaces += strlen ("  Reload=sl");

  if (reg_addr[m].scalar_in_vmx_p)
    {
      fprintf (stderr, "%*s  Upper=y", spaces, "");
      spaces = 0;
    }
  else
    spaces += strlen ("  Upper=y");

  if (rs6000_vector_unit[m] != VECTOR_NONE
      || rs6000_vector_mem[m] != VECTOR_NONE)
    {
      fprintf (stderr, "%*s  vector: arith=%-10s mem=%s",
	       spaces, "",
	       rs6000_debug_vector_unit (rs6000_vector_unit[m]),
	       rs6000_debug_vector_unit (rs6000_vector_mem[m]));
      spaces = 0;
    }

  fputs ("\n", stderr);
}
#define DEBUG_FMT_ID "%-32s= "
#define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
#define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
#define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
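
/* Illustrative expansion (editor's note, not from the original source):
   DEBUG_FMT_S is "%-32s= %s\n", so
   fprintf (stderr, DEBUG_FMT_S, "abi", "ELFv2") prints the key padded to
   32 columns followed by "= ELFv2"; DEBUG_FMT_WX prints a flag word as a
   zero-padded hex value for the ISA-flag dumps below.  */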
/* Print various interesting information with -mdebug=reg.  */
static void
rs6000_debug_reg_global (void)
{
  static const char *const tf[2] = { "false", "true" };
  const char *nl = (const char *)0;
  int m;
  ssize_t m1, m2, v;
  char costly_num[20];
  char nop_num[20];
  char flags_buffer[40];
  const char *costly_str;
  const char *nop_str;
  const char *trace_str;
  const char *abi_str;
  const char *cmodel_str;
  struct cl_target_option cl_opts;

  /* Modes we want tieable information on.  */
  static const machine_mode print_tieable_modes[] = {
    QImode,
    HImode,
    SImode,
    DImode,
    TImode,
    PTImode,
    SFmode,
    DFmode,
    TFmode,
    IFmode,
    KFmode,
    SDmode,
    DDmode,
    TDmode,
    V16QImode,
    V8HImode,
    V4SImode,
    V2DImode,
    V1TImode,
    V4SFmode,
    V2DFmode,
    OOmode,
    XOmode,
    CCmode,
    CCUNSmode,
    CCEQmode,
  };
  /* Virtual regs we are interested in.  */
  const static struct {
    int regno;			/* register number.  */
    const char *name;		/* register name.  */
  } virtual_regs[] = {
    { STACK_POINTER_REGNUM,			"stack pointer:" },
    { TOC_REGNUM,				"toc:          " },
    { STATIC_CHAIN_REGNUM,			"static chain: " },
    { RS6000_PIC_OFFSET_TABLE_REGNUM,		"pic offset:   " },
    { HARD_FRAME_POINTER_REGNUM,		"hard frame:   " },
    { ARG_POINTER_REGNUM,			"arg pointer:  " },
    { FRAME_POINTER_REGNUM,			"frame pointer:" },
    { FIRST_PSEUDO_REGISTER,			"first pseudo: " },
    { FIRST_VIRTUAL_REGISTER,			"first virtual:" },
    { VIRTUAL_INCOMING_ARGS_REGNUM,		"incoming_args:" },
    { VIRTUAL_STACK_VARS_REGNUM,		"stack_vars:   " },
    { VIRTUAL_STACK_DYNAMIC_REGNUM,		"stack_dynamic:" },
    { VIRTUAL_OUTGOING_ARGS_REGNUM,		"outgoing_args:" },
    { VIRTUAL_CFA_REGNUM,			"cfa (frame):  " },
    { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM,	"stack boundary:" },
    { LAST_VIRTUAL_REGISTER,			"last virtual: " },
  };
  fputs ("\nHard register information:\n", stderr);
  rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
  rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
  rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
			  LAST_ALTIVEC_REGNO,
			  "vs");
  rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
  rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
  rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
  rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
  rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
  rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");

  fputs ("\nVirtual/stack/frame registers:\n", stderr);
  for (v = 0; v < (ssize_t) ARRAY_SIZE (virtual_regs); v++)
    fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name,
	     virtual_regs[v].regno);
2318 "d reg_class = %s\n"
2319 "v reg_class = %s\n"
2320 "wa reg_class = %s\n"
2321 "we reg_class = %s\n"
2322 "wr reg_class = %s\n"
2323 "wx reg_class = %s\n"
2324 "wA reg_class = %s\n"
2326 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_d
]],
2327 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_v
]],
2328 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wa
]],
2329 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_we
]],
2330 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wr
]],
2331 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wx
]],
2332 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wA
]]);
  for (m = 0; m < NUM_MACHINE_MODES; ++m)
    rs6000_debug_print_mode (m);

  fputs ("\n", stderr);

  for (m1 = 0; m1 < (ssize_t) ARRAY_SIZE (print_tieable_modes); m1++)
    {
      machine_mode mode1 = print_tieable_modes[m1];
      bool first_time = true;

      nl = (const char *)0;
      for (m2 = 0; m2 < (ssize_t) ARRAY_SIZE (print_tieable_modes); m2++)
	{
	  machine_mode mode2 = print_tieable_modes[m2];
	  if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
	    {
	      if (first_time)
		{
		  fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
		  nl = "\n";
		  first_time = false;
		}

	      fprintf (stderr, " %s", GET_MODE_NAME (mode2));
	    }
	}

      if (!first_time)
	fputs ("\n", stderr);
    }

  if (nl)
    fputs (nl, stderr);

  if (rs6000_recip_control)
    {
      fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);

      for (m = 0; m < NUM_MACHINE_MODES; ++m)
	if (rs6000_recip_bits[m])
	  fprintf (stderr,
		   "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
		   GET_MODE_NAME (m),
		   (RS6000_RECIP_AUTO_RE_P (m)
		    ? "auto"
		    : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
		   (RS6000_RECIP_AUTO_RSQRTE_P (m)
		    ? "auto"
		    : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));

      fputs ("\n", stderr);
    }
  if (rs6000_cpu_index >= 0)
    {
      const char *name = processor_target_table[rs6000_cpu_index].name;
      HOST_WIDE_INT flags
	= processor_target_table[rs6000_cpu_index].target_enable;

      sprintf (flags_buffer, "-mcpu=%s flags", name);
      rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
    }
  else
    fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");

  if (rs6000_tune_index >= 0)
    {
      const char *name = processor_target_table[rs6000_tune_index].name;
      HOST_WIDE_INT flags
	= processor_target_table[rs6000_tune_index].target_enable;

      sprintf (flags_buffer, "-mtune=%s flags", name);
      rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
    }
  else
    fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");

  cl_target_option_save (&cl_opts, &global_options, &global_options_set);
  rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
			    rs6000_isa_flags);

  rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
			    rs6000_isa_flags_explicit);

  rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);

  fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
	   OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
  switch (rs6000_sched_costly_dep)
    {
    case max_dep_latency:
      costly_str = "max_dep_latency";
      break;

    case no_dep_costly:
      costly_str = "no_dep_costly";
      break;

    case all_deps_costly:
      costly_str = "all_deps_costly";
      break;

    case true_store_to_load_dep_costly:
      costly_str = "true_store_to_load_dep_costly";
      break;

    case store_to_load_dep_costly:
      costly_str = "store_to_load_dep_costly";
      break;

    default:
      costly_str = costly_num;
      sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
      break;
    }

  fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);

  switch (rs6000_sched_insert_nops)
    {
    case sched_finish_regroup_exact:
      nop_str = "sched_finish_regroup_exact";
      break;

    case sched_finish_pad_groups:
      nop_str = "sched_finish_pad_groups";
      break;

    case sched_finish_none:
      nop_str = "sched_finish_none";
      break;

    default:
      nop_str = nop_num;
      sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
      break;
    }

  fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
  switch (rs6000_sdata)
    {
    case SDATA_NONE:
      break;

    case SDATA_DATA:
      fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
      break;

    case SDATA_SYSV:
      fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
      break;

    case SDATA_EABI:
      fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
      break;
    }
  switch (rs6000_traceback)
    {
    case traceback_default:	trace_str = "default";	break;
    case traceback_none:	trace_str = "none";	break;
    case traceback_part:	trace_str = "part";	break;
    case traceback_full:	trace_str = "full";	break;
    default:			trace_str = "unknown";	break;
    }

  fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);

  switch (rs6000_current_cmodel)
    {
    case CMODEL_SMALL:	cmodel_str = "small";	break;
    case CMODEL_MEDIUM:	cmodel_str = "medium";	break;
    case CMODEL_LARGE:	cmodel_str = "large";	break;
    default:		cmodel_str = "unknown";	break;
    }

  fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);

  switch (rs6000_current_abi)
    {
    case ABI_NONE:	abi_str = "none";	break;
    case ABI_AIX:	abi_str = "aix";	break;
    case ABI_ELFv2:	abi_str = "ELFv2";	break;
    case ABI_V4:	abi_str = "V4";		break;
    case ABI_DARWIN:	abi_str = "darwin";	break;
    default:		abi_str = "unknown";	break;
    }

  fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);

  if (rs6000_altivec_abi)
    fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");

  if (rs6000_aix_extabi)
    fprintf (stderr, DEBUG_FMT_S, "AIX vec-extabi", "true");

  if (rs6000_darwin64_abi)
    fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");

  fprintf (stderr, DEBUG_FMT_S, "soft_float",
	   (TARGET_SOFT_FLOAT ? "true" : "false"));

  if (TARGET_LINK_STACK)
    fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");

  if (TARGET_P8_FUSION)
    {
      char options[80];

      strcpy (options, "power8");
      if (TARGET_P8_FUSION_SIGN)
	strcat (options, ", sign");

      fprintf (stderr, DEBUG_FMT_S, "fusion", options);
    }

  fprintf (stderr, DEBUG_FMT_S, "plt-format",
	   TARGET_SECURE_PLT ? "secure" : "bss");
  fprintf (stderr, DEBUG_FMT_S, "struct-return",
	   aix_struct_return ? "aix" : "sysv");
  fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
  fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
  fprintf (stderr, DEBUG_FMT_S, "align_branch",
	   tf[!!rs6000_align_branch_targets]);
  fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
  fprintf (stderr, DEBUG_FMT_D, "long_double_size",
	   rs6000_long_double_type_size);
  if (rs6000_long_double_type_size > 64)
    {
      fprintf (stderr, DEBUG_FMT_S, "long double type",
	       TARGET_IEEEQUAD ? "IEEE" : "IBM");
      fprintf (stderr, DEBUG_FMT_S, "default long double type",
	       TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
    }
  fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
	   (int)rs6000_sched_restricted_insns_priority);
  fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
	   (int)END_BUILTINS);

  fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
	   (int)TARGET_FLOAT128_ENABLE_TYPE);

  if (TARGET_VSX)
    fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
	     (int)VECTOR_ELEMENT_SCALAR_64BIT);

  if (TARGET_DIRECT_MOVE_128)
    fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
	     (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
}
/* Update the addr mask bits in reg_addr to help secondary reload and the
   legitimate-address support figure out the appropriate addressing to
   use.  */

static void
rs6000_setup_reg_addr_masks (void)
{
  ssize_t rc, reg, m, nregs;
  addr_mask_type any_addr_mask, addr_mask;

  for (m = 0; m < NUM_MACHINE_MODES; ++m)
    {
      machine_mode m2 = (machine_mode) m;
      bool complex_p = false;
      bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
      size_t msize;

      if (COMPLEX_MODE_P (m2))
	{
	  complex_p = true;
	  m2 = GET_MODE_INNER (m2);
	}

      msize = GET_MODE_SIZE (m2);

      /* SDmode is special in that we want to access it only via REG+REG
	 addressing on power7 and above, since we want to use the LFIWZX and
	 STFIWZX instructions to load it.  */
      bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);

      any_addr_mask = 0;
      for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
	{
	  addr_mask = 0;
	  reg = reload_reg_map[rc].reg;

	  /* Can mode values go in the GPR/FPR/Altivec registers?  */
	  if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
	    {
	      bool small_int_vsx_p = (small_int_p
				      && (rc == RELOAD_REG_FPR
					  || rc == RELOAD_REG_VMX));

	      nregs = rs6000_hard_regno_nregs[m][reg];
	      addr_mask |= RELOAD_REG_VALID;

	      /* Indicate if the mode takes more than 1 physical register.  If
		 it takes a single register, indicate it can do REG+REG
		 addressing.  Small integers in VSX registers can only do
		 REG+REG addressing.  */
	      if (small_int_vsx_p)
		addr_mask |= RELOAD_REG_INDEXED;
	      else if (nregs > 1 || m == BLKmode || complex_p)
		addr_mask |= RELOAD_REG_MULTIPLE;
	      else
		addr_mask |= RELOAD_REG_INDEXED;

	      /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
		 addressing.  If we allow scalars into Altivec registers,
		 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.

		 For VSX systems, we don't allow update addressing for
		 DFmode/SFmode if those registers can go in both the
		 traditional floating point registers and Altivec registers.
		 The load/store instructions for the Altivec registers do not
		 have update forms.  If we allowed update addressing, it seems
		 to break IV-OPT code using floating point if the index type is
		 int instead of long (PR target/81550 and target/84042).  */
	      if (TARGET_UPDATE
		  && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
		  && msize <= 8
		  && !VECTOR_MODE_P (m2)
		  && !VECTOR_ALIGNMENT_P (m2)
		  && !complex_p
		  && (m != E_DFmode || !TARGET_VSX)
		  && (m != E_SFmode || !TARGET_P8_VECTOR)
		  && !small_int_vsx_p)
		{
		  addr_mask |= RELOAD_REG_PRE_INCDEC;

		  /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
		     we don't allow PRE_MODIFY for some multi-register
		     operations.  */
		  switch (m)
		    {
		    default:
		      addr_mask |= RELOAD_REG_PRE_MODIFY;
		      break;

		    case E_DImode:
		      if (TARGET_POWERPC64)
			addr_mask |= RELOAD_REG_PRE_MODIFY;
		      break;

		    case E_DFmode:
		    case E_DDmode:
		      if (TARGET_HARD_FLOAT)
			addr_mask |= RELOAD_REG_PRE_MODIFY;
		      break;
		    }
		}
	    }

	  /* GPR and FPR registers can do REG+OFFSET addressing, except
	     possibly for SDmode.  ISA 3.0 (i.e. power9) adds D-form addressing
	     for 64-bit scalars and 32-bit SFmode to altivec registers.  */
	  if ((addr_mask != 0) && !indexed_only_p
	      && msize <= 8
	      && (rc == RELOAD_REG_GPR
		  || ((msize == 8 || m2 == SFmode)
		      && (rc == RELOAD_REG_FPR
			  || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
	    addr_mask |= RELOAD_REG_OFFSET;

	  /* VSX registers can do REG+OFFSET addressing if ISA 3.0
	     instructions are enabled.  The offset for 128-bit VSX registers is
	     only 12-bits.  While GPRs can handle the full offset range, VSX
	     registers can only handle the restricted range.  */
	  else if ((addr_mask != 0) && !indexed_only_p
		   && msize == 16 && TARGET_P9_VECTOR
		   && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
		       || (m2 == TImode && TARGET_VSX)))
	    {
	      addr_mask |= RELOAD_REG_OFFSET;
	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
		addr_mask |= RELOAD_REG_QUAD_OFFSET;
	    }

	  /* Vector pairs can do both indexed and offset loads if the
	     instructions are enabled, otherwise they can only do offset loads
	     since it will be broken into two vector moves.  Vector quads can
	     only do offset loads.  */
	  else if ((addr_mask != 0) && TARGET_MMA
		   && (m2 == OOmode || m2 == XOmode))
	    {
	      addr_mask |= RELOAD_REG_OFFSET;
	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
		{
		  addr_mask |= RELOAD_REG_QUAD_OFFSET;
		  if (m2 == OOmode)
		    addr_mask |= RELOAD_REG_INDEXED;
		}
	    }

	  /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
	     addressing on 128-bit types.  */
	  if (rc == RELOAD_REG_VMX && msize == 16
	      && (addr_mask & RELOAD_REG_VALID) != 0)
	    addr_mask |= RELOAD_REG_AND_M16;

	  reg_addr[m].addr_mask[rc] = addr_mask;
	  any_addr_mask |= addr_mask;
	}

      reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
    }
}
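
/* Worked example (editor's note, not from the original source, assuming the
   flag encoding above): on a power9 target, V2DFmode in the VMX class ends
   up with RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET
   | RELOAD_REG_QUAD_OFFSET | RELOAD_REG_AND_M16, while SImode in the FPR
   class keeps only RELOAD_REG_VALID | RELOAD_REG_INDEXED, because small
   integers in VSX registers are restricted to REG+REG addressing.  */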
/* Initialize the various global tables that are based on register size.  */
static void
rs6000_init_hard_regno_mode_ok (bool global_init_p)
{
  ssize_t r, m, c;
  int align64;
  int align32;

  /* Precalculate REGNO_REG_CLASS.  */
  rs6000_regno_regclass[0] = GENERAL_REGS;
  for (r = 1; r < 32; ++r)
    rs6000_regno_regclass[r] = BASE_REGS;

  for (r = 32; r < 64; ++r)
    rs6000_regno_regclass[r] = FLOAT_REGS;

  for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
    rs6000_regno_regclass[r] = NO_REGS;

  for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
    rs6000_regno_regclass[r] = ALTIVEC_REGS;

  rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
  for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
    rs6000_regno_regclass[r] = CR_REGS;

  rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
  rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
  rs6000_regno_regclass[CA_REGNO] = NO_REGS;
  rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
  rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
  rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
  rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;

  /* Precalculate register class to simpler reload register class.  We don't
     need all of the register classes that are combinations of different
     classes, just the simple ones that have constraint letters.  */
  for (c = 0; c < N_REG_CLASSES; c++)
    reg_class_to_reg_type[c] = NO_REG_TYPE;

  reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
  reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
  reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
  reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
  reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
  reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;

  if (TARGET_VSX)
    {
      reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
      reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
    }
  else
    {
      reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
      reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
    }
  /* Precalculate the valid memory formats as well as the vector information,
     this must be set up before the rs6000_hard_regno_nregs_internal calls
     below.  */
  gcc_assert ((int)VECTOR_NONE == 0);
  memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
  memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));

  gcc_assert ((int)CODE_FOR_nothing == 0);
  memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));

  gcc_assert ((int)NO_REGS == 0);
  memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));

  /* The VSX hardware allows native alignment for vectors, but we control
     whether the compiler believes it can use native alignment or must still
     use 128-bit alignment.  */
  if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
    {
      align64 = 64;
      align32 = 32;
    }
  else
    {
      align64 = 128;
      align32 = 128;
    }

  /* KF mode (IEEE 128-bit in VSX registers).  We do not have arithmetic, so
     only set the memory modes.  Include TFmode if -mabi=ieeelongdouble.  */
  if (TARGET_FLOAT128_TYPE)
    {
      rs6000_vector_mem[KFmode] = VECTOR_VSX;
      rs6000_vector_align[KFmode] = 128;

      if (FLOAT128_IEEE_P (TFmode))
	{
	  rs6000_vector_mem[TFmode] = VECTOR_VSX;
	  rs6000_vector_align[TFmode] = 128;
	}
    }

  /* V2DF mode, VSX only.  */
  if (TARGET_VSX)
    {
      rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
      rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
      rs6000_vector_align[V2DFmode] = align64;
    }

  /* V4SF mode, either VSX or Altivec.  */
  if (TARGET_VSX)
    {
      rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
      rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
      rs6000_vector_align[V4SFmode] = align32;
    }
  else if (TARGET_ALTIVEC)
    {
      rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
      rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
      rs6000_vector_align[V4SFmode] = align32;
    }

  /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
     and stores.  */
  if (TARGET_ALTIVEC)
    {
      rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
      rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
      rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
      rs6000_vector_align[V4SImode] = align32;
      rs6000_vector_align[V8HImode] = align32;
      rs6000_vector_align[V16QImode] = align32;

      if (TARGET_VSX)
	{
	  rs6000_vector_mem[V4SImode] = VECTOR_VSX;
	  rs6000_vector_mem[V8HImode] = VECTOR_VSX;
	  rs6000_vector_mem[V16QImode] = VECTOR_VSX;
	}
      else
	{
	  rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
	  rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
	  rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
	}
    }

  /* V2DImode, full mode depends on ISA 2.07 vector mode.  Allow under VSX to
     do insert/splat/extract.  Altivec doesn't have 64-bit integer support.  */
  if (TARGET_VSX)
    {
      rs6000_vector_mem[V2DImode] = VECTOR_VSX;
      rs6000_vector_unit[V2DImode]
	= (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
      rs6000_vector_align[V2DImode] = align64;

      rs6000_vector_mem[V1TImode] = VECTOR_VSX;
      rs6000_vector_unit[V1TImode]
	= (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
      rs6000_vector_align[V1TImode] = 128;
    }

  /* DFmode, see if we want to use the VSX unit.  Memory is handled
     differently, so don't set rs6000_vector_mem.  */
  if (TARGET_VSX)
    {
      rs6000_vector_unit[DFmode] = VECTOR_VSX;
      rs6000_vector_align[DFmode] = 64;
    }

  /* SFmode, see if we want to use the VSX unit.  */
  if (TARGET_P8_VECTOR)
    {
      rs6000_vector_unit[SFmode] = VECTOR_VSX;
      rs6000_vector_align[SFmode] = 32;
    }

  /* Allow TImode in VSX register and set the VSX memory macros.  */
  if (TARGET_VSX)
    {
      rs6000_vector_mem[TImode] = VECTOR_VSX;
      rs6000_vector_align[TImode] = align64;
    }

  /* Add support for vector pairs and vector quad registers.  */
  if (TARGET_MMA)
    {
      rs6000_vector_unit[OOmode] = VECTOR_NONE;
      rs6000_vector_mem[OOmode] = VECTOR_VSX;
      rs6000_vector_align[OOmode] = 256;

      rs6000_vector_unit[XOmode] = VECTOR_NONE;
      rs6000_vector_mem[XOmode] = VECTOR_VSX;
      rs6000_vector_align[XOmode] = 512;
    }
  /* Register class constraints for the constraints that depend on compile
     switches.  When the VSX code was added, different constraints were added
     based on the type (DFmode, V2DFmode, V4SFmode).  For the vector types, all
     of the VSX registers are used.  The register classes for scalar floating
     point types is set, based on whether we allow that type into the upper
     (Altivec) registers.  GCC has register classes to target the Altivec
     registers for load/store operations, to select using a VSX memory
     operation instead of the traditional floating point operation.  The
     constraints are:

	d  - Register class to use with traditional DFmode instructions.
	v  - Altivec register.
	wa - Any VSX register.
	wc - Reserved to represent individual CR bits (used in LLVM).
	wn - always NO_REGS.
	wr - GPR if 64-bit mode is permitted.
	wx - Float register if we can do 32-bit int stores.  */

  if (TARGET_HARD_FLOAT)
    rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS;
  if (TARGET_ALTIVEC)
    rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
  if (TARGET_VSX)
    rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;

  if (TARGET_POWERPC64)
    {
      rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
      rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
    }

  if (TARGET_STFIWX)
    rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS;	/* DImode */

  /* Support for new direct moves (ISA 3.0 + 64bit).  */
  if (TARGET_DIRECT_MOVE_128)
    rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
  /* Set up the reload helper and direct move functions.  */
  if (TARGET_VSX || TARGET_ALTIVEC)
    {
      if (TARGET_64BIT)
	{
	  reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
	  reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
	  reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
	  reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
	  reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
	  reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
	  reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
	  reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
	  reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
	  reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
	  reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
	  reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
	  reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
	  reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
	  reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
	  reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
	  reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
	  reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
	  reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
	  reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;

	  if (FLOAT128_VECTOR_P (KFmode))
	    {
	      reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
	      reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
	    }

	  if (FLOAT128_VECTOR_P (TFmode))
	    {
	      reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
	      reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
	    }

	  /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
	     available.  */
	  if (TARGET_NO_SDMODE_STACK)
	    {
	      reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
	      reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
	    }

	  if (TARGET_VSX)
	    {
	      reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
	      reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
	    }

	  if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
	    {
	      reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
	      reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
	      reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
	      reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
	      reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
	      reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
	      reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
	      reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
	      reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;

	      reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
	      reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
	      reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
	      reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
	      reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
	      reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
	      reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
	      reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
	      reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;

	      if (FLOAT128_VECTOR_P (KFmode))
		{
		  reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
		  reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
		}

	      if (FLOAT128_VECTOR_P (TFmode))
		{
		  reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
		  reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
		}
	    }

	  if (TARGET_MMA)
	    {
	      reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
	      reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
	      reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
	      reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
	    }
	}
      else
	{
	  reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
	  reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
	  reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
	  reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
	  reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
	  reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
	  reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
	  reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
	  reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
	  reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
	  reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
	  reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
	  reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
	  reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
	  reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
	  reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
	  reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
	  reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
	  reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
	  reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;

	  if (FLOAT128_VECTOR_P (KFmode))
	    {
	      reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
	      reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
	    }

	  if (FLOAT128_IEEE_P (TFmode))
	    {
	      reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
	      reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
	    }

	  /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
	     available.  */
	  if (TARGET_NO_SDMODE_STACK)
	    {
	      reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
	      reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
	    }

	  if (TARGET_VSX)
	    {
	      reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
	      reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
	    }
	}

      if (TARGET_DIRECT_MOVE)
	{
	  reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
	  reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
	  reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
	}
    }

  if (TARGET_VSX)
    {
      reg_addr[DFmode].scalar_in_vmx_p = true;
      reg_addr[DImode].scalar_in_vmx_p = true;

      if (TARGET_P8_VECTOR)
	{
	  reg_addr[SFmode].scalar_in_vmx_p = true;
	  reg_addr[SImode].scalar_in_vmx_p = true;

	  if (TARGET_P9_VECTOR)
	    {
	      reg_addr[HImode].scalar_in_vmx_p = true;
	      reg_addr[QImode].scalar_in_vmx_p = true;
	    }
	}
    }
  /* Precalculate HARD_REGNO_NREGS.  */
  for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
    for (m = 0; m < NUM_MACHINE_MODES; ++m)
      rs6000_hard_regno_nregs[m][r]
	= rs6000_hard_regno_nregs_internal (r, (machine_mode) m);

  /* Precalculate TARGET_HARD_REGNO_MODE_OK.  */
  for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
    for (m = 0; m < NUM_MACHINE_MODES; ++m)
      rs6000_hard_regno_mode_ok_p[m][r]
	= rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);

  /* Precalculate CLASS_MAX_NREGS sizes.  */
  for (c = 0; c < LIM_REG_CLASSES; ++c)
    {
      int reg_size;

      if (TARGET_VSX && VSX_REG_CLASS_P (c))
	reg_size = UNITS_PER_VSX_WORD;

      else if (c == ALTIVEC_REGS)
	reg_size = UNITS_PER_ALTIVEC_WORD;

      else if (c == FLOAT_REGS)
	reg_size = UNITS_PER_FP_WORD;

      else
	reg_size = UNITS_PER_WORD;

      for (m = 0; m < NUM_MACHINE_MODES; ++m)
	{
	  machine_mode m2 = (machine_mode)m;
	  int reg_size2 = reg_size;

	  /* TDmode & IBM 128-bit floating point always takes 2 registers,
	     even with VSX.  */
	  if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
	    reg_size2 = UNITS_PER_FP_WORD;

	  rs6000_class_max_nregs[m][c]
	    = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
	}
    }
  /* Calculate which modes to automatically generate code to use the
     reciprocal divide and square root instructions.  In the future, possibly
     automatically generate the instructions even if the user did not specify
     -mrecip.  The older machines double precision reciprocal sqrt estimate is
     not accurate enough.  */
  memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
  if (TARGET_FRES)
    rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
  if (TARGET_FRE)
    rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
  if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
    rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
  if (VECTOR_UNIT_VSX_P (V2DFmode))
    rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;

  if (TARGET_FRSQRTES)
    rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
  if (TARGET_FRSQRTE)
    rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
  if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
    rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
  if (VECTOR_UNIT_VSX_P (V2DFmode))
    rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;

  if (rs6000_recip_control)
    {
      if (!flag_finite_math_only)
	warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
		 "-ffast-math");
      if (flag_trapping_math)
	warning (0, "%qs requires %qs or %qs", "-mrecip",
		 "-fno-trapping-math", "-ffast-math");
      if (!flag_reciprocal_math)
	warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
		 "-ffast-math");
      if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
	{
	  if (RS6000_RECIP_HAVE_RE_P (SFmode)
	      && (rs6000_recip_control & RECIP_SF_DIV) != 0)
	    rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;

	  if (RS6000_RECIP_HAVE_RE_P (DFmode)
	      && (rs6000_recip_control & RECIP_DF_DIV) != 0)
	    rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;

	  if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
	      && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
	    rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;

	  if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
	      && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
	    rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;

	  if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
	      && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
	    rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;

	  if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
	      && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
	    rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;

	  if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
	      && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
	    rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;

	  if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
	      && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
	    rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
	}
    }
  /* Update the addr mask bits in reg_addr to help secondary reload and the
     legitimate-address support figure out the appropriate addressing to
     use.  */
  rs6000_setup_reg_addr_masks ();

  if (global_init_p || TARGET_DEBUG_TARGET)
    {
      if (TARGET_DEBUG_REG)
	rs6000_debug_reg_global ();

      if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
	fprintf (stderr,
		 "SImode variable mult cost = %d\n"
		 "SImode constant mult cost = %d\n"
		 "SImode short constant mult cost = %d\n"
		 "DImode multiplication cost = %d\n"
		 "SImode division cost = %d\n"
		 "DImode division cost = %d\n"
		 "Simple fp operation cost = %d\n"
		 "DFmode multiplication cost = %d\n"
		 "SFmode division cost = %d\n"
		 "DFmode division cost = %d\n"
		 "cache line size = %d\n"
		 "l1 cache size = %d\n"
		 "l2 cache size = %d\n"
		 "simultaneous prefetches = %d\n"
		 "\n",
		 rs6000_cost->mulsi,
		 rs6000_cost->mulsi_const,
		 rs6000_cost->mulsi_const9,
		 rs6000_cost->muldi,
		 rs6000_cost->divsi,
		 rs6000_cost->divdi,
		 rs6000_cost->fp,
		 rs6000_cost->dmul,
		 rs6000_cost->sdiv,
		 rs6000_cost->ddiv,
		 rs6000_cost->cache_line_size,
		 rs6000_cost->l1_cache_size,
		 rs6000_cost->l2_cache_size,
		 rs6000_cost->simultaneous_prefetches);
    }
}
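
/* Editor's note on the reciprocal machinery above (not from the original
   source): the *_HAVE_* bits record that an estimate instruction exists
   (fres/frsqrte and their vector forms), while the *_AUTO_* bits
   additionally mean the -mrecip settings plus the -ffast-math-style flags
   permit rewriting x/y as x * (1/y) automatically;
   rs6000_debug_reg_global prints these states as "have" versus "auto".  */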
/* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS.  */
static void
darwin_rs6000_override_options (void)
{
  /* The Darwin ABI always includes AltiVec, can't be (validly) turned
     off.  */
  rs6000_altivec_abi = 1;
  TARGET_ALTIVEC_VRSAVE = 1;
  rs6000_current_abi = ABI_DARWIN;

  if (DEFAULT_ABI == ABI_DARWIN
      && TARGET_64BIT)
    darwin_one_byte_bool = 1;

  if (TARGET_64BIT && ! TARGET_POWERPC64)
    {
      rs6000_isa_flags |= OPTION_MASK_POWERPC64;
      warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
    }

  /* The linkers [ld64] that support 64Bit do not need the JBSR longcall
     optimisation, and will not work with the most generic case (where the
     symbol is undefined external, but there is no symbol stub).  */
  if (TARGET_64BIT)
    rs6000_default_long_calls = 0;

  /* ld_classic is (so far) still used for kernel (static) code, and supports
     the JBSR longcall / branch islands.  */
  if (flag_mkernel)
    {
      rs6000_default_long_calls = 1;

      /* Allow a kext author to do -mkernel -mhard-float.  */
      if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
	rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
    }

  /* Make -m64 imply -maltivec.  Darwin's 64-bit ABI includes
     Altivec.  */
  if (!flag_mkernel && !flag_apple_kext
      && TARGET_64BIT
      && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
    rs6000_isa_flags |= OPTION_MASK_ALTIVEC;

  /* Unless the user (not the configurer) has explicitly overridden
     it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
     G4 unless targeting the kernel.  */
  if (!flag_mkernel
      && !flag_apple_kext
      && strverscmp (darwin_macosx_version_min, "10.5") >= 0
      && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
      && ! OPTION_SET_P (rs6000_cpu_index))
    rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
}
/* If not otherwise specified by a target, make 'long double' equivalent to
   'double'.  */

#ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
#define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
#endif
/* Implement TARGET_MD_ASM_ADJUST.  All asm statements are considered
   to clobber the XER[CA] bit because clobbering that bit without telling
   the compiler worked just fine with versions of GCC before GCC 5, and
   breaking a lot of older code in ways that are hard to track down is
   not such a great idea.  */

static rtx_insn *
rs6000_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
		      vec<machine_mode> & /*input_modes*/,
		      vec<const char *> & /*constraints*/,
		      vec<rtx> &/*uses*/, vec<rtx> &clobbers,
		      HARD_REG_SET &clobbered_regs, location_t /*loc*/)
{
  clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
  SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
  return NULL;
}
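
/* Illustrative example (editor's note, not from the original source):
   user code such as

     asm ("addic %0,%1,1" : "=r" (out) : "r" (in));

   modifies XER[CA] without declaring it; the unconditional clobber added
   above keeps such pre-GCC-5 asm working instead of silently corrupting a
   live carry bit.  */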
/* This target function is similar to the hook TARGET_OPTION_OVERRIDE
   but is called when the optimize level is changed via an attribute or
   pragma or when it is reset at the end of the code affected by the
   attribute or pragma.  It is not called at the beginning of compilation
   when TARGET_OPTION_OVERRIDE is called so if you want to perform these
   actions then, you should have TARGET_OPTION_OVERRIDE call
   TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE.  */

static void
rs6000_override_options_after_change (void)
{
  /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
     turns -frename-registers on.  */
  if ((OPTION_SET_P (flag_unroll_loops) && flag_unroll_loops)
       || (OPTION_SET_P (flag_unroll_all_loops)
	   && flag_unroll_all_loops))
    {
      if (!OPTION_SET_P (unroll_only_small_loops))
	unroll_only_small_loops = 0;
      if (!OPTION_SET_P (flag_rename_registers))
	flag_rename_registers = 1;
      if (!OPTION_SET_P (flag_cunroll_grow_size))
	flag_cunroll_grow_size = 1;
    }
  else if (!OPTION_SET_P (flag_cunroll_grow_size))
    flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;

  /* If we are inserting ROP-protect instructions, disable shrink wrap.  */
  if (rs6000_rop_protect)
    flag_shrink_wrap = 0;
}
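
/* Illustrative trigger (editor's note, not from the original source):

     #pragma GCC optimize ("unroll-loops")

   around a function re-invokes this hook when entering and leaving the
   affected code, which is how an explicit unroll request gets
   -munroll-only-small-loops turned off and -frename-registers turned on
   for just that region.  */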
#ifdef TARGET_USES_LINUX64_OPT
static void
rs6000_linux64_override_options ()
{
  if (!OPTION_SET_P (rs6000_alignment_flags))
    rs6000_alignment_flags = MASK_ALIGN_NATURAL;
  if (rs6000_isa_flags & OPTION_MASK_64BIT)
    {
      if (DEFAULT_ABI != ABI_AIX)
	{
	  rs6000_current_abi = ABI_AIX;
	  error (INVALID_64BIT, "call");
	}
      dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
      if (ELFv2_ABI_CHECK)
	{
	  rs6000_current_abi = ABI_ELFv2;
	  if (dot_symbols)
	    error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
	}
      if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
	{
	  rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
	  error (INVALID_64BIT, "relocatable");
	}
      if (rs6000_isa_flags & OPTION_MASK_EABI)
	{
	  rs6000_isa_flags &= ~OPTION_MASK_EABI;
	  error (INVALID_64BIT, "eabi");
	}
      if (TARGET_PROTOTYPE)
	{
	  target_prototype = 0;
	  error (INVALID_64BIT, "prototype");
	}
      if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
	{
	  rs6000_isa_flags |= OPTION_MASK_POWERPC64;
	  error ("%<-m64%> requires a PowerPC64 cpu");
	}
      if (!OPTION_SET_P (rs6000_current_cmodel))
	SET_CMODEL (CMODEL_MEDIUM);
      if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
	{
	  if (OPTION_SET_P (rs6000_current_cmodel)
	      && rs6000_current_cmodel != CMODEL_SMALL)
	    error ("%<-mcmodel%> incompatible with other toc options");
	  if (TARGET_MINIMAL_TOC)
	    SET_CMODEL (CMODEL_SMALL);
	  else if (TARGET_PCREL
		   || (PCREL_SUPPORTED_BY_OS
		       && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0))
	    /* Ignore -mno-minimal-toc.  */
	    ;
	  else
	    SET_CMODEL (CMODEL_SMALL);
	}
      if (rs6000_current_cmodel != CMODEL_SMALL)
	{
	  if (!OPTION_SET_P (TARGET_NO_FP_IN_TOC))
	    TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
	  if (!OPTION_SET_P (TARGET_NO_SUM_IN_TOC))
	    TARGET_NO_SUM_IN_TOC = 0;
	}
      if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
	{
	  if (OPTION_SET_P (rs6000_pltseq))
	    warning (0, "%qs unsupported for this ABI",
		     "-mpltseq");
	  rs6000_pltseq = false;
	}
    }
  else if (TARGET_64BIT)
    error (INVALID_32BIT, "32");
  else
    {
      if (TARGET_PROFILE_KERNEL)
	{
	  profile_kernel = 0;
	  error (INVALID_32BIT, "profile-kernel");
	}
      if (OPTION_SET_P (rs6000_current_cmodel))
	{
	  SET_CMODEL (CMODEL_SMALL);
	  error (INVALID_32BIT, "cmodel");
	}
    }
}
#endif
/* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
   This support is only in little endian GLIBC 2.32 or newer.  */
static bool
glibc_supports_ieee_128bit (void)
{
#ifdef OPTION_GLIBC
  if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
      && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
    return true;
#endif /* OPTION_GLIBC.  */

  return false;
}
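
/* Worked example (editor's note, not from the original source): the test
   packs major.minor into major * 1000 + minor, so glibc 2.32 encodes as
   2032 and glibc 2.31 as 2031 < 2032; IEEE 128-bit long double support
   therefore starts exactly at the little-endian glibc 2.32 release.  */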
/* Override command line options.

   Combine build-specific configuration information with options
   specified on the command line to set various state variables which
   influence code generation, optimization, and expansion of built-in
   functions.  Assure that command-line configuration preferences are
   compatible with each other and with the build configuration; issue
   warnings while adjusting configuration or error messages while
   rejecting configuration.

   Upon entry to this function:

     This function is called once at the beginning of
     compilation, and then again at the start and end of compiling
     each section of code that has a different configuration, as
     indicated, for example, by adding the

       __attribute__((__target__("cpu=power9")))

     qualifier to a function definition or, for example, by bracketing
     code between

       #pragma GCC target("altivec")

     and

       #pragma GCC reset_options

     directives.  Parameter global_init_p is true for the initial
     invocation, which initializes global variables, and false for all
     subsequent invocations.

     Various global state information is assumed to be valid.  This
     includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
     default CPU specified at build configure time, TARGET_DEFAULT,
     representing the default set of option flags for the default
     target, and OPTION_SET_P (rs6000_isa_flags), representing
     which options were requested on the command line.

   Upon return from this function:

     rs6000_isa_flags_explicit has a non-zero bit for each flag that
     was set by name on the command line.  Additionally, if certain
     attributes are automatically enabled or disabled by this function
     in order to assure compatibility between options and
     configuration, the flags associated with those attributes are
     also set.  By setting these "explicit bits", we avoid the risk
     that other code might accidentally overwrite these particular
     attributes with "default values".

     The various bits of rs6000_isa_flags are set to indicate the
     target options that have been selected for the most current
     compilation efforts.  This has the effect of also turning on the
     associated TARGET_XXX values since these are macros which are
     generally defined to test the corresponding bit of the
     rs6000_isa_flags variable.

     Various other global variables and fields of global structures
     (over 50 in all) are initialized to reflect the desired options
     for the most current compilation efforts.  */
static bool
rs6000_option_override_internal (bool global_init_p)
{
  bool ret = true;

  HOST_WIDE_INT set_masks;
  HOST_WIDE_INT ignore_masks;
  int cpu_index = -1;
  int tune_index;
  struct cl_target_option *main_target_opt
    = ((global_init_p || target_option_default_node == NULL)
       ? NULL : TREE_TARGET_OPTION (target_option_default_node));

  /* Print defaults.  */
  if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
    rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);

  /* Remember the explicit arguments.  */
  if (global_init_p)
    rs6000_isa_flags_explicit = OPTION_SET_P (rs6000_isa_flags);

  /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
     library functions, so warn about it.  The flag may be useful for
     performance studies from time to time though, so don't disable it
     entirely.  */
  if (OPTION_SET_P (rs6000_alignment_flags)
      && rs6000_alignment_flags == MASK_ALIGN_POWER
      && DEFAULT_ABI == ABI_DARWIN
      && TARGET_64BIT)
    warning (0, "%qs is not supported for 64-bit Darwin;"
	     " it is incompatible with the installed C and C++ libraries",
	     "-malign-power");

  /* Numerous experiments show that IRA-based loop pressure
     calculation works better for RTL loop invariant motion on targets
     with enough (>= 32) registers.  It is an expensive optimization.
     So it is on only for peak performance.  */
  if (optimize >= 3 && global_init_p
      && !OPTION_SET_P (flag_ira_loop_pressure))
    flag_ira_loop_pressure = 1;

  /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
     for tracebacks to be complete but not if any -fasynchronous-unwind-tables
     options were already specified.  */
  if (flag_sanitize & SANITIZE_USER_ADDRESS
      && !OPTION_SET_P (flag_asynchronous_unwind_tables))
    flag_asynchronous_unwind_tables = 1;

  /* -fvariable-expansion-in-unroller is a win for POWER whenever the
     loop unroller is active.  It is only checked during unrolling, so
     we can just set it on by default.  */
  if (!OPTION_SET_P (flag_variable_expansion_in_unroller))
    flag_variable_expansion_in_unroller = 1;
3653 /* Set the pointer size. */
3656 rs6000_pmode
= DImode
;
3657 rs6000_pointer_size
= 64;
3661 rs6000_pmode
= SImode
;
3662 rs6000_pointer_size
= 32;
  /* Some OSs don't support saving Altivec registers.  On those OSs, we don't
     touch the OPTION_MASK_ALTIVEC settings; if the user wants it, the user
     must explicitly specify it and we won't interfere with the user's
     specification.  */

  set_masks = POWERPC_MASKS;
#ifdef OS_MISSING_ALTIVEC
  if (OS_MISSING_ALTIVEC)
    set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
                   | OTHER_VSX_VECTOR_MASKS);
#endif
  /* Don't override by the processor default if given explicitly.  */
  set_masks &= ~rs6000_isa_flags_explicit;
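
  /* Worked example (illustrative, with a hypothetical command line): if
     the user passes -mno-vsx, OPTION_MASK_VSX is recorded in
     rs6000_isa_flags_explicit, so the statement above removes it from
     set_masks.  A later -mcpu=power9 then cannot silently turn VSX back
     on, because only the bits still present in set_masks are replaced
     from the processor table below.  */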
  /* Without option powerpc64 specified explicitly, we need to ensure
     powerpc64 is always enabled for 64-bit here, otherwise some following
     checks can use an unexpected TARGET_POWERPC64 value.  */
  if (!(rs6000_isa_flags_explicit & OPTION_MASK_POWERPC64)
      && TARGET_64BIT)
    {
      rs6000_isa_flags |= OPTION_MASK_POWERPC64;
      /* Need to stop powerpc64 from being unset in later processing,
         so clear it in set_masks.  But as PR108240 shows, to keep it
         consistent with before, we want to make this only if 64 bit
         is enabled explicitly.  This is a hack, revisit this later.  */
      if (rs6000_isa_flags_explicit & OPTION_MASK_64BIT)
        set_masks &= ~OPTION_MASK_POWERPC64;
    }
  /* Process the -mcpu=<xxx> and -mtune=<xxx> argument.  If the user changed
     the cpu in a target attribute or pragma, but did not specify a tuning
     option, use the cpu for the tuning option rather than the option specified
     with -mtune on the command line.  Process a '--with-cpu' configuration
     request as an implicit --cpu.  */
  if (rs6000_cpu_index >= 0)
    cpu_index = rs6000_cpu_index;
  else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
    cpu_index = main_target_opt->x_rs6000_cpu_index;
  else if (OPTION_TARGET_CPU_DEFAULT)
    cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
  /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
     compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
     with those from the cpu, except for options that were explicitly set.  If
     we don't have a cpu, do not override the target bits set in
     TARGET_DEFAULT.  */
  if (cpu_index >= 0)
    {
      rs6000_cpu_index = cpu_index;
      rs6000_isa_flags &= ~set_masks;
      rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
                           & set_masks);
    }
  else
    {
      /* If no -mcpu=<xxx>, inherit any default options that were cleared via
         POWERPC_MASKS.  Originally, TARGET_DEFAULT was used to initialize
         target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook.  When we
         switched to using rs6000_isa_flags, we need to do the initialization
         here.

         If there is a TARGET_DEFAULT, use that.  Otherwise fall back to using
         -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults.  */
      HOST_WIDE_INT flags;
      if (TARGET_DEFAULT)
        flags = TARGET_DEFAULT;
      else
        {
          /* PowerPC 64-bit LE requires at least ISA 2.07.  */
          const char *default_cpu = (!TARGET_POWERPC64
                                     ? "powerpc"
                                     : (BYTES_BIG_ENDIAN
                                        ? "powerpc64" : "powerpc64le"));
          int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
          flags = processor_target_table[default_cpu_index].target_enable;
        }
      rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
    }
  /* Don't expect powerpc64 enabled on those OSes with OS_MISSING_POWERPC64,
     since they do not save and restore the high half of the GPRs correctly
     in all cases.  If the user explicitly specifies it, we won't interfere
     with the user's specification.  */
#ifdef OS_MISSING_POWERPC64
  if (OS_MISSING_POWERPC64
      && !TARGET_64BIT
      && !(rs6000_isa_flags_explicit & OPTION_MASK_POWERPC64))
    rs6000_isa_flags &= ~OPTION_MASK_POWERPC64;
#endif
  if (rs6000_tune_index >= 0)
    tune_index = rs6000_tune_index;
  else if (cpu_index >= 0)
    rs6000_tune_index = tune_index = cpu_index;
  else
    {
      size_t i;
      enum processor_type tune_proc
        = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);

      tune_index = -1;
      for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
        if (processor_target_table[i].processor == tune_proc)
          {
            tune_index = i;
            break;
          }
    }

  if (cpu_index >= 0)
    rs6000_cpu = processor_target_table[cpu_index].processor;
  else
    rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;

  gcc_assert (tune_index >= 0);
  rs6000_tune = processor_target_table[tune_index].processor;
  if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
      || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
      || rs6000_cpu == PROCESSOR_PPCE5500)
    {
      if (TARGET_ALTIVEC)
        error ("AltiVec not supported in this target");
    }
  /* If we are optimizing big endian systems for space, use the load/store
     multiple instructions.  */
  if (BYTES_BIG_ENDIAN && optimize_size)
    rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
  /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
     because the hardware doesn't support the instructions used in little
     endian mode, and causes an alignment trap.  The 750 does not cause an
     alignment trap (except when the target is unaligned).  */
  if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
    {
      rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
      if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
        warning (0, "%qs is not supported on little endian systems",
                 "-mmultiple");
    }
  /* If little-endian, default to -mstrict-align on older processors.
     Testing for direct_move matches power8 and later.  */
  if (!BYTES_BIG_ENDIAN
      && !(processor_target_table[tune_index].target_enable
           & OPTION_MASK_DIRECT_MOVE))
    rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
  /* Add some warnings for VSX.  */
  if (TARGET_VSX)
    {
      const char *msg = NULL;
      if (!TARGET_HARD_FLOAT)
        {
          if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
            msg = N_("%<-mvsx%> requires hardware floating point");
          else
            {
              rs6000_isa_flags &= ~OPTION_MASK_VSX;
              rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
            }
        }
      else if (TARGET_AVOID_XFORM > 0)
        msg = N_("%<-mvsx%> needs indexed addressing");
      else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
                                   & OPTION_MASK_ALTIVEC))
        {
          if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
            msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
          else
            msg = N_("%<-mno-altivec%> disables vsx");
        }

      if (msg)
        {
          warning (0, msg);
          rs6000_isa_flags &= ~OPTION_MASK_VSX;
          rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
        }
    }
  /* If hard-float/altivec/vsx were explicitly turned off then don't allow
     the -mcpu setting to enable options that conflict.  */
  if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
      && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
                                       | OPTION_MASK_ALTIVEC
                                       | OPTION_MASK_VSX)) != 0)
    rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
                           | OPTION_MASK_DIRECT_MOVE)
                          & ~rs6000_isa_flags_explicit);
  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
    rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
#ifdef XCOFF_DEBUGGING_INFO
  /* For AIX default to 64-bit DWARF.  */
  if (!OPTION_SET_P (dwarf_offset_size))
    dwarf_offset_size = POINTER_SIZE_UNITS;
#endif
  /* Handle explicit -mno-{altivec,vsx} and turn off all of
     the options that depend on those flags.  */
  ignore_masks = rs6000_disable_incompatible_switches ();
  /* For the newer switches (vsx, dfp, etc.) set some of the older options,
     unless the user explicitly used the -mno-<option> to disable the code.  */
  if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
    rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
  else if (TARGET_P9_MINMAX)
    {
      if (cpu_index >= 0)
        {
          if (cpu_index == PROCESSOR_POWER9)
            {
              /* legacy behavior: allow -mcpu=power9 with certain
                 capabilities explicitly disabled.  */
              rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
            }
          else
            error ("power9 target option is incompatible with %<%s=<xxx>%> "
                   "for <xxx> less than power9", "-mcpu");
        }
      else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
               != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
                   & rs6000_isa_flags_explicit))
        {
          /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
             were explicitly cleared.  */
          error ("%qs incompatible with explicitly disabled options",
                 "-mpower9-minmax");
        }
      else
        rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
    }
  else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
    rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
  else if (TARGET_VSX)
    rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
  else if (TARGET_POPCNTD)
    rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
  else if (TARGET_DFP)
    rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
  else if (TARGET_CMPB)
    rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
  else if (TARGET_FPRND)
    rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
  else if (TARGET_POPCNTB)
    rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
  else if (TARGET_ALTIVEC)
    rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
  /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
     target attribute or pragma which automatically enables both options,
     unless the altivec ABI was set.  This is set by default for 64-bit, but
     not for 32-bit.  Don't move this before the above code using ignore_masks,
     since it can reset the cleared VSX/ALTIVEC flag again.  */
  if (main_target_opt && !main_target_opt->x_rs6000_altivec_abi)
    rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC)
                          & ~rs6000_isa_flags_explicit);
  if (TARGET_CRYPTO && !TARGET_ALTIVEC)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
        error ("%qs requires %qs", "-mcrypto", "-maltivec");
      rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
    }
  if (!TARGET_FPRND && TARGET_VSX)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
        /* TARGET_VSX = 1 implies Power 7 and newer.  */
        error ("%qs requires %qs", "-mvsx", "-mfprnd");
      rs6000_isa_flags &= ~OPTION_MASK_FPRND;
    }
  if (TARGET_DIRECT_MOVE && !TARGET_VSX)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
        error ("%qs requires %qs", "-mdirect-move", "-mvsx");
      rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
    }
  if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
    rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;

  if (TARGET_P8_VECTOR && !TARGET_VSX)
    rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
  if (TARGET_DFP && !TARGET_HARD_FLOAT)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
        error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
      rs6000_isa_flags &= ~OPTION_MASK_DFP;
    }
  /* The quad memory instructions only work in 64-bit mode.  In 32-bit mode,
     silently turn off quad memory mode.  */
  if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
        warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));

      if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
        warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));

      rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
                            | OPTION_MASK_QUAD_MEMORY_ATOMIC);
    }
  /* Non-atomic quad memory load/store are disabled for little endian, since
     the words are reversed, but atomic operations can still be done by
     swapping the words.  */
  if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
        warning (0, N_("%<-mquad-memory%> is not available in little endian "
                       "mode"));

      rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
    }
  /* Assume if the user asked for normal quad memory instructions, they want
     the atomic versions as well, unless they explicitly told us not to use
     quad word atomic instructions.  */
  if (TARGET_QUAD_MEMORY
      && !TARGET_QUAD_MEMORY_ATOMIC
      && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
    rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
  /* If we can shrink-wrap the TOC register save separately, then use
     -msave-toc-indirect unless explicitly disabled.  */
  if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
      && flag_shrink_wrap_separate
      && optimize_function_for_speed_p (cfun))
    rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
  /* Enable power8 fusion if we are tuning for power8, even if we aren't
     generating power8 instructions.  Power9 does not optimize power8 fusion
     cases.  */
  if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
    {
      if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
        rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
      else
        rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
    }
  /* Setting additional fusion flags turns on base fusion.  */
  if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
        {
          if (TARGET_P8_FUSION_SIGN)
            error ("%qs requires %qs", "-mpower8-fusion-sign",
                   "-mpower8-fusion");

          rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
        }
      else
        rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
    }
  /* Power8 does not fuse sign extended loads with the addis.  If we are
     optimizing at high levels for speed, convert a sign extended load into a
     zero extending load, and an explicit sign extension.  */
  if (TARGET_P8_FUSION
      && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
      && optimize_function_for_speed_p (cfun)
      && optimize >= 3)
    rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
  /* ISA 3.0 vector instructions include ISA 2.07.  */
  if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
    rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
  /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
     support.  If we only have ISA 2.06 support, and the user did not specify
     the switch, leave it set to -1 so the movmisalign patterns are enabled,
     but we don't enable the full vectorization support.  */
  if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
    TARGET_ALLOW_MOVMISALIGN = 1;

  else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
    {
      if (TARGET_ALLOW_MOVMISALIGN > 0
          && OPTION_SET_P (TARGET_ALLOW_MOVMISALIGN))
        error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");

      TARGET_ALLOW_MOVMISALIGN = 0;
    }
  /* Determine when unaligned vector accesses are permitted, and when
     they are preferred over masked Altivec loads.  Note that if
     TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
     TARGET_EFFICIENT_UNALIGNED_VSX must be as well.  The converse is
     not true.  */
  if (TARGET_EFFICIENT_UNALIGNED_VSX)
    {
      if (!TARGET_VSX)
        {
          if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
            error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");

          rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
        }

      else if (!TARGET_ALLOW_MOVMISALIGN)
        {
          if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
            error ("%qs requires %qs", "-munefficient-unaligned-vsx",
                   "-mallow-movmisalign");

          rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
        }
    }
  if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX))
    {
      if (TARGET_EFFICIENT_UNALIGNED_VSX)
        rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
      else
        rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
    }
  /* Use long double size to select the appropriate long double.  We use
     TYPE_PRECISION to differentiate the 3 different long double types.  We map
     128 into the precision used for TFmode.  */
  int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
                                  ? 64
                                  : FLOAT_PRECISION_TFmode);

  /* Set long double size before the IEEE 128-bit tests.  */
  if (!OPTION_SET_P (rs6000_long_double_type_size))
    {
      if (main_target_opt != NULL
          && (main_target_opt->x_rs6000_long_double_type_size
              != default_long_double_size))
        error ("target attribute or pragma changes %<long double%> size");
      else
        rs6000_long_double_type_size = default_long_double_size;
    }
  else if (rs6000_long_double_type_size == FLOAT_PRECISION_TFmode)
    ; /* The option value can be seen when cl_target_option_restore is called.  */
  else if (rs6000_long_double_type_size == 128)
    rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
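
  /* Illustration (hypothetical invocation): with -mlong-double-128 the
     option value 128 is canonicalized to FLOAT_PRECISION_TFmode above,
     so later code can tell the three long double variants apart
     (64-bit, IBM double-double, IEEE 128-bit) purely by their
     TYPE_PRECISION values; the exact precision constants come from the
     mode definitions rather than this function.  */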
  /* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
     systems will also set long double to be IEEE 128-bit.  AIX and Darwin
     explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
     those systems will not pick up this default.  Warn if the user changes the
     default unless -Wno-psabi.  */
  if (!OPTION_SET_P (rs6000_ieeequad))
    rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;

  else if (TARGET_LONG_DOUBLE_128)
    {
      if (global_options.x_rs6000_ieeequad
          && (!TARGET_POPCNTD || !TARGET_VSX))
        error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");

      if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT)
        {
          /* Determine if the user can change the default long double type at
             compilation time.  You need GLIBC 2.32 or newer to be able to
             change the long double type.  Only issue one warning.  */
          static bool warned_change_long_double;

          if (!warned_change_long_double && !glibc_supports_ieee_128bit ())
            {
              warned_change_long_double = true;
              if (TARGET_IEEEQUAD)
                warning (OPT_Wpsabi, "Using IEEE extended precision "
                         "%<long double%>");
              else
                warning (OPT_Wpsabi, "Using IBM extended precision "
                         "%<long double%>");
            }
        }
    }
  /* Enable the default support for IEEE 128-bit floating point on Linux VSX
     systems.  In GCC 7, we would enable the IEEE 128-bit floating point
     infrastructure (-mfloat128-type) but not enable the actual __float128 type
     unless the user used the explicit -mfloat128.  In GCC 8, we enable both
     the keyword as well as the type.  */
  TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
  /* IEEE 128-bit floating point requires VSX support.  */
  if (TARGET_FLOAT128_KEYWORD)
    {
      if (!TARGET_VSX)
        {
          if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
            error ("%qs requires VSX support", "-mfloat128");

          TARGET_FLOAT128_TYPE = 0;
          rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
                                | OPTION_MASK_FLOAT128_HW);
        }
      else if (!TARGET_FLOAT128_TYPE)
        {
          TARGET_FLOAT128_TYPE = 1;
          warning (0, "The %<-mfloat128%> option may not be fully supported");
        }
    }
  /* Enable the __float128 keyword under Linux by default.  */
  if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
      && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
    rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
  /* If we are supporting the float128 type and full ISA 3.0 support,
     enable -mfloat128-hardware by default.  However, don't enable the
     __float128 keyword if it was explicitly turned off.  64-bit mode is needed
     because sometimes the compiler wants to put things in an integer
     container, and if we don't have __int128 support, it is impossible.  */
  if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
      && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
      && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
    rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
  if (TARGET_FLOAT128_HW
      && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
        error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");

      rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
    }
  if (TARGET_FLOAT128_HW && !TARGET_64BIT)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
        error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");

      rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
    }
  /* Enable -mprefixed by default on power10 systems.  */
  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
    rs6000_isa_flags |= OPTION_MASK_PREFIXED;

  /* -mprefixed requires -mcpu=power10 (or later).  */
  else if (TARGET_PREFIXED && !TARGET_POWER10)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
        error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");

      rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
    }

  /* -mpcrel requires prefixed load/store addressing.  */
  if (TARGET_PCREL && !TARGET_PREFIXED)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
        error ("%qs requires %qs", "-mpcrel", "-mprefixed");

      rs6000_isa_flags &= ~OPTION_MASK_PCREL;
    }
  /* Print the options after updating the defaults.  */
  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
    rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
  /* E500mc does "better" if we inline more aggressively.  Respect the
     user's opinion, though.  */
  if (rs6000_block_move_inline_limit == 0
      && (rs6000_tune == PROCESSOR_PPCE500MC
          || rs6000_tune == PROCESSOR_PPCE500MC64
          || rs6000_tune == PROCESSOR_PPCE5500
          || rs6000_tune == PROCESSOR_PPCE6500))
    rs6000_block_move_inline_limit = 128;

  /* store_one_arg depends on expand_block_move to handle at least the
     size of reg_parm_stack_space.  */
  if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
    rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
  /* If the appropriate debug option is enabled, replace the target hooks
     with debug versions that call the real version and then print
     debugging information.  */
  if (TARGET_DEBUG_COST)
    {
      targetm.rtx_costs = rs6000_debug_rtx_costs;
      targetm.address_cost = rs6000_debug_address_cost;
      targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
    }

  if (TARGET_DEBUG_ADDR)
    {
      targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
      targetm.legitimize_address = rs6000_debug_legitimize_address;
      rs6000_secondary_reload_class_ptr
        = rs6000_debug_secondary_reload_class;
      targetm.secondary_memory_needed
        = rs6000_debug_secondary_memory_needed;
      targetm.can_change_mode_class
        = rs6000_debug_can_change_mode_class;
      rs6000_preferred_reload_class_ptr
        = rs6000_debug_preferred_reload_class;
      rs6000_mode_dependent_address_ptr
        = rs6000_debug_mode_dependent_address;
    }
  if (rs6000_veclibabi_name)
    {
      if (strcmp (rs6000_veclibabi_name, "mass") == 0)
        rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
      else
        error ("unknown vectorization library ABI type in "
               "%<-mveclibabi=%s%>", rs6000_veclibabi_name);
    }
  /* Enable Altivec ABI for AIX -maltivec.  */
  if (TARGET_XCOFF
      && (TARGET_ALTIVEC || TARGET_VSX)
      && !OPTION_SET_P (rs6000_altivec_abi))
    {
      if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
        error ("target attribute or pragma changes AltiVec ABI");
      else
        rs6000_altivec_abi = 1;
    }
  /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux.  For
     PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI.  It can
     be explicitly overridden in either case.  */
  if (TARGET_ELF)
    {
      if (!OPTION_SET_P (rs6000_altivec_abi)
          && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
        {
          if (main_target_opt != NULL &&
              !main_target_opt->x_rs6000_altivec_abi)
            error ("target attribute or pragma changes AltiVec ABI");
          else
            rs6000_altivec_abi = 1;
        }
    }
  /* Set the Darwin64 ABI as default for 64-bit Darwin.
     So far, the only darwin64 targets are also MACH-O.  */
  if (TARGET_MACHO
      && DEFAULT_ABI == ABI_DARWIN
      && TARGET_64BIT)
    {
      if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
        error ("target attribute or pragma changes darwin64 ABI");
      else
        {
          rs6000_darwin64_abi = 1;
          /* Default to natural alignment, for better performance.  */
          rs6000_alignment_flags = MASK_ALIGN_NATURAL;
        }
    }
  /* Place FP constants in the constant pool instead of TOC
     if section anchors enabled.  */
  if (flag_section_anchors
      && !OPTION_SET_P (TARGET_NO_FP_IN_TOC))
    TARGET_NO_FP_IN_TOC = 1;
  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
    rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif
#ifdef SUB3TARGET_OVERRIDE_OPTIONS
  SUB3TARGET_OVERRIDE_OPTIONS;
#endif
  /* If the ABI has support for PC-relative relocations, enable it by default.
     This test depends on the sub-target tests above setting the code model to
     medium for ELF v2 systems.  */
  if (PCREL_SUPPORTED_BY_OS
      && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
    rs6000_isa_flags |= OPTION_MASK_PCREL;

  /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
     after the subtarget override options are done.  */
  else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
        error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");

      rs6000_isa_flags &= ~OPTION_MASK_PCREL;
    }
  /* Enable -mmma by default on power10 systems.  */
  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
    rs6000_isa_flags |= OPTION_MASK_MMA;

  /* Turn off vector pair/mma options on non-power10 systems.  */
  else if (!TARGET_POWER10 && TARGET_MMA)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
        error ("%qs requires %qs", "-mmma", "-mcpu=power10");

      rs6000_isa_flags &= ~OPTION_MASK_MMA;
    }
  /* Enable power10 fusion if we are tuning for power10, even if we aren't
     generating power10 instructions.  */
  if (!(rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION))
    {
      if (rs6000_tune == PROCESSOR_POWER10
          || rs6000_tune == PROCESSOR_POWER11)
        rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
      else
        rs6000_isa_flags &= ~OPTION_MASK_P10_FUSION;
    }
  /* MMA requires SIMD support, as ISA 3.1 claims, and our implementation
     (such as "*movoo") uses vector pair accesses, which use VSX registers.
     So make MMA require VSX support here.  */
  if (TARGET_MMA && !TARGET_VSX)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
        error ("%qs requires %qs", "-mmma", "-mvsx");
      rs6000_isa_flags &= ~OPTION_MASK_MMA;
    }
  if (!TARGET_PCREL && TARGET_PCREL_OPT)
    rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;

  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
    rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
  rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
                        && rs6000_tune != PROCESSOR_POWER5
                        && rs6000_tune != PROCESSOR_POWER6
                        && rs6000_tune != PROCESSOR_POWER7
                        && rs6000_tune != PROCESSOR_POWER8
                        && rs6000_tune != PROCESSOR_POWER9
                        && rs6000_tune != PROCESSOR_POWER10
                        && rs6000_tune != PROCESSOR_POWER11
                        && rs6000_tune != PROCESSOR_PPCA2
                        && rs6000_tune != PROCESSOR_CELL
                        && rs6000_tune != PROCESSOR_PPC476);
  rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
                         || rs6000_tune == PROCESSOR_POWER5
                         || rs6000_tune == PROCESSOR_POWER7
                         || rs6000_tune == PROCESSOR_POWER8);
  rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
                                 || rs6000_tune == PROCESSOR_POWER5
                                 || rs6000_tune == PROCESSOR_POWER6
                                 || rs6000_tune == PROCESSOR_POWER7
                                 || rs6000_tune == PROCESSOR_POWER8
                                 || rs6000_tune == PROCESSOR_POWER9
                                 || rs6000_tune == PROCESSOR_POWER10
                                 || rs6000_tune == PROCESSOR_POWER11
                                 || rs6000_tune == PROCESSOR_PPCE500MC
                                 || rs6000_tune == PROCESSOR_PPCE500MC64
                                 || rs6000_tune == PROCESSOR_PPCE5500
                                 || rs6000_tune == PROCESSOR_PPCE6500);
  /* Allow debug switches to override the above settings.  These are set to -1
     in rs6000.opt to indicate the user hasn't directly set the switch.  */
  if (TARGET_ALWAYS_HINT >= 0)
    rs6000_always_hint = TARGET_ALWAYS_HINT;

  if (TARGET_SCHED_GROUPS >= 0)
    rs6000_sched_groups = TARGET_SCHED_GROUPS;

  if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
    rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
  rs6000_sched_restricted_insns_priority
    = (rs6000_sched_groups ? 1 : 0);

  /* Handle -msched-costly-dep option.  */
  rs6000_sched_costly_dep
    = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);

  if (rs6000_sched_costly_dep_str)
    {
      if (! strcmp (rs6000_sched_costly_dep_str, "no"))
        rs6000_sched_costly_dep = no_dep_costly;
      else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
        rs6000_sched_costly_dep = all_deps_costly;
      else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
        rs6000_sched_costly_dep = true_store_to_load_dep_costly;
      else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
        rs6000_sched_costly_dep = store_to_load_dep_costly;
      else
        rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
                                   atoi (rs6000_sched_costly_dep_str));
    }
  /* Handle -minsert-sched-nops option.  */
  rs6000_sched_insert_nops
    = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);

  if (rs6000_sched_insert_nops_str)
    {
      if (! strcmp (rs6000_sched_insert_nops_str, "no"))
        rs6000_sched_insert_nops = sched_finish_none;
      else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
        rs6000_sched_insert_nops = sched_finish_pad_groups;
      else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
        rs6000_sched_insert_nops = sched_finish_regroup_exact;
      else
        rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
                                    atoi (rs6000_sched_insert_nops_str));
    }
  /* Handle stack protector.  */
  if (!OPTION_SET_P (rs6000_stack_protector_guard))
#ifdef TARGET_THREAD_SSP_OFFSET
    rs6000_stack_protector_guard = SSP_TLS;
#else
    rs6000_stack_protector_guard = SSP_GLOBAL;
#endif

#ifdef TARGET_THREAD_SSP_OFFSET
  rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
  rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
#endif

  if (OPTION_SET_P (rs6000_stack_protector_guard_offset_str))
    {
      char *endp;
      const char *str = rs6000_stack_protector_guard_offset_str;

      errno = 0;
      long offset = strtol (str, &endp, 0);
      if (!*str || *endp || errno)
        error ("%qs is not a valid number in %qs", str,
               "-mstack-protector-guard-offset=");

      if (!IN_RANGE (offset, -0x8000, 0x7fff)
          || (TARGET_64BIT && (offset & 3)))
        error ("%qs is not a valid offset in %qs", str,
               "-mstack-protector-guard-offset=");

      rs6000_stack_protector_guard_offset = offset;
    }

  if (OPTION_SET_P (rs6000_stack_protector_guard_reg_str))
    {
      const char *str = rs6000_stack_protector_guard_reg_str;
      int reg = decode_reg_name (str);

      if (!IN_RANGE (reg, 1, 31))
        error ("%qs is not a valid base register in %qs", str,
               "-mstack-protector-guard-reg=");

      rs6000_stack_protector_guard_reg = reg;
    }

  if (rs6000_stack_protector_guard == SSP_TLS
      && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
    error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
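
  /* Parsing sketch (hypothetical command line): for
     -mstack-protector-guard-offset=0x28, strtol with base 0 accepts the
     hex form and yields 40.  That value passes the checks above because
     it lies within [-0x8000, 0x7fff] and, in 64-bit mode, is a multiple
     of 4; an odd or out-of-range value would be rejected with the
     errors above.  */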
#ifdef TARGET_REGNAMES
  /* If the user desires alternate register names, copy in the
     alternate names now.  */
  if (TARGET_REGNAMES)
    memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
#endif
  /* Set aix_struct_return last, after the ABI is determined.
     If -maix-struct-return or -msvr4-struct-return was explicitly
     used, don't override with the ABI default.  */
  if (!OPTION_SET_P (aix_struct_return))
    aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
  /* IBM XL compiler defaults to unsigned bitfields.  */
  if (TARGET_XL_COMPAT)
    flag_signed_bitfields = 0;

  if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
    REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
  ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);

  /* We can only guarantee the availability of DI pseudo-ops when
     assembling for 64-bit targets.  */
  if (!TARGET_64BIT)
    {
      targetm.asm_out.aligned_op.di = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }
  /* Set branch target alignment, if not optimizing for size.  */
  if (!optimize_size)
    {
      /* Cell wants to be aligned 8byte for dual issue.  Titan wants to be
         aligned 8byte to avoid misprediction by the branch predictor.  */
      if (rs6000_tune == PROCESSOR_TITAN
          || rs6000_tune == PROCESSOR_CELL)
        {
          if (flag_align_functions && !str_align_functions)
            str_align_functions = "8";
          if (flag_align_jumps && !str_align_jumps)
            str_align_jumps = "8";
          if (flag_align_loops && !str_align_loops)
            str_align_loops = "8";
        }
      if (rs6000_align_branch_targets)
        {
          if (flag_align_functions && !str_align_functions)
            str_align_functions = "16";
          if (flag_align_jumps && !str_align_jumps)
            str_align_jumps = "16";
          if (flag_align_loops && !str_align_loops)
            {
              can_override_loop_align = 1;
              str_align_loops = "16";
            }
        }
    }
  /* Arrange to save and restore machine status around nested functions.  */
  init_machine_status = rs6000_init_machine_status;

  /* We should always be splitting complex arguments, but we can't break
     Linux and Darwin ABIs at the moment.  For now, only AIX is fixed.  */
  if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
    targetm.calls.split_complex_arg = NULL;

  /* The AIX and ELFv1 ABIs define standard function descriptors.  */
  if (DEFAULT_ABI == ABI_AIX)
    targetm.calls.custom_function_descriptors = 0;
  /* Initialize rs6000_cost with the appropriate target costs.  */
  if (optimize_size)
    rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
  else
    switch (rs6000_tune)
      {
      case PROCESSOR_RS64A:
        rs6000_cost = &rs64a_cost;
        break;

      case PROCESSOR_MPCCORE:
        rs6000_cost = &mpccore_cost;
        break;

      case PROCESSOR_PPC403:
        rs6000_cost = &ppc403_cost;
        break;

      case PROCESSOR_PPC405:
        rs6000_cost = &ppc405_cost;
        break;

      case PROCESSOR_PPC440:
        rs6000_cost = &ppc440_cost;
        break;

      case PROCESSOR_PPC476:
        rs6000_cost = &ppc476_cost;
        break;

      case PROCESSOR_PPC601:
        rs6000_cost = &ppc601_cost;
        break;

      case PROCESSOR_PPC603:
        rs6000_cost = &ppc603_cost;
        break;

      case PROCESSOR_PPC604:
        rs6000_cost = &ppc604_cost;
        break;

      case PROCESSOR_PPC604e:
        rs6000_cost = &ppc604e_cost;
        break;

      case PROCESSOR_PPC620:
        rs6000_cost = &ppc620_cost;
        break;

      case PROCESSOR_PPC630:
        rs6000_cost = &ppc630_cost;
        break;

      case PROCESSOR_CELL:
        rs6000_cost = &ppccell_cost;
        break;

      case PROCESSOR_PPC750:
      case PROCESSOR_PPC7400:
        rs6000_cost = &ppc750_cost;
        break;

      case PROCESSOR_PPC7450:
        rs6000_cost = &ppc7450_cost;
        break;

      case PROCESSOR_PPC8540:
      case PROCESSOR_PPC8548:
        rs6000_cost = &ppc8540_cost;
        break;

      case PROCESSOR_PPCE300C2:
      case PROCESSOR_PPCE300C3:
        rs6000_cost = &ppce300c2c3_cost;
        break;

      case PROCESSOR_PPCE500MC:
        rs6000_cost = &ppce500mc_cost;
        break;

      case PROCESSOR_PPCE500MC64:
        rs6000_cost = &ppce500mc64_cost;
        break;

      case PROCESSOR_PPCE5500:
        rs6000_cost = &ppce5500_cost;
        break;

      case PROCESSOR_PPCE6500:
        rs6000_cost = &ppce6500_cost;
        break;

      case PROCESSOR_TITAN:
        rs6000_cost = &titan_cost;
        break;

      case PROCESSOR_POWER4:
      case PROCESSOR_POWER5:
        rs6000_cost = &power4_cost;
        break;

      case PROCESSOR_POWER6:
        rs6000_cost = &power6_cost;
        break;

      case PROCESSOR_POWER7:
        rs6000_cost = &power7_cost;
        break;

      case PROCESSOR_POWER8:
        rs6000_cost = &power8_cost;
        break;

      case PROCESSOR_POWER9:
        rs6000_cost = &power9_cost;
        break;

      case PROCESSOR_POWER10:
      case PROCESSOR_POWER11:
        rs6000_cost = &power10_cost;
        break;

      case PROCESSOR_PPCA2:
        rs6000_cost = &ppca2_cost;
        break;

      default:
        gcc_unreachable ();
      }
  if (global_init_p)
    {
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                           param_simultaneous_prefetches,
                           rs6000_cost->simultaneous_prefetches);
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                           param_l1_cache_size,
                           rs6000_cost->l1_cache_size);
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                           param_l1_cache_line_size,
                           rs6000_cost->cache_line_size);
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                           param_l2_cache_size,
                           rs6000_cost->l2_cache_size);

      /* Increase loop peeling limits based on performance analysis.  */
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                           param_max_peeled_insns, 400);
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                           param_max_completely_peeled_insns, 400);

      /* The lxvl/stxvl instructions don't perform well before Power10.  */
      if (TARGET_POWER10)
        SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                             param_vect_partial_vector_usage, 1);
      else
        SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                             param_vect_partial_vector_usage, 0);

      /* Use the 'model' -fsched-pressure algorithm by default.  */
      SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                           param_sched_pressure_algorithm,
                           SCHED_PRESSURE_MODEL);

      /* If using typedef char *va_list, signal that
         __builtin_va_start (&ap, 0) can be optimized to
         ap = __builtin_next_arg (0).  */
      if (DEFAULT_ABI != ABI_V4)
        targetm.expand_builtin_va_start = NULL;
    }
  rs6000_override_options_after_change ();

  /* If not explicitly specified via option, decide whether to generate indexed
     load/store instructions.  A value of -1 indicates that the
     initial value of this variable has not been overwritten.  During
     compilation, TARGET_AVOID_XFORM is either 0 or 1.  */
  if (TARGET_AVOID_XFORM == -1)
    /* Avoid indexed addressing when targeting Power6 in order to avoid the
       DERAT mispredict penalty.  However the LVE and STVE altivec instructions
       need indexed accesses and the type used is the scalar type of the element
       being loaded or stored.  */
    TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
                          && !TARGET_ALTIVEC);
  /* Set the -mrecip options.  */
  if (rs6000_recip_name)
    {
      char *p = ASTRDUP (rs6000_recip_name);
      char *q;
      unsigned int mask, i;
      bool invert;

      while ((q = strtok (p, ",")) != NULL)
        {
          p = NULL;
          if (*q == '!')
            {
              invert = true;
              q++;
            }
          else
            invert = false;

          if (!strcmp (q, "default"))
            mask = ((TARGET_RECIP_PRECISION)
                    ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
          else
            {
              for (i = 0; i < ARRAY_SIZE (recip_options); i++)
                if (!strcmp (q, recip_options[i].string))
                  {
                    mask = recip_options[i].mask;
                    break;
                  }

              if (i == ARRAY_SIZE (recip_options))
                {
                  error ("unknown option for %<%s=%s%>", "-mrecip", q);
                  invert = false;
                  mask = 0;
                  ret = false;
                }
            }

          if (invert)
            rs6000_recip_control &= ~mask;
          else
            rs6000_recip_control |= mask;
        }
    }
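
  /* Example (hypothetical option string): -mrecip=rsqrtf,!divd is split
     by strtok into "rsqrtf" and "!divd"; the first sets the matching
     recip_options mask in rs6000_recip_control, while the leading '!'
     on the second clears its mask instead.  */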
  /* Initialize all of the registers.  */
  rs6000_init_hard_regno_mode_ok (global_init_p);

  /* Save the initial options in case the user does function specific
     options.  */
  if (global_init_p)
    target_option_default_node = target_option_current_node
      = build_target_option_node (&global_options, &global_options_set);

  /* If not explicitly specified via option, decide whether to generate the
     extra blr's required to preserve the link stack on some cpus (eg, 476).  */
  if (TARGET_LINK_STACK == -1)
    SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);

  /* Deprecate use of -mno-speculate-indirect-jumps.  */
  if (!rs6000_speculate_indirect_jumps)
    warning (0, "%qs is deprecated and not recommended in any circumstances",
             "-mno-speculate-indirect-jumps");

  return ret;
}
/* Implement TARGET_OPTION_OVERRIDE.  On the RS/6000 this is used to
   define the target cpu type.  */

static void
rs6000_option_override (void)
{
  (void) rs6000_option_override_internal (true);
}
/* Implement LOOP_ALIGN.  */

align_flags
rs6000_loop_align (rtx label)
{
  basic_block bb;
  int ninsns;

  /* Don't override loop alignment if -falign-loops was specified.  */
  if (!can_override_loop_align)
    return align_loops;

  bb = BLOCK_FOR_INSN (label);
  ninsns = num_loop_insns (bb->loop_father);

  /* Align small loops to 32 bytes to fit in an icache sector, otherwise
     return the default.  */
  if (ninsns > 4 && ninsns <= 8
      && (rs6000_tune == PROCESSOR_POWER4
          || rs6000_tune == PROCESSOR_POWER5
          || rs6000_tune == PROCESSOR_POWER6
          || rs6000_tune == PROCESSOR_POWER7
          || rs6000_tune == PROCESSOR_POWER8))
    return align_flags (5);
  else
    return align_loops;
}
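
/* Worked example (illustrative): align_flags (5) requests 2^5 = 32-byte
   alignment, so on the processors listed above a loop of, say, 6
   instructions is placed at the start of a 32-byte icache sector;
   larger or smaller loops keep the -falign-loops default.  */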
/* Return true iff a data reference of TYPE can reach vector alignment (16)
   after applying N iterations.  This routine does not determine how many
   iterations are required to reach the desired alignment.  */

static bool
rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
{
  if (is_packed)
    return false;

  if (TARGET_32BIT)
    {
      if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
        return true;

      if (rs6000_alignment_flags == MASK_ALIGN_POWER)
        return true;

      return false;
    }
  else
    {
      if (TARGET_MACHO)
        return false;

      /* Assuming that all other types are naturally aligned.  CHECKME!  */
      return true;
    }
}
/* Return true if the vector misalignment factor is supported by the
   target.  */
static bool
rs6000_builtin_support_vector_misalignment (machine_mode mode,
                                            const_tree type,
                                            int misalignment,
                                            bool is_packed)
{
  if (TARGET_VSX)
    {
      if (TARGET_EFFICIENT_UNALIGNED_VSX)
        return true;

      /* Return if movmisalign pattern is not supported for this mode.  */
      if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
        return false;

      if (misalignment == -1)
        {
          /* Misalignment factor is unknown at compile time but we know
             it's word aligned.  */
          if (rs6000_vector_alignment_reachable (type, is_packed))
            {
              int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));

              if (element_size == 64 || element_size == 32)
                return true;
            }

          return false;
        }

      /* VSX supports word-aligned vector.  */
      if (misalignment % 4 == 0)
        return true;
    }
  return false;
}
/* Implement targetm.vectorize.builtin_vectorization_cost.  */

static int
rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                   tree vectype, int misalign)
{
  unsigned elements;
  tree elem_type;

  switch (type_of_cost)
    {
      case scalar_stmt:
      case scalar_store:
      case vector_stmt:
      case vector_store:
      case vec_to_scalar:
      case scalar_to_vec:
      case cond_branch_not_taken:
        return 1;
      case scalar_load:
      case vector_load:
        /* Like rs6000_insn_cost, make load insns cost a bit more.  */
        return 2;

      case vec_perm:
        /* Power7 has only one permute unit, make it a bit expensive.  */
        if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
          return 3;
        else
          return 1;

      case vec_promote_demote:
        /* Power7 has only one permute/pack unit, make it a bit expensive.  */
        if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
          return 4;
        else
          return 1;

      case cond_branch_taken:
        return 3;

      case unaligned_load:
      case vector_gather_load:
        /* Like rs6000_insn_cost, make load insns cost a bit more.  */
        if (TARGET_EFFICIENT_UNALIGNED_VSX)
          return 2;

        if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
          {
            elements = TYPE_VECTOR_SUBPARTS (vectype);
            /* See PR102767, consider V1TI to keep consistency.  */
            if (elements == 2 || elements == 1)
              /* Double word aligned.  */
              return 4;

            if (elements == 4)
              {
                switch (misalign)
                  {
                    case 8:
                      /* Double word aligned.  */
                      return 4;

                    case -1:
                      /* Unknown misalignment.  */
                    case 4:
                    case 12:
                      /* Word aligned.  */
                      return 33;

                    default:
                      gcc_unreachable ();
                  }
              }
          }

        if (TARGET_ALTIVEC)
          /* Misaligned loads are not supported.  */
          gcc_unreachable ();

        /* Like rs6000_insn_cost, make load insns cost a bit more.  */
        return 4;

      case unaligned_store:
      case vector_scatter_store:
        if (TARGET_EFFICIENT_UNALIGNED_VSX)
          return 1;

        if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
          {
            elements = TYPE_VECTOR_SUBPARTS (vectype);
            /* See PR102767, consider V1TI to keep consistency.  */
            if (elements == 2 || elements == 1)
              /* Double word aligned.  */
              return 2;

            if (elements == 4)
              {
                switch (misalign)
                  {
                    case 8:
                      /* Double word aligned.  */
                      return 2;

                    case -1:
                      /* Unknown misalignment.  */
                    case 4:
                    case 12:
                      /* Word aligned.  */
                      return 23;

                    default:
                      gcc_unreachable ();
                  }
              }
          }

        if (TARGET_ALTIVEC)
          /* Misaligned stores are not supported.  */
          gcc_unreachable ();

        return 2;

      case vec_construct:
        /* This is a rough approximation assuming non-constant elements
           constructed into a vector via element insertion.  FIXME:
           vec_construct is not granular enough for uniformly good
           decisions.  If the initialization is a splat, this is
           cheaper than we estimate.  Improve this someday.  */
        elem_type = TREE_TYPE (vectype);
        /* 32-bit vectors loaded into registers are stored as double
           precision, so we need 2 permutes, 2 converts, and 1 merge
           to construct a vector of short floats from them.  */
        if (SCALAR_FLOAT_TYPE_P (elem_type)
            && TYPE_PRECISION (elem_type) == 32)
          return 5;
        /* On POWER9, integer vector types are built up in GPRs and then
           use a direct move (2 cycles).  For POWER8 this is even worse,
           as we need two direct moves and a merge, and the direct moves
           are five cycles.  */
        else if (INTEGRAL_TYPE_P (elem_type))
          {
            if (TARGET_P9_VECTOR)
              return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
            else
              return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
          }
        else
          /* V2DFmode doesn't need a direct move.  */
          return 2;

      default:
        gcc_unreachable ();
    }
}
/* Implement targetm.vectorize.preferred_simd_mode.  */

static machine_mode
rs6000_preferred_simd_mode (scalar_mode mode)
{
  opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));

  if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
    return vmode.require ();

  return word_mode;
}
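
/* Example (illustrative): for a 4-byte element such as SFmode this asks
   mode_for_vector for 16/4 = 4 lanes, i.e. V4SFmode, which is returned
   when VSX or Altivec provides vector memory support for it; otherwise
   the fallback mode above is used.  */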
class rs6000_cost_data : public vector_costs
{
public:
  using vector_costs::vector_costs;

  unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
                              stmt_vec_info stmt_info, slp_tree, tree vectype,
                              int misalign,
                              vect_cost_model_location where) override;
  void finish_cost (const vector_costs *) override;

protected:
  void update_target_cost_per_stmt (vect_cost_for_stmt, stmt_vec_info,
                                    vect_cost_model_location, unsigned int);
  void density_test (loop_vec_info);
  void adjust_vect_cost_per_loop (loop_vec_info);
  unsigned int determine_suggested_unroll_factor (loop_vec_info);

  /* Total number of vectorized stmts (loop only).  */
  unsigned m_nstmts = 0;
  /* Total number of loads (loop only).  */
  unsigned m_nloads = 0;
  /* Total number of stores (loop only).  */
  unsigned m_nstores = 0;
  /* Reduction factor for suggesting unroll factor (loop only).  */
  unsigned m_reduc_factor = 0;
  /* Possible extra penalized cost on vector construction (loop only).  */
  unsigned m_extra_ctor_cost = 0;
  /* For each vectorized loop, this var holds TRUE iff a non-memory vector
     instruction is needed by the vectorization.  */
  bool m_vect_nonmem = false;
  /* If this loop gets vectorized with emulated gather load.  */
  bool m_gather_load = false;
};
/* Test for likely overcommitment of vector hardware resources.  If a
   loop iteration is relatively large, and too large a percentage of
   instructions in the loop are vectorized, the cost model may not
   adequately reflect delays from unavailable vector resources.
   Penalize the loop body cost for this case.  */

void
rs6000_cost_data::density_test (loop_vec_info loop_vinfo)
{
  /* This density test only cares about the cost of the vector version of the
     loop, so immediately return if we are passed costing for the scalar
     version (namely computing single scalar iteration cost).  */
  if (m_costing_for_scalar)
    return;

  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = get_loop_body (loop);
  int nbbs = loop->num_nodes;
  int vec_cost = m_costs[vect_body], not_vec_cost = 0;

  for (int i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          if (is_gimple_debug (stmt))
            continue;

          stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_IN_PATTERN_P (stmt_info))
            not_vec_cost++;
        }
    }

  free (bbs);
  int density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);

  if (density_pct > rs6000_density_pct_threshold
      && vec_cost + not_vec_cost > rs6000_density_size_threshold)
    {
      m_costs[vect_body] = vec_cost * (100 + rs6000_density_penalty) / 100;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "density %d%%, cost %d exceeds threshold, penalizing "
                         "loop body cost by %u%%\n", density_pct,
                         vec_cost + not_vec_cost, rs6000_density_penalty);
    }

  /* Check whether we need to penalize the body cost to account
     for excess strided or elementwise loads.  */
  if (m_extra_ctor_cost > 0)
    {
      gcc_assert (m_nloads <= m_nstmts);
      unsigned int load_pct = (m_nloads * 100) / m_nstmts;

      /* It's likely to be bounded by latency and execution resources
         from many scalar loads which are strided or elementwise loads
         into a vector if both conditions below are found:
           1. there are many loads, it's easy to result in a long wait
              for load units;
           2. load has a big proportion of all vectorized statements,
              it's not easy to schedule other statements to spread among
              the loads.
         One typical case is the innermost loop of the hotspot of SPEC2017
         503.bwaves_r without loop interchange.  */
      if (m_nloads > (unsigned int) rs6000_density_load_num_threshold
          && load_pct > (unsigned int) rs6000_density_load_pct_threshold)
        {
          m_costs[vect_body] += m_extra_ctor_cost;
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Found %u loads and "
                             "load pct. %u%% exceed "
                             "the threshold, "
                             "penalizing loop body "
                             "cost by extra cost %u "
                             "for ctor.\n",
                             m_nloads, load_pct,
                             m_extra_ctor_cost);
        }
    }
}
/* Implement targetm.vectorize.create_costs.  */

static vector_costs *
rs6000_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
{
  return new rs6000_cost_data (vinfo, costing_for_scalar);
}
/* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
   For some statements, we would like to further fine-grain tweak the cost on
   top of the rs6000_builtin_vectorization_cost handling, which doesn't have
   any information on statement operation codes etc.  One typical case here is
   COND_EXPR: it takes the same cost as a simple FXU instruction when
   evaluating the scalar cost, but it should be priced higher whether it is
   transformed to compare + branch or to compare + isel instructions.  */

static unsigned
rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
                                  struct _stmt_vec_info *stmt_info)
{
  if (kind == scalar_stmt && stmt_info && stmt_info->stmt
      && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
    {
      tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
      if (subcode == COND_EXPR)
        return 1;
    }

  return 0;
}
/* Helper function for add_stmt_cost.  Check each statement cost
   entry, gather information and update the target_cost fields
   accordingly.  */
void
rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
                                               stmt_vec_info stmt_info,
                                               vect_cost_model_location where,
                                               unsigned int orig_count)
{
  /* Check whether we're doing something other than just a copy loop.
     Not all such loops may be profitably vectorized; see
     rs6000_finish_cost.  */
  if (kind == vec_to_scalar
      || kind == vec_perm
      || kind == vec_promote_demote
      || kind == vec_construct
      || kind == scalar_to_vec
      || (where == vect_body && kind == vector_stmt))
    m_vect_nonmem = true;

  /* Gather some information when we are costing the vectorized instruction
     for the statements located in a loop body.  */
  if (!m_costing_for_scalar
      && is_a <loop_vec_info> (m_vinfo)
      && where == vect_body)
    {
      m_nstmts += orig_count;

      if (kind == scalar_load
          || kind == vector_load
          || kind == unaligned_load
          || kind == vector_gather_load)
        {
          m_nloads += orig_count;
          if (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
            m_gather_load = true;
        }
      else if (kind == scalar_store
               || kind == vector_store
               || kind == unaligned_store
               || kind == vector_scatter_store)
        m_nstores += orig_count;
      else if ((kind == scalar_stmt
                || kind == vector_stmt
                || kind == vec_to_scalar)
               && stmt_info
               && vect_is_reduction (stmt_info))
        {
          /* Loop body contains normal int or fp operations and epilogue
             contains vector reduction.  For simplicity, we assume int
             operation takes one cycle and fp operation takes one more.  */
          tree lhs = gimple_get_lhs (stmt_info->stmt);
          bool is_float = FLOAT_TYPE_P (TREE_TYPE (lhs));
          unsigned int basic_cost = is_float ? 2 : 1;
          m_reduc_factor = MAX (basic_cost * orig_count, m_reduc_factor);
        }

      /* Power processors do not currently have instructions for strided
         and elementwise loads, and instead we must generate multiple
         scalar loads.  This leads to undercounting of the cost.  We
         account for this by scaling the construction cost by the number
         of elements involved, and saving this as extra cost that we may
         or may not need to apply.  When finalizing the cost of the loop,
         the extra penalty is applied when the load density heuristics
         are satisfied.  */
      if (kind == vec_construct && stmt_info
          && STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
          && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
              || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_STRIDED_SLP))
        {
          tree vectype = STMT_VINFO_VECTYPE (stmt_info);
          unsigned int nunits = vect_nunits_for_cost (vectype);
          /* As PR103702 shows, it's possible that the vectorizer wants to do
             costings for only one unit here; there's no need to do any
             penalization for it, so simply return early here.  */
          if (nunits == 1)
            return;
          /* The i386 port adopts nunits * stmt_cost as the penalized cost
             for this kind of penalization; we used to follow it but
             found it could result in an unreliable body cost especially
             for V16QI/V8HI modes.  To make it better, we choose this
             new heuristic: for each scalar load, we use 2 as the penalized
             cost for the case with 2 nunits and use 1 for the other
             cases.  It's without much supporting theory, mainly
             concluded from the broad performance evaluations on Power8,
             Power9 and Power10.  One possibly related point is that:
             vector construction for more units would use more insns,
             so it has more chances to schedule them better (even run in
             parallel when enough units are available at that time), so
             it seems reasonable not to penalize that much for them.  */
          unsigned int adjusted_cost = (nunits == 2) ? 2 : 1;
          unsigned int extra_cost = nunits * adjusted_cost;
          m_extra_ctor_cost += extra_cost;
        }
    }
}
unsigned
rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind,
                                 stmt_vec_info stmt_info, slp_tree,
                                 tree vectype, int misalign,
                                 vect_cost_model_location where)
{
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
                                                         misalign);
      stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info);
      /* Statements in an inner loop relative to the loop being
         vectorized are weighted more heavily.  The value here is
         arbitrary and could potentially be improved with analysis.  */
      unsigned int orig_count = count;
      retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
      m_costs[where] += retval;

      update_target_cost_per_stmt (kind, stmt_info, where, orig_count);
    }

  return retval;
}
/* For some target specific vectorization cost which can't be handled per stmt,
   we check the requisite conditions and adjust the vectorization cost
   accordingly if satisfied.  One typical example is to model shift cost for
   vector with length by counting the number of required lengths under the
   condition LOOP_VINFO_FULLY_WITH_LENGTH_P.  */

void
rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo)
{
  if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
    {
      rgroup_controls *rgc;
      unsigned int num_vectors_m1;
      unsigned int shift_cnt = 0;
      FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
        if (rgc->type)
          /* Each length needs one shift to fill into bits 0-7.  */
          shift_cnt += num_vectors_m1 + 1;

      add_stmt_cost (shift_cnt, scalar_stmt, NULL, NULL,
                     NULL_TREE, 0, vect_body);
    }
}
/* Determine suggested unroll factor by considering some below factors:

    - unroll option/pragma which can disable unrolling for this loop;
    - simple hardware resource model for non memory vector insns;
    - aggressive heuristics when iteration count is unknown:
	- reduction case to break cross iteration dependency;
	- emulated gather load;
    - estimated iteration count when iteration count is unknown.  */

unsigned int
rs6000_cost_data::determine_suggested_unroll_factor (loop_vec_info loop_vinfo)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Don't unroll if it's specified explicitly not to be unrolled.  */
  if (loop->unroll == 1
      || (OPTION_SET_P (flag_unroll_loops) && !flag_unroll_loops)
      || (OPTION_SET_P (flag_unroll_all_loops) && !flag_unroll_all_loops))
    return 1;

  unsigned int nstmts_nonldst = m_nstmts - m_nloads - m_nstores;
  /* Don't unroll if no vector instructions excepting for memory access.  */
  if (nstmts_nonldst == 0)
    return 1;

  /* Consider breaking cross iteration dependency for reduction.  */
  unsigned int reduc_factor = m_reduc_factor > 1 ? m_reduc_factor : 1;

  /* Use this simple hardware resource model that how many non ld/st
     vector instructions can be issued per cycle.  */
  unsigned int issue_width = rs6000_vect_unroll_issue;
  unsigned int uf = CEIL (reduc_factor * issue_width, nstmts_nonldst);
  uf = MIN ((unsigned int) rs6000_vect_unroll_limit, uf);
  /* Make sure it is power of 2.  */
  uf = 1 << ceil_log2 (uf);

  /* If the iteration count is known, the costing would be exact enough,
     don't worry it could be worse.  */
  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    return uf;

  /* Inspired by SPEC2017 parest_r, we want to aggressively unroll the
     loop if either condition is satisfied:
       - reduction factor exceeds the threshold;
       - emulated gather load adopted.  */
  if (reduc_factor > (unsigned int) rs6000_vect_unroll_reduc_threshold
      || m_gather_load)
    return uf;

  /* Check if we can conclude it's good to unroll from the estimated
     iteration count.  */
  HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
  unsigned int vf = vect_vf_for_cost (loop_vinfo);
  unsigned int unrolled_vf = vf * uf;
  if (est_niter == -1 || est_niter < unrolled_vf)
    /* When the estimated iteration of this loop is unknown, it's possible
       that we are able to vectorize this loop with the original VF but fail
       to vectorize it with the unrolled VF any more if the actual iteration
       count is in between.  */
    return 1;
  else
    {
      unsigned int epil_niter_unr = est_niter % unrolled_vf;
      unsigned int epil_niter = est_niter % vf;
      /* Even if we have partial vector support, it can still be inefficient
	 to calculate the length when the iteration count is unknown, so
	 only expect it's good to unroll when the epilogue iteration count
	 is not bigger than VF (only one time length calculation).  */
      if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
	  && epil_niter_unr <= vf)
	return uf;
      /* Without partial vector support, conservatively unroll this when
	 the epilogue iteration count is less than the original one
	 (epilogue execution time wouldn't be longer than before).  */
      else if (!LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
	       && epil_niter_unr <= epil_niter)
	return uf;
    }

  return 1;
}
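/* Worked example: with reduc_factor 4, issue width 4 and 8 non-load/store
   vector statements, uf = CEIL (4 * 4, 8) = 2, already a power of two,
   which is then clamped by rs6000_vect_unroll_limit.  */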
void
rs6000_cost_data::finish_cost (const vector_costs *scalar_costs)
{
  if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo))
    {
      adjust_vect_cost_per_loop (loop_vinfo);
      density_test (loop_vinfo);

      /* Don't vectorize minimum-vectorization-factor, simple copy loops
	 that require versioning for any reason.  The vectorization is at
	 best a wash inside the loop, and the versioning checks make
	 profitability highly unlikely and potentially quite harmful.  */
      if (!m_vect_nonmem
	  && LOOP_VINFO_VECT_FACTOR (loop_vinfo) == 2
	  && LOOP_REQUIRES_VERSIONING (loop_vinfo))
	m_costs[vect_body] += 10000;

      m_suggested_unroll_factor
	= determine_suggested_unroll_factor (loop_vinfo);
    }

  vector_costs::finish_cost (scalar_costs);
}
/* Implement targetm.loop_unroll_adjust.  */

static unsigned
rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
{
  if (unroll_only_small_loops)
    {
      /* TODO: These are hardcoded values right now.  We probably should use
	 a PARAM here.  */
      if (loop->ninsns <= 6)
	return MIN (4, nunroll);
      if (loop->ninsns <= 10)
	return MIN (2, nunroll);

      return 0;
    }

  return nunroll;
}
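/* In effect, with -munroll-only-small-loops, loops of at most 6 insns may
   be unrolled up to 4 times, loops of at most 10 insns up to twice, and
   larger loops not at all; otherwise the generic NUNROLL is kept.  */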
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.

   Implement targetm.vectorize.builtin_vectorized_function.  */

tree
rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
				    tree type_in)
{
  machine_mode in_mode, out_mode;
  int in_n, out_n;

  if (TARGET_DEBUG_BUILTIN)
    fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
	     combined_fn_name (combined_fn (fn)),
	     GET_MODE_NAME (TYPE_MODE (type_out)),
	     GET_MODE_NAME (TYPE_MODE (type_in)));

  /* TODO: Should this be gcc_assert?  */
  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    CASE_CFN_COPYSIGN:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[RS6000_BIF_CPSGNDP];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_CPSGNSP];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_COPYSIGN_V4SF];
      break;
    CASE_CFN_CEIL:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[RS6000_BIF_XVRDPIP];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_XVRSPIP];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_VRFIP];
      break;
    CASE_CFN_FLOOR:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[RS6000_BIF_XVRDPIM];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_XVRSPIM];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_VRFIM];
      break;
    CASE_CFN_FMA:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[RS6000_BIF_XVMADDDP];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_XVMADDSP];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_VMADDFP];
      break;
    CASE_CFN_TRUNC:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[RS6000_BIF_XVRDPIZ];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_XVRSPIZ];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_VRFIZ];
      break;
    CASE_CFN_NEARBYINT:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && flag_unsafe_math_optimizations
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[RS6000_BIF_XVRDPI];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && flag_unsafe_math_optimizations
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_XVRSPI];
      break;
    CASE_CFN_RINT:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
	  && !flag_trapping_math
	  && out_mode == DFmode && out_n == 2
	  && in_mode == DFmode && in_n == 2)
	return rs6000_builtin_decls[RS6000_BIF_XVRDPIC];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
	  && !flag_trapping_math
	  && out_mode == SFmode && out_n == 4
	  && in_mode == SFmode && in_n == 4)
	return rs6000_builtin_decls[RS6000_BIF_XVRSPIC];
      break;
    default:
      break;
    }

  /* Generate calls to libmass if appropriate.  */
  if (rs6000_veclib_handler)
    return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);

  return NULL_TREE;
}
/* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
   library with vectorized intrinsics.  */

static tree
rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
				   tree type_in)
{
  char name[32];
  const char *suffix = NULL;
  tree fntype, new_fndecl, bdecl = NULL_TREE;
  int n_args = 1;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* Libmass is suitable for unsafe math only as it does not correctly support
     parts of IEEE with the required precision such as denormals.  Only support
     it if we have VSX to use the simd d2 or f4 functions.
     XXX: Add variable length support.  */
  if (!flag_unsafe_math_optimizations || !TARGET_VSX)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  if (el_mode == DFmode && n == 2)
    {
      bdecl = mathfn_built_in (double_type_node, fn);
      suffix = "d2";				/* pow -> powd2 */
    }
  else if (el_mode == SFmode && n == 4)
    {
      bdecl = mathfn_built_in (float_type_node, fn);
      suffix = "4";				/* powf -> powf4 */
    }
  else
    return NULL_TREE;

  if (!bdecl)
    return NULL_TREE;

  gcc_assert (suffix != NULL);
  bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
  if (!bname)
    return NULL_TREE;

  strcpy (name, bname + strlen ("__builtin_"));
  strcat (name, suffix);

  if (n_args == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else if (n_args == 2)
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);
  else
    gcc_unreachable ();

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
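/* The resulting decl simply renames the scalar builtin for the MASS
   library, per the suffix comments above: "__builtin_pow" over V2DFmode
   becomes the external vector routine "powd2", and "__builtin_powf" over
   V4SFmode becomes "powf4".  */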
/* Default CPU string for rs6000*_file_start functions.  */
static const char *rs6000_default_cpu;

#ifdef USING_ELFOS_H
const char *rs6000_machine;

const char *
rs6000_machine_from_flags (void)
{
  /* e300 and e500 */
  if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3)
    return "e300";
  if (rs6000_cpu == PROCESSOR_PPC8540 || rs6000_cpu == PROCESSOR_PPC8548)
    return "e500";
  if (rs6000_cpu == PROCESSOR_PPCE500MC)
    return "e500mc";
  if (rs6000_cpu == PROCESSOR_PPCE500MC64)
    return "e500mc64";
  if (rs6000_cpu == PROCESSOR_PPCE5500)
    return "e5500";
  if (rs6000_cpu == PROCESSOR_PPCE6500)
    return "e6500";

  /* 400 series */
  if (rs6000_cpu == PROCESSOR_PPC403)
    return "\"403\"";
  if (rs6000_cpu == PROCESSOR_PPC405)
    return "\"405\"";
  if (rs6000_cpu == PROCESSOR_PPC440)
    return "\"440\"";
  if (rs6000_cpu == PROCESSOR_PPC476)
    return "\"476\"";

  /* A2 */
  if (rs6000_cpu == PROCESSOR_PPCA2)
    return "a2";

  /* Cell BE */
  if (rs6000_cpu == PROCESSOR_CELL)
    return "cell";

  /* Titan */
  if (rs6000_cpu == PROCESSOR_TITAN)
    return "titan";

  /* 500 series and 800 series */
  if (rs6000_cpu == PROCESSOR_MPCCORE)
    return "\"821\"";

#if 0
  /* This (and ppc64 below) are disabled here (for now at least) because
     PROCESSOR_POWERPC, PROCESSOR_POWERPC64, and PROCESSOR_COMMON
     are #define'd as some of these.  Untangling that is a job for later.  */

  /* 600 series and 700 series, "classic" */
  if (rs6000_cpu == PROCESSOR_PPC601 || rs6000_cpu == PROCESSOR_PPC603
      || rs6000_cpu == PROCESSOR_PPC604 || rs6000_cpu == PROCESSOR_PPC604e
      || rs6000_cpu == PROCESSOR_PPC750)
    return "ppc";
#endif

  /* Classic with AltiVec, "G4" */
  if (rs6000_cpu == PROCESSOR_PPC7400 || rs6000_cpu == PROCESSOR_PPC7450)
    return "\"7450\"";

#if 0
  /* The older 64-bit CPUs */
  if (rs6000_cpu == PROCESSOR_PPC620 || rs6000_cpu == PROCESSOR_PPC630
      || rs6000_cpu == PROCESSOR_RS64A)
    return "ppc64";
#endif

  HOST_WIDE_INT flags = rs6000_isa_flags;

  /* Disable the flags that should never influence the .machine selection.  */
  flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT
	     | OPTION_MASK_ISEL);

  if ((flags & (ISA_POWER11_MASKS_SERVER & ~ISA_3_1_MASKS_SERVER)) != 0)
    return "power11";
  if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
    return "power10";
  if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
    return "power9";
  if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
    return "power8";
  if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
    return "power7";
  if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
    return "power6";
  if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
    return "power5";
  if ((flags & ISA_2_1_MASKS) != 0)
    return "power4";
  if ((flags & OPTION_MASK_POWERPC64) != 0)
    return "ppc64";
  return "ppc";
}

void
emit_asm_machine (void)
{
  fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
}
#endif
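/* Each (ISA_N_MASKS & ~ISA_N-1_MASKS) test checks whether any flag that is
   new at ISA level N is enabled, so the newest matching level wins: e.g.
   -mcpu=power9 turns on an ISA 3.0 flag that is not in ISA 2.7 and thus
   selects ".machine power9".  */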
/* Do anything needed at the start of the asm file.  */

static void
rs6000_file_start (void)
{
  char buffer[80];
  const char *start = buffer;
  FILE *file = asm_out_file;

  rs6000_default_cpu = TARGET_CPU_DEFAULT;

  default_file_start ();

  if (flag_verbose_asm)
    {
      sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);

      if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
	{
	  fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
	  start = "";
	}

      if (OPTION_SET_P (rs6000_cpu_index))
	{
	  fprintf (file, "%s -mcpu=%s", start,
		   processor_target_table[rs6000_cpu_index].name);
	  start = "";
	}

      if (OPTION_SET_P (rs6000_tune_index))
	{
	  fprintf (file, "%s -mtune=%s", start,
		   processor_target_table[rs6000_tune_index].name);
	  start = "";
	}

      if (PPC405_ERRATUM77)
	{
	  fprintf (file, "%s PPC405CR_ERRATUM77", start);
	  start = "";
	}

#ifdef USING_ELFOS_H
      switch (rs6000_sdata)
	{
	case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
	case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
	case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
	case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
	}

      if (rs6000_sdata && g_switch_value)
	{
	  fprintf (file, "%s -G %d", start,
		   g_switch_value);
	  start = "";
	}
#endif

      if (*start == '\0')
	putc ('\n', file);
    }

#ifdef USING_ELFOS_H
  rs6000_machine = rs6000_machine_from_flags ();
  emit_asm_machine ();
#endif

  if (DEFAULT_ABI == ABI_ELFv2)
    fprintf (file, "\t.abiversion 2\n");
}
/* Return nonzero if this function is known to have a null epilogue.  */

int
direct_return (void)
{
  if (reload_completed)
    {
      rs6000_stack_t *info = rs6000_stack_info ();

      if (info->first_gp_reg_save == 32
	  && info->first_fp_reg_save == 64
	  && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
	  && ! info->lr_save_p
	  && ! info->cr_save_p
	  && info->vrsave_size == 0
	  && ! info->push_p)
	return 1;
    }

  return 0;
}
/* Helper for num_insns_constant.  Calculate number of instructions to
   load VALUE to a single gpr using combinations of addi, addis, ori,
   oris, sldi and rldimi instructions.  */

static int
num_insns_constant_gpr (HOST_WIDE_INT value)
{
  /* signed constant loadable with addi */
  if (SIGNED_INTEGER_16BIT_P (value))
    return 1;

  /* constant loadable with addis */
  else if ((value & 0xffff) == 0
	   && (value >> 31 == -1 || value >> 31 == 0))
    return 1;

  /* PADDI can support up to 34 bit signed integers.  */
  else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
    return 1;

  else if (TARGET_POWERPC64)
    {
      int num_insns = -1;
      rs6000_emit_set_long_const (nullptr, value, &num_insns);
      return num_insns;
    }

  else
    return 2;
}
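/* For instance, 0x7fff fits in 16 bits (one addi), 0x12340000 has a zero
   low halfword with a sign-extendable high part (one addis), and a general
   32-bit value such as 0x12345678 typically needs two insns (addis then
   ori).  */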
/* Helper for num_insns_constant.  Allow constants formed by the
   num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
   and handle modes that require multiple gprs.  */

static int
num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
{
  int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  int total = 0;

  while (nregs-- > 0)
    {
      HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
      int insns = num_insns_constant_gpr (low);
      if (insns > 2
	  /* We won't get more than 2 from num_insns_constant_gpr
	     except when TARGET_POWERPC64 and mode is DImode or
	     wider, so the register mode must be DImode.  */
	  && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
	insns = 2;
      total += insns;
      /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
	 it all at once would be UB.  */
      value >>= (BITS_PER_WORD - 1);
      value >>= 1;
    }

  return total;
}
/* Return the number of instructions it takes to form a constant in as
   many gprs are needed for MODE.  */

int
num_insns_constant (rtx op, machine_mode mode)
{
  HOST_WIDE_INT val;

  switch (GET_CODE (op))
    {
    case CONST_INT:
      val = INTVAL (op);
      break;

    case CONST_WIDE_INT:
      {
	int insns = 0;
	for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
	  insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
					     DImode);
	return insns;
      }

    case CONST_DOUBLE:
      {
	const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);

	if (mode == SFmode || mode == SDmode)
	  {
	    long l;

	    if (mode == SDmode)
	      REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
	    else
	      REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
	    /* See the first define_split in rs6000.md handling a
	       const_double_operand.  */
	    val = l;
	    mode = SImode;
	  }
	else if (mode == DFmode || mode == DDmode)
	  {
	    long l[2];

	    if (mode == DDmode)
	      REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
	    else
	      REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);

	    /* See the second (32-bit) and third (64-bit) define_split
	       in rs6000.md handling a const_double_operand.  */
	    val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
	    val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
	    mode = DImode;
	  }
	else if (mode == TFmode || mode == TDmode
		 || mode == KFmode || mode == IFmode)
	  {
	    long l[4];
	    int insns;

	    if (mode == TDmode)
	      REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
	    else
	      REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);

	    val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
	    val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
	    insns = num_insns_constant_multi (val, DImode);
	    val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
	    val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
	    insns += num_insns_constant_multi (val, DImode);
	    return insns;
	  }
	else
	  gcc_unreachable ();
      }
      break;

    default:
      gcc_unreachable ();
    }

  return num_insns_constant_multi (val, mode);
}
/* Interpret element ELT of the CONST_VECTOR OP as an integer value.
   If the mode of OP is MODE_VECTOR_INT, this simply returns the
   corresponding element of the vector, but for V4SFmode, the
   corresponding "float" is interpreted as an SImode integer.  */

static HOST_WIDE_INT
const_vector_elt_as_int (rtx op, unsigned int elt)
{
  rtx tmp;

  /* We can't handle V2DImode and V2DFmode vector constants here yet.  */
  gcc_assert (GET_MODE (op) != V2DImode
	      && GET_MODE (op) != V2DFmode);

  tmp = CONST_VECTOR_ELT (op, elt);
  if (GET_MODE (op) == V4SFmode)
    tmp = gen_lowpart (SImode, tmp);
  return INTVAL (tmp);
}
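/* So for a V4SFmode constant, the element 1.0f comes back as its IEEE
   single-precision bit pattern 0x3f800000 rather than as a float.  */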
/* Return true if OP can be synthesized with a particular vspltisb, vspltish
   or vspltisw instruction.  OP is a CONST_VECTOR.  Which instruction is used
   depends on STEP and COPIES, one of which will be 1.  If COPIES > 1,
   all items are set to the same value and contain COPIES replicas of the
   vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
   operand and the others are set to the value of the operand's msb.  */

static bool
vspltis_constant (rtx op, unsigned step, unsigned copies)
{
  machine_mode mode = GET_MODE (op);
  machine_mode inner = GET_MODE_INNER (mode);

  unsigned i;
  unsigned nunits;
  unsigned bitsize;
  unsigned mask;

  HOST_WIDE_INT val;
  HOST_WIDE_INT splat_val;
  HOST_WIDE_INT msb_val;

  if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
    return false;

  nunits = GET_MODE_NUNITS (mode);
  bitsize = GET_MODE_BITSIZE (inner);
  mask = GET_MODE_MASK (inner);

  val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
  splat_val = val;
  msb_val = val >= 0 ? 0 : -1;

  if (val == 0 && step > 1)
    {
      /* Special case for loading most significant bit with step > 1.
	 In that case, match 0s in all but step-1s elements, where match
	 EASY_VECTOR_MSB.  */
      for (i = 1; i < nunits; ++i)
	{
	  unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
	  HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
	  if ((i & (step - 1)) == step - 1)
	    {
	      if (!EASY_VECTOR_MSB (elt_val, inner))
		break;
	    }
	  else if (elt_val)
	    break;
	}
      if (i == nunits)
	return true;
    }

  /* Construct the value to be splatted, if possible.  If not, return 0.  */
  for (i = 2; i <= copies; i *= 2)
    {
      HOST_WIDE_INT small_val;
      bitsize /= 2;
      small_val = splat_val >> bitsize;
      mask >>= bitsize;
      if (splat_val != ((HOST_WIDE_INT)
			((unsigned HOST_WIDE_INT) small_val << bitsize)
			| (small_val & mask)))
	return false;
      splat_val = small_val;
      inner = smallest_int_mode_for_size (bitsize);
    }

  /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw].  */
  if (EASY_VECTOR_15 (splat_val))
    ;

  /* Also check if we can splat, and then add the result to itself.  Do so if
     the value is positive, or if the splat instruction is using OP's mode;
     for splat_val < 0, the splat and the add should use the same mode.  */
  else if (EASY_VECTOR_15_ADD_SELF (splat_val)
	   && (splat_val >= 0 || (step == 1 && copies == 1)))
    ;

  /* Also check if we are loading up the most significant bit which can be done
     by loading up -1 and shifting the value left by -1.  Only do this for
     step 1 here, for larger steps it is done earlier.  */
  else if (EASY_VECTOR_MSB (splat_val, inner) && step == 1)
    ;

  else
    return false;

  /* Check if VAL is present in every STEP-th element, and the
     other elements are filled with its most significant bit.  */
  for (i = 1; i < nunits; ++i)
    {
      HOST_WIDE_INT desired_val;
      unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
      if ((i & (step - 1)) == 0)
	desired_val = val;
      else
	desired_val = msb_val;

      if (desired_val != const_vector_elt_as_int (op, elt))
	return false;
    }

  return true;
}
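/* For instance, a V8HImode vector with every element 5 matches with
   STEP == 1 and COPIES == 1 (a single vspltish 5), while with STEP == 2
   it would match a vector where every other element is 5 and the rest
   hold the sign-fill value 0.  */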
/* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
   instruction, filling in the bottom elements with 0 or -1.

   Return 0 if the constant cannot be generated with VSLDOI.  Return positive
   for the number of zeroes to shift in, or negative for the number of 0xff
   bytes to shift in.

   OP is a CONST_VECTOR.  */

int
vspltis_shifted (rtx op)
{
  machine_mode mode = GET_MODE (op);
  machine_mode inner = GET_MODE_INNER (mode);

  unsigned i, j;
  unsigned nunits;
  unsigned mask;

  HOST_WIDE_INT val;

  if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
    return 0;

  /* We need to create pseudo registers to do the shift, so don't recognize
     shift vector constants after reload.  Don't match it even before RA
     after split1 is done, because there won't be further splitting pass
     before RA to do the splitting.  */
  if (!can_create_pseudo_p ()
      || (cfun->curr_properties & PROP_rtl_split_insns))
    return 0;

  nunits = GET_MODE_NUNITS (mode);
  mask = GET_MODE_MASK (inner);

  val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);

  /* Check if the value can really be the operand of a vspltis[bhw].  */
  if (EASY_VECTOR_15 (val))
    ;

  /* Also check if we are loading up the most significant bit which can be done
     by loading up -1 and shifting the value left by -1.  */
  else if (EASY_VECTOR_MSB (val, inner))
    ;

  else
    return 0;

  /* Check if VAL is present in every STEP-th element until we find elements
     that are 0 or all 1 bits.  */
  for (i = 1; i < nunits; ++i)
    {
      unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
      HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);

      /* If the value isn't the splat value, check for the remaining elements
	 being 0/-1.  */
      if (val != elt_val)
	{
	  if (elt_val == 0)
	    {
	      for (j = i+1; j < nunits; ++j)
		{
		  unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
		  if (const_vector_elt_as_int (op, elt2) != 0)
		    return 0;
		}

	      return (nunits - i) * GET_MODE_SIZE (inner);
	    }

	  else if ((elt_val & mask) == mask)
	    {
	      for (j = i+1; j < nunits; ++j)
		{
		  unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
		  if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
		    return 0;
		}

	      return -((nunits - i) * GET_MODE_SIZE (inner));
	    }

	  else
	    return 0;
	}
    }

  /* If all elements are equal, we don't need to do VSLDOI.  */
  return 0;
}
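/* Example: the V4SImode constant {5, 5, 5, 0} (in big-endian element
   order) is a vspltisw 5 with its last element shifted out, so
   vspltis_shifted returns 4, i.e. one SImode element's worth of zero
   bytes to shift in via vsldoi.  */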
/* Return non-zero (element mode byte size) if OP is of the given MODE
   and can be synthesized with a vspltisb, vspltish or vspltisw.  */

int
easy_altivec_constant (rtx op, machine_mode mode)
{
  unsigned step, copies;

  if (mode == VOIDmode)
    mode = GET_MODE (op);
  else if (mode != GET_MODE (op))
    return 0;

  /* V2DI/V2DF was added with VSX.  Only allow 0 and all 1's as easy
     constants.  */
  if (mode == V2DFmode)
    return zero_constant (op, mode) ? 8 : 0;

  else if (mode == V2DImode)
    {
      if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
	  || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
	return 0;

      if (zero_constant (op, mode))
	return 8;

      if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
	  && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
	return 8;

      return 0;
    }

  /* V1TImode is a special container for TImode.  Ignore for now.  */
  else if (mode == V1TImode)
    return 0;

  /* Start with a vspltisw.  */
  step = GET_MODE_NUNITS (mode) / 4;
  copies = 1;

  if (vspltis_constant (op, step, copies))
    return 4;

  /* Then try with a vspltish.  */
  if (step == 1)
    copies <<= 1;
  else
    step >>= 1;

  if (vspltis_constant (op, step, copies))
    return 2;

  /* And finally a vspltisb.  */
  if (step == 1)
    copies <<= 1;
  else
    step >>= 1;

  if (vspltis_constant (op, step, copies))
    return 1;

  if (vspltis_shifted (op) != 0)
    return GET_MODE_SIZE (GET_MODE_INNER (mode));

  return 0;
}
/* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
   result is OP.  Abort if it is not possible.  */

rtx
gen_easy_altivec_constant (rtx op)
{
  machine_mode mode = GET_MODE (op);
  int nunits = GET_MODE_NUNITS (mode);
  rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
  unsigned step = nunits / 4;
  unsigned copies = 1;

  /* Start with a vspltisw.  */
  if (vspltis_constant (op, step, copies))
    return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));

  /* Then try with a vspltish.  */
  if (step == 1)
    copies <<= 1;
  else
    step >>= 1;

  if (vspltis_constant (op, step, copies))
    return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));

  /* And finally a vspltisb.  */
  if (step == 1)
    copies <<= 1;
  else
    step >>= 1;

  if (vspltis_constant (op, step, copies))
    return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));

  gcc_unreachable ();
}
/* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
   instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).

   Return the number of instructions needed (1 or 2) into the address pointed
   to by NUM_INSNS_PTR.

   Return the constant that is being split via CONSTANT_PTR.  */

bool
xxspltib_constant_p (rtx op,
		     machine_mode mode,
		     int *num_insns_ptr,
		     int *constant_ptr)
{
  size_t nunits = GET_MODE_NUNITS (mode);
  size_t i;
  HOST_WIDE_INT value;
  rtx element;

  /* Set the returned values to out of bound values.  */
  *num_insns_ptr = -1;
  *constant_ptr = 256;

  if (!TARGET_P9_VECTOR)
    return false;

  if (mode == VOIDmode)
    mode = GET_MODE (op);

  else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
    return false;

  /* Handle (vec_duplicate <constant>).  */
  if (GET_CODE (op) == VEC_DUPLICATE)
    {
      if (mode != V16QImode && mode != V8HImode && mode != V4SImode
	  && mode != V2DImode)
	return false;

      element = XEXP (op, 0);
      if (!CONST_INT_P (element))
	return false;

      value = INTVAL (element);
      if (!IN_RANGE (value, -128, 127))
	return false;
    }

  /* Handle (const_vector [...]).  */
  else if (GET_CODE (op) == CONST_VECTOR)
    {
      if (mode != V16QImode && mode != V8HImode && mode != V4SImode
	  && mode != V2DImode)
	return false;

      element = CONST_VECTOR_ELT (op, 0);
      if (!CONST_INT_P (element))
	return false;

      value = INTVAL (element);
      if (!IN_RANGE (value, -128, 127))
	return false;

      for (i = 1; i < nunits; i++)
	{
	  element = CONST_VECTOR_ELT (op, i);
	  if (!CONST_INT_P (element))
	    return false;

	  if (value != INTVAL (element))
	    return false;
	}
    }

  /* Handle integer constants being loaded into the upper part of the VSX
     register as a scalar.  If the value isn't 0/-1, only allow it if the mode
     can go in Altivec registers.  Prefer VSPLTISW/VUPKHSW over XXSPLTIB.  */
  else if (CONST_INT_P (op))
    {
      if (!SCALAR_INT_MODE_P (mode))
	return false;

      value = INTVAL (op);
      if (!IN_RANGE (value, -128, 127))
	return false;

      if (!IN_RANGE (value, -1, 0))
	{
	  if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
	    return false;

	  if (EASY_VECTOR_15 (value))
	    return false;
	}
    }

  else
    return false;

  /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
     sign extend.  Special case 0/-1 to allow getting any VSX register instead
     of an Altivec register.  */
  if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
      && EASY_VECTOR_15 (value))
    return false;

  /* Return # of instructions and the constant byte for XXSPLTIB.  */
  if (mode == V16QImode)
    *num_insns_ptr = 1;

  else if (IN_RANGE (value, -1, 0))
    *num_insns_ptr = 1;

  /* Do not generate XXSPLTIB and a sign extend operation if we can generate a
     single XXSPLTIW or XXSPLTIDP instruction.  */
  else if (vsx_prefixed_constant (op, mode))
    return false;

  /* Return XXSPLTIB followed by a sign extend operation to convert the
     constant to V8HImode or V4SImode.  */
  else
    *num_insns_ptr = 2;

  *constant_ptr = (int) value;
  return true;
}
/* Return true if OP mode is V2DI and can be synthesized with ISA 2.07
   instructions vupkhsw and vspltisw.

   Return the constant that is being split via CONSTANT_PTR.  */

bool
vspltisw_vupkhsw_constant_p (rtx op, machine_mode mode, int *constant_ptr)
{
  HOST_WIDE_INT value;
  rtx elt;

  if (!TARGET_P8_VECTOR)
    return false;

  if (mode != V2DImode)
    return false;

  if (!const_vec_duplicate_p (op, &elt))
    return false;

  value = INTVAL (elt);
  if (value == 0 || value == 1
      || !EASY_VECTOR_15 (value))
    return false;

  *constant_ptr = (int) value;
  return true;
}
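/* E.g. a V2DImode splat of 5 can be built as vspltisw followed by vupkhsw,
   which sign-extends the high SImode elements to DImode, avoiding a load
   from the constant pool.  */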
const char *
output_vec_const_move (rtx *operands)
{
  int shift;
  machine_mode mode;
  rtx dest, vec;

  dest = operands[0];
  vec = operands[1];
  mode = GET_MODE (dest);

  if (TARGET_VSX)
    {
      bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
      int xxspltib_value = 256;
      int num_insns = -1;

      if (zero_constant (vec, mode))
	{
	  if (TARGET_P9_VECTOR)
	    return "xxspltib %x0,0";

	  else if (dest_vmx_p)
	    return "vspltisw %0,0";

	  else
	    return "xxlxor %x0,%x0,%x0";
	}

      if (all_ones_constant (vec, mode))
	{
	  if (TARGET_P9_VECTOR)
	    return "xxspltib %x0,255";

	  else if (dest_vmx_p)
	    return "vspltisw %0,-1";

	  else if (TARGET_P8_VECTOR)
	    return "xxlorc %x0,%x0,%x0";
	}

      vec_const_128bit_type vsx_const;
      if (TARGET_POWER10 && vec_const_128bit_to_bytes (vec, mode, &vsx_const))
	{
	  unsigned imm = constant_generates_lxvkq (&vsx_const);
	  if (imm)
	    {
	      operands[2] = GEN_INT (imm);
	      return "lxvkq %x0,%2";
	    }

	  imm = constant_generates_xxspltiw (&vsx_const);
	  if (imm)
	    {
	      operands[2] = GEN_INT (imm);
	      return "xxspltiw %x0,%2";
	    }

	  imm = constant_generates_xxspltidp (&vsx_const);
	  if (imm)
	    {
	      operands[2] = GEN_INT (imm);
	      return "xxspltidp %x0,%2";
	    }
	}

      if (TARGET_P9_VECTOR
	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
	{
	  operands[2] = GEN_INT (xxspltib_value & 0xff);
	  return "xxspltib %x0,%2";
	}
    }

  if (TARGET_ALTIVEC)
    {
      rtx splat_vec;

      gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
      if (zero_constant (vec, mode))
	return "vspltisw %0,0";

      if (all_ones_constant (vec, mode))
	return "vspltisw %0,-1";

      /* Do we need to construct a value using VSLDOI?  */
      shift = vspltis_shifted (vec);
      if (shift != 0)
	return "#";

      splat_vec = gen_easy_altivec_constant (vec);
      gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
      operands[1] = XEXP (splat_vec, 0);
      if (!EASY_VECTOR_15 (INTVAL (operands[1])))
	return "#";

      switch (GET_MODE (splat_vec))
	{
	case E_V4SImode:
	  return "vspltisw %0,%1";

	case E_V8HImode:
	  return "vspltish %0,%1";

	case E_V16QImode:
	  return "vspltisb %0,%1";

	default:
	  gcc_unreachable ();
	}
    }

  gcc_unreachable ();
}
/* Initialize vector TARGET to VALS.  */

void
rs6000_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  unsigned int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  rtx x, mem;
  unsigned int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
	++n_var, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
      bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
      if ((int_vector_p || TARGET_VSX) && all_const_zero)
	{
	  /* Zero register.  */
	  emit_move_insn (target, CONST0_RTX (mode));
	  return;
	}
      else if (int_vector_p && easy_vector_constant (const_vec, mode))
	{
	  /* Splat immediate.  */
	  emit_insn (gen_rtx_SET (target, const_vec));
	  return;
	}
      else
	{
	  /* Load from constant pool.  */
	  emit_move_insn (target, const_vec);
	  return;
	}
    }

  /* Double word values on VSX can use xxpermdi or lxvdsx.  */
  if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
    {
      rtx op[2];
      size_t num_elements = all_same ? 1 : 2;
      for (i = 0; i < num_elements; i++)
	{
	  op[i] = XVECEXP (vals, 0, i);
	  /* Just in case there is a SUBREG with a smaller mode, do a
	     conversion.  */
	  if (GET_MODE (op[i]) != inner_mode)
	    {
	      rtx tmp = gen_reg_rtx (inner_mode);
	      convert_move (tmp, op[i], 0);
	      op[i] = tmp;
	    }
	  /* Allow load with splat double word.  */
	  else if (MEM_P (op[i]))
	    {
	      if (!all_same)
		op[i] = force_reg (inner_mode, op[i]);
	    }
	  else if (!REG_P (op[i]))
	    op[i] = force_reg (inner_mode, op[i]);
	}

      if (all_same)
	{
	  if (mode == V2DFmode)
	    emit_insn (gen_vsx_splat_v2df (target, op[0]));
	  else
	    emit_insn (gen_vsx_splat_v2di (target, op[0]));
	}
      else
	{
	  if (mode == V2DFmode)
	    emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
	  else
	    emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
	}
      return;
    }

  /* Special case initializing vector int if we are on 64-bit systems with
     direct move or we have the ISA 3.0 instructions.  */
  if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
      && TARGET_DIRECT_MOVE_64BIT)
    {
      if (all_same)
	{
	  rtx element0 = XVECEXP (vals, 0, 0);
	  if (MEM_P (element0))
	    element0 = rs6000_force_indexed_or_indirect_mem (element0);
	  else
	    element0 = force_reg (SImode, element0);

	  if (TARGET_P9_VECTOR)
	    emit_insn (gen_vsx_splat_v4si (target, element0));

	  else
	    {
	      rtx tmp = gen_reg_rtx (DImode);
	      emit_insn (gen_zero_extendsidi2 (tmp, element0));
	      emit_insn (gen_vsx_splat_v4si_di (target, tmp));
	    }
	  return;
	}

      else
	{
	  rtx elements[4];

	  for (i = 0; i < 4; i++)
	    elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));

	  emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
					elements[2], elements[3]));
	  return;
	}
    }

  /* With single precision floating point on VSX, know that internally single
     precision is actually represented as a double, and either make 2 V2DF
     vectors, and convert these vectors to single precision, or do one
     conversion, and splat the result to the other elements.  */
  if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
    {
      if (all_same)
	{
	  rtx element0 = XVECEXP (vals, 0, 0);

	  if (TARGET_P9_VECTOR)
	    {
	      if (MEM_P (element0))
		element0 = rs6000_force_indexed_or_indirect_mem (element0);

	      emit_insn (gen_vsx_splat_v4sf (target, element0));
	    }

	  else
	    {
	      rtx freg = gen_reg_rtx (V4SFmode);
	      rtx sreg = force_reg (SFmode, element0);
	      rtx cvt = (TARGET_XSCVDPSPN
			 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
			 : gen_vsx_xscvdpsp_scalar (freg, sreg));

	      emit_insn (cvt);
	      emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
						      const0_rtx));
	    }
	}
      else
	{
	  if (TARGET_P8_VECTOR && TARGET_POWERPC64)
	    {
	      rtx tmp_sf[4];
	      rtx tmp_si[4];
	      rtx tmp_di[4];
	      rtx mrg_di[4];
	      for (i = 0; i < 4; i++)
		{
		  tmp_si[i] = gen_reg_rtx (SImode);
		  tmp_di[i] = gen_reg_rtx (DImode);
		  mrg_di[i] = gen_reg_rtx (DImode);
		  tmp_sf[i] = force_reg (SFmode, XVECEXP (vals, 0, i));
		  emit_insn (gen_movsi_from_sf (tmp_si[i], tmp_sf[i]));
		  emit_insn (gen_zero_extendsidi2 (tmp_di[i], tmp_si[i]));
		}

	      if (!BYTES_BIG_ENDIAN)
		{
		  std::swap (tmp_di[0], tmp_di[1]);
		  std::swap (tmp_di[2], tmp_di[3]);
		}

	      emit_insn (gen_ashldi3 (mrg_di[0], tmp_di[0], GEN_INT (32)));
	      emit_insn (gen_iordi3 (mrg_di[1], mrg_di[0], tmp_di[1]));
	      emit_insn (gen_ashldi3 (mrg_di[2], tmp_di[2], GEN_INT (32)));
	      emit_insn (gen_iordi3 (mrg_di[3], mrg_di[2], tmp_di[3]));

	      rtx tmp_v2di = gen_reg_rtx (V2DImode);
	      emit_insn (gen_vsx_concat_v2di (tmp_v2di, mrg_di[1], mrg_di[3]));
	      emit_move_insn (target, gen_lowpart (V4SFmode, tmp_v2di));
	    }
	  else
	    {
	      rtx dbl_even = gen_reg_rtx (V2DFmode);
	      rtx dbl_odd  = gen_reg_rtx (V2DFmode);
	      rtx flt_even = gen_reg_rtx (V4SFmode);
	      rtx flt_odd  = gen_reg_rtx (V4SFmode);
	      rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
	      rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
	      rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
	      rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));

	      emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
	      emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
	      emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
	      emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
	      rs6000_expand_extract_even (target, flt_even, flt_odd);
	    }
	}
      return;
    }

  /* Special case initializing vector short/char that are splats if we are on
     64-bit systems with direct move.  */
  if (all_same && TARGET_DIRECT_MOVE_64BIT
      && (mode == V16QImode || mode == V8HImode))
    {
      rtx op0 = XVECEXP (vals, 0, 0);
      rtx di_tmp = gen_reg_rtx (DImode);

      if (!REG_P (op0))
	op0 = force_reg (GET_MODE_INNER (mode), op0);

      if (mode == V16QImode)
	{
	  emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
	  emit_insn (gen_vsx_vspltb_di (target, di_tmp));
	  return;
	}

      if (mode == V8HImode)
	{
	  emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
	  emit_insn (gen_vsx_vsplth_di (target, di_tmp));
	  return;
	}
    }

  /* Store value to stack temp.  Load vector element.  Splat.  However, splat
     of 64-bit items is not supported on Altivec.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
      emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
		      XVECEXP (vals, 0, 0));
      x = gen_rtx_UNSPEC (VOIDmode,
			  gen_rtvec (1, const0_rtx), UNSPEC_LVE);
      emit_insn (gen_rtx_PARALLEL (VOIDmode,
				   gen_rtvec (2,
					      gen_rtx_SET (target, mem),
					      x)));
      x = gen_rtx_VEC_SELECT (inner_mode, target,
			      gen_rtx_PARALLEL (VOIDmode,
						gen_rtvec (1, const0_rtx)));
      emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite
     varying field.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      rs6000_expand_vector_init (target, copy);

      /* Insert variable.  */
      rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var),
				GEN_INT (one_var));
      return;
    }

  if (TARGET_DIRECT_MOVE && (mode == V16QImode || mode == V8HImode))
    {
      rtx op[16];
      /* Force the values into word_mode registers.  */
      for (i = 0; i < n_elts; i++)
	{
	  rtx tmp = force_reg (inner_mode, XVECEXP (vals, 0, i));
	  machine_mode tmode = TARGET_POWERPC64 ? DImode : SImode;
	  op[i] = simplify_gen_subreg (tmode, tmp, inner_mode, 0);
	}

      /* Take unsigned char big endianness on 64bit as example for below
	 construction, the input values are: A, B, C, D, ..., O, P.  */

      if (TARGET_DIRECT_MOVE_128)
	{
	  /* Move to VSX register with vec_concat, each has 2 values.
	     eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
		 vr1[1] = { xxxxxxxC, xxxxxxxD };
		 ...
		 vr1[7] = { xxxxxxxO, xxxxxxxP };  */
	  rtx vr1[8];
	  for (i = 0; i < n_elts / 2; i++)
	    {
	      vr1[i] = gen_reg_rtx (V2DImode);
	      emit_insn (gen_vsx_concat_v2di (vr1[i], op[i * 2],
					      op[i * 2 + 1]));
	    }

	  /* Pack vectors with 2 values into vectors with 4 values.
	     eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
		 vr2[1] = { xxxExxxF, xxxGxxxH };
		 vr2[2] = { xxxIxxxJ, xxxKxxxL };
		 vr2[3] = { xxxMxxxN, xxxOxxxP };  */
	  rtx vr2[4];
	  for (i = 0; i < n_elts / 4; i++)
	    {
	      vr2[i] = gen_reg_rtx (V4SImode);
	      emit_insn (gen_altivec_vpkudum (vr2[i], vr1[i * 2],
					      vr1[i * 2 + 1]));
	    }

	  /* Pack vectors with 4 values into vectors with 8 values.
	     eg: vr3[0] = { xAxBxCxD, xExFxGxH };
		 vr3[1] = { xIxJxKxL, xMxNxOxP };  */
	  rtx vr3[2];
	  for (i = 0; i < n_elts / 8; i++)
	    {
	      vr3[i] = gen_reg_rtx (V8HImode);
	      emit_insn (gen_altivec_vpkuwum (vr3[i], vr2[i * 2],
					      vr2[i * 2 + 1]));
	    }

	  /* If it's V8HImode, it's done and return it.  */
	  if (mode == V8HImode)
	    {
	      emit_insn (gen_rtx_SET (target, vr3[0]));
	      return;
	    }

	  /* Pack vectors with 8 values into 16 values.  */
	  rtx res = gen_reg_rtx (V16QImode);
	  emit_insn (gen_altivec_vpkuhum (res, vr3[0], vr3[1]));
	  emit_insn (gen_rtx_SET (target, res));
	}
      else
	{
	  rtx (*merge_v16qi) (rtx, rtx, rtx) = NULL;
	  rtx (*merge_v8hi) (rtx, rtx, rtx) = NULL;
	  rtx (*merge_v4si) (rtx, rtx, rtx) = NULL;
	  rtx perm_idx;

	  /* Set up some common gen routines and values.  */
	  if (BYTES_BIG_ENDIAN)
	    {
	      if (mode == V16QImode)
		{
		  merge_v16qi = gen_altivec_vmrghb;
		  merge_v8hi = gen_altivec_vmrglh;
		}
	      else
		merge_v8hi = gen_altivec_vmrghh;

	      merge_v4si = gen_altivec_vmrglw;
	      perm_idx = GEN_INT (3);
	    }
	  else
	    {
	      if (mode == V16QImode)
		{
		  merge_v16qi = gen_altivec_vmrglb;
		  merge_v8hi = gen_altivec_vmrghh;
		}
	      else
		merge_v8hi = gen_altivec_vmrglh;

	      merge_v4si = gen_altivec_vmrghw;
	      perm_idx = GEN_INT (0);
	    }

	  /* Move to VSX register with direct move.
	     eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
		 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
		 ...
		 vr_qi[15] = { xxxxxxxP, xxxxxxxx };  */
	  rtx vr_qi[16];
	  for (i = 0; i < n_elts; i++)
	    {
	      vr_qi[i] = gen_reg_rtx (V16QImode);
	      if (TARGET_POWERPC64)
		emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi[i], op[i]));
	      else
		emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi[i], op[i]));
	    }

	  /* Merge/move to vector short.
	     eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
		 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
		 ...
		 vr_hi[7] = { xxxxxxxx, xxxxxxOP };  */
	  rtx vr_hi[8];
	  for (i = 0; i < 8; i++)
	    {
	      rtx tmp = vr_qi[i];
	      if (mode == V16QImode)
		{
		  tmp = gen_reg_rtx (V16QImode);
		  emit_insn (merge_v16qi (tmp, vr_qi[2 * i], vr_qi[2 * i + 1]));
		}
	      vr_hi[i] = gen_reg_rtx (V8HImode);
	      emit_move_insn (vr_hi[i], gen_lowpart (V8HImode, tmp));
	    }

	  /* Merge vector short to vector int.
	     eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
		 vr_si[1] = { xxxxxxxx, xxxxEFGH };
		 ...
		 vr_si[3] = { xxxxxxxx, xxxxMNOP };  */
	  rtx vr_si[4];
	  for (i = 0; i < 4; i++)
	    {
	      rtx tmp = gen_reg_rtx (V8HImode);
	      emit_insn (merge_v8hi (tmp, vr_hi[2 * i], vr_hi[2 * i + 1]));
	      vr_si[i] = gen_reg_rtx (V4SImode);
	      emit_move_insn (vr_si[i], gen_lowpart (V4SImode, tmp));
	    }

	  /* Merge vector int to vector long.
	     eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
		 vr_di[1] = { xxxxxxxx, IJKLMNOP };  */
	  rtx vr_di[2];
	  for (i = 0; i < 2; i++)
	    {
	      rtx tmp = gen_reg_rtx (V4SImode);
	      emit_insn (merge_v4si (tmp, vr_si[2 * i], vr_si[2 * i + 1]));
	      vr_di[i] = gen_reg_rtx (V2DImode);
	      emit_move_insn (vr_di[i], gen_lowpart (V2DImode, tmp));
	    }

	  rtx res = gen_reg_rtx (V2DImode);
	  emit_insn (gen_vsx_xxpermdi_v2di (res, vr_di[0], vr_di[1],
					    perm_idx));
	  emit_insn (gen_rtx_SET (target, gen_lowpart (mode, res)));
	}

      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
/* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
   is variable and also counts by vector element size for p9 and above.  */

static void
rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
{
  machine_mode mode = GET_MODE (target);

  gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));

  machine_mode inner_mode = GET_MODE (val);

  int width = GET_MODE_SIZE (inner_mode);

  gcc_assert (width >= 1 && width <= 8);

  int shift = exact_log2 (width);

  machine_mode idx_mode = GET_MODE (idx);

  machine_mode shift_mode;
  /* Gen function pointers for shifting left and generation of permutation
     control vectors.  */
  rtx (*gen_ashl) (rtx, rtx, rtx);
  rtx (*gen_pcvr1) (rtx, rtx);
  rtx (*gen_pcvr2) (rtx, rtx);

  if (TARGET_POWERPC64)
    {
      shift_mode = DImode;
      gen_ashl = gen_ashldi3;
      gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_di
				   : gen_altivec_lvsr_reg_di;
      gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_di
				   : gen_altivec_lvsl_reg_di;
    }
  else
    {
      shift_mode = SImode;
      gen_ashl = gen_ashlsi3;
      gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_si
				   : gen_altivec_lvsr_reg_si;
      gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_si
				   : gen_altivec_lvsl_reg_si;
    }

  /* Generate the IDX for permute shift, width is the vector element size.
     idx = idx * width.  */
  rtx tmp = gen_reg_rtx (shift_mode);
  idx = convert_modes (shift_mode, idx_mode, idx, 1);

  emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));

  /* Generate one permutation control vector used for rotating the element
     at to-insert position to element zero in target vector.  lvsl is
     used for big endianness while lvsr is used for little endianness:
     lvs[lr]    v1,0,idx.  */
  rtx pcvr1 = gen_reg_rtx (V16QImode);
  emit_insn (gen_pcvr1 (pcvr1, tmp));

  rtx sub_target = simplify_gen_subreg (V16QImode, target, mode, 0);
  rtx perm1 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target,
					   pcvr1);
  emit_insn (perm1);

  /* Insert val into element 0 of target vector.  */
  rs6000_expand_vector_set (target, val, const0_rtx);

  /* Rotate back with a reversed permutation control vector generated from:
     lvs[rl]   v2,0,idx.  */
  rtx pcvr2 = gen_reg_rtx (V16QImode);
  emit_insn (gen_pcvr2 (pcvr2, tmp));

  rtx perm2 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target,
					   pcvr2);
  emit_insn (perm2);
}
/* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
   is variable and also counts by vector element size for p7 & p8.  */

static void
rs6000_expand_vector_set_var_p7 (rtx target, rtx val, rtx idx)
{
  machine_mode mode = GET_MODE (target);

  gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));

  machine_mode inner_mode = GET_MODE (val);
  HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);

  int width = GET_MODE_SIZE (inner_mode);
  gcc_assert (width >= 1 && width <= 4);

  int shift = exact_log2 (width);

  machine_mode idx_mode = GET_MODE (idx);

  machine_mode shift_mode;
  rtx (*gen_ashl)(rtx, rtx, rtx);
  rtx (*gen_add)(rtx, rtx, rtx);
  rtx (*gen_sub)(rtx, rtx, rtx);
  rtx (*gen_lvsl)(rtx, rtx);

  if (TARGET_POWERPC64)
    {
      shift_mode = DImode;
      gen_ashl = gen_ashldi3;
      gen_add = gen_adddi3;
      gen_sub = gen_subdi3;
      gen_lvsl = gen_altivec_lvsl_reg_di;
    }
  else
    {
      shift_mode = SImode;
      gen_ashl = gen_ashlsi3;
      gen_add = gen_addsi3;
      gen_sub = gen_subsi3;
      gen_lvsl = gen_altivec_lvsl_reg_si;
    }

  /* idx = idx * width.  */
  rtx tmp = gen_reg_rtx (shift_mode);
  idx = convert_modes (shift_mode, idx_mode, idx, 1);

  emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));

  /* For LE:  idx = idx + 8.  */
  if (!BYTES_BIG_ENDIAN)
    emit_insn (gen_add (tmp, tmp, GEN_INT (8)));
  else
    emit_insn (gen_sub (tmp, GEN_INT (24 - width), tmp));

  /* lxv vs33, mask.
     DImode: 0xffffffffffffffff0000000000000000
     SImode: 0x00000000ffffffff0000000000000000
     HImode: 0x000000000000ffff0000000000000000.
     QImode: 0x00000000000000ff0000000000000000.  */
  rtx mask = gen_reg_rtx (V16QImode);
  rtx mask_v2di = gen_reg_rtx (V2DImode);
  rtvec v = rtvec_alloc (2);
  if (!BYTES_BIG_ENDIAN)
    {
      RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
      RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
    }
  else
    {
      RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
      RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
    }
  emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v)));
  rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0);
  emit_insn (gen_rtx_SET (mask, sub_mask));

  /* mtvsrd[wz] f0,tmp_val.  */
  rtx tmp_val = gen_reg_rtx (SImode);
  if (inner_mode == E_SFmode)
    if (TARGET_DIRECT_MOVE_64BIT)
      emit_insn (gen_movsi_from_sf (tmp_val, val));
    else
      {
	rtx stack = rs6000_allocate_stack_temp (SFmode, false, true);
	emit_insn (gen_movsf_hardfloat (stack, val));
	rtx stack2 = copy_rtx (stack);
	PUT_MODE (stack2, SImode);
	emit_move_insn (tmp_val, stack2);
      }
  else
    tmp_val = force_reg (SImode, val);

  rtx val_v16qi = gen_reg_rtx (V16QImode);
  rtx val_v2di = gen_reg_rtx (V2DImode);
  rtvec vec_val = rtvec_alloc (2);
  if (!BYTES_BIG_ENDIAN)
    {
      RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0);
      RTVEC_ELT (vec_val, 1) = tmp_val;
    }
  else
    {
      RTVEC_ELT (vec_val, 0) = tmp_val;
      RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0);
    }
  emit_insn (
    gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, vec_val)));
  rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0);
  emit_insn (gen_rtx_SET (val_v16qi, sub_val));

  /* lvsl    13,0,idx.  */
  rtx pcv = gen_reg_rtx (V16QImode);
  emit_insn (gen_lvsl (pcv, tmp));

  /* vperm 1,1,1,13.  */
  /* vperm 0,0,0,13.  */
  rtx val_perm = gen_reg_rtx (V16QImode);
  rtx mask_perm = gen_reg_rtx (V16QImode);
  emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv));
  emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));

  rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0);

  /* xxsel 34,34,32,33.  */
  emit_insn (
    gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, mask_perm));
}
/* Set field ELT_RTX of TARGET to VAL.  */

void
rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx reg = gen_reg_rtx (mode);
  rtx mask, mem, x;
  int width = GET_MODE_SIZE (inner_mode);
  int i;

  val = force_reg (GET_MODE (val), val);

  if (VECTOR_MEM_VSX_P (mode))
    {
      if (!CONST_INT_P (elt_rtx))
	{
	  /* For V2DI/V2DF, could leverage the P9 version to generate xxpermdi
	     when elt_rtx is variable.  */
	  if ((TARGET_P9_VECTOR && TARGET_POWERPC64) || width == 8)
	    {
	      rs6000_expand_vector_set_var_p9 (target, val, elt_rtx);
	      return;
	    }
	  else if (TARGET_VSX)
	    {
	      rs6000_expand_vector_set_var_p7 (target, val, elt_rtx);
	      return;
	    }
	  else
	    gcc_assert (CONST_INT_P (elt_rtx));
	}

      rtx insn = NULL_RTX;

      if (mode == V2DFmode)
	insn = gen_vsx_set_v2df (target, target, val, elt_rtx);

      else if (mode == V2DImode)
	insn = gen_vsx_set_v2di (target, target, val, elt_rtx);

      else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
	{
	  if (mode == V4SImode)
	    insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
	  else if (mode == V8HImode)
	    insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
	  else if (mode == V16QImode)
	    insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
	  else if (mode == V4SFmode)
	    insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
	}

      if (insn)
	{
	  emit_insn (insn);
	  return;
	}
    }

  /* Simplify setting single element vectors like V1TImode.  */
  if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode)
      && INTVAL (elt_rtx) == 0)
    {
      emit_move_insn (target, gen_lowpart (mode, val));
      return;
    }

  /* Load single variable value.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
  emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
  x = gen_rtx_UNSPEC (VOIDmode,
		      gen_rtvec (1, const0_rtx), UNSPEC_LVE);
  emit_insn (gen_rtx_PARALLEL (VOIDmode,
			       gen_rtvec (2,
					  gen_rtx_SET (reg, mem),
					  x)));

  /* Linear sequence.  */
  mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
  for (i = 0; i < 16; ++i)
    XVECEXP (mask, 0, i) = GEN_INT (i);

  /* Set permute mask to insert element into target.  */
  for (i = 0; i < width; ++i)
    XVECEXP (mask, 0, INTVAL (elt_rtx) * width + i) = GEN_INT (i + 0x10);
  x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));

  if (BYTES_BIG_ENDIAN)
    x = gen_rtx_UNSPEC (mode,
			gen_rtvec (3, target, reg,
				   force_reg (V16QImode, x)),
			UNSPEC_VPERM);
  else
    {
      if (TARGET_P9_VECTOR)
	x = gen_rtx_UNSPEC (mode,
			    gen_rtvec (3, reg, target,
				       force_reg (V16QImode, x)),
			    UNSPEC_VPERMR);
      else
	{
	  /* Invert selector.  We prefer to generate VNAND on P8 so
	     that future fusion opportunities can kick in, but must
	     generate VNOR elsewhere.  */
	  rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
	  rtx iorx = (TARGET_P8_VECTOR
		      ? gen_rtx_IOR (V16QImode, notx, notx)
		      : gen_rtx_AND (V16QImode, notx, notx));
	  rtx tmp = gen_reg_rtx (V16QImode);
	  emit_insn (gen_rtx_SET (tmp, iorx));

	  /* Permute with operands reversed and adjusted selector.  */
	  x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
			      UNSPEC_VPERM);
	}
    }

  emit_insn (gen_rtx_SET (target, x));
}
/* Extract field ELT from VEC into TARGET.  */

void
rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
{
  machine_mode mode = GET_MODE (vec);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx mem;

  if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
    {
      switch (mode)
	{
	default:
	  break;
	case E_V1TImode:
	  emit_move_insn (target, gen_lowpart (TImode, vec));
	  break;
	case E_V2DFmode:
	  emit_insn (gen_vsx_extract_v2df (target, vec, elt));
	  return;
	case E_V2DImode:
	  emit_insn (gen_vsx_extract_v2di (target, vec, elt));
	  return;
	case E_V4SFmode:
	  emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
	  return;
	case E_V16QImode:
	  if (TARGET_DIRECT_MOVE_64BIT)
	    {
	      emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
	      return;
	    }
	  else
	    break;
	case E_V8HImode:
	  if (TARGET_DIRECT_MOVE_64BIT)
	    {
	      emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
	      return;
	    }
	  else
	    break;
	case E_V4SImode:
	  if (TARGET_DIRECT_MOVE_64BIT)
	    {
	      emit_insn (gen_vsx_extract_v4si (target, vec, elt));
	      return;
	    }
	  break;
	}
    }
  else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
	   && TARGET_DIRECT_MOVE_64BIT)
    {
      if (GET_MODE (elt) != DImode)
	{
	  rtx tmp = gen_reg_rtx (DImode);
	  convert_move (tmp, elt, 0);
	  elt = tmp;
	}
      else if (!REG_P (elt))
	elt = force_reg (DImode, elt);

      switch (mode)
	{
	case E_V1TImode:
	  emit_move_insn (target, gen_lowpart (TImode, vec));
	  return;

	case E_V2DFmode:
	  emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
	  return;

	case E_V2DImode:
	  emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
	  return;

	case E_V4SFmode:
	  emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
	  return;

	case E_V4SImode:
	  emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
	  return;

	case E_V8HImode:
	  emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
	  return;

	case E_V16QImode:
	  emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
	  return;

	default:
	  gcc_unreachable ();
	}
    }

  /* Allocate mode-sized buffer.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));

  emit_move_insn (mem, vec);
  if (CONST_INT_P (elt))
    {
      int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);

      /* Add offset to field within buffer matching vector element.  */
      mem = adjust_address_nv (mem, inner_mode,
			       modulo_elt * GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
    }
  else
    {
      unsigned int ele_size = GET_MODE_SIZE (inner_mode);
      rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);

      elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
      elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
      rtx new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
      new_addr = change_address (mem, inner_mode, new_addr);
      emit_move_insn (target, new_addr);
    }
}
/* Return the offset within a memory object (MEM) of a vector type to a given
   element within the vector (ELEMENT) with an element size (SCALAR_SIZE).  If
   the element is constant, we return a constant integer.

   Otherwise, we use a base register temporary to calculate the offset after
   masking it to fit within the bounds of the vector and scaling it.  The
   masking is required by the 64-bit ELF version 2 ABI for the vec_extract
   built-in function.  */

static rtx
get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
{
  if (CONST_INT_P (element))
    return GEN_INT (INTVAL (element) * scalar_size);

  /* All insns should use the 'Q' constraint (address is a single register) if
     the element number is not a constant.  */
  gcc_assert (satisfies_constraint_Q (mem));

  /* Mask the element to make sure the element number is between 0 and the
     maximum number of elements - 1 so that we don't generate an address
     outside the vector.  */
  rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
  rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
  emit_insn (gen_rtx_SET (base_tmp, and_op));

  /* Shift the element to get the byte offset from the element number.  */
  int shift = exact_log2 (scalar_size);
  gcc_assert (shift >= 0);

  if (shift > 0)
    {
      rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
      emit_insn (gen_rtx_SET (base_tmp, shift_op));
    }

  return base_tmp;
}
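/* E.g. for a V4SImode vector in memory, a variable ELEMENT is masked with 3
   and then shifted left by 2: element number 5 wraps to element 1 and
   yields byte offset 4, matching the ELFv2 vec_extract semantics.  */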
/* Helper function to update PC-relative addresses when we are adjusting a
   memory address (ADDR) to a vector to point to a scalar field within the
   vector with a constant offset (ELEMENT_OFFSET).  If the address is not
   valid, we can use the base register temporary (BASE_TMP) to form the
   address.  */

static rtx
adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
{
  rtx new_addr = NULL;

  gcc_assert (CONST_INT_P (element_offset));

  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);

      if (CONST_INT_P (op1))
	{
	  HOST_WIDE_INT offset
	    = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);

	  if (offset == 0)
	    new_addr = op0;

	  else
	    {
	      rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
	      new_addr = gen_rtx_CONST (Pmode, plus);
	    }
	}

      else
	{
	  emit_move_insn (base_tmp, addr);
	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
	}
    }

  else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
    {
      rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
      new_addr = gen_rtx_CONST (Pmode, plus);
    }

  else
    gcc_unreachable ();

  return new_addr;
}
/* Adjust a memory address (MEM) of a vector type to point to a scalar field
   within the vector (ELEMENT) with a mode (SCALAR_MODE).  Use a base register
   temporary (BASE_TMP) to fix up the address.  Return the new memory address
   that is valid for reads or writes to a given register (SCALAR_REG).

   This function is expected to be called after reload is completed when we are
   splitting insns.  The temporary BASE_TMP might be set multiple times with
   this code.  */

rtx
rs6000_adjust_vec_address (rtx scalar_reg,
			   rtx mem,
			   rtx element,
			   rtx base_tmp,
			   machine_mode scalar_mode)
{
  unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
  rtx addr = XEXP (mem, 0);
  rtx new_addr;

  gcc_assert (!reg_mentioned_p (base_tmp, addr));
  gcc_assert (!reg_mentioned_p (base_tmp, element));

  /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
  gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);

  /* Calculate what we need to add to the address to get the element
     address.  */
  rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);

  /* Create the new address pointing to the element within the vector.  If we
     are adding 0, we don't have to change the address.  */
  if (element_offset == const0_rtx)
    new_addr = addr;

  /* A simple indirect address can be converted into a reg + offset
     address.  */
  else if (REG_P (addr) || SUBREG_P (addr))
    new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);

  /* For references to local static variables, fold a constant offset into the
     address.  */
  else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
    new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);

  /* Optimize D-FORM addresses with constant offset with a constant element, to
     include the element offset in the address directly.  */
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);

      gcc_assert (REG_P (op0) || SUBREG_P (op0));
      if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
	{
	  /* op0 should never be r0, because r0+offset is not valid.  But it
	     doesn't hurt to make sure it is not r0.  */
	  gcc_assert (reg_or_subregno (op0) != 0);

	  /* D-FORM address with constant element number.  */
	  HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
	  rtx offset_rtx = GEN_INT (offset);
	  new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
	}
      else
	{
	  /* If we don't have a D-FORM address with a constant element number,
	     add the two elements in the current address.  Then add the offset.

	     Previously, we tried to add the offset to OP1 and change the
	     address to an X-FORM format adding OP0 and BASE_TMP, but it became
	     complicated because we had to verify that op1 was not GPR0 and we
	     had a constant element offset (due to the way ADDI is defined).
	     By doing the add of OP0 and OP1 first, and then adding in the
	     offset, it has the benefit that if D-FORM instructions are
	     allowed, the offset is part of the memory access to the vector
	     element.  */
	  emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
	}
    }

  else
    {
      emit_move_insn (base_tmp, addr);
      new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
    }

  /* If the address isn't valid, move the address into the temporary base
     register.  Some reasons it could not be valid include:

     The address offset overflowed the 16 or 34 bit offset size;
     We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
     We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
     Only X_FORM loads can be done, and the address is D_FORM.  */

  enum insn_form iform
    = address_to_insn_form (new_addr, scalar_mode,
			    reg_to_non_prefixed (scalar_reg, scalar_mode));

  if (iform == INSN_FORM_BAD)
    {
      emit_move_insn (base_tmp, new_addr);
      new_addr = base_tmp;
    }

  return change_address (mem, scalar_mode, new_addr);
}
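
/* For example, adjusting a D-form address (plus (reg 9) (const_int 16)) by a
   constant element offset of 8 folds to (plus (reg 9) (const_int 24)), while
   a variable element offset first emits base_tmp = r9 + 16 and then forms the
   X-form address (plus base_tmp element_offset).  */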
/* Split a variable vec_extract operation into the component instructions.  */

void
rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
			      rtx tmp_altivec)
{
  machine_mode mode = GET_MODE (src);
  machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
  unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
  int byte_shift = exact_log2 (scalar_size);

  gcc_assert (byte_shift >= 0);

  /* If we are given a memory address, optimize to load just the element.  We
     don't have to adjust the vector element number on little endian
     systems.  */
  if (MEM_P (src))
    emit_move_insn (dest,
		    rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
					       scalar_mode));

  else if (REG_P (src) || SUBREG_P (src))
    {
      int num_elements = GET_MODE_NUNITS (mode);
      int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
      int bit_shift = 7 - exact_log2 (num_elements);
      rtx element2;
      unsigned int dest_regno = reg_or_subregno (dest);
      unsigned int src_regno = reg_or_subregno (src);
      unsigned int element_regno = reg_or_subregno (element);

      gcc_assert (REG_P (tmp_gpr));

      /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
	 a general purpose register.  */
      if (TARGET_P9_VECTOR
	  && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
	  && INT_REGNO_P (dest_regno)
	  && ALTIVEC_REGNO_P (src_regno)
	  && INT_REGNO_P (element_regno))
	{
	  rtx dest_si = gen_rtx_REG (SImode, dest_regno);
	  rtx element_si = gen_rtx_REG (SImode, element_regno);

	  if (mode == V16QImode)
	    emit_insn (BYTES_BIG_ENDIAN
		       ? gen_vextublx (dest_si, element_si, src)
		       : gen_vextubrx (dest_si, element_si, src));

	  else if (mode == V8HImode)
	    {
	      rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
	      emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
	      emit_insn (BYTES_BIG_ENDIAN
			 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
			 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
	    }

	  else
	    {
	      rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
	      emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
	      emit_insn (BYTES_BIG_ENDIAN
			 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
			 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
	    }

	  return;
	}

      gcc_assert (REG_P (tmp_altivec));

      /* For little endian, adjust element ordering.  For V2DI/V2DF, we can use
	 an XOR, otherwise we need to subtract.  The shift amount is so VSLO
	 will shift the element into the upper position (adding 3 to convert a
	 byte shift into a bit shift).  */
      if (scalar_size == 8)
	{
	  if (!BYTES_BIG_ENDIAN)
	    {
	      emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
	      element2 = tmp_gpr;
	    }
	  else
	    element2 = element;

	  /* Generate RLDIC directly to shift left 6 bits and retrieve 1
	     bit.  */
	  emit_insn (gen_rtx_SET (tmp_gpr,
				  gen_rtx_AND (DImode,
					       gen_rtx_ASHIFT (DImode,
							       element2,
							       GEN_INT (6)),
					       GEN_INT (64))));
	}
      else
	{
	  if (!BYTES_BIG_ENDIAN)
	    {
	      rtx num_ele_m1 = GEN_INT (num_elements - 1);

	      emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
	      emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
	      element2 = tmp_gpr;
	    }
	  else
	    element2 = element;

	  emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
	}

      /* Get the value into the lower byte of the Altivec register where VSLO
	 expects it.  */
      if (TARGET_P9_VECTOR)
	emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
      else if (can_create_pseudo_p ())
	emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
      else
	{
	  rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
	  emit_move_insn (tmp_di, tmp_gpr);
	  emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
	}

      /* Do the VSLO to get the value into the final location.  */
      switch (mode)
	{
	case E_V2DFmode:
	  emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
	  return;

	case E_V2DImode:
	  emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
	  return;

	case E_V4SFmode:
	  {
	    rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
	    rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
	    rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
	    emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
					  tmp_altivec));
	    emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
	    return;
	  }

	case E_V4SImode:
	case E_V8HImode:
	case E_V16QImode:
	  {
	    rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
	    rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
	    rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
	    emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
					  tmp_altivec));
	    emit_move_insn (tmp_gpr_di, tmp_altivec_di);
	    emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
				    GEN_INT (64 - bits_in_element)));
	    return;
	  }

	default:
	  gcc_unreachable ();
	}
    }
  else
    gcc_unreachable ();
}
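
/* As an example of the shift arithmetic above: for V4SImode,
   bit_shift = 7 - exact_log2 (4) = 5, so element 2 becomes 2 << 5 = 64,
   the bit count VSLO needs to move the 4-byte element at byte offset 8
   into the upper position.  */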
/* Return alignment of TYPE.  Existing alignment is ALIGN.  HOW
   selects whether the alignment is abi mandated, optional, or
   both abi and optional alignment.  */

unsigned int
rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
{
  if (how != align_opt)
    {
      if (VECTOR_TYPE_P (type) && align < 128)
	align = 128;
    }

  if (how != align_abi)
    {
      if (TREE_CODE (type) == ARRAY_TYPE
	  && TYPE_MODE (TREE_TYPE (type)) == QImode)
	{
	  if (align < BITS_PER_WORD)
	    align = BITS_PER_WORD;
	}
    }

  return align;
}
/* Implement TARGET_SLOW_UNALIGNED_ACCESS.  Altivec vector memory
   instructions simply ignore the low bits; VSX memory instructions
   are aligned to 4 or 8 bytes.  */

static bool
rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
{
  return (STRICT_ALIGNMENT
	  || (!TARGET_EFFICIENT_UNALIGNED_VSX
	      && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
		  || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
		      && (int) align < VECTOR_ALIGN (mode)))));
}
/* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints.  */

unsigned int
rs6000_special_adjust_field_align (tree type, unsigned int computed)
{
  if (computed <= 32 || TYPE_PACKED (type))
    return computed;

  /* Strip initial arrays.  */
  while (TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  /* If RECORD or UNION, recursively find the first field.  */
  while (AGGREGATE_TYPE_P (type))
    {
      tree field = TYPE_FIELDS (type);

      /* Skip all non field decls.  */
      while (field != NULL
	     && (TREE_CODE (field) != FIELD_DECL
		 || DECL_FIELD_ABI_IGNORED (field)))
	field = DECL_CHAIN (field);

      if (! field)
	break;

      /* A packed field does not contribute any extra alignment.  */
      if (DECL_PACKED (field))
	return computed;

      type = TREE_TYPE (field);

      /* Strip arrays.  */
      while (TREE_CODE (type) == ARRAY_TYPE)
	type = TREE_TYPE (type);
    }

  if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
      && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
    computed = MIN (computed, 32);

  return computed;
}
/* AIX increases natural record alignment to doubleword if the innermost first
   field is an FP double while the FP fields remain word aligned.
   Only called if TYPE initially is a RECORD or UNION.  */

unsigned int
rs6000_special_round_type_align (tree type, unsigned int computed,
				 unsigned int specified)
{
  unsigned int align = MAX (computed, specified);

  if (TYPE_PACKED (type) || align >= 64)
    return align;

  /* If RECORD or UNION, recursively find the first field.  */
  do
    {
      tree field = TYPE_FIELDS (type);

      /* Skip all non field decls.  */
      while (field != NULL
	     && (TREE_CODE (field) != FIELD_DECL
		 || DECL_FIELD_ABI_IGNORED (field)))
	field = DECL_CHAIN (field);

      if (! field)
	break;

      /* A packed field does not contribute any extra alignment.  */
      if (DECL_PACKED (field))
	return align;

      type = TREE_TYPE (field);

      /* Strip arrays.  */
      while (TREE_CODE (type) == ARRAY_TYPE)
	type = TREE_TYPE (type);
    } while (AGGREGATE_TYPE_P (type));

  if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
      && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
    align = MAX (align, 64);

  return align;
}
/* Darwin increases record alignment to the natural alignment of
   the first field.  */

unsigned int
darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
					unsigned int specified)
{
  unsigned int align = MAX (computed, specified);

  if (TYPE_PACKED (type))
    return align;

  /* Find the first field, looking down into aggregates.  */
  do {
    tree field = TYPE_FIELDS (type);
    /* Skip all non field decls.  */
    while (field != NULL
	   && (TREE_CODE (field) != FIELD_DECL
	       || DECL_FIELD_ABI_IGNORED (field)))
      field = DECL_CHAIN (field);
    if (! field)
      break;
    /* A packed field does not contribute any extra alignment.  */
    if (DECL_PACKED (field))
      return align;
    type = TREE_TYPE (field);
    while (TREE_CODE (type) == ARRAY_TYPE)
      type = TREE_TYPE (type);
  } while (AGGREGATE_TYPE_P (type));

  if (type != error_mark_node && ! AGGREGATE_TYPE_P (type)
      && ! TYPE_PACKED (type) && maximum_field_alignment == 0)
    align = MAX (align, TYPE_ALIGN (type));

  return align;
}
/* Return 1 for an operand in small memory on V.4/eabi.  */

int
small_data_operand (rtx op ATTRIBUTE_UNUSED,
		    machine_mode mode ATTRIBUTE_UNUSED)
{
#if TARGET_ELF
  rtx sym_ref;

  if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
    return 0;

  if (DEFAULT_ABI != ABI_V4)
    return 0;

  if (SYMBOL_REF_P (op))
    sym_ref = op;

  else if (GET_CODE (op) != CONST
	   || GET_CODE (XEXP (op, 0)) != PLUS
	   || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
	   || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
    return 0;

  else
    {
      rtx sum = XEXP (op, 0);
      HOST_WIDE_INT summand;

      /* We have to be careful here, because it is the referenced address
	 that must be 32k from _SDA_BASE_, not just the symbol.  */
      summand = INTVAL (XEXP (sum, 1));
      if (summand < 0 || summand > g_switch_value)
	return 0;

      sym_ref = XEXP (sum, 0);
    }

  return SYMBOL_REF_SMALL_P (sym_ref);
#else
  return 0;
#endif
}
/* Return true if either operand is a general purpose register.  */

bool
gpr_or_gpr_p (rtx op0, rtx op1)
{
  return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
	  || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
}
/* Return true if this is a move direct operation between GPR registers and
   floating point/VSX registers.  */

bool
direct_move_p (rtx op0, rtx op1)
{
  if (!REG_P (op0) || !REG_P (op1))
    return false;

  if (!TARGET_DIRECT_MOVE)
    return false;

  int regno0 = REGNO (op0);
  int regno1 = REGNO (op1);
  if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
    return false;

  if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
    return true;

  if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
    return true;

  return false;
}
/* Return true if ADDR is an acceptable address for a quad memory operation of
   mode MODE (either LQ/STQ for general purpose registers, or LXV/STXV for
   vector registers under ISA 3.0).  STRICT is passed through to the base
   register validity checks.  */

bool
quad_address_p (rtx addr, machine_mode mode, bool strict)
{
  rtx op0, op1;

  if (GET_MODE_SIZE (mode) < 16)
    return false;

  if (legitimate_indirect_address_p (addr, strict))
    return true;

  if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
    return false;

  /* Is this a valid prefixed address?  If the bottom four bits of the offset
     are non-zero, we could use a prefixed instruction (which does not have the
     DQ-form constraint that the traditional instruction had) instead of
     forcing the unaligned offset to a GPR.  */
  if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
    return true;

  if (GET_CODE (addr) != PLUS)
    return false;

  op0 = XEXP (addr, 0);
  if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
    return false;

  op1 = XEXP (addr, 1);
  if (!CONST_INT_P (op1))
    return false;

  return quad_address_offset_p (INTVAL (op1));
}
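
/* E.g. (plus (reg 4) (const_int 32)) is an acceptable quad address because
   32 is a 16-byte aligned offset that fits the DQ field, whereas
   (plus (reg 4) (const_int 20)) is not, unless it was already accepted as a
   prefixed address above.  */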
/* Return true if this is a load or store quad operation.  This function does
   not handle the atomic quad memory instructions.  */

bool
quad_load_store_p (rtx op0, rtx op1)
{
  bool ret;

  if (!TARGET_QUAD_MEMORY)
    ret = false;

  else if (REG_P (op0) && MEM_P (op1))
    ret = (quad_int_reg_operand (op0, GET_MODE (op0))
	   && quad_memory_operand (op1, GET_MODE (op1))
	   && !reg_overlap_mentioned_p (op0, op1));

  else if (MEM_P (op0) && REG_P (op1))
    ret = (quad_memory_operand (op0, GET_MODE (op0))
	   && quad_int_reg_operand (op1, GET_MODE (op1)));

  else
    ret = false;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n========== quad_load_store, return %s\n",
	       ret ? "true" : "false");
      debug_rtx (gen_rtx_SET (op0, op1));
    }

  return ret;
}
/* Given an address, return a constant offset term if one exists.  */

static rtx
address_offset (rtx op)
{
  if (GET_CODE (op) == PRE_INC
      || GET_CODE (op) == PRE_DEC)
    op = XEXP (op, 0);
  else if (GET_CODE (op) == PRE_MODIFY
	   || GET_CODE (op) == LO_SUM)
    op = XEXP (op, 1);

  if (GET_CODE (op) == CONST)
    op = XEXP (op, 0);

  if (GET_CODE (op) == PLUS)
    op = XEXP (op, 1);

  if (CONST_INT_P (op))
    return op;

  return 0;
}
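
/* E.g. for (lo_sum (reg 3) (const (plus (symbol_ref "x") (const_int 8))))
   this returns (const_int 8); for a plain (reg 3) it returns 0.  */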
/* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
   the mode.  If we can't find (or don't know) the alignment of the symbol
   we assume (optimistically) that it's sufficiently aligned [??? maybe we
   should be pessimistic].  Offsets are validated in the same way as for
   reg + offset.  */

static bool
darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
{
  /* We should not get here with this.  */
  gcc_checking_assert (! mode_supports_dq_form (mode));

  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* If we are building PIC code, then any symbol must be wrapped in an
     UNSPEC_MACHOPIC_OFFSET so that it will get the picbase subtracted.  */
  bool machopic_offs_p = false;
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
    {
      x = XVECEXP (x, 0, 0);
      machopic_offs_p = true;
    }

  rtx sym = NULL_RTX;
  unsigned HOST_WIDE_INT offset = 0;

  if (GET_CODE (x) == PLUS)
    {
      sym = XEXP (x, 0);
      if (! SYMBOL_REF_P (sym))
	return false;
      if (!CONST_INT_P (XEXP (x, 1)))
	return false;
      offset = INTVAL (XEXP (x, 1));
    }
  else if (SYMBOL_REF_P (x))
    sym = x;
  else if (CONST_INT_P (x))
    offset = INTVAL (x);
  else if (GET_CODE (x) == LABEL_REF)
    offset = 0; // We assume code labels are Pmode aligned
  else
    return false; // not sure what we have here.

  /* If we don't know the alignment of the thing to which the symbol refers,
     we assume optimistically it is "enough".
     ??? maybe we should be pessimistic instead.  */
  unsigned align = 0;

  if (sym)
    {
      tree decl = SYMBOL_REF_DECL (sym);
      /* As noted above, PIC code cannot use a bare SYMBOL_REF.  */
      if (TARGET_MACHO && flag_pic && !machopic_offs_p)
	return false;
#if TARGET_MACHO
      if (MACHO_SYMBOL_INDIRECTION_P (sym))
	/* The decl in an indirection symbol is the original one, which might
	   be less aligned than the indirection.  Our indirections are always
	   pointer-aligned.  */
	align = GET_MODE_ALIGNMENT (Pmode) / BITS_PER_UNIT;
      else
#endif
      if (decl && DECL_ALIGN (decl))
	align = DECL_ALIGN_UNIT (decl);
    }

  unsigned int extra = 0;
  switch (mode)
    {
    case E_DFmode:
    case E_DDmode:
    case E_DImode:
      /* If we are using VSX scalar loads, restrict ourselves to reg+reg
	 addressing.  */
      if (VECTOR_MEM_VSX_P (mode))
	return false;

      if (!TARGET_POWERPC64)
	extra = 4;
      else if ((offset & 3) || (align & 3))
	return false;
      break;

    case E_TFmode:
    case E_IFmode:
    case E_KFmode:
    case E_TDmode:
    case E_TImode:
    case E_PTImode:
      extra = 8;
      if (!TARGET_POWERPC64)
	extra = 12;
      else if ((offset & 3) || (align & 3))
	return false;
      break;

    default:
      break;
    }

  /* We only care if the access(es) would cause a change to the high part.  */
  offset = sext_hwi (offset, 16);
  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
/* Return true if the MEM operand is a memory operand suitable for use
   with a (full width, possibly multiple) gpr load/store.  On
   powerpc64 this means the offset must be divisible by 4.
   Implements 'Y' constraint.

   Accept direct, indexed, offset, lo_sum and tocref.  Since this is
   a constraint function we know the operand has satisfied a suitable
   memory predicate.

   Offsetting a lo_sum should not be allowed, except where we know by
   alignment that a 32k boundary is not crossed.  Note that by
   "offsetting" here we mean a further offset to access parts of the
   MEM.  It's fine to have a lo_sum where the inner address is offset
   from a sym, since the same sym+offset will appear in the high part
   of the address calculation.  */

bool
mem_operand_gpr (rtx op, machine_mode mode)
{
  unsigned HOST_WIDE_INT offset;
  int extra;
  rtx addr = XEXP (op, 0);

  /* PR85755: Allow PRE_INC and PRE_DEC addresses.  */
  if (TARGET_UPDATE
      && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
      && mode_supports_pre_incdec_p (mode)
      && legitimate_indirect_address_p (XEXP (addr, 0), false))
    return true;

  /* Allow prefixed instructions if supported.  If the bottom two bits of the
     offset are non-zero, we could use a prefixed instruction (which does not
     have the DS-form constraint that the traditional instruction had) instead
     of forcing the unaligned offset to a GPR.  */
  if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
    return true;

  /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
     really OK.  Doing this early avoids teaching all the other machinery
     about them.  */
  if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
    return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);

  /* Only allow offsettable addresses.  See PRs 83969 and 84279.  */
  if (!rs6000_offsettable_memref_p (op, mode, false))
    return false;

  op = address_offset (addr);
  if (op == NULL_RTX)
    return true;

  offset = INTVAL (op);
  if (TARGET_POWERPC64 && (offset & 3) != 0)
    return false;

  extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
  if (extra < 0)
    extra = 0;

  if (GET_CODE (addr) == LO_SUM)
    /* For lo_sum addresses, we must allow any offset except one that
       causes a wrap, so test only the low 16 bits.  */
    offset = sext_hwi (offset, 16);

  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
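
/* E.g. on powerpc64 a DImode access at (plus (reg 3) (const_int 6)) is
   rejected because the offset is not a multiple of 4 (ld/std are DS-form),
   while (plus (reg 3) (const_int 8)) is accepted.  */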
/* As above, but for DS-FORM VSX insns.  Unlike mem_operand_gpr,
   enforce an offset divisible by 4 even for 32-bit.  */

bool
mem_operand_ds_form (rtx op, machine_mode mode)
{
  unsigned HOST_WIDE_INT offset;
  int extra;
  rtx addr = XEXP (op, 0);

  /* Allow prefixed instructions if supported.  If the bottom two bits of the
     offset are non-zero, we could use a prefixed instruction (which does not
     have the DS-form constraint that the traditional instruction had) instead
     of forcing the unaligned offset to a GPR.  */
  if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
    return true;

  if (!offsettable_address_p (false, mode, addr))
    return false;

  op = address_offset (addr);
  if (op == NULL_RTX)
    return true;

  offset = INTVAL (op);
  if ((offset & 3) != 0)
    return false;

  extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
  if (extra < 0)
    extra = 0;

  if (GET_CODE (addr) == LO_SUM)
    /* For lo_sum addresses, we must allow any offset except one that
       causes a wrap, so test only the low 16 bits.  */
    offset = sext_hwi (offset, 16);

  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
/* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p.  */

static bool
reg_offset_addressing_ok_p (machine_mode mode)
{
  switch (mode)
    {
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SFmode:
    case E_V4SImode:
    case E_V2DFmode:
    case E_V2DImode:
    case E_V1TImode:
    case E_TImode:
    case E_TFmode:
    case E_KFmode:
      /* AltiVec/VSX vector modes.  Only reg+reg addressing was valid until the
	 ISA 3.0 vector d-form addressing mode was added.  While TImode is not
	 a vector mode, if we want to use the VSX registers to move it around,
	 we need to restrict ourselves to reg+reg addressing.  Similarly for
	 IEEE 128-bit floating point that is passed in a single vector
	 register.  */
      if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
	return mode_supports_dq_form (mode);
      break;

      /* The vector pair/quad types support offset addressing if the
	 underlying vectors support offset addressing.  */
    case E_OOmode:
    case E_XOmode:
      return TARGET_MMA;

    case E_SDmode:
      /* If we can do direct load/stores of SDmode, restrict it to reg+reg
	 addressing for the LFIWZX and STFIWX instructions.  */
      if (TARGET_NO_SDMODE_STACK)
	return false;
      break;

    default:
      break;
    }

  return true;
}
/* Return true if OP refers to one of the virtual stack registers, possibly
   with a constant offset added.  */

static bool
virtual_stack_registers_memory_p (rtx op)
{
  int regnum;

  if (REG_P (op))
    regnum = REGNO (op);

  else if (GET_CODE (op) == PLUS
	   && REG_P (XEXP (op, 0))
	   && CONST_INT_P (XEXP (op, 1)))
    regnum = REGNO (XEXP (op, 0));

  else
    return false;

  return (regnum >= FIRST_VIRTUAL_REGISTER
	  && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
}
/* Return true if a MODE sized memory accesses to OP plus OFFSET
   is known to not straddle a 32k boundary.  This function is used
   to determine whether -mcmodel=medium code can use TOC pointer
   relative addressing for OP.  This means the alignment of the TOC
   pointer must also be taken into account, and unfortunately that is
   only 8 bytes.  */

#ifndef POWERPC64_TOC_POINTER_ALIGNMENT
#define POWERPC64_TOC_POINTER_ALIGNMENT 8
#endif

static bool
offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
			     machine_mode mode)
{
  tree decl;
  unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;

  if (!SYMBOL_REF_P (op))
    return false;

  /* ISA 3.0 vector d-form addressing is restricted, don't allow
     SYMBOL_REF.  */
  if (mode_supports_dq_form (mode))
    return false;

  dsize = GET_MODE_SIZE (mode);
  decl = SYMBOL_REF_DECL (op);
  if (!decl)
    {
      if (dsize == 0)
	return false;

      /* -fsection-anchors loses the original SYMBOL_REF_DECL when
	 replacing memory addresses with an anchor plus offset.  We
	 could find the decl by rummaging around in the block->objects
	 VEC for the given offset but that seems like too much work.  */
      dalign = BITS_PER_UNIT;
      if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
	  && SYMBOL_REF_ANCHOR_P (op)
	  && SYMBOL_REF_BLOCK (op) != NULL)
	{
	  struct object_block *block = SYMBOL_REF_BLOCK (op);

	  dalign = block->alignment;
	  offset += SYMBOL_REF_BLOCK_OFFSET (op);
	}
      else if (CONSTANT_POOL_ADDRESS_P (op))
	{
	  /* It would be nice to have get_pool_align()..  */
	  machine_mode cmode = get_pool_mode (op);

	  dalign = GET_MODE_ALIGNMENT (cmode);
	}
    }
  else if (DECL_P (decl))
    {
      dalign = DECL_ALIGN (decl);

      if (dsize == 0)
	{
	  /* Allow BLKmode when the entire object is known to not
	     cross a 32k boundary.  */
	  if (!DECL_SIZE_UNIT (decl))
	    return false;

	  if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
	    return false;

	  dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
	  if (dsize > 32768)
	    return false;

	  dalign /= BITS_PER_UNIT;
	  if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
	    dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
	  return dalign >= dsize;
	}
    }
  else
    gcc_unreachable ();

  /* Find how many bits of the alignment we know for this access.  */
  dalign /= BITS_PER_UNIT;
  if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
    dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
  mask = dalign - 1;
  lsb = offset & -offset;
  mask &= lsb - 1;
  dalign = mask + 1;

  return dalign >= dsize;
}
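
/* Worked example of the mask arithmetic above: with a decl aligned to
   8 bytes and OFFSET of 12, mask = 7, lsb = 12 & -12 = 4, mask &= 3, so
   dalign becomes 4: a 4-byte access at that offset cannot straddle a 32k
   boundary, but an 8-byte access might.  */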
static bool
constant_pool_expr_p (rtx op)
{
  rtx base, offset;

  split_const (op, &base, &offset);
  return (SYMBOL_REF_P (base)
	  && CONSTANT_POOL_ADDRESS_P (base)
	  && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
}
/* Create a TOC reference for symbol_ref SYMBOL.  If LARGETOC_REG is non-null,
   use that as the register to put the HIGH value into if register allocation
   is already done.  */

rtx
create_TOC_reference (rtx symbol, rtx largetoc_reg)
{
  rtx tocrel, tocreg, hi;

  gcc_assert (TARGET_TOC);

  if (TARGET_DEBUG_ADDR)
    {
      if (SYMBOL_REF_P (symbol))
	fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
		 XSTR (symbol, 0));
      else
	fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
		 GET_RTX_NAME (GET_CODE (symbol)));
      debug_rtx (symbol);
    }

  if (!can_create_pseudo_p ())
    df_set_regs_ever_live (TOC_REGISTER, true);

  tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
  tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
  if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
    return tocrel;

  hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
  if (largetoc_reg != NULL)
    {
      emit_move_insn (largetoc_reg, hi);
      hi = largetoc_reg;
    }
  return gen_rtx_LO_SUM (Pmode, hi, tocrel);
}
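
/* For -mcmodel=small (or while pseudos can still be created) the result is
   just

	(unspec [(symbol_ref) (reg 2)] UNSPEC_TOCREL)

   whereas for the medium/large code models after reload it is split into

	(lo_sum (high (unspec ...)) (unspec ...))

   with the HIGH part optionally forced into LARGETOC_REG.  */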
/* These are only used to pass through from print_operand/print_operand_address
   to rs6000_output_addr_const_extra over the intervening function
   output_addr_const which is not target code.  */
static const_rtx tocrel_base_oac, tocrel_offset_oac;

/* Return true if OP is a toc pointer relative address (the output
   of create_TOC_reference).  If STRICT, do not match non-split
   -mcmodel=large/medium toc pointer relative addresses.  If the pointers
   are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
   TOCREL_OFFSET_RET respectively.  */

bool
toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
		     const_rtx *tocrel_offset_ret)
{
  if (!TARGET_TOC)
    return false;

  if (TARGET_CMODEL != CMODEL_SMALL)
    {
      /* When strict ensure we have everything tidy.  */
      if (strict
	  && !(GET_CODE (op) == LO_SUM
	       && REG_P (XEXP (op, 0))
	       && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
	return false;

      /* When not strict, allow non-split TOC addresses and also allow
	 (lo_sum (high ..)) TOC addresses created during reload.  */
      if (GET_CODE (op) == LO_SUM)
	op = XEXP (op, 1);
    }

  const_rtx tocrel_base = op;
  const_rtx tocrel_offset = const0_rtx;

  if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
    {
      tocrel_base = XEXP (op, 0);
      tocrel_offset = XEXP (op, 1);
    }

  if (tocrel_base_ret)
    *tocrel_base_ret = tocrel_base;
  if (tocrel_offset_ret)
    *tocrel_offset_ret = tocrel_offset;

  return (GET_CODE (tocrel_base) == UNSPEC
	  && XINT (tocrel_base, 1) == UNSPEC_TOCREL
	  && REG_P (XVECEXP (tocrel_base, 0, 1))
	  && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
}
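
/* E.g. (plus (unspec [(symbol_ref) (reg 2)] UNSPEC_TOCREL) (const_int 8))
   matches on targets where r2 is the TOC pointer, returning the unspec as
   the base and (const_int 8) as the offset.  */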
/* Return true if X is a constant pool address, and also for cmodel=medium
   if X is a toc-relative address known to be offsettable within MODE.  */

bool
legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
				    bool strict)
{
  const_rtx tocrel_base, tocrel_offset;
  return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
	  && (TARGET_CMODEL != CMODEL_MEDIUM
	      || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
	      || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
					      INTVAL (tocrel_offset), mode)));
}
/* Return true if X qualifies for V.4 small data addressing in MODE.  */

static bool
legitimate_small_data_p (machine_mode mode, rtx x)
{
  return (DEFAULT_ABI == ABI_V4
	  && !flag_pic && !TARGET_TOC
	  && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
	  && small_data_operand (x, mode));
}
/* Return true if X is a valid reg + constant-offset address for MODE.
   WORST_CASE requests the conservative checks used when the register bank
   that will hold the value is not yet known.  */

bool
rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
				    bool strict, bool worst_case)
{
  unsigned HOST_WIDE_INT offset;
  unsigned int extra;

  if (GET_CODE (x) != PLUS)
    return false;
  if (!REG_P (XEXP (x, 0)))
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
    return false;
  if (mode_supports_dq_form (mode))
    return quad_address_p (x, mode, strict);
  if (!reg_offset_addressing_ok_p (mode))
    return virtual_stack_registers_memory_p (x);
  if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
    return true;
  if (!CONST_INT_P (XEXP (x, 1)))
    return false;

  offset = INTVAL (XEXP (x, 1));
  extra = 0;
  switch (mode)
    {
    case E_DFmode:
    case E_DDmode:
    case E_DImode:
      /* If we are using VSX scalar loads, restrict ourselves to reg+reg
	 addressing.  */
      if (VECTOR_MEM_VSX_P (mode))
	return false;

      if (!worst_case)
	break;
      if (!TARGET_POWERPC64)
	extra = 4;
      else if (offset & 3)
	return false;
      break;

    case E_TFmode:
    case E_IFmode:
    case E_KFmode:
    case E_TDmode:
    case E_TImode:
    case E_PTImode:
      extra = 8;
      if (!worst_case)
	break;
      if (!TARGET_POWERPC64)
	extra = 12;
      else if (offset & 3)
	return false;
      break;

    default:
      break;
    }

  if (TARGET_PREFIXED)
    return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
  else
    return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
/* Return true if X is a reg + reg (indexed) address.  */

bool
legitimate_indexed_address_p (rtx x, int strict)
{
  rtx op0, op1;

  if (GET_CODE (x) != PLUS)
    return false;

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  return (REG_P (op0) && REG_P (op1)
	  && ((INT_REG_OK_FOR_BASE_P (op0, strict)
	       && INT_REG_OK_FOR_INDEX_P (op1, strict))
	      || (INT_REG_OK_FOR_BASE_P (op1, strict)
		  && INT_REG_OK_FOR_INDEX_P (op0, strict))));
}
bool
avoiding_indexed_address_p (machine_mode mode)
{
  unsigned int msize = GET_MODE_SIZE (mode);

  /* Avoid indexed addressing for modes that have non-indexed load/store
     instruction forms.  On power10, vector pairs have an indexed
     form, but vector quads don't.  */
  if (msize > 16)
    return msize != 32;

  return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
}
bool
legitimate_indirect_address_p (rtx x, int strict)
{
  return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
}
bool
macho_lo_sum_memory_operand (rtx x, machine_mode mode)
{
  if (!TARGET_MACHO || !flag_pic
      || mode != SImode || !MEM_P (x))
    return false;
  x = XEXP (x, 0);

  if (GET_CODE (x) != LO_SUM)
    return false;
  if (!REG_P (XEXP (x, 0)))
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
    return false;
  x = XEXP (x, 1);

  return CONSTANT_P (x);
}
static bool
legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
{
  if (GET_CODE (x) != LO_SUM)
    return false;
  if (!REG_P (XEXP (x, 0)))
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
    return false;
  /* quad word addresses are restricted, and we can't use LO_SUM.  */
  if (mode_supports_dq_form (mode))
    return false;
  x = XEXP (x, 1);

  if (TARGET_ELF)
    {
      bool large_toc_ok;

      if (DEFAULT_ABI == ABI_V4 && flag_pic)
	return false;
      /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
	 push_reload from reload pass code.  LEGITIMIZE_RELOAD_ADDRESS
	 recognizes some LO_SUM addresses as valid although this
	 function says opposite.  In most cases, LRA through different
	 transformations can generate correct code for address reloads.
	 It cannot manage only some LO_SUM cases.  So we need to add
	 code here saying that some addresses are still valid.  */
      large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
		      && small_toc_ref (x, VOIDmode));
      if (TARGET_TOC && ! large_toc_ok)
	return false;
      if (GET_MODE_NUNITS (mode) != 1)
	return false;
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
	  && !(/* ??? Assume floating point reg based on mode?  */
	       TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
	return false;

      return CONSTANT_P (x) || large_toc_ok;
    }
  else if (TARGET_MACHO)
    {
      if (GET_MODE_NUNITS (mode) != 1)
	return false;
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
	  && !(/* see above  */
	       TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
	return false;

      if (MACHO_DYNAMIC_NO_PIC_P || !flag_pic)
	return CONSTANT_P (x);

      /* Mach-O PIC code from here.  */
      if (GET_CODE (x) == CONST)
	x = XEXP (x, 0);

      /* SYMBOL_REFs need to be wrapped in an UNSPEC_MACHOPIC_OFFSET.  */
      if (SYMBOL_REF_P (x))
	return false;

      /* So this is OK if the wrapped object is const.  */
      if (GET_CODE (x) == UNSPEC
	  && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
	return CONSTANT_P (XVECEXP (x, 0, 0));
      return CONSTANT_P (x);
    }

  return false;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This is used from only one place: `memory_address' in explow.cc.

   OLDX is the address as it was before break_out_memory_refs was
   called.  In some cases it is useful to look at this to decide what
   needs to be done.

   It is always safe for this function to do nothing.  It exists to
   recognize opportunities to optimize the output.

   On RS/6000, first check for the sum of a register with a constant
   integer that is out of range.  If so, generate code to add the
   constant with the low-order 16 bits masked to the register and force
   this result into another register (this can be done with `cau').
   Then generate an address of REG+(CONST&0xffff), allowing for the
   possibility of bit 16 being a one.

   Then check for the sum of a register and something not constant, try to
   load the other things into a register and return the sum.  */

static rtx
rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			   machine_mode mode)
{
  unsigned int extra;

  if (!reg_offset_addressing_ok_p (mode)
      || mode_supports_dq_form (mode))
    {
      if (virtual_stack_registers_memory_p (x))
	return x;

      /* In theory we should not be seeing addresses of the form reg+0,
	 but just in case it is generated, optimize it away.  */
      if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
	return force_reg (Pmode, XEXP (x, 0));

      /* For TImode with load/store quad, restrict addresses to just a single
	 pointer, so it works with both GPRs and VSX registers.  */
      /* Make sure both operands are registers.  */
      else if (GET_CODE (x) == PLUS
	       && (mode != TImode || !TARGET_VSX))
	return gen_rtx_PLUS (Pmode,
			     force_reg (Pmode, XEXP (x, 0)),
			     force_reg (Pmode, XEXP (x, 1)));
      else
	return force_reg (Pmode, x);
    }
  if (SYMBOL_REF_P (x) && !TARGET_MACHO)
    {
      enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
      if (model != 0)
	return rs6000_legitimize_tls_address (x, model);
    }

  extra = 0;
  switch (mode)
    {
    case E_TFmode:
    case E_TDmode:
    case E_TImode:
    case E_PTImode:
    case E_IFmode:
    case E_KFmode:
      /* As in legitimate_offset_address_p we do not assume
	 worst-case.  The mode here is just a hint as to the registers
	 used.  A TImode is usually in gprs, but may actually be in
	 fprs.  Leave worst-case scenario for reload to handle via
	 insn constraints.  PTImode is only GPRs.  */
      extra = 8;
      break;
    default:
      break;
    }

  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1))
      && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
	  >= 0x10000 - extra))
    {
      HOST_WIDE_INT high_int, low_int;
      rtx sum;
      low_int = sext_hwi (INTVAL (XEXP (x, 1)), 16);
      if (low_int >= 0x8000 - extra)
	low_int = 0;
      high_int = INTVAL (XEXP (x, 1)) - low_int;
      sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
					 gen_int_mode (high_int, Pmode)), 0);
      return plus_constant (Pmode, sum, low_int);
    }
  else if (GET_CODE (x) == PLUS
	   && REG_P (XEXP (x, 0))
	   && !CONST_INT_P (XEXP (x, 1))
	   && GET_MODE_NUNITS (mode) == 1
	   && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
	       || (/* ??? Assume floating point reg based on mode?  */
		   TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
	   && !avoiding_indexed_address_p (mode))
    {
      return gen_rtx_PLUS (Pmode, XEXP (x, 0),
			   force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
    }
  else if ((TARGET_ELF
#if TARGET_MACHO
	    || !MACHO_DYNAMIC_NO_PIC_P
#endif
	    )
	   && TARGET_32BIT
	   && TARGET_NO_TOC_OR_PCREL
	   && !flag_pic
	   && !CONST_INT_P (x)
	   && !CONST_WIDE_INT_P (x)
	   && !CONST_DOUBLE_P (x)
	   && CONSTANT_P (x)
	   && GET_MODE_NUNITS (mode) == 1
	   && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
	       || (/* ??? Assume floating point reg based on mode?  */
		   TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
    {
      rtx reg = gen_reg_rtx (Pmode);
      if (TARGET_ELF)
	emit_insn (gen_elf_high (reg, x));
      else
	emit_insn (gen_macho_high (Pmode, reg, x));
      return gen_rtx_LO_SUM (Pmode, reg, x);
    }
  else if (TARGET_TOC
	   && constant_pool_expr_p (x)
	   && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
    return create_TOC_reference (x, NULL_RTX);
  else
    return x;
}
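
/* Worked example of the out-of-range case above: for x = (plus r3 0x12345),
   low_int = sext (0x2345) = 0x2345 and high_int = 0x10000, so we emit
   tmp = r3 + 0x10000 and return (plus tmp 0x2345), both halves now fitting
   in 16-bit immediates.  */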
/* Debug version of rs6000_legitimize_address.  */
static rtx
rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
{
  rtx ret;
  rtx_insn *insns;

  start_sequence ();
  ret = rs6000_legitimize_address (x, oldx, mode);
  insns = get_insns ();
  end_sequence ();

  if (ret != x)
    {
      fprintf (stderr,
	       "\nrs6000_legitimize_address: mode %s, old code %s, "
	       "new code %s, modified\n",
	       GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
	       GET_RTX_NAME (GET_CODE (ret)));

      fprintf (stderr, "Original address:\n");
      debug_rtx (x);

      fprintf (stderr, "oldx:\n");
      debug_rtx (oldx);

      fprintf (stderr, "New address:\n");
      debug_rtx (ret);

      if (insns)
	{
	  fprintf (stderr, "Insns added:\n");
	  debug_rtx_list (insns, 20);
	}
    }
  else
    {
      fprintf (stderr,
	       "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
	       GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));

      debug_rtx (x);
    }

  if (insns)
    emit_insn (insns);

  return ret;
}
/* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void
rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  switch (size)
    {
    case 4:
      fputs ("\t.long\t", file);
      break;
    case 8:
      fputs (DOUBLE_INT_ASM_OP, file);
      break;
    default:
      gcc_unreachable ();
    }
  output_addr_const (file, x);
  if (TARGET_ELF)
    fputs ("@dtprel+0x8000", file);
}
/* Return true if X is a symbol that refers to real (rather than emulated)
   TLS.  */

static bool
rs6000_real_tls_symbol_ref_p (rtx x)
{
  return (SYMBOL_REF_P (x)
	  && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
}
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize various UNSPEC sequences
   and turn them back into a direct symbol reference.  */

static rtx
rs6000_delegitimize_address (rtx orig_x)
{
  rtx x, y, offset;

  /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion.  It
     encodes loading up the high part of the address of a TOC reference along
     with a load of a GPR using the same base register used for the load.  We
     return the original SYMBOL_REF.

	(set (reg:INT1 <reg>
	     (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR)))

     UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass.  These
     UNSPECs include the external SYMBOL_REF along with the value being loaded.
     We return the original SYMBOL_REF.

	(parallel [(set (reg:DI <base-reg>)
			(unspec:DI [(symbol_ref <symbol>)
				    (const_int <marker>)]
				   UNSPEC_PCREL_OPT_LD_ADDR))
		   (set (reg:DI <load-reg>)
			(unspec:DI [(const_int 0)]
				   UNSPEC_PCREL_OPT_LD_DATA))])

     UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the
     GPR being loaded is the same as the GPR used to hold the external address.

	(set (reg:DI <base-reg>)
	     (unspec:DI [(symbol_ref <symbol>)
			 (const_int <marker>)]
			UNSPEC_PCREL_OPT_LD_SAME_REG))

     UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass.  This
     UNSPEC includes the external SYMBOL_REF along with the value being stored.
     We return the original SYMBOL_REF.

	(parallel [(set (reg:DI <base-reg>)
			(unspec:DI [(symbol_ref <symbol>)
				    (const_int <marker>)]
				   UNSPEC_PCREL_OPT_ST_ADDR))
		   (use (reg <store-reg>))])  */

  if (GET_CODE (orig_x) == UNSPEC)
    switch (XINT (orig_x, 1))
      {
      case UNSPEC_FUSION_GPR:
      case UNSPEC_PCREL_OPT_LD_ADDR:
      case UNSPEC_PCREL_OPT_LD_SAME_REG:
      case UNSPEC_PCREL_OPT_ST_ADDR:
	orig_x = XVECEXP (orig_x, 0, 0);
	break;

      default:
	break;
      }

  orig_x = delegitimize_mem_from_attrs (orig_x);

  x = orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);

  y = x;
  if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
    y = XEXP (y, 1);

  offset = NULL_RTX;
  if (GET_CODE (y) == PLUS
      && GET_MODE (y) == Pmode
      && CONST_INT_P (XEXP (y, 1)))
    {
      offset = XEXP (y, 1);
      y = XEXP (y, 0);
    }

  if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
    {
      y = XVECEXP (y, 0, 0);

#ifdef HAVE_AS_TLS
      /* Do not associate thread-local symbols with the original
	 constant pool symbol.  */
      if (TARGET_XCOFF
	  && SYMBOL_REF_P (y)
	  && CONSTANT_POOL_ADDRESS_P (y)
	  && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
	return orig_x;
#endif

      if (offset != NULL_RTX)
	y = gen_rtx_PLUS (Pmode, y, offset);
      if (!MEM_P (orig_x))
	return y;
      else
	return replace_equiv_address_nv (orig_x, y);
    }

  if (TARGET_MACHO
      && GET_CODE (orig_x) == LO_SUM
      && GET_CODE (XEXP (orig_x, 1)) == CONST)
    {
      y = XEXP (XEXP (orig_x, 1), 0);
      if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
	return XVECEXP (y, 0, 0);
    }

  return orig_x;
}
/* Return true if X shouldn't be emitted into the debug info.
   The linker doesn't like .toc section references from
   .debug_* sections, so reject .toc section symbols.  */

static bool
rs6000_const_not_ok_for_debug_p (rtx x)
{
  if (GET_CODE (x) == UNSPEC)
    return true;
  if (SYMBOL_REF_P (x)
      && CONSTANT_POOL_ADDRESS_P (x))
    {
      rtx c = get_pool_constant (x);
      machine_mode cmode = get_pool_mode (x);
      if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
	return true;
    }

  return false;
}
/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */

static bool
rs6000_legitimate_combined_insn (rtx_insn *insn)
{
  int icode = INSN_CODE (insn);

  /* Reject creating doloop insns.  Combine should not be allowed
     to create these for a number of reasons:
     1) In a nested loop, if combine creates one of these in an
     outer loop and the register allocator happens to allocate ctr
     to the outer loop insn, then the inner loop can't use ctr.
     Inner loops ought to be more highly optimized.
     2) Combine often wants to create one of these from what was
     originally a three insn sequence, first combining the three
     insns to two, then to ctrsi/ctrdi.  When ctrsi/ctrdi is not
     allocated ctr, the splitter takes us back to the three insn
     sequence.  It's better to stop combine at the two insn
     sequence.
     3) Faced with not being able to allocate ctr for ctrsi/ctrdi
     insns, the register allocator sometimes uses floating point
     or vector registers for the pseudo.  Since ctrsi/ctrdi is a
     jump insn and output reloads are not implemented for jumps,
     the ctrsi/ctrdi splitters need to handle all possible cases.
     That's a pain, and it gets to be seriously difficult when a
     splitter that runs after reload needs memory to transfer from
     a gpr to fpr.  See PR70098 and PR71763 which are not fixed
     for the difficult case.  It's better to not create problems
     in the first place.  */
  if (icode != CODE_FOR_nothing
      && (icode == CODE_FOR_bdz_si
	  || icode == CODE_FOR_bdz_di
	  || icode == CODE_FOR_bdnz_si
	  || icode == CODE_FOR_bdnz_di
	  || icode == CODE_FOR_bdztf_si
	  || icode == CODE_FOR_bdztf_di
	  || icode == CODE_FOR_bdnztf_si
	  || icode == CODE_FOR_bdnztf_di))
    return false;

  return true;
}
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx rs6000_tls_symbol;
static rtx
rs6000_tls_get_addr (void)
{
  if (!rs6000_tls_symbol)
    rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");

  return rs6000_tls_symbol;
}

/* Construct the SYMBOL_REF for TLS GOT references.  */

static GTY(()) rtx rs6000_got_symbol;
static rtx
rs6000_got_sym (void)
{
  if (!rs6000_got_symbol)
    {
      rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
      SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
      SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
    }

  return rs6000_got_symbol;
}
/* AIX Thread-Local Address support.  */

static rtx
rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
{
  rtx sym, mem, tocref, tlsreg, tmpreg, dest;
  const char *name;
  char *tlsname;

  /* Place addr into TOC constant pool.  */
  sym = force_const_mem (GET_MODE (addr), addr);

  /* Output the TOC entry and create the MEM referencing the value.  */
  if (constant_pool_expr_p (XEXP (sym, 0))
      && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
    {
      tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
      mem = gen_const_mem (Pmode, tocref);
      set_mem_alias_set (mem, get_TOC_alias_set ());
    }
  else
    return sym;

  /* Use global-dynamic for local-dynamic.  */
  if (model == TLS_MODEL_GLOBAL_DYNAMIC
      || model == TLS_MODEL_LOCAL_DYNAMIC)
    {
      /* Create new TOC reference for @m symbol.  */
      name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
      tlsname = XALLOCAVEC (char, strlen (name) + 1);
      strcpy (tlsname, "*LCM");
      strcat (tlsname, name + 3);
      rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
      SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
      tocref = create_TOC_reference (modaddr, NULL_RTX);
      rtx modmem = gen_const_mem (Pmode, tocref);
      set_mem_alias_set (modmem, get_TOC_alias_set ());

      rtx modreg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (modreg, modmem));

      tmpreg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (tmpreg, mem));

      dest = gen_reg_rtx (Pmode);
      if (TARGET_32BIT)
	emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
      else
	emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
      return dest;
    }
  /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13.  */
  else if (TARGET_32BIT)
    {
      tlsreg = gen_reg_rtx (SImode);
      emit_insn (gen_tls_get_tpointer (tlsreg));
    }
  else
    {
      tlsreg = gen_rtx_REG (DImode, 13);
      xcoff_tls_exec_model_detected = true;
    }

  /* Load the TOC value into temporary register.  */
  tmpreg = gen_reg_rtx (Pmode);
  emit_insn (gen_rtx_SET (tmpreg, mem));
  set_unique_reg_note (get_last_insn (), REG_EQUAL,
		       gen_rtx_MINUS (Pmode, addr, tlsreg));

  /* Add TOC symbol value to TLS pointer.  */
  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));

  return dest;
}
/* Passes the tls arg value for global dynamic and local dynamic
   emit_library_call_value in rs6000_legitimize_tls_address to
   rs6000_call_aix and rs6000_call_sysv.  This is used to emit the
   marker relocs put on __tls_get_addr calls.  */
static rtx global_tlsarg;

/* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
   this (thread-local) address.  */

static rtx
rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
{
  rtx dest, insn;

  if (TARGET_XCOFF)
    return rs6000_legitimize_tls_address_aix (addr, model);

  dest = gen_reg_rtx (Pmode);
  if (model == TLS_MODEL_LOCAL_EXEC
      && (rs6000_tls_size == 16 || rs6000_pcrel_p ()))
    {
      rtx tlsreg;

      if (TARGET_64BIT)
	{
	  tlsreg = gen_rtx_REG (Pmode, 13);
	  insn = gen_tls_tprel_64 (dest, tlsreg, addr);
	}
      else
	{
	  tlsreg = gen_rtx_REG (Pmode, 2);
	  insn = gen_tls_tprel_32 (dest, tlsreg, addr);
	}
      emit_insn (insn);
    }
  else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
    {
      rtx tlsreg, tmp;

      tmp = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  tlsreg = gen_rtx_REG (Pmode, 13);
	  insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
	}
      else
	{
	  tlsreg = gen_rtx_REG (Pmode, 2);
	  insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
	}
      emit_insn (insn);
      if (TARGET_64BIT)
	insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
      else
	insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
      emit_insn (insn);
    }
  else
    {
      rtx got, tga, tmp1, tmp2;

      /* We currently use relocations like @got@tlsgd for tls, which
	 means the linker will handle allocation of tls entries, placing
	 them in the .got section.  So use a pointer to the .got section,
	 not one to secondary TOC sections used by 64-bit -mminimal-toc,
	 or to secondary GOT sections used by 32-bit -fPIC.  */
      if (rs6000_pcrel_p ())
	got = const0_rtx;
      else if (TARGET_64BIT)
	got = gen_rtx_REG (Pmode, 2);
      else
	{
	  if (flag_pic == 1)
	    got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
	  else
	    {
	      rtx gsym = rs6000_got_sym ();
	      got = gen_reg_rtx (Pmode);
	      if (flag_pic == 0)
		rs6000_emit_move (got, gsym, Pmode);
	      else
		{
		  rtx mem, lab;

		  tmp1 = gen_reg_rtx (Pmode);
		  tmp2 = gen_reg_rtx (Pmode);
		  mem = gen_const_mem (Pmode, tmp1);
		  lab = gen_label_rtx ();
		  emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
		  emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
		  if (TARGET_LINK_STACK)
		    emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
		  emit_move_insn (tmp2, mem);
		  rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
		  set_unique_reg_note (last, REG_EQUAL, gsym);
		}
	    }
	}

      if (model == TLS_MODEL_GLOBAL_DYNAMIC)
	{
	  rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
				    UNSPEC_TLSGD);
	  tga = rs6000_tls_get_addr ();
	  rtx argreg = gen_rtx_REG (Pmode, 3);
	  emit_insn (gen_rtx_SET (argreg, arg));
	  global_tlsarg = arg;
	  emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
	  global_tlsarg = NULL_RTX;

	  /* Make a note so that the result of this call can be CSEd.  */
	  rtvec vec = gen_rtvec (1, copy_rtx (arg));
	  rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
	  set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
	}
      else if (model == TLS_MODEL_LOCAL_DYNAMIC)
	{
	  rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
	  tga = rs6000_tls_get_addr ();
	  tmp1 = gen_reg_rtx (Pmode);
	  rtx argreg = gen_rtx_REG (Pmode, 3);
	  emit_insn (gen_rtx_SET (argreg, arg));
	  global_tlsarg = arg;
	  emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
	  global_tlsarg = NULL_RTX;

	  /* Make a note so that the result of this call can be CSEd.  */
	  rtvec vec = gen_rtvec (1, copy_rtx (arg));
	  rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
	  set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);

	  if (rs6000_tls_size == 16 || rs6000_pcrel_p ())
	    {
	      if (TARGET_64BIT)
		insn = gen_tls_dtprel_64 (dest, tmp1, addr);
	      else
		insn = gen_tls_dtprel_32 (dest, tmp1, addr);
	    }
	  else if (rs6000_tls_size == 32)
	    {
	      tmp2 = gen_reg_rtx (Pmode);
	      if (TARGET_64BIT)
		insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
	      else
		insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
	      emit_insn (insn);
	      if (TARGET_64BIT)
		insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
	      else
		insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
	    }
	  else
	    {
	      tmp2 = gen_reg_rtx (Pmode);
	      if (TARGET_64BIT)
		insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
	      else
		insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
	      emit_insn (insn);
	      insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
	    }
	  emit_insn (insn);
	}
      else
	{
	  /* IE, or 64-bit offset LE.  */
	  tmp2 = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    insn = gen_tls_got_tprel_64 (tmp2, got, addr);
	  else
	    insn = gen_tls_got_tprel_32 (tmp2, got, addr);
	  emit_insn (insn);

	  if (rs6000_pcrel_p ())
	    {
	      if (TARGET_64BIT)
		insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
	      else
		insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
	    }
	  else if (TARGET_64BIT)
	    insn = gen_tls_tls_64 (dest, tmp2, addr);
	  else
	    insn = gen_tls_tls_32 (dest, tmp2, addr);
	  emit_insn (insn);
	}
    }

  return dest;
}
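
/* E.g. for local-exec with -mtls-size=16 on 64-bit this typically expands
   to a single addi from the thread pointer in r13 with a @tprel relocation,
   while global-dynamic materializes the UNSPEC_TLSGD argument in r3 and
   calls __tls_get_addr, with the marker later driving the call's relocation
   annotations.  */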
/* Only create the global variable for the stack protect guard if we are using
   the global flavor of that guard.  */
static tree
rs6000_init_stack_protect_guard (void)
{
  if (rs6000_stack_protector_guard == SSP_GLOBAL)
    return default_stack_protect_guard ();

  return NULL_TREE;
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  /* If GET_CODE (x) is HIGH, the 'X' represents the high part of a symbol_ref.
     It cannot be put into a constant pool.  e.g.
     (high:DI (unspec:DI [(symbol_ref/u:DI ("*.LC0")..)
     (high:DI (symbol_ref:DI ("var")..)).  */
  if (GET_CODE (x) == HIGH)
    return true;

  /* A TLS symbol in the TOC cannot contain a sum.  */
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
      && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
    return true;

  /* Allow AIX TOC TLS symbols in the constant pool,
     but not ELF TLS symbols.  */
  return TARGET_ELF && tls_referenced_p (x);
}
/* Return true iff the given SYMBOL_REF refers to a constant pool entry
   that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
   can be addressed relative to the toc pointer.  */

static bool
use_toc_relative_ref (rtx sym, machine_mode mode)
{
  return ((constant_pool_expr_p (sym)
	   && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
					       get_pool_mode (sym)))
	  || (TARGET_CMODEL == CMODEL_MEDIUM
	      && SYMBOL_REF_LOCAL_P (sym)
	      && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
}
/* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
   that is a valid memory address for an instruction.
   The MODE argument is the machine mode for the MEM expression
   that wants to use this address.

   On the RS/6000, there are four valid addresses: a SYMBOL_REF that
   refers to a constant pool entry of an address (or the sum of it
   plus a constant), a short (16-bit signed) constant plus a register,
   the sum of two registers, or a register indirect, possibly with an
   auto-increment.  For DFmode, DDmode and DImode with a constant plus
   register, we must ensure that both words are addressable or PowerPC64
   with offset word aligned.

   For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
   32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
   because adjacent memory cells are accessed by adding word-sized offsets
   during assembly output.  */

static bool
rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict,
			     code_helper ch = ERROR_MARK)
{
  bool reg_offset_p = reg_offset_addressing_ok_p (mode);
  bool quad_offset_p = mode_supports_dq_form (mode);

  if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
    return 0;

  /* lxvl and stxvl don't support any addressing modes with PLUS.  */
  if (ch.is_internal_fn ()
      && (ch == IFN_LEN_LOAD || ch == IFN_LEN_STORE)
      && GET_CODE (x) == PLUS)
    return false;

  /* Handle unaligned altivec lvx/stvx type addresses.  */
  if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
      && GET_CODE (x) == AND
      && CONST_INT_P (XEXP (x, 1))
      && INTVAL (XEXP (x, 1)) == -16)
    {
      x = XEXP (x, 0);
      return (legitimate_indirect_address_p (x, reg_ok_strict)
	      || legitimate_indexed_address_p (x, reg_ok_strict)
	      || virtual_stack_registers_memory_p (x));
    }

  if (legitimate_indirect_address_p (x, reg_ok_strict))
    return 1;
  if (TARGET_UPDATE
      && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
      && mode_supports_pre_incdec_p (mode)
      && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
    return 1;

  /* Handle prefixed addresses (PC-relative or 34-bit offset).  */
  if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
    return 1;

  /* Handle restricted vector d-form offsets in ISA 3.0.  */
  if (quad_offset_p)
    {
      if (quad_address_p (x, mode, reg_ok_strict))
	return 1;
    }
  else if (virtual_stack_registers_memory_p (x))
    return 1;

  else if (reg_offset_p)
    {
      if (legitimate_small_data_p (mode, x))
	return 1;
      if (legitimate_constant_pool_address_p (x, mode,
					      reg_ok_strict || lra_in_progress))
	return 1;
    }

  /* For TImode, if we have TImode in VSX registers, only allow register
     indirect addresses.  This will allow the values to go in either GPRs
     or VSX registers without reloading.  The vector types would tend to
     go into VSX registers, so we allow REG+REG, while TImode seems
     somewhat split, in that some uses are GPR based, and some VSX based.  */
  /* FIXME: We could loosen this by changing the following to
       if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
     but currently we cannot allow REG+REG addressing for TImode.  See
     PR72827 for complete details on how this ends up hoodwinking DSE.  */
  if (mode == TImode && TARGET_VSX)
    return 0;
  /* If not REG_OK_STRICT (before reload) let pass any stack offset.  */
  if (! reg_ok_strict
      && reg_offset_p
      && GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && (XEXP (x, 0) == virtual_stack_vars_rtx
	  || XEXP (x, 0) == arg_pointer_rtx)
      && CONST_INT_P (XEXP (x, 1)))
    return 1;
  if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
    return 1;
  if (!FLOAT128_2REG_P (mode)
      && (TARGET_HARD_FLOAT
	  || TARGET_POWERPC64
	  || (mode != DFmode && mode != DDmode))
      && (TARGET_POWERPC64 || mode != DImode)
      && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
      && mode != PTImode
      && !avoiding_indexed_address_p (mode)
      && legitimate_indexed_address_p (x, reg_ok_strict))
    return 1;
  if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
      && mode_supports_pre_modify_p (mode)
      && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
      && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
					      reg_ok_strict, false)
	  || (!avoiding_indexed_address_p (mode)
	      && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
      && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* There is no prefixed version of the load/store with update.  */
      rtx addr = XEXP (x, 1);
      return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
    }
  if (reg_offset_p && !quad_offset_p
      && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
    return 1;
  return 0;
}
/* Debug version of rs6000_legitimate_address_p.  */
static bool
rs6000_debug_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict,
				   code_helper ch)
{
  bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict, ch);
  fprintf (stderr,
	   "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
	   "strict = %d, reload = %s, code = %s\n",
	   ret ? "true" : "false",
	   GET_MODE_NAME (mode),
	   reg_ok_strict,
	   (reload_completed ? "after" : "before"),
	   GET_RTX_NAME (GET_CODE (x)));
  debug_rtx (x);

  return ret;
}
/* Implement TARGET_MODE_DEPENDENT_ADDRESS_P.  */
static bool
rs6000_mode_dependent_address_p (const_rtx addr,
				 addr_space_t as ATTRIBUTE_UNUSED)
{
  return rs6000_mode_dependent_address_ptr (addr);
}
/* Go to LABEL if ADDR (a legitimate address expression)
   has an effect that depends on the machine mode it is used for.

   On the RS/6000 this is true of all integral offsets (since AltiVec
   and VSX modes don't allow them) or is a pre-increment or decrement.

   ??? Except that due to conceptual problems in offsettable_address_p
   we can't really report the problems of integral offsets.  So leave
   this assuming that the adjustable offset must be valid for the
   sub-words of a TFmode operand, which is what we had before.  */

static bool
rs6000_mode_dependent_address (const_rtx addr)
{
  switch (GET_CODE (addr))
    {
    case PLUS:
      /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
	 is considered a legitimate address before reload, so there
	 are no offset restrictions in that case.  Note that this
	 condition is safe in strict mode because any address involving
	 virtual_stack_vars_rtx or arg_pointer_rtx would already have
	 been rejected as illegitimate.  */
      if (XEXP (addr, 0) != virtual_stack_vars_rtx
	  && XEXP (addr, 0) != arg_pointer_rtx
	  && CONST_INT_P (XEXP (addr, 1)))
	{
	  HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
	  HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
	  if (TARGET_PREFIXED)
	    return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
	  else
	    return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
	}
      break;

    case LO_SUM:
      /* Anything in the constant pool is sufficiently aligned that
	 all bytes have the same high part address.  */
      return !legitimate_constant_pool_address_p (addr, QImode, false);

    /* Auto-increment cases are now treated generically in recog.cc.  */
    case PRE_MODIFY:
      return TARGET_UPDATE;

    /* AND is only allowed in Altivec loads.  */
    case AND:
      return true;

    default:
      break;
    }

  return false;
}
/* Debug version of rs6000_mode_dependent_address.  */
static bool
rs6000_debug_mode_dependent_address (const_rtx addr)
{
  bool ret = rs6000_mode_dependent_address (addr);

  fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
	   ret ? "true" : "false");
  debug_rtx (CONST_CAST_RTX (addr));

  return ret;
}
/* Implement FIND_BASE_TERM.  */

rtx
rs6000_find_base_term (rtx op)
{
  rtx base;

  base = op;
  if (GET_CODE (base) == CONST)
    base = XEXP (base, 0);
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  if (GET_CODE (base) == UNSPEC)
    switch (XINT (base, 1))
      {
      case UNSPEC_TOCREL:
      case UNSPEC_MACHOPIC_OFFSET:
	/* OP represents SYM [+ OFFSET] - ANCHOR.  SYM is the base term
	   for aliasing purposes.  */
	return XVECEXP (base, 0, 0);
      }

  return op;
}
/* More elaborate version of recog's offsettable_memref_p predicate
   that works around the ??? note of rs6000_mode_dependent_address.
   In particular it accepts

     (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))

   in 32-bit mode, that the recog predicate rejects.  */

static bool
rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
{
  bool worst_case;

  if (!MEM_P (op))
    return false;

  /* First mimic offsettable_memref_p.  */
  if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
    return true;

  /* offsettable_address_p invokes rs6000_mode_dependent_address, but
     the latter predicate knows nothing about the mode of the memory
     reference and, therefore, assumes that it is the largest supported
     mode (TFmode).  As a consequence, legitimate offsettable memory
     references are rejected.  rs6000_legitimate_offset_address_p contains
     the correct logic for the PLUS case of rs6000_mode_dependent_address,
     at least with a little bit of help here given that we know the
     actual registers used.  */
  worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
		|| GET_MODE_SIZE (reg_mode) == 4);
  return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
					     strict, worst_case);
}
/* Determine the reassociation width to be used in reassociate_bb.
   This takes into account how many parallel operations we
   can actually do of a given type, and also the latency.
   P8:
     int add/sub 6/cycle
         mul 2/cycle
     vect add/sub/mul 2/cycle
     fp   add/sub/mul 2/cycle
     dfp  1/cycle
*/

static int
rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
			    machine_mode mode)
{
  switch (rs6000_tune)
    {
    case PROCESSOR_POWER8:
    case PROCESSOR_POWER9:
    case PROCESSOR_POWER10:
    case PROCESSOR_POWER11:
      if (DECIMAL_FLOAT_MODE_P (mode))
	return 1;
      if (VECTOR_MODE_P (mode))
	return 4;
      if (INTEGRAL_MODE_P (mode))
	return 1;
      if (FLOAT_MODE_P (mode))
	return 4;
      break;

    default:
      break;
    }

  return 1;
}
/* Change register usage conditional on target flags.  */
static void
rs6000_conditional_register_usage (void)
{
  int i;

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_conditional_register_usage called\n");

  /* 64-bit AIX and Linux reserve GPR13 for thread-private data.  */
  if (TARGET_64BIT)
    fixed_regs[13] = call_used_regs[13] = 1;

  /* Conditionally disable FPRs.  */
  if (TARGET_SOFT_FLOAT)
    for (i = 32; i < 64; i++)
      fixed_regs[i] = call_used_regs[i] = 1;

  /* For non PC-relative code, GPR2 is unavailable for register allocation.  */
  if (FIXED_R2 && !rs6000_pcrel_p ())
    fixed_regs[2] = 1;

  /* The TOC register is not killed across calls in a way that is
     visible to the compiler.  */
  if (fixed_regs[2] && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2))
    call_used_regs[2] = 0;

  if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
      = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
      = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  if (TARGET_TOC && TARGET_MINIMAL_TOC)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  if (!TARGET_ALTIVEC && !TARGET_VSX)
    {
      for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;
      call_used_regs[VRSAVE_REGNO] = 1;
    }

  if (TARGET_ALTIVEC || TARGET_VSX)
    global_regs[VSCR_REGNO] = 1;

  if (TARGET_ALTIVEC_ABI)
    {
      for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
	call_used_regs[i] = 1;

      /* AIX reserves VR20:31 in non-extended ABI mode.  */
      if (TARGET_XCOFF && !rs6000_aix_extabi)
	for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
	  fixed_regs[i] = call_used_regs[i] = 1;
    }
}
/* Output insns to set DEST equal to the constant SOURCE as a series of
   lis, ori and shl instructions and return TRUE.  */
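/* E.g. for the SImode constant 0x12345678 this emits the high-masked
   value 0x12340000 first (one lis) and then ORs in the low half
   0x5678 (one ori).  */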
bool
rs6000_emit_set_const (rtx dest, rtx source)
{
  machine_mode mode = GET_MODE (dest);
  rtx temp, set;
  rtx_insn *insn;
  HOST_WIDE_INT c;

  gcc_checking_assert (CONST_INT_P (source));
  c = INTVAL (source);
  switch (mode)
    {
    case E_QImode:
    case E_HImode:
      emit_insn (gen_rtx_SET (dest, source));
      return true;

    case E_SImode:
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);

      emit_insn (gen_rtx_SET (temp, GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
      emit_insn (gen_rtx_SET (dest,
			      gen_rtx_IOR (SImode, temp,
					   GEN_INT (c & 0xffff))));
      break;

    case E_DImode:
      if (!TARGET_POWERPC64)
	{
	  rtx hi, lo;

	  hi = operand_subword_force (dest, WORDS_BIG_ENDIAN == 0, DImode);
	  lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0, DImode);
	  emit_move_insn (hi, GEN_INT (c >> 32));
	  c = sext_hwi (c, 32);
	  emit_move_insn (lo, GEN_INT (c));
	}
      else
	rs6000_emit_set_long_const (dest, c);
      break;

    default:
      gcc_unreachable ();
    }

  insn = get_last_insn ();
  set = single_set (insn);
  if (! CONSTANT_P (SET_SRC (set)))
    set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));

  return true;
}
/* Check if C can be rotated to a negative value which 'lis' instruction is
   able to load: 1..1xx0..0.  If so, set *ROT to the number by which C is
   rotated, and return true.  Return false otherwise.  */
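/* Worked example: c = 0xffffffff8000000f has 4 tailing ones, 33 leading
   ones and 27 middle zeros, so case a below applies; *ROT is set to 60,
   and rotating C left by 60 gives 0xfffffffff8000000, which a single
   "lis rD,0xf800" can load.  */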
static bool
can_be_rotated_to_negative_lis (HOST_WIDE_INT c, int *rot)
{
  /* case a. 1..1xxx0..01..1: up to 15 x's, at least 16 0's.  */
  int leading_ones = clz_hwi (~c);
  int tailing_ones = ctz_hwi (~c);
  int middle_zeros = ctz_hwi (c >> tailing_ones);
  if (middle_zeros >= 16 && leading_ones + tailing_ones >= 33)
    {
      *rot = HOST_BITS_PER_WIDE_INT - tailing_ones;
      return true;
    }

  /* case b. xx0..01..1xx: some of 15 x's (and some of 16 0's) are
     rotated over the highest bit.  */
  int pos_one = clz_hwi ((c << 16) >> 16);
  middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_one));
  int middle_ones = clz_hwi (~(c << pos_one));
  if (middle_zeros >= 16 && middle_ones >= 33)
    {
      *rot = pos_one;
      return true;
    }

  return false;
}
/* Check if value C can be built by 2 instructions: one is 'li or lis',
   another is rotldi.

   If so, *SHIFT is set to the shift operand of rotldi(rldicl), and *MASK
   is set to the mask operand of rotldi(rldicl), and return true.
   Return false otherwise.  */
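/* E.g. c = 0xfff0000000000000 qualifies: rotated down it is just 0xfff,
   so "li rD,0xfff" followed by "rotldi rD,rD,52" rebuilds the value,
   with *MASK = -1 meaning no trailing AND is needed.  */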
static bool
can_be_built_by_li_lis_and_rotldi (HOST_WIDE_INT c, int *shift,
				   HOST_WIDE_INT *mask)
{
  /* If C or ~C contains at least 49 successive zeros, then C can be rotated
     to/from a positive or negative value that 'li' is able to load.  */
  int n;
  if (can_be_rotated_to_lowbits (c, 15, &n)
      || can_be_rotated_to_lowbits (~c, 15, &n)
      || can_be_rotated_to_negative_lis (c, &n))
    {
      *mask = HOST_WIDE_INT_M1;
      *shift = HOST_BITS_PER_WIDE_INT - n;
      return true;
    }

  return false;
}
/* Check if value C can be built by 2 instructions: one is 'li or lis',
   another is rldicl.

   If so, *SHIFT is set to the shift operand of rldicl, and *MASK is set to
   the mask operand of rldicl, and return true.
   Return false otherwise.  */

static bool
can_be_built_by_li_lis_and_rldicl (HOST_WIDE_INT c, int *shift,
				   HOST_WIDE_INT *mask)
{
  /* Leading zeros may be cleaned by rldicl with a mask.  Change leading zeros
     to ones and then recheck it.  */
  int lz = clz_hwi (c);

  /* If lz == 0, the left shift is undefined.  */
  if (!lz)
    return false;

  int n;
  HOST_WIDE_INT unmask_c
    = c | (HOST_WIDE_INT_M1U << (HOST_BITS_PER_WIDE_INT - lz));
  if (can_be_rotated_to_lowbits (~unmask_c, 15, &n)
      || can_be_rotated_to_negative_lis (unmask_c, &n))
    {
      *mask = HOST_WIDE_INT_M1U >> lz;
      *shift = n == 0 ? 0 : HOST_BITS_PER_WIDE_INT - n;
      return true;
    }

  return false;
}
/* Check if value C can be built by 2 instructions: one is 'li or lis',
   another is rldicr.

   If so, *SHIFT is set to the shift operand of rldicr, and *MASK is set to
   the mask operand of rldicr, and return true.
   Return false otherwise.  */

static bool
can_be_built_by_li_lis_and_rldicr (HOST_WIDE_INT c, int *shift,
				   HOST_WIDE_INT *mask)
{
  /* Trailing zeros may be cleaned by rldicr with a mask.  Change trailing
     zeros to ones and then recheck it.  */
  int tz = ctz_hwi (c);

  /* If tz == HOST_BITS_PER_WIDE_INT, the left shift is undefined.  */
  if (tz >= HOST_BITS_PER_WIDE_INT)
    return false;

  int n;
  HOST_WIDE_INT unmask_c = c | ((HOST_WIDE_INT_1U << tz) - 1);
  if (can_be_rotated_to_lowbits (~unmask_c, 15, &n)
      || can_be_rotated_to_negative_lis (unmask_c, &n))
    {
      *mask = HOST_WIDE_INT_M1U << tz;
      *shift = HOST_BITS_PER_WIDE_INT - n;
      return true;
    }

  return false;
}
/* Check if value C can be built by 2 instructions: one is 'li', another is
   rldic.

   If so, *SHIFT is set to the 'shift' operand of rldic; and *MASK is set
   to the mask value about the 'mb' operand of rldic; and return true.
   Return false otherwise.  */

static bool
can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
{
  /* There are 49 successive ones in the negative value of 'li'.  */
  int ones = 49;

  /* 1..1xx1..1: negative value of li --> 0..01..1xx0..0:
     right bits are shifted as 0's, and left 1's (and x's) are cleaned.  */
  int tz = ctz_hwi (c);
  int lz = clz_hwi (c);

  /* If lz == HOST_BITS_PER_WIDE_INT, the left shift is undefined.  */
  if (lz >= HOST_BITS_PER_WIDE_INT)
    return false;

  int middle_ones = clz_hwi (~(c << lz));
  if (tz + lz + middle_ones >= ones
      && (tz - lz) < HOST_BITS_PER_WIDE_INT
      && tz < HOST_BITS_PER_WIDE_INT)
    {
      *mask = ((1LL << (HOST_BITS_PER_WIDE_INT - tz - lz)) - 1LL) << tz;
      *shift = tz;
      return true;
    }

  /* 1..1xx1..1 --> 1..1xx0..01..1: some 1's (following x's) are cleaned.  */
  int leading_ones = clz_hwi (~c);
  int tailing_ones = ctz_hwi (~c);
  int middle_zeros = ctz_hwi (c >> tailing_ones);
  if (leading_ones + tailing_ones + middle_zeros >= ones
      && middle_zeros < HOST_BITS_PER_WIDE_INT)
    {
      *mask = ~(((1ULL << middle_zeros) - 1ULL) << tailing_ones);
      *shift = tailing_ones + middle_zeros;
      return true;
    }

  /* xx1..1xx: --> xx0..01..1xx: some 1's (following x's) are cleaned.  */
  /* Get the position for the first bit of successive 1.
     The 24th bit would be in successive 0 or 1.  */
  HOST_WIDE_INT low_mask = (HOST_WIDE_INT_1U << 24) - HOST_WIDE_INT_1U;
  int pos_first_1 = ((c & (low_mask + 1)) == 0)
		      ? clz_hwi (c & low_mask)
		      : HOST_BITS_PER_WIDE_INT - ctz_hwi (~(c | low_mask));

  /* Make sure the left and right shifts are defined.  */
  if (!IN_RANGE (pos_first_1, 1, HOST_BITS_PER_WIDE_INT - 1))
    return false;

  middle_ones = clz_hwi (~c << pos_first_1);
  middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_first_1));
  if (pos_first_1 < HOST_BITS_PER_WIDE_INT
      && middle_ones + middle_zeros < HOST_BITS_PER_WIDE_INT
      && middle_ones + middle_zeros >= ones)
    {
      *mask = ~(((1ULL << middle_zeros) - 1LL)
		<< (HOST_BITS_PER_WIDE_INT - pos_first_1));
      *shift = HOST_BITS_PER_WIDE_INT - pos_first_1 + middle_zeros;
      return true;
    }

  return false;
}
/* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
   Output insns to set DEST equal to the constant C as a series of
   lis, ori and shl instructions.  If NUM_INSNS is not NULL, then
   only increase *NUM_INSNS as the number of insns, and do not emit
   any insn.  */
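/* As a worked illustration of the chunking used below: for
   c = 0x123456789abcdef0 the four 16-bit pieces are ud4 = 0x1234,
   ud3 = 0x5678, ud2 = 0x9abc and ud1 = 0xdef0, and the special cases
   that follow try to rebuild C from as few of those pieces as
   possible.  */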
void
rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
{
  HOST_WIDE_INT ud1, ud2, ud3, ud4;

  ud1 = c & 0xffff;
  ud2 = (c >> 16) & 0xffff;
  ud3 = (c >> 32) & 0xffff;
  ud4 = (c >> 48) & 0xffff;

  /* This lambda is used to emit one insn or just increase the insn count.
     When counting the insn number, no need to emit the insn.  */
  auto count_or_emit_insn = [&num_insns] (rtx dest_or_insn, rtx src = nullptr) {
    if (num_insns)
      {
	(*num_insns)++;
	return;
      }

    if (src)
      emit_move_insn (dest_or_insn, src);
    else
      emit_insn (dest_or_insn);
  };

  if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (c))
    {
      /* pli.  */
      count_or_emit_insn (dest, GEN_INT (c));
      return;
    }

  if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
      || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000)))
    {
      /* li.  */
      count_or_emit_insn (dest, GEN_INT (sext_hwi (ud1, 16)));
      return;
    }

  rtx temp
    = (num_insns || !can_create_pseudo_p ()) ? dest : gen_reg_rtx (DImode);

  if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
      || (ud4 == 0 && ud3 == 0 && !(ud2 & 0x8000)))
    {
      /* lis[; ori].  */
      count_or_emit_insn (ud1 != 0 ? temp : dest,
			  GEN_INT (sext_hwi (ud2 << 16, 32)));
      if (ud1 != 0)
	count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
      return;
    }

  if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0)
    {
      /* lis; xoris.  */
      count_or_emit_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32)));
      count_or_emit_insn (dest,
			  gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000)));
      return;
    }

  if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000))
    {
      /* li; xoris.  */
      count_or_emit_insn (temp, GEN_INT (sext_hwi (ud1, 16)));
      count_or_emit_insn (dest, gen_rtx_XOR (DImode, temp,
					     GEN_INT ((ud2 ^ 0xffff) << 16)));
      return;
    }

  int shift;
  HOST_WIDE_INT mask;
  if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask)
      || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask)
      || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask)
      || can_be_built_by_li_and_rldic (c, &shift, &mask))
    {
      /* li/lis; rldicX.  */
      unsigned HOST_WIDE_INT imm = (c | ~mask);
      imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));

      count_or_emit_insn (temp, GEN_INT (imm));

      temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift));
      if (mask != HOST_WIDE_INT_M1)
	temp = gen_rtx_AND (DImode, temp, GEN_INT (mask));
      count_or_emit_insn (dest, temp);
      return;
    }

  if (ud3 == 0 && ud4 == 0)
    {
      gcc_assert ((ud2 & 0x8000) && ud1 != 0);
      if (!(ud1 & 0x8000))
	{
	  /* li; oris.  */
	  count_or_emit_insn (temp, GEN_INT (ud1));
	  count_or_emit_insn (dest,
			      gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
	  return;
	}

      /* lis; ori; rldicl */
      count_or_emit_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
      count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
      count_or_emit_insn (dest,
			  gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
      return;
    }

  if (ud1 == ud3 && ud2 == ud4)
    {
      /* load low 32bits first, e.g. "lis; ori", then "rldimi".  */
      HOST_WIDE_INT num = (ud2 << 16) | ud1;
      rs6000_emit_set_long_const (temp, sext_hwi (num, 32), num_insns);

      rtx rldimi = gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp,
					 GEN_INT (0xffffffff));
      count_or_emit_insn (rldimi);
      return;
    }

  if ((ud4 == 0xffff && (ud3 & 0x8000)) || (ud4 == 0 && !(ud3 & 0x8000)))
    {
      /* li; [ori;] rldicl [;ori].  */
      count_or_emit_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32)));
      if (ud2 != 0)
	count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2)));
      count_or_emit_insn (ud1 != 0 ? temp : dest,
			  gen_rtx_ASHIFT (DImode, temp, GEN_INT (16)));
      if (ud1 != 0)
	count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
      return;
    }

  if (TARGET_PREFIXED)
    {
      if (can_create_pseudo_p ())
	{
	  /* pli A,L; pli B,H; rldimi A,B,32,0.  */
	  rtx temp1 = num_insns ? nullptr : gen_reg_rtx (DImode);
	  count_or_emit_insn (temp, GEN_INT ((ud4 << 16) | ud3));
	  count_or_emit_insn (temp1, GEN_INT ((ud2 << 16) | ud1));
	  rtx rldimi = gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp1,
					     GEN_INT (0xffffffff));
	  count_or_emit_insn (rldimi);
	  return;
	}

      /* pli A,H; sldi A,32; paddi A,A,L.  */
      count_or_emit_insn (dest, GEN_INT ((ud4 << 16) | ud3));
      count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));

      bool can_use_paddi = dest ? REGNO (dest) != FIRST_GPR_REGNO : false;
      /* Use paddi for the low 32 bits.  */
      if (ud2 != 0 && ud1 != 0 && can_use_paddi)
	count_or_emit_insn (dest, gen_rtx_PLUS (DImode, dest,
						GEN_INT ((ud2 << 16) | ud1)));
      /* Use oris, ori for low 32 bits.  */
      if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
	count_or_emit_insn (dest,
			    gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
      if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
	count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
      return;
    }

  if (can_create_pseudo_p ())
    {
      /* lis HIGH,UD4 ; ori HIGH,UD3 ;
	 lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0.  */
      rtx high = num_insns ? nullptr : gen_reg_rtx (DImode);
      rtx low = num_insns ? nullptr : gen_reg_rtx (DImode);
      HOST_WIDE_INT num = (ud2 << 16) | ud1;
      rs6000_emit_set_long_const (low, sext_hwi (num, 32), num_insns);
      num = (ud4 << 16) | ud3;
      rs6000_emit_set_long_const (high, sext_hwi (num, 32), num_insns);

      rtx rldimi = gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low,
					 GEN_INT (0xffffffff));
      count_or_emit_insn (rldimi);
      return;
    }

  /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ;
     oris DEST,UD2 ; ori DEST,UD1.  */
  count_or_emit_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32)));
  if (ud3 != 0)
    count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3)));

  count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
  if (ud2 != 0)
    count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
  if (ud1 != 0)
    count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
}
/* Helper for the following.  Get rid of [r+r] memory refs
   in cases where it won't work (TImode, TFmode, TDmode, PTImode).  */

static void
rs6000_eliminate_indexed_memrefs (rtx operands[2])
{
  if (MEM_P (operands[0])
      && !REG_P (XEXP (operands[0], 0))
      && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
					       GET_MODE (operands[0]), false))
    operands[0]
      = replace_equiv_address (operands[0],
			       copy_addr_to_reg (XEXP (operands[0], 0)));

  if (MEM_P (operands[1])
      && !REG_P (XEXP (operands[1], 0))
      && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
					       GET_MODE (operands[1]), false))
    operands[1]
      = replace_equiv_address (operands[1],
			       copy_addr_to_reg (XEXP (operands[1], 0)));
}
/* Generate a vector of constants to permute MODE for a little-endian
   storage operation by swapping the two halves of a vector.  */
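/* For V4SImode, for example, the vector built below is { 2, 3, 0, 1 },
   i.e. the two doubleword halves of the vector exchange places.  */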
static rtvec
rs6000_const_vec (machine_mode mode)
{
  int i, subparts;
  rtvec v;

  switch (mode)
    {
    case E_V1TImode:
      subparts = 1;
      break;
    case E_V2DFmode:
    case E_V2DImode:
      subparts = 2;
      break;
    case E_V4SFmode:
    case E_V4SImode:
      subparts = 4;
      break;
    case E_V8HImode:
      subparts = 8;
      break;
    case E_V16QImode:
      subparts = 16;
      break;
    default:
      gcc_unreachable ();
    }

  v = rtvec_alloc (subparts);

  for (i = 0; i < subparts / 2; ++i)
    RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
  for (i = subparts / 2; i < subparts; ++i)
    RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);

  return v;
}
/* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
   store operation.  */
void
rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
{
  gcc_assert (!altivec_indexed_or_indirect_operand (dest, mode));
  gcc_assert (!altivec_indexed_or_indirect_operand (source, mode));

  /* Scalar permutations are easier to express in integer modes rather than
     floating-point modes, so cast them here.  We use V1TImode instead
     of TImode to ensure that the values don't go through GPRs.  */
  if (FLOAT128_VECTOR_P (mode))
    {
      dest = gen_lowpart (V1TImode, dest);
      source = gen_lowpart (V1TImode, source);
      mode = V1TImode;
    }

  /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
     scalar.  */
  if (mode == TImode || mode == V1TImode)
    emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
						  GEN_INT (64))));
  else
    {
      rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
      emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
    }
}
/* Emit a little-endian load from vector memory location SOURCE to VSX
   register DEST in mode MODE.  The load is done with two permuting
   insns that represent an lxvd2x and xxpermdi.  */
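/* On little-endian, lxvd2x loads the two doublewords swapped, and the
   following xxpermdi swaps them again, so after the two permutes below
   the register holds the elements in the expected order.  */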
void
rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
{
  /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
     V1TImode).  */
  if (mode == TImode || mode == V1TImode)
    {
      mode = V2DImode;
      dest = gen_lowpart (V2DImode, dest);
      source = adjust_address (source, V2DImode, 0);
    }

  rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
  rs6000_emit_le_vsx_permute (tmp, source, mode);
  rs6000_emit_le_vsx_permute (dest, tmp, mode);
}
/* Emit a little-endian store to vector memory location DEST from VSX
   register SOURCE in mode MODE.  The store is done with two permuting
   insns that represent an xxpermdi and an stxvd2x.  */
void
rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
{
  /* This should never be called after LRA.  */
  gcc_assert (can_create_pseudo_p ());

  /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
     V1TImode).  */
  if (mode == TImode || mode == V1TImode)
    {
      mode = V2DImode;
      dest = adjust_address (dest, V2DImode, 0);
      source = gen_lowpart (V2DImode, source);
    }

  rtx tmp = gen_reg_rtx_and_attrs (source);
  rs6000_emit_le_vsx_permute (tmp, source, mode);
  rs6000_emit_le_vsx_permute (dest, tmp, mode);
}
/* Emit a sequence representing a little-endian VSX load or store,
   moving data from SOURCE to DEST in mode MODE.  This is done
   separately from rs6000_emit_move to ensure it is called only
   during expand.  LE VSX loads and stores introduced later are
   handled with a split.  The expand-time RTL generation allows
   us to optimize away redundant pairs of register-permutes.  */
void
rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
{
  gcc_assert (!BYTES_BIG_ENDIAN
	      && VECTOR_MEM_VSX_P (mode)
	      && !TARGET_P9_VECTOR
	      && !gpr_or_gpr_p (dest, source)
	      && (MEM_P (source) ^ MEM_P (dest)));

  if (MEM_P (source))
    {
      gcc_assert (REG_P (dest) || SUBREG_P (dest));
      rs6000_emit_le_vsx_load (dest, source, mode);
    }
  else
    {
      if (!REG_P (source))
	source = force_reg (mode, source);
      rs6000_emit_le_vsx_store (dest, source, mode);
    }
}
/* Return whether a SFmode or SImode move can be done without converting one
   mode to another.  This arises when we have:

	(SUBREG:SF (REG:SI ...))
	(SUBREG:SI (REG:SF ...))

   and one of the values is in a floating point/vector register, where SFmode
   scalars are stored in DFmode format.  */

bool
valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
{
  if (TARGET_ALLOW_SF_SUBREG)
    return true;

  if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
    return true;

  if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
    return true;

  /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))).  */
  if (SUBREG_P (dest))
    {
      rtx dest_subreg = SUBREG_REG (dest);
      rtx src_subreg = SUBREG_REG (src);
      return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
    }

  return false;
}
/* Helper function to change moves with:

	(SUBREG:SF (REG:SI)) and
	(SUBREG:SI (REG:SF))

   into separate UNSPEC insns.  In the PowerPC architecture, scalar SFmode
   values are stored as DFmode values in the VSX registers.  We need to convert
   the bits before we can use a direct move or operate on the bits in the
   vector register as an integer type.

   Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))).  */

static bool
rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
{
  if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
      && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
      && SUBREG_P (source) && sf_subreg_operand (source, mode))
    {
      rtx inner_source = SUBREG_REG (source);
      machine_mode inner_mode = GET_MODE (inner_source);

      if (mode == SImode && inner_mode == SFmode)
	{
	  emit_insn (gen_movsi_from_sf (dest, inner_source));
	  return true;
	}

      if (mode == SFmode && inner_mode == SImode)
	{
	  emit_insn (gen_movsf_from_si (dest, inner_source));
	  return true;
	}
    }

  return false;
}
10910 rs6000_emit_move (rtx dest
, rtx source
, machine_mode mode
)
10913 operands
[0] = dest
;
10914 operands
[1] = source
;
10916 if (TARGET_DEBUG_ADDR
)
10919 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10920 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10921 GET_MODE_NAME (mode
),
10924 can_create_pseudo_p ());
10926 fprintf (stderr
, "source:\n");
10927 debug_rtx (source
);
10930 /* Check that we get CONST_WIDE_INT only when we should. */
10931 if (CONST_WIDE_INT_P (operands
[1])
10932 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
10933 gcc_unreachable ();
10935 #ifdef HAVE_AS_GNU_ATTRIBUTE
10936 /* If we use a long double type, set the flags in .gnu_attribute that say
10937 what the long double type is. This is to allow the linker's warning
10938 message for the wrong long double to be useful, even if the function does
10939 not do a call (for example, doing a 128-bit add on power9 if the long
10940 double type is IEEE 128-bit. Do not set this if __ibm128 or __floa128 are
10941 used if they aren't the default long dobule type. */
10942 if (rs6000_gnu_attr
&& (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
|| TARGET_64BIT
))
10944 if (TARGET_LONG_DOUBLE_128
&& (mode
== TFmode
|| mode
== TCmode
))
10945 rs6000_passes_float
= rs6000_passes_long_double
= true;
10947 else if (!TARGET_LONG_DOUBLE_128
&& (mode
== DFmode
|| mode
== DCmode
))
10948 rs6000_passes_float
= rs6000_passes_long_double
= true;
10952 /* See if we need to special case SImode/SFmode SUBREG moves. */
10953 if ((mode
== SImode
|| mode
== SFmode
) && SUBREG_P (source
)
10954 && rs6000_emit_move_si_sf_subreg (dest
, source
, mode
))
10957 /* Check if GCC is setting up a block move that will end up using FP
10958 registers as temporaries. We must make sure this is acceptable. */
10959 if (MEM_P (operands
[0])
10960 && MEM_P (operands
[1])
10962 && (rs6000_slow_unaligned_access (DImode
, MEM_ALIGN (operands
[0]))
10963 || rs6000_slow_unaligned_access (DImode
, MEM_ALIGN (operands
[1])))
10964 && ! (rs6000_slow_unaligned_access (SImode
,
10965 (MEM_ALIGN (operands
[0]) > 32
10966 ? 32 : MEM_ALIGN (operands
[0])))
10967 || rs6000_slow_unaligned_access (SImode
,
10968 (MEM_ALIGN (operands
[1]) > 32
10969 ? 32 : MEM_ALIGN (operands
[1]))))
10970 && ! MEM_VOLATILE_P (operands
[0])
10971 && ! MEM_VOLATILE_P (operands
[1]))
10973 emit_move_insn (adjust_address (operands
[0], SImode
, 0),
10974 adjust_address (operands
[1], SImode
, 0));
10975 emit_move_insn (adjust_address (copy_rtx (operands
[0]), SImode
, 4),
10976 adjust_address (copy_rtx (operands
[1]), SImode
, 4));
10980 if (can_create_pseudo_p () && MEM_P (operands
[0])
10981 && !gpc_reg_operand (operands
[1], mode
))
10982 operands
[1] = force_reg (mode
, operands
[1]);
10984 /* Recognize the case where operand[1] is a reference to thread-local
10985 data and load its address to a register. */
10986 if (tls_referenced_p (operands
[1]))
10988 enum tls_model model
;
10989 rtx tmp
= operands
[1];
10992 if (GET_CODE (tmp
) == CONST
&& GET_CODE (XEXP (tmp
, 0)) == PLUS
)
10994 addend
= XEXP (XEXP (tmp
, 0), 1);
10995 tmp
= XEXP (XEXP (tmp
, 0), 0);
10998 gcc_assert (SYMBOL_REF_P (tmp
));
10999 model
= SYMBOL_REF_TLS_MODEL (tmp
);
11000 gcc_assert (model
!= 0);
11002 tmp
= rs6000_legitimize_tls_address (tmp
, model
);
11005 tmp
= gen_rtx_PLUS (mode
, tmp
, addend
);
11006 tmp
= force_operand (tmp
, operands
[0]);
11011 /* 128-bit constant floating-point values on Darwin should really be loaded
11012 as two parts. However, this premature splitting is a problem when DFmode
11013 values can go into Altivec registers. */
11014 if (TARGET_MACHO
&& CONST_DOUBLE_P (operands
[1]) && FLOAT128_IBM_P (mode
)
11015 && !reg_addr
[DFmode
].scalar_in_vmx_p
)
11017 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
, 0),
11018 simplify_gen_subreg (DFmode
, operands
[1], mode
, 0),
11020 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
,
11021 GET_MODE_SIZE (DFmode
)),
11022 simplify_gen_subreg (DFmode
, operands
[1], mode
,
11023 GET_MODE_SIZE (DFmode
)),
11028 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
11029 p1:SD) if p1 is not of floating point class and p0 is spilled as
11030 we can have no analogous movsd_store for this. */
11031 if (lra_in_progress
&& mode
== DDmode
11032 && REG_P (operands
[0]) && !HARD_REGISTER_P (operands
[0])
11033 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
11034 && SUBREG_P (operands
[1]) && REG_P (SUBREG_REG (operands
[1]))
11035 && GET_MODE (SUBREG_REG (operands
[1])) == SDmode
)
11038 int regno
= REGNO (SUBREG_REG (operands
[1]));
11040 if (!HARD_REGISTER_NUM_P (regno
))
11042 cl
= reg_preferred_class (regno
);
11043 regno
= reg_renumber
[regno
];
11045 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][1];
11047 if (regno
>= 0 && ! FP_REGNO_P (regno
))
11050 operands
[0] = gen_lowpart_SUBREG (SDmode
, operands
[0]);
11051 operands
[1] = SUBREG_REG (operands
[1]);
11054 if (lra_in_progress
11056 && REG_P (operands
[0]) && !HARD_REGISTER_P (operands
[0])
11057 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
11058 && (REG_P (operands
[1])
11059 || (SUBREG_P (operands
[1]) && REG_P (SUBREG_REG (operands
[1])))))
11061 int regno
= reg_or_subregno (operands
[1]);
11064 if (!HARD_REGISTER_NUM_P (regno
))
11066 cl
= reg_preferred_class (regno
);
11067 gcc_assert (cl
!= NO_REGS
);
11068 regno
= reg_renumber
[regno
];
11070 regno
= ira_class_hard_regs
[cl
][0];
11072 if (FP_REGNO_P (regno
))
11074 if (GET_MODE (operands
[0]) != DDmode
)
11075 operands
[0] = gen_rtx_SUBREG (DDmode
, operands
[0], 0);
11076 emit_insn (gen_movsd_store (operands
[0], operands
[1]));
11078 else if (INT_REGNO_P (regno
))
11079 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
11084 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
11085 p:DD)) if p0 is not of floating point class and p1 is spilled as
11086 we can have no analogous movsd_load for this. */
11087 if (lra_in_progress
&& mode
== DDmode
11088 && SUBREG_P (operands
[0]) && REG_P (SUBREG_REG (operands
[0]))
11089 && GET_MODE (SUBREG_REG (operands
[0])) == SDmode
11090 && REG_P (operands
[1]) && !HARD_REGISTER_P (operands
[1])
11091 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
11094 int regno
= REGNO (SUBREG_REG (operands
[0]));
11096 if (!HARD_REGISTER_NUM_P (regno
))
11098 cl
= reg_preferred_class (regno
);
11099 regno
= reg_renumber
[regno
];
11101 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][0];
11103 if (regno
>= 0 && ! FP_REGNO_P (regno
))
11106 operands
[0] = SUBREG_REG (operands
[0]);
11107 operands
[1] = gen_lowpart_SUBREG (SDmode
, operands
[1]);
11110 if (lra_in_progress
11112 && (REG_P (operands
[0])
11113 || (SUBREG_P (operands
[0]) && REG_P (SUBREG_REG (operands
[0]))))
11114 && REG_P (operands
[1]) && !HARD_REGISTER_P (operands
[1])
11115 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
11117 int regno
= reg_or_subregno (operands
[0]);
11120 if (!HARD_REGISTER_NUM_P (regno
))
11122 cl
= reg_preferred_class (regno
);
11123 gcc_assert (cl
!= NO_REGS
);
11124 regno
= reg_renumber
[regno
];
11126 regno
= ira_class_hard_regs
[cl
][0];
11128 if (FP_REGNO_P (regno
))
11130 if (GET_MODE (operands
[1]) != DDmode
)
11131 operands
[1] = gen_rtx_SUBREG (DDmode
, operands
[1], 0);
11132 emit_insn (gen_movsd_load (operands
[0], operands
[1]));
11134 else if (INT_REGNO_P (regno
))
11135 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
11141 /* FIXME: In the long term, this switch statement should go away
11142 and be replaced by a sequence of tests based on things like
11148 if (CONSTANT_P (operands
[1])
11149 && !CONST_INT_P (operands
[1]))
11150 operands
[1] = force_const_mem (mode
, operands
[1]);
11157 if (FLOAT128_2REG_P (mode
))
11158 rs6000_eliminate_indexed_memrefs (operands
);
11165 if (CONSTANT_P (operands
[1])
11166 && ! easy_fp_constant (operands
[1], mode
))
11167 operands
[1] = force_const_mem (mode
, operands
[1]);
11177 if (CONSTANT_P (operands
[1])
11178 && !easy_vector_constant (operands
[1], mode
))
11179 operands
[1] = force_const_mem (mode
, operands
[1]);
11184 if (CONST_INT_P (operands
[1]) && INTVAL (operands
[1]) != 0)
11185 error ("%qs is an opaque type, and you cannot set it to other values",
11186 (mode
== OOmode
) ? "__vector_pair" : "__vector_quad");
11191 /* Use default pattern for address of ELF small data */
11194 && DEFAULT_ABI
== ABI_V4
11195 && (SYMBOL_REF_P (operands
[1])
11196 || GET_CODE (operands
[1]) == CONST
)
11197 && small_data_operand (operands
[1], mode
))
11199 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11203 /* Use the default pattern for loading up PC-relative addresses. */
11204 if (TARGET_PCREL
&& mode
== Pmode
11205 && pcrel_local_or_external_address (operands
[1], Pmode
))
11207 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11211 if (DEFAULT_ABI
== ABI_V4
11212 && mode
== Pmode
&& mode
== SImode
11213 && flag_pic
== 1 && got_operand (operands
[1], mode
))
11215 emit_insn (gen_movsi_got (operands
[0], operands
[1]));
11219 if ((TARGET_ELF
|| DEFAULT_ABI
== ABI_DARWIN
)
11220 && TARGET_NO_TOC_OR_PCREL
11223 && CONSTANT_P (operands
[1])
11224 && GET_CODE (operands
[1]) != HIGH
11225 && !CONST_INT_P (operands
[1]))
11227 rtx target
= (!can_create_pseudo_p ()
11229 : gen_reg_rtx (mode
));
11231 /* If this is a function address on -mcall-aixdesc,
11232 convert it to the address of the descriptor. */
11233 if (DEFAULT_ABI
== ABI_AIX
11234 && SYMBOL_REF_P (operands
[1])
11235 && XSTR (operands
[1], 0)[0] == '.')
11237 const char *name
= XSTR (operands
[1], 0);
11239 while (*name
== '.')
11241 new_ref
= gen_rtx_SYMBOL_REF (Pmode
, name
);
11242 CONSTANT_POOL_ADDRESS_P (new_ref
)
11243 = CONSTANT_POOL_ADDRESS_P (operands
[1]);
11244 SYMBOL_REF_FLAGS (new_ref
) = SYMBOL_REF_FLAGS (operands
[1]);
11245 SYMBOL_REF_USED (new_ref
) = SYMBOL_REF_USED (operands
[1]);
11246 SYMBOL_REF_DATA (new_ref
) = SYMBOL_REF_DATA (operands
[1]);
11247 operands
[1] = new_ref
;
11250 if (DEFAULT_ABI
== ABI_DARWIN
)
11253 /* This is not PIC code, but could require the subset of
11254 indirections used by mdynamic-no-pic. */
11255 if (MACHO_DYNAMIC_NO_PIC_P
)
11257 /* Take care of any required data indirection. */
11258 operands
[1] = rs6000_machopic_legitimize_pic_address (
11259 operands
[1], mode
, operands
[0]);
11260 if (operands
[0] != operands
[1])
11261 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11265 emit_insn (gen_macho_high (Pmode
, target
, operands
[1]));
11266 emit_insn (gen_macho_low (Pmode
, operands
[0],
11267 target
, operands
[1]));
11271 emit_insn (gen_elf_high (target
, operands
[1]));
11272 emit_insn (gen_elf_low (operands
[0], target
, operands
[1]));
11276 /* If this is a SYMBOL_REF that refers to a constant pool entry,
11277 and we have put it in the TOC, we just need to make a TOC-relative
11278 reference to it. */
11280 && SYMBOL_REF_P (operands
[1])
11281 && use_toc_relative_ref (operands
[1], mode
))
11282 operands
[1] = create_TOC_reference (operands
[1], operands
[0]);
11283 else if (mode
== Pmode
11284 && CONSTANT_P (operands
[1])
11285 && GET_CODE (operands
[1]) != HIGH
11286 && ((REG_P (operands
[0])
11287 && FP_REGNO_P (REGNO (operands
[0])))
11288 || !CONST_INT_P (operands
[1])
11289 || (num_insns_constant (operands
[1], mode
)
11290 > (TARGET_CMODEL
!= CMODEL_SMALL
? 3 : 2)))
11291 && !toc_relative_expr_p (operands
[1], false, NULL
, NULL
)
11292 && (TARGET_CMODEL
== CMODEL_SMALL
11293 || can_create_pseudo_p ()
11294 || (REG_P (operands
[0])
11295 && INT_REG_OK_FOR_BASE_P (operands
[0], true))))
11299 /* Darwin uses a special PIC legitimizer. */
11300 if (DEFAULT_ABI
== ABI_DARWIN
&& MACHOPIC_INDIRECT
)
11303 rs6000_machopic_legitimize_pic_address (operands
[1], mode
,
11305 if (operands
[0] != operands
[1])
11306 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11311 /* If we are to limit the number of things we put in the TOC and
11312 this is a symbol plus a constant we can add in one insn,
11313 just put the symbol in the TOC and add the constant. */
11314 if (GET_CODE (operands
[1]) == CONST
11315 && TARGET_NO_SUM_IN_TOC
11316 && GET_CODE (XEXP (operands
[1], 0)) == PLUS
11317 && add_operand (XEXP (XEXP (operands
[1], 0), 1), mode
)
11318 && (GET_CODE (XEXP (XEXP (operands
[1], 0), 0)) == LABEL_REF
11319 || SYMBOL_REF_P (XEXP (XEXP (operands
[1], 0), 0)))
11320 && ! side_effects_p (operands
[0]))
11323 force_const_mem (mode
, XEXP (XEXP (operands
[1], 0), 0));
11324 rtx other
= XEXP (XEXP (operands
[1], 0), 1);
11326 sym
= force_reg (mode
, sym
);
11327 emit_insn (gen_add3_insn (operands
[0], sym
, other
));
11331 operands
[1] = force_const_mem (mode
, operands
[1]);
11334 && SYMBOL_REF_P (XEXP (operands
[1], 0))
11335 && use_toc_relative_ref (XEXP (operands
[1], 0), mode
))
11337 rtx tocref
= create_TOC_reference (XEXP (operands
[1], 0),
11339 operands
[1] = gen_const_mem (mode
, tocref
);
11340 set_mem_alias_set (operands
[1], get_TOC_alias_set ());
11346 if (!VECTOR_MEM_VSX_P (TImode
))
11347 rs6000_eliminate_indexed_memrefs (operands
);
11351 rs6000_eliminate_indexed_memrefs (operands
);
11355 fatal_insn ("bad move", gen_rtx_SET (dest
, source
));
11358 /* Above, we may have called force_const_mem which may have returned
11359 an invalid address. If we can, fix this up; otherwise, reload will
11360 have to deal with it. */
11361 if (MEM_P (operands
[1]))
11362 operands
[1] = validize_mem (operands
[1]);
11364 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
/* Set up AIX/Darwin/64-bit Linux quad floating point routines.  */

static void
init_float128_ibm (machine_mode mode)
{
  if (!TARGET_XL_COMPAT)
    {
      set_optab_libfunc (add_optab, mode, "__gcc_qadd");
      set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
      set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
      set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");

      if (!TARGET_HARD_FLOAT)
	{
	  set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
	  set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
	  set_optab_libfunc (ne_optab, mode, "__gcc_qne");
	  set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
	  set_optab_libfunc (ge_optab, mode, "__gcc_qge");
	  set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
	  set_optab_libfunc (le_optab, mode, "__gcc_qle");
	  set_optab_libfunc (unord_optab, mode, "__gcc_qunord");

	  set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
	  set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
	  set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
	  set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
	  set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
	  set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
	  set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
	  set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
	}
    }
  else
    {
      set_optab_libfunc (add_optab, mode, "_xlqadd");
      set_optab_libfunc (sub_optab, mode, "_xlqsub");
      set_optab_libfunc (smul_optab, mode, "_xlqmul");
      set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
    }

  /* Add various conversions for IFmode to use the traditional TFmode
     names.  */
  if (mode == IFmode)
    {
      set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
      set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
      set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
      set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
      set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
      set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");

      set_conv_libfunc (sfix_optab, DImode, mode, "__fixtfdi");
      set_conv_libfunc (ufix_optab, DImode, mode, "__fixunstfdi");

      set_conv_libfunc (sfloat_optab, mode, DImode, "__floatditf");
      set_conv_libfunc (ufloat_optab, mode, DImode, "__floatunditf");

      if (TARGET_POWERPC64)
	{
	  set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
	  set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
	  set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
	  set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
	}
    }
}
/* Set up IEEE 128-bit floating point routines.  Use different names if the
   arguments can be passed in a vector register.  The historical PowerPC
   implementation of IEEE 128-bit floating point used _q_<op> for the names,
   so continue to use that if we aren't using vector registers to pass IEEE
   128-bit floating point.  */
static void
init_float128_ieee (machine_mode mode)
{
  if (FLOAT128_VECTOR_P (mode))
    {
      set_optab_libfunc (add_optab, mode, "__addkf3");
      set_optab_libfunc (sub_optab, mode, "__subkf3");
      set_optab_libfunc (neg_optab, mode, "__negkf2");
      set_optab_libfunc (smul_optab, mode, "__mulkf3");
      set_optab_libfunc (sdiv_optab, mode, "__divkf3");
      set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
      set_optab_libfunc (abs_optab, mode, "__abskf2");
      set_optab_libfunc (powi_optab, mode, "__powikf2");

      set_optab_libfunc (eq_optab, mode, "__eqkf2");
      set_optab_libfunc (ne_optab, mode, "__nekf2");
      set_optab_libfunc (gt_optab, mode, "__gtkf2");
      set_optab_libfunc (ge_optab, mode, "__gekf2");
      set_optab_libfunc (lt_optab, mode, "__ltkf2");
      set_optab_libfunc (le_optab, mode, "__lekf2");
      set_optab_libfunc (unord_optab, mode, "__unordkf2");

      set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
      set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
      set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
      set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");

      set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
      if (mode != TFmode && FLOAT128_IBM_P (TFmode))
	set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");

      set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
      if (mode != TFmode && FLOAT128_IBM_P (TFmode))
	set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");

      set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
      set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
      set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
      set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
      set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
      set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");

      set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
      set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
      set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
      set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");

      set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
      set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
      set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
      set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");

      if (TARGET_POWERPC64)
	{
	  set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti_sw");
	  set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti_sw");
	  set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf_sw");
	  set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf_sw");
	}
    }
  else
    {
      set_optab_libfunc (add_optab, mode, "_q_add");
      set_optab_libfunc (sub_optab, mode, "_q_sub");
      set_optab_libfunc (neg_optab, mode, "_q_neg");
      set_optab_libfunc (smul_optab, mode, "_q_mul");
      set_optab_libfunc (sdiv_optab, mode, "_q_div");
      if (TARGET_PPC_GPOPT)
	set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");

      set_optab_libfunc (eq_optab, mode, "_q_feq");
      set_optab_libfunc (ne_optab, mode, "_q_fne");
      set_optab_libfunc (gt_optab, mode, "_q_fgt");
      set_optab_libfunc (ge_optab, mode, "_q_fge");
      set_optab_libfunc (lt_optab, mode, "_q_flt");
      set_optab_libfunc (le_optab, mode, "_q_fle");

      set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
      set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
      set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
      set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
      set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
      set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
      set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
      set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
    }
}
static void
rs6000_init_libfuncs (void)
{
  /* __float128 support.  */
  if (TARGET_FLOAT128_TYPE)
    {
      init_float128_ibm (IFmode);
      init_float128_ieee (KFmode);
    }

  /* AIX/Darwin/64-bit Linux quad floating point routines.  */
  if (TARGET_LONG_DOUBLE_128)
    {
      if (!TARGET_IEEEQUAD)
	init_float128_ibm (TFmode);

      /* IEEE 128-bit including 32-bit SVR4 quad floating point routines.  */
      else
	init_float128_ieee (TFmode);
    }
}
/* Emit a potentially record-form instruction, setting DST from SRC.
   If DOT is 0, that is all; otherwise, set CCREG to the result of the
   signed comparison of DST with zero.  If DOT is 1, the generated RTL
   doesn't care about the DST result; if DOT is 2, it does.  If CCREG
   is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
   a separate COMPARE.  */
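/* E.g. for DOT = 1 with CCREG being CR0, this emits a single PARALLEL
   of the form
     [(set (reg:CC 0) (compare:CC SRC (const_int 0)))
      (clobber DST)]
   which matches a record-form ("dot") instruction pattern.  */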
void
rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
{
  if (dot == 0)
    {
      emit_move_insn (dst, src);
      return;
    }

  if (cc_reg_not_cr0_operand (ccreg, CCmode))
    {
      emit_move_insn (dst, src);
      emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
      return;
    }

  rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
  if (dot == 1)
    {
      rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
    }
  else
    {
      rtx set = gen_rtx_SET (dst, src);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
    }
}
/* A validation routine: say whether CODE, a condition code, and MODE
   match.  The other alternatives either don't make sense or should
   never be generated.  */

void
validate_condition_mode (enum rtx_code code, machine_mode mode)
{
  gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
	       || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
	      && GET_MODE_CLASS (mode) == MODE_CC);

  /* These don't make sense.  */
  gcc_assert ((code != GT && code != LT && code != GE && code != LE)
	      || mode != CCUNSmode);

  gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
	      || mode == CCUNSmode);

  gcc_assert (mode == CCFPmode
	      || (code != ORDERED && code != UNORDERED
		  && code != UNEQ && code != LTGT
		  && code != UNGT && code != UNLT
		  && code != UNGE && code != UNLE));

  /* These are invalid; the information is not there.  */
  gcc_assert (mode != CCEQmode || code == EQ || code == NE);
}
/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
   rldicl, rldicr, or rldic instruction in mode MODE.  If so, if E is
   not zero, store there the bit offset (counted from the right) where
   the single stretch of 1 bits begins; and similarly for B, the bit
   offset where it ends.  */
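/* Example: for MASK = 0x00ff0000 in SImode the single stretch of ones
   runs from bit 16 up to bit 23 (counted from the right), so *E is set
   to 16 and *B to 23.  A wrap-around DImode mask such as
   0xff000000000000ff is also accepted, with *E = 56 and *B = 7.  */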
bool
rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
{
  unsigned HOST_WIDE_INT val = INTVAL (mask);
  unsigned HOST_WIDE_INT bit;
  int nb, ne;
  int n = GET_MODE_PRECISION (mode);

  if (mode != DImode && mode != SImode)
    return false;

  if (INTVAL (mask) >= 0)
    {
      bit = val & -val;
      ne = exact_log2 (bit);
      nb = exact_log2 (val + bit);
    }
  else if (val + 1 == 0)
    {
      nb = n;
      ne = 0;
    }
  else if (val & 1)
    {
      val = ~val;
      bit = val & -val;
      nb = exact_log2 (bit);
      ne = exact_log2 (val + bit);
    }
  else
    {
      bit = val & -val;
      ne = exact_log2 (bit);
      if (val + bit == 0)
	nb = n;
      else
	nb = 0;
    }

  nb--;

  if (nb < 0 || ne < 0 || nb >= n || ne >= n)
    return false;

  if (b)
    *b = nb;
  if (e)
    *e = ne;

  return true;
}
bool
rs6000_is_valid_rotate_dot_mask (rtx mask, machine_mode mode)
{
  int nb, ne;
  if (rs6000_is_valid_mask (mask, &nb, &ne, mode) && nb >= ne && ne > 0)
    {
      /* *rotldi3_mask_dot requires for -m32 -mpowerpc64 that the mask is
	 no greater than 0x7fffffff.  */
      return (UINTVAL (mask) << (63 - nb)) <= 0x7fffffff;
    }

  return false;
}
/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
   or rldicr instruction, to implement an AND with it in mode MODE.  */
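/* E.g. in DImode, 0x00000000ffffffff (ne == 0) is an rldicl mask and
   0xffffffff00000000 (nb == 63) an rldicr mask, while a run of ones
   crossing bit 32 but reaching neither end, such as 0x000000ffff000000,
   cannot be done with a single AND.  */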
bool
rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
    return false;

  /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
     does not wrap.  */
  if (mode == DImode)
    return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));

  /* For SImode, rlwinm can do everything.  */
  if (mode == SImode)
    return (nb < 32 && ne < 32);

  return false;
}
/* Return the instruction template for an AND with mask in mode MODE, with
   operands OPERANDS.  If DOT is true, make it a record-form instruction.  */

const char *
rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
    gcc_unreachable ();

  if (mode == DImode && ne == 0)
    {
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rldicl. %0,%1,0,%3";
      return "rldicl %0,%1,0,%3";
    }

  if (mode == DImode && nb == 63)
    {
      operands[3] = GEN_INT (63 - ne);
      if (dot)
	return "rldicr. %0,%1,0,%3";
      return "rldicr %0,%1,0,%3";
    }

  if (nb < 32 && ne < 32)
    {
      operands[3] = GEN_INT (31 - nb);
      operands[4] = GEN_INT (31 - ne);
      if (dot)
	return "rlwinm. %0,%1,0,%3,%4";
      return "rlwinm %0,%1,0,%3,%4";
    }

  gcc_unreachable ();
}
/* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
   rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
   shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE.  */

bool
rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
    return false;

  int n = GET_MODE_PRECISION (mode);
  int sh = -1;

  if (CONST_INT_P (XEXP (shift, 1)))
    {
      sh = INTVAL (XEXP (shift, 1));
      if (sh < 0 || sh >= n)
	return false;
    }

  rtx_code code = GET_CODE (shift);

  /* Convert any shift by 0 to a rotate, to simplify below code.  */
  if (sh == 0)
    code = ROTATE;

  /* Convert rotate to simple shift if we can, to make analysis simpler.  */
  if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
    code = ASHIFT;
  if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
    {
      code = LSHIFTRT;
      sh = n - sh;
    }

  /* DImode rotates need rld*.  */
  if (mode == DImode && code == ROTATE)
    return (nb == 63 || ne == 0 || ne == sh);

  /* SImode rotates need rlw*.  */
  if (mode == SImode && code == ROTATE)
    return (nb < 32 && ne < 32 && sh < 32);

  /* Wrap-around masks are only okay for rotates.  */
  if (ne > nb)
    return false;

  /* Variable shifts are only okay for rotates.  */
  if (sh < 0)
    return false;

  /* Don't allow ASHIFT if the mask is wrong for that.  */
  if (code == ASHIFT && ne < sh)
    return false;

  /* If we can do it with an rlw*, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (nb < 32 && ne < 32 && sh < 32
      && !(code == LSHIFTRT && nb >= 32 - sh))
    return true;

  /* If we can do it with an rld*, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (code == LSHIFTRT)
    sh = 64 - sh;
  if (nb == 63 || ne == 0 || ne == sh)
    return !(code == LSHIFTRT && nb >= sh);

  return false;
}
/* Return the instruction template for a shift with mask in mode MODE, with
   operands OPERANDS.  If DOT is true, make it a record-form instruction.  */

const char *
rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
    gcc_unreachable ();

  if (mode == DImode && ne == 0)
    {
      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
	operands[2] = GEN_INT (64 - INTVAL (operands[2]));
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rld%I2cl. %0,%1,%2,%3";
      return "rld%I2cl %0,%1,%2,%3";
    }

  if (mode == DImode && nb == 63)
    {
      operands[3] = GEN_INT (63 - ne);
      if (dot)
	return "rld%I2cr. %0,%1,%2,%3";
      return "rld%I2cr %0,%1,%2,%3";
    }

  if (mode == DImode
      && GET_CODE (operands[4]) != LSHIFTRT
      && CONST_INT_P (operands[2])
      && ne == INTVAL (operands[2]))
    {
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rld%I2c. %0,%1,%2,%3";
      return "rld%I2c %0,%1,%2,%3";
    }

  if (nb < 32 && ne < 32)
    {
      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
	operands[2] = GEN_INT (32 - INTVAL (operands[2]));
      operands[3] = GEN_INT (31 - nb);
      operands[4] = GEN_INT (31 - ne);
      /* This insn can also be a 64-bit rotate with mask that really makes
	 it just a shift right (with mask); the %h below are to adjust for
	 that situation (shift count is >= 32 in that case).  */
      if (dot)
	return "rlw%I2nm. %0,%1,%h2,%3,%4";
      return "rlw%I2nm %0,%1,%h2,%3,%4";
    }

  gcc_unreachable ();
}
/* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
   rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
   ASHIFT, or LSHIFTRT) in mode MODE.  */

bool
rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
    return false;

  int n = GET_MODE_PRECISION (mode);

  int sh = INTVAL (XEXP (shift, 1));
  if (sh < 0 || sh >= n)
    return false;

  rtx_code code = GET_CODE (shift);

  /* Convert any shift by 0 to a rotate, to simplify below code.  */
  if (sh == 0)
    code = ROTATE;

  /* Convert rotate to simple shift if we can, to make analysis simpler.  */
  if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
    code = ASHIFT;
  if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
    {
      code = LSHIFTRT;
      sh = n - sh;
    }

  /* DImode rotates need rldimi.  */
  if (mode == DImode && code == ROTATE)
    return (ne == sh);

  /* SImode rotates need rlwimi.  */
  if (mode == SImode && code == ROTATE)
    return (nb < 32 && ne < 32 && sh < 32);

  /* Wrap-around masks are only okay for rotates.  */
  if (ne > nb)
    return false;

  /* Don't allow ASHIFT if the mask is wrong for that.  */
  if (code == ASHIFT && ne < sh)
    return false;

  /* If we can do it with an rlwimi, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (nb < 32 && ne < 32 && sh < 32
      && !(code == LSHIFTRT && nb >= 32 - sh))
    return true;

  /* If we can do it with an rldimi, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (code == LSHIFTRT)
    sh = 64 - sh;

  if (ne == sh)
    return !(code == LSHIFTRT && nb >= sh);

  return false;
}
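/* Illustrative sketch, not part of the compiler: the operation an insert
   with shift performs -- (dest & ~mask) | (rot(src, sh) & mask) -- which
   is a single rlwimi when the predicate above accepts MASK and SH.  The
   demo_* name is hypothetical and the block is compiled out.  */
#if 0
#include <stdio.h>
#include <stdint.h>

static uint32_t
demo_rlwimi (uint32_t dest, uint32_t src, int sh, uint32_t mask)
{
  uint32_t rot = sh ? (src << sh) | (src >> (32 - sh)) : src;
  return (rot & mask) | (dest & ~mask);
}

int
main (void)
{
  uint32_t dest = 0xaaaaaaaa, src = 0x1234;
  printf ("%08x\n", demo_rlwimi (dest, src, 8, 0x00ffff00));
  /* prints "aa1234aa": 16 bits of src inserted at bit position 8 */
  return 0;
}
#endif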
/* Return the instruction template for an insert with mask in mode MODE, with
   operands OPERANDS.  If DOT is true, make it a record-form instruction.  */

const char *
rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
    gcc_unreachable ();

  /* Prefer rldimi because rlwimi is cracked.  */
  if (TARGET_POWERPC64
      && (!dot || mode == DImode)
      && GET_CODE (operands[4]) != LSHIFTRT
      && ne == INTVAL (operands[2]))
    {
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rldimi. %0,%1,%2,%3";
      return "rldimi %0,%1,%2,%3";
    }

  if (nb < 32 && ne < 32)
    {
      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
	operands[2] = GEN_INT (32 - INTVAL (operands[2]));
      operands[3] = GEN_INT (31 - nb);
      operands[4] = GEN_INT (31 - ne);
      if (dot)
	return "rlwimi. %0,%1,%2,%3,%4";
      return "rlwimi %0,%1,%2,%3,%4";
    }

  gcc_unreachable ();
}
/* Return whether an AND with C (a CONST_INT) in mode MODE can be done
   using two machine instructions.  */

bool
rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
{
  /* There are two kinds of AND we can handle with two insns:
     1) those we can do with two rl* insns;
     2) ori[s];xori[s].

     We do not handle that last case yet.  */

  /* If there is just one stretch of ones, we can do it.  */
  if (rs6000_is_valid_mask (c, NULL, NULL, mode))
    return true;

  /* Otherwise, fill in the lowest "hole"; if we can do the result with
     one insn, we can do the whole thing with two.  */
  unsigned HOST_WIDE_INT val = INTVAL (c);
  unsigned HOST_WIDE_INT bit1 = val & -val;
  unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
  unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
  unsigned HOST_WIDE_INT bit3 = val1 & -val1;

  return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
}
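/* Illustrative sketch, not part of the compiler: how the bit tricks above
   fill the lowest "hole" of a two-run mask.  The demo_* name is
   hypothetical and the block is compiled out.  */
#if 0
#include <stdio.h>
#include <stdint.h>

static void
demo_fill_lowest_hole (uint64_t val)
{
  uint64_t bit1 = val & -val;		/* lowest set bit */
  uint64_t bit2 = (val + bit1) & ~val;	/* lowest clear bit above it */
  uint64_t val1 = (val + bit1) & val;	/* val with its lowest run cleared */
  uint64_t bit3 = val1 & -val1;		/* start of the next run of ones */

  /* val + bit3 - bit2 sets every bit of the lowest hole, merging the two
     lowest runs of ones into one contiguous run.  */
  printf ("%016llx -> %016llx\n",
	  (unsigned long long) val,
	  (unsigned long long) (val + bit3 - bit2));
}

int
main (void)
{
  demo_fill_lowest_hole (0xff00ff00ULL);	/* ...ff00ff00 -> ...ffffff00 */
  return 0;
}
#endif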
/* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
   If EXPAND is true, split rotate-and-mask instructions we generate to
   their constituent parts as well (this is used during expand); if DOT
   is 1, make the last insn a record-form instruction clobbering the
   destination GPR and setting the CC reg (from operands[3]); if 2, set
   that GPR as well as the CC reg.  */

void
rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
{
  gcc_assert (!(expand && dot));

  unsigned HOST_WIDE_INT val = INTVAL (operands[2]);

  /* If it is one stretch of ones, it is DImode; shift left, mask, then
     shift right.  This generates better code than doing the masks without
     shifts, or shifting first right and then left.  */
  int nb, ne;
  if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
    {
      gcc_assert (mode == DImode);

      int shift = 63 - nb;
      if (expand)
	{
	  rtx tmp1 = gen_reg_rtx (DImode);
	  rtx tmp2 = gen_reg_rtx (DImode);
	  emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
	  emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
	  emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
	}
      else
	{
	  rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
	  tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
	  emit_move_insn (operands[0], tmp);
	  tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
	  rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
	}
      return;
    }

  /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
     that does the rest.  */
  unsigned HOST_WIDE_INT bit1 = val & -val;
  unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
  unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
  unsigned HOST_WIDE_INT bit3 = val1 & -val1;

  unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
  unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;

  gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));

  /* Two "no-rotate"-and-mask instructions, for SImode.  */
  if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
    {
      gcc_assert (mode == SImode);

      rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
      rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
      emit_move_insn (reg, tmp);
      tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
      return;
    }

  gcc_assert (mode == DImode);

  /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
     insns; we have to do the first in SImode, because it wraps.  */
  if (mask2 <= 0xffffffff
      && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
    {
      rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
      rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
			     GEN_INT (mask1));
      rtx reg_low = gen_lowpart (SImode, reg);
      emit_move_insn (reg_low, tmp);
      tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
      return;
    }

  /* Two rld* insns: rotate, clear the hole in the middle (which now is
     at the top end), rotate back and clear the other hole.  */
  int right = exact_log2 (bit3);
  int left = 64 - right;

  /* Rotate the mask too.  */
  mask1 = (mask1 >> right) | ((bit2 - 1) << left);

  if (expand)
    {
      rtx tmp1 = gen_reg_rtx (DImode);
      rtx tmp2 = gen_reg_rtx (DImode);
      rtx tmp3 = gen_reg_rtx (DImode);
      emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
      emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
      emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
      emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
    }
  else
    {
      rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
      tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
      emit_move_insn (operands[0], tmp);
      tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
      tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
    }
}
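/* Illustrative sketch, not part of the compiler: checking that the
   rotate / mask / rotate-back sequence emitted above computes x & val
   for a two-run mask.  The demo names are hypothetical and the block is
   compiled out.  */
#if 0
#include <stdio.h>
#include <stdint.h>

static uint64_t
demo_rotl64 (uint64_t x, int n)
{
  return n ? (x << n) | (x >> (64 - n)) : x;
}

int
main (void)
{
  uint64_t val = 0xff00ff00ULL;			/* two runs of ones */
  uint64_t x = 0x123456789abcdef0ULL;

  uint64_t bit1 = val & -val;
  uint64_t bit2 = (val + bit1) & ~val;
  uint64_t val1 = (val + bit1) & val;
  uint64_t bit3 = val1 & -val1;

  uint64_t mask1 = -bit3 + bit2 - 1;
  uint64_t mask2 = val + bit3 - bit2;

  int right = __builtin_ctzll (bit3);
  int left = 64 - right;

  /* Rotate the mask too, exactly as the function above does.  */
  mask1 = (mask1 >> right) | ((bit2 - 1) << left);

  uint64_t r = demo_rotl64 (demo_rotl64 (x, left) & mask1, right) & mask2;
  printf ("%d\n", r == (x & val));		/* prints 1 */
  return 0;
}
#endif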
/* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
   for lfq and stfq insns iff the registers are hard registers.  */

int
registers_ok_for_quad_peep (rtx reg1, rtx reg2)
{
  /* We might have been passed a SUBREG.  */
  if (!REG_P (reg1) || !REG_P (reg2))
    return 0;

  /* We might have been passed non floating point registers.  */
  if (!FP_REGNO_P (REGNO (reg1))
      || !FP_REGNO_P (REGNO (reg2)))
    return 0;

  return (REGNO (reg1) == REGNO (reg2) - 1);
}
/* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
   addr1 and addr2 must be in consecutive memory locations
   (addr2 == addr1 + 8).  */

int
mems_ok_for_quad_peep (rtx mem1, rtx mem2)
{
  rtx addr1, addr2;
  unsigned int reg1, reg2;
  int offset1, offset2;

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
    return 0;

  addr1 = XEXP (mem1, 0);
  addr2 = XEXP (mem2, 0);

  /* Extract an offset (if used) from the first addr.  */
  if (GET_CODE (addr1) == PLUS)
    {
      /* If not a REG, return zero.  */
      if (!REG_P (XEXP (addr1, 0)))
	return 0;
      else
	{
	  reg1 = REGNO (XEXP (addr1, 0));
	  /* The offset must be constant!  */
	  if (!CONST_INT_P (XEXP (addr1, 1)))
	    return 0;
	  offset1 = INTVAL (XEXP (addr1, 1));
	}
    }
  else if (!REG_P (addr1))
    return 0;
  else
    {
      reg1 = REGNO (addr1);
      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
      offset1 = 0;
    }

  /* And now for the second addr.  */
  if (GET_CODE (addr2) == PLUS)
    {
      /* If not a REG, return zero.  */
      if (!REG_P (XEXP (addr2, 0)))
	return 0;
      else
	{
	  reg2 = REGNO (XEXP (addr2, 0));
	  /* The offset must be constant.  */
	  if (!CONST_INT_P (XEXP (addr2, 1)))
	    return 0;
	  offset2 = INTVAL (XEXP (addr2, 1));
	}
    }
  else if (!REG_P (addr2))
    return 0;
  else
    {
      reg2 = REGNO (addr2);
      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
      offset2 = 0;
    }

  /* Both of these must have the same base register.  */
  if (reg1 != reg2)
    return 0;

  /* The offset for the second addr must be 8 more than the first addr.  */
  if (offset2 != offset1 + 8)
    return 0;

  /* All the tests passed.  addr1 and addr2 are valid for lfq or stfq
     insns.  */
  return 1;
}
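/* Illustrative sketch, not part of the compiler: the two conditions the
   peephole checks, modelled with plain base-register/offset pairs.  The
   demo_* names are hypothetical and the block is compiled out.  */
#if 0
#include <stdio.h>

struct demo_addr { unsigned base; int offset; };

static int
demo_mems_ok_for_quad (struct demo_addr a1, struct demo_addr a2)
{
  /* Same base register, second address exactly 8 bytes higher.  */
  return a1.base == a2.base && a2.offset == a1.offset + 8;
}

int
main (void)
{
  struct demo_addr a = { 3, 0 }, b = { 3, 8 }, c = { 4, 8 };
  printf ("%d %d\n", demo_mems_ok_for_quad (a, b),	/* 1 */
	   demo_mems_ok_for_quad (a, c));		/* 0 */
  return 0;
}
#endif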
/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.  For SDmode values we
   need to use DDmode, in all other cases we can use the same mode.  */
static machine_mode
rs6000_secondary_memory_needed_mode (machine_mode mode)
{
  if (lra_in_progress && mode == SDmode)
    return DDmode;
  return mode;
}
/* Classify a register type.  Because the FMRGOW/FMRGEW instructions only work
   on traditional floating point registers, and the VMRGOW/VMRGEW instructions
   only work on the traditional altivec registers, note if an altivec register
   was chosen.  */

static enum rs6000_reg_type
register_to_reg_type (rtx reg, bool *is_altivec)
{
  HOST_WIDE_INT regno;
  enum reg_class rclass;

  if (SUBREG_P (reg))
    reg = SUBREG_REG (reg);

  if (!REG_P (reg))
    return NO_REG_TYPE;

  regno = REGNO (reg);
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!lra_in_progress && !reload_completed)
	return PSEUDO_REG_TYPE;

      regno = true_regnum (reg);
      if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
	return PSEUDO_REG_TYPE;
    }

  gcc_assert (regno >= 0);

  if (is_altivec && ALTIVEC_REGNO_P (regno))
    *is_altivec = true;

  rclass = rs6000_regno_regclass[regno];
  return reg_class_to_reg_type[(int)rclass];
}
/* Helper function to return the cost of adding a TOC entry address.  */

static inline int
rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
{
  int ret;

  if (TARGET_CMODEL != CMODEL_SMALL)
    ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
  else
    ret = (TARGET_MINIMAL_TOC) ? 6 : 3;

  return ret;
}
/* Helper function for rs6000_secondary_reload to determine whether the memory
   address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
   needs reloading.  Return negative if the memory is not handled by the memory
   helper functions and to try a different reload method, 0 if no additional
   instructions are needed, and positive to give the extra cost for the
   memory.  */

static int
rs6000_secondary_reload_memory (rtx addr,
				enum reg_class rclass,
				machine_mode mode)
{
  int extra_cost = 0;
  rtx reg, and_arg, plus_arg0, plus_arg1;
  addr_mask_type addr_mask;
  const char *type = NULL;
  const char *fail_msg = NULL;

  if (GPR_REG_CLASS_P (rclass))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];

  else if (rclass == FLOAT_REGS)
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];

  else if (rclass == ALTIVEC_REGS)
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];

  /* For the combined VSX_REGS, turn off Altivec AND -16.  */
  else if (rclass == VSX_REGS)
    addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
		 & ~RELOAD_REG_AND_M16);

  /* If the register allocator hasn't made up its mind yet on the register
     class to use, settle on defaults to use.  */
  else if (rclass == NO_REGS)
    {
      addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
		   & ~RELOAD_REG_AND_M16);

      if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
	addr_mask &= ~(RELOAD_REG_INDEXED
		       | RELOAD_REG_PRE_INCDEC
		       | RELOAD_REG_PRE_MODIFY);
    }

  else
    addr_mask = 0;

  /* If the register isn't valid in this register class, just return now.  */
  if ((addr_mask & RELOAD_REG_VALID) == 0)
    {
      if (TARGET_DEBUG_ADDR)
	fprintf (stderr,
		 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
		 "not valid in class\n",
		 GET_MODE_NAME (mode), reg_class_names[rclass]);

      return -1;
    }

  switch (GET_CODE (addr))
    {
      /* Does the register class support auto update forms for this mode?  We
	 don't need a scratch register, since the powerpc only supports
	 PRE_INC, PRE_DEC, and PRE_MODIFY.  */
    case PRE_INC:
    case PRE_DEC:
      reg = XEXP (addr, 0);
      if (!base_reg_operand (addr, GET_MODE (reg)))
	{
	  fail_msg = "no base register #1";
	  extra_cost = -1;
	}

      else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
	{
	  extra_cost = 1;
	  type = "update";
	}
      break;

    case PRE_MODIFY:
      reg = XEXP (addr, 0);
      plus_arg1 = XEXP (addr, 1);
      if (!base_reg_operand (reg, GET_MODE (reg))
	  || GET_CODE (plus_arg1) != PLUS
	  || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
	{
	  fail_msg = "bad PRE_MODIFY";
	  extra_cost = -1;
	}

      else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
	{
	  extra_cost = 1;
	  type = "update";
	}
      break;

      /* Do we need to simulate AND -16 to clear the bottom address bits used
	 in VMX load/stores?  Only allow the AND for vector sizes.  */
    case AND:
      and_arg = XEXP (addr, 0);
      if (GET_MODE_SIZE (mode) != 16
	  || !CONST_INT_P (XEXP (addr, 1))
	  || INTVAL (XEXP (addr, 1)) != -16)
	{
	  fail_msg = "bad Altivec AND #1";
	  extra_cost = -1;
	}

      if (rclass != ALTIVEC_REGS)
	{
	  if (legitimate_indirect_address_p (and_arg, false))
	    extra_cost = 1;

	  else if (legitimate_indexed_address_p (and_arg, false))
	    extra_cost = 2;

	  else
	    {
	      fail_msg = "bad Altivec AND #2";
	      extra_cost = -1;
	    }

	  type = "and";
	}
      break;

      /* If this is an indirect address, make sure it is a base register.  */
    case REG:
    case SUBREG:
      if (!legitimate_indirect_address_p (addr, false))
	{
	  extra_cost = 1;
	  type = "move";
	}
      break;

      /* If this is an indexed address, make sure the register class can handle
	 indexed addresses for this mode.  */
    case PLUS:
      plus_arg0 = XEXP (addr, 0);
      plus_arg1 = XEXP (addr, 1);

      /* (plus (plus (reg) (constant)) (constant)) is generated during
	 push_reload processing, so handle it now.  */
      if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      extra_cost = 1;
	      type = "offset";
	    }
	}

      /* (plus (plus (reg) (constant)) (reg)) is also generated during
	 push_reload processing, so handle it now.  */
      else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	    {
	      extra_cost = 1;
	      type = "indexed #2";
	    }
	}

      else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
	{
	  fail_msg = "no base register #2";
	  extra_cost = -1;
	}

      else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0
	      || !legitimate_indexed_address_p (addr, false))
	    {
	      extra_cost = 1;
	      type = "indexed";
	    }
	}

      else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
	       && CONST_INT_P (plus_arg1))
	{
	  if (!quad_address_offset_p (INTVAL (plus_arg1)))
	    {
	      extra_cost = 1;
	      type = "vector d-form offset";
	    }
	}

      /* Make sure the register class can handle offset addresses.  */
      else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      extra_cost = 1;
	      type = "offset #2";
	    }
	}

      else
	{
	  fail_msg = "bad PLUS";
	  extra_cost = -1;
	}

      break;

    case LO_SUM:
      /* Quad offsets are restricted and can't handle normal addresses.  */
      if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
	{
	  extra_cost = -1;
	  type = "vector d-form lo_sum";
	}

      else if (!legitimate_lo_sum_address_p (mode, addr, false))
	{
	  fail_msg = "bad LO_SUM";
	  extra_cost = -1;
	}

      if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	{
	  extra_cost = 1;
	  type = "lo_sum";
	}
      break;

      /* Static addresses need to create a TOC entry.  */
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
	{
	  extra_cost = -1;
	  type = "vector d-form lo_sum #2";
	}

      else
	{
	  type = "address";
	  extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
	}
      break;

      /* TOC references look like offsetable memory.  */
    case UNSPEC:
      if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
	{
	  fail_msg = "bad UNSPEC";
	  extra_cost = -1;
	}

      else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
	{
	  extra_cost = -1;
	  type = "vector d-form lo_sum #3";
	}

      else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	{
	  extra_cost = 1;
	  type = "toc reference";
	}
      break;

    default:
      {
	fail_msg = "bad address";
	extra_cost = -1;
      }
    }

  if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
    {
      if (extra_cost < 0)
	fprintf (stderr,
		 "rs6000_secondary_reload_memory error: mode = %s, "
		 "class = %s, addr_mask = '%s', %s\n",
		 GET_MODE_NAME (mode),
		 reg_class_names[rclass],
		 rs6000_debug_addr_mask (addr_mask, false),
		 (fail_msg != NULL) ? fail_msg : "<bad address>");

      else
	fprintf (stderr,
		 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
		 "addr_mask = '%s', extra cost = %d, %s\n",
		 GET_MODE_NAME (mode),
		 reg_class_names[rclass],
		 rs6000_debug_addr_mask (addr_mask, false),
		 extra_cost,
		 (type) ? type : "<none>");
    }

  return extra_cost;
}
/* Helper function for rs6000_secondary_reload to return true if a move to a
   different register class is really a simple move.  */

static bool
rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
				     enum rs6000_reg_type from_type,
				     machine_mode mode)
{
  int size = GET_MODE_SIZE (mode);

  /* Add support for various direct moves available.  In this function, we only
     look at cases where we don't need any extra registers, and one or more
     simple move insns are issued.  Originally small integers are not allowed
     in FPR/VSX registers.  Single precision binary floating is not a simple
     move because we need to convert to the single precision memory layout.
     The 4-byte SDmode can be moved.  TDmode values are disallowed since they
     need special direct move handling, which we do not support yet.  */
  if (TARGET_DIRECT_MOVE
      && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
	  || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
    {
      if (TARGET_POWERPC64)
	{
	  /* ISA 2.07: MTVSRD or MFVSRD.  */
	  if (size == 8)
	    return true;

	  /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD.  */
	  if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
	    return true;
	}

      /* ISA 2.07: MTVSRWZ or MFVSRWZ.  */
      if (TARGET_P8_VECTOR)
	{
	  if (mode == SImode)
	    return true;

	  if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
	    return true;
	}

      /* ISA 2.07: MTVSRWZ or MFVSRWZ.  */
      if (mode == SDmode)
	return true;
    }

  /* Move to/from SPR.  */
  else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
	   && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
	       || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
    return true;

  return false;
}
/* Direct move helper function for rs6000_secondary_reload, handle all of the
   special direct moves that involve allocating an extra register, return the
   insn code of the helper function if there is such a function or
   CODE_FOR_nothing if not.  */

static bool
rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
				     enum rs6000_reg_type from_type,
				     machine_mode mode,
				     secondary_reload_info *sri,
				     bool altivec_p)
{
  bool ret = false;
  enum insn_code icode = CODE_FOR_nothing;
  int cost = 0;
  int size = GET_MODE_SIZE (mode);

  if (TARGET_POWERPC64 && size == 16)
    {
      /* Handle moving 128-bit values from GPRs to VSX registers on
	 ISA 2.07 (power8, power9) when running in 64-bit mode using
	 XXPERMDI to glue the two 64-bit values back together.  */
      if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
	{
	  cost = 3;			/* 2 mtvsrd's, 1 xxpermdi.  */
	  icode = reg_addr[mode].reload_vsx_gpr;
	}

      /* Handle moving 128-bit values from VSX registers to GPRs on
	 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to
	 the bottom 64-bit value.  */
      else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
	{
	  cost = 3;			/* 2 mfvsrd's, 1 xxpermdi.  */
	  icode = reg_addr[mode].reload_gpr_vsx;
	}
    }

  else if (TARGET_POWERPC64 && mode == SFmode)
    {
      if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
	{
	  cost = 3;			/* xscvdpspn, mfvsrd, and.  */
	  icode = reg_addr[mode].reload_gpr_vsx;
	}

      else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
	{
	  cost = 2;			/* mtvsrz, xscvspdpn.  */
	  icode = reg_addr[mode].reload_vsx_gpr;
	}
    }

  else if (!TARGET_POWERPC64 && size == 8)
    {
      /* Handle moving 64-bit values from GPRs to floating point registers on
	 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
	 32-bit values back together.  Altivec register classes must be handled
	 specially since a different instruction is used, and the secondary
	 reload support requires a single instruction class in the scratch
	 register constraint.  However, right now TFmode is not allowed in
	 Altivec registers, so the pattern will never match.  */
      if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
	{
	  cost = 3;			/* 2 mtvsrwz's, 1 fmrgow.  */
	  icode = reg_addr[mode].reload_fpr_gpr;
	}
    }

  if (icode != CODE_FOR_nothing)
    {
      ret = true;
      if (sri)
	{
	  sri->icode = icode;
	  sri->extra_cost = cost;
	}
    }

  return ret;
}
/* Return whether a move between two register classes can be done either
   directly (simple move) or via a pattern that uses a single extra temporary
   (using ISA 2.07's direct move in this case).  */

static bool
rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
			      enum rs6000_reg_type from_type,
			      machine_mode mode,
			      secondary_reload_info *sri,
			      bool altivec_p)
{
  /* Fall back to load/store reloads if either type is not a register.  */
  if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
    return false;

  /* If we haven't allocated registers yet, assume the move can be done for the
     standard register types.  */
  if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
      || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
      || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
    return true;

  /* Moves to the same set of registers are a simple move for non-specialized
     registers.  */
  if (to_type == from_type && IS_STD_REG_TYPE (to_type))
    return true;

  /* Check whether a simple move can be done directly.  */
  if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
    {
      if (sri)
	{
	  sri->icode = CODE_FOR_nothing;
	  sri->extra_cost = 0;
	}
      return true;
    }

  /* Now check if we can do it in a few steps.  */
  return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
					      altivec_p);
}
/* Inform reload about cases where moving X with a mode MODE to a register in
   RCLASS requires an extra scratch or immediate register.  Return the class
   needed for the immediate register.

   For VSX and Altivec, we may need a register to convert sp+offset into
   reg+sp+offset.

   For misaligned 64-bit gpr loads and stores we need a register to
   convert an offset address to indirect.  */

static reg_class_t
rs6000_secondary_reload (bool in_p,
			 rtx x,
			 reg_class_t rclass_i,
			 machine_mode mode,
			 secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;
  reg_class_t ret = ALL_REGS;
  enum insn_code icode;
  bool default_p = false;
  bool done_p = false;

  /* Allow subreg of memory before/during reload.  */
  bool memory_p = (MEM_P (x)
		   || (!reload_completed && SUBREG_P (x)
		       && MEM_P (SUBREG_REG (x))));

  sri->icode = CODE_FOR_nothing;
  sri->t_icode = CODE_FOR_nothing;
  sri->extra_cost = 0;
  icode = (in_p
	   ? reg_addr[mode].reload_load
	   : reg_addr[mode].reload_store);

  if (REG_P (x) || register_operand (x, mode))
    {
      enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
      bool altivec_p = (rclass == ALTIVEC_REGS);
      enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);

      if (!in_p)
	std::swap (to_type, from_type);

      /* Can we do a direct move of some sort?  */
      if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
					altivec_p))
	{
	  icode = (enum insn_code)sri->icode;
	  default_p = false;
	  done_p = true;
	  ret = NO_REGS;
	}
    }

  /* Make sure 0.0 is not reloaded or forced into memory.  */
  if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
    {
      ret = NO_REGS;
      default_p = false;
      done_p = true;
    }

  /* If this is a scalar floating point value and we want to load it into the
     traditional Altivec registers, do it via a move via a traditional floating
     point register, unless we have D-form addressing.  Also make sure that
     non-zero constants use a FPR.  */
  if (!done_p && reg_addr[mode].scalar_in_vmx_p
      && !mode_supports_vmx_dform (mode)
      && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
      && (memory_p || CONST_DOUBLE_P (x)))
    {
      ret = FLOAT_REGS;
      default_p = false;
      done_p = true;
    }

  /* Handle reload of load/stores if we have reload helper functions.  */
  if (!done_p && icode != CODE_FOR_nothing && memory_p)
    {
      int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
						       mode);

      if (extra_cost >= 0)
	{
	  done_p = true;
	  ret = NO_REGS;
	  if (extra_cost > 0)
	    {
	      sri->extra_cost = extra_cost;
	      sri->icode = icode;
	    }
	}
    }

  /* Handle unaligned loads and stores of integer registers.  */
  if (!done_p && TARGET_POWERPC64
      && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
      && memory_p
      && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
    {
      rtx addr = XEXP (x, 0);
      rtx off = address_offset (addr);

      if (off != NULL_RTX)
	{
	  unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
	  unsigned HOST_WIDE_INT offset = INTVAL (off);

	  /* We need a secondary reload when our legitimate_address_p
	     says the address is good (as otherwise the entire address
	     will be reloaded), and the offset is not a multiple of
	     four or we have an address wrap.  Address wrap will only
	     occur for LO_SUMs since legitimate_offset_address_p
	     rejects addresses for 16-byte mems that will wrap.  */
	  if (GET_CODE (addr) == LO_SUM
	      ? (1 /* legitimate_address_p allows any offset for lo_sum */
		 && ((offset & 3) != 0
		     || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
	      : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
		 && (offset & 3) != 0))
	    {
	      /* -m32 -mpowerpc64 needs to use a 32-bit scratch register.  */
	      if (in_p)
		sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
			      : CODE_FOR_reload_di_load);
	      else
		sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
			      : CODE_FOR_reload_di_store);
	      sri->extra_cost = 2;
	      ret = NO_REGS;
	      done_p = true;
	    }
	  else
	    default_p = true;
	}
      else
	default_p = true;
    }

  if (!done_p && !TARGET_POWERPC64
      && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
      && memory_p
      && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
    {
      rtx addr = XEXP (x, 0);
      rtx off = address_offset (addr);

      if (off != NULL_RTX)
	{
	  unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
	  unsigned HOST_WIDE_INT offset = INTVAL (off);

	  /* We need a secondary reload when our legitimate_address_p
	     says the address is good (as otherwise the entire address
	     will be reloaded), and we have a wrap.

	     legitimate_lo_sum_address_p allows LO_SUM addresses to
	     have any offset so test for wrap in the low 16 bits.

	     legitimate_offset_address_p checks for the range
	     [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
	     for mode size of 16.  We wrap at [0x7ffc,0x7fff] and
	     [0x7ff4,0x7fff] respectively, so test for the
	     intersection of these ranges, [0x7ffc,0x7fff] and
	     [0x7ff4,0x7ff7] respectively.

	     Note that the address we see here may have been
	     manipulated by legitimize_reload_address.  */
	  if (GET_CODE (addr) == LO_SUM
	      ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
	      : offset - (0x8000 - extra) < UNITS_PER_WORD)
	    {
	      if (in_p)
		sri->icode = CODE_FOR_reload_si_load;
	      else
		sri->icode = CODE_FOR_reload_si_store;
	      sri->extra_cost = 2;
	      ret = NO_REGS;
	      done_p = true;
	    }
	  else
	    default_p = true;
	}
      else
	default_p = true;
    }

  if (!done_p)
    default_p = true;

  if (default_p)
    ret = default_secondary_reload (in_p, x, rclass, mode, sri);

  gcc_assert (ret != ALL_REGS);

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
	       "mode = %s",
	       reg_class_names[ret],
	       in_p ? "true" : "false",
	       reg_class_names[rclass],
	       GET_MODE_NAME (mode));

      if (reload_completed)
	fputs (", after reload", stderr);

      if (!done_p)
	fputs (", done_p not set", stderr);

      if (default_p)
	fputs (", default secondary reload", stderr);

      if (sri->icode != CODE_FOR_nothing)
	fprintf (stderr, ", reload func = %s, extra cost = %d",
		 insn_data[sri->icode].name, sri->extra_cost);

      else if (sri->extra_cost > 0)
	fprintf (stderr, ", extra cost = %d", sri->extra_cost);

      fputs ("\n", stderr);

      debug_rtx (x);
    }

  return ret;
}
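/* Illustrative sketch, not part of the compiler: the XOR trick used in
   the LO_SUM tests above.  ((offset & 0xffff) ^ 0x8000) maps the signed
   16-bit displacement range [-0x8000,0x7fff] onto [0,0xffff], so values
   >= 0x10000 - extra are exactly those where the last word of the
   access, at offset + extra, would wrap past the signed 16-bit boundary.
   The demo_* name is hypothetical and the block is compiled out.  */
#if 0
#include <stdio.h>

static int
demo_lo_sum_wraps (unsigned long long offset, unsigned extra)
{
  return ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra;
}

int
main (void)
{
  printf ("%d\n", demo_lo_sum_wraps (0x7ff8, 8));	/* 1: 0x7ff8 + 8 wraps */
  printf ("%d\n", demo_lo_sum_wraps (0x7ff0, 8));	/* 0: still fits */
  return 0;
}
#endif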
/* Better tracing for rs6000_secondary_reload_inner.  */

static void
rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
			       bool store_p)
{
  rtx set, clobber;

  gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);

  fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
	   store_p ? "store" : "load");

  if (store_p)
    set = gen_rtx_SET (mem, reg);
  else
    set = gen_rtx_SET (reg, mem);

  clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
  debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
}

static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
  ATTRIBUTE_NORETURN;

static void
rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
			      bool store_p)
{
  rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
  gcc_unreachable ();
}
/* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
   reload helper functions.  These were identified in
   rs6000_secondary_reload_memory, and if reload decided to use the secondary
   reload, it calls the insns:
	reload_<RELOAD:mode>_<P:mptrsize>_store
	reload_<RELOAD:mode>_<P:mptrsize>_load

   which in turn calls this function, to do whatever is necessary to create
   valid addresses.  */

void
rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
{
  int regno = true_regnum (reg);
  machine_mode mode = GET_MODE (reg);
  addr_mask_type addr_mask;
  rtx addr;
  rtx new_addr;
  rtx op_reg, op0, op1;
  rtx and_op;
  rtx cc_clobber;
  rtvec rv;

  if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
      || !base_reg_operand (scratch, GET_MODE (scratch)))
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];

  else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];

  else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];

  else
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  /* Make sure the mode is valid in this register class.  */
  if ((addr_mask & RELOAD_REG_VALID) == 0)
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  if (TARGET_DEBUG_ADDR)
    rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);

  new_addr = addr = XEXP (mem, 0);
  switch (GET_CODE (addr))
    {
      /* Does the register class support auto update forms for this mode?  If
	 not, do the update now.  We don't need a scratch register, since the
	 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY.  */
    case PRE_INC:
    case PRE_DEC:
      op_reg = XEXP (addr, 0);
      if (!base_reg_operand (op_reg, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
	{
	  int delta = GET_MODE_SIZE (mode);
	  if (GET_CODE (addr) == PRE_DEC)
	    delta = -delta;
	  emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
	  new_addr = op_reg;
	}
      break;

    case PRE_MODIFY:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode)
	  || GET_CODE (op1) != PLUS
	  || !rtx_equal_p (op0, XEXP (op1, 0)))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
	{
	  emit_insn (gen_rtx_SET (op0, op1));
	  new_addr = op0;
	}
      break;

      /* Do we need to simulate AND -16 to clear the bottom address bits used
	 in VMX load/stores?  */
    case AND:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if ((addr_mask & RELOAD_REG_AND_M16) == 0)
	{
	  if (REG_P (op0) || SUBREG_P (op0))
	    op_reg = op0;

	  else if (GET_CODE (op1) == PLUS)
	    {
	      emit_insn (gen_rtx_SET (scratch, op1));
	      op_reg = scratch;
	    }

	  else
	    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch,
					  store_p);

	  and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
	  cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
	  rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
	  emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
	  new_addr = scratch;
	}
      break;

      /* If this is an indirect address, make sure it is a base register.  */
    case REG:
    case SUBREG:
      if (!base_reg_operand (addr, GET_MODE (addr)))
	{
	  emit_insn (gen_rtx_SET (scratch, addr));
	  new_addr = scratch;
	}
      break;

      /* If this is an indexed address, make sure the register class can handle
	 indexed addresses for this mode.  */
    case PLUS:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      else if (int_reg_operand (op1, Pmode))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
	{
	  if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
	      || !quad_address_p (addr, mode, false))
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      /* Make sure the register class can handle offset addresses.  */
      else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      else
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      break;

    case LO_SUM:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      else if (int_reg_operand (op1, Pmode))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      /* Quad offsets are restricted and can't handle normal addresses.  */
      else if (mode_supports_dq_form (mode))
	{
	  emit_insn (gen_rtx_SET (scratch, addr));
	  new_addr = scratch;
	}

      /* Make sure the register class can handle offset addresses.  */
      else if (legitimate_lo_sum_address_p (mode, addr, false))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      else
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      break;

    case SYMBOL_REF:
    case CONST:
    case LABEL_REF:
      rs6000_emit_move (scratch, addr, Pmode);
      new_addr = scratch;
      break;

    default:
      rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
    }

  /* Adjust the address if it changed.  */
  if (addr != new_addr)
    {
      mem = replace_equiv_address_nv (mem, new_addr);
      if (TARGET_DEBUG_ADDR)
	fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
    }

  /* Now create the move.  */
  if (store_p)
    emit_insn (gen_rtx_SET (mem, reg));
  else
    emit_insn (gen_rtx_SET (reg, mem));

  return;
}
/* Convert reloads involving 64-bit gprs and misaligned offset
   addressing, or multiple 32-bit gprs and offsets that are too large,
   to use indirect addressing.  */

void
rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
{
  int regno = true_regnum (reg);
  enum reg_class rclass;
  rtx addr;
  rtx scratch_or_premodify = scratch;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
	       store_p ? "store" : "load");
      fprintf (stderr, "reg:\n");
      debug_rtx (reg);
      fprintf (stderr, "mem:\n");
      debug_rtx (mem);
      fprintf (stderr, "scratch:\n");
      debug_rtx (scratch);
    }

  gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
  gcc_assert (MEM_P (mem));
  rclass = REGNO_REG_CLASS (regno);
  gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
  addr = XEXP (mem, 0);

  if (GET_CODE (addr) == PRE_MODIFY)
    {
      gcc_assert (REG_P (XEXP (addr, 0))
		  && GET_CODE (XEXP (addr, 1)) == PLUS
		  && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
      scratch_or_premodify = XEXP (addr, 0);
      addr = XEXP (addr, 1);
    }
  gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);

  rs6000_emit_move (scratch_or_premodify, addr, Pmode);

  mem = replace_equiv_address_nv (mem, scratch_or_premodify);

  /* Now create the move.  */
  if (store_p)
    emit_insn (gen_rtx_SET (mem, reg));
  else
    emit_insn (gen_rtx_SET (reg, mem));

  return;
}
/* Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS; but on some machines
   in some cases it is preferable to use a more restrictive class.

   On the RS/6000, we have to return NO_REGS when we want to reload a
   floating-point CONST_DOUBLE to force it to be copied to memory.

   We also don't want to reload integer values into floating-point
   registers if we can at all help it.  In fact, this can
   cause reload to die, if it tries to generate a reload of CTR
   into a FP register and discovers it doesn't have the memory location
   required.

   ??? Would it be a good idea to have reload do the converse, that is
   try to reload floating modes into FP registers if possible?
 */

static enum reg_class
rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
{
  machine_mode mode = GET_MODE (x);
  bool is_constant = CONSTANT_P (x);

  /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
     reload class for it.  */
  if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
      && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
    return NO_REGS;

  if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
      && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
    return NO_REGS;

  /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS.  Do not allow
     the reloading of address expressions using PLUS into floating point
     registers.  */
  if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
    {
      if (is_constant)
	{
	  /* Zero is always allowed in all VSX registers.  */
	  if (x == CONST0_RTX (mode))
	    return rclass;

	  /* If this is a vector constant that can be formed with a few Altivec
	     instructions, we want altivec registers.  */
	  if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
	    return ALTIVEC_REGS;

	  /* If this is an integer constant that can easily be loaded into
	     vector registers, allow it.  */
	  if (CONST_INT_P (x))
	    {
	      HOST_WIDE_INT value = INTVAL (x);

	      /* ISA 2.07 can generate -1 in all registers with XXLORC.  ISA
		 2.06 can generate it in the Altivec registers with
		 VSPLTI<x>.  */
	      if (value == -1)
		{
		  if (TARGET_P8_VECTOR)
		    return rclass;
		  else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
		    return ALTIVEC_REGS;
		  else
		    return NO_REGS;
		}

	      /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
		 a sign extend in the Altivec registers.  */
	      if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
		  && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
		return ALTIVEC_REGS;
	    }

	  /* Force constant to memory.  */
	  return NO_REGS;
	}

      /* D-form addressing can easily reload the value.  */
      if (mode_supports_vmx_dform (mode)
	  || mode_supports_dq_form (mode))
	return rclass;

      /* If this is a scalar floating point value and we don't have D-form
	 addressing, prefer the traditional floating point registers so that we
	 can use D-form (register+offset) addressing.  */
      if (rclass == VSX_REGS
	  && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
	return FLOAT_REGS;

      /* Prefer the Altivec registers if Altivec is handling the vector
	 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
	 loads.  */
      if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
	  || mode == V1TImode)
	return ALTIVEC_REGS;

      return rclass;
    }

  if (is_constant || GET_CODE (x) == PLUS)
    {
      if (reg_class_subset_p (GENERAL_REGS, rclass))
	return GENERAL_REGS;
      if (reg_class_subset_p (BASE_REGS, rclass))
	return BASE_REGS;
      return NO_REGS;
    }

  /* For the vector pair and vector quad modes, prefer their natural register
     (VSX or FPR) rather than GPR registers.  For other integer types, prefer
     the GPR registers.  */
  if (rclass == GEN_OR_FLOAT_REGS)
    {
      if (mode == OOmode)
	return VSX_REGS;

      if (mode == XOmode)
	return FLOAT_REGS;

      if (GET_MODE_CLASS (mode) == MODE_INT)
	return GENERAL_REGS;
    }

  return rclass;
}
/* Debug version of rs6000_preferred_reload_class.  */
static enum reg_class
rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
{
  enum reg_class ret = rs6000_preferred_reload_class (x, rclass);

  fprintf (stderr,
	   "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
	   "mode = %s, x:\n",
	   reg_class_names[ret], reg_class_names[rclass],
	   GET_MODE_NAME (GET_MODE (x)));
  debug_rtx (x);

  return ret;
}
/* If we are copying between FP or AltiVec registers and anything else, we need
   a memory location.  The exception is when we are targeting ppc64 and the
   move to/from fpr to gpr instructions are available.  Also, under VSX, you
   can copy vector registers from the FP register set to the Altivec register
   set and vice versa.  */

static bool
rs6000_secondary_memory_needed (machine_mode mode,
				reg_class_t from_class,
				reg_class_t to_class)
{
  enum rs6000_reg_type from_type, to_type;
  bool altivec_p = ((from_class == ALTIVEC_REGS)
		    || (to_class == ALTIVEC_REGS));

  /* If a simple/direct move is available, we don't need secondary memory.  */
  from_type = reg_class_to_reg_type[(int)from_class];
  to_type = reg_class_to_reg_type[(int)to_class];

  if (rs6000_secondary_reload_move (to_type, from_type, mode,
				    (secondary_reload_info *)0, altivec_p))
    return false;

  /* If we have a floating point or vector register class, we need to use
     memory to transfer the data.  */
  if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
    return true;

  return false;
}
/* Debug version of rs6000_secondary_memory_needed.  */
static bool
rs6000_debug_secondary_memory_needed (machine_mode mode,
				      reg_class_t from_class,
				      reg_class_t to_class)
{
  bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);

  fprintf (stderr,
	   "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
	   "to_class = %s, mode = %s\n",
	   ret ? "true" : "false",
	   reg_class_names[from_class],
	   reg_class_names[to_class],
	   GET_MODE_NAME (mode));

  return ret;
}
/* Return the register class of a scratch register needed to copy IN into
   or out of a register in RCLASS in MODE.  If it can be done directly,
   NO_REGS is returned.  */

static enum reg_class
rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
			       rtx in)
{
  int regno;

  if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
#if TARGET_MACHO
		     && MACHOPIC_INDIRECT
#endif
		     ))
    {
      /* We cannot copy a symbolic operand directly into anything
	 other than BASE_REGS for TARGET_ELF.  So indicate that a
	 register from BASE_REGS is needed as an intermediate
	 register.

	 On Darwin, pic addresses require a load from memory, which
	 needs a base register.  */
      if (rclass != BASE_REGS
	  && (SYMBOL_REF_P (in)
	      || GET_CODE (in) == HIGH
	      || GET_CODE (in) == LABEL_REF
	      || GET_CODE (in) == CONST))
	return BASE_REGS;
    }

  if (REG_P (in))
    {
      regno = REGNO (in);
      if (!HARD_REGISTER_NUM_P (regno))
	{
	  regno = true_regnum (in);
	  if (!HARD_REGISTER_NUM_P (regno))
	    regno = -1;
	}
    }
  else if (SUBREG_P (in))
    {
      regno = true_regnum (in);
      if (!HARD_REGISTER_NUM_P (regno))
	regno = -1;
    }
  else
    regno = -1;

  /* If we have VSX register moves, prefer moving scalar values between
     Altivec registers and GPR by going via an FPR (and then via memory)
     instead of reloading the secondary memory address for Altivec moves.  */
  if (TARGET_VSX
      && GET_MODE_SIZE (mode) < 16
      && !mode_supports_vmx_dform (mode)
      && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
	   && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
	  || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
	      && (regno >= 0 && INT_REGNO_P (regno)))))
    return FLOAT_REGS;

  /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
     into anything.  */
  if (rclass == GENERAL_REGS || rclass == BASE_REGS
      || (regno >= 0 && INT_REGNO_P (regno)))
    return NO_REGS;

  /* Constants, memory, and VSX registers can go into VSX registers (both the
     traditional floating point and the altivec registers).  */
  if (rclass == VSX_REGS
      && (regno == -1 || VSX_REGNO_P (regno)))
    return NO_REGS;

  /* Constants, memory, and FP registers can go into FP registers.  */
  if ((regno == -1 || FP_REGNO_P (regno))
      && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
    return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;

  /* Memory, and AltiVec registers can go into AltiVec registers.  */
  if ((regno == -1 || ALTIVEC_REGNO_P (regno))
      && rclass == ALTIVEC_REGS)
    return NO_REGS;

  /* We can copy among the CR registers.  */
  if ((rclass == CR_REGS || rclass == CR0_REGS)
      && regno >= 0 && CR_REGNO_P (regno))
    return NO_REGS;

  /* Otherwise, we need GENERAL_REGS.  */
  return GENERAL_REGS;
}
/* Debug version of rs6000_secondary_reload_class.  */
static enum reg_class
rs6000_debug_secondary_reload_class (enum reg_class rclass,
				     machine_mode mode, rtx in)
{
  enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
  fprintf (stderr,
	   "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
	   "mode = %s, input rtx:\n",
	   reg_class_names[ret], reg_class_names[rclass],
	   GET_MODE_NAME (mode));
  debug_rtx (in);

  return ret;
}
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */

static bool
rs6000_can_change_mode_class (machine_mode from,
			      machine_mode to,
			      reg_class_t rclass)
{
  unsigned from_size = GET_MODE_SIZE (from);
  unsigned to_size = GET_MODE_SIZE (to);

  if (from_size != to_size)
    {
      enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;

      if (reg_classes_intersect_p (xclass, rclass))
	{
	  unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
	  unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
	  bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
	  bool from_float128_vector_p = FLOAT128_VECTOR_P (from);

	  /* Don't allow 64-bit types to overlap with 128-bit types that take a
	     single register under VSX because the scalar part of the register
	     is in the upper 64-bits, and not the lower 64-bits.  Types like
	     TFmode/TDmode that take 2 scalar register can overlap.  128-bit
	     IEEE floating point can't overlap, and neither can small
	     values.  */

	  if (to_float128_vector_p && from_float128_vector_p)
	    return true;

	  else if (to_float128_vector_p || from_float128_vector_p)
	    return false;

	  /* TDmode in floating-mode registers must always go into a register
	     pair with the most significant word in the even-numbered register
	     to match ISA requirements.  In little-endian mode, this does not
	     match subreg numbering, so we cannot allow subregs.  */
	  if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
	    return false;

	  /* Allow SD<->DD changes, since SDmode values are stored in
	     the low half of the DDmode, just like target-independent
	     code expects.  We need to allow at least SD->DD since
	     rs6000_secondary_memory_needed_mode asks for that change
	     to be made for SD reloads.  */
	  if ((to == DDmode && from == SDmode)
	      || (to == SDmode && from == DDmode))
	    return true;

	  if (from_size < 8 || to_size < 8)
	    return false;

	  if (from_size == 8 && (8 * to_nregs) != to_size)
	    return false;

	  if (to_size == 8 && (8 * from_nregs) != from_size)
	    return false;

	  return true;
	}
      else
	return true;
    }

  /* Since the VSX register set includes traditional floating point registers
     and altivec registers, just check for the size being different instead of
     trying to check whether the modes are vector modes.  Otherwise it won't
     allow say DF and DI to change classes.  For types like TFmode and TDmode
     that take 2 64-bit registers, rather than a single 128-bit register, don't
     allow subregs of those types to other 128 bit types.  */
  if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
    {
      unsigned num_regs = (from_size + 15) / 16;
      if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
	  || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
	return false;

      return (from_size == 8 || from_size == 16);
    }

  if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
      && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
    return false;

  return true;
}
/* Debug version of rs6000_can_change_mode_class.  */
static bool
rs6000_debug_can_change_mode_class (machine_mode from,
				    machine_mode to,
				    reg_class_t rclass)
{
  bool ret = rs6000_can_change_mode_class (from, to, rclass);

  fprintf (stderr,
	   "rs6000_can_change_mode_class, return %s, from = %s, "
	   "to = %s, rclass = %s\n",
	   ret ? "true" : "false",
	   GET_MODE_NAME (from), GET_MODE_NAME (to),
	   reg_class_names[rclass]);

  return ret;
}
/* Return a string to do a move operation of 128 bits of data.  */

const char *
rs6000_output_move_128bit (rtx operands[])
{
  rtx dest = operands[0];
  rtx src = operands[1];
  machine_mode mode = GET_MODE (dest);
  int dest_regno;
  int src_regno;
  bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
  bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;

  if (REG_P (dest))
    {
      dest_regno = REGNO (dest);
      dest_gpr_p = INT_REGNO_P (dest_regno);
      dest_fp_p = FP_REGNO_P (dest_regno);
      dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
      dest_vsx_p = dest_fp_p | dest_vmx_p;
    }
  else
    {
      dest_regno = -1;
      dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
    }

  if (REG_P (src))
    {
      src_regno = REGNO (src);
      src_gpr_p = INT_REGNO_P (src_regno);
      src_fp_p = FP_REGNO_P (src_regno);
      src_vmx_p = ALTIVEC_REGNO_P (src_regno);
      src_vsx_p = src_fp_p | src_vmx_p;
    }
  else
    {
      src_regno = -1;
      src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
    }

  /* Register moves.  */
  if (dest_regno >= 0 && src_regno >= 0)
    {
      if (dest_gpr_p)
	{
	  if (src_gpr_p)
	    return "#";

	  if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
	    return (WORDS_BIG_ENDIAN
		    ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
		    : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");

	  else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
	    return "#";
	}

      else if (TARGET_VSX && dest_vsx_p)
	{
	  if (src_vsx_p)
	    return "xxlor %x0,%x1,%x1";

	  else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
	    return (WORDS_BIG_ENDIAN
		    ? "mtvsrdd %x0,%1,%L1"
		    : "mtvsrdd %x0,%L1,%1");

	  else if (TARGET_DIRECT_MOVE && src_gpr_p)
	    return "#";
	}

      else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
	return "vor %0,%1,%1";

      else if (dest_fp_p && src_fp_p)
	return "#";
    }

  /* Loads.  */
  else if (dest_regno >= 0 && MEM_P (src))
    {
      if (dest_gpr_p)
	{
	  if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
	    return "lq %0,%1";
	  else
	    return "#";
	}

      else if (TARGET_ALTIVEC && dest_vmx_p
	       && altivec_indexed_or_indirect_operand (src, mode))
	return "lvx %0,%y1";

      else if (TARGET_VSX && dest_vsx_p)
	{
	  if (mode_supports_dq_form (mode)
	      && quad_address_p (XEXP (src, 0), mode, true))
	    return "lxv %x0,%1";

	  else if (TARGET_P9_VECTOR)
	    return "lxvx %x0,%y1";

	  else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
	    return "lxvw4x %x0,%y1";

	  else
	    return "lxvd2x %x0,%y1";
	}

      else if (TARGET_ALTIVEC && dest_vmx_p)
	return "lvx %0,%y1";

      else if (dest_fp_p)
	return "#";
    }

  /* Stores.  */
  else if (src_regno >= 0 && MEM_P (dest))
    {
      if (src_gpr_p)
	{
	  if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
	    return "stq %1,%0";
	  else
	    return "#";
	}

      else if (TARGET_ALTIVEC && src_vmx_p
	       && altivec_indexed_or_indirect_operand (dest, mode))
	return "stvx %1,%y0";

      else if (TARGET_VSX && src_vsx_p)
	{
	  if (mode_supports_dq_form (mode)
	      && quad_address_p (XEXP (dest, 0), mode, true))
	    return "stxv %x1,%0";

	  else if (TARGET_P9_VECTOR)
	    return "stxvx %x1,%y0";

	  else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
	    return "stxvw4x %x1,%y0";

	  else
	    return "stxvd2x %x1,%y0";
	}

      else if (TARGET_ALTIVEC && src_vmx_p)
	return "stvx %1,%y0";

      else if (src_fp_p)
	return "#";
    }

  /* Constants.  */
  else if (dest_regno >= 0
	   && (CONST_INT_P (src)
	       || CONST_WIDE_INT_P (src)
	       || CONST_DOUBLE_P (src)
	       || GET_CODE (src) == CONST_VECTOR))
    {
      if (dest_gpr_p)
	return "#";

      else if ((dest_vmx_p && TARGET_ALTIVEC)
	       || (dest_vsx_p && TARGET_VSX))
	return output_vec_const_move (operands);
    }

  fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
}
/* Validate a 128-bit move.  */
bool
rs6000_move_128bit_ok_p (rtx operands[])
{
  machine_mode mode = GET_MODE (operands[0]);
  return (gpc_reg_operand (operands[0], mode)
	  || gpc_reg_operand (operands[1], mode));
}

/* Return true if a 128-bit move needs to be split.  */
bool
rs6000_split_128bit_ok_p (rtx operands[])
{
  if (!reload_completed)
    return false;

  if (!gpr_or_gpr_p (operands[0], operands[1]))
    return false;

  if (quad_load_store_p (operands[0], operands[1]))
    return false;

  return true;
}
/* Given a comparison operation, return the bit number in CCR to test.  We
   know this is a valid comparison.

   SCC_P is 1 if this is for an scc.  That means that %D will have been
   used instead of %C, so the bits will be in different places.

   Return -1 if OP isn't a valid comparison for some reason.  */

int
ccr_bit (rtx op, int scc_p)
{
  enum rtx_code code = GET_CODE (op);
  machine_mode cc_mode;
  int cc_regnum;
  int base_bit;
  rtx reg;

  if (!COMPARISON_P (op))
    return -1;

  reg = XEXP (op, 0);

  if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
    return -1;

  cc_mode = GET_MODE (reg);
  cc_regnum = REGNO (reg);
  base_bit = 4 * (cc_regnum - CR0_REGNO);

  validate_condition_mode (code, cc_mode);

  /* When generating a sCOND operation, only positive conditions are
     allowed.  */
  switch (code)
    {
    case NE:
      return scc_p ? base_bit + 3 : base_bit + 2;
    case EQ:
      return base_bit + 2;
    case GT:  case GTU:  case UNLE:
      return base_bit + 1;
    case LT:  case LTU:  case UNGE:
      return base_bit;
    case ORDERED:  case UNORDERED:
      return base_bit + 3;

    case GE:  case GEU:
      /* If scc, we will have done a cror to put the bit in the
	 unordered position.  So test that bit.  For integer, this is ! LT
	 unless this is an scc insn.  */
      return scc_p ? base_bit + 3 : base_bit;

    case LE:  case LEU:
      return scc_p ? base_bit + 3 : base_bit + 1;

    default:
      return -1;
    }
}
/* Return the GOT register.  */

rtx
rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
{
  /* The second flow pass currently (June 1999) can't update
     regs_ever_live without disturbing other parts of the compiler, so
     update it here to make the prolog/epilogue code happy.  */
  if (!can_create_pseudo_p ()
      && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
    df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);

  crtl->uses_pic_offset_table = 1;

  return pic_offset_table_rtx;
}

#define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
/* Write out a function code label.  */

void
rs6000_output_function_entry (FILE *file, const char *fname)
{
  if (fname[0] != '.')
    {
      switch (DEFAULT_ABI)
	{
	default:
	  gcc_unreachable ();

	case ABI_AIX:
	  if (DOT_SYMBOLS)
	    putc ('.', file);
	  else
	    ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
	  break;

	case ABI_ELFv2:
	case ABI_V4:
	case ABI_DARWIN:
	  break;
	}
    }

  RS6000_OUTPUT_BASENAME (file, fname);
}

/* Print an operand.  Recognize special options, documented below.  */

#if TARGET_ELF
/* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
   only introduced by the linker, when applying the sda21
   relocation.  */
#define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
#define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
#else
#define SMALL_DATA_RELOC "sda21"
#define SMALL_DATA_REG 0
#endif
14060 print_operand (FILE *file
, rtx x
, int code
)
14063 unsigned HOST_WIDE_INT uval
;
14067 /* %a is output_address. */
14069 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
14073 /* Write the MMA accumulator number associated with VSX register X. */
14074 if (!REG_P (x
) || !FP_REGNO_P (REGNO (x
)) || (REGNO (x
) % 4) != 0)
14075 output_operand_lossage ("invalid %%A value");
14077 fprintf (file
, "%d", (REGNO (x
) - FIRST_FPR_REGNO
) / 4);
14081 /* Like 'J' but get to the GT bit only. */
14082 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
14084 output_operand_lossage ("invalid %%D value");
14088 /* Bit 1 is GT bit. */
14089 i
= 4 * (REGNO (x
) - CR0_REGNO
) + 1;
14091 /* Add one for shift count in rlinm for scc. */
14092 fprintf (file
, "%d", i
+ 1);
14096 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
14099 output_operand_lossage ("invalid %%e value");
14104 if ((uval
& 0xffff) == 0 && uval
!= 0)
14109 /* X is a CR register. Print the number of the EQ bit of the CR */
14110 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
14111 output_operand_lossage ("invalid %%E value");
14113 fprintf (file
, "%d", 4 * (REGNO (x
) - CR0_REGNO
) + 2);
    case 'f':
      /* X is a CR register.  Print the shift count needed to move it
	 to the high-order four bits.  */
      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%f value");
      else
	fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
      return;

    case 'F':
      /* Similar, but print the count for the rotate in the opposite
	 direction.  */
      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%F value");
      else
	fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
      return;

    case 'G':
      /* X is a constant integer.  If it is negative, print "m",
	 otherwise print "z".  This is to make an aze or ame insn.  */
      if (!CONST_INT_P (x))
	output_operand_lossage ("invalid %%G value");
      else if (INTVAL (x) >= 0)
	putc ('z', file);
      else
	putc ('m', file);
      return;

    case 'h':
      /* If constant, output low-order five bits.  Otherwise, write
	 normally.  */
      if (INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
      else
	print_operand (file, x, 0);
      return;

    case 'H':
      /* If constant, output low-order six bits.  Otherwise, write
	 normally.  */
      if (INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
      else
	print_operand (file, x, 0);
      return;
    case 'I':
      /* Print `i' if this is a constant, else nothing.  */
      if (INT_P (x))
	putc ('i', file);
      return;

    case 'j':
      /* Write the bit number in CCR for jump.  */
      i = ccr_bit (x, 0);
      if (i == -1)
	output_operand_lossage ("invalid %%j code");
      else
	fprintf (file, "%d", i);
      return;

    case 'J':
      /* Similar, but add one for shift count in rlinm for scc and pass
	 scc flag to `ccr_bit'.  */
      i = ccr_bit (x, 1);
      if (i == -1)
	output_operand_lossage ("invalid %%J code");
      else
	/* If we want bit 31, write a shift count of zero, not 32.  */
	fprintf (file, "%d", i == 31 ? 0 : i + 1);
      return;

    case 'k':
      /* X must be a constant.  Write the 1's complement of the
	 constant.  */
      if (! INT_P (x))
	output_operand_lossage ("invalid %%k value");
      else
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
      return;

    case 'K':
      /* X must be a symbolic constant on ELF.  Write an
	 expression suitable for an 'addi' that adds in the low 16
	 bits of the MEM.  */
      if (GET_CODE (x) == CONST)
	{
	  if (GET_CODE (XEXP (x, 0)) != PLUS
	      || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
		  && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
	      || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
	    output_operand_lossage ("invalid %%K value");
	}
      print_operand_address (file, x);
      fputs ("@l", file);
      return;
      /* %l is output_asm_label.  */

    case 'L':
      /* Write second word of DImode or DFmode reference.  Works on register
	 or non-indexed memory only.  */
      if (REG_P (x))
	fputs (reg_names[REGNO (x) + 1], file);
      else if (MEM_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  /* Handle possible auto-increment.  Since it is pre-increment and
	     we have already done it, we can just use an offset of word.  */
	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
	    output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
						 UNITS_PER_WORD));
	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
	    output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
						 UNITS_PER_WORD));
	  else
	    output_address (mode, XEXP (adjust_address_nv (x, SImode,
							   UNITS_PER_WORD),
					0));

	  if (small_data_operand (x, GET_MODE (x)))
	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
		     reg_names[SMALL_DATA_REG]);
	}
      return;

    case 'N': /* Unused */
      /* Write the number of elements in the vector times 4.  */
      if (GET_CODE (x) != PARALLEL)
	output_operand_lossage ("invalid %%N value");
      else
	fprintf (file, "%d", XVECLEN (x, 0) * 4);
      return;

    case 'O': /* Unused */
      /* Similar, but subtract 1 first.  */
      if (GET_CODE (x) != PARALLEL)
	output_operand_lossage ("invalid %%O value");
      else
	fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
      return;
    case 'p':
      /* X is a CONST_INT that is a power of two.  Output the logarithm.  */
      if (! INT_P (x)
	  || (i = exact_log2 (INTVAL (x))) < 0)
	output_operand_lossage ("invalid %%p value");
      else
	fprintf (file, "%d", i);
      return;

    case 'P':
      /* The operand must be an indirect memory reference.  The result
	 is the register name.  */
      if (!MEM_P (x) || !REG_P (XEXP (x, 0))
	  || REGNO (XEXP (x, 0)) >= 32)
	output_operand_lossage ("invalid %%P value");
      else
	fputs (reg_names[REGNO (XEXP (x, 0))], file);
      return;

    case 'q':
      /* This outputs the logical code corresponding to a boolean
	 expression.  The expression may have one or both operands
	 negated (if one, only the first one).  For condition register
	 logical operations, it will also treat the negated
	 CR codes as NOTs, but not handle NOTs of them.  */
      {
	const char *const *t = 0;
	const char *s;
	enum rtx_code code = GET_CODE (x);
	static const char * const tbl[3][3] = {
	  { "and", "andc", "nor" },
	  { "or", "orc", "nand" },
	  { "xor", "eqv", "xor" } };

	if (code == AND)
	  t = tbl[0];
	else if (code == IOR)
	  t = tbl[1];
	else if (code == XOR)
	  t = tbl[2];
	else
	  output_operand_lossage ("invalid %%q value");

	if (GET_CODE (XEXP (x, 0)) != NOT)
	  s = t[0];
	else
	  {
	    if (GET_CODE (XEXP (x, 1)) == NOT)
	      s = t[2];
	    else
	      s = t[1];
	  }

	fputs (s, file);
      }
      return;
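
      /* As an illustrative reading of the table above (example comment,
	 not from the original sources): "%q" prints "andc" for
	 (and (not A) B) and "eqv" for (xor (not A) B), while the plain
	 codes print "and", "or" and "xor".  */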
    case 'Q':
      if (! TARGET_MFCRF)
	return;
      fputc (',', file);
      /* FALLTHRU */

    case 'R':
      /* X is a CR register.  Print the mask for `mtcrf'.  */
      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%R value");
      else
	fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
      return;

    case 's':
      /* Low 5 bits of 32 - value.  */
      if (! INT_P (x))
	output_operand_lossage ("invalid %%s value");
      else
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
      return;

    case 't':
      /* Like 'J' but get to the OVERFLOW/UNORDERED bit.  */
      if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
	{
	  output_operand_lossage ("invalid %%t value");
	  return;
	}

      /* Bit 3 is OV bit.  */
      i = 4 * (REGNO (x) - CR0_REGNO) + 3;

      /* If we want bit 31, write a shift count of zero, not 32.  */
      fprintf (file, "%d", i == 31 ? 0 : i + 1);
      return;
    case 'T':
      /* Print the symbolic name of a branch target register.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
	x = XVECEXP (x, 0, 0);
      if (!REG_P (x) || (REGNO (x) != LR_REGNO
			 && REGNO (x) != CTR_REGNO))
	output_operand_lossage ("invalid %%T value");
      else if (REGNO (x) == LR_REGNO)
	fputs ("lr", file);
      else
	fputs ("ctr", file);
      return;

    case 'u':
      /* High-order or low-order 16 bits of constant, whichever is non-zero,
	 for use in unsigned operand.  */
      if (! INT_P (x))
	{
	  output_operand_lossage ("invalid %%u value");
	  return;
	}

      uval = INTVAL (x);
      if ((uval & 0xffff) == 0)
	uval >>= 16;

      fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
      return;

    case 'v':
      /* High-order 16 bits of constant for use in signed operand.  */
      if (! INT_P (x))
	output_operand_lossage ("invalid %%v value");
      else
	fprintf (file, HOST_WIDE_INT_PRINT_HEX,
		 (INTVAL (x) >> 16) & 0xffff);
      return;

    case 'U':
      /* Print `u' if this has an auto-increment or auto-decrement.  */
      if (MEM_P (x)
	  && (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC
	      || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
	putc ('u', file);
      return;

    case 'V':
      /* Print the trap code for this operand.  */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("eq", file);   /* 4 */
	  break;
	case NE:
	  fputs ("ne", file);   /* 24 */
	  break;
	case LT:
	  fputs ("lt", file);   /* 16 */
	  break;
	case LE:
	  fputs ("le", file);   /* 20 */
	  break;
	case GT:
	  fputs ("gt", file);   /* 8 */
	  break;
	case GE:
	  fputs ("ge", file);   /* 12 */
	  break;
	case LTU:
	  fputs ("llt", file);  /* 2 */
	  break;
	case LEU:
	  fputs ("lle", file);  /* 6 */
	  break;
	case GTU:
	  fputs ("lgt", file);  /* 1 */
	  break;
	case GEU:
	  fputs ("lge", file);  /* 5 */
	  break;
	default:
	  output_operand_lossage ("invalid %%V value");
	}
      break;

    case 'w':
      /* If constant, low-order 16 bits of constant, signed.  Otherwise, write
	 normally.  */
      if (INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, sext_hwi (INTVAL (x), 16));
      else
	print_operand (file, x, 0);
      return;
    case 'x':
      /* X is a FPR or Altivec register used in a VSX context.  */
      if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%x value");
      else
	{
	  int reg = REGNO (x);
	  int vsx_reg = (FP_REGNO_P (reg)
			 ? reg - FIRST_FPR_REGNO
			 : reg - FIRST_ALTIVEC_REGNO + 32);

#ifdef TARGET_REGNAMES
	  if (TARGET_REGNAMES)
	    fprintf (file, "%%vs%d", vsx_reg);
	  else
#endif
	    fprintf (file, "%d", vsx_reg);
	}
      return;

    case 'X':
      if (MEM_P (x)
	  && (legitimate_indexed_address_p (XEXP (x, 0), 0)
	      || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
		  && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
	putc ('x', file);
      return;
    case 'Y':
      /* Like 'L', for third word of TImode/PTImode.  */
      if (REG_P (x))
	fputs (reg_names[REGNO (x) + 2], file);
      else if (MEM_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 8));
	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 8));
	  else
	    output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
	  if (small_data_operand (x, GET_MODE (x)))
	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
		     reg_names[SMALL_DATA_REG]);
	}
      return;
    case 'z':
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
	x = XVECEXP (x, 0, 1);
      /* X is a SYMBOL_REF.  Write out the name preceded by a
	 period and without any trailing data in brackets.  Used for function
	 names.  If we are configured for System V (or the embedded ABI) on
	 the PowerPC, do not emit the period, since those systems do not use
	 TOCs and the like.  */
      if (!SYMBOL_REF_P (x))
	{
	  output_operand_lossage ("invalid %%z value");
	  return;
	}

      /* For macho, check to see if we need a stub.  */
      if (TARGET_MACHO)
	{
	  const char *name = XSTR (x, 0);
#if TARGET_MACHO
	  if (darwin_symbol_stubs
	      && MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      else if (!DOT_SYMBOLS)
	assemble_name (file, XSTR (x, 0));
      else
	rs6000_output_function_entry (file, XSTR (x, 0));
      return;
    case 'Z':
      /* Like 'L', for last word of TImode/PTImode.  */
      if (REG_P (x))
	fputs (reg_names[REGNO (x) + 3], file);
      else if (MEM_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 12));
	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 12));
	  else
	    output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
	  if (small_data_operand (x, GET_MODE (x)))
	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
		     reg_names[SMALL_DATA_REG]);
	}
      return;
      /* Print AltiVec memory operand.  */
    case 'y':
      {
	rtx tmp;

	gcc_assert (MEM_P (x));

	tmp = XEXP (x, 0);

	if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
	    && GET_CODE (tmp) == AND
	    && CONST_INT_P (XEXP (tmp, 1))
	    && INTVAL (XEXP (tmp, 1)) == -16)
	  tmp = XEXP (tmp, 0);
	else if (VECTOR_MEM_VSX_P (GET_MODE (x))
		 && GET_CODE (tmp) == PRE_MODIFY)
	  tmp = XEXP (tmp, 1);
	if (REG_P (tmp))
	  fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
	else
	  {
	    if (GET_CODE (tmp) != PLUS
		|| !REG_P (XEXP (tmp, 0))
		|| !REG_P (XEXP (tmp, 1)))
	      {
		output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
		break;
	      }

	    if (REGNO (XEXP (tmp, 0)) == 0)
	      fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
		       reg_names[ REGNO (XEXP (tmp, 0)) ]);
	    else
	      fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
		       reg_names[ REGNO (XEXP (tmp, 1)) ]);
	  }
	break;
      }
    case 0:
      if (REG_P (x))
	fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (MEM_P (x))
	{
	  /* We need to handle PRE_INC and PRE_DEC here, since we need to
	     know the width from the mode.  */
	  if (GET_CODE (XEXP (x, 0)) == PRE_INC)
	    fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
		     reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
	  else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
	    fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
		     reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
	    output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
	  else
	    output_address (GET_MODE (x), XEXP (x, 0));
	}
      else if (toc_relative_expr_p (x, false,
				    &tocrel_base_oac, &tocrel_offset_oac))
	/* This hack along with a corresponding hack in
	   rs6000_output_addr_const_extra arranges to output addends
	   where the assembler expects to find them.  eg.
	   (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
	   without this hack would be output as "x@toc+4".  We
	   want "x+4@toc".  */
	output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
      else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
	output_addr_const (file, XVECEXP (x, 0, 0));
      else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
	output_addr_const (file, XVECEXP (x, 0, 1));
      else
	output_addr_const (file, x);
      return;

    case '&':
      if (const char *name = get_some_local_dynamic_name ())
	assemble_name (file, name);
      else
	output_operand_lossage ("'%%&' used without any "
				"local dynamic TLS references");
      return;

    default:
      output_operand_lossage ("invalid %%xn code");
      return;
    }
}
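
/* Worked example (not from the original sources): for a CR operand in
   field cr2, "%R" prints 128 >> 2 == 32, so a template "mtcrf %R0,%1"
   emits "mtcrf 32,9" when operand 1 is r9.  */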
/* Print the address of an operand.  */

void
print_operand_address (FILE *file, rtx x)
{
  if (REG_P (x))
    fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);

  /* Is it a PC-relative address?  */
  else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
    {
      HOST_WIDE_INT offset;

      if (GET_CODE (x) == CONST)
	x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
	{
	  offset = INTVAL (XEXP (x, 1));
	  x = XEXP (x, 0);
	}
      else
	offset = 0;

      output_addr_const (file, x);

      if (offset)
	fprintf (file, "%+" PRId64, offset);

      if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
	fprintf (file, "@got");

      fprintf (file, "@pcrel");
    }
  else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
	   || GET_CODE (x) == LABEL_REF)
    {
      output_addr_const (file, x);
      if (small_data_operand (x, GET_MODE (x)))
	fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
		 reg_names[SMALL_DATA_REG]);
      else
	gcc_assert (!TARGET_TOC);
    }
  else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
	   && REG_P (XEXP (x, 1)))
    {
      if (REGNO (XEXP (x, 0)) == 0)
	fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
		 reg_names[ REGNO (XEXP (x, 0)) ]);
      else
	fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
		 reg_names[ REGNO (XEXP (x, 1)) ]);
    }
  else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
	   && CONST_INT_P (XEXP (x, 1)))
    fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
	     INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
#if TARGET_MACHO
  else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
	   && CONSTANT_P (XEXP (x, 1)))
    {
      fprintf (file, "lo16(");
      output_addr_const (file, XEXP (x, 1));
      fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
    }
#endif
#if TARGET_ELF
  else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
	   && CONSTANT_P (XEXP (x, 1)))
    {
      output_addr_const (file, XEXP (x, 1));
      fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
    }
#endif
  else if (toc_relative_expr_p (x, false,
				&tocrel_base_oac, &tocrel_offset_oac))
    {
      /* This hack along with a corresponding hack in
	 rs6000_output_addr_const_extra arranges to output addends
	 where the assembler expects to find them.  eg.
	 (lo_sum (reg 9)
	 .       (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
	 without this hack would be output as "x@toc+8@l(9)".  We
	 want "x+8@toc@l(9)".  */
      output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
      if (GET_CODE (x) == LO_SUM)
	fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
      else
	fprintf (file, "(%s)",
		 reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
    }
  else
    output_addr_const (file, x);
}
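
/* E.g. (reg 9) prints as "0(9)", (plus (reg 9) (const_int 16)) as
   "16(9)", and (plus (reg 9) (reg 10)) as "9,10", assuming the default
   numeric register names.  (Illustrative example, not original.)  */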
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
rs6000_output_addr_const_extra (FILE *file, rtx x)
{
  if (GET_CODE (x) == UNSPEC)
    switch (XINT (x, 1))
      {
      case UNSPEC_TOCREL:
	gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
			     && REG_P (XVECEXP (x, 0, 1))
			     && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
	output_addr_const (file, XVECEXP (x, 0, 0));
	if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
	  {
	    if (INTVAL (tocrel_offset_oac) >= 0)
	      fprintf (file, "+");
	    output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
	  }
	if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
	  {
	    putc ('-', file);
	    assemble_name (file, toc_label_name);
	    need_toc_init = 1;
	  }
	else if (TARGET_ELF)
	  fputs ("@toc", file);
	return true;

#if TARGET_MACHO
      case UNSPEC_MACHOPIC_OFFSET:
	output_addr_const (file, XVECEXP (x, 0, 0));
	putc ('-', file);
	machopic_output_function_base_name (file);
	return true;
#endif

      default:
	break;
      }

  return false;
}
/* Target hook for assembling integer objects.  The PowerPC version has
   to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
   is defined.  It also needs to handle DI-mode objects on 64-bit
   targets.  */

static bool
rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
#ifdef RELOCATABLE_NEEDS_FIXUP
  /* Special handling for SI values.  */
  if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
    {
      static int recurse = 0;

      /* For -mrelocatable, we mark all addresses that need to be fixed up in
	 the .fixup section.  Since the TOC section is already relocated, we
	 don't need to mark it here.  We used to skip the text section, but it
	 should never be valid for relocated addresses to be placed in the text
	 section.  */
      if (DEFAULT_ABI == ABI_V4
	  && (TARGET_RELOCATABLE || flag_pic > 1)
	  && in_section != toc_section
	  && !recurse
	  && !CONST_SCALAR_INT_P (x)
	  && CONSTANT_P (x))
	{
	  char buf[256];

	  recurse = 1;
	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
	  fixuplabelno++;
	  ASM_OUTPUT_LABEL (asm_out_file, buf);
	  fprintf (asm_out_file, "\t.long\t(");
	  output_addr_const (asm_out_file, x);
	  fprintf (asm_out_file, ")@fixup\n");
	  fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
	  ASM_OUTPUT_ALIGN (asm_out_file, 2);
	  fprintf (asm_out_file, "\t.long\t");
	  assemble_name (asm_out_file, buf);
	  fprintf (asm_out_file, "\n\t.previous\n");
	  recurse = 0;
	  return true;
	}
      /* Remove initial .'s to turn a -mcall-aixdesc function
	 address into the address of the descriptor, not the function
	 itself.  */
      else if (SYMBOL_REF_P (x)
	       && XSTR (x, 0)[0] == '.'
	       && DEFAULT_ABI == ABI_AIX)
	{
	  const char *name = XSTR (x, 0);
	  while (*name == '.')
	    name++;

	  fprintf (asm_out_file, "\t.long\t%s\n", name);
	  return true;
	}
    }
#endif /* RELOCATABLE_NEEDS_FIXUP */
  return default_assemble_integer (x, size, aligned_p);
}
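
/* For reference, the fixup record emitted above looks roughly like:

	.LCP0:
		.long	(sym)@fixup
		.section	".fixup","aw"
		.align	2
		.long	.LCP0
		.previous

   with the label name produced by ASM_GENERATE_INTERNAL_LABEL.
   (Illustrative transcription of the fprintf calls above.)  */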
/* Return a template string for assembly to emit when making an
   external call.  FUNOP is the call mem argument operand number.  */

static const char *
rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
{
  /* -Wformat-overflow workaround, without which gcc thinks that %u
     might produce 10 digits.  */
  gcc_assert (funop <= MAX_RECOG_OPERANDS);

  char arg[12];
  arg[0] = 0;
  if (GET_CODE (operands[funop + 1]) == UNSPEC)
    {
      if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
	sprintf (arg, "(%%%u@tlsgd)", funop + 1);
      else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
	sprintf (arg, "(%%&@tlsld)");
    }

  /* The magic 32768 offset here corresponds to the offset of
     r30 in .got2, as given by LCTOC1.  See sysv4.h:toc_section.  */
  char z[11];
  sprintf (z, "%%z%u%s", funop,
	   (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
	    ? "+32768" : ""));

  static char str[32];  /* 1 spare */
  if (rs6000_pcrel_p ())
    sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
	     sibcall ? "" : "\n\tnop");
  else if (DEFAULT_ABI == ABI_V4)
    sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
	     flag_pic ? "@plt" : "");
#if TARGET_MACHO
  /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case.  */
  else if (DEFAULT_ABI == ABI_DARWIN)
    {
      /* The cookie is in operand func+2.  */
      gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
      int cookie = INTVAL (operands[funop + 2]);
      if (cookie & CALL_LONG)
	{
	  tree funname = get_identifier (XSTR (operands[funop], 0));
	  tree labelname = get_prev_label (funname);
	  gcc_checking_assert (labelname && !sibcall);

	  /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
	     instruction will reach 'foo', otherwise link as 'bl L42'".
	     "L42" should be a 'branch island', that will do a far jump to
	     'foo'.  Branch islands are generated in
	     macho_branch_islands().  */
	  sprintf (str, "jbsr %%z%u,%.10s", funop,
		   IDENTIFIER_POINTER (labelname));
	}
      else
	/* Same as AIX or ELFv2, except to keep backwards compat, no nop
	   after the call.  */
	sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
    }
#endif
  else
    gcc_unreachable ();

  return str;
}

const char *
rs6000_call_template (rtx *operands, unsigned int funop)
{
  return rs6000_call_template_1 (operands, funop, false);
}

const char *
rs6000_sibcall_template (rtx *operands, unsigned int funop)
{
  return rs6000_call_template_1 (operands, funop, true);
}
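
/* E.g. on AIX/ELFv2 a normal external call prints as "bl %z0\n\tnop"
   (the nop being the TOC-restore slot the linker may patch), a sibcall
   as "b %z0", and a -fPIC SysV call as "bl %z0@plt".  (Illustrative
   example, not original.)  */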
/* As above, for indirect calls.  */

static const char *
rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
				 bool sibcall)
{
  /* -Wformat-overflow workaround, without which gcc thinks that %u
     might produce 10 digits.  Note that -Wformat-overflow will not
     currently warn here for str[], so do not rely on a warning to
     ensure str[] is correctly sized.  */
  gcc_assert (funop <= MAX_RECOG_OPERANDS);

  /* Currently, funop is either 0 or 1.  The maximum string is always
     a !speculate 64-bit __tls_get_addr call.

     ELFv2, pcrel:
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 35	.reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
     .  9	crset 2\n\t
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 36	.reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
     .  8	beq%T1l-
     .---
     .142

     AIX:
     .  9	ld 2,%3\n\t
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 29	.reloc .,R_PPC64_PLTSEQ,%z1\n\t
     .  9	crset 2\n\t
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 30	.reloc .,R_PPC64_PLTCALL,%z1\n\t
     . 10	beq%T1l-\n\t
     . 10	ld 2,%4(1)
     .---
     .151

     ELFv2:
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 29	.reloc .,R_PPC64_PLTSEQ,%z1\n\t
     .  9	crset 2\n\t
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 30	.reloc .,R_PPC64_PLTCALL,%z1\n\t
     . 10	beq%T1l-\n\t
     . 10	ld 2,%3(1)
     .---
     .142

     V4:
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 35	.reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
     .  9	crset 2\n\t
     . 27	.reloc .,R_PPC64_TLSGD,%2\n\t
     . 36	.reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
     .  8	beq%T1l-
     .---
     .141  */

  static char str[160];  /* 8 spare */
  char *s = str;
  const char *ptrload = TARGET_64BIT ? "d" : "wz";

  if (DEFAULT_ABI == ABI_AIX)
    s += sprintf (s,
		  "l%s 2,%%%u\n\t",
		  ptrload, funop + 3);

  /* We don't need the extra code to stop indirect call speculation if
     calling via LR.  */
  bool speculate = (TARGET_MACHO
		    || rs6000_speculate_indirect_jumps
		    || (REG_P (operands[funop])
			&& REGNO (operands[funop]) == LR_REGNO));

  if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
    {
      const char *rel64 = TARGET_64BIT ? "64" : "";
      char tls[29];
      tls[0] = 0;
      if (GET_CODE (operands[funop + 1]) == UNSPEC)
	{
	  if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
	    sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
		     rel64, funop + 1);
	  else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
	    sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
		     rel64);
	}

      const char *notoc = rs6000_pcrel_p () ? "_NOTOC" : "";
      const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
			    && flag_pic == 2 ? "+32768" : "");
      if (!speculate)
	{
	  s += sprintf (s,
			"%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
			tls, rel64, notoc, funop, addend);
	  s += sprintf (s, "crset 2\n\t");
	}
      s += sprintf (s,
		    "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
		    tls, rel64, notoc, funop, addend);
    }
  else if (!speculate)
    s += sprintf (s, "crset 2\n\t");

  if (rs6000_pcrel_p ())
    {
      if (speculate)
	sprintf (s, "b%%T%ul", funop);
      else
	sprintf (s, "beq%%T%ul-", funop);
    }
  else if (DEFAULT_ABI == ABI_AIX)
    {
      if (speculate)
	sprintf (s,
		 "b%%T%ul\n\t"
		 "l%s 2,%%%u(1)",
		 funop, ptrload, funop + 4);
      else
	sprintf (s,
		 "beq%%T%ul-\n\t"
		 "l%s 2,%%%u(1)",
		 funop, ptrload, funop + 4);
    }
  else if (DEFAULT_ABI == ABI_ELFv2)
    {
      if (speculate)
	sprintf (s,
		 "b%%T%ul\n\t"
		 "l%s 2,%%%u(1)",
		 funop, ptrload, funop + 3);
      else
	sprintf (s,
		 "beq%%T%ul-\n\t"
		 "l%s 2,%%%u(1)",
		 funop, ptrload, funop + 3);
    }
  else
    {
      if (speculate)
	sprintf (s,
		 "b%%T%u%s",
		 funop, sibcall ? "" : "l");
      else
	sprintf (s,
		 "beq%%T%u%s-%s",
		 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
    }
  return str;
}

const char *
rs6000_indirect_call_template (rtx *operands, unsigned int funop)
{
  return rs6000_indirect_call_template_1 (operands, funop, false);
}

const char *
rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
{
  return rs6000_indirect_call_template_1 (operands, funop, true);
}
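
/* E.g. with operands[0] being CTR, "b%T0l" expands via the %T modifier
   above to "bctrl"; when indirect-call speculation barriers are wanted
   the sequence becomes "crset 2\n\tbeqctrl-".  (Illustrative example,
   not original.)  */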
/* Output indirect call insns.  WHICH identifies the type of sequence.  */

const char *
rs6000_pltseq_template (rtx *operands, int which)
{
  const char *rel64 = TARGET_64BIT ? "64" : "";
  char tls[30];
  tls[0] = 0;
  if (GET_CODE (operands[3]) == UNSPEC)
    {
      char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
      if (XINT (operands[3], 1) == UNSPEC_TLSGD)
	sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
		 off, rel64);
      else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
	sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
		 off, rel64);
    }

  gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
  static char str[96];  /* 10 spare */
  char off = WORDS_BIG_ENDIAN ? '2' : '4';
  const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
			&& flag_pic == 2 ? "+32768" : "");
  switch (which)
    {
    case RS6000_PLTSEQ_TOCSAVE:
      sprintf (str,
	       "st%s\n\t"
	       "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
	       TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
	       tls, rel64);
      break;
    case RS6000_PLTSEQ_PLT16_HA:
      if (DEFAULT_ABI == ABI_V4 && !flag_pic)
	sprintf (str,
		 "lis %%0,0\n\t"
		 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
		 tls, off, rel64);
      else
	sprintf (str,
		 "addis %%0,%%1,0\n\t"
		 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
		 tls, off, rel64, addend);
      break;
    case RS6000_PLTSEQ_PLT16_LO:
      sprintf (str,
	       "l%s %%0,0(%%1)\n\t"
	       "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
	       TARGET_64BIT ? "d" : "wz",
	       tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
      break;
    case RS6000_PLTSEQ_MTCTR:
      sprintf (str,
	       "mtctr %%1\n\t"
	       "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
	       tls, rel64, addend);
      break;
    case RS6000_PLTSEQ_PLT_PCREL34:
      sprintf (str,
	       "pl%s %%0,0(0),1\n\t"
	       "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
	       TARGET_64BIT ? "d" : "wz",
	       tls, rel64);
      break;
    default:
      gcc_unreachable ();
    }
  return str;
}
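
/* E.g. RS6000_PLTSEQ_MTCTR emits "mtctr %1" tagged with an R_PPC64_PLTSEQ
   relocation; these marker relocations let the linker recognize the inline
   PLT call sequence and relax it to a direct call when the target turns
   out to be local.  (Illustrative example, not original.)  */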
#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
/* Emit an assembler directive to set symbol visibility for DECL to
   VISIBILITY_TYPE.  */

static void
rs6000_assemble_visibility (tree decl, int vis)
{
  if (TARGET_XCOFF)
    return;

  /* Functions need to have their entry point symbol visibility set as
     well as their descriptor symbol visibility.  */
  if (DEFAULT_ABI == ABI_AIX
      && DOT_SYMBOLS
      && TREE_CODE (decl) == FUNCTION_DECL)
    {
      static const char * const visibility_types[] = {
	NULL, "protected", "hidden", "internal"
      };

      const char *name, *type;

      name = ((* targetm.strip_name_encoding)
	      (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
      type = visibility_types[vis];

      fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
      fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
    }
  else
    default_assemble_visibility (decl, vis);
}
#endif
/* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
   entry.  If RECORD_P is true and the target supports named sections,
   the location of the NOPs will be recorded in a special object section
   called "__patchable_function_entries".  This routine may be called
   twice per function to put NOPs before and after the function
   entry.  */

void
rs6000_print_patchable_function_entry (FILE *file,
				       unsigned HOST_WIDE_INT patch_area_size,
				       bool record_p)
{
  bool global_entry_needed_p = rs6000_global_entry_point_prologue_needed_p ();
  /* For a function which needs global entry point, we will emit the
     patchable area before and after local entry point under the control of
     cfun->machine->global_entry_emitted, see the handling in function
     rs6000_output_function_prologue.  */
  if (!global_entry_needed_p || cfun->machine->global_entry_emitted)
    default_print_patchable_function_entry (file, patch_area_size, record_p);
}
enum rtx_code
rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
{
  /* Reversal of FP compares takes care -- an ordered compare
     becomes an unordered compare and vice versa.  */
  if (mode == CCFPmode
      && (!flag_finite_math_only
	  || code == UNLT || code == UNLE || code == UNGT || code == UNGE
	  || code == UNEQ || code == LTGT))
    return reverse_condition_maybe_unordered (code);
  else
    return reverse_condition (code);
}
/* Check if C (as 64bit integer) can be rotated to a constant which contains
   nonzero bits at the LOWBITS low bits only.

   Return true if C can be rotated to such constant.  If so, *ROT is written
   to the number by which C is rotated.
   Return false otherwise.  */

bool
can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
{
  int clz = HOST_BITS_PER_WIDE_INT - lowbits;

  /* case a. 0..0xxx: already at least clz zeros.  */
  int lz = clz_hwi (c);
  if (lz >= clz)
    {
      *rot = 0;
      return true;
    }

  /* case b. 0..0xxx0..0: at least clz zeros.  */
  int tz = ctz_hwi (c);
  if (lz + tz >= clz)
    {
      *rot = HOST_BITS_PER_WIDE_INT - tz;
      return true;
    }

  /* case c. xx10.....0xx: rotate 'clz - 1' bits first, then check case b.
	       ^bit -> Vbit, then zeros are at head or tail.
	     00...00xxx100, 'clz - 1' >= 'bits of xxxx'.  */
  const int rot_bits = lowbits + 1;
  unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1));
  tz = ctz_hwi (rc);
  if (clz_hwi (rc) + tz >= clz)
    {
      *rot = HOST_BITS_PER_WIDE_INT - (tz + rot_bits);
      return true;
    }

  return false;
}
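
/* Worked example (not from the original sources): c = 0xFF00000000000000
   with LOWBITS = 16 gives lz = 0 and tz = 56, so case b applies
   (0 + 56 >= 48) and *ROT = 64 - 56 = 8; rotating C left by 8 bits yields
   0xFF, which indeed has nonzero bits in the low 16 bits only.  */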
/* Check if C (as 64bit integer) can be rotated to a positive 16bits constant
   which contains 48bits leading zeros and 16bits of any value.  */

bool
can_be_rotated_to_positive_16bits (HOST_WIDE_INT c)
{
  int rot = 0;
  bool res = can_be_rotated_to_lowbits (c, 16, &rot);
  return res && rot > 0;
}

/* Check if C (as 64bit integer) can be rotated to a negative 15bits constant
   which contains 49bits leading ones and 15bits of any value.  */

bool
can_be_rotated_to_negative_15bits (HOST_WIDE_INT c)
{
  int rot = 0;
  bool res = can_be_rotated_to_lowbits (~c, 15, &rot);
  return res && rot > 0;
}
/* Generate a compare for CODE.  Return a brand-new rtx that
   represents the result of the compare.  */

static rtx
rs6000_generate_compare (rtx cmp, machine_mode mode)
{
  machine_mode comp_mode;
  rtx compare_result;
  enum rtx_code code = GET_CODE (cmp);
  rtx op0 = XEXP (cmp, 0);
  rtx op1 = XEXP (cmp, 1);

  if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
    comp_mode = CCmode;
  else if (FLOAT_MODE_P (mode))
    comp_mode = CCFPmode;
  else if (code == GTU || code == LTU
	   || code == GEU || code == LEU)
    comp_mode = CCUNSmode;
  else if ((code == EQ || code == NE)
	   && unsigned_reg_p (op0)
	   && (unsigned_reg_p (op1)
	       || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
    /* These are unsigned values, perhaps there will be a later
       ordering compare that can be shared with this one.  */
    comp_mode = CCUNSmode;
  else
    comp_mode = CCmode;

  /* If we have an unsigned compare, make sure we don't have a signed value as
     an immediate.  */
  if (comp_mode == CCUNSmode && CONST_INT_P (op1)
      && INTVAL (op1) < 0)
    {
      op0 = copy_rtx_if_shared (op0);
      op1 = force_reg (GET_MODE (op0), op1);
      cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
    }

  /* First, the compare.  */
  compare_result = gen_reg_rtx (comp_mode);

  /* IEEE 128-bit support in VSX registers when we do not have hardware
     support.  */
  if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
    {
      rtx libfunc = NULL_RTX;
      bool check_nan = false;
      rtx dest;

      switch (code)
	{
	case EQ:
	case NE:
	  libfunc = optab_libfunc (eq_optab, mode);
	  break;

	case GT:
	case GE:
	  libfunc = optab_libfunc (ge_optab, mode);
	  break;

	case LT:
	case LE:
	  libfunc = optab_libfunc (le_optab, mode);
	  break;

	case UNORDERED:
	case ORDERED:
	  libfunc = optab_libfunc (unord_optab, mode);
	  code = (code == UNORDERED) ? NE : EQ;
	  break;

	case UNGE:
	case UNGT:
	  check_nan = true;
	  libfunc = optab_libfunc (ge_optab, mode);
	  code = (code == UNGE) ? GE : GT;
	  break;

	case UNLE:
	case UNLT:
	  check_nan = true;
	  libfunc = optab_libfunc (le_optab, mode);
	  code = (code == UNLE) ? LE : LT;
	  break;

	case UNEQ:
	case LTGT:
	  check_nan = true;
	  libfunc = optab_libfunc (eq_optab, mode);
	  code = (code == UNEQ) ? EQ : NE;
	  break;

	default:
	  gcc_unreachable ();
	}

      gcc_assert (libfunc);

      if (!check_nan)
	dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
					SImode, op0, mode, op1, mode);

      /* The library signals an exception for signalling NaNs, so we need to
	 handle isgreater, etc. by first checking isordered.  */
      else
	{
	  rtx ne_rtx, normal_dest, unord_dest;
	  rtx unord_func = optab_libfunc (unord_optab, mode);
	  rtx join_label = gen_label_rtx ();
	  rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
	  rtx unord_cmp = gen_reg_rtx (comp_mode);

	  /* Test for either value being a NaN.  */
	  gcc_assert (unord_func);
	  unord_dest = emit_library_call_value (unord_func, NULL_RTX,
						LCT_CONST, SImode,
						op0, mode, op1, mode);

	  /* Set value (0) if either value is a NaN, and jump to the join
	     label.  */
	  dest = gen_reg_rtx (SImode);
	  emit_move_insn (dest, const1_rtx);
	  emit_insn (gen_rtx_SET (unord_cmp,
				  gen_rtx_COMPARE (comp_mode, unord_dest,
						   const0_rtx)));

	  ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
	  emit_jump_insn (gen_rtx_SET (pc_rtx,
				       gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
							     join_ref,
							     pc_rtx)));

	  /* Do the normal comparison, knowing that the values are not
	     NaNs.  */
	  normal_dest = emit_library_call_value (libfunc, NULL_RTX,
						 LCT_CONST, SImode,
						 op0, mode, op1, mode);

	  emit_insn (gen_cstoresi4 (dest,
				    gen_rtx_fmt_ee (code, SImode, normal_dest,
						    const0_rtx),
				    normal_dest, const0_rtx));

	  /* Join NaN and non-NaN paths.  Compare dest against 0.  */
	  emit_label (join_label);
	  code = NE;
	}

      emit_insn (gen_rtx_SET (compare_result,
			      gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
    }

  else
    {
      /* Generate XLC-compatible TFmode compare as PARALLEL with extra
	 CLOBBERs to match cmptf_internal2 pattern.  */
      if (comp_mode == CCFPmode && TARGET_XL_COMPAT
	  && FLOAT128_IBM_P (GET_MODE (op0))
	  && TARGET_HARD_FLOAT)
	emit_insn (gen_rtx_PARALLEL (VOIDmode,
	  gen_rtvec (10,
		     gen_rtx_SET (compare_result,
				  gen_rtx_COMPARE (comp_mode, op0, op1)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
      else if (GET_CODE (op1) == UNSPEC
	       && XINT (op1, 1) == UNSPEC_SP_TEST)
	{
	  rtx op1b = XVECEXP (op1, 0, 0);
	  comp_mode = CCEQmode;
	  compare_result = gen_reg_rtx (CCEQmode);
	  if (TARGET_64BIT)
	    emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
	  else
	    emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
	}
      else if (mode == V16QImode)
	{
	  gcc_assert (code == EQ || code == NE);

	  rtx result_vector = gen_reg_rtx (V16QImode);
	  rtx cc_bit = gen_reg_rtx (SImode);
	  emit_insn (gen_altivec_vcmpequb_p (result_vector, op0, op1));
	  emit_insn (gen_cr6_test_for_lt (cc_bit));
	  emit_insn (gen_rtx_SET (compare_result,
				  gen_rtx_COMPARE (comp_mode, cc_bit,
						   const1_rtx)));
	}
      else
	emit_insn (gen_rtx_SET (compare_result,
				gen_rtx_COMPARE (comp_mode, op0, op1)));
    }

  validate_condition_mode (code, GET_MODE (compare_result));

  return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
}
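
/* E.g. without TARGET_FLOAT128_HW a KFmode "a <= b" becomes a call to the
   le_optab libfunc followed by a compare of the SImode libcall result
   against zero.  (Illustrative summary, assuming the usual libgcc
   soft-float128 entry points.)  */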
/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */

static const char *
rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
			  const_tree type1,
			  const_tree type2)
{
  machine_mode mode1 = TYPE_MODE (type1);
  machine_mode mode2 = TYPE_MODE (type2);

  /* For complex modes, use the inner type.  */
  if (COMPLEX_MODE_P (mode1))
    mode1 = GET_MODE_INNER (mode1);

  if (COMPLEX_MODE_P (mode2))
    mode2 = GET_MODE_INNER (mode2);

  /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
     double to intermix unless -mfloat128-convert.  */
  if (mode1 == mode2)
    return NULL;

  if (!TARGET_FLOAT128_CVT)
    {
      if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
	  || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
	return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
		  "point types");
    }

  return NULL;
}
/* Expand floating point conversion to/from __float128 and __ibm128.  */

void
rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
{
  machine_mode dest_mode = GET_MODE (dest);
  machine_mode src_mode = GET_MODE (src);
  convert_optab cvt = unknown_optab;
  bool do_move = false;
  rtx libfunc = NULL_RTX;
  rtx dest2;
  typedef rtx (*rtx_2func_t) (rtx, rtx);
  rtx_2func_t hw_convert = (rtx_2func_t)0;
  size_t kf_or_tf;

  struct hw_conv_t {
    rtx_2func_t from_df;
    rtx_2func_t from_sf;
    rtx_2func_t from_si_sign;
    rtx_2func_t from_si_uns;
    rtx_2func_t from_di_sign;
    rtx_2func_t from_di_uns;
    rtx_2func_t to_df;
    rtx_2func_t to_sf;
    rtx_2func_t to_si_sign;
    rtx_2func_t to_si_uns;
    rtx_2func_t to_di_sign;
    rtx_2func_t to_di_uns;
  } hw_conversions[2] = {
    /* conversions to/from KFmode */
    {
      gen_extenddfkf2_hw,		/* KFmode <- DFmode.  */
      gen_extendsfkf2_hw,		/* KFmode <- SFmode.  */
      gen_float_kfsi2_hw,		/* KFmode <- SImode (signed).  */
      gen_floatuns_kfsi2_hw,		/* KFmode <- SImode (unsigned).  */
      gen_float_kfdi2_hw,		/* KFmode <- DImode (signed).  */
      gen_floatuns_kfdi2_hw,		/* KFmode <- DImode (unsigned).  */
      gen_trunckfdf2_hw,		/* DFmode <- KFmode.  */
      gen_trunckfsf2_hw,		/* SFmode <- KFmode.  */
      gen_fix_kfsi2_hw,			/* SImode <- KFmode (signed).  */
      gen_fixuns_kfsi2_hw,		/* SImode <- KFmode (unsigned).  */
      gen_fix_kfdi2_hw,			/* DImode <- KFmode (signed).  */
      gen_fixuns_kfdi2_hw,		/* DImode <- KFmode (unsigned).  */
    },

    /* conversions to/from TFmode */
    {
      gen_extenddftf2_hw,		/* TFmode <- DFmode.  */
      gen_extendsftf2_hw,		/* TFmode <- SFmode.  */
      gen_float_tfsi2_hw,		/* TFmode <- SImode (signed).  */
      gen_floatuns_tfsi2_hw,		/* TFmode <- SImode (unsigned).  */
      gen_float_tfdi2_hw,		/* TFmode <- DImode (signed).  */
      gen_floatuns_tfdi2_hw,		/* TFmode <- DImode (unsigned).  */
      gen_trunctfdf2_hw,		/* DFmode <- TFmode.  */
      gen_trunctfsf2_hw,		/* SFmode <- TFmode.  */
      gen_fix_tfsi2_hw,			/* SImode <- TFmode (signed).  */
      gen_fixuns_tfsi2_hw,		/* SImode <- TFmode (unsigned).  */
      gen_fix_tfdi2_hw,			/* DImode <- TFmode (signed).  */
      gen_fixuns_tfdi2_hw,		/* DImode <- TFmode (unsigned).  */
    },
  };

  if (dest_mode == src_mode)
    gcc_unreachable ();

  /* Eliminate memory operations.  */
  if (MEM_P (src))
    src = force_reg (src_mode, src);

  if (MEM_P (dest))
    {
      rtx tmp = gen_reg_rtx (dest_mode);
      rs6000_expand_float128_convert (tmp, src, unsigned_p);
      rs6000_emit_move (dest, tmp, dest_mode);
      return;
    }

  /* Convert to IEEE 128-bit floating point.  */
  if (FLOAT128_IEEE_P (dest_mode))
    {
      if (dest_mode == KFmode)
	kf_or_tf = 0;
      else if (dest_mode == TFmode)
	kf_or_tf = 1;
      else
	gcc_unreachable ();

      switch (src_mode)
	{
	case E_DFmode:
	  cvt = sext_optab;
	  hw_convert = hw_conversions[kf_or_tf].from_df;
	  break;

	case E_SFmode:
	  cvt = sext_optab;
	  hw_convert = hw_conversions[kf_or_tf].from_sf;
	  break;

	case E_KFmode:
	case E_IFmode:
	case E_TFmode:
	  if (FLOAT128_IBM_P (src_mode))
	    cvt = sext_optab;
	  else
	    do_move = true;
	  break;

	case E_SImode:
	  if (unsigned_p)
	    {
	      cvt = ufloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_si_uns;
	    }
	  else
	    {
	      cvt = sfloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_si_sign;
	    }
	  break;

	case E_DImode:
	  if (unsigned_p)
	    {
	      cvt = ufloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_di_uns;
	    }
	  else
	    {
	      cvt = sfloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_di_sign;
	    }
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Convert from IEEE 128-bit floating point.  */
  else if (FLOAT128_IEEE_P (src_mode))
    {
      if (src_mode == KFmode)
	kf_or_tf = 0;
      else if (src_mode == TFmode)
	kf_or_tf = 1;
      else
	gcc_unreachable ();

      switch (dest_mode)
	{
	case E_DFmode:
	  cvt = trunc_optab;
	  hw_convert = hw_conversions[kf_or_tf].to_df;
	  break;

	case E_SFmode:
	  cvt = trunc_optab;
	  hw_convert = hw_conversions[kf_or_tf].to_sf;
	  break;

	case E_KFmode:
	case E_IFmode:
	case E_TFmode:
	  if (FLOAT128_IBM_P (dest_mode))
	    cvt = trunc_optab;
	  else
	    do_move = true;
	  break;

	case E_SImode:
	  if (unsigned_p)
	    {
	      cvt = ufix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_si_uns;
	    }
	  else
	    {
	      cvt = sfix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_si_sign;
	    }
	  break;

	case E_DImode:
	  if (unsigned_p)
	    {
	      cvt = ufix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_di_uns;
	    }
	  else
	    {
	      cvt = sfix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_di_sign;
	    }
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Both IBM format.  */
  else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
    do_move = true;

  else
    gcc_unreachable ();

  /* Handle conversion between TFmode/KFmode/IFmode.  */
  if (do_move)
    emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));

  /* Handle conversion if we have hardware support.  */
  else if (TARGET_FLOAT128_HW && hw_convert)
    emit_insn ((hw_convert) (dest, src));

  /* Call an external function to do the conversion.  */
  else if (cvt != unknown_optab)
    {
      libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
      gcc_assert (libfunc != NULL_RTX);

      dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
				       src, src_mode);

      gcc_assert (dest2 != NULL_RTX);
      if (!rtx_equal_p (dest, dest2))
	emit_move_insn (dest, dest2);
    }

  else
    gcc_unreachable ();
}
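
/* E.g. a signed DImode -> KFmode conversion uses gen_float_kfdi2_hw under
   TARGET_FLOAT128_HW, and otherwise falls back to the sfloat_optab
   libcall.  (Illustrative summary of the dispatch above.)  */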
/* Emit RTL that sets a register to zero if OP1 and OP2 are equal.  SCRATCH
   can be used as that dest register.  Return the dest register.  */

rtx
rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
{
  if (op2 == const0_rtx)
    return op1;

  if (GET_CODE (scratch) == SCRATCH)
    scratch = gen_reg_rtx (mode);

  if (logical_operand (op2, mode))
    emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
  else
    emit_insn (gen_rtx_SET (scratch,
			    gen_rtx_PLUS (mode, op1,
					  negate_rtx (mode, op2))));

  return scratch;
}
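
/* E.g. comparing r3 against 17 (a logical_operand) emits
   "xori rS,r3,17"; rS is then zero exactly when r3 == 17.
   (Illustrative example, not original.)  */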
/* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
   requires this.  The result is mode MODE.  */
rtx
rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
{
  rtx cond[2];
  int n = 0;
  if (code == LTGT || code == LE || code == UNLT)
    cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
  if (code == LTGT || code == GE || code == UNGT)
    cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
  if (code == LE || code == GE || code == UNEQ)
    cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
  if (code == UNLT || code == UNGT || code == UNEQ)
    cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);

  gcc_assert (n == 2);

  rtx cc = gen_reg_rtx (CCEQmode);
  rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
  emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));

  return cc;
}
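
/* E.g. a LE test is computed as (LT | EQ): a single cror combines the two
   CR bits and the result is then tested as one CCEQ bit.  */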
void
rs6000_emit_sCOND (machine_mode mode, rtx operands[])
{
  rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
  rtx_code cond_code = GET_CODE (condition_rtx);

  if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
      && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
    ;
  else if (cond_code == NE
	   || cond_code == GE || cond_code == LE
	   || cond_code == GEU || cond_code == LEU
	   || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
    {
      rtx not_result = gen_reg_rtx (CCEQmode);
      rtx not_op, rev_cond_rtx;
      machine_mode cc_mode;

      cc_mode = GET_MODE (XEXP (condition_rtx, 0));

      rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode,
							       cond_code),
				     SImode, XEXP (condition_rtx, 0),
				     const0_rtx);
      not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
      emit_insn (gen_rtx_SET (not_result, not_op));
      condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
    }

  machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
  if (op_mode == VOIDmode)
    op_mode = GET_MODE (XEXP (operands[1], 1));

  if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
    {
      PUT_MODE (condition_rtx, DImode);
      convert_move (operands[0], condition_rtx, 0);
    }
  else
    {
      PUT_MODE (condition_rtx, SImode);
      emit_insn (gen_rtx_SET (operands[0], condition_rtx));
    }
}
/* Emit a conditional branch testing comparison OPERANDS[0] of mode MODE,
   targeting the label in OPERANDS[3].  */

void
rs6000_emit_cbranch (machine_mode mode, rtx operands[])
{
  rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
  rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
  rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
  emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
}
/* Return the string to output a conditional branch to LABEL, which is
   the operand template of the label, or NULL if the branch is really a
   conditional return.

   OP is the conditional expression.  XEXP (OP, 0) is assumed to be a
   condition code register and its mode specifies what kind of
   comparison we made.

   REVERSED is nonzero if we should reverse the sense of the comparison.

   INSN is the insn.  */

char *
output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
{
  static char string[64];
  enum rtx_code code = GET_CODE (op);
  rtx cc_reg = XEXP (op, 0);
  machine_mode mode = GET_MODE (cc_reg);
  int cc_regno = REGNO (cc_reg) - CR0_REGNO;
  int need_longbranch = label != NULL && get_attr_length (insn) == 8;
  int really_reversed = reversed ^ need_longbranch;
  char *s = string;
  const char *ccode;
  const char *pred = "";
  rtx note;

  validate_condition_mode (code, mode);

  /* Work out which way this really branches.  We could use
     reverse_condition_maybe_unordered here always but this
     makes the resulting assembler clearer.  */
  if (really_reversed)
    {
      /* Reversal of FP compares takes care -- an ordered compare
	 becomes an unordered compare and vice versa.  */
      if (mode == CCFPmode)
	code = reverse_condition_maybe_unordered (code);
      else
	code = reverse_condition (code);
    }

  switch (code)
    {
      /* Not all of these are actually distinct opcodes, but
	 we distinguish them for clarity of the resulting assembler.  */
    case NE: case LTGT:
      ccode = "ne"; break;
    case EQ: case UNEQ:
      ccode = "eq"; break;
    case GE: case GEU:
      ccode = "ge"; break;
    case GT: case GTU: case UNGT:
      ccode = "gt"; break;
    case LE: case LEU:
      ccode = "le"; break;
    case LT: case LTU: case UNLT:
      ccode = "lt"; break;
    case UNORDERED: ccode = "un"; break;
    case ORDERED: ccode = "nu"; break;
    case UNGE: ccode = "nl"; break;
    case UNLE: ccode = "ng"; break;
    default:
      gcc_unreachable ();
    }

  /* Maybe we have a guess as to how likely the branch is.  */
  note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
  if (note != NULL_RTX)
    {
      /* PROB is the difference from 50%.  */
      int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
		   .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;

      /* Only hint for highly probable/improbable branches on newer cpus when
	 we have real profile data, as static prediction overrides processor
	 dynamic prediction.  For older cpus we may as well always hint, but
	 assume not taken for branches that are very close to 50% as a
	 mispredicted taken branch is more expensive than a
	 mispredicted not-taken branch.  */
      if (rs6000_always_hint
	  || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
	      && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
	      && br_prob_note_reliable_p (note)))
	{
	  if (abs (prob) > REG_BR_PROB_BASE / 20
	      && ((prob > 0) ^ need_longbranch))
	    pred = "+";
	  else
	    pred = "-";
	}
    }

  if (label == NULL)
    s += sprintf (s, "b%slr%s ", ccode, pred);
  else
    s += sprintf (s, "b%s%s ", ccode, pred);

  /* We need to escape any '%' characters in the reg_names string.
     Assume they'd only be the first character....  */
  if (reg_names[cc_regno + CR0_REGNO][0] == '%')
    *s++ = '%';
  s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);

  if (label != NULL)
    {
      /* If the branch distance was too far, we may have to use an
	 unconditional branch to go the distance.  */
      if (need_longbranch)
	s += sprintf (s, ",$+8\n\tb %s", label);
      else
	s += sprintf (s, ",%s", label);
    }

  return string;
}
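
/* E.g. (eq (reg:CC cr0) (const_int 0)) with label "L25" produces
   "beq 0,L25"; if the insn length says the target is too far away, the
   condition is reversed and we get "bne 0,$+8\n\tb L25".  (Illustrative
   example, not original.)  */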
/* Return insn for VSX or Altivec comparisons.  */

static rtx
rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
{
  rtx mask;
  machine_mode mode = GET_MODE (op0);

  switch (code)
    {
    default:
      break;

    case GE:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	return NULL_RTX;
      /* FALLTHRU */

    case EQ:
    case GT:
    case GTU:
    case ORDERED:
    case UNORDERED:
    case UNEQ:
    case LTGT:
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
      return mask;
    }

  return NULL_RTX;
}

/* Emit vector compare for operands OP0 and OP1 using code RCODE.
   DMODE is expected destination mode.  This is a recursive function.  */

static rtx
rs6000_emit_vector_compare (enum rtx_code rcode,
			    rtx op0, rtx op1,
			    machine_mode dmode)
{
  rtx mask;
  bool swap_operands = false;
  bool try_again = false;

  gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
  gcc_assert (GET_MODE (op0) == GET_MODE (op1));

  /* See if the comparison works as is.  */
  mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
  if (mask)
    return mask;

  switch (rcode)
    {
    case LT:
      rcode = GT;
      swap_operands = true;
      try_again = true;
      break;
    case LTU:
      rcode = GTU;
      swap_operands = true;
      try_again = true;
      break;
    case NE:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
      /* Invert condition and try again.
	 e.g., A != B becomes ~(A==B).  */
      {
	enum rtx_code rev_code;
	enum insn_code nor_code;
	rtx mask2;

	rev_code = reverse_condition_maybe_unordered (rcode);
	if (rev_code == UNKNOWN)
	  return NULL_RTX;

	nor_code = optab_handler (one_cmpl_optab, dmode);
	if (nor_code == CODE_FOR_nothing)
	  return NULL_RTX;

	mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
	if (!mask2)
	  return NULL_RTX;

	mask = gen_reg_rtx (dmode);
	emit_insn (GEN_FCN (nor_code) (mask, mask2));
	return mask;
      }
    case GE:
    case GEU:
    case LE:
    case LEU:
      /* Try GT/GTU/LT/LTU OR EQ */
      {
	rtx c_rtx, eq_rtx;
	enum insn_code ior_code;
	enum rtx_code new_code;

	switch (rcode)
	  {
	  case GE:
	    new_code = GT;
	    break;

	  case GEU:
	    new_code = GTU;
	    break;

	  case LE:
	    new_code = LT;
	    break;

	  case LEU:
	    new_code = LTU;
	    break;

	  default:
	    gcc_unreachable ();
	  }

	ior_code = optab_handler (ior_optab, dmode);
	if (ior_code == CODE_FOR_nothing)
	  return NULL_RTX;

	c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
	if (!c_rtx)
	  return NULL_RTX;

	eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
	if (!eq_rtx)
	  return NULL_RTX;

	mask = gen_reg_rtx (dmode);
	emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
	return mask;
      }
    default:
      return NULL_RTX;
    }

  if (try_again)
    {
      if (swap_operands)
	std::swap (op0, op1);

      mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
      if (mask)
	return mask;
    }

  /* You only get two chances.  */
  return NULL_RTX;
}
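
/* E.g. a V4SI "a <= b" has no single vcmp instruction: it is built as
   (b > a) | (a == b), i.e. vcmpgtsw with swapped operands OR'd with
   vcmpequw.  (Illustrative example, not original.)  */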
/* Emit vector conditional expression.  DEST is destination.  OP_TRUE and
   OP_FALSE are two VEC_COND_EXPR operands.  CC_OP0 and CC_OP1 are the two
   operands for the relation operation COND.  */

int
rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
			      rtx cond, rtx cc_op0, rtx cc_op1)
{
  machine_mode dest_mode = GET_MODE (dest);
  machine_mode mask_mode = GET_MODE (cc_op0);
  enum rtx_code rcode = GET_CODE (cond);
  rtx mask;
  bool invert_move = false;

  if (VECTOR_UNIT_NONE_P (dest_mode))
    return 0;

  gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
	      && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));

  switch (rcode)
    {
      /* Swap operands if we can, and fall back to doing the operation as
	 specified, and doing a NOR to invert the test.  */
    case NE:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
      /* Invert condition and try again.
	 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D.  */
      invert_move = true;
      rcode = reverse_condition_maybe_unordered (rcode);
      if (rcode == UNKNOWN)
	return 0;
      break;

    case GE:
    case LE:
      if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
	{
	  /* Invert condition to avoid compound test.  */
	  invert_move = true;
	  rcode = reverse_condition (rcode);
	}
      break;

    case GTU:
    case GEU:
    case LTU:
    case LEU:
      /* Invert condition to avoid compound test if necessary.  */
      if (rcode == GEU || rcode == LEU)
	{
	  invert_move = true;
	  rcode = reverse_condition (rcode);
	}
      break;

    default:
      break;
    }

  /* Get the vector mask for the given relational operations.  */
  mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);

  if (!mask)
    return 0;

  if (mask_mode != dest_mode)
    mask = simplify_gen_subreg (dest_mode, mask, mask_mode, 0);

  if (invert_move)
    std::swap (op_true, op_false);

  /* Optimize vec1 == vec2, to know the mask generates -1/0.  */
  if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
      && (GET_CODE (op_true) == CONST_VECTOR
	  || GET_CODE (op_false) == CONST_VECTOR))
    {
      rtx constant_0 = CONST0_RTX (dest_mode);
      rtx constant_m1 = CONSTM1_RTX (dest_mode);

      if (op_true == constant_m1 && op_false == constant_0)
	{
	  emit_move_insn (dest, mask);
	  return 1;
	}

      else if (op_true == constant_0 && op_false == constant_m1)
	{
	  emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
	  return 1;
	}

      /* If we can't use the vector comparison directly, perhaps we can use
	 the mask for the true or false fields, instead of loading up a
	 constant.  */
      if (op_true == constant_m1)
	op_true = mask;

      if (op_false == constant_0)
	op_false = mask;
    }

  if (!REG_P (op_true) && !SUBREG_P (op_true))
    op_true = force_reg (dest_mode, op_true);

  if (!REG_P (op_false) && !SUBREG_P (op_false))
    op_false = force_reg (dest_mode, op_false);

  rtx tmp = gen_rtx_IOR (dest_mode,
			 gen_rtx_AND (dest_mode, gen_rtx_NOT (dest_mode, mask),
				      op_false),
			 gen_rtx_AND (dest_mode, mask, op_true));
  emit_insn (gen_rtx_SET (dest, tmp));
  return 1;
}
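
/* The final AND/IOR form above is exactly the vsel/xxsel operation:
   dest = (mask & op_true) | (~mask & op_false).  */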
/* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to emit a
   maximum or minimum with "C" semantics.

   Unless you use -ffast-math, you can't use these instructions to replace
   conditions that implicitly reverse the condition because the comparison
   might generate a NaN or signed zero.

   I.e. the following can be replaced all of the time
	ret = (op1 >  op2) ? op1 : op2	; generate xsmaxcdp
	ret = (op1 >= op2) ? op1 : op2	; generate xsmaxcdp
	ret = (op1 <  op2) ? op1 : op2	; generate xsmincdp
	ret = (op1 <= op2) ? op1 : op2	; generate xsmincdp

   The following can be replaced only if -ffast-math is used:
	ret = (op1 <  op2) ? op2 : op1	; generate xsmaxcdp
	ret = (op1 <= op2) ? op2 : op1	; generate xsmaxcdp
	ret = (op1 >  op2) ? op2 : op1	; generate xsmincdp
	ret = (op1 >= op2) ? op2 : op1	; generate xsmincdp

   Move TRUE_COND to DEST if OP of the operands of the last comparison is
   nonzero/true, FALSE_COND if it is zero/false.

   Return false if we can't generate the appropriate minimum or maximum, and
   true if we could do the minimum or maximum.  */

static bool
rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  enum rtx_code code = GET_CODE (op);
  rtx op0 = XEXP (op, 0);
  rtx op1 = XEXP (op, 1);
  machine_mode compare_mode = GET_MODE (op0);
  machine_mode result_mode = GET_MODE (dest);
  bool max_p;

  if (result_mode != compare_mode)
    return false;

  /* See the comments of this function, it simply expects GE/GT/LE/LT in
     the checks, but for the reversible equivalent UNLT/UNLE/UNGT/UNGE,
     we need to do the reversions first to make the following checks
     support fewer cases, like:

	(a UNLT b) ? op1 : op2 ==> (a >= b) ? op2 : op1;
	(a UNLE b) ? op1 : op2 ==> (a >  b) ? op2 : op1;
	(a UNGT b) ? op1 : op2 ==> (a <= b) ? op2 : op1;
	(a UNGE b) ? op1 : op2 ==> (a <  b) ? op2 : op1;

     By the way, if we see these UNLT/UNLE/UNGT/UNGE it's guaranteed
     that we have 4-way condition codes (LT/GT/EQ/UN), so we do not
     have to check for fast-math or the like.  */
  if (code == UNGE || code == UNGT || code == UNLE || code == UNLT)
    {
      code = reverse_condition_maybe_unordered (code);
      std::swap (true_cond, false_cond);
    }

  if (code == GE || code == GT)
    max_p = true;
  else if (code == LE || code == LT)
    max_p = false;
  else
    return false;

  if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
    ;

  /* Only when NaNs and signed-zeros are not in effect, smax could be
     used for `op0 < op1 ? op1 : op0`, and smin could be used for
     `op0 > op1 ? op1 : op0`.  */
  else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
	   && !HONOR_NANS (compare_mode)
	   && !HONOR_SIGNED_ZEROS (compare_mode))
    max_p = !max_p;

  else
    return false;

  rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
  return true;
}
/* Possibly emit a floating point conditional move by generating a compare
   that sets a mask instruction and a XXSEL select instruction.

   Move TRUE_COND to DEST if OP of the operands of the last comparison is
   nonzero/true, FALSE_COND if it is zero/false.

   Return false if the operation cannot be generated, and true if we could
   generate the instruction.  */

static bool
rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  enum rtx_code code = GET_CODE (op);
  rtx op0 = XEXP (op, 0);
  rtx op1 = XEXP (op, 1);
  machine_mode compare_mode = GET_MODE (op0);
  machine_mode result_mode = GET_MODE (dest);
  rtx compare_rtx;
  rtx cmove_rtx;
  rtx clobber_rtx;

  if (!can_create_pseudo_p ())
    return false;

  /* We allow the comparison to be either SFmode/DFmode and the true/false
     condition to be either SFmode/DFmode.  I.e. we allow:

	float a, b;
	double c, d, r;

	r = (a == b) ? c : d;

    and:

	double a, b;
	float c, d, r;

	r = (a == b) ? c : d;

    but we don't allow intermixing the IEEE 128-bit floating point types with
    the 32/64-bit scalar types.  */

  if (!(compare_mode == result_mode
	|| (compare_mode == SFmode && result_mode == DFmode)
	|| (compare_mode == DFmode && result_mode == SFmode)))
    return false;

  switch (code)
    {
    case EQ:
    case GE:
    case GT:
      break;

    case NE:
    case LT:
    case LE:
      code = swap_condition (code);
      std::swap (op0, op1);
      break;

    default:
      return false;
    }

  /* Generate:	[(parallel [(set (dest)
				 (if_then_else (op (cmp1) (cmp2))
					       (true)
					       (false)))
			    (clobber (scratch))])].  */

  compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
  cmove_rtx = gen_rtx_SET (dest,
			   gen_rtx_IF_THEN_ELSE (result_mode,
						 compare_rtx,
						 true_cond,
						 false_cond));

  clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
  emit_insn (gen_rtx_PARALLEL (VOIDmode,
			       gen_rtvec (2, cmove_rtx, clobber_rtx)));

  return true;
}

/* Helper function to return true if the target has instructions to do a
   compare and set mask instruction that can be used with XXSEL to implement
   a conditional move.  It is also assumed that such a target also supports
   the "C" minimum and maximum instructions.  */

static bool
have_compare_and_set_mask (machine_mode mode)
{
  switch (mode)
    {
    case E_SFmode:
    case E_DFmode:
      return TARGET_P9_MINMAX;

    case E_KFmode:
    case E_TFmode:
      return TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode);

    default:
      return false;
    }
}
/* Emit a conditional move: move TRUE_COND to DEST if OP of the
   operands of the last comparison is nonzero/true, FALSE_COND if it
   is zero/false.  Return 0 if the hardware has no such operation.  */

bool
rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  enum rtx_code code = GET_CODE (op);
  rtx op0 = XEXP (op, 0);
  rtx op1 = XEXP (op, 1);
  machine_mode compare_mode = GET_MODE (op0);
  machine_mode result_mode = GET_MODE (dest);
  rtx temp;
  bool is_against_zero;

  /* These modes should always match.  */
  if (GET_MODE (op1) != compare_mode
      /* In the isel case however, we can use a compare immediate, so
	 op1 may be a small constant.  */
      && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
    return false;
  if (GET_MODE (true_cond) != result_mode)
    return false;
  if (GET_MODE (false_cond) != result_mode)
    return false;

  /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
     instructions.  */
  if (have_compare_and_set_mask (compare_mode)
      && have_compare_and_set_mask (result_mode))
    {
      if (rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond))
	return true;

      if (rs6000_maybe_emit_fp_cmove (dest, op, true_cond, false_cond))
	return true;
    }

  /* Don't allow using floating point comparisons for integer results for
     now.  */
  if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
    return false;

  /* First, work out if the hardware can do this at all, or
     if it's too slow....  */
  if (!FLOAT_MODE_P (compare_mode))
    {
      if (TARGET_ISEL)
	return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
      return false;
    }

  is_against_zero = op1 == CONST0_RTX (compare_mode);

  /* A floating-point subtract might overflow, underflow, or produce
     an inexact result, thus changing the floating-point flags, so it
     can't be generated if we care about that.  It's safe if one side
     of the construct is zero, since then no subtract will be
     generated.  */
  if (SCALAR_FLOAT_MODE_P (compare_mode)
      && flag_trapping_math && ! is_against_zero)
    return false;

  /* Eliminate half of the comparisons by switching operands, this
     makes the remaining code simpler.  */
  if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
      || code == LTGT || code == LT || code == UNLE)
    {
      code = reverse_condition_maybe_unordered (code);
      temp = true_cond;
      true_cond = false_cond;
      false_cond = temp;
    }

  /* UNEQ and LTGT take four instructions for a comparison with zero,
     it'll probably be faster to use a branch here too.  */
  if (code == UNEQ && HONOR_NANS (compare_mode))
    return false;

  /* We're going to try to implement comparisons by performing
     a subtract, then comparing against zero.  Unfortunately,
     Inf - Inf is NaN which is not zero, and so if we don't
     know that the operand is finite and the comparison
     would treat EQ different to UNORDERED, we can't do it.  */
  if (HONOR_INFINITIES (compare_mode)
      && code != GT && code != UNGE
      && (!CONST_DOUBLE_P (op1)
	  || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
      /* Constructs of the form (a OP b ? a : b) are safe.  */
      && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
	  || (! rtx_equal_p (op0, true_cond)
	      && ! rtx_equal_p (op1, true_cond))))
    return false;

  /* At this point we know we can use fsel.  */

  /* Don't allow compare_mode other than SFmode or DFmode, for others there
     is no fsel instruction.  */
  if (compare_mode != SFmode && compare_mode != DFmode)
    return false;

  /* Reduce the comparison to a comparison against zero.  */
  if (! is_against_zero)
    {
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
      op0 = temp;
      op1 = CONST0_RTX (compare_mode);
    }

  /* If we don't care about NaNs we can reduce some of the comparisons
     down to faster ones.  */
  if (! HONOR_NANS (compare_mode))
    switch (code)
      {
      case GT:
	code = LE;
	temp = true_cond;
	true_cond = false_cond;
	false_cond = temp;
	break;

      case UNGE:
	code = GE;
	break;

      case UNEQ:
	code = EQ;
	break;

      default:
	break;
      }

  /* Now, reduce everything down to a GE.  */
  switch (code)
    {
    case GE:
      break;

    case LE:
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
      op0 = temp;
      break;

    case ORDERED:
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
      op0 = temp;
      break;

    case EQ:
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp,
			      gen_rtx_NEG (compare_mode,
					   gen_rtx_ABS (compare_mode, op0))));
      op0 = temp;
      break;

    case UNGE:
      /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
      temp = gen_reg_rtx (result_mode);
      emit_insn (gen_rtx_SET (temp,
			      gen_rtx_IF_THEN_ELSE (result_mode,
						    gen_rtx_GE (VOIDmode,
								op0, op1),
						    true_cond, false_cond)));
      false_cond = true_cond;
      true_cond = temp;

      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
      op0 = temp;
      break;

    case GT:
      /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
      temp = gen_reg_rtx (result_mode);
      emit_insn (gen_rtx_SET (temp,
			      gen_rtx_IF_THEN_ELSE (result_mode,
						    gen_rtx_GE (VOIDmode,
								op0, op1),
						    true_cond, false_cond)));
      true_cond = false_cond;
      false_cond = temp;

      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
      op0 = temp;
      break;

    default:
      gcc_unreachable ();
    }

  emit_insn (gen_rtx_SET (dest,
			  gen_rtx_IF_THEN_ELSE (result_mode,
						gen_rtx_GE (VOIDmode,
							    op0, op1),
						true_cond, false_cond)));

  return true;
}
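
/* Worked example of the reduction above (a sketch, not literal output):
   for d = (a >= b) ? c : e on a classic FPU target with trapping math
   disabled, is_against_zero is false, so a subtract reduces the test to a
   GE against zero and the final if_then_else can match the fsel pattern:

	fsub f0,f1,f2		# f0 = a - b
	fsel f3,f0,f4,f5	# f3 = (f0 >= 0.0) ? c : e  */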
/* Same as above, but for ints (isel).  */

bool
rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  rtx condition_rtx, cr;
  machine_mode mode = GET_MODE (dest);
  enum rtx_code cond_code;
  rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
  bool signedp;

  if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
    return false;

  /* PR104335: We now need to expect CC-mode "comparisons"
     coming from ifcvt.  The following code expects proper
     comparisons so better abort here.  */
  if (GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_CC)
    return false;

  /* We still have to do the compare, because isel doesn't do a
     compare, it just looks at the CRx bits set by a previous compare
     instruction.  */
  condition_rtx = rs6000_generate_compare (op, mode);
  cond_code = GET_CODE (condition_rtx);
  cr = XEXP (condition_rtx, 0);
  signedp = GET_MODE (cr) == CCmode;

  isel_func = (mode == SImode
	       ? (signedp ? gen_isel_cc_si : gen_isel_ccuns_si)
	       : (signedp ? gen_isel_cc_di : gen_isel_ccuns_di));

  switch (cond_code)
    {
    case LT: case GT: case LTU: case GTU: case EQ:
      /* isel handles these directly.  */
      break;

    default:
      /* We need to swap the sense of the comparison.  */
      std::swap (false_cond, true_cond);
      PUT_CODE (condition_rtx, reverse_condition (cond_code));
      break;
    }

  if (false_cond != const0_rtx)
    false_cond = force_reg (mode, false_cond);
  if (true_cond != const0_rtx)
    true_cond = force_reg (mode, true_cond);

  emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));

  return true;
}
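
/* For instance (sketch only), r = (a < b) ? x : y with int operands
   expands through the path above to a compare followed by an isel, with
   no branch:

	cmpw 7,3,4		# set CR7 from a <=> b
	isel 9,5,6,28		# r = CR7.LT ? x : y

   Conditions isel cannot test directly (e.g. LE) are handled by reversing
   the comparison and swapping the two arms first.  */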
/* Emit a minimum or maximum operation on DEST of the given CODE with
   operands OP0 and OP1.  */

void
rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);
  enum rtx_code c;
  rtx target;

  /* VSX/altivec have direct min/max insns.  */
  if ((code == SMAX || code == SMIN)
      && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
	  || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))
	  || (TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode))))
    {
      emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
      return;
    }

  if (code == SMAX || code == SMIN)
    c = GE;
  else
    c = GEU;

  if (code == SMAX || code == UMAX)
    target = emit_conditional_move (dest, { c, op0, op1, mode },
				    op0, op1, mode, 0);
  else
    target = emit_conditional_move (dest, { c, op0, op1, mode },
				    op1, op0, mode, 0);
  gcc_assert (target);
  if (target != dest)
    emit_move_insn (dest, target);
}
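
/* As a concrete illustration (assuming a VSX target), smax:DF has a direct
   pattern, so

	rs6000_emit_minmax (dest, SMAX, op0, op1);

   emits a single (set (reg:DF dest) (smax:DF op0 op1)) that can become one
   xsmaxdp instruction; only when no direct insn exists does the
   emit_conditional_move fallback get used.  */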
/* A subroutine of the atomic operation splitters.  Jump to LABEL if
   COND is true.  Mark the jump as unlikely to be taken.  */

static void
emit_unlikely_jump (rtx cond, rtx label)
{
  rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
  rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
  add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
}
/* A subroutine of the atomic operation splitters.  Emit a load-locked
   instruction in MODE.  For QI/HImode, possibly use a pattern that includes
   the zero_extend operation.  */

static void
emit_load_locked (machine_mode mode, rtx reg, rtx mem)
{
  rtx (*fn) (rtx, rtx) = NULL;

  switch (mode)
    {
    case E_QImode:
      fn = gen_load_lockedqi;
      break;
    case E_HImode:
      fn = gen_load_lockedhi;
      break;
    case E_SImode:
      if (GET_MODE (mem) == QImode)
	fn = gen_load_lockedqi_si;
      else if (GET_MODE (mem) == HImode)
	fn = gen_load_lockedhi_si;
      else
	fn = gen_load_lockedsi;
      break;
    case E_DImode:
      fn = gen_load_lockeddi;
      break;
    case E_TImode:
      fn = gen_load_lockedti;
      break;
    default:
      gcc_unreachable ();
    }
  emit_insn (fn (reg, mem));
}
/* A subroutine of the atomic operation splitters.  Emit a store-conditional
   instruction in MODE.  */

static void
emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
{
  rtx (*fn) (rtx, rtx, rtx) = NULL;

  switch (mode)
    {
    case E_QImode:
      fn = gen_store_conditionalqi;
      break;
    case E_HImode:
      fn = gen_store_conditionalhi;
      break;
    case E_SImode:
      fn = gen_store_conditionalsi;
      break;
    case E_DImode:
      fn = gen_store_conditionaldi;
      break;
    case E_TImode:
      fn = gen_store_conditionalti;
      break;
    default:
      gcc_unreachable ();
    }

  /* Emit sync before stwcx. to address PPC405 Erratum 77.  */
  if (PPC405_ERRATUM77)
    emit_insn (gen_hwsync ());

  emit_insn (fn (res, mem, val));
}
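
/* The two helpers above supply the building blocks of every atomic sequence
   in this file; the canonical shape on PowerPC is (sketch)

   .L1:	lwarx  r9,0,r3		# load and reserve
	<operate on r9>
	stwcx. r9,0,r3		# store iff the reservation still holds
	bne-   .L1		# reservation lost: retry

   where the bne- is produced by emit_unlikely_jump on the CR0 result of
   the store-conditional.  */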
/* Expand barriers before and after a load_locked/store_cond sequence.  */

static rtx
rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
{
  rtx addr = XEXP (mem, 0);

  if (!legitimate_indirect_address_p (addr, reload_completed)
      && !legitimate_indexed_address_p (addr, reload_completed))
    {
      addr = force_reg (Pmode, addr);
      mem = replace_equiv_address_nv (mem, addr);
    }

  switch (model)
    {
    case MEMMODEL_RELAXED:
    case MEMMODEL_CONSUME:
    case MEMMODEL_ACQUIRE:
      break;
    case MEMMODEL_RELEASE:
    case MEMMODEL_ACQ_REL:
      emit_insn (gen_lwsync ());
      break;
    case MEMMODEL_SEQ_CST:
      emit_insn (gen_hwsync ());
      break;
    default:
      gcc_unreachable ();
    }
  return mem;
}
static void
rs6000_post_atomic_barrier (enum memmodel model)
{
  switch (model)
    {
    case MEMMODEL_RELAXED:
    case MEMMODEL_CONSUME:
    case MEMMODEL_RELEASE:
      break;
    case MEMMODEL_ACQUIRE:
    case MEMMODEL_ACQ_REL:
    case MEMMODEL_SEQ_CST:
      emit_insn (gen_isync ());
      break;
    default:
      gcc_unreachable ();
    }
}
/* A subroutine of the various atomic expanders.  For sub-word operations,
   we must adjust things to operate on SImode.  Given the original MEM,
   return a new aligned memory.  Also build and return the quantities by
   which to shift and mask.  */

static rtx
rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
{
  rtx addr, align, shift, mask, mem;
  HOST_WIDE_INT shift_mask;
  machine_mode mode = GET_MODE (orig_mem);

  /* For smaller modes, we have to implement this via SImode.  */
  shift_mask = (mode == QImode ? 0x18 : 0x10);

  addr = XEXP (orig_mem, 0);
  addr = force_reg (GET_MODE (addr), addr);

  /* Aligned memory containing subword.  Generate a new memory.  We
     do not want any of the existing MEM_ATTR data, as we're now
     accessing memory outside the original object.  */
  align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
  mem = gen_rtx_MEM (SImode, align);
  MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
  if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
    set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);

  /* Shift amount for subword relative to aligned word.  */
  shift = gen_reg_rtx (SImode);
  addr = gen_lowpart (SImode, addr);
  rtx tmp = gen_reg_rtx (SImode);
  emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
  emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
  if (BYTES_BIG_ENDIAN)
    shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
				 shift, 1, OPTAB_LIB_WIDEN);
  *pshift = shift;

  /* Mask for insertion.  */
  mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
			      shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
  *pmask = mask;

  return mem;
}
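
/* Worked example (big-endian, HImode): for a halfword at address 0x1002,
   align is 0x1000 and (addr << 3) & 0x10 gives 0x10, which the XOR with
   shift_mask (0x10) turns into 0 -- on big-endian the halfword at offset 2
   occupies the low-order bits of the aligned word, so no shift is needed
   and the mask is 0xffff.  For the halfword at 0x1000 the shift comes out
   as 16 and the mask as 0xffff0000.  */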
/* A subroutine of the various atomic expanders.  For sub-word operands,
   combine OLDVAL and NEWVAL via MASK.  Returns a new pseudo.  */

static rtx
rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
{
  rtx x;

  x = gen_reg_rtx (SImode);
  emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
					  gen_rtx_NOT (SImode, mask),
					  oldval)));

  x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);

  return x;
}
/* A subroutine of the various atomic expanders.  For sub-word operands,
   extract WIDE to NARROW via SHIFT.  */

static void
rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
{
  wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
			      wide, 1, OPTAB_LIB_WIDEN);
  emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
}
/* Expand an atomic compare and swap operation.  */

void
rs6000_expand_atomic_compare_and_swap (rtx operands[])
{
  rtx boolval, retval, mem, oldval, newval, cond;
  rtx label1, label2, x, mask, shift;
  machine_mode mode, orig_mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;

  boolval = operands[0];
  retval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (INTVAL (operands[5]) != 0);
  mod_s = memmodel_base (INTVAL (operands[6]));
  mod_f = memmodel_base (INTVAL (operands[7]));
  orig_mode = mode = GET_MODE (mem);

  mask = shift = NULL_RTX;
  if (mode == QImode || mode == HImode)
    {
      /* Before power8, we didn't have access to lbarx/lharx, so generate a
	 lwarx and shift/mask operations.  With power8, we need to do the
	 comparison in SImode, but the store is still done in QI/HImode.  */
      oldval = convert_modes (SImode, mode, oldval, 1);

      if (!TARGET_SYNC_HI_QI)
	{
	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

	  /* Shift and mask OLDVAL into position with the word.  */
	  oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
					NULL_RTX, 1, OPTAB_LIB_WIDEN);

	  /* Shift and mask NEWVAL into position within the word.  */
	  newval = convert_modes (SImode, mode, newval, 1);
	  newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
					NULL_RTX, 1, OPTAB_LIB_WIDEN);
	}

      /* Prepare to adjust the return value.  */
      retval = gen_reg_rtx (SImode);
      mode = SImode;
    }
  else if (reg_overlap_mentioned_p (retval, oldval))
    oldval = copy_to_reg (oldval);

  if (mode != TImode && !reg_or_short_operand (oldval, mode))
    oldval = copy_to_mode_reg (mode, oldval);

  if (reg_overlap_mentioned_p (retval, newval))
    newval = copy_to_reg (newval);

  mem = rs6000_pre_atomic_barrier (mem, mod_s);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
      emit_label (XEXP (label1, 0));
    }
  label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());

  emit_load_locked (mode, retval, mem);

  x = retval;
  if (mask)
    x = expand_simple_binop (SImode, AND, retval, mask,
			     NULL_RTX, 1, OPTAB_LIB_WIDEN);

  cond = gen_reg_rtx (CCmode);
  /* If we have TImode, synthesize a comparison.  */
  if (mode != TImode)
    x = gen_rtx_COMPARE (CCmode, x, oldval);
  else
    {
      rtx xor1_result = gen_reg_rtx (DImode);
      rtx xor2_result = gen_reg_rtx (DImode);
      rtx or_result = gen_reg_rtx (DImode);
      rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
      rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
      rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
      rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);

      emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
      emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
      emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
      x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
    }

  emit_insn (gen_rtx_SET (cond, x));

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label2);

  x = newval;
  if (mask)
    x = rs6000_mask_atomic_subword (retval, newval, mask);

  emit_store_conditional (orig_mode, cond, mem, x);

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      emit_unlikely_jump (x, label1);
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));

  rs6000_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));

  if (shift)
    rs6000_finish_atomic_subword (operands[1], retval, shift);
  else if (mode != GET_MODE (operands[1]))
    convert_move (operands[1], retval, 1);

  /* In all cases, CR0 contains EQ on success, and NE on failure.  */
  x = gen_rtx_EQ (SImode, cond, const0_rtx);
  emit_insn (gen_rtx_SET (boolval, x));
}
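
/* End-to-end sketch: __atomic_compare_exchange_n on an int with seq_cst
   ordering expands through this function to roughly

	sync
   .L1:	lwarx  r9,0,r3
	cmpw   0,r9,r4		# compare against the expected value
	bne-   0,.L2
	stwcx. r5,0,r3		# try to store the new value
	bne-   0,.L1
   .L2:	isync

   with CR0 still holding EQ/NE for the boolean result at the join point.  */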
/* Expand an atomic exchange operation.  */

void
rs6000_expand_atomic_exchange (rtx operands[])
{
  rtx retval, mem, val, cond;
  machine_mode mode;
  enum memmodel model;
  rtx label, x, mask, shift;

  retval = operands[0];
  mem = operands[1];
  val = operands[2];
  model = memmodel_base (INTVAL (operands[3]));
  mode = GET_MODE (mem);

  mask = shift = NULL_RTX;
  if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
    {
      mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

      /* Shift and mask VAL into position with the word.  */
      val = convert_modes (SImode, mode, val, 1);
      val = expand_simple_binop (SImode, ASHIFT, val, shift,
				 NULL_RTX, 1, OPTAB_LIB_WIDEN);

      /* Prepare to adjust the return value.  */
      retval = gen_reg_rtx (SImode);
      mode = SImode;
    }

  mem = rs6000_pre_atomic_barrier (mem, model);

  label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
  emit_label (XEXP (label, 0));

  emit_load_locked (mode, retval, mem);

  x = val;
  if (mask)
    x = rs6000_mask_atomic_subword (retval, val, mask);

  cond = gen_reg_rtx (CCmode);
  emit_store_conditional (mode, cond, mem, x);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label);

  rs6000_post_atomic_barrier (model);

  if (shift)
    rs6000_finish_atomic_subword (operands[0], retval, shift);
}
/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
   to perform.  MEM is the memory on which to operate.  VAL is the second
   operand of the binary operator.  BEFORE and AFTER are optional locations to
   return the value of MEM either before or after the operation.  MODEL_RTX
   is a CONST_INT containing the memory model to use.  */

void
rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
			 rtx orig_before, rtx orig_after, rtx model_rtx)
{
  enum memmodel model = memmodel_base (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode store_mode = mode;
  rtx label, x, cond, mask, shift;
  rtx before = orig_before, after = orig_after;

  mask = shift = NULL_RTX;
  /* On power8, we want to use SImode for the operation.  On previous systems,
     use the operation in a subword and shift/mask to get the proper byte or
     halfword.  */
  if (mode == QImode || mode == HImode)
    {
      if (TARGET_SYNC_HI_QI)
	{
	  val = convert_modes (SImode, mode, val, 1);

	  /* Prepare to adjust the return value.  */
	  before = gen_reg_rtx (SImode);
	  if (after)
	    after = gen_reg_rtx (SImode);
	  mode = SImode;
	}
      else
	{
	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

	  /* Shift and mask VAL into position with the word.  */
	  val = convert_modes (SImode, mode, val, 1);
	  val = expand_simple_binop (SImode, ASHIFT, val, shift,
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);

	  switch (code)
	    {
	    case IOR:
	    case XOR:
	      /* We've already zero-extended VAL.  That is sufficient to
		 make certain that it does not affect other bits.  */
	      mask = NULL;
	      break;

	    case AND:
	      /* If we make certain that all of the other bits in VAL are
		 set, that will be sufficient to not affect other bits.  */
	      x = gen_rtx_NOT (SImode, mask);
	      x = gen_rtx_IOR (SImode, x, val);
	      emit_insn (gen_rtx_SET (val, x));
	      mask = NULL;
	      break;

	    case NOT:
	    case PLUS:
	    case MINUS:
	      /* These will all affect bits outside the field and need
		 adjustment via MASK within the loop.  */
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  /* Prepare to adjust the return value.  */
	  before = gen_reg_rtx (SImode);
	  if (after)
	    after = gen_reg_rtx (SImode);
	  store_mode = mode = SImode;
	}
    }

  mem = rs6000_pre_atomic_barrier (mem, model);

  label = gen_label_rtx ();
  emit_label (label);
  label = gen_rtx_LABEL_REF (VOIDmode, label);

  if (before == NULL_RTX)
    before = gen_reg_rtx (mode);

  emit_load_locked (mode, before, mem);

  if (code == NOT)
    {
      x = expand_simple_binop (mode, AND, before, val,
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      after = expand_simple_unop (mode, NOT, x, after, 1);
    }
  else
    after = expand_simple_binop (mode, code, before, val,
				 after, 1, OPTAB_LIB_WIDEN);

  x = after;
  if (mask)
    {
      x = expand_simple_binop (SImode, AND, after, mask,
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      x = rs6000_mask_atomic_subword (before, x, mask);
    }
  else if (store_mode != mode)
    x = convert_modes (store_mode, mode, x, 1);

  cond = gen_reg_rtx (CCmode);
  emit_store_conditional (store_mode, cond, mem, x);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label);

  rs6000_post_atomic_barrier (model);

  if (shift)
    {
      /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
	 then do the calculations in a SImode register.  */
      if (orig_before)
	rs6000_finish_atomic_subword (orig_before, before, shift);
      if (orig_after)
	rs6000_finish_atomic_subword (orig_after, after, shift);
    }
  else if (store_mode != mode)
    {
      /* QImode/HImode on machines with lbarx/lharx where we do the native
	 operation and then do the calculations in a SImode register.  */
      if (orig_before)
	convert_move (orig_before, before, 1);
      if (orig_after)
	convert_move (orig_after, after, 1);
    }
  else if (orig_after && after != orig_after)
    emit_move_insn (orig_after, after);
}
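
/* For example (sketch), __atomic_fetch_add (&x, 1, __ATOMIC_RELAXED) on an
   int goes down the non-subword path above and becomes simply

   .L1:	lwarx  r9,0,r3
	addi   r10,r9,1
	stwcx. r10,0,r3
	bne-   0,.L1

   with BEFORE in r9 and AFTER in r10, and no barriers since the model is
   relaxed.  */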
static GTY(()) alias_set_type TOC_alias_set = -1;

alias_set_type
get_TOC_alias_set (void)
{
  if (TOC_alias_set == -1)
    TOC_alias_set = new_alias_set ();
  return TOC_alias_set;
}
/* The mode the ABI uses for a word.  This is not the same as word_mode
   for -m32 -mpowerpc64.  This is used to implement various target hooks.  */

static scalar_int_mode
rs6000_abi_word_mode (void)
{
  return TARGET_32BIT ? SImode : DImode;
}
/* Implement the TARGET_OFFLOAD_OPTIONS hook.  */
static char *
rs6000_offload_options (void)
{
  if (TARGET_64BIT)
    return xstrdup ("-foffload-abi=lp64");
  else
    return xstrdup ("-foffload-abi=ilp32");
}
/* A quick summary of the various types of 'constant-pool tables'
   under PowerPC:

   Target	Flags		Name		One table per
   AIX		(none)		AIX TOC		object file
   AIX		-mfull-toc	AIX TOC		object file
   AIX		-mminimal-toc	AIX minimal TOC	translation unit
   SVR4/EABI	(none)		SVR4 SDATA	object file
   SVR4/EABI	-fpic		SVR4 pic	object file
   SVR4/EABI	-fPIC		SVR4 PIC	translation unit
   SVR4/EABI	-mrelocatable	EABI TOC	function
   SVR4/EABI	-maix		AIX TOC		object file
   SVR4/EABI	-maix -mminimal-toc
				AIX minimal TOC	translation unit

   Name			Reg.	Set by	entries	      contains:
					made by	 addrs?	fp?	sum?

   AIX TOC		2	crt0	as	 Y	option	option
   AIX minimal TOC	30	prolog	gcc	 Y	Y	option
   SVR4 SDATA		13	crt0	gcc	 N	Y	N
   SVR4 pic		30	prolog	ld	 Y	not yet	N
   SVR4 PIC		30	prolog	gcc	 Y	option	option
   EABI TOC		30	prolog	gcc	 Y	option	option  */
/* Hash functions for the hash table.  */

static unsigned
rs6000_hash_constant (rtx k)
{
  enum rtx_code code = GET_CODE (k);
  machine_mode mode = GET_MODE (k);
  unsigned result = (code << 3) ^ mode;
  const char *format;
  int flen, fidx;

  format = GET_RTX_FORMAT (code);
  flen = strlen (format);
  fidx = 0;

  switch (code)
    {
    case LABEL_REF:
      return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));

    case CONST_WIDE_INT:
      {
	int i;
	flen = CONST_WIDE_INT_NUNITS (k);
	for (i = 0; i < flen; i++)
	  result = result * 613 + CONST_WIDE_INT_ELT (k, i);
	return result;
      }

    case CONST_DOUBLE:
      return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;

    case CODE_LABEL:
      fidx = 3;
      break;

    default:
      break;
    }

  for (; fidx < flen; fidx++)
    switch (format[fidx])
      {
      case 's':
	{
	  unsigned i, len;
	  const char *str = XSTR (k, fidx);
	  len = strlen (str);
	  result = result * 613 + len;
	  for (i = 0; i < len; i++)
	    result = result * 613 + (unsigned) str[i];
	  break;
	}
      case 'u':
      case 'e':
	result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
	break;
      case 'i':
      case 'n':
	result = result * 613 + (unsigned) XINT (k, fidx);
	break;
      case 'w':
	if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
	  result = result * 613 + (unsigned) XWINT (k, fidx);
	else
	  {
	    size_t i;
	    for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
	      result = result * 613 + (unsigned) (XWINT (k, fidx)
						  >> CHAR_BIT * i);
	  }
	break;
      case '0':
	break;
      default:
	gcc_unreachable ();
      }

  return result;
}
hashval_t
toc_hasher::hash (toc_hash_struct *thc)
{
  return rs6000_hash_constant (thc->key) ^ thc->key_mode;
}

/* Compare H1 and H2 for equivalence.  */

bool
toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
{
  rtx r1 = h1->key;
  rtx r2 = h2->key;

  if (h1->key_mode != h2->key_mode)
    return 0;

  return rtx_equal_p (r1, r2);
}
/* These are the names given by the C++ front-end to vtables, and
   vtable-like objects.  Ideally, this logic should not be here;
   instead, there should be some programmatic way of inquiring as
   to whether or not an object is a vtable.  */

#define VTABLE_NAME_P(NAME)			\
  (startswith (name, "_vt.")			\
   || startswith (name, "_ZTV")			\
   || startswith (name, "_ZTT")			\
   || startswith (name, "_ZTI")			\
   || startswith (name, "_ZTC"))
#ifdef NO_DOLLAR_IN_LABEL
/* Return a GGC-allocated character string translating dollar signs in
   input NAME to underscores.  Used by XCOFF ASM_OUTPUT_LABELREF.  */

const char *
rs6000_xcoff_strip_dollar (const char *name)
{
  char *strip, *p;
  const char *q;
  size_t len;

  q = (const char *) strchr (name, '$');

  if (q == 0 || q == name)
    return name;

  len = strlen (name);
  strip = XALLOCAVEC (char, len + 1);
  strcpy (strip, name);
  p = strip + (q - name);
  while (p)
    {
      *p = '_';
      p = strchr (p + 1, '$');
    }

  return ggc_alloc_string (strip, len);
}
#endif
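
/* For example, a label such as "foo$bar$baz" comes back as the GGC string
   "foo_bar_baz"; a name that starts with '$' (q == name) is deliberately
   returned unchanged.  */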
void
rs6000_output_symbol_ref (FILE *file, rtx x)
{
  const char *name = XSTR (x, 0);

  /* Currently C++ toc references to vtables can be emitted before it
     is decided whether the vtable is public or private.  If this is
     the case, then the linker will eventually complain that there is
     a reference to an unknown section.  Thus, for vtables only,
     we emit the TOC reference to reference the identifier and not the
     symbol.  */
  if (VTABLE_NAME_P (name))
    RS6000_OUTPUT_BASENAME (file, name);
  else
    assemble_name (file, name);
}
/* Output a TOC entry.  We derive the entry name from what is being
   written.  */

void
output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
{
  char buf[256];
  const char *name = buf;
  rtx base = x;
  HOST_WIDE_INT offset = 0;

  gcc_assert (!TARGET_NO_TOC_OR_PCREL);

  /* When the linker won't eliminate them, don't output duplicate
     TOC entries (this happens on AIX if there is any kind of TOC,
     and on SVR4 under -fPIC or -mrelocatable).  Don't do this for
     CODE_LABELs.  */
  if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
    {
      struct toc_hash_struct *h;

      /* Create toc_hash_table.  This can't be done at TARGET_OPTION_OVERRIDE
	 time because GGC is not initialized at that point.  */
      if (toc_hash_table == NULL)
	toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);

      h = ggc_alloc<toc_hash_struct> ();
      h->key = x;
      h->key_mode = mode;
      h->labelno = labelno;

      toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
      if (*found == NULL)
	*found = h;
      else  /* This is indeed a duplicate.
	       Set this label equal to that label.  */
	{
	  fputs ("\t.set ", file);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
	  fprintf (file, "%d,", labelno);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
	  fprintf (file, "%d\n", ((*found)->labelno));

	  if (TARGET_XCOFF && SYMBOL_REF_P (x)
	      && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
		  || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
	    {
	      fputs ("\t.set ", file);
	      ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
	      fprintf (file, "%d,", labelno);
	      ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
	      fprintf (file, "%d\n", ((*found)->labelno));
	    }

	  return;
	}
    }

  /* If we're going to put a double constant in the TOC, make sure it's
     aligned properly when strict alignment is on.  */
  if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
      && STRICT_ALIGNMENT
      && GET_MODE_BITSIZE (mode) >= 64
      && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
    ASM_OUTPUT_ALIGN (file, 3);
  }

  (*targetm.asm_out.internal_label) (file, "LC", labelno);

  /* Handle FP constants specially.  Note that if we have a minimal
     TOC, things we put here aren't actually in the TOC, so we can allow
     FP constants.  */
  if (CONST_DOUBLE_P (x)
      && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
	  || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
    {
      long k[4];

      if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
	REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
      else
	real_to_target (k, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x));

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff,
		     k[2] & 0xffffffff, k[3] & 0xffffffff);
	  fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
		   k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
	  return;
	}
      else
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs ("\t.long ", file);
	  else
	    fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff,
		     k[2] & 0xffffffff, k[3] & 0xffffffff);
	  fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
		   k[0] & 0xffffffff, k[1] & 0xffffffff,
		   k[2] & 0xffffffff, k[3] & 0xffffffff);
	  return;
	}
    }
  else if (CONST_DOUBLE_P (x)
	   && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
    {
      long k[2];

      if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
	REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
      else
	REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc FD_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff);
	  fprintf (file, "0x%lx%08lx\n",
		   k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
	  return;
	}
      else
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs ("\t.long ", file);
	  else
	    fprintf (file, "\t.tc FD_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff);
	  fprintf (file, "0x%lx,0x%lx\n",
		   k[0] & 0xffffffff, k[1] & 0xffffffff);
	  return;
	}
    }
  else if (CONST_DOUBLE_P (x)
	   && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
    {
      long l;

      if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
	REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
      else
	REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
	  if (WORDS_BIG_ENDIAN)
	    fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
	  else
	    fprintf (file, "0x%lx\n", l & 0xffffffff);
	  return;
	}
      else
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs ("\t.long ", file);
	  else
	    fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
	  fprintf (file, "0x%lx\n", l & 0xffffffff);
	  return;
	}
    }
  else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
    {
      unsigned HOST_WIDE_INT low;
      HOST_WIDE_INT high;

      low = INTVAL (x) & 0xffffffff;
      high = (HOST_WIDE_INT) INTVAL (x) >> 32;

      /* TOC entries are always Pmode-sized, so when big-endian
	 smaller integer constants in the TOC need to be padded.
	 (This is still a win over putting the constants in
	 a separate constant pool, because then we'd have
	 to have both a TOC entry _and_ the actual constant.)

	 For a 32-bit target, CONST_INT values are loaded and shifted
	 entirely within `low' and can be stored in one TOC entry.  */

      /* It would be easy to make this work, but it doesn't now.  */
      gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));

      if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
	{
	  low |= high << 32;
	  low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
	  high = (HOST_WIDE_INT) low >> 32;
	  low &= 0xffffffff;
	}

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc ID_%lx_%lx[TC],",
		     (long) high & 0xffffffff, (long) low & 0xffffffff);
	  fprintf (file, "0x%lx%08lx\n",
		   (long) high & 0xffffffff, (long) low & 0xffffffff);
	  return;
	}
      else
	{
	  if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
	    {
	      if (TARGET_ELF || TARGET_MINIMAL_TOC)
		fputs ("\t.long ", file);
	      else
		fprintf (file, "\t.tc ID_%lx_%lx[TC],",
			 (long) high & 0xffffffff, (long) low & 0xffffffff);
	      fprintf (file, "0x%lx,0x%lx\n",
		       (long) high & 0xffffffff, (long) low & 0xffffffff);
	      return;
	    }
	  else
	    {
	      if (TARGET_ELF || TARGET_MINIMAL_TOC)
		fputs ("\t.long ", file);
	      else
		fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
	      fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
	      return;
	    }
	}
    }

  if (GET_CODE (x) == CONST)
    {
      gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
		  && CONST_INT_P (XEXP (XEXP (x, 0), 1)));

      base = XEXP (XEXP (x, 0), 0);
      offset = INTVAL (XEXP (XEXP (x, 0), 1));
    }

  switch (GET_CODE (base))
    {
    case SYMBOL_REF:
      name = XSTR (base, 0);
      break;

    case LABEL_REF:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L",
				   CODE_LABEL_NUMBER (XEXP (base, 0)));
      break;

    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_ELF || TARGET_MINIMAL_TOC)
    fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
  else
    {
      fputs ("\t.tc ", file);
      RS6000_OUTPUT_BASENAME (file, name);

      if (offset < 0)
	fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
      else if (offset)
	fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);

      /* Mark large TOC symbols on AIX with [TE] so they are mapped
	 after other TOC symbols, reducing overflow of small TOC access
	 to [TC] symbols.  */
      fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
	     ? "[TE]," : "[TC],", file);
    }

  /* Currently C++ toc references to vtables can be emitted before it
     is decided whether the vtable is public or private.  If this is
     the case, then the linker will eventually complain that there is
     a TOC reference to an unknown section.  Thus, for vtables only,
     we emit the TOC reference to reference the symbol and not the
     section.  */
  if (VTABLE_NAME_P (name))
    {
      RS6000_OUTPUT_BASENAME (file, name);
      if (offset < 0)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
      else if (offset > 0)
	fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
    }
  else
    output_addr_const (file, x);

  if (TARGET_XCOFF && SYMBOL_REF_P (base))
    {
      switch (SYMBOL_REF_TLS_MODEL (base))
	{
	case 0:
	  break;
	case TLS_MODEL_LOCAL_EXEC:
	  fputs ("@le", file);
	  break;
	case TLS_MODEL_INITIAL_EXEC:
	  fputs ("@ie", file);
	  break;
	/* Use global-dynamic for local-dynamic.  */
	case TLS_MODEL_GLOBAL_DYNAMIC:
	case TLS_MODEL_LOCAL_DYNAMIC:
	  putc ('\n', file);
	  (*targetm.asm_out.internal_label) (file, "LCM", labelno);
	  fputs ("\t.tc .", file);
	  RS6000_OUTPUT_BASENAME (file, name);
	  fputs ("[TC],", file);
	  output_addr_const (file, x);
	  fputs ("@m", file);
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  putc ('\n', file);
}
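
/* As an illustration, a TOC entry for the double constant 1.0 on 64-bit
   AIX would come out along the lines of (sketch; exact label numbering
   varies):

   LC..1:
	.tc FD_3ff00000_0[TC],0x3ff0000000000000

   while ELF or -mminimal-toc targets get just a doubleword directive
   after the internal label, with no .tc name.  */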
/* Output an assembler pseudo-op to write an ASCII string of N characters
   starting at P to FILE.

   On the RS/6000, we have to do this using the .byte operation and
   write out special characters outside the quoted string.
   Also, the assembler is broken; very long strings are truncated,
   so we must artificially break them up early.  */

void
output_ascii (FILE *file, const char *p, int n)
{
  char c;
  int i, count_string;
  const char *for_string = "\t.byte \"";
  const char *for_decimal = "\t.byte ";
  const char *to_close = NULL;

  count_string = 0;
  for (i = 0; i < n; i++)
    {
      c = *p++;
      if (c >= ' ' && c < 0177)
	{
	  if (for_string)
	    fputs (for_string, file);
	  putc (c, file);

	  /* Write two quotes to get one.  */
	  if (c == '"')
	    {
	      putc (c, file);
	      ++count_string;
	    }

	  for_string = NULL;
	  for_decimal = "\"\n\t.byte ";
	  to_close = "\"\n";
	  ++count_string;

	  if (count_string >= 512)
	    {
	      fputs (to_close, file);

	      for_string = "\t.byte \"";
	      for_decimal = "\t.byte ";
	      to_close = NULL;
	      count_string = 0;
	    }
	}
      else
	{
	  if (for_decimal)
	    fputs (for_decimal, file);
	  fprintf (file, "%d", c);

	  for_string = "\n\t.byte \"";
	  for_decimal = ", ";
	  to_close = "\n";
	  count_string = 0;
	}
    }

  /* Now close the string if we have written one.  Then end the line.  */
  if (to_close)
    fputs (to_close, file);
}
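
/* Example of the output shape: the string "Hi\n" is emitted as

	.byte "Hi"
	.byte 10

   Printable runs stay inside one quoted .byte string (broken up every 512
   characters), while unprintable bytes are written out in decimal.  */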
/* Generate a unique section name for FILENAME for a section type
   represented by SECTION_DESC.  Output goes into BUF.

   SECTION_DESC can be any string, as long as it is different for each
   possible section type.

   We name the section in the same manner as xlc.  The name begins with an
   underscore followed by the filename (after stripping any leading directory
   names) with the last period replaced by the string SECTION_DESC.  If
   FILENAME does not contain a period, SECTION_DESC is appended to the end of
   FILENAME.  */

void
rs6000_gen_section_name (char **buf, const char *filename,
			 const char *section_desc)
{
  const char *q, *after_last_slash, *last_period = 0;
  char *p;
  int len;

  after_last_slash = filename;
  for (q = filename; *q; q++)
    {
      if (*q == '/')
	after_last_slash = q + 1;
      else if (*q == '.')
	last_period = q;
    }

  len = strlen (after_last_slash) + strlen (section_desc) + 2;
  *buf = (char *) xmalloc (len);

  p = *buf;
  *p++ = '_';

  for (q = after_last_slash; *q; q++)
    {
      if (q == last_period)
	{
	  strcpy (p, section_desc);
	  p += strlen (section_desc);
	  break;
	}
      else if (ISALNUM (*q))
	*p++ = *q;
    }

  if (last_period == 0)
    strcpy (p, section_desc);
  else
    *p = '\0';
}
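
/* Example: FILENAME "../src/foo.c" with SECTION_DESC "_bss_" produces
   "_foo_bss_": leading directories are stripped, the last period (and
   anything after it) is replaced by the descriptor, and non-alphanumeric
   characters are dropped along the way.  */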
/* Emit profile function.  */

void
output_profile_hook (int labelno ATTRIBUTE_UNUSED)
{
  /* Non-standard profiling for kernels, which just saves LR then calls
     _mcount without worrying about arg saves.  The idea is to change
     the function prologue as little as possible as it isn't easy to
     account for arg save/restore code added just for _mcount.  */
  if (TARGET_PROFILE_KERNEL)
    return;

  if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    {
#ifndef NO_PROFILE_COUNTERS
# define NO_PROFILE_COUNTERS 0
#endif
      if (NO_PROFILE_COUNTERS)
	emit_library_call (init_one_libfunc (RS6000_MCOUNT),
			   LCT_NORMAL, VOIDmode);
      else
	{
	  char buf[30];
	  const char *label_name;
	  rtx fun;

	  ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
	  label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
	  fun = gen_rtx_SYMBOL_REF (Pmode, label_name);

	  emit_library_call (init_one_libfunc (RS6000_MCOUNT),
			     LCT_NORMAL, VOIDmode, fun, Pmode);
	}
    }
  else if (DEFAULT_ABI == ABI_DARWIN)
    {
      const char *mcount_name = RS6000_MCOUNT;
      int caller_addr_regno = LR_REGNO;

      /* Be conservative and always set this, at least for now.  */
      crtl->uses_pic_offset_table = 1;

      /* For PIC code, set up a stub and collect the caller's address
	 from r0, which is where the prologue puts it.  */
      if (MACHOPIC_INDIRECT
	  && crtl->uses_pic_offset_table)
	caller_addr_regno = 0;

      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
			 LCT_NORMAL, VOIDmode,
			 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
    }
}
/* Write function profiler code.  */

void
output_function_profiler (FILE *file, int labelno)
{
  char buf[100];

  switch (DEFAULT_ABI)
    {
    default:
      gcc_unreachable ();

    case ABI_V4:
      if (!TARGET_32BIT)
	{
	  warning (0, "no profiling of 64-bit code for this ABI");
	  return;
	}
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
      fprintf (file, "\tmflr %s\n", reg_names[0]);
      if (NO_PROFILE_COUNTERS)
	{
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	}
      else if (TARGET_SECURE_PLT && flag_pic)
	{
	  if (TARGET_LINK_STACK)
	    {
	      char name[32];
	      get_ppc476_thunk_name (name);
	      asm_fprintf (file, "\tbl %s\n", name);
	    }
	  else
	    asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
	  asm_fprintf (file, "\taddis %s,%s,",
		       reg_names[12], reg_names[12]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
	}
      else if (flag_pic == 1)
	{
	  fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
	  asm_fprintf (file, "\tlwz %s,", reg_names[0]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "@got(%s)\n", reg_names[12]);
	}
      else if (flag_pic > 1)
	{
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  /* Now, we need to get the address of the label.  */
	  if (TARGET_LINK_STACK)
	    {
	      char name[32];
	      get_ppc476_thunk_name (name);
	      asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
	      assemble_name (file, buf);
	      fputs ("-.\n1:", file);
	      asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
	      asm_fprintf (file, "\taddi %s,%s,4\n",
			   reg_names[11], reg_names[11]);
	    }
	  else
	    {
	      fputs ("\tbcl 20,31,1f\n\t.long ", file);
	      assemble_name (file, buf);
	      fputs ("-.\n1:", file);
	      asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
	    }
	  asm_fprintf (file, "\tlwz %s,0(%s)\n",
		       reg_names[0], reg_names[11]);
	  asm_fprintf (file, "\tadd %s,%s,%s\n",
		       reg_names[0], reg_names[0], reg_names[11]);
	}
      else
	{
	  asm_fprintf (file, "\tlis %s,", reg_names[12]);
	  assemble_name (file, buf);
	  fputs ("@ha\n", file);
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  asm_fprintf (file, "\tla %s,", reg_names[0]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "@l(%s)\n", reg_names[12]);
	}

      /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH.  */
      fprintf (file, "\tbl %s%s\n",
	       RS6000_MCOUNT, flag_pic ? "@plt" : "");
      break;

    case ABI_AIX:
    case ABI_ELFv2:
    case ABI_DARWIN:
      /* Don't do anything, done in output_profile_hook ().  */
      break;
    }
}
/* The following variable value is the last issued insn.  */

static rtx_insn *last_scheduled_insn;

/* The following variable helps to balance issuing of load and
   store instructions.  */

static int load_store_pendulum;

/* The following variable helps pair divide insns during scheduling.  */
static int divide_cnt;
/* The following variable helps pair and alternate vector and vector load
   insns during scheduling.  */
static int vec_pairing;
/* Power4 load update and store update instructions are cracked into a
   load or store and an integer insn which are executed in the same cycle.
   Branches have their own dispatch slot which does not count against the
   GCC issue rate, but it changes the program flow so there are no other
   instructions to issue in this cycle.  */

static int
rs6000_variable_issue_1 (rtx_insn *insn, int more)
{
  last_scheduled_insn = insn;
  if (GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    {
      cached_can_issue_more = more;
      return cached_can_issue_more;
    }

  if (insn_terminates_group_p (insn, current_group))
    {
      cached_can_issue_more = 0;
      return cached_can_issue_more;
    }

  /* If no reservation, but reach here */
  if (recog_memoized (insn) < 0)
    return more;

  if (rs6000_sched_groups)
    {
      if (is_microcoded_insn (insn))
	cached_can_issue_more = 0;
      else if (is_cracked_insn (insn))
	cached_can_issue_more = more > 2 ? more - 2 : 0;
      else
	cached_can_issue_more = more - 1;

      return cached_can_issue_more;
    }

  if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
    return 0;

  cached_can_issue_more = more - 1;
  return cached_can_issue_more;
}

static int
rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
{
  int r = rs6000_variable_issue_1 (insn, more);
  if (verbose)
    fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
  return r;
}
/* Adjust the cost of a scheduling dependency.  Return the new cost of
   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */

static int
rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
		    unsigned int)
{
  enum attr_type attr_type;

  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
    return cost;

  switch (dep_type)
    {
    case REG_DEP_TRUE:
      {
	/* Data dependency; DEP_INSN writes a register that INSN reads
	   some cycles later.  */

	/* Separate a load from a narrower, dependent store.  */
	if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
	     || rs6000_tune == PROCESSOR_POWER10
	     || rs6000_tune == PROCESSOR_POWER11)
	    && GET_CODE (PATTERN (insn)) == SET
	    && GET_CODE (PATTERN (dep_insn)) == SET
	    && MEM_P (XEXP (PATTERN (insn), 1))
	    && MEM_P (XEXP (PATTERN (dep_insn), 0))
	    && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
		> GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
	  return cost + 14;

	attr_type = get_attr_type (insn);

	switch (attr_type)
	  {
	  case TYPE_JMPREG:
	    /* Tell the first scheduling pass about the latency between
	       a mtctr and bctr (and mtlr and br/blr).  The first
	       scheduling pass will not know about this latency since
	       the mtctr instruction, which has the latency associated
	       to it, will be generated by reload.  */
	    return 4;
	  case TYPE_BRANCH:
	    /* Leave some extra cycles between a compare and its
	       dependent branch, to inhibit expensive mispredicts.  */
	    if ((rs6000_tune == PROCESSOR_PPC603
		 || rs6000_tune == PROCESSOR_PPC604
		 || rs6000_tune == PROCESSOR_PPC604e
		 || rs6000_tune == PROCESSOR_PPC620
		 || rs6000_tune == PROCESSOR_PPC630
		 || rs6000_tune == PROCESSOR_PPC750
		 || rs6000_tune == PROCESSOR_PPC7400
		 || rs6000_tune == PROCESSOR_PPC7450
		 || rs6000_tune == PROCESSOR_PPCE5500
		 || rs6000_tune == PROCESSOR_PPCE6500
		 || rs6000_tune == PROCESSOR_POWER4
		 || rs6000_tune == PROCESSOR_POWER5
		 || rs6000_tune == PROCESSOR_POWER7
		 || rs6000_tune == PROCESSOR_POWER8
		 || rs6000_tune == PROCESSOR_POWER9
		 || rs6000_tune == PROCESSOR_POWER10
		 || rs6000_tune == PROCESSOR_POWER11
		 || rs6000_tune == PROCESSOR_CELL)
		&& recog_memoized (dep_insn)
		&& (INSN_CODE (dep_insn) >= 0))

	      switch (get_attr_type (dep_insn))
		{
		case TYPE_CMP:
		case TYPE_FPCOMPARE:
		case TYPE_CR_LOGICAL:
		  return cost + 2;
		case TYPE_EXTS:
		case TYPE_MUL:
		  if (get_attr_dot (dep_insn) == DOT_YES)
		    return cost + 2;
		  else
		    break;
		case TYPE_SHIFT:
		  if (get_attr_dot (dep_insn) == DOT_YES
		      && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
		    return cost + 2;
		  else
		    break;
		default:
		  break;
		}
	    break;

	  case TYPE_STORE:
	  case TYPE_FPSTORE:
	    if ((rs6000_tune == PROCESSOR_POWER6)
		&& recog_memoized (dep_insn)
		&& (INSN_CODE (dep_insn) >= 0))
	      {

		if (GET_CODE (PATTERN (insn)) != SET)
		  /* If this happens, we have to extend this to schedule
		     optimally.  Return default for now.  */
		  return cost;

		/* Adjust the cost for the case where the value written
		   by a fixed point operation is used as the address
		   gen value on a store.  */
		switch (get_attr_type (dep_insn))
		  {
		  case TYPE_LOAD:
		  case TYPE_CNTLZ:
		    {
		      if (! rs6000_store_data_bypass_p (dep_insn, insn))
			return get_attr_sign_extend (dep_insn)
			       == SIGN_EXTEND_YES ? 6 : 4;
		      break;
		    }
		  case TYPE_SHIFT:
		    {
		      if (! rs6000_store_data_bypass_p (dep_insn, insn))
			return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
			       6 : 3;
		      break;
		    }
		  case TYPE_INTEGER:
		  case TYPE_ADD:
		  case TYPE_LOGICAL:
		  case TYPE_EXTS:
		  case TYPE_INSERT:
		    {
		      if (! rs6000_store_data_bypass_p (dep_insn, insn))
			return 3;
		      break;
		    }
		  case TYPE_STORE:
		  case TYPE_FPLOAD:
		  case TYPE_FPSTORE:
		    {
		      if (get_attr_update (dep_insn) == UPDATE_YES
			  && ! rs6000_store_data_bypass_p (dep_insn, insn))
			return 3;
		      break;
		    }
		  case TYPE_MUL:
		    {
		      if (! rs6000_store_data_bypass_p (dep_insn, insn))
			return 17;
		      break;
		    }
		  case TYPE_DIV:
		    {
		      if (! rs6000_store_data_bypass_p (dep_insn, insn))
			return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
		      break;
		    }
		  default:
		    break;
		  }
	      }
	    break;

	  case TYPE_LOAD:
	    if ((rs6000_tune == PROCESSOR_POWER6)
		&& recog_memoized (dep_insn)
		&& (INSN_CODE (dep_insn) >= 0))
	      {

		/* Adjust the cost for the case where the value written
		   by a fixed point instruction is used within the address
		   gen portion of a subsequent load(u)(x).  */
		switch (get_attr_type (dep_insn))
		  {
		  case TYPE_LOAD:
		  case TYPE_CNTLZ:
		    {
		      if (set_to_load_agen (dep_insn, insn))
			return get_attr_sign_extend (dep_insn)
			       == SIGN_EXTEND_YES ? 6 : 4;
		      break;
		    }
		  case TYPE_SHIFT:
		    {
		      if (set_to_load_agen (dep_insn, insn))
			return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
			       6 : 3;
		      break;
		    }
		  case TYPE_INTEGER:
		  case TYPE_ADD:
		  case TYPE_LOGICAL:
		  case TYPE_EXTS:
		  case TYPE_INSERT:
		    {
		      if (set_to_load_agen (dep_insn, insn))
			return 3;
		      break;
		    }
		  case TYPE_STORE:
		  case TYPE_FPLOAD:
		  case TYPE_FPSTORE:
		    {
		      if (get_attr_update (dep_insn) == UPDATE_YES
			  && set_to_load_agen (dep_insn, insn))
			return 3;
		      break;
		    }
		  case TYPE_MUL:
		    {
		      if (set_to_load_agen (dep_insn, insn))
			return 17;
		      break;
		    }
		  case TYPE_DIV:
		    {
		      if (set_to_load_agen (dep_insn, insn))
			return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
		      break;
		    }
		  default:
		    break;
		  }
	      }
	    break;

	  default:
	    break;
	  }

	/* Fall out to return default cost.  */
      }
      break;

    case REG_DEP_OUTPUT:
      /* Output dependency; DEP_INSN writes a register that INSN writes some
	 cycles later.  */
      if ((rs6000_tune == PROCESSOR_POWER6)
	  && recog_memoized (dep_insn)
	  && (INSN_CODE (dep_insn) >= 0))
	{
	  attr_type = get_attr_type (insn);

	  switch (attr_type)
	    {
	    case TYPE_FP:
	    case TYPE_FPSIMPLE:
	      if (get_attr_type (dep_insn) == TYPE_FP
		  || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
		return 1;
	      break;
	    default:
	      break;
	    }
	}
      /* Fall through, no cost for output dependency.  */
      /* FALLTHRU */

    case REG_DEP_ANTI:
      /* Anti dependency; DEP_INSN reads a register that INSN writes some
	 cycles later.  */
      return 0;

    default:
      gcc_unreachable ();
    }

  return cost;
}

/* Debug version of rs6000_adjust_cost.  */

static int
rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
			  int cost, unsigned int dw)
{
  int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);

  if (ret != cost)
    {
      const char *dep;

      switch (dep_type)
	{
	default:	     dep = "unknown dependency"; break;
	case REG_DEP_TRUE:   dep = "data dependency";	 break;
	case REG_DEP_OUTPUT: dep = "output dependency";	 break;
	case REG_DEP_ANTI:   dep = "anti dependency";	 break;
	}

      fprintf (stderr,
	       "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
	       "%s, insn:\n", ret, cost, dep);

      debug_rtx (insn);
    }

  return ret;
}
/* The function returns true if INSN is microcoded.
   Return false otherwise.  */

static bool
is_microcoded_insn (rtx_insn *insn)
{
  if (!insn || !NONDEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  if (rs6000_tune == PROCESSOR_CELL)
    return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;

  if (rs6000_sched_groups
      && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
    {
      enum attr_type type = get_attr_type (insn);
      if ((type == TYPE_LOAD
	   && get_attr_update (insn) == UPDATE_YES
	   && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
	  || ((type == TYPE_LOAD || type == TYPE_STORE)
	      && get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_YES)
	  || type == TYPE_MFCR)
	return true;
    }

  return false;
}
/* The function returns true if INSN is cracked into 2 instructions
   by the processor (and therefore occupies 2 issue slots).  */

static bool
is_cracked_insn (rtx_insn *insn)
{
  if (!insn || !NONDEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  if (rs6000_sched_groups
      && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
    {
      enum attr_type type = get_attr_type (insn);
      if ((type == TYPE_LOAD
	   && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
	   && get_attr_update (insn) == UPDATE_NO)
	  || (type == TYPE_LOAD
	      && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
	      && get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_NO)
	  || (type == TYPE_STORE
	      && get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_NO)
	  || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
	      && get_attr_update (insn) == UPDATE_YES)
	  || (type == TYPE_CR_LOGICAL
	      && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
	  || (type == TYPE_EXTS
	      && get_attr_dot (insn) == DOT_YES)
	  || (type == TYPE_SHIFT
	      && get_attr_dot (insn) == DOT_YES
	      && get_attr_var_shift (insn) == VAR_SHIFT_NO)
	  || (type == TYPE_MUL
	      && get_attr_dot (insn) == DOT_YES)
	  || type == TYPE_DIV
	  || (type == TYPE_INSERT
	      && get_attr_size (insn) == SIZE_32))
	return true;
    }

  return false;
}
/* The function returns true if INSN can be issued only from
   the branch slot.  */

static bool
is_branch_slot_insn (rtx_insn *insn)
{
  if (!insn || !NONDEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  if (rs6000_sched_groups)
    {
      enum attr_type type = get_attr_type (insn);
      if (type == TYPE_BRANCH || type == TYPE_JMPREG)
	return true;
      return false;
    }

  return false;
}
/* The function returns true if OUT_INSN sets a value that is
   used in the address generation computation of IN_INSN.  */

static bool
set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;

  /* For performance reasons, only handle the simple case where
     both loads are a single_set.  */
  out_set = single_set (out_insn);
  if (out_set)
    {
      in_set = single_set (in_insn);
      if (in_set)
	return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
    }

  return false;
}
/* Try to determine base/offset/size parts of the given MEM.
   Return true if successful, false if all the values couldn't
   be determined.

   This function only looks for REG or REG+CONST address forms.
   REG+REG address form will return false.  */

static bool
get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
		  HOST_WIDE_INT *size)
{
  rtx addr_rtx;
  if (MEM_SIZE_KNOWN_P (mem))
    *size = MEM_SIZE (mem);
  else
    return false;

  addr_rtx = (XEXP (mem, 0));
  if (GET_CODE (addr_rtx) == PRE_MODIFY)
    addr_rtx = XEXP (addr_rtx, 1);

  *offset = 0;
  while (GET_CODE (addr_rtx) == PLUS
	 && CONST_INT_P (XEXP (addr_rtx, 1)))
    {
      *offset += INTVAL (XEXP (addr_rtx, 1));
      addr_rtx = XEXP (addr_rtx, 0);
    }
  if (!REG_P (addr_rtx))
    return false;

  *base = addr_rtx;
  return true;
}
/* If the target storage locations of arguments MEM1 and MEM2 are
   adjacent, then return the argument that has the lower address.
   Otherwise, return NULL_RTX.  */

static rtx
adjacent_mem_locations (rtx mem1, rtx mem2)
{
  rtx reg1, reg2;
  HOST_WIDE_INT off1, size1, off2, size2;

  if (MEM_P (mem1)
      && MEM_P (mem2)
      && get_memref_parts (mem1, &reg1, &off1, &size1)
      && get_memref_parts (mem2, &reg2, &off2, &size2)
      && REGNO (reg1) == REGNO (reg2))
    {
      if (off1 + size1 == off2)
	return mem1;
      else if (off2 + size2 == off1)
	return mem2;
    }

  return NULL_RTX;
}
/* This function returns true if it can be determined that the two MEM
   locations overlap by at least 1 byte based on base reg/offset/size.  */

static bool
mem_locations_overlap (rtx mem1, rtx mem2)
{
  rtx reg1, reg2;
  HOST_WIDE_INT off1, size1, off2, size2;

  if (get_memref_parts (mem1, &reg1, &off1, &size1)
      && get_memref_parts (mem2, &reg2, &off2, &size2))
    return ((REGNO (reg1) == REGNO (reg2))
	    && (((off1 <= off2) && (off1 + size1 > off2))
		|| ((off2 <= off1) && (off2 + size2 > off1))));

  return false;
}
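
/* Worked example: two accesses off the same base register, one at offset 4
   with size 8 and one at offset 8 with size 4, overlap because
   off1 <= off2 && off1 + size1 > off2 (4 <= 8 && 12 > 8).  Merely adjacent
   accesses, such as offset 0/size 4 and offset 4/size 4, do not overlap
   here but are caught by adjacent_mem_locations above.  */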
/* A C statement (sans semicolon) to update the integer scheduling
   priority INSN_PRIORITY (INSN).  Increase the priority to execute the
   INSN earlier, reduce the priority to execute INSN later.  Do not
   define this macro if you do not need to adjust the scheduling
   priorities of insns.  */

static int
rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
{
  rtx load_mem, str_mem;
  /* On machines (like the 750) which have asymmetric integer units,
     where one integer unit can do multiply and divides and the other
     can't, reduce the priority of multiply/divide so it is scheduled
     before other integer operations.  */

#if 0
  if (! INSN_P (insn))
    return priority;

  if (GET_CODE (PATTERN (insn)) == USE)
    return priority;

  switch (rs6000_tune) {
  case PROCESSOR_PPC750:
    switch (get_attr_type (insn))
      {
      default:
	break;

      case TYPE_MUL:
      case TYPE_DIV:
	fprintf (stderr, "priority was %#x (%d) before adjustment\n",
		 priority, priority);
	if (priority >= 0 && priority < 0x01000000)
	  priority >>= 3;
	break;
      }
  }
#endif

  if (insn_must_be_first_in_group (insn)
      && reload_completed
      && current_sched_info->sched_max_insns_priority
      && rs6000_sched_restricted_insns_priority)
    {
      /* Prioritize insns that can be dispatched only in the first
	 dispatch slot.  */
      if (rs6000_sched_restricted_insns_priority == 1)
	/* Attach highest priority to insn.  This means that in
	   haifa-sched.cc:ready_sort(), dispatch-slot restriction considerations
	   precede 'priority' (critical path) considerations.  */
	return current_sched_info->sched_max_insns_priority;
      else if (rs6000_sched_restricted_insns_priority == 2)
	/* Increase priority of insn by a minimal amount.  This means that in
	   haifa-sched.cc:ready_sort(), only 'priority' (critical path)
	   considerations precede dispatch-slot restriction considerations.  */
	return (priority + 1);
    }

  if (rs6000_tune == PROCESSOR_POWER6
      && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
	  || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
    /* Attach highest priority to insn if the scheduler has just issued two
       stores and this instruction is a load, or two loads and this instruction
       is a store.  Power6 wants loads and stores scheduled alternately
       when possible.  */
    return current_sched_info->sched_max_insns_priority;

  return priority;
}
/* Return true if the instruction is nonpipelined on the Cell.  */
static bool
is_nonpipeline_insn (rtx_insn *insn)
{
  enum attr_type type;
  if (!insn || !NONDEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  type = get_attr_type (insn);
  if (type == TYPE_MUL
      || type == TYPE_DIV
      || type == TYPE_SDIV
      || type == TYPE_DDIV
      || type == TYPE_SSQRT
      || type == TYPE_DSQRT
      || type == TYPE_MFCR
      || type == TYPE_MFCRF
      || type == TYPE_MFJMPR)
    return true;

  return false;
}
/* Return how many instructions the machine can issue per cycle.  */

static int
rs6000_issue_rate (void)
{
  /* Unless scheduling for register pressure, use issue rate of 1 for
     first scheduling pass to decrease degradation.  */
  if (!reload_completed && !flag_sched_pressure)
    return 1;

  switch (rs6000_tune) {
  case PROCESSOR_RS64A:
  case PROCESSOR_PPC601: /* ? */
  case PROCESSOR_PPC7450:
    return 3;
  case PROCESSOR_PPC440:
  case PROCESSOR_PPC603:
  case PROCESSOR_PPC750:
  case PROCESSOR_PPC7400:
  case PROCESSOR_PPC8540:
  case PROCESSOR_PPC8548:
  case PROCESSOR_CELL:
  case PROCESSOR_PPCE300C2:
  case PROCESSOR_PPCE300C3:
  case PROCESSOR_PPCE500MC:
  case PROCESSOR_PPCE500MC64:
  case PROCESSOR_PPCE5500:
  case PROCESSOR_PPCE6500:
  case PROCESSOR_TITAN:
    return 2;
  case PROCESSOR_PPC476:
  case PROCESSOR_PPC604:
  case PROCESSOR_PPC604e:
  case PROCESSOR_PPC620:
  case PROCESSOR_PPC630:
    return 4;
  case PROCESSOR_POWER4:
  case PROCESSOR_POWER5:
  case PROCESSOR_POWER6:
  case PROCESSOR_POWER7:
    return 5;
  case PROCESSOR_POWER8:
    return 7;
  case PROCESSOR_POWER9:
    return 6;
  case PROCESSOR_POWER10:
  case PROCESSOR_POWER11:
    return 8;
  default:
    return 1;
  }
}
/* Return how many instructions to look ahead for better insn
   scheduling.  */

static int
rs6000_use_sched_lookahead (void)
{
  switch (rs6000_tune)
    {
    case PROCESSOR_PPC8540:
    case PROCESSOR_PPC8548:
      return 4;

    case PROCESSOR_CELL:
      return (reload_completed ? 8 : 0);

    default:
      return 0;
    }
}
/* We are choosing insn from the ready queue.  Return zero if INSN can be
   chosen.  */

static int
rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
{
  if (ready_index == 0)
    return 0;

  if (rs6000_tune != PROCESSOR_CELL)
    return 0;

  gcc_assert (insn != NULL_RTX && INSN_P (insn));

  if (!reload_completed
      || is_nonpipeline_insn (insn)
      || is_microcoded_insn (insn))
    return 1;

  return 0;
}
/* Determine if PAT refers to memory.  If so, set MEM_REF to the MEM rtx
   and return true.  */

static bool
find_mem_ref (rtx pat, rtx *mem_ref)
{
  const char *fmt;
  int i, j;

  /* stack_tie does not produce any real memory traffic.  */
  if (tie_operand (pat, VOIDmode))
    return false;

  if (MEM_P (pat))
    {
      *mem_ref = pat;
      return true;
    }

  /* Recursively process the pattern.  */
  fmt = GET_RTX_FORMAT (GET_CODE (pat));

  for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
        {
          if (find_mem_ref (XEXP (pat, i), mem_ref))
            return true;
        }
      else if (fmt[i] == 'E')
        for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
          {
            if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
              return true;
          }
    }

  return false;
}
/* Determine if PAT is a PATTERN of a load insn.  */

static bool
is_load_insn1 (rtx pat, rtx *load_mem)
{
  if (!pat || pat == NULL_RTX)
    return false;

  if (GET_CODE (pat) == SET)
    {
      if (REG_P (SET_DEST (pat)))
        return find_mem_ref (SET_SRC (pat), load_mem);
      else
        return false;
    }

  if (GET_CODE (pat) == PARALLEL)
    {
      int i;

      for (i = 0; i < XVECLEN (pat, 0); i++)
        if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
          return true;
    }

  return false;
}

/* Determine if INSN loads from memory.  */

static bool
is_load_insn (rtx insn, rtx *load_mem)
{
  if (!insn || !INSN_P (insn))
    return false;

  if (CALL_P (insn))
    return false;

  return is_load_insn1 (PATTERN (insn), load_mem);
}

/* Determine if PAT is a PATTERN of a store insn.  */

static bool
is_store_insn1 (rtx pat, rtx *str_mem)
{
  if (!pat || pat == NULL_RTX)
    return false;

  if (GET_CODE (pat) == SET)
    {
      if (REG_P (SET_SRC (pat)) || SUBREG_P (SET_SRC (pat)))
        return find_mem_ref (SET_DEST (pat), str_mem);
      else
        return false;
    }

  if (GET_CODE (pat) == PARALLEL)
    {
      int i;

      for (i = 0; i < XVECLEN (pat, 0); i++)
        if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
          return true;
    }

  return false;
}

/* Determine if INSN stores to memory.  */

static bool
is_store_insn (rtx insn, rtx *str_mem)
{
  if (!insn || !INSN_P (insn))
    return false;

  return is_store_insn1 (PATTERN (insn), str_mem);
}
/* Return whether TYPE is a Power9 pairable vector instruction type.  */

static bool
is_power9_pairable_vec_type (enum attr_type type)
{
  switch (type)
    {
    case TYPE_VECSIMPLE:
    case TYPE_VECCOMPLEX:
    case TYPE_VECEXTS:
    case TYPE_VECCMP:
    case TYPE_VECPERM:
    case TYPE_VECFLOAT:
    case TYPE_VECFDIV:
    case TYPE_VECDOUBLE:
      return true;
    default:
      break;
    }
  return false;
}
/* Returns whether the dependence between INSN and NEXT is considered
   costly by the given target.  */

static bool
rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
{
  rtx insn;
  rtx next;
  rtx load_mem, str_mem;

  /* If the flag is not enabled - no dependence is considered costly;
     allow all dependent insns in the same group.
     This is the most aggressive option.  */
  if (rs6000_sched_costly_dep == no_dep_costly)
    return false;

  /* If the flag is set to 1 - a dependence is always considered costly;
     do not allow dependent instructions in the same group.
     This is the most conservative option.  */
  if (rs6000_sched_costly_dep == all_deps_costly)
    return true;

  insn = DEP_PRO (dep);
  next = DEP_CON (dep);

  if (rs6000_sched_costly_dep == store_to_load_dep_costly
      && is_load_insn (next, &load_mem)
      && is_store_insn (insn, &str_mem))
    /* Prevent load after store in the same group.  */
    return true;

  if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
      && is_load_insn (next, &load_mem)
      && is_store_insn (insn, &str_mem)
      && DEP_TYPE (dep) == REG_DEP_TRUE
      && mem_locations_overlap (str_mem, load_mem))
    /* Prevent load after store in the same group if it is a true
       dependence.  */
    return true;

  /* The flag is set to X; dependences with latency >= X are considered
     costly, and will not be scheduled in the same group.  */
  if (rs6000_sched_costly_dep <= max_dep_latency
      && ((cost - distance) >= (int) rs6000_sched_costly_dep))
    return true;

  return false;
}
/* Return the next insn after INSN that is found before TAIL is reached,
   skipping any "non-active" insns - insns that will not actually occupy
   an issue slot.  Return NULL_RTX if such an insn is not found.  */

static rtx_insn *
get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
{
  if (insn == NULL_RTX || insn == tail)
    return NULL;

  while (1)
    {
      insn = NEXT_INSN (insn);
      if (insn == NULL_RTX || insn == tail)
        return NULL;

      if (CALL_P (insn)
          || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
          || (NONJUMP_INSN_P (insn)
              && GET_CODE (PATTERN (insn)) != USE
              && GET_CODE (PATTERN (insn)) != CLOBBER
              && INSN_CODE (insn) != CODE_FOR_stack_tie))
        break;
    }
  return insn;
}
/* Move instruction at POS to the end of the READY list.  */

static void
move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
{
  int i;
  rtx_insn *tmp = ready[pos];

  for (i = pos; i < lastpos; i++)
    ready[i] = ready[i + 1];
  ready[lastpos] = tmp;
}
/* Do Power6 specific sched_reorder2 reordering of ready list.  */

static int
power6_sched_reorder2 (rtx_insn **ready, int lastpos)
{
  /* For Power6, we need to handle some special cases to try and keep the
     store queue from overflowing and triggering expensive flushes.

     This code monitors how load and store instructions are being issued
     and skews the ready list one way or the other to increase the likelihood
     that a desired instruction is issued at the proper time.

     A couple of things are done.  First, we maintain a "load_store_pendulum"
     to track the current state of load/store issue.

       - If the pendulum is at zero, then no loads or stores have been
         issued in the current cycle so we do nothing.

       - If the pendulum is 1, then a single load has been issued in this
         cycle and we attempt to locate another load in the ready list to
         issue with it.

       - If the pendulum is -2, then two stores have already been
         issued in this cycle, so we increase the priority of the first load
         in the ready list to increase its likelihood of being chosen first
         in the next cycle.

       - If the pendulum is -1, then a single store has been issued in this
         cycle and we attempt to locate another store in the ready list to
         issue with it, preferring a store to an adjacent memory location to
         facilitate store pairing in the store queue.

       - If the pendulum is 2, then two loads have already been
         issued in this cycle, so we increase the priority of the first store
         in the ready list to increase its likelihood of being chosen first
         in the next cycle.

       - If the pendulum < -2 or > 2, then do nothing.

     Note: This code covers the most common scenarios.  There exist non
           load/store instructions which make use of the LSU and which
           would need to be accounted for to strictly model the behavior
           of the machine.  Those instructions are currently unaccounted
           for to help minimize compile time overhead of this code.  */

  int pos;
  rtx load_mem, str_mem;

  if (is_store_insn (last_scheduled_insn, &str_mem))
    /* Issuing a store, swing the load_store_pendulum to the left.  */
    load_store_pendulum--;
  else if (is_load_insn (last_scheduled_insn, &load_mem))
    /* Issuing a load, swing the load_store_pendulum to the right.  */
    load_store_pendulum++;
  else
    return cached_can_issue_more;

  /* If the pendulum is balanced, or there is only one instruction on
     the ready list, then all is well, so return.  */
  if ((load_store_pendulum == 0) || (lastpos <= 0))
    return cached_can_issue_more;

  if (load_store_pendulum == 1)
    {
      /* A load has been issued in this cycle.  Scan the ready list
         for another load to issue with it.  */
      pos = lastpos;

      while (pos >= 0)
        {
          if (is_load_insn (ready[pos], &load_mem))
            {
              /* Found a load.  Move it to the head of the ready list,
                 and adjust its priority so that it is more likely to
                 stay there.  */
              move_to_end_of_ready (ready, pos, lastpos);

              if (!sel_sched_p ()
                  && INSN_PRIORITY_KNOWN (ready[lastpos]))
                INSN_PRIORITY (ready[lastpos])++;
              break;
            }
          pos--;
        }
    }
  else if (load_store_pendulum == -2)
    {
      /* Two stores have been issued in this cycle.  Increase the
         priority of the first load in the ready list to favor it for
         issuing in the next cycle.  */
      pos = lastpos;

      while (pos >= 0)
        {
          if (is_load_insn (ready[pos], &load_mem)
              && !sel_sched_p ()
              && INSN_PRIORITY_KNOWN (ready[pos]))
            {
              INSN_PRIORITY (ready[pos])++;

              /* Adjust the pendulum to account for the fact that a load
                 was found and increased in priority.  This is to prevent
                 increasing the priority of multiple loads.  */
              load_store_pendulum--;

              break;
            }
          pos--;
        }
    }
  else if (load_store_pendulum == -1)
    {
      /* A store has been issued in this cycle.  Scan the ready list for
         another store to issue with it, preferring a store to an adjacent
         memory location.  */
      int first_store_pos = -1;

      pos = lastpos;

      while (pos >= 0)
        {
          if (is_store_insn (ready[pos], &str_mem))
            {
              rtx str_mem2;
              /* Maintain the index of the first store found on the
                 list.  */
              if (first_store_pos == -1)
                first_store_pos = pos;

              if (is_store_insn (last_scheduled_insn, &str_mem2)
                  && adjacent_mem_locations (str_mem, str_mem2))
                {
                  /* Found an adjacent store.  Move it to the head of the
                     ready list, and adjust its priority so that it is
                     more likely to stay there.  */
                  move_to_end_of_ready (ready, pos, lastpos);

                  if (!sel_sched_p ()
                      && INSN_PRIORITY_KNOWN (ready[lastpos]))
                    INSN_PRIORITY (ready[lastpos])++;

                  first_store_pos = -1;

                  break;
                }
            }
          pos--;
        }

      if (first_store_pos >= 0)
        {
          /* An adjacent store wasn't found, but a non-adjacent store was,
             so move the non-adjacent store to the front of the ready
             list, and adjust its priority so that it is more likely to
             stay there.  */
          move_to_end_of_ready (ready, first_store_pos, lastpos);
          if (!sel_sched_p ()
              && INSN_PRIORITY_KNOWN (ready[lastpos]))
            INSN_PRIORITY (ready[lastpos])++;
        }
    }
  else if (load_store_pendulum == 2)
    {
      /* Two loads have been issued in this cycle.  Increase the priority
         of the first store in the ready list to favor it for issuing in
         the next cycle.  */
      pos = lastpos;

      while (pos >= 0)
        {
          if (is_store_insn (ready[pos], &str_mem)
              && !sel_sched_p ()
              && INSN_PRIORITY_KNOWN (ready[pos]))
            {
              INSN_PRIORITY (ready[pos])++;

              /* Adjust the pendulum to account for the fact that a store
                 was found and increased in priority.  This is to prevent
                 increasing the priority of multiple stores.  */
              load_store_pendulum++;

              break;
            }
          pos--;
        }
    }

  return cached_can_issue_more;
}
/* Do Power9 specific sched_reorder2 reordering of ready list.  */

static int
power9_sched_reorder2 (rtx_insn **ready, int lastpos)
{
  int pos;
  enum attr_type type, type2;

  type = get_attr_type (last_scheduled_insn);

  /* Try to issue fixed point divides back-to-back in pairs so they will be
     routed to separate execution units and execute in parallel.  */
  if (type == TYPE_DIV && divide_cnt == 0)
    {
      /* First divide has been scheduled.  */
      divide_cnt = 1;

      /* Scan the ready list looking for another divide, if found move it
         to the end of the list so it is chosen next.  */
      pos = lastpos;
      while (pos >= 0)
        {
          if (recog_memoized (ready[pos]) >= 0
              && get_attr_type (ready[pos]) == TYPE_DIV)
            {
              move_to_end_of_ready (ready, pos, lastpos);
              break;
            }
          pos--;
        }
    }
  else
    {
      /* Last insn was the 2nd divide or not a divide, reset the counter.  */
      divide_cnt = 0;

      /* The best dispatch throughput for vector and vector load insns can be
         achieved by interleaving a vector and vector load such that they'll
         dispatch to the same superslice.  If this pairing cannot be achieved
         then it is best to pair vector insns together and vector load insns
         together.

         To aid in this pairing, vec_pairing maintains the current state with
         the following values:

             0 : Initial state, no vecload/vector pairing has been started.

             1 : A vecload or vector insn has been issued and a candidate for
                 pairing has been found and moved to the end of the ready
                 list.  */
      if (type == TYPE_VECLOAD)
        {
          /* Issued a vecload.  */
          if (vec_pairing == 0)
            {
              int vecload_pos = -1;
              /* We issued a single vecload, look for a vector insn to pair
                 it with.  If one isn't found, try to pair another vecload.  */
              pos = lastpos;
              while (pos >= 0)
                {
                  if (recog_memoized (ready[pos]) >= 0)
                    {
                      type2 = get_attr_type (ready[pos]);
                      if (is_power9_pairable_vec_type (type2))
                        {
                          /* Found a vector insn to pair with, move it to the
                             end of the ready list so it is scheduled next.  */
                          move_to_end_of_ready (ready, pos, lastpos);
                          vec_pairing = 1;
                          return cached_can_issue_more;
                        }
                      else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
                        /* Remember position of first vecload seen.  */
                        vecload_pos = pos;
                    }
                  pos--;
                }
              if (vecload_pos >= 0)
                {
                  /* Didn't find a vector to pair with but did find a vecload,
                     move it to the end of the ready list.  */
                  move_to_end_of_ready (ready, vecload_pos, lastpos);
                  vec_pairing = 1;
                  return cached_can_issue_more;
                }
            }
        }
      else if (is_power9_pairable_vec_type (type))
        {
          /* Issued a vector operation.  */
          if (vec_pairing == 0)
            {
              int vec_pos = -1;
              /* We issued a single vector insn, look for a vecload to pair
                 it with.  If one isn't found, try to pair another vector.  */
              pos = lastpos;
              while (pos >= 0)
                {
                  if (recog_memoized (ready[pos]) >= 0)
                    {
                      type2 = get_attr_type (ready[pos]);
                      if (type2 == TYPE_VECLOAD)
                        {
                          /* Found a vecload insn to pair with, move it to the
                             end of the ready list so it is scheduled next.  */
                          move_to_end_of_ready (ready, pos, lastpos);
                          vec_pairing = 1;
                          return cached_can_issue_more;
                        }
                      else if (is_power9_pairable_vec_type (type2)
                               && vec_pos == -1)
                        /* Remember position of first vector insn seen.  */
                        vec_pos = pos;
                    }
                  pos--;
                }
              if (vec_pos >= 0)
                {
                  /* Didn't find a vecload to pair with but did find a vector
                     insn, move it to the end of the ready list.  */
                  move_to_end_of_ready (ready, vec_pos, lastpos);
                  vec_pairing = 1;
                  return cached_can_issue_more;
                }
            }
        }

      /* We've either finished a vec/vecload pair, couldn't find an insn to
         continue the current pair, or the last insn had nothing to do with
         pairing.  In any case, reset the state.  */
      vec_pairing = 0;
    }

  return cached_can_issue_more;
}
/* Determine if INSN is a store to memory that can be fused with a similar
   adjacent store.  */

static bool
is_fusable_store (rtx_insn *insn, rtx *str_mem)
{
  /* Insn must be a non-prefixed base+disp form store.  */
  if (is_store_insn (insn, str_mem)
      && get_attr_prefixed (insn) == PREFIXED_NO
      && get_attr_update (insn) == UPDATE_NO
      && get_attr_indexed (insn) == INDEXED_NO)
    {
      /* Further restrictions by mode and size.  */
      if (!MEM_SIZE_KNOWN_P (*str_mem))
        return false;

      machine_mode mode = GET_MODE (*str_mem);
      HOST_WIDE_INT size = MEM_SIZE (*str_mem);

      if (INTEGRAL_MODE_P (mode))
        /* Must be word or dword size.  */
        return (size == 4 || size == 8);
      else if (FLOAT_MODE_P (mode))
        /* Must be dword size.  */
        return (size == 8);
    }

  return false;
}
/* Do Power10 specific reordering of the ready list.  */

static int
power10_sched_reorder (rtx_insn **ready, int lastpos)
{
  rtx mem1;

  /* Do store fusion during sched2 only.  */
  if (!reload_completed)
    return cached_can_issue_more;

  /* If the prior insn finished off a store fusion pair then simply
     reset the counter and return, nothing more to do.  */
  if (load_store_pendulum != 0)
    {
      load_store_pendulum = 0;
      return cached_can_issue_more;
    }

  /* Try to pair certain store insns to adjacent memory locations
     so that the hardware will fuse them to a single operation.  */
  if (TARGET_P10_FUSION
      && is_fusable_store (last_scheduled_insn, &mem1))
    {
      /* A fusable store was just scheduled.  Scan the ready list for
         another store that it can fuse with.  */
      int pos = lastpos;
      while (pos >= 0)
        {
          rtx mem2;
          /* GPR stores can be ascending or descending offsets, FPR/VSR
             stores must be ascending only.  */
          if (is_fusable_store (ready[pos], &mem2)
              && ((INTEGRAL_MODE_P (GET_MODE (mem1))
                   && adjacent_mem_locations (mem1, mem2))
                  || (FLOAT_MODE_P (GET_MODE (mem1))
                      && (adjacent_mem_locations (mem1, mem2) == mem1))))
            {
              /* Found a fusable store.  Move it to the end of the ready list
                 so it is scheduled next.  */
              move_to_end_of_ready (ready, pos, lastpos);

              load_store_pendulum = -1;
              break;
            }
          pos--;
        }
    }

  return cached_can_issue_more;
}
/* We are about to begin issuing insns for this clock cycle.  */

static int
rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
                      rtx_insn **ready ATTRIBUTE_UNUSED,
                      int *pn_ready ATTRIBUTE_UNUSED,
                      int clock_var ATTRIBUTE_UNUSED)
{
  int n_ready = *pn_ready;

  if (sched_verbose)
    fprintf (dump, "// rs6000_sched_reorder :\n");

  /* Reorder the ready list, if the second to last ready insn
     is a non-pipelined insn.  */
  if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
    {
      if (is_nonpipeline_insn (ready[n_ready - 1])
          && (recog_memoized (ready[n_ready - 2]) > 0))
        /* Simply swap first two insns.  */
        std::swap (ready[n_ready - 1], ready[n_ready - 2]);
    }

  if (rs6000_tune == PROCESSOR_POWER6)
    load_store_pendulum = 0;

  /* Do Power10/Power11 dependent reordering.  */
  if (last_scheduled_insn
      && (rs6000_tune == PROCESSOR_POWER10
          || rs6000_tune == PROCESSOR_POWER11))
    power10_sched_reorder (ready, n_ready - 1);

  return rs6000_issue_rate ();
}
/* Like rs6000_sched_reorder, but called after issuing each insn.  */

static int
rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
                       int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
{
  if (sched_verbose)
    fprintf (dump, "// rs6000_sched_reorder2 :\n");

  /* Do Power6 dependent reordering if necessary.  */
  if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
    return power6_sched_reorder2 (ready, *pn_ready - 1);

  /* Do Power9 dependent reordering if necessary.  */
  if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
      && recog_memoized (last_scheduled_insn) >= 0)
    return power9_sched_reorder2 (ready, *pn_ready - 1);

  /* Do Power10/Power11 dependent reordering.  */
  if (last_scheduled_insn
      && (rs6000_tune == PROCESSOR_POWER10
          || rs6000_tune == PROCESSOR_POWER11))
    return power10_sched_reorder (ready, *pn_ready - 1);

  return cached_can_issue_more;
}
/* Return whether the presence of INSN causes a dispatch group termination
   of group WHICH_GROUP.

   If WHICH_GROUP == current_group, this function will return true if INSN
   causes the termination of the current group (i.e., the dispatch group
   to which INSN belongs).  This means that INSN will be the last insn in
   the group it belongs to.

   If WHICH_GROUP == previous_group, this function will return true if INSN
   causes the termination of the previous group (i.e., the dispatch group
   that precedes the group to which INSN belongs).  This means that INSN
   will be the first insn in the group it belongs to.  */

static bool
insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
{
  bool first, last;

  if (! insn)
    return false;

  first = insn_must_be_first_in_group (insn);
  last = insn_must_be_last_in_group (insn);

  if (first && last)
    return true;

  if (which_group == current_group)
    return last;
  else if (which_group == previous_group)
    return first;

  return false;
}
static bool
insn_must_be_first_in_group (rtx_insn *insn)
{
  enum attr_type type;

  if (!insn
      || DEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  switch (rs6000_tune)
    {
    case PROCESSOR_POWER5:
      if (is_cracked_insn (insn))
        return true;
      /* FALLTHRU */
    case PROCESSOR_POWER4:
      if (is_microcoded_insn (insn))
        return true;

      if (!rs6000_sched_groups)
        return false;

      type = get_attr_type (insn);

      switch (type)
        {
        case TYPE_MFCR:
        case TYPE_MFCRF:
        case TYPE_MTCR:
        case TYPE_CR_LOGICAL:
        case TYPE_MTJMPR:
        case TYPE_MFJMPR:
        case TYPE_DIV:
        case TYPE_LOAD_L:
        case TYPE_STORE_C:
        case TYPE_ISYNC:
        case TYPE_SYNC:
          return true;
        default:
          break;
        }
      break;
    case PROCESSOR_POWER6:
      type = get_attr_type (insn);

      switch (type)
        {
        case TYPE_EXTS:
        case TYPE_CNTLZ:
        case TYPE_TRAP:
        case TYPE_MUL:
        case TYPE_INSERT:
        case TYPE_FPCOMPARE:
        case TYPE_MFCR:
        case TYPE_MTCR:
        case TYPE_MFJMPR:
        case TYPE_MTJMPR:
        case TYPE_ISYNC:
        case TYPE_SYNC:
        case TYPE_LOAD_L:
        case TYPE_STORE_C:
          return true;
        case TYPE_SHIFT:
          if (get_attr_dot (insn) == DOT_NO
              || get_attr_var_shift (insn) == VAR_SHIFT_NO)
            return true;
          break;
        case TYPE_DIV:
          if (get_attr_size (insn) == SIZE_32)
            return true;
          break;
        case TYPE_LOAD:
        case TYPE_STORE:
        case TYPE_FPLOAD:
        case TYPE_FPSTORE:
          if (get_attr_update (insn) == UPDATE_YES)
            return true;
          break;
        default:
          break;
        }
      break;
    case PROCESSOR_POWER7:
      type = get_attr_type (insn);

      switch (type)
        {
        case TYPE_CR_LOGICAL:
        case TYPE_MFCR:
        case TYPE_MFCRF:
        case TYPE_MTCR:
        case TYPE_SYNC:
        case TYPE_ISYNC:
        case TYPE_LOAD_L:
        case TYPE_STORE_C:
        case TYPE_MFJMPR:
        case TYPE_MTJMPR:
          return true;
        case TYPE_MUL:
        case TYPE_SHIFT:
        case TYPE_EXTS:
          if (get_attr_dot (insn) == DOT_YES)
            return true;
          break;
        case TYPE_LOAD:
          if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
              || get_attr_update (insn) == UPDATE_YES)
            return true;
          break;
        case TYPE_STORE:
        case TYPE_FPLOAD:
        case TYPE_FPSTORE:
          if (get_attr_update (insn) == UPDATE_YES)
            return true;
          break;
        default:
          break;
        }
      break;
    case PROCESSOR_POWER8:
      type = get_attr_type (insn);

      switch (type)
        {
        case TYPE_CR_LOGICAL:
        case TYPE_MFCR:
        case TYPE_MFCRF:
        case TYPE_MTCR:
        case TYPE_SYNC:
        case TYPE_ISYNC:
        case TYPE_LOAD_L:
        case TYPE_STORE_C:
        case TYPE_VECSTORE:
        case TYPE_MFJMPR:
        case TYPE_MTJMPR:
          return true;
        case TYPE_SHIFT:
        case TYPE_EXTS:
        case TYPE_MUL:
          if (get_attr_dot (insn) == DOT_YES)
            return true;
          break;
        case TYPE_LOAD:
          if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
              || get_attr_update (insn) == UPDATE_YES)
            return true;
          break;
        case TYPE_STORE:
          if (get_attr_update (insn) == UPDATE_YES
              && get_attr_indexed (insn) == INDEXED_YES)
            return true;
          break;
        default:
          break;
        }
      break;
    default:
      break;
    }

  return false;
}
static bool
insn_must_be_last_in_group (rtx_insn *insn)
{
  enum attr_type type;

  if (!insn
      || DEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  switch (rs6000_tune) {
  case PROCESSOR_POWER4:
  case PROCESSOR_POWER5:
    if (is_microcoded_insn (insn))
      return true;

    if (is_branch_slot_insn (insn))
      return true;

    break;
  case PROCESSOR_POWER6:
    type = get_attr_type (insn);

    switch (type)
      {
      case TYPE_EXTS:
      case TYPE_CNTLZ:
      case TYPE_TRAP:
      case TYPE_MUL:
      case TYPE_FPCOMPARE:
      case TYPE_MFCR:
      case TYPE_MTCR:
      case TYPE_MFJMPR:
      case TYPE_MTJMPR:
      case TYPE_ISYNC:
      case TYPE_SYNC:
      case TYPE_LOAD_L:
      case TYPE_STORE_C:
        return true;
      case TYPE_SHIFT:
        if (get_attr_dot (insn) == DOT_NO
            || get_attr_var_shift (insn) == VAR_SHIFT_NO)
          return true;
        break;
      case TYPE_DIV:
        if (get_attr_size (insn) == SIZE_32)
          return true;
        break;
      default:
        break;
      }
    break;
  case PROCESSOR_POWER7:
    type = get_attr_type (insn);

    switch (type)
      {
      case TYPE_ISYNC:
      case TYPE_SYNC:
      case TYPE_LOAD_L:
      case TYPE_STORE_C:
        return true;
      case TYPE_LOAD:
        if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
            && get_attr_update (insn) == UPDATE_YES)
          return true;
        break;
      case TYPE_STORE:
        if (get_attr_update (insn) == UPDATE_YES
            && get_attr_indexed (insn) == INDEXED_YES)
          return true;
        break;
      default:
        break;
      }
    break;
  case PROCESSOR_POWER8:
    type = get_attr_type (insn);

    switch (type)
      {
      case TYPE_MFCR:
      case TYPE_MTCR:
      case TYPE_ISYNC:
      case TYPE_SYNC:
      case TYPE_LOAD_L:
      case TYPE_STORE_C:
        return true;
      case TYPE_LOAD:
        if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
            && get_attr_update (insn) == UPDATE_YES)
          return true;
        break;
      case TYPE_STORE:
        if (get_attr_update (insn) == UPDATE_YES
            && get_attr_indexed (insn) == INDEXED_YES)
          return true;
        break;
      default:
        break;
      }
    break;
  default:
    break;
  }

  return false;
}
/* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
   dispatch group) from the insns in GROUP_INSNS.  Return false otherwise.  */

static bool
is_costly_group (rtx *group_insns, rtx next_insn)
{
  int i;
  int issue_rate = rs6000_issue_rate ();

  for (i = 0; i < issue_rate; i++)
    {
      sd_iterator_def sd_it;
      dep_t dep;
      rtx insn = group_insns[i];

      if (!insn)
        continue;

      FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
        {
          rtx next = DEP_CON (dep);

          if (next == next_insn
              && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
            return true;
        }
    }

  return false;
}
/* Utility of the function redefine_groups.
   Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
   in the same dispatch group.  If so, insert nops before NEXT_INSN, in order
   to keep it "far" (in a separate group) from GROUP_INSNS, following
   one of the following schemes, depending on the value of the flag
   -minsert_sched_nops = X:
   (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
       in order to force NEXT_INSN into a separate group.
   (2) X < sched_finish_regroup_exact: insert exactly X nops.
   GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
   insertion (has a group just ended, how many vacant issue slots remain in
   the last group, and how many dispatch groups were encountered so far).  */

static int
force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
                 rtx_insn *next_insn, bool *group_end, int can_issue_more,
                 int *group_count)
{
  rtx nop;
  bool force;
  int issue_rate = rs6000_issue_rate ();
  bool end = *group_end;
  int i;

  if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
    return can_issue_more;

  if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
    return can_issue_more;

  force = is_costly_group (group_insns, next_insn);
  if (!force)
    return can_issue_more;

  if (sched_verbose > 6)
    fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
             *group_count, can_issue_more);

  if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
    {
      if (*group_end)
        can_issue_more = 0;

      /* Since only a branch can be issued in the last issue_slot, it is
         sufficient to insert 'can_issue_more - 1' nops if next_insn is not
         a branch.  If next_insn is a branch, we insert 'can_issue_more'
         nops; in this case the last nop will start a new group and the
         branch will be forced to the new group.  */
      if (can_issue_more && !is_branch_slot_insn (next_insn))
        can_issue_more--;

      /* Do we have a special group ending nop?  */
      if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
          || rs6000_tune == PROCESSOR_POWER8)
        {
          nop = gen_group_ending_nop ();
          emit_insn_before (nop, next_insn);
          can_issue_more = 0;
        }
      else
        while (can_issue_more > 0)
          {
            nop = gen_nop ();
            emit_insn_before (nop, next_insn);
            can_issue_more--;
          }

      *group_end = true;
      return 0;
    }

  if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
    {
      int n_nops = rs6000_sched_insert_nops;

      /* Nops can't be issued from the branch slot, so the effective
         issue_rate for nops is 'issue_rate - 1'.  */
      if (can_issue_more == 0)
        can_issue_more = issue_rate;
      can_issue_more--;
      if (can_issue_more == 0)
        {
          can_issue_more = issue_rate - 1;
          (*group_count)++;
          end = true;
          for (i = 0; i < issue_rate; i++)
            group_insns[i] = 0;
        }

      while (n_nops > 0)
        {
          nop = gen_nop ();
          emit_insn_before (nop, next_insn);
          n_nops--;
          if (can_issue_more == issue_rate - 1) /* new group begins */
            end = false;
          can_issue_more--;
          if (can_issue_more == 0)
            {
              can_issue_more = issue_rate - 1;
              (*group_count)++;
              end = true;
              for (i = 0; i < issue_rate; i++)
                group_insns[i] = 0;
            }
        }

      /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1').  */
      can_issue_more++;

      /* Is next_insn going to start a new group?  */
      *group_end
        = (end
           || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
           || (can_issue_more <= 2 && is_cracked_insn (next_insn))
           || (can_issue_more < issue_rate
               && insn_terminates_group_p (next_insn, previous_group)));
      if (*group_end && end)
        (*group_count)--;

      if (sched_verbose > 6)
        fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
                 *group_count, can_issue_more);

      return can_issue_more;
    }

  return can_issue_more;
}
/* This function tries to synch the dispatch groups that the compiler "sees"
   with the dispatch groups that the processor dispatcher is expected to
   form in practice.  It tries to achieve this synchronization by forcing
   the estimated processor grouping on the compiler (as opposed to the
   function 'pad_groups' which tries to force the scheduler's grouping on
   the processor).

   The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
   examines the (estimated) dispatch groups that will be formed by the
   processor dispatcher.  It marks these group boundaries to reflect the
   estimated processor grouping, overriding the grouping that the scheduler
   had marked.  Depending on the value of the flag '-minsert-sched-nops'
   this function can force certain insns into separate groups or force a
   certain distance between them by inserting nops, for example, if there
   exists a "costly dependence" between the insns.

   The function estimates the group boundaries that the processor will form
   as follows:  It keeps track of how many vacant issue slots are available
   after each insn.  A subsequent insn will start a new group if one of the
   following 4 cases applies:
   - no more vacant issue slots remain in the current dispatch group.
   - only the last issue slot, which is the branch slot, is vacant, but the
     next insn is not a branch.
   - only the last 2 or less issue slots, including the branch slot, are
     vacant, which means that a cracked insn (which occupies two issue
     slots) can't be issued in this group.
   - less than 'issue_rate' slots are vacant, and the next insn always
     needs to start a new group.  */

static int
redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
                 rtx_insn *tail)
{
  rtx_insn *insn, *next_insn;
  int issue_rate;
  int can_issue_more;
  int slot, i;
  bool group_end;
  int group_count = 0;
  rtx *group_insns;

  /* Initialize.  */
  issue_rate = rs6000_issue_rate ();
  group_insns = XALLOCAVEC (rtx, issue_rate);
  for (i = 0; i < issue_rate; i++)
    group_insns[i] = 0;
  can_issue_more = issue_rate;
  slot = 0;
  insn = get_next_active_insn (prev_head_insn, tail);
  group_end = false;

  while (insn != NULL_RTX)
    {
      slot = (issue_rate - can_issue_more);
      group_insns[slot] = insn;
      can_issue_more =
        rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
      if (insn_terminates_group_p (insn, current_group))
        can_issue_more = 0;

      next_insn = get_next_active_insn (insn, tail);
      if (next_insn == NULL_RTX)
        return group_count + 1;

      /* Is next_insn going to start a new group?  */
      group_end
        = (can_issue_more == 0
           || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
           || (can_issue_more <= 2 && is_cracked_insn (next_insn))
           || (can_issue_more < issue_rate
               && insn_terminates_group_p (next_insn, previous_group)));

      can_issue_more = force_new_group (sched_verbose, dump, group_insns,
                                        next_insn, &group_end, can_issue_more,
                                        &group_count);

      if (group_end)
        {
          group_count++;
          can_issue_more = 0;
          for (i = 0; i < issue_rate; i++)
            group_insns[i] = 0;
        }

      if (GET_MODE (next_insn) == TImode && can_issue_more)
        PUT_MODE (next_insn, VOIDmode);
      else if (!can_issue_more && GET_MODE (next_insn) != TImode)
        PUT_MODE (next_insn, TImode);

      insn = next_insn;
      if (can_issue_more == 0)
        can_issue_more = issue_rate;
    }

  return group_count;
}
/* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
   dispatch group boundaries that the scheduler had marked.  Pad with nops
   any dispatch groups which have vacant issue slots, in order to force the
   scheduler's grouping on the processor dispatcher.  The function
   returns the number of dispatch groups found.  */

static int
pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
            rtx_insn *tail)
{
  rtx_insn *insn, *next_insn;
  rtx nop;
  int issue_rate;
  int can_issue_more;
  int group_end;
  int group_count = 0;

  /* Initialize issue_rate.  */
  issue_rate = rs6000_issue_rate ();
  can_issue_more = issue_rate;

  insn = get_next_active_insn (prev_head_insn, tail);
  next_insn = get_next_active_insn (insn, tail);

  while (insn != NULL_RTX)
    {
      can_issue_more =
        rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);

      group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);

      if (next_insn == NULL_RTX)
        break;

      if (group_end)
        {
          /* If the scheduler had marked group termination at this location
             (between insn and next_insn), and neither insn nor next_insn
             will force group termination, pad the group with nops to force
             group termination.  */
          if (can_issue_more
              && (rs6000_sched_insert_nops == sched_finish_pad_groups)
              && !insn_terminates_group_p (insn, current_group)
              && !insn_terminates_group_p (next_insn, previous_group))
            {
              if (!is_branch_slot_insn (next_insn))
                can_issue_more--;

              while (can_issue_more)
                {
                  nop = gen_nop ();
                  emit_insn_before (nop, next_insn);
                  can_issue_more--;
                }
            }

          can_issue_more = issue_rate;
          group_count++;
        }

      insn = next_insn;
      next_insn = get_next_active_insn (insn, tail);
    }

  return group_count;
}
/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
                   int sched_verbose ATTRIBUTE_UNUSED,
                   int max_ready ATTRIBUTE_UNUSED)
{
  last_scheduled_insn = NULL;
  load_store_pendulum = 0;
  divide_cnt = 0;
  vec_pairing = 0;
}
/* The following function is called at the end of scheduling BB.
   After reload, it inserts nops at insn group bundling.  */

static void
rs6000_sched_finish (FILE *dump, int sched_verbose)
{
  int n_groups;

  if (sched_verbose)
    fprintf (dump, "=== Finishing schedule.\n");

  if (reload_completed && rs6000_sched_groups)
    {
      /* Do not run sched_finish hook when selective scheduling enabled.  */
      if (sel_sched_p ())
        return;

      if (rs6000_sched_insert_nops == sched_finish_none)
        return;

      if (rs6000_sched_insert_nops == sched_finish_pad_groups)
        n_groups = pad_groups (dump, sched_verbose,
                               current_sched_info->prev_head,
                               current_sched_info->next_tail);
      else
        n_groups = redefine_groups (dump, sched_verbose,
                                    current_sched_info->prev_head,
                                    current_sched_info->next_tail);

      if (sched_verbose >= 6)
        {
          fprintf (dump, "ngroups = %d\n", n_groups);
          print_rtl (dump, current_sched_info->prev_head);
          fprintf (dump, "Done finish_sched\n");
        }
    }
}
struct rs6000_sched_context
{
  short cached_can_issue_more;
  rtx_insn *last_scheduled_insn;
  int load_store_pendulum;
  int divide_cnt;
  int vec_pairing;
};

typedef struct rs6000_sched_context rs6000_sched_context_def;
typedef rs6000_sched_context_def *rs6000_sched_context_t;
/* Allocate store for new scheduling context.  */
static void *
rs6000_alloc_sched_context (void)
{
  return xmalloc (sizeof (rs6000_sched_context_def));
}
/* If CLEAN_P is true then initializes _SC with clean data,
   and from the global context otherwise.  */
static void
rs6000_init_sched_context (void *_sc, bool clean_p)
{
  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;

  if (clean_p)
    {
      sc->cached_can_issue_more = 0;
      sc->last_scheduled_insn = NULL;
      sc->load_store_pendulum = 0;
      sc->divide_cnt = 0;
      sc->vec_pairing = 0;
    }
  else
    {
      sc->cached_can_issue_more = cached_can_issue_more;
      sc->last_scheduled_insn = last_scheduled_insn;
      sc->load_store_pendulum = load_store_pendulum;
      sc->divide_cnt = divide_cnt;
      sc->vec_pairing = vec_pairing;
    }
}
/* Sets the global scheduling context to the one pointed to by _SC.  */
static void
rs6000_set_sched_context (void *_sc)
{
  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;

  gcc_assert (sc != NULL);

  cached_can_issue_more = sc->cached_can_issue_more;
  last_scheduled_insn = sc->last_scheduled_insn;
  load_store_pendulum = sc->load_store_pendulum;
  divide_cnt = sc->divide_cnt;
  vec_pairing = sc->vec_pairing;
}
/* Free _SC.  */
static void
rs6000_free_sched_context (void *_sc)
{
  gcc_assert (_sc != NULL);

  free (_sc);
}
static bool
rs6000_sched_can_speculate_insn (rtx_insn *insn)
{
  switch (get_attr_type (insn))
    {
    case TYPE_DIV:
    case TYPE_SDIV:
    case TYPE_DDIV:
    case TYPE_VECDIV:
    case TYPE_SSQRT:
    case TYPE_DSQRT:
      return false;

    default:
      return true;
    }
}
/* Length in units of the trampoline for entering a nested function.  */

int
rs6000_trampoline_size (void)
{
  int ret = 0;

  switch (DEFAULT_ABI)
    {
    default:
      gcc_unreachable ();

    case ABI_AIX:
      ret = (TARGET_32BIT) ? 12 : 24;
      break;

    case ABI_ELFv2:
      gcc_assert (!TARGET_32BIT);
      ret = 32;
      break;

    case ABI_DARWIN:
    case ABI_V4:
      ret = (TARGET_32BIT) ? 40 : 48;
      break;
    }

  return ret;
}
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */

static void
rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
{
  int regsize = (TARGET_32BIT) ? 4 : 8;
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx ctx_reg = force_reg (Pmode, cxt);
  rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));

  switch (DEFAULT_ABI)
    {
    default:
      gcc_unreachable ();

    /* Under AIX, just build the 3 word function descriptor.  */
    case ABI_AIX:
      {
        rtx fnmem, fn_reg, toc_reg;

        if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
          error ("you cannot take the address of a nested function if you "
                 "use the %qs option", "-mno-pointers-to-nested-functions");

        fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
        fn_reg = gen_reg_rtx (Pmode);
        toc_reg = gen_reg_rtx (Pmode);

        /* Macro to shorten the code expansions below.  */
# define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)

        m_tramp = replace_equiv_address (m_tramp, addr);

        emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
        emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
        emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
        emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
        emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
      }
      break;

    /* Under V.4/eabi/darwin, __trampoline_setup does the real work.  */
    case ABI_ELFv2:
    case ABI_DARWIN:
    case ABI_V4:
      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
                         LCT_NORMAL, VOIDmode,
                         addr, Pmode,
                         GEN_INT (rs6000_trampoline_size ()), SImode,
                         fnaddr, Pmode,
                         ctx_reg, Pmode);
      break;
    }
}
/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
   identifier as an argument, so the front end shouldn't look it up.  */

static bool
rs6000_attribute_takes_identifier_p (const_tree attr_id)
{
  return is_attribute_p ("altivec", attr_id);
}
/* Handle the "altivec" attribute.  The attribute may have
   arguments as follows:

     __attribute__((altivec(vector__)))
     __attribute__((altivec(pixel__)))   (always followed by 'unsigned short')
     __attribute__((altivec(bool__)))    (always followed by 'unsigned')

   and may appear more than once (e.g., 'vector bool char') in a
   given declaration.  */

static tree
rs6000_handle_altivec_attribute (tree *node,
                                 tree name ATTRIBUTE_UNUSED,
                                 tree args,
                                 int flags ATTRIBUTE_UNUSED,
                                 bool *no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  machine_mode mode;
  int unsigned_p;
  char altivec_type
    = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
        && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
       ? *IDENTIFIER_POINTER (TREE_VALUE (args))
       : '?');

  while (POINTER_TYPE_P (type)
         || TREE_CODE (type) == FUNCTION_TYPE
         || TREE_CODE (type) == METHOD_TYPE
         || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);

  /* Check for invalid AltiVec type qualifiers.  */
  if (type == long_double_type_node)
    error ("use of %<long double%> in AltiVec types is invalid");
  else if (type == boolean_type_node)
    error ("use of boolean types in AltiVec types is invalid");
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    error ("use of %<complex%> in AltiVec types is invalid");
  else if (DECIMAL_FLOAT_MODE_P (mode))
    error ("use of decimal floating-point types in AltiVec types is invalid");
  else if (!TARGET_VSX)
    {
      if (type == long_unsigned_type_node || type == long_integer_type_node)
        {
          if (TARGET_64BIT)
            error ("use of %<long%> in AltiVec types is invalid for "
                   "64-bit code without %qs", "-mvsx");
          else if (rs6000_warn_altivec_long)
            warning (0, "use of %<long%> in AltiVec types is deprecated; "
                     "use %<int%>");
        }
      else if (type == long_long_unsigned_type_node
               || type == long_long_integer_type_node)
        error ("use of %<long long%> in AltiVec types is invalid without %qs",
               "-mvsx");
      else if (type == double_type_node)
        error ("use of %<double%> in AltiVec types is invalid without %qs",
               "-mvsx");
    }

  switch (altivec_type)
    {
    case 'v':
      unsigned_p = TYPE_UNSIGNED (type);
      switch (mode)
        {
        case E_TImode:
          result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
          break;
        case E_DImode:
          result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
          break;
        case E_SImode:
          result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
          break;
        case E_HImode:
          result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
          break;
        case E_QImode:
          result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
          break;
        case E_SFmode: result = V4SF_type_node; break;
        case E_DFmode: result = V2DF_type_node; break;
        /* If the user says 'vector int bool', we may be handed the 'bool'
           attribute _before_ the 'vector' attribute, and so select the
           proper type in the 'b' case below.  */
        case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
        case E_V2DImode: case E_V2DFmode:
          result = type;
        default: break;
        }
      break;
    case 'b':
      switch (mode)
        {
        case E_TImode: case E_V1TImode: result = bool_V1TI_type_node; break;
        case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
        case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
        case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
        case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
        default: break;
        }
      break;
    case 'p':
      switch (mode)
        {
        case E_V8HImode: result = pixel_V8HI_type_node;
        default: break;
        }
    default: break;
    }

  /* Propagate qualifiers attached to the element type
     onto the vector type.  */
  if (result && result != type && TYPE_QUALS (type))
    result = build_qualified_type (result, TYPE_QUALS (type));

  *no_add_attrs = true;  /* No need to hang on to the attribute.  */

  if (result)
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}
/* AltiVec defines five built-in scalar types that serve as vector
   elements; we must teach the compiler how to mangle them.  The 128-bit
   floating point mangling is target-specific as well.  MMA defines
   two built-in types to be used as opaque vector types.  */

static const char *
rs6000_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
      && TREE_CODE (type) != OPAQUE_TYPE)
    return NULL;

  if (type == bool_char_type_node) return "U6__boolc";
  if (type == bool_short_type_node) return "U6__bools";
  if (type == pixel_type_node) return "u7__pixel";
  if (type == bool_int_type_node) return "U6__booli";
  if (type == bool_long_long_type_node) return "U6__boolx";

  if (type == float128_type_node || type == float64x_type_node)
    return NULL;

  if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
    return "g";
  if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
    return "u9__ieee128";

  if (type == vector_pair_type_node)
    return "u13__vector_pair";
  if (type == vector_quad_type_node)
    return "u13__vector_quad";

  /* For all other types, use the default mangling.  */
  return NULL;
}
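/* Illustrative examples of the rules above: a C++ parameter of type
   '__vector __bool int' is mangled using "U6__booli", '__pixel' uses
   "u7__pixel", the IEEE 128-bit float form mangles as "u9__ieee128",
   and the IBM double-double form as "g".  */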
/* Handle a "longcall" or "shortcall" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
rs6000_handle_longcall_attribute (tree *node, tree name,
                                  tree args ATTRIBUTE_UNUSED,
                                  int flags ATTRIBUTE_UNUSED,
                                  bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Set longcall attributes on all functions declared when
   rs6000_default_long_calls is true.  */
static void
rs6000_set_default_type_attributes (tree type)
{
  if (rs6000_default_long_calls
      && FUNC_OR_METHOD_TYPE_P (type))
    TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
                                        NULL_TREE,
                                        TYPE_ATTRIBUTES (type));

#if TARGET_MACHO
  darwin_set_default_type_attributes (type);
#endif
}
/* Return a reference suitable for calling a function with the
   longcall attribute.  */

static rtx
rs6000_longcall_ref (rtx call_ref, rtx arg)
{
  /* System V adds '.' to the internal name, so skip them.  */
  const char *call_name = XSTR (call_ref, 0);
  if (*call_name == '.')
    {
      while (*call_name == '.')
        call_name++;

      tree node = get_identifier (call_name);
      call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
    }

  if (TARGET_PLTSEQ)
    {
      rtx base = const0_rtx;
      int regno = 12;
      if (rs6000_pcrel_p ())
        {
          rtx reg = gen_rtx_REG (Pmode, regno);
          rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
                                           gen_rtvec (3, base, call_ref, arg),
                                           UNSPECV_PLT_PCREL);
          emit_insn (gen_rtx_SET (reg, u));
          return reg;
        }

      if (DEFAULT_ABI == ABI_ELFv2)
        base = gen_rtx_REG (Pmode, TOC_REGISTER);
      else
        {
          if (flag_pic)
            base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
          regno = 11;
        }
      /* Reg must match that used by linker PLT stubs.  For ELFv2, r12
         may be used by a function global entry point.  For SysV4, r11
         is used by __glink_PLTresolve lazy resolver entry.  */
      rtx reg = gen_rtx_REG (Pmode, regno);
      rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
                               UNSPEC_PLT16_HA);
      rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
                                        gen_rtvec (3, reg, call_ref, arg),
                                        UNSPECV_PLT16_LO);
      emit_insn (gen_rtx_SET (reg, hi));
      emit_insn (gen_rtx_SET (reg, lo));
      return reg;
    }

  return force_reg (Pmode, call_ref);
}
#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
#define TARGET_USE_MS_BITFIELD_LAYOUT 0
#endif
/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
rs6000_handle_struct_attribute (tree *node, tree name,
                                tree args ATTRIBUTE_UNUSED,
                                int flags ATTRIBUTE_UNUSED,
                                bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
        type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
                 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
            && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
           || ((is_attribute_p ("gcc_struct", name)
                && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
               name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
static bool
rs6000_ms_bitfield_layout_p (const_tree record_type)
{
  return (TARGET_USE_MS_BITFIELD_LAYOUT
          && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
    || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
}
#ifdef USING_ELFOS_H

/* A get_unnamed_section callback, used for switching to toc_section.  */

static void
rs6000_elf_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
{
  if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
      && TARGET_MINIMAL_TOC)
    {
      if (!toc_initialized)
        {
          fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
          ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
          (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
          fprintf (asm_out_file, "\t.tc ");
          ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
          ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
          fprintf (asm_out_file, "\n");

          fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
          ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
          ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
          fprintf (asm_out_file, " = .+32768\n");
          toc_initialized = 1;
        }
      else
        fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
    }
  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    {
      fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
      if (!toc_initialized)
        {
          ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
          toc_initialized = 1;
        }
    }
  else
    {
      fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
      if (!toc_initialized)
        {
          ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
          ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
          fprintf (asm_out_file, " = .+32768\n");
          toc_initialized = 1;
        }
    }
}
/* Implement TARGET_ASM_INIT_SECTIONS.  */

static void
rs6000_elf_asm_init_sections (void)
{
  toc_section
    = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);

  sdata2_section
    = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
                           SDATA2_SECTION_ASM_OP);
}
/* Implement TARGET_SELECT_RTX_SECTION.  */

static section *
rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
                               unsigned HOST_WIDE_INT align)
{
  if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
    return toc_section;
  else
    return default_elf_select_rtx_section (mode, x, align);
}
/* For a SYMBOL_REF, set generic flags and then perform some
   target-specific processing.

   When the AIX ABI is requested on a non-AIX system, replace the
   function name with the real name (with a leading .) rather than the
   function descriptor name.  This saves a lot of overriding code to
   read the prefixes.  */

static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
static void
rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (first
      && TREE_CODE (decl) == FUNCTION_DECL
      && !TARGET_AIX
      && DEFAULT_ABI == ABI_AIX)
    {
      rtx sym_ref = XEXP (rtl, 0);
      size_t len = strlen (XSTR (sym_ref, 0));
      char *str = XALLOCAVEC (char, len + 2);
      str[0] = '.';
      memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
      XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
    }
}
static bool
compare_section_name (const char *section, const char *templ)
{
  int len;

  len = strlen (templ);
  return (strncmp (section, templ, len) == 0
          && (section[len] == 0 || section[len] == '.'));
}
bool
rs6000_elf_in_small_data_p (const_tree decl)
{
  if (rs6000_sdata == SDATA_NONE)
    return false;

  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (decl) == STRING_CST)
    return false;

  /* Functions are never in the small data area.  */
  if (TREE_CODE (decl) == FUNCTION_DECL)
    return false;

  if (VAR_P (decl) && DECL_SECTION_NAME (decl))
    {
      const char *section = DECL_SECTION_NAME (decl);
      if (compare_section_name (section, ".sdata")
          || compare_section_name (section, ".sdata2")
          || compare_section_name (section, ".gnu.linkonce.s")
          || compare_section_name (section, ".sbss")
          || compare_section_name (section, ".sbss2")
          || compare_section_name (section, ".gnu.linkonce.sb")
          || strcmp (section, ".PPC.EMB.sdata0") == 0
          || strcmp (section, ".PPC.EMB.sbss0") == 0)
        return true;
    }
  else
    {
      /* If we are told not to put readonly data in sdata, then don't.  */
      if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
          && !rs6000_readonly_in_sdata)
        return false;

      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));

      if (size > 0
          && size <= g_switch_value
          /* If it's not public, and we're not going to reference it there,
             there's no need to put it in the small data section.  */
          && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
        return true;
    }

  return false;
}

#endif /* USING_ELFOS_H */
/* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P.  */

static bool
rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
{
  return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
}

/* Do not place thread-local symbols refs in the object blocks.  */

static bool
rs6000_use_blocks_for_decl_p (const_tree decl)
{
  return !DECL_THREAD_LOCAL_P (decl);
}
/* Return a REG that occurs in ADDR with coefficient 1.
   ADDR can be effectively incremented by incrementing REG.

   r0 is special and we must not select it as an address
   register by this routine since our caller will try to
   increment the returned register via an "la" instruction.  */

rtx
find_addr_reg (rtx addr)
{
  while (GET_CODE (addr) == PLUS)
    {
      if (REG_P (XEXP (addr, 0))
          && REGNO (XEXP (addr, 0)) != 0)
        addr = XEXP (addr, 0);
      else if (REG_P (XEXP (addr, 1))
               && REGNO (XEXP (addr, 1)) != 0)
        addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 0)))
        addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 1)))
        addr = XEXP (addr, 0);
      else
        gcc_unreachable ();
    }
  gcc_assert (REG_P (addr) && REGNO (addr) != 0);
  return addr;
}

void
rs6000_fatal_bad_address (rtx op)
{
  fatal_insn ("bad address", op);
}
#if TARGET_MACHO

vec<branch_island, va_gc> *branch_islands;

/* Remember to generate a branch island for far calls to the given
   function.  */

static void
add_compiler_branch_island (tree label_name, tree function_name,
                            int line_number)
{
  branch_island bi = {function_name, label_name, line_number};
  vec_safe_push (branch_islands, bi);
}

/* NO_PREVIOUS_DEF checks in the link list whether the function name is
   already there or not.  */

static bool
no_previous_def (tree function_name)
{
  branch_island *bi;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
    if (function_name == bi->function_name)
      return false;
  return true;
}

/* GET_PREV_LABEL gets the label name from the previous definition of
   the function.  */

static tree
get_prev_label (tree function_name)
{
  branch_island *bi;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
    if (function_name == bi->function_name)
      return bi->label_name;
  return NULL_TREE;
}
/* Generate external symbol indirection stubs (PIC and non-PIC).  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *symbol_name, *lazy_ptr_name;
  char *local_label_0;
  static unsigned label = 0;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  lazy_ptr_name = XALLOCAVEC (char, length + 32);
  GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);

  if (MACHOPIC_PURE)
    {
      switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
      fprintf (file, "\t.align 5\n");

      fprintf (file, "%s:\n", stub);
      fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

      label++;
      local_label_0 = XALLOCAVEC (char, 16);
      sprintf (local_label_0, "L%u$spb", label);

      fprintf (file, "\tmflr r0\n");
      fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
      fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
      fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
               lazy_ptr_name, local_label_0);
      fprintf (file, "\tmtlr r0\n");
      fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
               (TARGET_64BIT ? "ldu" : "lwzu"),
               lazy_ptr_name, local_label_0);
      fprintf (file, "\tmtctr r12\n");
      fprintf (file, "\tbctr\n");
    }
  else /* mdynamic-no-pic or mkernel.  */
    {
      switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
      fprintf (file, "\t.align 4\n");

      fprintf (file, "%s:\n", stub);
      fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

      fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
      fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
               (TARGET_64BIT ? "ldu" : "lwzu"),
               lazy_ptr_name);
      fprintf (file, "\tmtctr r12\n");
      fprintf (file, "\tbctr\n");
    }

  switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "%sdyld_stub_binding_helper\n",
           (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
}
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go into a reg.  This is REG if non
   zero, otherwise we allocate register(s) as necessary.  */

#define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)

rtx
rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
                                        rtx reg)
{
  rtx base, offset;

  if (reg == NULL && !reload_completed)
    reg = gen_reg_rtx (Pmode);

  if (GET_CODE (orig) == CONST)
    {
      rtx reg_temp;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      /* Use a different reg for the intermediate value, as
         it will be marked UNCHANGING.  */
      reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
      base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
                                                     Pmode, reg_temp);
      offset =
        rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
                                                Pmode, reg_temp);

      if (CONST_INT_P (offset))
        {
          if (SMALL_INT (offset))
            return plus_constant (Pmode, base, INTVAL (offset));
          else if (!reload_completed)
            offset = force_reg (Pmode, offset);
          else
            {
              rtx mem = force_const_mem (Pmode, orig);
              return machopic_legitimize_pic_address (mem, Pmode, reg);
            }
        }
      return gen_rtx_PLUS (Pmode, base, offset);
    }

  /* Fall back on generic machopic code.  */
  return machopic_legitimize_pic_address (orig, mode, reg);
}
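/* Illustrative sketch (not part of the compiler; kept out of the build
   with #if 0): SMALL_INT above tests whether a constant fits a signed
   16-bit displacement by biasing it with 0x8000 so the whole legal range
   collapses to [0, 0xffff) in unsigned arithmetic.  The function name
   below is hypothetical.  */
#if 0
#include <stdbool.h>
#include <stdint.h>

/* Equivalent to SMALL_INT: true iff V is in [-0x8000, 0x7fff].  */
static bool
small_int_p (int64_t v)
{
  return ((uint64_t) v + 0x8000) < 0x10000;
}
#endif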
/* Output a .machine directive for the Darwin assembler, and call
   the generic start_file routine.  */

static void
rs6000_darwin_file_start (void)
{
  static const struct
  {
    const char *arg;
    const char *name;
    HOST_WIDE_INT if_set;
  } mapping[] = {
    { "ppc64", "ppc64", MASK_64BIT },
    { "970", "ppc970", OPTION_MASK_PPC_GPOPT | OPTION_MASK_MFCRF
      | MASK_POWERPC64 },
    { "power4", "ppc970", 0 },
    { "G5", "ppc970", 0 },
    { "7450", "ppc7450", 0 },
    { "7400", "ppc7400", OPTION_MASK_ALTIVEC },
    { "G4", "ppc7400", 0 },
    { "750", "ppc750", 0 },
    { "740", "ppc750", 0 },
    { "G3", "ppc750", 0 },
    { "604e", "ppc604e", 0 },
    { "604", "ppc604", 0 },
    { "603e", "ppc603", 0 },
    { "603", "ppc603", 0 },
    { "601", "ppc601", 0 },
    { NULL, "ppc", 0 } };
  const char *cpu_id = "";
  size_t i;

  rs6000_file_start ();
  darwin_file_start ();

  /* Determine the argument to -mcpu=.  Default to G3 if not specified.  */
  if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
    cpu_id = rs6000_default_cpu;

  if (OPTION_SET_P (rs6000_cpu_index))
    cpu_id = processor_target_table[rs6000_cpu_index].name;

  /* Look through the mapping array.  Pick the first name that either
     matches the argument, has a bit set in IF_SET that is also set
     in the target flags, or has a NULL name.  */
  i = 0;
  while (mapping[i].arg != NULL
         && strcmp (mapping[i].arg, cpu_id) != 0
         && (mapping[i].if_set & rs6000_isa_flags) == 0)
    i++;

  fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
}

#endif /* TARGET_MACHO */
#if TARGET_ELF
static int
rs6000_elf_reloc_rw_mask (void)
{
  if (flag_pic)
    return 3;
  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    return 2;
  else
    return 0;
}
/* Record an element in the table of global constructors.  SYMBOL is
   a SYMBOL_REF of the function to be called; PRIORITY is a number
   between 0 and MAX_INIT_PRIORITY.

   This differs from default_named_section_asm_out_constructor in
   that we have special handling for -mrelocatable.  */

static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void
rs6000_elf_asm_out_constructor (rtx symbol, int priority)
{
  const char *section = ".ctors";
  char buf[18];

  if (priority != DEFAULT_INIT_PRIORITY)
    {
      sprintf (buf, ".ctors.%.5u",
               /* Invert the numbering so the linker puts us in the proper
                  order; constructors are run from right to left, and the
                  linker sorts in increasing order.  */
               MAX_INIT_PRIORITY - priority);
      section = buf;
    }

  switch_to_section (get_section (section, SECTION_WRITE, NULL));
  assemble_align (POINTER_SIZE);

  if (DEFAULT_ABI == ABI_V4
      && (TARGET_RELOCATABLE || flag_pic > 1))
    {
      fputs ("\t.long (", asm_out_file);
      output_addr_const (asm_out_file, symbol);
      fputs (")@fixup\n", asm_out_file);
    }
  else
    assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
}
static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void
rs6000_elf_asm_out_destructor (rtx symbol, int priority)
{
  const char *section = ".dtors";
  char buf[18];

  if (priority != DEFAULT_INIT_PRIORITY)
    {
      sprintf (buf, ".dtors.%.5u",
               /* Invert the numbering so the linker puts us in the proper
                  order; constructors are run from right to left, and the
                  linker sorts in increasing order.  */
               MAX_INIT_PRIORITY - priority);
      section = buf;
    }

  switch_to_section (get_section (section, SECTION_WRITE, NULL));
  assemble_align (POINTER_SIZE);

  if (DEFAULT_ABI == ABI_V4
      && (TARGET_RELOCATABLE || flag_pic > 1))
    {
      fputs ("\t.long (", asm_out_file);
      output_addr_const (asm_out_file, symbol);
      fputs (")@fixup\n", asm_out_file);
    }
  else
    assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
}
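/* Worked example (illustration only, kept out of the build with #if 0):
   the name inversion above means a high-priority constructor sorts into
   an early section.  With MAX_INIT_PRIORITY == 65535, priority 65000
   yields ".ctors.00535".  */
#if 0
#include <stdio.h>

int
main (void)
{
  const unsigned max_init_priority = 65535;  /* mirrors MAX_INIT_PRIORITY  */
  unsigned priority = 65000;
  char buf[18];
  sprintf (buf, ".ctors.%.5u", max_init_priority - priority);
  puts (buf);  /* prints ".ctors.00535"  */
  return 0;
}
#endif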
void
rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
{
  if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
    {
      fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
      ASM_OUTPUT_LABEL (file, name);
      fputs (DOUBLE_INT_ASM_OP, file);
      rs6000_output_function_entry (file, name);
      fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
      fputs ("\t.size\t", file);
      assemble_name (file, name);
      fputs (",24\n\t.type\t.", file);
      assemble_name (file, name);
      fputs (",@function\n", file);
      if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
        {
          fputs ("\t.globl\t.", file);
          assemble_name (file, name);
          putc ('\n', file);
        }

      ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
      ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
      rs6000_output_function_entry (file, name);
      fputs (":\n", file);
      assemble_function_label_final ();
      return;
    }

  int uses_toc;
  if (DEFAULT_ABI == ABI_V4
      && (TARGET_RELOCATABLE || flag_pic > 1)
      && !TARGET_SECURE_PLT
      && (!constant_pool_empty_p () || crtl->profile)
      && (uses_toc = uses_TOC ()))
    {
      char buf[256];

      if (uses_toc == 2)
        switch_to_other_text_partition ();
      (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);

      fprintf (file, "\t.long ");
      assemble_name (file, toc_label_name);
      putc ('-', file);
      ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
      assemble_name (file, buf);
      putc ('\n', file);
      if (uses_toc == 2)
        switch_to_other_text_partition ();
    }

  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
  ASM_DECLARE_RESULT (file, DECL_RESULT (decl));

  if (TARGET_CMODEL == CMODEL_LARGE
      && rs6000_global_entry_point_prologue_needed_p ())
    {
      char buf[256];

      (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);

      fprintf (file, "\t.quad .TOC.-");
      ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
      assemble_name (file, buf);
      putc ('\n', file);
    }

  if (DEFAULT_ABI == ABI_AIX)
    {
      const char *desc_name, *orig_name;

      orig_name = (*targetm.strip_name_encoding) (name);
      desc_name = orig_name;
      while (*desc_name == '.')
        desc_name++;

      if (TREE_PUBLIC (decl))
        fprintf (file, "\t.globl %s\n", desc_name);

      fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
      fprintf (file, "%s:\n", desc_name);
      fprintf (file, "\t.long %s\n", orig_name);
      fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
      fputs ("\t.long 0\n", file);
      fprintf (file, "\t.previous\n");
    }
  ASM_OUTPUT_FUNCTION_LABEL (file, name, decl);
}
static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
static void
rs6000_elf_file_end (void)
{
#ifdef HAVE_AS_GNU_ATTRIBUTE
  /* ??? The value emitted depends on options active at file end.
     Assume anyone using #pragma or attributes that might change
     options knows what they are doing.  */
  if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
      && rs6000_passes_float)
    {
      int fp;

      if (TARGET_HARD_FLOAT)
        fp = 1;
      else
        fp = 2;
      if (rs6000_passes_long_double)
        {
          if (!TARGET_LONG_DOUBLE_128)
            fp |= 2 * 4;
          else if (TARGET_IEEEQUAD)
            fp |= 3 * 4;
          else
            fp |= 1 * 4;
        }
      fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
    }
  if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
    {
      if (rs6000_passes_vector)
        fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
                 (TARGET_ALTIVEC_ABI ? 2 : 1));
      if (rs6000_returns_struct)
        fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
                 aix_struct_return ? 2 : 1);
    }
#endif
#if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
  if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
    file_end_indicate_exec_stack ();
#endif

  if (flag_split_stack)
    file_end_indicate_split_stack ();

  if (cpu_builtin_p)
    {
      /* We have expanded a CPU builtin, so we need to emit a reference to
         the special symbol that LIBC uses to declare it supports the
         AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature.  */
      switch_to_section (data_section);
      fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
      fprintf (asm_out_file, "\t%s %s\n",
               TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
    }
}
#endif /* TARGET_ELF */
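/* Worked example (descriptive note on the code above): the
   Tag_GNU_Power_ABI_FP value packs two fields.  Bits 0-1 describe scalar
   FP (1 = hard float, 2 = soft float) and bits 2-3, scaled by 4, describe
   long double (1 = 128-bit IBM extended, 2 = 64-bit, 3 = IEEE 128-bit).
   Hard float with IEEE 128-bit long double therefore emits
   ".gnu_attribute 4, 13", since 1 | 3 * 4 == 13.  */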
#if TARGET_XCOFF

#ifndef HAVE_XCOFF_DWARF_EXTRAS
#define HAVE_XCOFF_DWARF_EXTRAS 0
#endif

/* Names of bss and data sections.  These should be unique names for each
   compilation unit.  */

char *xcoff_bss_section_name;
char *xcoff_private_data_section_name;
char *xcoff_private_rodata_section_name;
char *xcoff_tls_data_section_name;
char *xcoff_read_only_section_name;

static enum unwind_info_type
rs6000_xcoff_debug_unwind_info (void)
{
  return UI_NONE;
}
static void
rs6000_xcoff_asm_output_anchor (rtx symbol)
{
  char buffer[100];

  sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
           SYMBOL_REF_BLOCK_OFFSET (symbol));
  fprintf (asm_out_file, "%s", SET_ASM_OP);
  RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
  fprintf (asm_out_file, ",");
  RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
  fprintf (asm_out_file, "\n");
}
static void
rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
{
  fputs (GLOBAL_ASM_OP, stream);
  RS6000_OUTPUT_BASENAME (stream, name);
  putc ('\n', stream);
}
/* A get_unnamed_decl callback, used for read-only sections.  PTR
   points to the section string variable.  */

static void
rs6000_xcoff_output_readonly_section_asm_op (const char *directive)
{
  fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
           directive
           ? xcoff_private_rodata_section_name
           : xcoff_read_only_section_name,
           XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
}

/* Likewise for read-write sections.  */

static void
rs6000_xcoff_output_readwrite_section_asm_op (const char *)
{
  fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
           xcoff_private_data_section_name,
           XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
}

static void
rs6000_xcoff_output_tls_section_asm_op (const char *directive)
{
  fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
           directive
           ? xcoff_private_data_section_name
           : xcoff_tls_data_section_name,
           XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
}
/* A get_unnamed_section callback, used for switching to toc_section.  */

static void
rs6000_xcoff_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
{
  if (TARGET_MINIMAL_TOC)
    {
      /* toc_section is always selected at least once from
         rs6000_xcoff_file_start, so this is guaranteed to
         always be defined once and only once in each file.  */
      if (!toc_initialized)
        {
          fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
          fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
          toc_initialized = 1;
        }
      fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
               (TARGET_32BIT ? "" : ",3"));
    }
  else
    fputs ("\t.toc\n", asm_out_file);
}
/* Implement TARGET_ASM_INIT_SECTIONS.  */

static void
rs6000_xcoff_asm_init_sections (void)
{
  read_only_data_section
    = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
                           NULL);

  private_data_section
    = get_unnamed_section (SECTION_WRITE,
                           rs6000_xcoff_output_readwrite_section_asm_op,
                           NULL);

  read_only_private_data_section
    = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
                           "");

  tls_data_section
    = get_unnamed_section (SECTION_TLS,
                           rs6000_xcoff_output_tls_section_asm_op,
                           NULL);

  tls_private_data_section
    = get_unnamed_section (SECTION_TLS,
                           rs6000_xcoff_output_tls_section_asm_op,
                           "");

  toc_section
    = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);

  readonly_data_section = read_only_data_section;
}
static int
rs6000_xcoff_reloc_rw_mask (void)
{
  return 3;
}
static void
rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
                                tree decl ATTRIBUTE_UNUSED)
{
  int smclass;
  static const char * const suffix[7]
    = { "PR", "RO", "RW", "BS", "TL", "UL", "XO" };

  if (flags & SECTION_EXCLUDE)
    smclass = 6;
  else if (flags & SECTION_DEBUG)
    {
      fprintf (asm_out_file, "\t.dwsect %s\n", name);
      return;
    }
  else if (flags & SECTION_CODE)
    smclass = 0;
  else if (flags & SECTION_TLS)
    {
      if (flags & SECTION_BSS)
        smclass = 5;
      else
        smclass = 4;
    }
  else if (flags & SECTION_WRITE)
    {
      if (flags & SECTION_BSS)
        smclass = 3;
      else
        smclass = 2;
    }
  else
    smclass = 1;

  fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
           (flags & SECTION_CODE) ? "." : "",
           name, suffix[smclass], flags & SECTION_ENTSIZE);
}
#define IN_NAMED_SECTION(DECL) \
  ((TREE_CODE (DECL) == FUNCTION_DECL || VAR_P (DECL)) \
   && DECL_SECTION_NAME (DECL) != NULL)

static section *
rs6000_xcoff_select_section (tree decl, int reloc,
                             unsigned HOST_WIDE_INT align)
{
  /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
     a named section.  */
  if (align > BIGGEST_ALIGNMENT && VAR_OR_FUNCTION_DECL_P (decl))
    {
      resolve_unique_section (decl, reloc, true);
      if (IN_NAMED_SECTION (decl))
        return get_named_section (decl, NULL, reloc);
    }

  if (decl_readonly_section (decl, reloc))
    {
      if (TREE_PUBLIC (decl))
        return read_only_data_section;
      else
        return read_only_private_data_section;
    }
  else
    {
#if HAVE_AS_TLS
      if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
        {
          if (bss_initializer_p (decl))
            return tls_comm_section;
          else if (TREE_PUBLIC (decl))
            return tls_data_section;
          else
            return tls_private_data_section;
        }
      else
#endif
        if (TREE_PUBLIC (decl))
          return data_section;
        else
          return private_data_section;
    }
}
static void
rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
{
  const char *name;

  /* Use select_section for private data and uninitialized data with
     alignment <= BIGGEST_ALIGNMENT.  */
  if (!TREE_PUBLIC (decl)
      || DECL_COMMON (decl)
      || (DECL_INITIAL (decl) == NULL_TREE
          && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
      || DECL_INITIAL (decl) == error_mark_node
      || (flag_zero_initialized_in_bss
          && initializer_zerop (DECL_INITIAL (decl))))
    return;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = (*targetm.strip_name_encoding) (name);
  set_decl_section_name (decl, name);
}
/* Select section for constant in constant pool.

   On RS/6000, all constants are in the private read-only data area.
   However, if this is being placed in the TOC it must be output as a
   toc entry.  */

static section *
rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
                                 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
    return toc_section;
  else
    return read_only_private_data_section;
}
/* Remove any trailing [DS] or the like from the symbol name.  */

static const char *
rs6000_xcoff_strip_name_encoding (const char *name)
{
  size_t len;

  if (*name == '*')
    name++;
  len = strlen (name);
  if (name[len - 1] == ']')
    return ggc_alloc_string (name, len - 4);
  else
    return name;
}
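/* Worked example (descriptive note on the code above): every XCOFF
   mapping-class suffix is exactly four characters ("[DS]", "[RO]",
   "[TL]", ...), so keeping LEN - 4 characters strips one suffix:
   "foo[DS]" has length 7 and yields "foo".  */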
/* Section attributes.  AIX is always PIC.  */

static unsigned int
rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int align;
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (decl && DECL_P (decl) && VAR_P (decl) && bss_initializer_p (decl))
    flags |= SECTION_BSS;

  /* Align to at least UNIT size.  */
  if (!decl || !DECL_P (decl))
    align = MIN_UNITS_PER_WORD;
  /* Align code CSECT to at least 32 bytes.  */
  else if ((flags & SECTION_CODE) != 0)
    align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT), 32);
  else
    /* Increase alignment of large objects if not already stricter.  */
    align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
                 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
                 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);

  return flags | (exact_log2 (align) & SECTION_ENTSIZE);
}
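/* Worked example (descriptive note on the code above): the alignment is
   smuggled through the SECTION_ENTSIZE bits as a log2.  A code CSECT
   aligned to 32 bytes stores exact_log2 (32) == 5, and
   rs6000_xcoff_asm_named_section above prints that value back out as the
   trailing ",%u" of the .csect directive.  */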
/* Output at beginning of assembler file.

   Initialize the section names for the RS/6000 at this point.

   Specify filename, including full path, to assembler.

   We want to go into the TOC section so at least one .toc will be emitted.
   Also, in order to output proper .bs/.es pairs, we need at least one static
   [RW] section emitted.

   Finally, declare mcount when profiling to make the assembler happy.  */

static void
rs6000_xcoff_file_start (void)
{
  rs6000_gen_section_name (&xcoff_bss_section_name,
                           main_input_filename, ".bss_");
  rs6000_gen_section_name (&xcoff_private_data_section_name,
                           main_input_filename, ".rw_");
  rs6000_gen_section_name (&xcoff_private_rodata_section_name,
                           main_input_filename, ".rop_");
  rs6000_gen_section_name (&xcoff_read_only_section_name,
                           main_input_filename, ".ro_");
  rs6000_gen_section_name (&xcoff_tls_data_section_name,
                           main_input_filename, ".tls_");

  fputs ("\t.file\t", asm_out_file);
  output_quoted_string (asm_out_file, main_input_filename);
  fputc ('\n', asm_out_file);
  if (write_symbols != NO_DEBUG)
    switch_to_section (private_data_section);
  switch_to_section (toc_section);
  switch_to_section (text_section);
  if (profile_flag)
    fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
  rs6000_file_start ();
}
/* Output at end of assembler file.
   On the RS/6000, referencing data should automatically pull in text.  */

static void
rs6000_xcoff_file_end (void)
{
  switch_to_section (text_section);
  if (xcoff_tls_exec_model_detected)
    {
      /* Add a .ref to __tls_get_addr to force libpthread dependency.  */
      fputs ("\t.extern __tls_get_addr\n\t.ref __tls_get_addr\n",
             asm_out_file);
    }
  fputs ("_section_.text:\n", asm_out_file);
  switch_to_section (data_section);
  fputs (TARGET_32BIT
         ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
         asm_out_file);
}
struct declare_alias_data
{
  FILE *file;
  bool function_descriptor;
};

/* Declare alias N.  A helper function for for_node_and_aliases.  */

static bool
rs6000_declare_alias (struct symtab_node *n, void *d)
{
  struct declare_alias_data *data = (struct declare_alias_data *) d;
  /* Main symbol is output specially, because varasm machinery does part of
     the job for us - we do not need to declare .globl/lglobs and such.  */
  if (!n->alias || n->weakref)
    return false;

  if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
    return false;

  /* Prevent assemble_alias from trying to use .set pseudo operation
     that does not behave as expected by the middle-end.  */
  TREE_ASM_WRITTEN (n->decl) = true;

  const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
  char *buffer = (char *) alloca (strlen (name) + 2);
  char *p;
  int dollar_inside = 0;

  strcpy (buffer, name);
  p = strchr (buffer, '$');
  while (p)
    {
      *p = '_';
      dollar_inside++;
      p = strchr (p + 1, '$');
    }
  if (TREE_PUBLIC (n->decl))
    {
      if (!RS6000_WEAK || !DECL_WEAK (n->decl))
        {
          if (dollar_inside)
            {
              if (data->function_descriptor)
                fprintf (data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
              fprintf (data->file, "\t.rename %s,\"%s\"\n", buffer, name);
            }
          if (data->function_descriptor)
            {
              fputs ("\t.globl .", data->file);
              RS6000_OUTPUT_BASENAME (data->file, buffer);
              putc ('\n', data->file);
            }
          fputs ("\t.globl ", data->file);
          assemble_name (data->file, buffer);
          putc ('\n', data->file);
        }
#ifdef ASM_WEAKEN_DECL
      else if (DECL_WEAK (n->decl) && !data->function_descriptor)
        ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
#endif
    }
  else
    {
      if (dollar_inside)
        {
          if (data->function_descriptor)
            fprintf (data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
          fprintf (data->file, "\t.rename %s,\"%s\"\n", buffer, name);
        }
      if (data->function_descriptor)
        {
          fputs ("\t.lglobl .", data->file);
          RS6000_OUTPUT_BASENAME (data->file, buffer);
          putc ('\n', data->file);
        }
      fputs ("\t.lglobl ", data->file);
      assemble_name (data->file, buffer);
      putc ('\n', data->file);
    }
  if (data->function_descriptor)
    putc ('.', data->file);
  ASM_OUTPUT_LABEL (data->file, buffer);
  return false;
}
#ifdef HAVE_GAS_HIDDEN
/* Helper function to calculate visibility of a DECL
   and return the value as a const string.  */

static const char *
rs6000_xcoff_visibility (tree decl)
{
  static const char * const visibility_types[] = {
    "", ",protected", ",hidden", ",internal"
  };

  enum symbol_visibility vis = DECL_VISIBILITY (decl);
  return visibility_types[vis];
}
#endif
/* This macro produces the initial definition of a function name.
   On the RS/6000, we need to place an extra '.' in the function name and
   output the function descriptor.
   Dollar signs are converted to underscores.

   The csect for the function will have already been created when
   text_section was selected.  We do have to go back to that csect, however.

   The third and fourth parameters to the .function pseudo-op (16 and 044)
   are placeholders which no longer have any use.

   Because AIX assembler's .set command has unexpected semantics, we output
   all aliases as alternative labels in front of the definition.  */

void
rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
{
  char *buffer = (char *) alloca (strlen (name) + 1);
  char *p;
  int dollar_inside = 0;
  struct declare_alias_data data = {file, false};

  strcpy (buffer, name);
  p = strchr (buffer, '$');
  while (p)
    {
      *p = '_';
      dollar_inside++;
      p = strchr (p + 1, '$');
    }
  if (TREE_PUBLIC (decl))
    {
      if (!RS6000_WEAK || !DECL_WEAK (decl))
        {
          if (dollar_inside)
            {
              fprintf (file, "\t.rename .%s,\".%s\"\n", buffer, name);
              fprintf (file, "\t.rename %s,\"%s\"\n", buffer, name);
            }
          fputs ("\t.globl .", file);
          RS6000_OUTPUT_BASENAME (file, buffer);
#ifdef HAVE_GAS_HIDDEN
          fputs (rs6000_xcoff_visibility (decl), file);
#endif
          putc ('\n', file);
        }
    }
  else
    {
      if (dollar_inside)
        {
          fprintf (file, "\t.rename .%s,\".%s\"\n", buffer, name);
          fprintf (file, "\t.rename %s,\"%s\"\n", buffer, name);
        }
      fputs ("\t.lglobl .", file);
      RS6000_OUTPUT_BASENAME (file, buffer);
      putc ('\n', file);
    }

  fputs ("\t.csect ", file);
  assemble_name (file, buffer);
  fputs (TARGET_32BIT ? "\n" : ",3\n", file);

  ASM_OUTPUT_FUNCTION_LABEL (file, buffer, decl);

  symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
                                                        &data, true);
  fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
  RS6000_OUTPUT_BASENAME (file, buffer);
  fputs (", TOC[tc0], 0\n", file);

  in_section = NULL;
  switch_to_section (function_section (decl));
  putc ('.', file);
  ASM_OUTPUT_LABEL (file, buffer);

  data.function_descriptor = true;
  symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
                                                        &data, true);
  if (!DECL_IGNORED_P (decl))
    {
      if (dwarf_debuginfo_p ())
        {
          name = (*targetm.strip_name_encoding) (name);
          fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
        }
    }
  return;
}
/* Output assembly language to globalize a symbol from a DECL,
   possibly with visibility.  */

void
rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
  fputs (GLOBAL_ASM_OP, stream);
  assemble_name (stream, name);
#ifdef HAVE_GAS_HIDDEN
  fputs (rs6000_xcoff_visibility (decl), stream);
#endif
  putc ('\n', stream);
}
/* Output assembly language to define a symbol as COMMON from a DECL,
   possibly with visibility.  */

void
rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
                                             tree decl ATTRIBUTE_UNUSED,
                                             const char *name,
                                             unsigned HOST_WIDE_INT size,
                                             unsigned int align)
{
  unsigned int align2 = 2;

  if (align == 0)
    align = DATA_ABI_ALIGNMENT (TREE_TYPE (decl), DECL_ALIGN (decl));

  if (align > 32)
    align2 = floor_log2 (align / BITS_PER_UNIT);
  else if (size > 4)
    align2 = 3;

  if (! DECL_COMMON (decl))
    {
      /* Forget section.  */
      in_section = NULL;

      /* Globalize TLS BSS.  */
      if (TREE_PUBLIC (decl) && DECL_THREAD_LOCAL_P (decl))
        {
          fputs (GLOBAL_ASM_OP, stream);
          assemble_name (stream, name);
          fputc ('\n', stream);
        }

      /* Switch to section and skip space.  */
      fputs ("\t.csect ", stream);
      assemble_name (stream, name);
      fprintf (stream, ",%u\n", align2);
      ASM_DECLARE_OBJECT_NAME (stream, name, decl);
      ASM_OUTPUT_SKIP (stream, size ? size : 1);
      return;
    }

  if (TREE_PUBLIC (decl))
    {
      fprintf (stream,
               "\t.comm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%u" ,
               name, size, align2);

#ifdef HAVE_GAS_HIDDEN
      fputs (rs6000_xcoff_visibility (decl), stream);
#endif
      putc ('\n', stream);
    }
  else
    fprintf (stream,
             "\t.lcomm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%s,%u\n",
             (*targetm.strip_name_encoding) (name), size, name, align2);
}
/* This macro produces the initial definition of an object (variable) name.
   Because AIX assembler's .set command has unexpected semantics, we output
   all aliases as alternative labels in front of the definition.  */

void
rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
{
  struct declare_alias_data data = {file, false};
  ASM_OUTPUT_LABEL (file, name);
  symtab_node::get_create (decl)
    ->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
}
/* Override the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'.  */

void
rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
{
  fputs (integer_asm_op (size, FALSE), file);
  assemble_name (file, label);
  fputs ("-$", file);
}
/* Output a symbol offset relative to the dbase for the current object.
   We use __gcc_unwind_dbase as an arbitrary base for dbase.

   __gcc_unwind_dbase is embedded in all executables/libraries through
   libgcc/config/rs6000/crtdbase.S.  */

void
rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
{
  fputs (integer_asm_op (size, FALSE), file);
  assemble_name (file, label);
  fputs ("-__gcc_unwind_dbase", file);
}
#ifdef HAVE_AS_TLS
static void
rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
{
  rtx symbol;
  int flags;
  const char *symname;

  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (!MEM_P (rtl))
    return;
  symbol = XEXP (rtl, 0);
  if (!SYMBOL_REF_P (symbol))
    return;

  flags = SYMBOL_REF_FLAGS (symbol);

  if (VAR_P (decl) && DECL_THREAD_LOCAL_P (decl))
    flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;

  SYMBOL_REF_FLAGS (symbol) = flags;

  symname = XSTR (symbol, 0);

  /* Append CSECT mapping class, unless the symbol already is qualified.
     Aliases are implemented as labels, so the symbol name should not add
     a mapping class.  */
  if (decl
      && DECL_P (decl)
      && VAR_OR_FUNCTION_DECL_P (decl)
      && (symtab_node::get (decl) == NULL
          || symtab_node::get (decl)->alias == 0)
      && symname[strlen (symname) - 1] != ']')
    {
      const char *smclass = NULL;

      if (TREE_CODE (decl) == FUNCTION_DECL)
        smclass = "[DS]";
      else if (DECL_THREAD_LOCAL_P (decl))
        {
          if (bss_initializer_p (decl))
            smclass = "[UL]";
          else if (flag_data_sections)
            smclass = "[TL]";
        }
      else if (DECL_EXTERNAL (decl))
        smclass = "[UA]";
      else if (bss_initializer_p (decl))
        {
          if (flag_data_sections)
            smclass = "[BS]";
        }
      else if (flag_data_sections)
        {
          /* This must exactly match the logic of select section.  */
          if (decl_readonly_section (decl, compute_reloc_for_var (decl)))
            smclass = "[RO]";
          else
            smclass = "[RW]";
        }

      if (smclass != NULL)
        {
          char *newname = XALLOCAVEC (char, strlen (symname) + 5);

          strcpy (newname, symname);
          strcat (newname, smclass);
          XSTR (symbol, 0) = ggc_strdup (newname);
        }
    }
}
#endif /* HAVE_AS_TLS */
#endif /* TARGET_XCOFF */
void
rs6000_asm_weaken_decl (FILE *stream, tree decl,
                        const char *name, const char *val)
{
  fputs ("\t.weak\t", stream);
  assemble_name (stream, name);
  if (decl && TREE_CODE (decl) == FUNCTION_DECL
      && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
    {
#if TARGET_XCOFF && HAVE_GAS_HIDDEN
      if (decl)
        fputs (rs6000_xcoff_visibility (decl), stream);
#endif
      fputs ("\n\t.weak\t.", stream);
      RS6000_OUTPUT_BASENAME (stream, name);
    }
#if TARGET_XCOFF && HAVE_GAS_HIDDEN
  if (decl)
    fputs (rs6000_xcoff_visibility (decl), stream);
#endif
  fputc ('\n', stream);

  if (val)
    {
#ifdef ASM_OUTPUT_DEF
      ASM_OUTPUT_DEF (stream, name, val);
#endif
      if (decl && TREE_CODE (decl) == FUNCTION_DECL
          && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
        {
          fputs ("\t.set\t.", stream);
          RS6000_OUTPUT_BASENAME (stream, name);
          fputs (",.", stream);
          RS6000_OUTPUT_BASENAME (stream, val);
          fputc ('\n', stream);
        }
    }
}
/* Return true if INSN should not be copied.  */

static bool
rs6000_cannot_copy_insn_p (rtx_insn *insn)
{
  return recog_memoized (insn) >= 0
         && get_attr_cannot_copy (insn);
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
                  int opno ATTRIBUTE_UNUSED, int *total, bool speed)
{
  int code = GET_CODE (x);

  switch (code)
    {
      /* On the RS/6000, if it is valid in the insn, it is free.  */
    case CONST_INT:
      if (((outer_code == SET
            || outer_code == PLUS
            || outer_code == MINUS)
           && (satisfies_constraint_I (x)
               || satisfies_constraint_L (x)))
          || (outer_code == AND
              && (satisfies_constraint_K (x)
                  || (mode == SImode
                      ? satisfies_constraint_L (x)
                      : satisfies_constraint_J (x))))
          || ((outer_code == IOR || outer_code == XOR)
              && (satisfies_constraint_K (x)
                  || (mode == SImode
                      ? satisfies_constraint_L (x)
                      : satisfies_constraint_J (x))))
          || outer_code == ASHIFT
          || outer_code == ASHIFTRT
          || outer_code == LSHIFTRT
          || outer_code == ROTATE
          || outer_code == ROTATERT
          || outer_code == ZERO_EXTRACT
          || (outer_code == MULT
              && satisfies_constraint_I (x))
          || ((outer_code == DIV || outer_code == UDIV
               || outer_code == MOD || outer_code == UMOD)
              && exact_log2 (INTVAL (x)) >= 0)
          || (outer_code == COMPARE
              && (satisfies_constraint_I (x)
                  || satisfies_constraint_K (x)))
          || ((outer_code == EQ || outer_code == NE)
              && (satisfies_constraint_I (x)
                  || satisfies_constraint_K (x)
                  || (mode == SImode
                      ? satisfies_constraint_L (x)
                      : satisfies_constraint_J (x))))
          || (outer_code == GTU
              && satisfies_constraint_I (x))
          || (outer_code == LTU
              && satisfies_constraint_P (x)))
        {
          *total = 0;
          return true;
        }
      else if ((outer_code == PLUS
                && reg_or_add_cint_operand (x, mode))
               || (outer_code == MINUS
                   && reg_or_sub_cint_operand (x, mode))
               || ((outer_code == SET
                    || outer_code == IOR
                    || outer_code == XOR)
                   && (INTVAL (x)
                       & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
        {
          *total = COSTS_N_INSNS (1);
          return true;
        }
      /* FALLTHRU */

    case CONST_DOUBLE:
    case CONST_WIDE_INT:
    case CONST:
    case HIGH:
    case SYMBOL_REF:
      *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
      return true;

    case MEM:
      /* When optimizing for size, MEM should be slightly more expensive
         than generating address, e.g., (plus (reg) (const)).
         L1 cache latency is about two instructions.  */
      *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
      if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
        *total += COSTS_N_INSNS (100);
      return true;

    case LABEL_REF:
      *total = 0;
      return true;

    case PLUS:
    case MINUS:
      if (FLOAT_MODE_P (mode))
        *total = rs6000_cost->fp;
      else
        *total = COSTS_N_INSNS (1);
      return false;

    case MULT:
      if (CONST_INT_P (XEXP (x, 1))
          && satisfies_constraint_I (XEXP (x, 1)))
        {
          if (INTVAL (XEXP (x, 1)) >= -256
              && INTVAL (XEXP (x, 1)) <= 255)
            *total = rs6000_cost->mulsi_const9;
          else
            *total = rs6000_cost->mulsi_const;
        }
      else if (mode == SFmode)
        *total = rs6000_cost->fp;
      else if (FLOAT_MODE_P (mode))
        *total = rs6000_cost->dmul;
      else if (mode == DImode)
        *total = rs6000_cost->muldi;
      else
        *total = rs6000_cost->mulsi;
      return false;

    case FMA:
      if (mode == SFmode)
        *total = rs6000_cost->fp;
      else
        *total = rs6000_cost->dmul;
      break;

    case DIV:
    case MOD:
      if (FLOAT_MODE_P (mode))
        {
          *total = mode == DFmode ? rs6000_cost->ddiv
                                  : rs6000_cost->sdiv;
          return false;
        }
      /* FALLTHRU */

    case UDIV:
    case UMOD:
      if (CONST_INT_P (XEXP (x, 1))
          && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
        {
          if (code == DIV || code == MOD)
            /* Shift, addze.  */
            *total = COSTS_N_INSNS (2);
          else
            /* Shift.  */
            *total = COSTS_N_INSNS (1);
        }
      else
        {
          if (GET_MODE (XEXP (x, 1)) == DImode)
            *total = rs6000_cost->divdi;
          else
            *total = rs6000_cost->divsi;
        }
      /* Add in shift and subtract for MOD unless we have a mod
         instruction.  */
      if ((!TARGET_MODULO
           || (RS6000_DISABLE_SCALAR_MODULO && SCALAR_INT_MODE_P (mode)))
          && (code == MOD || code == UMOD))
        *total += COSTS_N_INSNS (2);
      return false;

    case CTZ:
      *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
      return false;

    case FFS:
      *total = COSTS_N_INSNS (4);
      return false;

    case POPCOUNT:
      *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
      return false;

    case PARITY:
      *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
      return false;

    case NOT:
      if (outer_code == AND || outer_code == IOR || outer_code == XOR)
        *total = 0;
      else
        *total = COSTS_N_INSNS (1);
      return false;

    case AND:
      if (CONST_INT_P (XEXP (x, 1)))
        {
          rtx left = XEXP (x, 0);
          rtx_code left_code = GET_CODE (left);

          /* rotate-and-mask: 1 insn.  */
          if ((left_code == ROTATE
               || left_code == ASHIFT
               || left_code == LSHIFTRT)
              && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
            {
              *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
              if (!CONST_INT_P (XEXP (left, 1)))
                *total += rtx_cost (XEXP (left, 1), SImode, left_code,
                                    1, speed);
              *total += COSTS_N_INSNS (1);
              return true;
            }

          /* rotate-and-mask (no rotate), andi., andis.: 1 insn.  */
          HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
          if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
              || (val & 0xffff) == val
              || (val & 0xffff0000) == val
              || ((val & 0xffff) == 0 && mode == SImode))
            {
              *total = rtx_cost (left, mode, AND, 0, speed);
              *total += COSTS_N_INSNS (1);
              return true;
            }

          /* 2 insns.  */
          if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
            {
              *total = rtx_cost (left, mode, AND, 0, speed);
              *total += COSTS_N_INSNS (2);
              return true;
            }
        }

      *total = COSTS_N_INSNS (1);
      return false;

    case IOR:
      /* FALLTHRU */
    case XOR:
      *total = COSTS_N_INSNS (1);
      return false;

    case ZERO_EXTRACT:
      *total = COSTS_N_INSNS (1);
      return false;

    case ASHIFT:
      /* The EXTSWSLI instruction is a combined instruction.  Don't count both
         the sign extend and shift separately within the insn.  */
      if (TARGET_EXTSWSLI && mode == DImode
          && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
          && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
        {
          *total = 0;
          return false;
        }
      /* FALLTHRU */

    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATE:
    case ROTATERT:
      /* Handle mul_highpart.  */
      if (outer_code == TRUNCATE
          && GET_CODE (XEXP (x, 0)) == MULT)
        {
          if (mode == DImode)
            *total = rs6000_cost->muldi;
          else
            *total = rs6000_cost->mulsi;
          return true;
        }
      else if (outer_code == AND)
        *total = 0;
      else
        *total = COSTS_N_INSNS (1);
      return false;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      if (MEM_P (XEXP (x, 0)))
        *total = 0;
      else
        *total = COSTS_N_INSNS (1);
      return false;

    case COMPARE:
    case NEG:
    case ABS:
      if (!FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (1);
          return false;
        }
      /* FALLTHRU */

    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
    case FLOAT_TRUNCATE:
      *total = rs6000_cost->fp;
      return false;

    case FLOAT_EXTEND:
      if (mode == DFmode)
        *total = rs6000_cost->sfdf_convert;
      else
        *total = rs6000_cost->fp;
      return false;

    case CALL:
    case IF_THEN_ELSE:
      if (!speed)
        {
          *total = COSTS_N_INSNS (1);
          return true;
        }
      else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
        {
          *total = rs6000_cost->fp;
          return false;
        }
      break;

    case NE:
    case EQ:
    case GTU:
    case LTU:
      /* Carry bit requires mode == Pmode.
         NEG or PLUS already counted so only add one.  */
      if (mode == Pmode
          && (outer_code == NEG || outer_code == PLUS))
        {
          *total = COSTS_N_INSNS (1);
          return true;
        }
      /* FALLTHRU */

    case GT:
    case LT:
    case UNORDERED:
      if (outer_code == SET)
        {
          if (XEXP (x, 1) == const0_rtx)
            {
              *total = COSTS_N_INSNS (2);
              return true;
            }
          else
            {
              *total = COSTS_N_INSNS (3);
              return false;
            }
        }
      /* CC COMPARE.  */
      if (outer_code == COMPARE)
        {
          *total = 0;
          return true;
        }
      break;

    case UNSPEC_VOLATILE:
      if (XINT (x, 1) == UNSPECV_MMA_XXSETACCZ)
        {
          *total = 0;
          return true;
        }
      break;

    default:
      break;
    }

  return false;
}
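/* Worked example (descriptive note on the code above): COSTS_N_INSNS (n)
   scales n by the cost of one fast instruction (4), so the MEM case
   charges COSTS_N_INSNS (2) == 8 when optimizing for speed, and a
   misaligned access on a slow-unaligned target adds COSTS_N_INSNS (100)
   == 400 on top, which effectively forbids it.  */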
/* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost.  */

static bool
rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
                        int opno, int *total, bool speed)
{
  bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);

  fprintf (stderr,
           "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
           "opno = %d, total = %d, speed = %s, x:\n",
           ret ? "complete" : "scan inner",
           GET_MODE_NAME (mode),
           GET_RTX_NAME (outer_code),
           opno,
           *total,
           speed ? "true" : "false");

  debug_rtx (x);

  return ret;
}
static int
rs6000_insn_cost (rtx_insn *insn, bool speed)
{
  if (recog_memoized (insn) < 0)
    return 0;

  /* If we are optimizing for size, just use the length.  */
  if (!speed)
    return get_attr_length (insn);

  /* Use the cost if provided.  */
  int cost = get_attr_cost (insn);
  if (cost > 0)
    return cost;

  /* If the insn tells us how many insns there are, use that.  Otherwise use
     the length/4.  Adjust the insn length to remove the extra size that
     prefixed instructions take.  */
  int n = get_attr_num_insns (insn);
  if (n == 0)
    {
      int length = get_attr_length (insn);
      if (get_attr_prefixed (insn) == PREFIXED_YES)
        {
          int adjust = 0;
          ADJUST_INSN_LENGTH (insn, adjust);
          length -= adjust;
        }

      n = length / 4;
    }

  enum attr_type type = get_attr_type (insn);

  switch (type)
    {
    case TYPE_LOAD:
    case TYPE_FPLOAD:
    case TYPE_VECLOAD:
      cost = COSTS_N_INSNS (n + 1);
      break;

    case TYPE_MUL:
      switch (get_attr_size (insn))
        {
        case SIZE_8:
          cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
          break;
        case SIZE_16:
          cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
          break;
        case SIZE_32:
          cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
          break;
        case SIZE_64:
          cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
          break;
        default:
          gcc_unreachable ();
        }
      break;

    case TYPE_DIV:
      switch (get_attr_size (insn))
        {
        case SIZE_32:
          cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
          break;
        case SIZE_64:
          cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
          break;
        default:
          gcc_unreachable ();
        }
      break;

    case TYPE_FP:
      cost = n * rs6000_cost->fp;
      break;

    case TYPE_DMUL:
      cost = n * rs6000_cost->dmul;
      break;

    case TYPE_SDIV:
      cost = n * rs6000_cost->sdiv;
      break;

    case TYPE_DDIV:
      cost = n * rs6000_cost->ddiv;
      break;

    case TYPE_SYNC:
    case TYPE_LOAD_L:
    case TYPE_MFCR:
    case TYPE_MFCRF:
      cost = COSTS_N_INSNS (n + 2);
      break;

    default:
      cost = COSTS_N_INSNS (n);
    }

  return cost;
}
/* Debug form of ADDRESS_COST that is selected if -mdebug=cost.  */

static int
rs6000_debug_address_cost (rtx x, machine_mode mode,
                           addr_space_t as, bool speed)
{
  int ret = TARGET_ADDRESS_COST (x, mode, as, speed);

  fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
           ret, speed ? "true" : "false");
  debug_rtx (x);

  return ret;
}
/* A C expression returning the cost of moving data from a register of class
   CLASS1 to one of CLASS2.  */

static int
rs6000_register_move_cost (machine_mode mode,
                           reg_class_t from, reg_class_t to)
{
  int ret;
  reg_class_t rclass;

  if (TARGET_DEBUG_COST)
    dbg_cost_ctrl++;

  /* If we have VSX, we can easily move between FPR or Altivec registers,
     otherwise we can only easily move within classes.
     Do this first so we give best-case answers for union classes
     containing both gprs and vsx regs.  */
  HARD_REG_SET to_vsx, from_vsx;
  to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
  from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
  if (!hard_reg_set_empty_p (to_vsx)
      && !hard_reg_set_empty_p (from_vsx)
      && (TARGET_VSX
          || hard_reg_set_intersect_p (to_vsx, from_vsx)))
    {
      int reg = FIRST_FPR_REGNO;
      if (TARGET_VSX
          || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
              && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
        reg = FIRST_ALTIVEC_REGNO;
      ret = 2 * hard_regno_nregs (reg, mode);
    }

  /* Moves from/to GENERAL_REGS.  */
  else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
           || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
    {
      if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS
          || rclass == VSX_REGS)
        {
          if (TARGET_DIRECT_MOVE)
            {
              /* Keep the cost for direct moves above that for within
                 a register class even if the actual processor cost is
                 comparable.  We do this because a direct move insn
                 can't be a nop, whereas with ideal register
                 allocation a move within the same class might turn
                 out to be a nop.  */
              if (rs6000_tune == PROCESSOR_POWER9
                  || rs6000_tune == PROCESSOR_POWER10
                  || rs6000_tune == PROCESSOR_POWER11)
                ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
              else
                ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
              /* SFmode requires a conversion when moving between gprs
                 and vsx.  */
              if (mode == SFmode)
                ret += 2;
            }
          else
            ret = (rs6000_memory_move_cost (mode, rclass, false)
                   + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
        }

      /* It's more expensive to move CR_REGS than CR0_REGS because of the
         shift.  */
      else if (rclass == CR_REGS)
        ret = 4;

      /* For those processors that have slow LR/CTR moves, make them more
         expensive than memory in order to bias spills to memory.  */
      else if ((rs6000_tune == PROCESSOR_POWER6
                || rs6000_tune == PROCESSOR_POWER7
                || rs6000_tune == PROCESSOR_POWER8
                || rs6000_tune == PROCESSOR_POWER9)
               && reg_class_subset_p (rclass, SPECIAL_REGS))
        ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);

      else
        /* A move will cost one instruction per GPR moved.  */
        ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
    }

  /* Everything else has to go through GENERAL_REGS.  */
  else
    ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
           + rs6000_register_move_cost (mode, from, GENERAL_REGS));

  if (TARGET_DEBUG_COST)
    {
      if (dbg_cost_ctrl == 1)
        fprintf (stderr,
                 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
                 ret, GET_MODE_NAME (mode), reg_class_names[from],
                 reg_class_names[to]);
      dbg_cost_ctrl--;
    }

  return ret;
}
/* A C expression returning the cost of moving data of MODE from a register
   to or from memory.  */

static int
rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
                         bool in ATTRIBUTE_UNUSED)
{
  int ret;

  if (TARGET_DEBUG_COST)
    dbg_cost_ctrl++;

  if (reg_classes_intersect_p (rclass, GENERAL_REGS))
    ret = 4 * hard_regno_nregs (0, mode);
  else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
            || reg_classes_intersect_p (rclass, VSX_REGS)))
    ret = 4 * hard_regno_nregs (32, mode);
  else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
    ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
  else
    ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);

  if (TARGET_DEBUG_COST)
    {
      if (dbg_cost_ctrl == 1)
        fprintf (stderr,
                 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
                 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
      dbg_cost_ctrl--;
    }

  return ret;
}
/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.

   The register allocator chooses GEN_OR_VSX_REGS for the allocno
   class if GENERAL_REGS and VSX_REGS cost is lower than the memory
   cost.  This happens a lot when TARGET_DIRECT_MOVE makes the register
   move cost between GENERAL_REGS and VSX_REGS low.

   It might seem reasonable to use a union class.  After all, if usage
   of vsr is low and gpr high, it might make sense to spill gpr to vsr
   rather than memory.  However, in cases where register pressure of
   both is high, like the cactus_adm spec test, allowing
   GEN_OR_VSX_REGS as the allocno class results in bad decisions in
   the first scheduling pass.  This is partly due to an allocno of
   GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
   class, which gives too high a pressure for GENERAL_REGS and too low
   for VSX_REGS.  So, force a choice of the subclass here.

   The best class is also the union if GENERAL_REGS and VSX_REGS have
   the same cost.  In that case we do use GEN_OR_VSX_REGS as the
   allocno class, since trying to narrow down the class by regno mode
   is prone to error.  For example, SImode is allowed in VSX regs and
   in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
   it would be wrong to choose an allocno of GENERAL_REGS based on
   SImode.  */

static reg_class_t
rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
                                        reg_class_t allocno_class,
                                        reg_class_t best_class)
{
  switch (allocno_class)
    {
    case GEN_OR_VSX_REGS:
      /* best_class must be a subset of allocno_class.  */
      gcc_checking_assert (best_class == GEN_OR_VSX_REGS
                           || best_class == GEN_OR_FLOAT_REGS
                           || best_class == VSX_REGS
                           || best_class == ALTIVEC_REGS
                           || best_class == FLOAT_REGS
                           || best_class == GENERAL_REGS
                           || best_class == BASE_REGS);
      /* Use best_class but choose wider classes when copying from the
         wider class to best_class is cheap.  This mimics IRA choice
         of allocno class.  */
      if (best_class == BASE_REGS)
        return GENERAL_REGS;
      if (TARGET_VSX && best_class == FLOAT_REGS)
        return VSX_REGS;
      return best_class;

    case VSX_REGS:
      if (best_class == ALTIVEC_REGS)
        return ALTIVEC_REGS;
      break;

    default:
      break;
    }

  return allocno_class;
}
/* Load up a constant.  If the mode is a vector mode, splat the value across
   all of the vector elements.  */

static rtx
rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
{
  rtx reg;

  if (mode == SFmode || mode == DFmode)
    {
      rtx d = const_double_from_real_value (dconst, mode);
      reg = force_reg (mode, d);
    }
  else if (mode == V4SFmode)
    {
      rtx d = const_double_from_real_value (dconst, SFmode);
      rtvec v = gen_rtvec (4, d, d, d, d);
      reg = gen_reg_rtx (mode);
      rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
    }
  else if (mode == V2DFmode)
    {
      rtx d = const_double_from_real_value (dconst, DFmode);
      rtvec v = gen_rtvec (2, d, d);
      reg = gen_reg_rtx (mode);
      rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
    }
  else
    gcc_unreachable ();

  return reg;
}
/* Generate an FMA instruction.  */

static void
rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
{
  machine_mode mode = GET_MODE (target);
  rtx dst;

  dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
  gcc_assert (dst != NULL);

  if (dst != target)
    emit_move_insn (target, dst);
}
/* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a).  */

static void
rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
{
  machine_mode mode = GET_MODE (dst);
  rtx r;

  /* This is a tad more complicated, since the fnma_optab is for
     a different expression: fma(-m1, m2, a), which is the same
     thing except in the case of signed zeros.

     Fortunately we know that if FMA is supported that FNMSUB is
     also supported in the ISA.  Just expand it directly.  */

  gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);

  r = gen_rtx_NEG (mode, a);
  r = gen_rtx_FMA (mode, m1, m2, r);
  r = gen_rtx_NEG (mode, r);
  emit_insn (gen_rtx_SET (dst, r));
}
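/* Illustrative sketch (not part of the compiler; kept out of the build
   with #if 0): the identity used above is -fma(m1, m2, -a) ==
   -(m1*m2 - a) == a - m1*m2, i.e. FNMSUB, checked with the C99 fma()
   from <math.h>.  */
#if 0
#include <math.h>
#include <stdio.h>

int
main (void)
{
  double m1 = 3.0, m2 = 5.0, a = 20.0;
  double nmsub = -fma (m1, m2, -a);  /* -(15 - 20) = 5.0  */
  printf ("%g\n", nmsub);
  return 0;
}
#endif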
/* Newton-Raphson approximation of floating point divide DST = N/D.  If NOTE_P,
   add a reg_note saying that this was a division.  Support both scalar and
   vector divide.  Assumes no trapping math and finite arguments.  */

void
rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
{
  machine_mode mode = GET_MODE (dst);
  rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
  int i;

  /* Low precision estimates guarantee 5 bits of accuracy.  High
     precision estimates guarantee 14 bits of accuracy.  SFmode
     requires 23 bits of accuracy.  DFmode requires 52 bits of
     accuracy.  Each pass at least doubles the accuracy, leading
     to the following.  */
  int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
  if (mode == DFmode || mode == V2DFmode)
    passes += 2;

  enum insn_code code = optab_handler (smul_optab, mode);
  insn_gen_fn gen_mul = GEN_FCN (code);

  gcc_assert (code != CODE_FOR_nothing);

  one = rs6000_load_constant_and_splat (mode, dconst1);

  /* x0 = 1./d estimate */
  x0 = gen_reg_rtx (mode);
  emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
                                              UNSPEC_FRES)));

  /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i).  */
  if (passes > 1)
    {
      /* e0 = 1. - d * x0  */
      e0 = gen_reg_rtx (mode);
      rs6000_emit_nmsub (e0, d, x0, one);

      /* x1 = x0 + e0 * x0  */
      x1 = gen_reg_rtx (mode);
      rs6000_emit_madd (x1, e0, x0, x0);

      for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
           ++i, xprev = xnext, eprev = enext)
        {
          /* enext = eprev * eprev  */
          enext = gen_reg_rtx (mode);
          emit_insn (gen_mul (enext, eprev, eprev));

          /* xnext = xprev + enext * xprev  */
          xnext = gen_reg_rtx (mode);
          rs6000_emit_madd (xnext, enext, xprev, xprev);
        }
    }
  else
    xprev = x0;

  /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i).  */

  /* u = n * xprev  */
  u = gen_reg_rtx (mode);
  emit_insn (gen_mul (u, n, xprev));

  /* v = n - (d * u)  */
  v = gen_reg_rtx (mode);
  rs6000_emit_nmsub (v, d, u, n);

  /* dst = (v * xprev) + u  */
  rs6000_emit_madd (dst, v, xprev, u);

  if (note_p)
    add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
}
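/* Illustrative sketch (not part of the compiler; kept out of the build
   with #if 0): a plain-double model of the scalar sequence above.  The
   refinement is the classic Newton-Raphson reciprocal iteration
   x_{i+1} = x_i * (2 - d * x_i), which at least doubles the number of
   correct bits per pass; the estimate value 0.34 below simply stands in
   for the hardware fres result.  */
#if 0
#include <stdio.h>

static double
swdiv_model (double n, double d, double estimate, int passes)
{
  double x = estimate;
  for (int i = 0; i < passes - 1; i++)
    x = x * (2.0 - d * x);  /* refine the 1/d estimate  */
  double u = n * x;         /* u = n * xprev  */
  double v = n - d * u;     /* residual, an FNMSUB in the real expansion  */
  return v * x + u;         /* final FMADD  */
}

int
main (void)
{
  /* A crude estimate of 1/3 refined over several passes.  */
  printf ("%.17g\n", swdiv_model (1.0, 3.0, 0.34, 5));
  return 0;
}
#endif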
/* Goldschmidt's Algorithm for single/double-precision floating point
   sqrt and rsqrt.  Assumes no trapping math and finite arguments.  */

void
rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
{
  machine_mode mode = GET_MODE (src);
  rtx e = gen_reg_rtx (mode);
  rtx g = gen_reg_rtx (mode);
  rtx h = gen_reg_rtx (mode);

  /* Low precision estimates guarantee 5 bits of accuracy.  High
     precision estimates guarantee 14 bits of accuracy.  SFmode
     requires 23 bits of accuracy.  DFmode requires 52 bits of
     accuracy.  Each pass at least doubles the accuracy, leading
     to the following.  */
  int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
  if (mode == DFmode || mode == V2DFmode)
    passes += 2;

  int i;
  rtx mhalf;
  enum insn_code code = optab_handler (smul_optab, mode);
  insn_gen_fn gen_mul = GEN_FCN (code);

  gcc_assert (code != CODE_FOR_nothing);

  mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);

  /* e = rsqrt estimate */
  emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
                                             UNSPEC_RSQRT)));

  /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero = force_reg (mode, CONST0_RTX (mode));

      if (mode == SFmode)
        {
          rtx target = emit_conditional_move (e, { GT, src, zero, mode },
                                              e, zero, mode, 0);
          if (target != e)
            emit_move_insn (e, target);
        }
      else
        {
          rtx cond = gen_rtx_GT (VOIDmode, e, zero);
          rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
        }
    }

  /* g = sqrt estimate.  */
  emit_insn (gen_mul (g, e, src));
  /* h = 1/(2*sqrt) estimate.  */
  emit_insn (gen_mul (h, e, mhalf));

  if (recip)
    {
      if (passes == 1)
        {
          rtx t = gen_reg_rtx (mode);
          rs6000_emit_nmsub (t, g, h, mhalf);
          /* Apply correction directly to 1/rsqrt estimate.  */
          rs6000_emit_madd (dst, e, t, e);
        }
      else
        {
          for (i = 0; i < passes; i++)
            {
              rtx t1 = gen_reg_rtx (mode);
              rtx g1 = gen_reg_rtx (mode);
              rtx h1 = gen_reg_rtx (mode);

              rs6000_emit_nmsub (t1, g, h, mhalf);
              rs6000_emit_madd (g1, g, t1, g);
              rs6000_emit_madd (h1, h, t1, h);

              g = g1;
              h = h1;
            }
          /* Multiply by 2 for 1/rsqrt.  */
          emit_insn (gen_add3_insn (dst, h, h));
        }
    }
  else
    {
      rtx t = gen_reg_rtx (mode);
      rs6000_emit_nmsub (t, g, h, mhalf);
      rs6000_emit_madd (dst, g, t, g);
    }

  return;
}
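/* Illustrative sketch (not part of the compiler; kept out of the build
   with #if 0): Goldschmidt's iteration keeps two coupled estimates,
   g ~ sqrt(src) and h ~ 1/(2*sqrt(src)), and refines both from the
   shared residual t = 1/2 - g*h, as the loop above does in RTL.  The
   estimate 0.70 stands in for the hardware rsqrt estimate of 2.0.  */
#if 0
#include <stdio.h>

static double
swsqrt_model (double src, double e /* rsqrt estimate */, int passes)
{
  double g = e * src;  /* sqrt estimate  */
  double h = e * 0.5;  /* 1/(2*sqrt) estimate  */
  for (int i = 0; i < passes; i++)
    {
      double t = 0.5 - g * h;  /* an FNMSUB in the real expansion  */
      g = g + g * t;
      h = h + h * t;
    }
  return g;
}

int
main (void)
{
  printf ("%.17g\n", swsqrt_model (2.0, 0.70, 5));  /* ~1.4142135623730951 */
  return 0;
}
#endif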
/* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
   (Power7) targets.  DST is the target, and SRC is the argument operand.  */

void
rs6000_emit_popcount (rtx dst, rtx src)
{
  machine_mode mode = GET_MODE (dst);
  rtx tmp1, tmp2;

  /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can.  */
  if (TARGET_POPCNTD)
    {
      if (mode == SImode)
        emit_insn (gen_popcntdsi2 (dst, src));
      else
        emit_insn (gen_popcntddi2 (dst, src));
      return;
    }

  tmp1 = gen_reg_rtx (mode);

  if (mode == SImode)
    {
      emit_insn (gen_popcntbsi2 (tmp1, src));
      tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
                          NULL_RTX, 0);
      tmp2 = force_reg (SImode, tmp2);
      emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
    }
  else
    {
      emit_insn (gen_popcntbdi2 (tmp1, src));
      tmp2 = expand_mult (DImode, tmp1,
                          GEN_INT ((HOST_WIDE_INT)
                                   0x01010101 << 32 | 0x01010101),
                          NULL_RTX, 0);
      tmp2 = force_reg (DImode, tmp2);
      emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
    }
}
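/* Illustrative sketch (not part of the compiler; kept out of the build
   with #if 0): popcntb leaves a per-byte population count in each byte;
   multiplying by 0x01010101 sums all byte counts into the top byte, and
   the final shift extracts it.  The same trick in portable C, with a
   bit-twiddling stand-in for the popcntb instruction: */
#if 0
#include <stdint.h>
#include <stdio.h>

static unsigned
popcount32_model (uint32_t x)
{
  /* Per-byte popcount, standing in for popcntb.  */
  uint32_t b = x - ((x >> 1) & 0x55555555);
  b = (b & 0x33333333) + ((b >> 2) & 0x33333333);
  b = (b + (b >> 4)) & 0x0f0f0f0f;
  /* Multiply-and-shift step matching rs6000_emit_popcount.  */
  return (b * 0x01010101u) >> 24;
}

int
main (void)
{
  printf ("%u\n", popcount32_model (0xf0f0f0f0u));  /* prints 16  */
  return 0;
}
#endif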
/* Emit parity intrinsic on TARGET_POPCNTB targets.  DST is the
   target, and SRC is the argument operand.  */

void
rs6000_emit_parity (rtx dst, rtx src)
{
  machine_mode mode = GET_MODE (dst);
  rtx tmp;

  tmp = gen_reg_rtx (mode);

  /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can.  */
  if (TARGET_CMPB)
    {
      if (mode == SImode)
        {
          emit_insn (gen_popcntbsi2 (tmp, src));
          emit_insn (gen_paritysi2_cmpb (dst, tmp));
        }
      else
        {
          emit_insn (gen_popcntbdi2 (tmp, src));
          emit_insn (gen_paritydi2_cmpb (dst, tmp));
        }
      return;
    }

  if (mode == SImode)
    {
      /* Is mult+shift >= shift+xor+shift+xor?  */
      if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
        {
          rtx tmp1, tmp2, tmp3, tmp4;

          tmp1 = gen_reg_rtx (SImode);
          emit_insn (gen_popcntbsi2 (tmp1, src));

          tmp2 = gen_reg_rtx (SImode);
          emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
          tmp3 = gen_reg_rtx (SImode);
          emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));

          tmp4 = gen_reg_rtx (SImode);
          emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
          emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
        }
      else
        rs6000_emit_popcount (tmp, src);
      emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
    }
  else
    {
      /* Is mult+shift >= shift+xor+shift+xor+shift+xor?  */
      if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
        {
          rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;

          tmp1 = gen_reg_rtx (DImode);
          emit_insn (gen_popcntbdi2 (tmp1, src));

          tmp2 = gen_reg_rtx (DImode);
          emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
          tmp3 = gen_reg_rtx (DImode);
          emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));

          tmp4 = gen_reg_rtx (DImode);
          emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
          tmp5 = gen_reg_rtx (DImode);
          emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));

          tmp6 = gen_reg_rtx (DImode);
          emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
          emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
        }
      else
        rs6000_emit_popcount (tmp, src);
      emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
    }
}
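/* Illustrative sketch (not part of the compiler; kept out of the build
   with #if 0): the shift/xor fallback above folds the per-byte counts
   pairwise; after xoring the halves twice, bit 0 of the low byte holds
   the parity of the whole word.  */
#if 0
#include <stdint.h>
#include <stdio.h>

static unsigned
parity32_model (uint32_t byte_counts)  /* per-byte popcounts, as popcntb  */
{
  uint32_t t = byte_counts ^ (byte_counts >> 16);
  t ^= t >> 8;
  return t & 1;
}

int
main (void)
{
  /* Four bytes with counts 1, 2, 3, 4: total 10, so even parity.  */
  printf ("%u\n", parity32_model (0x01020304));  /* prints 0  */
  return 0;
}
#endif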
/* Expand an Altivec constant permutation for little endian mode.
   OP0 and OP1 are the input vectors and TARGET is the output vector.
   SEL specifies the constant permutation vector.

   There are two issues: First, the two input operands must be
   swapped so that together they form a double-wide array in LE
   order.  Second, the vperm instruction has surprising behavior
   in LE mode: it interprets the elements of the source vectors
   in BE mode ("left to right") and interprets the elements of
   the destination vector in LE mode ("right to left").  To
   correct for this, we must subtract each element of the permute
   control vector from 31.

   For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
   with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
   We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
   serve as the permute control vector.  Then, in BE mode,

     vperm 9,10,11,12

   places the desired result in vr9.  However, in LE mode the
   vector contents will be

     vr10 = 00000003 00000002 00000001 00000000
     vr11 = 00000007 00000006 00000005 00000004

   The result of the vperm using the same permute control vector is

     vr9  = 05000000 07000000 01000000 03000000

   That is, the leftmost 4 bytes of vr10 are interpreted as the
   source for the rightmost 4 bytes of vr9, and so on.

   If we change the permute control vector to

     vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}

   and issue

     vperm 9,11,10,12

   we get the desired

     vr9  = 00000006 00000004 00000002 00000000.  */

static void
altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
                                  const vec_perm_indices &sel)
{
  unsigned int i;
  rtx perm[16];
  rtx constv, unspec;

  /* Unpack and adjust the constant selector.  */
  for (i = 0; i < 16; ++i)
    {
      unsigned int elt = 31 - (sel[i] & 31);
      perm[i] = GEN_INT (elt);
    }

  /* Expand to a permute, swapping the inputs and using the
     adjusted selector.  */
  if (!REG_P (op0))
    op0 = force_reg (V16QImode, op0);
  if (!REG_P (op1))
    op1 = force_reg (V16QImode, op1);

  constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
  constv = force_reg (V16QImode, constv);
  unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
                           UNSPEC_VPERM);
  if (!REG_P (target))
    {
      rtx tmp = gen_reg_rtx (V16QImode);
      emit_move_insn (tmp, unspec);
      unspec = tmp;
    }

  emit_move_insn (target, unspec);
}
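/* Illustrative sketch (not part of the compiler; kept out of the build
   with #if 0): the 31 - (sel[i] & 31) adjustment applied to the control
   vector from the comment above.  */
#if 0
#include <stdio.h>

int
main (void)
{
  static const unsigned sel[16]
    = {0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27};
  for (int i = 0; i < 16; i++)
    printf ("%u ", 31 - (sel[i] & 31));
  /* prints: 31 30 29 28 23 22 21 20 15 14 13 12 7 6 5 4  */
  putchar ('\n');
  return 0;
}
#endif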
/* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
   permute control vector.  But here it's not a constant, so we must
   generate a vector NAND or NOR to do the adjustment.  */

void
altivec_expand_vec_perm_le (rtx operands[4])
{
  rtx notx, iorx, unspec;
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx sel = operands[3];
  rtx tmp = target;
  rtx norreg = gen_reg_rtx (V16QImode);
  machine_mode mode = GET_MODE (target);

  /* Get everything in regs so the pattern matches.  */
  if (!REG_P (op0))
    op0 = force_reg (mode, op0);
  if (!REG_P (op1))
    op1 = force_reg (mode, op1);
  if (!REG_P (sel))
    sel = force_reg (V16QImode, sel);
  if (!REG_P (target))
    tmp = gen_reg_rtx (mode);

  if (TARGET_P9_VECTOR)
    {
      unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
                               UNSPEC_VPERMR);
    }
  else
    {
      /* Invert the selector with a VNAND if available, else a VNOR.
         The VNAND is preferred for future fusion opportunities.  */
      notx = gen_rtx_NOT (V16QImode, sel);
      iorx = (TARGET_P8_VECTOR
              ? gen_rtx_IOR (V16QImode, notx, notx)
              : gen_rtx_AND (V16QImode, notx, notx));
      emit_insn (gen_rtx_SET (norreg, iorx));

      /* Permute with operands reversed and adjusted selector.  */
      unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
                               UNSPEC_VPERM);
    }

  /* Copy into target, possibly by way of a register.  */
  if (!REG_P (target))
    {
      emit_move_insn (tmp, unspec);
      unspec = tmp;
    }

  emit_move_insn (target, unspec);
}
/* Expand an Altivec constant permutation.  Return true if we match
   an efficient implementation; false to fall back to VPERM.

   OP0 and OP1 are the input vectors and TARGET is the output vector.
   SEL specifies the constant permutation vector.  */

static bool
altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
			       const vec_perm_indices &sel)
{
  struct altivec_perm_insn {
    HOST_WIDE_INT mask;
    enum insn_code impl;
    unsigned char perm[16];
  };
  static const struct altivec_perm_insn patterns[] = {
    {OPTION_MASK_ALTIVEC,
     CODE_FOR_altivec_vpkuhum_direct,
     {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}},
    {OPTION_MASK_ALTIVEC,
     CODE_FOR_altivec_vpkuwum_direct,
     {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
    {OPTION_MASK_ALTIVEC,
     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
		      : CODE_FOR_altivec_vmrglb_direct,
     {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
    {OPTION_MASK_ALTIVEC,
     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
		      : CODE_FOR_altivec_vmrglh_direct,
     {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
    {OPTION_MASK_ALTIVEC,
     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si
		      : CODE_FOR_altivec_vmrglw_direct_v4si,
     {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
    {OPTION_MASK_ALTIVEC,
     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
		      : CODE_FOR_altivec_vmrghb_direct,
     {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
    {OPTION_MASK_ALTIVEC,
     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
		      : CODE_FOR_altivec_vmrghh_direct,
     {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
    {OPTION_MASK_ALTIVEC,
     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si
		      : CODE_FOR_altivec_vmrghw_direct_v4si,
     {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}},
    {OPTION_MASK_P8_VECTOR,
     BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
		      : CODE_FOR_p8_vmrgow_v4sf_direct,
     {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}},
    {OPTION_MASK_P8_VECTOR,
     BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
		      : CODE_FOR_p8_vmrgew_v4sf_direct,
     {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}},
    {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
     {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}},
    {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
     {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
    {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
     {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}},
    {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
     {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}}};

  unsigned int i, j, elt, which;
  unsigned char perm[16];
  rtx x;
  bool one_vec;

  /* Unpack the constant selector.  */
  for (i = which = 0; i < 16; ++i)
    {
      elt = sel[i] & 31;
      which |= (elt < 16 ? 1 : 2);
      perm[i] = elt;
    }

  /* Simplify the constant selector based on operands.  */
  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      one_vec = false;
      if (!rtx_equal_p (op0, op1))
	break;
      /* FALLTHRU */

    case 2:
      for (i = 0; i < 16; ++i)
	perm[i] &= 15;
      op0 = op1;
      one_vec = true;
      break;

    case 1:
      op1 = op0;
      one_vec = true;
      break;
    }

  /* Look for splat patterns.  */
  if (one_vec)
    {
      elt = perm[0];

      for (i = 0; i < 16; ++i)
	if (perm[i] != elt)
	  break;
      if (i == 16)
	{
	  if (!BYTES_BIG_ENDIAN)
	    elt = 15 - elt;
	  emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
	  return true;
	}

      if (elt % 2 == 0)
	{
	  for (i = 0; i < 16; i += 2)
	    if (perm[i] != elt || perm[i + 1] != elt + 1)
	      break;
	  if (i == 16)
	    {
	      int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
	      x = gen_reg_rtx (V8HImode);
	      emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
						    GEN_INT (field)));
	      emit_move_insn (target, gen_lowpart (V16QImode, x));
	      return true;
	    }
	}

      if (elt % 4 == 0)
	{
	  for (i = 0; i < 16; i += 4)
	    if (perm[i] != elt
		|| perm[i + 1] != elt + 1
		|| perm[i + 2] != elt + 2
		|| perm[i + 3] != elt + 3)
	      break;
	  if (i == 16)
	    {
	      int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
	      x = gen_reg_rtx (V4SImode);
	      emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
						    GEN_INT (field)));
	      emit_move_insn (target, gen_lowpart (V16QImode, x));
	      return true;
	    }
	}
    }

  /* Look for merge and pack patterns.  */
  for (j = 0; j < ARRAY_SIZE (patterns); ++j)
    {
      bool swapped;

      if ((patterns[j].mask & rs6000_isa_flags) == 0)
	continue;

      elt = patterns[j].perm[0];
      if (perm[0] == elt)
	swapped = false;
      else if (perm[0] == elt + 16)
	swapped = true;
      else
	continue;
      for (i = 1; i < 16; ++i)
	{
	  elt = patterns[j].perm[i];
	  if (swapped)
	    elt = (elt >= 16 ? elt - 16 : elt + 16);
	  else if (one_vec && elt >= 16)
	    elt -= 16;
	  if (perm[i] != elt)
	    break;
	}
      if (i == 16)
	{
	  enum insn_code icode = patterns[j].impl;
	  machine_mode omode = insn_data[icode].operand[0].mode;
	  machine_mode imode = insn_data[icode].operand[1].mode;

	  rtx perm_idx = GEN_INT (0);
	  if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
	    {
	      int perm_val = 0;
	      if (one_vec)
		{
		  if (perm[0] == 8)
		    perm_val |= 2;
		  if (perm[8] == 8)
		    perm_val |= 1;
		}
	      else
		{
		  if (perm[0] != 0)
		    perm_val |= 2;
		  if (perm[8] != 16)
		    perm_val |= 1;
		}
	      perm_idx = GEN_INT (perm_val);
	    }

	  /* For little-endian, don't use vpkuwum and vpkuhum if the
	     underlying vector type is not V4SI and V8HI, respectively.
	     For example, using vpkuwum with a V8HI picks up the even
	     halfwords (BE numbering) when the even halfwords (LE
	     numbering) are what we need.  */
	  if (!BYTES_BIG_ENDIAN
	      && icode == CODE_FOR_altivec_vpkuwum_direct
	      && ((REG_P (op0)
		   && GET_MODE (op0) != V4SImode)
		  || (SUBREG_P (op0)
		      && GET_MODE (XEXP (op0, 0)) != V4SImode)))
	    continue;
	  if (!BYTES_BIG_ENDIAN
	      && icode == CODE_FOR_altivec_vpkuhum_direct
	      && ((REG_P (op0)
		   && GET_MODE (op0) != V8HImode)
		  || (SUBREG_P (op0)
		      && GET_MODE (XEXP (op0, 0)) != V8HImode)))
	    continue;

	  /* For little-endian, the two input operands must be swapped
	     (or swapped back) to ensure proper right-to-left numbering
	     in memory order.  */
	  if (swapped == BYTES_BIG_ENDIAN
	      && icode != CODE_FOR_vsx_xxpermdi_v16qi)
	    std::swap (op0, op1);
	  if (imode != V16QImode)
	    {
	      op0 = gen_lowpart (imode, op0);
	      op1 = gen_lowpart (imode, op1);
	    }
	  if (omode == V16QImode)
	    x = target;
	  else
	    x = gen_reg_rtx (omode);
	  if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
	    emit_insn (GEN_FCN (icode) (x, op0, op1, perm_idx));
	  else
	    emit_insn (GEN_FCN (icode) (x, op0, op1));
	  if (omode != V16QImode)
	    emit_move_insn (target, gen_lowpart (V16QImode, x));
	  return true;
	}
    }

  if (!BYTES_BIG_ENDIAN)
    {
      altivec_expand_vec_perm_const_le (target, op0, op1, sel);
      return true;
    }

  return false;
}

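/* Editorial note (illustrative): the merge/pack match above is a plain
   element-wise comparison against each table entry, with SWAPPED folding
   in an exchange of the two inputs by adding or subtracting 16 from each
   element, conceptually:

     bool match = true;
     for (unsigned k = 0; k < 16 && match; k++)
       match = (perm[k] == expected_elt (patterns[j], k, swapped));

   where expected_elt is a hypothetical helper modeling the adjustment
   performed in the inner loop.  */
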
/* Expand a VSX Permute Doubleword constant permutation.
   Return true if we match an efficient implementation.  */

static bool
rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
				unsigned char perm0, unsigned char perm1)
{
  rtx x;

  /* If both selectors come from the same operand, fold to single op.  */
  if ((perm0 & 2) == (perm1 & 2))
    {
      if (perm0 & 2)
	op0 = op1;
      else
	op1 = op0;
      perm0 &= 1;
      perm1 = (perm1 & 1) + 2;
    }
  /* If both operands are equal, fold to simpler permutation.  */
  else if (rtx_equal_p (op0, op1))
    {
      perm0 = perm0 & 1;
      perm1 = (perm1 & 1) + 2;
    }
  /* If the first selector comes from the second operand, swap.  */
  else if (perm0 & 2)
    {
      if (perm1 & 2)
	return false;
      perm0 -= 2;
      perm1 += 2;
      std::swap (op0, op1);
    }
  /* If the second selector does not come from the second operand, fail.  */
  else if ((perm1 & 2) == 0)
    return false;

  /* Success!  */
  if (target != NULL)
    {
      machine_mode vmode, dmode;
      rtvec v;

      vmode = GET_MODE (target);
      gcc_assert (GET_MODE_NUNITS (vmode) == 2);
      dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
      x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
      v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
      x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
      emit_insn (gen_rtx_SET (target, x));
    }
  return true;
}

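/* Editorial example (illustrative): PERM0/PERM1 index the doublewords of
   the concatenation {op0[0], op0[1], op1[0], op1[1]}, so bit 1 of each
   selector identifies the source operand.  For instance perm0 = 1 and
   perm1 = 2 select {op0[1], op1[0]}, which maps onto a single xxpermdi.  */
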
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */

static bool
rs6000_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
				 rtx target, rtx op0, rtx op1,
				 const vec_perm_indices &sel)
{
  if (vmode != op_mode)
    return false;

  bool testing_p = !target;

  /* AltiVec (and thus VSX) can handle arbitrary permutations.  */
  if (TARGET_ALTIVEC && testing_p)
    return true;

  if (op0)
    {
      rtx nop0 = force_reg (vmode, op0);
      if (op0 == op1)
	op1 = nop0;
      op0 = nop0;
    }
  if (op1)
    op1 = force_reg (vmode, op1);

  /* Check for ps_merge* or xxpermdi insns.  */
  if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
    {
      if (testing_p)
	{
	  op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
	  op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
	}
      if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
	return true;
    }

  if (TARGET_ALTIVEC)
    {
      /* Force the target-independent code to lower to V16QImode.  */
      if (vmode != V16QImode)
	return false;
      if (altivec_expand_vec_perm_const (target, op0, op1, sel))
	return true;
    }

  return false;
}

23804 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
23805 OP0 and OP1 are the input vectors and TARGET is the output vector.
23806 PERM specifies the constant permutation vector. */
23809 rs6000_do_expand_vec_perm (rtx target
, rtx op0
, rtx op1
,
23810 machine_mode vmode
, const vec_perm_builder
&perm
)
23812 rtx x
= expand_vec_perm_const (vmode
, op0
, op1
, perm
, BLKmode
, target
);
23814 emit_move_insn (target
, x
);
/* Expand an extract even operation.  */

void
rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
{
  machine_mode vmode = GET_MODE (target);
  unsigned i, nelt = GET_MODE_NUNITS (vmode);
  vec_perm_builder perm (nelt, nelt, 1);

  for (i = 0; i < nelt; i++)
    perm.quick_push (i * 2);

  rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
}

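/* Editorial example (illustrative): for a V4SI target (nelt = 4) the
   builder above receives {0, 2, 4, 6}, i.e. the even elements of the
   double-wide vector {op0[0..3], op1[0..3]}.  */
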
/* Expand a vector interleave operation.  */

void
rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
{
  machine_mode vmode = GET_MODE (target);
  unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
  vec_perm_builder perm (nelt, nelt, 1);

  high = (highp ? 0 : nelt / 2);
  for (i = 0; i < nelt / 2; i++)
    {
      perm.quick_push (i + high);
      perm.quick_push (i + nelt + high);
    }

  rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
}

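/* Editorial example (illustrative): for V4SI (nelt = 4), HIGHP true
   yields the selector {0, 4, 1, 5} (high halves interleaved), and HIGHP
   false yields {2, 6, 3, 7} (low halves interleaved).  */
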
/* Scale a V2DF vector SRC by two to the SCALE and place in TGT.  */
void
rs6000_scale_v2df (rtx tgt, rtx src, int scale)
{
  HOST_WIDE_INT hwi_scale (scale);
  REAL_VALUE_TYPE r_pow;
  rtvec v = rtvec_alloc (2);
  rtx elt;
  rtx scale_vec = gen_reg_rtx (V2DFmode);
  (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
  elt = const_double_from_real_value (r_pow, DFmode);
  RTVEC_ELT (v, 0) = elt;
  RTVEC_ELT (v, 1) = elt;
  rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
  emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
}

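/* Editorial example (illustrative): rs6000_scale_v2df (tgt, src, 3)
   computes tgt = src * {2^3, 2^3}, i.e. multiplies both lanes by 8.0
   via the splatted constant vector built above.  */
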
/* Return an RTX representing where to find the function value of a
   function returning MODE.  */

static rtx
rs6000_complex_function_value (machine_mode mode)
{
  unsigned int regno;
  rtx r1, r2;
  machine_mode inner = GET_MODE_INNER (mode);
  unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);

  if (TARGET_FLOAT128_TYPE
      && (mode == KCmode
	  || (mode == TCmode && TARGET_IEEEQUAD)))
    regno = ALTIVEC_ARG_RETURN;

  else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
    regno = FP_ARG_RETURN;

  else
    {
      regno = GP_ARG_RETURN;

      /* 32-bit is OK since it'll go in r3/r4.  */
      if (TARGET_32BIT && inner_bytes >= 4)
	return gen_rtx_REG (mode, regno);
    }

  if (inner_bytes >= 8)
    return gen_rtx_REG (mode, regno);

  r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
			  const0_rtx);
  r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
			  GEN_INT (inner_bytes));
  return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
}

/* Return an rtx describing a return value of MODE as a PARALLEL
   in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
   stride REG_STRIDE.  */

static rtx
rs6000_parallel_return (machine_mode mode,
			int n_elts, machine_mode elt_mode,
			unsigned int regno, unsigned int reg_stride)
{
  rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));

  int i;
  for (i = 0; i < n_elts; i++)
    {
      rtx r = gen_rtx_REG (elt_mode, regno);
      rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
      XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
      regno += reg_stride;
    }

  return par;
}

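/* Editorial example (illustrative): rs6000_parallel_return (DImode, 2,
   SImode, GP_ARG_RETURN, 1) describes a 64-bit value split across two
   consecutive 32-bit GPRs, the second half at byte offset 4 -- the shape
   used below for long long returns under -m32 -mpowerpc64.  */
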
/* Target hook for TARGET_FUNCTION_VALUE.

   An integer value is in r3 and a floating-point value is in fp1,
   unless -msoft-float.  */

static rtx
rs6000_function_value (const_tree valtype,
		       const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
		       bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  unsigned int regno;
  machine_mode elt_mode;
  int n_elts;

  /* Special handling for structs in darwin64.  */
  if (TARGET_MACHO
      && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
    {
      CUMULATIVE_ARGS valcum;
      rtx valret;

      valcum.words = 0;
      valcum.fregno = FP_ARG_MIN_REG;
      valcum.vregno = ALTIVEC_ARG_MIN_REG;
      /* Do a trial code generation as if this were going to be passed as
	 an argument; if any part goes in memory, we return NULL.  */
      valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
      if (valret)
	return valret;
      /* Otherwise fall through to standard ABI rules.  */
    }

  mode = TYPE_MODE (valtype);

  /* The ELFv2 ABI returns homogeneous VFP aggregates in registers.  */
  if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
    {
      int first_reg, n_regs;

      if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
	{
	  /* _Decimal128 must use even/odd register pairs.  */
	  first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
	  n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
	}
      else
	{
	  first_reg = ALTIVEC_ARG_RETURN;
	  n_regs = 1;
	}

      return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
    }

  /* Some return value types need be split in -mpowerpc64, 32bit ABI.  */
  if (TARGET_32BIT && TARGET_POWERPC64
      && (mode == DImode
	  || (mode == TImode && TREE_CODE (valtype) == INTEGER_TYPE)))
    {
      int count = GET_MODE_SIZE (mode) / 4;
      return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
    }

  if ((INTEGRAL_TYPE_P (valtype)
       && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
      || POINTER_TYPE_P (valtype))
    mode = TARGET_32BIT ? SImode : DImode;

  if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
    /* _Decimal128 must use an even/odd register pair.  */
    regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
  else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
	   && !FLOAT128_VECTOR_P (mode))
    regno = FP_ARG_RETURN;
  else if (TREE_CODE (valtype) == COMPLEX_TYPE
	   && targetm.calls.split_complex_arg)
    return rs6000_complex_function_value (mode);
  /* VSX is a superset of Altivec and adds V2DImode/V2DFmode.  Since the same
     return register is used in both cases, and we won't see V2DImode/V2DFmode
     for pure altivec, combine the two cases.  */
  else if ((VECTOR_TYPE_P (valtype) || VECTOR_ALIGNMENT_P (mode))
	   && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
	   && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
    regno = ALTIVEC_ARG_RETURN;
  else
    regno = GP_ARG_RETURN;

  return gen_rtx_REG (mode, regno);
}

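/* Editorial example (illustrative): under the ELFv2 ABI, a struct of four
   doubles is a homogeneous aggregate, so the code above returns it as a
   PARALLEL of four DFmode registers starting at FP_ARG_RETURN (n_regs is
   1 for an 8-byte element, so the registers are consecutive).  */
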
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
rs6000_libcall_value (machine_mode mode)
{
  unsigned int regno;

  /* Long long return value need be split in -mpowerpc64, 32bit ABI.  */
  if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
    return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);

  if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
    /* _Decimal128 must use an even/odd register pair.  */
    regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
  else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
    regno = FP_ARG_RETURN;
  /* VSX is a superset of Altivec and adds V2DImode/V2DFmode.  Since the same
     return register is used in both cases, and we won't see V2DImode/V2DFmode
     for pure altivec, combine the two cases.  */
  else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
	   && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
    regno = ALTIVEC_ARG_RETURN;
  else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
    return rs6000_complex_function_value (mode);
  else
    regno = GP_ARG_RETURN;

  return gen_rtx_REG (mode, regno);
}

/* Compute register pressure classes.  We implement the target hook to avoid
   IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
   lead to incorrect estimates of the number of available registers and
   therefore increased register pressure/spill.  */

static int
rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
{
  int n;

  n = 0;
  pressure_classes[n++] = GENERAL_REGS;
  if (TARGET_ALTIVEC)
    pressure_classes[n++] = ALTIVEC_REGS;
  if (TARGET_VSX)
    pressure_classes[n++] = VSX_REGS;
  else
    {
      if (TARGET_HARD_FLOAT)
	pressure_classes[n++] = FLOAT_REGS;
    }
  pressure_classes[n++] = CR_REGS;
  pressure_classes[n++] = SPECIAL_REGS;

  return n;
}

/* Given FROM and TO register numbers, say whether this elimination is allowed.
   Frame pointer elimination is automatically handled.

   For the RS/6000, if frame pointer elimination is being done, we would like
   to convert ap into fp, not sp.

   We need r30 if -mminimal-toc was specified, and there are constant pool
   references.  */

static bool
rs6000_can_eliminate (const int from, const int to)
{
  return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
	  ? ! frame_pointer_needed
	  : from == RS6000_PIC_OFFSET_TABLE_REGNUM
	  ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
	    || constant_pool_empty_p ()
	  : true);
}

/* Define the offset between two registers, FROM to be eliminated and its
   replacement TO, at the start of a routine.  */

HOST_WIDE_INT
rs6000_initial_elimination_offset (int from, int to)
{
  rs6000_stack_t *info = rs6000_stack_info ();
  HOST_WIDE_INT offset;

  if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    offset = info->push_p ? 0 : -info->total_size;
  else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    {
      offset = info->push_p ? 0 : -info->total_size;
      if (FRAME_GROWS_DOWNWARD)
	offset += info->fixed_size + info->vars_size + info->parm_size;
    }
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = FRAME_GROWS_DOWNWARD
	     ? info->fixed_size + info->vars_size + info->parm_size
	     : 0;
  else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = info->total_size;
  else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    offset = info->push_p ? info->total_size : 0;
  else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
    offset = 0;
  else
    gcc_unreachable ();

  return offset;
}

/* Fill in sizes of registers used by unwinder.  */

static void
rs6000_init_dwarf_reg_sizes_extra (tree address)
{
  if (TARGET_MACHO && ! TARGET_ALTIVEC)
    {
      int i;
      machine_mode mode = TYPE_MODE (char_type_node);
      rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
      rtx mem = gen_rtx_MEM (BLKmode, addr);
      rtx value = gen_int_mode (16, mode);

      /* On Darwin, libgcc may be built to run on both G3 and G4/5.
	 The unwinder still needs to know the size of Altivec registers.  */

      for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
	{
	  int column = DWARF_REG_TO_UNWIND_COLUMN
		(DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
	  HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);

	  emit_move_insn (adjust_address (mem, mode, offset), value);
	}
    }
}

/* Map internal gcc register numbers to debug format register numbers.
   FORMAT specifies the type of debug register number to use:
     0 -- debug information, except for frame-related sections
     1 -- DWARF .debug_frame section
     2 -- DWARF .eh_frame section  */

unsigned int
rs6000_debugger_regno (unsigned int regno, unsigned int format)
{
  /* On some platforms, we use the standard DWARF register
     numbering for .debug_info and .debug_frame.  */
  if ((format == 0 && dwarf_debuginfo_p ()) || format == 1)
    {
#ifdef RS6000_USE_DWARF_NUMBERING
      if (regno <= 31)
	return regno;
      if (FP_REGNO_P (regno))
	return regno - FIRST_FPR_REGNO + 32;
      if (ALTIVEC_REGNO_P (regno))
	return regno - FIRST_ALTIVEC_REGNO + 1124;
      if (regno == LR_REGNO)
	return 108;
      if (regno == CTR_REGNO)
	return 109;
      if (regno == CA_REGNO)
	return 101;  /* XER */
      /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
	 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
	 The actual code emitted saves the whole of CR, so we map CR2_REGNO
	 to the DWARF reg for CR.  */
      if (format == 1 && regno == CR2_REGNO)
	return 64;
      if (CR_REGNO_P (regno))
	return regno - CR0_REGNO + 86;
      if (regno == VRSAVE_REGNO)
	return 356;
      if (regno == VSCR_REGNO)
	return 67;

      /* These do not make much sense.  */
      if (regno == FRAME_POINTER_REGNUM)
	return 111;
      if (regno == ARG_POINTER_REGNUM)
	return 67;

      gcc_unreachable ();
#endif
    }

  /* We use the GCC 7 (and before) internal number for non-DWARF debug
     information, and also for .eh_frame.  */
  /* Translate the regnos to their numbers in GCC 7 (and before).  */
  if (regno <= 31)
    return regno;
  if (FP_REGNO_P (regno))
    return regno - FIRST_FPR_REGNO + 32;
  if (ALTIVEC_REGNO_P (regno))
    return regno - FIRST_ALTIVEC_REGNO + 77;
  if (regno == LR_REGNO)
    return 65;
  if (regno == CTR_REGNO)
    return 66;
  if (regno == CA_REGNO)
    return 76;  /* XER */
  if (CR_REGNO_P (regno))
    return regno - CR0_REGNO + 68;
  if (regno == VRSAVE_REGNO)
    return 109;
  if (regno == VSCR_REGNO)
    return 110;

  if (regno == FRAME_POINTER_REGNUM)
    return 113;
  if (regno == ARG_POINTER_REGNUM)
    return 67;

  gcc_unreachable ();
}

/* target hook eh_return_filter_mode */
static scalar_int_mode
rs6000_eh_return_filter_mode (void)
{
  return TARGET_32BIT ? SImode : word_mode;
}

/* Target hook for translate_mode_attribute.  */
static machine_mode
rs6000_translate_mode_attribute (machine_mode mode)
{
  if ((FLOAT128_IEEE_P (mode)
       && ieee128_float_type_node == long_double_type_node)
      || (FLOAT128_IBM_P (mode)
	  && ibm128_float_type_node == long_double_type_node))
    return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
  return mode;
}

/* Target hook for scalar_mode_supported_p.  */
static bool
rs6000_scalar_mode_supported_p (scalar_mode mode)
{
  /* -m32 does not support TImode.  This is the default, from
     default_scalar_mode_supported_p.  For -m32 -mpowerpc64 we want the
     same ABI as for -m32.  But default_scalar_mode_supported_p allows
     integer modes of precision 2 * BITS_PER_WORD, which matches TImode
     for -mpowerpc64.  */
  if (TARGET_32BIT && mode == TImode)
    return false;

  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}

/* Target hook for libgcc_floating_mode_supported_p.  */

static bool
rs6000_libgcc_floating_mode_supported_p (scalar_float_mode mode)
{
  switch (mode)
    {
    case E_SFmode:
    case E_DFmode:
    case E_TFmode:
      return true;

      /* We only return true for KFmode if IEEE 128-bit types are supported, and
	 if long double does not use the IEEE 128-bit format.  If long double
	 uses the IEEE 128-bit format, it will use TFmode and not KFmode.
	 Because the code will not use KFmode in that case, there will be aborts
	 because it can't find KFmode in the Floatn types.  */
    case E_KFmode:
      return TARGET_FLOAT128_TYPE && !TARGET_IEEEQUAD;

    default:
      return false;
    }
}

/* Target hook for vector_mode_supported_p.  */
static bool
rs6000_vector_mode_supported_p (machine_mode mode)
{
  /* There is no vector form for IEEE 128-bit.  If we return true for IEEE
     128-bit, the compiler might try to widen IEEE 128-bit to IBM
     double-double.  */
  if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
    return true;
  else
    return false;
}

/* Target hook for floatn_mode.  */
static opt_scalar_float_mode
rs6000_floatn_mode (int n, bool extended)
{
  if (extended)
    {
      switch (n)
	{
	case 32:
	  return DFmode;

	case 64:
	  if (TARGET_FLOAT128_TYPE)
	    return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
	  else
	    return opt_scalar_float_mode ();

	case 128:
	  return opt_scalar_float_mode ();

	default:
	  /* Those are the only valid _FloatNx types.  */
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (n)
	{
	case 32:
	  return SFmode;

	case 64:
	  return DFmode;

	case 128:
	  if (TARGET_FLOAT128_TYPE)
	    return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
	  else
	    return opt_scalar_float_mode ();

	default:
	  return opt_scalar_float_mode ();
	}
    }
}

/* Target hook for c_mode_for_suffix.  */
static machine_mode
rs6000_c_mode_for_suffix (char suffix)
{
  if (TARGET_FLOAT128_TYPE)
    {
      if (suffix == 'q' || suffix == 'Q')
	return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;

      /* At the moment, we are not defining a suffix for IBM extended double.
	 If/when the default for -mabi=ieeelongdouble is changed, and we want
	 to support __ibm128 constants in legacy library code, we may need to
	 re-evaluate this decision.  Currently, c-lex.cc only supports 'w' and
	 'q' as machine dependent suffixes.  The x86_64 port uses 'w' for
	 __float80 constants.  */
    }

  return VOIDmode;
}

/* Target hook for invalid_arg_for_unprototyped_fn.  */
static const char *
invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
{
  return (!rs6000_darwin64_abi
	  && typelist == 0
	  && VECTOR_TYPE_P (TREE_TYPE (val))
	  && (funcdecl == NULL_TREE
	      || (TREE_CODE (funcdecl) == FUNCTION_DECL
		  && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD
		  && !fndecl_built_in_p (funcdecl, BUILT_IN_CLASSIFY_TYPE))))
	 ? N_("AltiVec argument passed to unprototyped function")
	 : NULL;
}

/* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
   setup by using __stack_chk_fail_local hidden function instead of
   calling __stack_chk_fail directly.  Otherwise it is better to call
   __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
rs6000_stack_protect_fail (void)
{
  return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
	 ? default_hidden_stack_protect_fail ()
	 : default_external_stack_protect_fail ();
}

/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
rs6000_asan_shadow_offset (void)
{
  return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
}

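/* Editorial note (illustrative): with this offset, AddressSanitizer maps
   an address to its shadow byte as

     shadow = (addr >> 3) + (1 << 41)   on 64-bit targets
     shadow = (addr >> 3) + (1 << 29)   on 32-bit targets

   where the shift of 3 is ASan's default shadow scale, set elsewhere.  */
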
/* Mask options that we want to support inside of attribute((target)) and
   #pragma GCC target operations.  Note, we do not include things like
   64/32-bit, endianness, hard/soft floating point, etc. that would have
   different calling sequences.  */

struct rs6000_opt_mask {
  const char *name;		/* option name */
  HOST_WIDE_INT mask;		/* mask to set */
  bool invert;			/* invert sense of mask */
  bool valid_target;		/* option is a target option */
};

static struct rs6000_opt_mask const rs6000_opt_masks[] =
{
  { "altivec",			OPTION_MASK_ALTIVEC,		false, true  },
  { "block-ops-unaligned-vsx",	OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX,
								false, true  },
  { "block-ops-vector-pair",	OPTION_MASK_BLOCK_OPS_VECTOR_PAIR,
								false, true  },
  { "cmpb",			OPTION_MASK_CMPB,		false, true  },
  { "crypto",			OPTION_MASK_CRYPTO,		false, true  },
  { "direct-move",		OPTION_MASK_DIRECT_MOVE,	false, true  },
  { "dlmzb",			OPTION_MASK_DLMZB,		false, true  },
  { "efficient-unaligned-vsx",	OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
								false, true  },
  { "float128",			OPTION_MASK_FLOAT128_KEYWORD,	false, true  },
  { "float128-hardware",	OPTION_MASK_FLOAT128_HW,	false, true  },
  { "fprnd",			OPTION_MASK_FPRND,		false, true  },
  { "power10",			OPTION_MASK_POWER10,		false, true  },
  { "power11",			OPTION_MASK_POWER11,		false, false },
  { "hard-dfp",			OPTION_MASK_DFP,		false, true  },
  { "htm",			OPTION_MASK_HTM,		false, true  },
  { "isel",			OPTION_MASK_ISEL,		false, true  },
  { "mfcrf",			OPTION_MASK_MFCRF,		false, true  },
  { "mfpgpr",			0,				false, true  },
  { "mma",			OPTION_MASK_MMA,		false, true  },
  { "modulo",			OPTION_MASK_MODULO,		false, true  },
  { "mulhw",			OPTION_MASK_MULHW,		false, true  },
  { "multiple",			OPTION_MASK_MULTIPLE,		false, true  },
  { "pcrel",			OPTION_MASK_PCREL,		false, true  },
  { "pcrel-opt",		OPTION_MASK_PCREL_OPT,		false, true  },
  { "popcntb",			OPTION_MASK_POPCNTB,		false, true  },
  { "popcntd",			OPTION_MASK_POPCNTD,		false, true  },
  { "power8-fusion",		OPTION_MASK_P8_FUSION,		false, true  },
  { "power8-fusion-sign",	OPTION_MASK_P8_FUSION_SIGN,	false, true  },
  { "power8-vector",		OPTION_MASK_P8_VECTOR,		false, true  },
  { "power9-minmax",		OPTION_MASK_P9_MINMAX,		false, true  },
  { "power9-misc",		OPTION_MASK_P9_MISC,		false, true  },
  { "power9-vector",		OPTION_MASK_P9_VECTOR,		false, true  },
  { "power10-fusion",		OPTION_MASK_P10_FUSION,		false, true  },
  { "powerpc-gfxopt",		OPTION_MASK_PPC_GFXOPT,		false, true  },
  { "powerpc-gpopt",		OPTION_MASK_PPC_GPOPT,		false, true  },
  { "prefixed",			OPTION_MASK_PREFIXED,		false, true  },
  { "quad-memory",		OPTION_MASK_QUAD_MEMORY,	false, true  },
  { "quad-memory-atomic",	OPTION_MASK_QUAD_MEMORY_ATOMIC,	false, true  },
  { "recip-precision",		OPTION_MASK_RECIP_PRECISION,	false, true  },
  { "save-toc-indirect",	OPTION_MASK_SAVE_TOC_INDIRECT,	false, true  },
  { "string",			0,				false, true  },
  { "update",			OPTION_MASK_NO_UPDATE,		true , true  },
  { "vsx",			OPTION_MASK_VSX,		false, true  },
#ifdef OPTION_MASK_64BIT
#if TARGET_AIX_OS
  { "aix64",			OPTION_MASK_64BIT,		false, false },
  { "aix32",			OPTION_MASK_64BIT,		true,  false },
#else
  { "64",			OPTION_MASK_64BIT,		false, false },
  { "32",			OPTION_MASK_64BIT,		true,  false },
#endif
#endif
#ifdef OPTION_MASK_EABI
  { "eabi",			OPTION_MASK_EABI,		false, false },
#endif
#ifdef OPTION_MASK_LITTLE_ENDIAN
  { "little",			OPTION_MASK_LITTLE_ENDIAN,	false, false },
  { "big",			OPTION_MASK_LITTLE_ENDIAN,	true,  false },
#endif
#ifdef OPTION_MASK_RELOCATABLE
  { "relocatable",		OPTION_MASK_RELOCATABLE,	false, false },
#endif
#ifdef OPTION_MASK_STRICT_ALIGN
  { "strict-align",		OPTION_MASK_STRICT_ALIGN,	false, false },
#endif
  { "soft-float",		OPTION_MASK_SOFT_FLOAT,		false, false },
  { "string",			0,				false, false },
};

/* Option variables that we want to support inside attribute((target)) and
   #pragma GCC target operations.  */

struct rs6000_opt_var {
  const char *name;		/* option name */
  size_t global_offset;		/* offset of the option in global_options.  */
  size_t target_offset;		/* offset of the option in target options.  */
};

static struct rs6000_opt_var const rs6000_opt_vars[] =
{
  { "friz",
    offsetof (struct gcc_options, x_TARGET_FRIZ),
    offsetof (struct cl_target_option, x_TARGET_FRIZ), },
  { "avoid-indexed-addresses",
    offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
    offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
  { "longcall",
    offsetof (struct gcc_options, x_rs6000_default_long_calls),
    offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
  { "optimize-swaps",
    offsetof (struct gcc_options, x_rs6000_optimize_swaps),
    offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
  { "allow-movmisalign",
    offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
    offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
  { "sched-groups",
    offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
    offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
  { "always-hint",
    offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
    offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
  { "align-branch-targets",
    offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
    offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
  { "sched-prolog",
    offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
    offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
  { "sched-epilog",
    offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
    offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
  { "speculate-indirect-jumps",
    offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
    offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
};

24558 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
24559 parsing. Return true if there were no errors. */
24562 rs6000_inner_target_options (tree args
, bool attr_p
)
24566 if (args
== NULL_TREE
)
24569 else if (TREE_CODE (args
) == STRING_CST
)
24571 char *p
= ASTRDUP (TREE_STRING_POINTER (args
));
24574 while ((q
= strtok (p
, ",")) != NULL
)
24576 bool error_p
= false;
24577 bool not_valid_p
= false;
24578 const char *cpu_opt
= NULL
;
24581 if (startswith (q
, "cpu="))
24583 int cpu_index
= rs6000_cpu_name_lookup (q
+4);
24584 if (cpu_index
>= 0)
24585 rs6000_cpu_index
= cpu_index
;
24592 else if (startswith (q
, "tune="))
24594 int tune_index
= rs6000_cpu_name_lookup (q
+5);
24595 if (tune_index
>= 0)
24596 rs6000_tune_index
= tune_index
;
24606 bool invert
= false;
24610 if (startswith (r
, "no-"))
24616 for (i
= 0; i
< ARRAY_SIZE (rs6000_opt_masks
); i
++)
24617 if (strcmp (r
, rs6000_opt_masks
[i
].name
) == 0)
24619 HOST_WIDE_INT mask
= rs6000_opt_masks
[i
].mask
;
24621 if (!rs6000_opt_masks
[i
].valid_target
)
24622 not_valid_p
= true;
24626 rs6000_isa_flags_explicit
|= mask
;
24628 /* VSX needs altivec, so -mvsx automagically sets
24629 altivec and disables -mavoid-indexed-addresses. */
24632 if (mask
== OPTION_MASK_VSX
)
24634 mask
|= OPTION_MASK_ALTIVEC
;
24635 TARGET_AVOID_XFORM
= 0;
24639 if (rs6000_opt_masks
[i
].invert
)
24643 rs6000_isa_flags
&= ~mask
;
24645 rs6000_isa_flags
|= mask
;
24650 if (error_p
&& !not_valid_p
)
24652 for (i
= 0; i
< ARRAY_SIZE (rs6000_opt_vars
); i
++)
24653 if (strcmp (r
, rs6000_opt_vars
[i
].name
) == 0)
24655 size_t j
= rs6000_opt_vars
[i
].global_offset
;
24656 *((int *) ((char *)&global_options
+ j
)) = !invert
;
24658 not_valid_p
= false;
24666 const char *eprefix
, *esuffix
;
24671 eprefix
= "__attribute__((__target__(";
24676 eprefix
= "#pragma GCC target ";
24681 error ("invalid cpu %qs for %s%qs%s", cpu_opt
, eprefix
,
24683 else if (not_valid_p
)
24684 error ("%s%qs%s is not allowed", eprefix
, q
, esuffix
);
24686 error ("%s%qs%s is invalid", eprefix
, q
, esuffix
);
24691 else if (TREE_CODE (args
) == TREE_LIST
)
24695 tree value
= TREE_VALUE (args
);
24698 bool ret2
= rs6000_inner_target_options (value
, attr_p
);
24702 args
= TREE_CHAIN (args
);
24704 while (args
!= NULL_TREE
);
24709 error ("attribute %<target%> argument not a string");
/* Print out the target options as a list for -mdebug=target.  */

static void
rs6000_debug_target_options (tree args, const char *prefix)
{
  if (args == NULL_TREE)
    fprintf (stderr, "%s<NULL>", prefix);

  else if (TREE_CODE (args) == STRING_CST)
    {
      char *p = ASTRDUP (TREE_STRING_POINTER (args));
      char *q;

      while ((q = strtok (p, ",")) != NULL)
	{
	  p = NULL;
	  fprintf (stderr, "%s\"%s\"", prefix, q);
	  prefix = ", ";
	}
    }

  else if (TREE_CODE (args) == TREE_LIST)
    {
      do
	{
	  tree value = TREE_VALUE (args);
	  if (value)
	    {
	      rs6000_debug_target_options (value, prefix);
	      prefix = ", ";
	    }
	  args = TREE_CHAIN (args);
	}
      while (args != NULL_TREE);
    }

  else
    gcc_unreachable ();
}

24762 rs6000_valid_attribute_p (tree fndecl
,
24763 tree
ARG_UNUSED (name
),
24767 struct cl_target_option cur_target
;
24770 tree new_target
, new_optimize
;
24771 tree func_optimize
;
24773 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
24775 if (TARGET_DEBUG_TARGET
)
24777 tree tname
= DECL_NAME (fndecl
);
24778 fprintf (stderr
, "\n==================== rs6000_valid_attribute_p:\n");
24780 fprintf (stderr
, "function: %.*s\n",
24781 (int) IDENTIFIER_LENGTH (tname
),
24782 IDENTIFIER_POINTER (tname
));
24784 fprintf (stderr
, "function: unknown\n");
24786 fprintf (stderr
, "args:");
24787 rs6000_debug_target_options (args
, " ");
24788 fprintf (stderr
, "\n");
24791 fprintf (stderr
, "flags: 0x%x\n", flags
);
24793 fprintf (stderr
, "--------------------\n");
24796 /* attribute((target("default"))) does nothing, beyond
24797 affecting multi-versioning. */
24798 if (TREE_VALUE (args
)
24799 && TREE_CODE (TREE_VALUE (args
)) == STRING_CST
24800 && TREE_CHAIN (args
) == NULL_TREE
24801 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args
)), "default") == 0)
24804 old_optimize
= build_optimization_node (&global_options
,
24805 &global_options_set
);
24806 func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
24808 /* If the function changed the optimization levels as well as setting target
24809 options, start with the optimizations specified. */
24810 if (func_optimize
&& func_optimize
!= old_optimize
)
24811 cl_optimization_restore (&global_options
, &global_options_set
,
24812 TREE_OPTIMIZATION (func_optimize
));
24814 /* The target attributes may also change some optimization flags, so update
24815 the optimization options if necessary. */
24816 cl_target_option_save (&cur_target
, &global_options
, &global_options_set
);
24817 rs6000_cpu_index
= rs6000_tune_index
= -1;
24818 ret
= rs6000_inner_target_options (args
, true);
24820 /* Set up any additional state. */
24823 ret
= rs6000_option_override_internal (false);
24824 new_target
= build_target_option_node (&global_options
,
24825 &global_options_set
);
24830 new_optimize
= build_optimization_node (&global_options
,
24831 &global_options_set
);
24838 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
24840 if (old_optimize
!= new_optimize
)
24841 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
24844 cl_target_option_restore (&global_options
, &global_options_set
, &cur_target
);
24846 if (old_optimize
!= new_optimize
)
24847 cl_optimization_restore (&global_options
, &global_options_set
,
24848 TREE_OPTIMIZATION (old_optimize
));
/* Hook to validate the current #pragma GCC target and set the state, and
   update the macros based on what was changed.  If ARGS is NULL, then
   POP_TARGET is used to reset the options.  */

bool
rs6000_pragma_target_parse (tree args, tree pop_target)
{
  tree prev_tree = build_target_option_node (&global_options,
					     &global_options_set);
  tree cur_tree;
  struct cl_target_option *prev_opt, *cur_opt;
  HOST_WIDE_INT prev_flags, cur_flags, diff_flags;

  if (TARGET_DEBUG_TARGET)
    {
      fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
      fprintf (stderr, "args:");
      rs6000_debug_target_options (args, " ");
      fprintf (stderr, "\n");

      if (pop_target)
	{
	  fprintf (stderr, "pop_target:\n");
	  debug_tree (pop_target);
	}
      else
	fprintf (stderr, "pop_target: <NULL>\n");

      fprintf (stderr, "--------------------\n");
    }

  if (! args)
    {
      cur_tree = ((pop_target)
		  ? pop_target
		  : target_option_default_node);
      cl_target_option_restore (&global_options, &global_options_set,
				TREE_TARGET_OPTION (cur_tree));
    }
  else
    {
      rs6000_cpu_index = rs6000_tune_index = -1;
      if (!rs6000_inner_target_options (args, false)
	  || !rs6000_option_override_internal (false)
	  || (cur_tree = build_target_option_node (&global_options,
						   &global_options_set))
	     == NULL_TREE)
	{
	  if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
	    fprintf (stderr, "invalid pragma\n");

	  return false;
	}
    }

  target_option_current_node = cur_tree;
  rs6000_activate_target_options (target_option_current_node);

  /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
     change the macros that are defined.  */
  if (rs6000_target_modify_macros_ptr)
    {
      prev_opt    = TREE_TARGET_OPTION (prev_tree);
      prev_flags  = prev_opt->x_rs6000_isa_flags;

      cur_opt     = TREE_TARGET_OPTION (cur_tree);
      cur_flags   = cur_opt->x_rs6000_isa_flags;

      diff_flags  = (prev_flags ^ cur_flags);

      if (diff_flags != 0)
	{
	  /* Delete old macros.  */
	  rs6000_target_modify_macros_ptr (false,
					   prev_flags & diff_flags);

	  /* Define new macros.  */
	  rs6000_target_modify_macros_ptr (true,
					   cur_flags & diff_flags);
	}
    }

  return true;
}

/* Remember the last target of rs6000_set_current_function.  */
static GTY(()) tree rs6000_previous_fndecl;

/* Restore target's globals from NEW_TREE and invalidate the
   rs6000_previous_fndecl cache.  */

void
rs6000_activate_target_options (tree new_tree)
{
  cl_target_option_restore (&global_options, &global_options_set,
			    TREE_TARGET_OPTION (new_tree));
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
  rs6000_previous_fndecl = NULL_TREE;
}

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
rs6000_set_current_function (tree fndecl)
{
  if (TARGET_DEBUG_TARGET)
    {
      fprintf (stderr, "\n==================== rs6000_set_current_function");

      if (fndecl)
	fprintf (stderr, ", fndecl %s (%p)",
		 (DECL_NAME (fndecl)
		  ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
		  : "<unknown>"), (void *)fndecl);

      if (rs6000_previous_fndecl)
	fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);

      fprintf (stderr, "\n");
    }

  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl == rs6000_previous_fndecl)
    return;

  tree old_tree;
  if (rs6000_previous_fndecl == NULL_TREE)
    old_tree = target_option_current_node;
  else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
    old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
  else
    old_tree = target_option_default_node;

  tree new_tree;
  if (fndecl == NULL_TREE)
    {
      if (old_tree != target_option_current_node)
	new_tree = target_option_current_node;
      else
	new_tree = NULL_TREE;
    }
  else
    {
      new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
      if (new_tree == NULL_TREE)
	new_tree = target_option_default_node;
    }

  if (TARGET_DEBUG_TARGET)
    {
      if (new_tree)
	{
	  fprintf (stderr, "\nnew fndecl target specific options:\n");
	  debug_tree (new_tree);
	}

      if (old_tree)
	{
	  fprintf (stderr, "\nold fndecl target specific options:\n");
	  debug_tree (old_tree);
	}

      if (old_tree != NULL_TREE || new_tree != NULL_TREE)
	fprintf (stderr, "--------------------\n");
    }

  if (new_tree && old_tree != new_tree)
    rs6000_activate_target_options (new_tree);

  if (fndecl)
    rs6000_previous_fndecl = fndecl;
}

/* Save the current options */

static void
rs6000_function_specific_save (struct cl_target_option *ptr,
			       struct gcc_options *opts,
			       struct gcc_options */* opts_set */)
{
  ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
  ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
}

/* Restore the current options */

static void
rs6000_function_specific_restore (struct gcc_options *opts,
				  struct gcc_options */* opts_set */,
				  struct cl_target_option *ptr)
{
  opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
  opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
  (void) rs6000_option_override_internal (false);
}

/* Print the current options */

static void
rs6000_function_specific_print (FILE *file, int indent,
				struct cl_target_option *ptr)
{
  rs6000_print_isa_options (file, indent, "Isa options set",
			    ptr->x_rs6000_isa_flags);

  rs6000_print_isa_options (file, indent, "Isa options explicit",
			    ptr->x_rs6000_isa_flags_explicit);
}

/* Helper function to print the current isa or misc options on a line.  */

static void
rs6000_print_options_internal (FILE *file,
			       int indent,
			       const char *string,
			       HOST_WIDE_INT flags,
			       const char *prefix,
			       const struct rs6000_opt_mask *opts,
			       size_t num_elements)
{
  size_t i;
  size_t start_column = 0;
  size_t cur_column;
  size_t max_column = 120;
  size_t prefix_len = strlen (prefix);
  size_t comma_len = 0;
  const char *comma = "";

  if (indent)
    start_column += fprintf (file, "%*s", indent, "");

  if (!flags)
    {
      fprintf (stderr, DEBUG_FMT_S, string, "<none>");
      return;
    }

  start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);

  /* Print the various mask options.  */
  cur_column = start_column;
  for (i = 0; i < num_elements; i++)
    {
      bool invert = opts[i].invert;
      const char *name = opts[i].name;
      const char *no_str = "";
      HOST_WIDE_INT mask = opts[i].mask;
      size_t len = comma_len + prefix_len + strlen (name);

      if (!invert)
	{
	  if ((flags & mask) == 0)
	    {
	      no_str = "no-";
	      len += strlen ("no-");
	    }

	  flags &= ~mask;
	}

      else
	{
	  if ((flags & mask) != 0)
	    {
	      no_str = "no-";
	      len += strlen ("no-");
	    }

	  flags |= mask;
	}

      cur_column += len;
      if (cur_column > max_column)
	{
	  fprintf (stderr, ", \\\n%*s", (int)start_column, "");
	  cur_column = start_column + len;
	  comma = "";
	}

      fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
      comma = ", ";
      comma_len = strlen (", ");
    }

  fputs ("\n", file);
}

/* Helper function to print the current isa options on a line.  */

static void
rs6000_print_isa_options (FILE *file, int indent, const char *string,
			  HOST_WIDE_INT flags)
{
  rs6000_print_options_internal (file, indent, string, flags, "-m",
				 &rs6000_opt_masks[0],
				 ARRAY_SIZE (rs6000_opt_masks));
}

/* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06,
   2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
   -mupper-regs-df, etc.).

   This function does not handle explicit options such as the user specifying
   -mdirect-move.  These are handled in rs6000_option_override_internal, and
   the appropriate error is given if needed.

   We return a mask of all of the implicit options that should not be enabled
   by default.  */

static HOST_WIDE_INT
rs6000_disable_incompatible_switches (void)
{
  HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
  size_t i, j;

  static const struct {
    const HOST_WIDE_INT no_flag;	/* flag explicitly turned off.  */
    const HOST_WIDE_INT dep_flags;	/* flags that depend on this option.  */
    const char *const name;		/* name of the switch.  */
  } flags[] = {
    { OPTION_MASK_VSX,		OTHER_VSX_VECTOR_MASKS,	"vsx" },
    { OPTION_MASK_ALTIVEC,	OTHER_ALTIVEC_MASKS,	"altivec" },
  };

  for (i = 0; i < ARRAY_SIZE (flags); i++)
    {
      HOST_WIDE_INT no_flag = flags[i].no_flag;

      if ((rs6000_isa_flags & no_flag) == 0
	  && (rs6000_isa_flags_explicit & no_flag) != 0)
	{
	  HOST_WIDE_INT dep_flags = flags[i].dep_flags;
	  HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
				     & rs6000_isa_flags
				     & dep_flags);

	  if (set_flags)
	    {
	      for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
		if ((set_flags & rs6000_opt_masks[j].mask) != 0)
		  {
		    set_flags &= ~rs6000_opt_masks[j].mask;
		    error ("%<-mno-%s%> turns off %<-m%s%>",
			   flags[i].name,
			   rs6000_opt_masks[j].name);
		  }

	      gcc_assert (!set_flags);
	    }

	  rs6000_isa_flags &= ~dep_flags;
	  ignore_masks |= no_flag | dep_flags;
	}
    }

  return ignore_masks;
}

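/* Editorial example (illustrative): if the user gives -mno-vsx while a
   dependent flag such as -mdirect-move was also set explicitly, the loop
   above emits "-mno-vsx turns off -mdirect-move" and clears every flag in
   OTHER_VSX_VECTOR_MASKS from the defaults to be applied later.  */
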
/* Helper function for printing the function name when debugging.  */

static const char *
get_decl_name (tree fn)
{
  tree name;

  if (!fn)
    return "<null>";

  name = DECL_NAME (fn);
  if (!name)
    return "<no-name>";

  return IDENTIFIER_POINTER (name);
}

/* Return the clone id of the target we are compiling code for in a target
   clone.  The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
   the priority list for the target clones (ordered from lowest to
   highest).  */

static int
rs6000_clone_priority (tree fndecl)
{
  tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
  HOST_WIDE_INT isa_masks;
  int ret = CLONE_DEFAULT;
  tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
  const char *attrs_str = NULL;

  attrs = TREE_VALUE (TREE_VALUE (attrs));
  attrs_str = TREE_STRING_POINTER (attrs);

  /* Return priority zero for default function.  Return the ISA needed for the
     function if it is not the default.  */
  if (strcmp (attrs_str, "default") != 0)
    {
      if (fn_opts == NULL_TREE)
	fn_opts = target_option_default_node;

      if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
	isa_masks = rs6000_isa_flags;
      else
	isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;

      for (ret = CLONE_MAX - 1; ret != 0; ret--)
	if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
	  break;
    }

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
	     get_decl_name (fndecl), ret);

  return ret;
}

/* This compares the priority of target features in function DECL1 and DECL2.
   It returns positive value if DECL1 is higher priority, negative value if
   DECL2 is higher priority and 0 if they are the same.  Note, priorities are
   ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0).  */

static int
rs6000_compare_version_priority (tree decl1, tree decl2)
{
  int priority1 = rs6000_clone_priority (decl1);
  int priority2 = rs6000_clone_priority (decl2);
  int ret = priority1 - priority2;

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
	     get_decl_name (decl1), get_decl_name (decl2), ret);

  return ret;
}

/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Returns the decl of the dispatcher function.  */

static tree
rs6000_get_function_versions_dispatcher (void *decl)
{
  tree fn = (tree) decl;
  struct cgraph_node *node = NULL;
  struct cgraph_node *default_node = NULL;
  struct cgraph_function_version_info *node_v = NULL;
  struct cgraph_function_version_info *first_v = NULL;

  tree dispatch_decl = NULL;

  struct cgraph_function_version_info *default_version_info = NULL;
  gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
	     get_decl_name (fn));

  node = cgraph_node::get (fn);
  gcc_assert (node != NULL);

  node_v = node->function_version ();
  gcc_assert (node_v != NULL);

  if (node_v->dispatcher_resolver != NULL)
    return node_v->dispatcher_resolver;

  /* Find the default version and make it the first node.  */
  first_v = node_v;
  /* Go to the beginning of the chain.  */
  while (first_v->prev != NULL)
    first_v = first_v->prev;

  default_version_info = first_v;
  while (default_version_info != NULL)
    {
      const tree decl2 = default_version_info->this_node->decl;
      if (is_function_default_version (decl2))
	break;
      default_version_info = default_version_info->next;
    }

  /* If there is no default node, just return NULL.  */
  if (default_version_info == NULL)
    return NULL;

  /* Make default info the first node.  */
  if (first_v != default_version_info)
    {
      default_version_info->prev->next = default_version_info->next;
      if (default_version_info->next)
	default_version_info->next->prev = default_version_info->prev;
      first_v->prev = default_version_info;
      default_version_info->next = first_v;
      default_version_info->prev = NULL;
    }

  default_node = default_version_info->this_node;

#ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
  error_at (DECL_SOURCE_LOCATION (default_node->decl),
	    "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
	    "exports hardware capability bits");
#else

  if (targetm.has_ifunc_p ())
    {
      struct cgraph_function_version_info *it_v = NULL;
      struct cgraph_node *dispatcher_node = NULL;
      struct cgraph_function_version_info *dispatcher_version_info = NULL;

      /* Right now, the dispatching is done via ifunc.  */
      dispatch_decl = make_dispatcher_decl (default_node->decl);
      TREE_NOTHROW (dispatch_decl) = TREE_NOTHROW (fn);

      dispatcher_node = cgraph_node::get_create (dispatch_decl);
      gcc_assert (dispatcher_node != NULL);
      dispatcher_node->dispatcher_function = 1;
      dispatcher_version_info
	= dispatcher_node->insert_new_function_version ();
      dispatcher_version_info->next = default_version_info;
      dispatcher_node->definition = 1;

      /* Set the dispatcher for all the versions.  */
      it_v = default_version_info;
      while (it_v != NULL)
	{
	  it_v->dispatcher_resolver = dispatch_decl;
	  it_v = it_v->next;
	}
    }
  else
    error_at (DECL_SOURCE_LOCATION (default_node->decl),
	      "multiversioning needs %<ifunc%> which is not supported "
	      "on this target");
#endif

  return dispatch_decl;
}

/* Make the resolver function decl to dispatch the versions of a multi-
   versioned function, DEFAULT_DECL.  Create an empty basic block in the
   resolver and store the pointer in EMPTY_BB.  Return the decl of the resolver
   function.  */

static tree
make_resolver_func (const tree default_decl,
		    const tree dispatch_decl,
		    basic_block *empty_bb)
{
  /* Make the resolver function static.  The resolver function returns
     void *.  */
  tree decl_name = clone_function_name (default_decl, "resolver");
  const char *resolver_name = IDENTIFIER_POINTER (decl_name);
  tree type = build_function_type_list (ptr_type_node, NULL_TREE);
  tree decl = build_fn_decl (resolver_name, type);
  SET_DECL_ASSEMBLER_NAME (decl, decl_name);

  DECL_NAME (decl) = decl_name;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;

  /* Resolver is not external, body is generated.  */
  DECL_EXTERNAL (decl) = 0;
  DECL_EXTERNAL (dispatch_decl) = 0;

  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);
  DECL_STATIC_CONSTRUCTOR (decl) = 0;

  if (DECL_COMDAT_GROUP (default_decl)
      || TREE_PUBLIC (default_decl))
    {
      /* In this case, each translation unit with a call to this
	 versioned function will put out a resolver.  Ensure it
	 is comdat to keep just one copy.  */
      DECL_COMDAT (decl) = 1;
      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
    }
  else
    TREE_PUBLIC (dispatch_decl) = 0;

  /* Build result decl and add to function_decl.  */
  tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
  DECL_CONTEXT (t) = decl;
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  gimplify_function_tree (decl);
  push_cfun (DECL_STRUCT_FUNCTION (decl));
  *empty_bb = init_lowered_empty_function (decl, false,
					   profile_count::uninitialized ());

  cgraph_node::add_new_function (decl, true);
  symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));

  pop_cfun ();

  /* Mark dispatch_decl as "ifunc" with resolver as resolver_name.  */
  DECL_ATTRIBUTES (dispatch_decl)
    = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));

  cgraph_node::create_same_body_alias (dispatch_decl, decl);

  return decl;
}

/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
   return a pointer to VERSION_DECL if we are running on a machine that
   supports the index CLONE_ISA hardware architecture bits.  This function will
   be called during version dispatch to decide which function version to
   execute.  It returns the basic block at the end, to which more conditions
   can be added.  */

static basic_block
add_condition_to_bb (tree function_decl, tree version_decl,
		     int clone_isa, basic_block new_bb)
{
  push_cfun (DECL_STRUCT_FUNCTION (function_decl));

  gcc_assert (new_bb != NULL);
  gimple_seq gseq = bb_seq (new_bb);

  tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
			      build_fold_addr_expr (version_decl));
  tree result_var = create_tmp_var (ptr_type_node);
  gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
  gimple *return_stmt = gimple_build_return (result_var);

  if (clone_isa == CLONE_DEFAULT)
    {
      gimple_seq_add_stmt (&gseq, convert_stmt);
      gimple_seq_add_stmt (&gseq, return_stmt);
      set_bb_seq (new_bb, gseq);
      gimple_set_bb (convert_stmt, new_bb);
      gimple_set_bb (return_stmt, new_bb);
      pop_cfun ();
      return new_bb;
    }

  tree bool_zero = build_int_cst (bool_int_type_node, 0);
  tree cond_var = create_tmp_var (bool_int_type_node);
  tree predicate_decl = rs6000_builtin_decls[(int) RS6000_BIF_CPU_SUPPORTS];
  const char *arg_str = rs6000_clone_map[clone_isa].name;
  tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
  gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
  gimple_call_set_lhs (call_cond_stmt, cond_var);

  gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (call_cond_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, call_cond_stmt);

  gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
					    NULL_TREE, NULL_TREE);
  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (if_else_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, if_else_stmt);

  gimple_seq_add_stmt (&gseq, convert_stmt);
  gimple_seq_add_stmt (&gseq, return_stmt);
  set_bb_seq (new_bb, gseq);

  basic_block bb1 = new_bb;
  edge e12 = split_block (bb1, if_else_stmt);
  basic_block bb2 = e12->dest;
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_TRUE_VALUE;

  edge e23 = split_block (bb2, return_stmt);
  gimple_set_bb (convert_stmt, bb2);
  gimple_set_bb (return_stmt, bb2);

  basic_block bb3 = e23->dest;
  make_edge (bb1, bb3, EDGE_FALSE_VALUE);

  remove_edge (e23);
  make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);

  pop_cfun ();
  return bb3;
}

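/* A rough sketch of the control flow built above for one non-default clone
   (hand-written illustration; the CPU-supports string really comes from
   rs6000_clone_map[clone_isa].name):

     if (__builtin_cpu_supports ("arch_3_00"))   // bb1: condition
       return (void *) foo_power9;               // bb2: convert + return
     ...                                         // bb3: next condition or
                                                 //      the default version
*/
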
/* This function generates the dispatch function for multi-versioned functions.
   DISPATCH_DECL is the function which will contain the dispatch logic.
   FNDECLS are the function choices for dispatch, and is a tree chain.
   EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
   code is generated.  */

static int
dispatch_function_versions (tree dispatch_decl,
			    void *fndecls_p,
			    basic_block *empty_bb)
{
  int ix;
  tree ele;
  vec<tree> *fndecls;
  tree clones[CLONE_MAX];

  if (TARGET_DEBUG_TARGET)
    fputs ("dispatch_function_versions, top\n", stderr);

  gcc_assert (dispatch_decl != NULL
	      && fndecls_p != NULL
	      && empty_bb != NULL);

  /* fndecls_p is actually a vector.  */
  fndecls = static_cast<vec<tree> *> (fndecls_p);

  /* At least one more version other than the default.  */
  gcc_assert (fndecls->length () >= 2);

  /* The first version in the vector is the default decl.  */
  memset ((void *) clones, '\0', sizeof (clones));
  clones[CLONE_DEFAULT] = (*fndecls)[0];

  /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
     on the PowerPC (on the x86_64, it is not a NOP).  The builtin function
     __builtin_cpu_supports ensures that the TOC fields are setup by requiring
     a recent glibc.  If we ever need to call __builtin_cpu_init, we would need
     to insert the code here to do the call.  */

  for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    {
      int priority = rs6000_clone_priority (ele);
      if (!clones[priority])
	clones[priority] = ele;
    }

  for (ix = CLONE_MAX - 1; ix >= 0; ix--)
    if (clones[ix])
      {
	if (TARGET_DEBUG_TARGET)
	  fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
		   ix, get_decl_name (clones[ix]));

	*empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
					 *empty_bb);
      }

  return 0;
}

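/* Worked example of the dispatch order above (clone names illustrative):
   given clones for, say, a power10 ISA level, a power9 ISA level and the
   default, the loop walks from CLONE_MAX - 1 down to 0, so the resolver
   tests the most capable ISA first and falls through to the default
   version last.  */
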
/* Generate the dispatching code body to dispatch multi-versioned function
   DECL.  The target hook is called to process the "target" attributes and
   provide the code to dispatch the right function at run-time.  NODE points
   to the dispatcher decl whose body will be created.  */

tree
rs6000_generate_version_dispatcher_body (void *node_p)
{
  tree resolver;
  basic_block empty_bb;
  struct cgraph_node *node = (cgraph_node *) node_p;
  struct cgraph_function_version_info *ninfo = node->function_version ();

  if (ninfo->dispatcher_resolver)
    return ninfo->dispatcher_resolver;

  /* node is going to be an alias, so remove the finalized bit.  */
  node->definition = false;

  /* The first version in the chain corresponds to the default version.  */
  ninfo->dispatcher_resolver = resolver
    = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
	     get_decl_name (resolver));

  push_cfun (DECL_STRUCT_FUNCTION (resolver));
  auto_vec<tree, 2> fn_ver_vec;

  for (struct cgraph_function_version_info *vinfo = ninfo->next;
       vinfo;
       vinfo = vinfo->next)
    {
      struct cgraph_node *version = vinfo->this_node;
      /* Check for virtual functions here again, as by this time it should
	 have been determined if this function needs a vtable index or
	 not.  This happens for methods in derived classes that override
	 virtual methods in base classes but are not explicitly marked as
	 virtual.  */
      if (DECL_VINDEX (version->decl))
	sorry ("Virtual function multiversioning not supported");

      fn_ver_vec.safe_push (version->decl);
    }

  dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
  cgraph_edge::rebuild_edges ();
  pop_cfun ();

  return resolver;
}

/* Hook to decide if we need to scan function gimple statements to
   collect target specific information for inlining, and update the
   corresponding RS6000_FN_TARGET_INFO_* bit in INFO if we are able
   to predict which ISA feature is used at this time.  Return true
   if we need to scan, otherwise return false.  */

static bool
rs6000_need_ipa_fn_target_info (const_tree decl,
				unsigned int &info ATTRIBUTE_UNUSED)
{
  tree target = DECL_FUNCTION_SPECIFIC_TARGET (decl);
  if (!target)
    target = target_option_default_node;
  struct cl_target_option *opts = TREE_TARGET_OPTION (target);

  /* See PR102059, we only handle HTM for now, so will only do
     the consequent scannings when the HTM feature is enabled.  */
  if (opts->x_rs6000_isa_flags & OPTION_MASK_HTM)
    return true;

  return false;
}

/* Hook to update target specific information INFO for inlining by
   checking the given STMT.  Return false if we don't need to scan
   any more, otherwise return true.  */

static bool
rs6000_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt)
{
#ifndef HAVE_AS_POWER10_HTM
  /* Assume inline asm can use any instruction features.  */
  if (gimple_code (stmt) == GIMPLE_ASM)
    {
      const char *asm_str = gimple_asm_string (as_a<const gasm *> (stmt));
      /* Ignore empty inline asm string.  */
      if (strlen (asm_str) > 0)
	/* This should set any bits we're concerned with; for now
	   OPTION_MASK_HTM is the only bit we care about.  */
	info |= RS6000_FN_TARGET_INFO_HTM;
      return false;
    }
#endif

  if (gimple_code (stmt) == GIMPLE_CALL)
    {
      tree fndecl = gimple_call_fndecl (stmt);
      if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD))
	{
	  enum rs6000_gen_builtins fcode
	    = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
	  /* HTM bifs definitely exploit HTM insns.  */
	  if (bif_is_htm (rs6000_builtin_info[fcode]))
	    {
	      info |= RS6000_FN_TARGET_INFO_HTM;
	      return false;
	    }
	}
    }

  return true;
}

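/* Illustration of the scan above (hypothetical callee): a body containing an
   HTM built-in such as

     __builtin_tbegin (0);

   or, when the assembler cannot be assumed to handle power10 HTM, any
   non-empty asm statement, causes RS6000_FN_TARGET_INFO_HTM to be set.
   rs6000_can_inline_p below then keeps OPTION_MASK_HTM in the callee's mask
   instead of ignoring it.  */
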
/* Hook to determine if one function can safely inline another.  */

static bool
rs6000_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If the caller/callee has option attributes, then use them.
     Otherwise, use the command line options.  */
  if (!callee_tree)
    callee_tree = target_option_default_node;
  if (!caller_tree)
    caller_tree = target_option_default_node;

  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);

  HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
  HOST_WIDE_INT caller_isa = caller_opts->x_rs6000_isa_flags;
  HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;

  cgraph_node *callee_node = cgraph_node::get (callee);
  if (ipa_fn_summaries && ipa_fn_summaries->get (callee_node) != NULL)
    {
      unsigned int info = ipa_fn_summaries->get (callee_node)->target_info;
      if ((info & RS6000_FN_TARGET_INFO_HTM) == 0)
	{
	  callee_isa &= ~OPTION_MASK_HTM;
	  explicit_isa &= ~OPTION_MASK_HTM;
	}
    }

  /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
     purposes.  */
  callee_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
  explicit_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);

  /* The callee's options must be a subset of the caller's options, i.e.
     a vsx function may inline an altivec function, but a no-vsx function
     must not inline a vsx function.  However, for those options that the
     callee has explicitly enabled or disabled, then we must enforce that
     the callee's and caller's options match exactly; see PR70010.  */
  if (((caller_isa & callee_isa) == callee_isa)
      && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
    ret = true;

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
	     get_decl_name (caller), get_decl_name (callee),
	     (ret ? "can" : "cannot"));

  return ret;
}

/* Allocate a stack temp and fixup the address so it meets the particular
   memory requirements (either offsettable or REG+REG addressing).  */

rtx
rs6000_allocate_stack_temp (machine_mode mode,
			    bool offsettable_p,
			    bool reg_reg_p)
{
  rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  rtx addr = XEXP (stack, 0);
  int strict_p = reload_completed;

  if (!legitimate_indirect_address_p (addr, strict_p))
    {
      if (offsettable_p
	  && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
	stack = replace_equiv_address (stack, copy_addr_to_reg (addr));

      else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
	stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
    }

  return stack;
}

/* Given a memory reference, if it is not a reg or reg+reg addressing,
   convert to such a form to deal with memory reference instructions
   like STFIWX and LDBRX that only take reg+reg addressing.  */

rtx
rs6000_force_indexed_or_indirect_mem (rtx x)
{
  machine_mode mode = GET_MODE (x);

  gcc_assert (MEM_P (x));
  if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
    {
      rtx addr = XEXP (x, 0);
      if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
	{
	  rtx reg = XEXP (addr, 0);
	  HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
	  rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
	  gcc_assert (REG_P (reg));
	  emit_insn (gen_add3_insn (reg, reg, size_rtx));
	  addr = reg;
	}
      else if (GET_CODE (addr) == PRE_MODIFY)
	{
	  rtx reg = XEXP (addr, 0);
	  rtx expr = XEXP (addr, 1);
	  gcc_assert (REG_P (reg));
	  gcc_assert (GET_CODE (expr) == PLUS);
	  emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
	  addr = reg;
	}

      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0);
	  rtx op1 = XEXP (addr, 1);
	  op0 = force_reg (Pmode, op0);
	  op1 = force_reg (Pmode, op1);
	  x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
	}
      else
	x = replace_equiv_address (x, force_reg (Pmode, addr));
    }

  return x;
}

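/* For example, this routine rewrites an offset address such as

     (mem:DI (plus:DI (reg:DI 3) (const_int 16)))

   into reg+reg form by forcing the offset into a fresh pseudo:

     (mem:DI (plus:DI (reg:DI 3) (reg:DI 128)))

   so instructions like LDBRX, which have no D-form, can still be used.
   (Register numbers above are illustrative.)  */
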
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the RS/6000, all integer constants are acceptable; most won't be valid
   for particular insns, though.  Only easy FP constants are acceptable.  */

static bool
rs6000_legitimate_constant_p (machine_mode mode, rtx x)
{
  if (TARGET_ELF && tls_referenced_p (x))
    return false;

  if (CONST_DOUBLE_P (x))
    return easy_fp_constant (x, mode);

  if (GET_CODE (x) == CONST_VECTOR)
    return easy_vector_constant (x, mode);

  return true;
}

/* Implement TARGET_PRECOMPUTE_TLS_P.

   On AIX, TLS symbols are in the TOC, which is maintained in the
   constant pool.  AIX TOC TLS symbols need to be pre-computed, but
   must be considered legitimate constants.  */

static bool
rs6000_aix_precompute_tls_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return tls_referenced_p (x);
}

/* Return TRUE iff the sequence ending in LAST sets the static chain.  */

static bool
chain_already_loaded (rtx_insn *last)
{
  for (; last != NULL; last = PREV_INSN (last))
    {
      if (NONJUMP_INSN_P (last))
	{
	  rtx patt = PATTERN (last);

	  if (GET_CODE (patt) == SET)
	    {
	      rtx lhs = XEXP (patt, 0);

	      if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
		return true;
	    }
	}
    }
  return false;
}

/* Expand code to perform a call under the AIX or ELFv2 ABI.  */

void
rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
{
  rtx func = func_desc;
  rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
  rtx toc_load = NULL_RTX;
  rtx toc_restore = NULL_RTX;
  rtx func_addr;
  rtx abi_reg = NULL_RTX;
  rtx call[4];
  int n_call;
  rtx insn;
  bool is_pltseq_longcall;

  if (global_tlsarg)
    tlsarg = global_tlsarg;

  /* Handle longcall attributes.  */
  is_pltseq_longcall = false;
  if ((INTVAL (cookie) & CALL_LONG) != 0
      && GET_CODE (func_desc) == SYMBOL_REF)
    {
      func = rs6000_longcall_ref (func_desc, tlsarg);
      if (TARGET_PLTSEQ)
	is_pltseq_longcall = true;
    }

  /* Handle indirect calls.  */
  if (!SYMBOL_REF_P (func)
      || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
    {
      if (!rs6000_pcrel_p ())
	{
	  /* Save the TOC into its reserved slot before the call,
	     and prepare to restore it after the call.  */
	  rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
	  rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
						 gen_rtvec (1, stack_toc_offset),
						 UNSPEC_TOCSLOT);
	  toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);

	  /* Can we optimize saving the TOC in the prologue or
	     do we need to do it at every call?  */
	  if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
	    cfun->machine->save_toc_in_prologue = true;
	  else
	    {
	      rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
	      rtx stack_toc_mem = gen_frame_mem (Pmode,
						 gen_rtx_PLUS (Pmode, stack_ptr,
							       stack_toc_offset));
	      MEM_VOLATILE_P (stack_toc_mem) = 1;
	      if (is_pltseq_longcall)
		{
		  rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
		  rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
		  emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
		}
	      else
		emit_move_insn (stack_toc_mem, toc_reg);
	    }
	}

      if (DEFAULT_ABI == ABI_ELFv2)
	{
	  /* A function pointer in the ELFv2 ABI is just a plain address, but
	     the ABI requires it to be loaded into r12 before the call.  */
	  func_addr = gen_rtx_REG (Pmode, 12);
	  emit_move_insn (func_addr, func);
	  abi_reg = func_addr;
	  /* Indirect calls via CTR are strongly preferred over indirect
	     calls via LR, so move the address there.  Needed to mark
	     this insn for linker plt sequence editing too.  */
	  func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
	  if (is_pltseq_longcall)
	    {
	      rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
	      rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	      emit_insn (gen_rtx_SET (func_addr, mark_func));
	      v = gen_rtvec (2, func_addr, func_desc);
	      func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	    }
	  else
	    emit_move_insn (func_addr, abi_reg);
	}
      else
	{
	  /* A function pointer under AIX is a pointer to a data area whose
	     first word contains the actual address of the function, whose
	     second word contains a pointer to its TOC, and whose third word
	     contains a value to place in the static chain register (r11).
	     Note that if we load the static chain, our "trampoline" need
	     not have any executable code.  */

	  /* Load up address of the actual function.  */
	  func = force_reg (Pmode, func);
	  func_addr = gen_reg_rtx (Pmode);
	  emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));

	  /* Indirect calls via CTR are strongly preferred over indirect
	     calls via LR, so move the address there.  */
	  rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
	  emit_move_insn (ctr_reg, func_addr);
	  func_addr = ctr_reg;

	  /* Prepare to load the TOC of the called function.  Note that the
	     TOC load must happen immediately before the actual call so
	     that unwinding the TOC registers works correctly.  See the
	     comment in frob_update_context.  */
	  rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
	  rtx func_toc_mem = gen_rtx_MEM (Pmode,
					  gen_rtx_PLUS (Pmode, func,
							func_toc_offset));
	  toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);

	  /* If we have a static chain, load it up.  But, if the call was
	     originally direct, the 3rd word has not been written since no
	     trampoline has been built, so we ought not to load it, lest we
	     override a static chain value.  */
	  if (!(GET_CODE (func_desc) == SYMBOL_REF
		&& SYMBOL_REF_FUNCTION_P (func_desc))
	      && TARGET_POINTERS_TO_NESTED_FUNCTIONS
	      && !chain_already_loaded (get_current_sequence ()->next->last))
	    {
	      rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
	      rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
	      rtx func_sc_mem = gen_rtx_MEM (Pmode,
					     gen_rtx_PLUS (Pmode, func,
							   func_sc_offset));
	      emit_move_insn (sc_reg, func_sc_mem);
	      abi_reg = sc_reg;
	    }
	}
    }
  else
    {
      /* No TOC register needed for calls from PC-relative callers.  */
      if (!rs6000_pcrel_p ())
	/* Direct calls use the TOC: for local calls, the callee will
	   assume the TOC register is set; for non-local calls, the
	   PLT stub needs the TOC register.  */
	abi_reg = toc_reg;
      func_addr = func;
    }

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);
  call[1] = gen_rtx_USE (VOIDmode, cookie);
  n_call = 2;

  if (toc_load)
    call[n_call++] = toc_load;
  if (toc_restore)
    call[n_call++] = toc_restore;

  call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
  insn = emit_call_insn (insn);

  /* Mention all registers defined by the ABI to hold information
     as uses in CALL_INSN_FUNCTION_USAGE.  */
  if (abi_reg)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
}

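/* For reference, the indirect-call sequence built above corresponds roughly
   to this 64-bit AIX assembly, where rX holds the function-descriptor
   address (registers and the TOC save slot offset are illustrative and
   depend on the ABI configuration; see RS6000_TOC_SAVE_SLOT):

     std 2,40(1)        # save our TOC pointer in its reserved stack slot
     ld 0,0(rX)         # word 0: entry-point address
     mtctr 0
     ld 11,16(rX)       # word 2: static-chain value (when used)
     ld 2,8(rX)         # word 1: callee's TOC pointer
     bctrl
     ld 2,40(1)         # restore our TOC pointer after the call
*/
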
/* Expand code to perform a sibling call under the AIX or ELFv2 ABI.  */

void
rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
{
  rtx call[2];
  rtx insn;
  rtx r12 = NULL_RTX;
  rtx func_addr = func_desc;

  if (global_tlsarg)
    tlsarg = global_tlsarg;

  /* Handle longcall attributes.  */
  if (INTVAL (cookie) & CALL_LONG && SYMBOL_REF_P (func_desc))
    {
      /* PCREL can do a sibling call to a longcall function
	 because we don't need to restore the TOC register.  */
      gcc_assert (rs6000_pcrel_p ());
      func_desc = rs6000_longcall_ref (func_desc, tlsarg);
    }
  else
    gcc_assert (INTVAL (cookie) == 0);

  /* For ELFv2, r12 and CTR need to hold the function address
     for an indirect call.  */
  if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
    {
      r12 = gen_rtx_REG (Pmode, 12);
      emit_move_insn (r12, func_desc);
      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
      emit_move_insn (func_addr, r12);
    }

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);

  call[1] = simple_return_rtx;

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
  insn = emit_call_insn (insn);

  /* Note use of the TOC register.  */
  if (!rs6000_pcrel_p ())
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
	     gen_rtx_REG (Pmode, TOC_REGNUM));

  /* Note use of r12.  */
  if (r12)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
}

/* Expand code to perform a call under the SYSV4 ABI.  */

void
rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
{
  rtx func = func_desc;
  rtx func_addr;
  rtx call[4];
  rtx insn;
  rtx abi_reg = NULL_RTX;
  int n;

  if (global_tlsarg)
    tlsarg = global_tlsarg;

  /* Handle longcall attributes.  */
  if ((INTVAL (cookie) & CALL_LONG) != 0
      && GET_CODE (func_desc) == SYMBOL_REF)
    {
      func = rs6000_longcall_ref (func_desc, tlsarg);
      /* If the longcall was implemented as an inline PLT call using
	 PLT unspecs then func will be REG:r11.  If not, func will be
	 a pseudo reg.  The inline PLT call sequence supports lazy
	 linking (and longcalls to functions in dlopen'd libraries).
	 The other style of longcalls don't.  The lazy linking entry
	 to the dynamic symbol resolver requires r11 be the function
	 address (as it is for linker generated PLT stubs).  Ensure
	 r11 stays valid to the bctrl by marking r11 used by the call.  */
      if (TARGET_PLTSEQ)
	abi_reg = func;
    }

  /* Handle indirect calls.  */
  if (GET_CODE (func) != SYMBOL_REF)
    {
      func = force_reg (Pmode, func);

      /* Indirect calls via CTR are strongly preferred over indirect
	 calls via LR, so move the address there.  That can't be left
	 to reload because we want to mark every instruction in an
	 inline PLT call sequence with a reloc, enabling the linker to
	 edit the sequence back to a direct call when that makes sense.  */
      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
      if (abi_reg)
	{
	  rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
	  rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	  emit_insn (gen_rtx_SET (func_addr, mark_func));
	  v = gen_rtvec (2, func_addr, func_desc);
	  func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	}
      else
	emit_move_insn (func_addr, func);
    }
  else
    func_addr = func;

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);
  call[1] = gen_rtx_USE (VOIDmode, cookie);
  n = 2;

  if (TARGET_SECURE_PLT
      && flag_pic
      && GET_CODE (func_addr) == SYMBOL_REF
      && !SYMBOL_REF_LOCAL_P (func_addr))
    call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);

  call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
  insn = emit_call_insn (insn);
  if (abi_reg)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
}

/* Expand code to perform a sibling call under the SysV4 ABI.  */

void
rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
{
  rtx func = func_desc;
  rtx func_addr;
  rtx call[3];
  rtx insn;
  rtx abi_reg = NULL_RTX;

  if (global_tlsarg)
    tlsarg = global_tlsarg;

  /* Handle longcall attributes.  */
  if ((INTVAL (cookie) & CALL_LONG) != 0
      && GET_CODE (func_desc) == SYMBOL_REF)
    {
      func = rs6000_longcall_ref (func_desc, tlsarg);
      /* If the longcall was implemented as an inline PLT call using
	 PLT unspecs then func will be REG:r11.  If not, func will be
	 a pseudo reg.  The inline PLT call sequence supports lazy
	 linking (and longcalls to functions in dlopen'd libraries).
	 The other style of longcalls don't.  The lazy linking entry
	 to the dynamic symbol resolver requires r11 be the function
	 address (as it is for linker generated PLT stubs).  Ensure
	 r11 stays valid to the bctr by marking r11 used by the call.  */
      if (TARGET_PLTSEQ)
	abi_reg = func;
    }

  /* Handle indirect calls.  */
  if (GET_CODE (func) != SYMBOL_REF)
    {
      func = force_reg (Pmode, func);

      /* Indirect sibcalls must go via CTR.  That can't be left to
	 reload because we want to mark every instruction in an inline
	 PLT call sequence with a reloc, enabling the linker to edit
	 the sequence back to a direct call when that makes sense.  */
      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
      if (abi_reg)
	{
	  rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
	  rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	  emit_insn (gen_rtx_SET (func_addr, mark_func));
	  v = gen_rtvec (2, func_addr, func_desc);
	  func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	}
      else
	emit_move_insn (func_addr, func);
    }
  else
    func_addr = func;

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);

  call[1] = gen_rtx_USE (VOIDmode, cookie);
  call[2] = simple_return_rtx;

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
  insn = emit_call_insn (insn);
  if (abi_reg)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
}

#if TARGET_MACHO

/* Expand code to perform a call under the Darwin ABI.
   Modulo handling of mlongcall, this is much the same as sysv.
   if/when the longcall optimisation is removed, we could drop this
   code and use the sysv case (taking care to avoid the tls stuff).

   We can use this for sibcalls too, if needed.  */

void
rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
		      rtx cookie, bool sibcall)
{
  rtx func = func_desc;
  rtx func_addr;
  rtx call[3];
  rtx insn;
  int cookie_val = INTVAL (cookie);
  bool make_island = false;

  /* Handle longcall attributes, there are two cases for Darwin:
     1) Newer linkers are capable of synthesising any branch islands needed.
     2) We need a helper branch island synthesised by the compiler.
     The second case has mostly been retired and we don't use it for m64.
     In fact, it is an optimisation, we could just indirect as sysv does..
     ... however, backwards compatibility for now.
     If we're going to use this, then we need to keep the CALL_LONG bit set,
     so that we can pick up the special insn form later.  */
  if ((cookie_val & CALL_LONG) != 0
      && GET_CODE (func_desc) == SYMBOL_REF)
    {
      /* FIXME: the longcall opt should not hang off this flag, it is most
	 likely incorrect for kernel-mode code-generation.  */
      if (darwin_symbol_stubs && TARGET_32BIT)
	make_island = true; /* Do nothing yet, retain the CALL_LONG flag.  */
      else
	{
	  /* The linker is capable of doing this, but the user explicitly
	     asked for -mlongcall, so we'll do the 'normal' version.  */
	  func = rs6000_longcall_ref (func_desc, NULL_RTX);
	  cookie_val &= ~CALL_LONG; /* Handled, zap it.  */
	}
    }

  /* Handle indirect calls.  */
  if (GET_CODE (func) != SYMBOL_REF)
    {
      func = force_reg (Pmode, func);

      /* Indirect calls via CTR are strongly preferred over indirect
	 calls via LR, and are required for indirect sibcalls, so move
	 the address there.  */
      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
      emit_move_insn (func_addr, func);
    }
  else
    func_addr = func;

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);

  call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));

  if (sibcall)
    call[2] = simple_return_rtx;
  else
    call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
  insn = emit_call_insn (insn);
  /* Now we have the debug info in the insn, we can set up the branch island
     if we're using one.  */
  if (make_island)
    {
      tree funname = get_identifier (XSTR (func_desc, 0));

      if (no_previous_def (funname))
	{
	  rtx label_rtx = gen_label_rtx ();
	  char *label_buf, temp_buf[256];
	  ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
				       CODE_LABEL_NUMBER (label_rtx));
	  label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
	  tree labelname = get_identifier (label_buf);
	  add_compiler_branch_island (labelname, funname,
				      insn_line ((const rtx_insn *) insn));
	}
    }
}
#endif /* TARGET_MACHO */

/* Expand code to perform a call under the Darwin ABI.  */

void
rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
		    rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
{
#if TARGET_MACHO
  rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
#else
  gcc_unreachable ();
#endif
}

/* Expand code to perform a sibling call under the Darwin ABI.  */

void
rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
		       rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
{
#if TARGET_MACHO
  rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
#else
  gcc_unreachable ();
#endif
}

/* Return whether we should generate PC-relative code for FNDECL.  */
bool
rs6000_fndecl_pcrel_p (const_tree fndecl)
{
  if (DEFAULT_ABI != ABI_ELFv2)
    return false;

  struct cl_target_option *opts = target_opts_for_fn (fndecl);

  return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
	  && TARGET_CMODEL == CMODEL_MEDIUM);
}

/* Return whether we should generate PC-relative code for *FN.  */
bool
rs6000_function_pcrel_p (struct function *fn)
{
  if (DEFAULT_ABI != ABI_ELFv2)
    return false;

  /* Optimize usual case.  */
  if (fn == cfun)
    return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
	    && TARGET_CMODEL == CMODEL_MEDIUM);

  return rs6000_fndecl_pcrel_p (fn->decl);
}

/* Return whether we should generate PC-relative code for the current
   function.  */
bool
rs6000_pcrel_p ()
{
  return (DEFAULT_ABI == ABI_ELFv2
	  && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
	  && TARGET_CMODEL == CMODEL_MEDIUM);
}

/* Given an address (ADDR), a mode (MODE), and what the format of the
   non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
   for the address.  */

enum insn_form
address_to_insn_form (rtx addr,
		      machine_mode mode,
		      enum non_prefixed_form non_prefixed_format)
{
  /* Single register is easy.  */
  if (REG_P (addr) || SUBREG_P (addr))
    return INSN_FORM_BASE_REG;

  /* If the non prefixed instruction format doesn't support offset addressing,
     make sure only indexed addressing is allowed.

     We special case SDmode so that the register allocator does not try to move
     SDmode through GPR registers, but instead uses the 32-bit integer load and
     store instructions for the floating point registers.  */
  if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
    {
      if (GET_CODE (addr) != PLUS)
	return INSN_FORM_BAD;

      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      if (!REG_P (op0) && !SUBREG_P (op0))
	return INSN_FORM_BAD;

      if (!REG_P (op1) && !SUBREG_P (op1))
	return INSN_FORM_BAD;

      return INSN_FORM_X;
    }

  /* Deal with update forms.  */
  if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
    return INSN_FORM_UPDATE;

  /* Handle PC-relative symbols and labels.  Check for both local and
     external symbols.  Assume labels are always local.  TLS symbols
     are not PC-relative for rs6000.  */
  if (TARGET_PCREL)
    {
      if (LABEL_REF_P (addr))
	return INSN_FORM_PCREL_LOCAL;

      if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
	{
	  if (!SYMBOL_REF_LOCAL_P (addr))
	    return INSN_FORM_PCREL_EXTERNAL;
	  else
	    return INSN_FORM_PCREL_LOCAL;
	}
    }

  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  /* Recognize LO_SUM addresses used with TOC and 32-bit addressing.  */
  if (GET_CODE (addr) == LO_SUM)
    return INSN_FORM_LO_SUM;

  /* Everything below must be an offset address of some form.  */
  if (GET_CODE (addr) != PLUS)
    return INSN_FORM_BAD;

  rtx op0 = XEXP (addr, 0);
  rtx op1 = XEXP (addr, 1);

  /* Check for indexed addresses.  */
  if (REG_P (op1) || SUBREG_P (op1))
    {
      if (REG_P (op0) || SUBREG_P (op0))
	return INSN_FORM_X;

      return INSN_FORM_BAD;
    }

  if (!CONST_INT_P (op1))
    return INSN_FORM_BAD;

  HOST_WIDE_INT offset = INTVAL (op1);
  if (!SIGNED_INTEGER_34BIT_P (offset))
    return INSN_FORM_BAD;

  /* Check for local and external PC-relative addresses.  Labels are always
     local.  TLS symbols are not PC-relative for rs6000.  */
  if (TARGET_PCREL)
    {
      if (LABEL_REF_P (op0))
	return INSN_FORM_PCREL_LOCAL;

      if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
	{
	  if (!SYMBOL_REF_LOCAL_P (op0))
	    return INSN_FORM_PCREL_EXTERNAL;
	  else
	    return INSN_FORM_PCREL_LOCAL;
	}
    }

  /* If it isn't PC-relative, the address must use a base register.  */
  if (!REG_P (op0) && !SUBREG_P (op0))
    return INSN_FORM_BAD;

  /* Large offsets must be prefixed.  */
  if (!SIGNED_INTEGER_16BIT_P (offset))
    {
      if (TARGET_PREFIXED)
	return INSN_FORM_PREFIXED_NUMERIC;

      return INSN_FORM_BAD;
    }

  /* We have a 16-bit offset, see what default instruction format to use.  */
  if (non_prefixed_format == NON_PREFIXED_DEFAULT)
    {
      unsigned size = GET_MODE_SIZE (mode);

      /* On 64-bit systems, assume 64-bit integers need to use DS form
	 addresses (for LD/STD).  VSX vectors need to use DQ form addresses
	 (for LXV and STXV).  TImode is problematical in that its normal usage
	 is expected to be GPRs where it wants a DS instruction format, but if
	 it goes into the vector registers, it wants a DQ instruction
	 format.  */
      if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
	non_prefixed_format = NON_PREFIXED_DS;

      else if (TARGET_VSX && size >= 16
	       && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)))
	non_prefixed_format = NON_PREFIXED_DQ;

      else
	non_prefixed_format = NON_PREFIXED_D;
    }

  /* Classify the D/DS/DQ-form addresses.  */
  switch (non_prefixed_format)
    {
      /* Instruction format D, all 16 bits are valid.  */
    case NON_PREFIXED_D:
      return INSN_FORM_D;

      /* Instruction format DS, bottom 2 bits must be 0.  */
    case NON_PREFIXED_DS:
      if ((offset & 3) == 0)
	return INSN_FORM_DS;

      else if (TARGET_PREFIXED)
	return INSN_FORM_PREFIXED_NUMERIC;

      else
	return INSN_FORM_BAD;

      /* Instruction format DQ, bottom 4 bits must be 0.  */
    case NON_PREFIXED_DQ:
      if ((offset & 15) == 0)
	return INSN_FORM_DQ;

      else if (TARGET_PREFIXED)
	return INSN_FORM_PREFIXED_NUMERIC;

      else
	return INSN_FORM_BAD;

    default:
      break;
    }

  return INSN_FORM_BAD;
}

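/* Worked examples of the classification above (illustrative):

     (reg r9)                          -> INSN_FORM_BASE_REG
     (plus (reg r9) (reg r10))         -> INSN_FORM_X
     (plus (reg r9) (const_int 8))     -> INSN_FORM_D/DS/DQ depending on MODE
     (plus (reg r9) (const_int 6))     -> INSN_FORM_BAD for a DS-form access
                                          (low 2 bits nonzero), or
                                          INSN_FORM_PREFIXED_NUMERIC with
                                          -mprefixed, since PLD has no
                                          alignment restriction
     (plus (reg r9) (const_int 0x12345)) -> needs the 34-bit displacement, so
                                          PREFIXED_NUMERIC, or BAD without
                                          prefixed instructions.  */
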
/* Given address rtx ADDR for a load of MODE, is this legitimate for a
   non-prefixed D-form or X-form instruction?  NON_PREFIXED_FORMAT is
   given NON_PREFIXED_D or NON_PREFIXED_DS to indicate whether we want
   a D-form or DS-form instruction.  X-form and base_reg are always
   allowed.  */

bool
address_is_non_pfx_d_or_x (rtx addr, machine_mode mode,
			   enum non_prefixed_form non_prefixed_format)
{
  enum insn_form result_form;

  result_form = address_to_insn_form (addr, mode, non_prefixed_format);

  switch (non_prefixed_format)
    {
    case NON_PREFIXED_D:
      switch (result_form)
	{
	case INSN_FORM_X:
	case INSN_FORM_D:
	case INSN_FORM_DS:
	case INSN_FORM_BASE_REG:
	  return true;
	default:
	  break;
	}
      break;
    case NON_PREFIXED_DS:
      switch (result_form)
	{
	case INSN_FORM_X:
	case INSN_FORM_DS:
	case INSN_FORM_BASE_REG:
	  return true;
	default:
	  break;
	}
      break;
    default:
      break;
    }
  return false;
}

/* Return true if a REG with a given MODE that is loaded from or stored into a
   MEM location uses a non-prefixed D/DS/DQ-form address.  This is used to
   validate the load or store with the PCREL_OPT optimization to make sure it
   is an instruction that can be optimized.

   We need to specify the MODE separately from the REG to allow for loads that
   include zero/sign/float extension.  */

bool
pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem)
{
  /* If the instruction is indexed only like LFIWAX/LXSIWAX we cannot do the
     PCREL_OPT optimization.  */
  enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode);
  if (non_prefixed == NON_PREFIXED_X)
    return false;

  /* Check if this is a non-prefixed D/DS/DQ-form instruction.  */
  rtx addr = XEXP (mem, 0);
  enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed);
  return (iform == INSN_FORM_BASE_REG
	  || iform == INSN_FORM_D
	  || iform == INSN_FORM_DS
	  || iform == INSN_FORM_DQ);
}

/* Helper function to see if we're potentially looking at lfs/stfs.
   - PARALLEL containing a SET and a CLOBBER
   - stfs:
     - SET is from UNSPEC_SI_FROM_SF to MEM:SI
     - CLOBBER is a V4SF
   - lfs:
     - SET is from UNSPEC_SF_FROM_SI to REG:SF
     - CLOBBER is a DI
 */

static bool
is_lfs_stfs_insn (rtx_insn *insn)
{
  rtx pattern = PATTERN (insn);
  if (GET_CODE (pattern) != PARALLEL)
    return false;

  /* This should be a parallel with exactly one set and one clobber.  */
  if (XVECLEN (pattern, 0) != 2)
    return false;

  rtx set = XVECEXP (pattern, 0, 0);
  if (GET_CODE (set) != SET)
    return false;

  rtx clobber = XVECEXP (pattern, 0, 1);
  if (GET_CODE (clobber) != CLOBBER)
    return false;

  /* All we care about is that the destination of the SET is a mem:SI,
     the source should be an UNSPEC_SI_FROM_SF, and the clobber
     should be a scratch:V4SF.  */

  rtx dest = SET_DEST (set);
  rtx src = SET_SRC (set);
  rtx scratch = SET_DEST (clobber);

  if (GET_CODE (src) != UNSPEC)
    return false;

  /* stfs case.  */
  if (XINT (src, 1) == UNSPEC_SI_FROM_SF
      && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
      && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
    return true;

  /* lfs case.  */
  if (XINT (src, 1) == UNSPEC_SF_FROM_SI
      && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
      && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
    return true;

  return false;
}

/* Helper function to take a REG and a MODE and turn it into the non-prefixed
   instruction format (D/DS/DQ) used for offset memory.  */

enum non_prefixed_form
reg_to_non_prefixed (rtx reg, machine_mode mode)
{
  /* If it isn't a register, use the defaults.  */
  if (!REG_P (reg) && !SUBREG_P (reg))
    return NON_PREFIXED_DEFAULT;

  unsigned int r = reg_or_subregno (reg);

  /* If we have a pseudo, use the default instruction format.  */
  if (!HARD_REGISTER_NUM_P (r))
    return NON_PREFIXED_DEFAULT;

  unsigned size = GET_MODE_SIZE (mode);

  /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
     128-bit floating point, and 128-bit integers.  Before power9, only indexed
     addressing was available for vectors.  */
  if (FP_REGNO_P (r))
    {
      if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
	return NON_PREFIXED_D;

      else if (size < 8)
	return NON_PREFIXED_X;

      else if (TARGET_VSX && size >= 16
	       && (VECTOR_MODE_P (mode)
		   || VECTOR_ALIGNMENT_P (mode)
		   || mode == TImode || mode == CTImode))
	return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;

      else
	return NON_PREFIXED_DEFAULT;
    }

  /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
     128-bit floating point, and 128-bit integers.  Before power9, only indexed
     addressing was available.  */
  else if (ALTIVEC_REGNO_P (r))
    {
      if (!TARGET_P9_VECTOR)
	return NON_PREFIXED_X;

      if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
	return NON_PREFIXED_DS;

      else if (size < 8)
	return NON_PREFIXED_X;

      else if (TARGET_VSX && size >= 16
	       && (VECTOR_MODE_P (mode)
		   || VECTOR_ALIGNMENT_P (mode)
		   || mode == TImode || mode == CTImode))
	return NON_PREFIXED_DQ;

      else
	return NON_PREFIXED_DEFAULT;
    }

  /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
     otherwise.  Assume that any other register, such as LR, CRs, etc. will go
     through the GPR registers for memory operations.  */
  else if (TARGET_POWERPC64 && size >= 8)
    return NON_PREFIXED_DS;

  return NON_PREFIXED_D;
}

/* Whether a load instruction is a prefixed instruction.  This is called from
   the prefixed attribute processing.  */

bool
prefixed_load_p (rtx_insn *insn)
{
  /* Validate the insn to make sure it is a normal load insn.  */
  extract_insn_cached (insn);
  if (recog_data.n_operands < 2)
    return false;

  rtx reg = recog_data.operand[0];
  rtx mem = recog_data.operand[1];

  if (!REG_P (reg) && !SUBREG_P (reg))
    return false;

  if (!MEM_P (mem))
    return false;

  /* Prefixed load instructions do not support update or indexed forms.  */
  if (get_attr_indexed (insn) == INDEXED_YES
      || get_attr_update (insn) == UPDATE_YES)
    return false;

  /* LWA uses the DS format instead of the D format that LWZ uses.  */
  enum non_prefixed_form non_prefixed;
  machine_mode reg_mode = GET_MODE (reg);
  machine_mode mem_mode = GET_MODE (mem);

  if (mem_mode == SImode && reg_mode == DImode
      && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
    non_prefixed = NON_PREFIXED_DS;

  else
    non_prefixed = reg_to_non_prefixed (reg, mem_mode);

  if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
    return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
  else
    return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
}

/* Whether a store instruction is a prefixed instruction.  This is called from
   the prefixed attribute processing.  */

bool
prefixed_store_p (rtx_insn *insn)
{
  /* Validate the insn to make sure it is a normal store insn.  */
  extract_insn_cached (insn);
  if (recog_data.n_operands < 2)
    return false;

  rtx mem = recog_data.operand[0];
  rtx reg = recog_data.operand[1];

  if (!REG_P (reg) && !SUBREG_P (reg))
    return false;

  if (!MEM_P (mem))
    return false;

  /* Prefixed store instructions do not support update or indexed forms.  */
  if (get_attr_indexed (insn) == INDEXED_YES
      || get_attr_update (insn) == UPDATE_YES)
    return false;

  machine_mode mem_mode = GET_MODE (mem);
  rtx addr = XEXP (mem, 0);
  enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);

  /* Need to make sure we aren't looking at a stfs which doesn't look
     like the other things reg_to_non_prefixed/address_is_prefixed
     looks for.  */
  if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
    return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
  else
    return address_is_prefixed (addr, mem_mode, non_prefixed);
}

/* Whether a load immediate or add instruction is a prefixed instruction.  This
   is called from the prefixed attribute processing.  */

bool
prefixed_paddi_p (rtx_insn *insn)
{
  rtx set = single_set (insn);
  if (!set)
    return false;

  rtx dest = SET_DEST (set);
  rtx src = SET_SRC (set);

  if (!REG_P (dest) && !SUBREG_P (dest))
    return false;

  /* Is this a load immediate that can't be done with a simple ADDI or
     ADDIS?  */
  if (CONST_INT_P (src))
    return (satisfies_constraint_eI (src)
	    && !satisfies_constraint_I (src)
	    && !satisfies_constraint_L (src));

  /* Is this a PADDI instruction that can't be done with a simple ADDI or
     ADDIS?  */
  if (GET_CODE (src) == PLUS)
    {
      rtx op1 = XEXP (src, 1);

      return (CONST_INT_P (op1)
	      && satisfies_constraint_eI (op1)
	      && !satisfies_constraint_I (op1)
	      && !satisfies_constraint_L (op1));
    }

  /* If not, is it a load of a PC-relative address?  */
  if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
    return false;

  if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
    return false;

  enum insn_form iform = address_to_insn_form (src, Pmode,
					       NON_PREFIXED_DEFAULT);

  return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
}

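/* Example (illustrative): LI/LIS can only materialize constants matching the
   I or L constraints, so loading 0x12345678 needs an LIS+ORI pair without
   prefixed instructions, but a single PADDI with them:

     paddi 3,0,305419896,0    # 0x12345678 matches the eI (34-bit) constraint

   whereas 0x12340000 still matches L (a single LIS), so this function
   returns false for it.  */
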
/* Whether the next instruction needs a 'p' prefix issued before the
   instruction is printed out.  */
static bool prepend_p_to_next_insn;

/* Define FINAL_PRESCAN_INSN if some processing needs to be done before
   outputting the assembler code.  On the PowerPC, we remember if the current
   insn is a prefixed insn where we need to emit a 'p' before the insn.

   In addition, if the insn is part of a PC-relative reference to an external
   label optimization, this is recorded also.  */
void
rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
{
  prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn)
			    == MAYBE_PREFIXED_YES
			    && get_attr_prefixed (insn) == PREFIXED_YES);
  return;
}

/* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
   We use it to emit a 'p' for prefixed insns that is set in
   FINAL_PRESCAN_INSN.  */
void
rs6000_asm_output_opcode (FILE *stream)
{
  if (prepend_p_to_next_insn)
    {
      fprintf (stream, "p");

      /* Reset the flag in the case where there are separate insn lines in the
	 sequence, so the 'p' is only emitted for the first line.  This shows
	 up when we are doing the PCREL_OPT optimization, in that the label
	 created with %r<n> would have a leading 'p' printed.  */
      prepend_p_to_next_insn = false;
    }

  return;
}

/* Emit the relocation to tie the next instruction to a previous instruction
   that loads up an external address.  This is used to do the PCREL_OPT
   optimization.  Note, the label is generated after the PLD of the got
   pc-relative address to allow for the assembler to insert NOPs before the PLD
   instruction.  The operand is a constant integer that is the label
   number.  */

void
output_pcrel_opt_reloc (rtx label_num)
{
  rtx operands[1] = { label_num };
  output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
		   operands);
}

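/* The emitted sequence ties the dependent instruction to the PLD via a
   marker relocation, roughly (label number illustrative):

       pld 9,ext_sym@got@pcrel
     .Lpcrel1:
       ...
       .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
       lwz 3,0(9)

   which lets the linker rewrite the GOT-indirect pair into a direct
   PC-relative access when the loaded address is only used here.  */
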
/* Adjust the length of an INSN.  LENGTH is the currently-computed length and
   should be adjusted to reflect any required changes.  This macro is used when
   there is some systematic length adjustment required that would be difficult
   to express in the length attribute.

   In the PowerPC, we use this to adjust the length of an instruction if one or
   more prefixed instructions are generated, using the attribute
   num_prefixed_insns.  A prefixed instruction is 8 bytes instead of 4, but the
   hardware requires that a prefixed instruction does not cross a 64-byte
   boundary.  This means the compiler has to assume the length of the first
   prefixed instruction is 12 bytes instead of 8 bytes.  Since the length is
   already set for the non-prefixed instruction, we just need to update for the
   difference.  */

int
rs6000_adjust_insn_length (rtx_insn *insn, int length)
{
  if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
    {
      rtx pattern = PATTERN (insn);
      if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
	  && get_attr_prefixed (insn) == PREFIXED_YES)
	{
	  int num_prefixed = get_attr_max_prefixed_insns (insn);
	  length += 4 * (num_prefixed + 1);
	}
    }

  return length;
}

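/* Worked example: a single prefixed load (max_prefixed_insns == 1) whose
   base length was computed as 4 becomes 4 + 4 * (1 + 1) = 12 bytes: 8 for
   the prefixed encoding itself plus 4 of possible padding the assembler
   inserts to keep the prefixed instruction from crossing a 64-byte
   boundary.  */
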
#ifdef HAVE_GAS_HIDDEN
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Fills in the label name that should be used for a 476 link stack thunk.  */

void
get_ppc476_thunk_name (char name[32])
{
  gcc_assert (TARGET_LINK_STACK);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__ppc476.get_thunk");
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
}

/* This function emits the simple thunk routine that is used to preserve
   the link stack on the 476 cpu.  */

static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
static void
rs6000_code_end (void)
{
  char name[32];
  tree decl;

  if (!TARGET_LINK_STACK)
    return;

  get_ppc476_thunk_name (name);

  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
		     build_function_type_list (void_type_node, NULL_TREE));
  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				   NULL_TREE, void_type_node);
  TREE_PUBLIC (decl) = 1;
  TREE_STATIC (decl) = 1;

  if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
    {
      cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
      targetm.asm_out.unique_section (decl, 0);
      switch_to_section (get_named_section (decl, NULL, 0));
      DECL_WEAK (decl) = 1;
      ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
      targetm.asm_out.globalize_label (asm_out_file, name);
      targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
      ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
    }
  else
    {
      switch_to_section (text_section);
      ASM_OUTPUT_LABEL (asm_out_file, name);
    }

  DECL_INITIAL (decl) = make_node (BLOCK);
  current_function_decl = decl;
  allocate_struct_function (decl, false);
  init_function_start (decl);
  first_function_block_is_cold = false;
  /* Make sure unwind info is emitted for the thunk if needed.  */
  final_start_function (emit_barrier (), asm_out_file, 1);

  fputs ("\tblr\n", asm_out_file);

  final_end_function ();
  init_insn_lengths ();
  free_after_compilation (cfun);
  set_cfun (NULL);
  current_function_decl = NULL;
}

/* Add r30 to hard reg set if the prologue sets it up and it is not
   pic_offset_table_rtx.  */

static void
rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
{
  if (!TARGET_SINGLE_PIC_BASE
      && TARGET_TOC
      && TARGET_MINIMAL_TOC
      && !constant_pool_empty_p ())
    add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
  if (cfun->machine->split_stack_argp_used)
    add_to_hard_reg_set (&set->set, Pmode, 12);

  /* Make sure the hard reg set doesn't include r2, which was possibly added
     via PIC_OFFSET_TABLE_REGNUM.  */
  if (TARGET_TOC)
    remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
}

/* Helper function for rs6000_split_logical to emit a logical instruction after
   splitting the operation to single GPR registers.

   DEST is the destination register.
   OP1 and OP2 are the input source registers.
   CODE is the base operation (AND, IOR, XOR, NOT).
   MODE is the machine mode.
   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */

static void
rs6000_split_logical_inner (rtx dest,
			    rtx op1,
			    rtx op2,
			    enum rtx_code code,
			    machine_mode mode,
			    bool complement_final_p,
			    bool complement_op1_p,
			    bool complement_op2_p)
{
  rtx bool_rtx;

  /* Optimize AND of 0/0xffffffff and IOR/XOR of 0.  */
  if (op2 && CONST_INT_P (op2)
      && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
      && !complement_final_p && !complement_op1_p && !complement_op2_p)
    {
      HOST_WIDE_INT mask = GET_MODE_MASK (mode);
      HOST_WIDE_INT value = INTVAL (op2) & mask;

      /* Optimize AND of 0 to just set 0.  Optimize AND of -1 to be a move.  */
      if (code == AND)
	{
	  if (value == 0)
	    {
	      emit_insn (gen_rtx_SET (dest, const0_rtx));
	      return;
	    }

	  else if (value == mask)
	    {
	      if (!rtx_equal_p (dest, op1))
		emit_insn (gen_rtx_SET (dest, op1));
	      return;
	    }
	}

      /* Optimize IOR/XOR of 0 to be a simple move.  Split large operations
	 into separate ORI/ORIS or XORI/XORIS instructions.  */
      else if (code == IOR || code == XOR)
	{
	  if (value == 0)
	    {
	      if (!rtx_equal_p (dest, op1))
		emit_insn (gen_rtx_SET (dest, op1));
	      return;
	    }
	}
    }

  if (code == AND && mode == SImode
      && !complement_final_p && !complement_op1_p && !complement_op2_p)
    {
      emit_insn (gen_andsi3 (dest, op1, op2));
      return;
    }

  if (complement_op1_p)
    op1 = gen_rtx_NOT (mode, op1);

  if (complement_op2_p)
    op2 = gen_rtx_NOT (mode, op2);

  /* For canonical RTL, if only one arm is inverted it is the first.  */
  if (!complement_op1_p && complement_op2_p)
    std::swap (op1, op2);

  bool_rtx = ((code == NOT)
	      ? gen_rtx_NOT (mode, op1)
	      : gen_rtx_fmt_ee (code, mode, op1, op2));

  if (complement_final_p)
    bool_rtx = gen_rtx_NOT (mode, bool_rtx);

  emit_insn (gen_rtx_SET (dest, bool_rtx));
}

/* Split a DImode AND/IOR/XOR with a constant on a 32-bit system.  These
   operations are split immediately during RTL generation to allow for more
   optimizations of the AND/IOR/XOR.

   OPERANDS is an array containing the destination and two input operands.
   CODE is the base operation (AND, IOR, XOR, NOT).
   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */

static void
rs6000_split_logical_di (rtx operands[3],
			 enum rtx_code code,
			 bool complement_final_p,
			 bool complement_op1_p,
			 bool complement_op2_p)
{
  const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
  const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
  const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
  enum hi_lo { hi = 0, lo = 1 };
  rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
  size_t i;

  op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
  op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
  op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
  op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);

  if (code == NOT)
    op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
  else
    {
      if (!CONST_INT_P (operands[2]))
	{
	  op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
	  op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
	}
      else
	{
	  HOST_WIDE_INT value = INTVAL (operands[2]);
	  HOST_WIDE_INT value_hi_lo[2];

	  gcc_assert (!complement_final_p);
	  gcc_assert (!complement_op1_p);
	  gcc_assert (!complement_op2_p);

	  value_hi_lo[hi] = value >> 32;
	  value_hi_lo[lo] = value & lower_32bits;

	  for (i = 0; i < 2; i++)
	    {
	      HOST_WIDE_INT sub_value = value_hi_lo[i];

	      if (sub_value & sign_bit)
		sub_value |= upper_32bits;

	      op2_hi_lo[i] = GEN_INT (sub_value);

	      /* If this is an AND instruction, check to see if we need to load
		 the value in a register.  */
	      if (code == AND && sub_value != -1 && sub_value != 0
		  && !and_operand (op2_hi_lo[i], SImode))
		op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
	    }
	}
    }

  for (i = 0; i < 2; i++)
    {
      /* Split large IOR/XOR operations.  */
      if ((code == IOR || code == XOR)
	  && CONST_INT_P (op2_hi_lo[i])
	  && !complement_final_p
	  && !complement_op1_p
	  && !complement_op2_p
	  && !logical_const_operand (op2_hi_lo[i], SImode))
	{
	  HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
	  HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
	  HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
	  rtx tmp = gen_reg_rtx (SImode);

	  /* Make sure the constant is sign extended.  */
	  if ((hi_16bits & sign_bit) != 0)
	    hi_16bits |= upper_32bits;

	  rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
				      code, SImode, false, false, false);

	  rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
				      code, SImode, false, false, false);
	}
      else
	rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
				    code, SImode, complement_final_p,
				    complement_op1_p, complement_op2_p);
    }
}

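/* Worked example of the IOR/XOR splitting above: on a 32-bit target,
   OR-ing a word with 0x12345678 is not a single logical_const_operand
   (both halves are nonzero), so it is emitted as two instructions,

     oris tmp,src,0x1234    # high 16 bits
     ori  dst,tmp,0x5678    # low 16 bits

   matching the hi_16bits/lo_16bits decomposition in the loop.  */
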
/* Split the insns that make up boolean operations operating on multiple GPR
   registers.  The boolean MD patterns ensure that the inputs either are
   exactly the same as the output registers, or there is no overlap.

   OPERANDS is an array containing the destination and two input operands.
   CODE is the base operation (AND, IOR, XOR, NOT).
   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */

void
rs6000_split_logical (rtx operands[3],
		      enum rtx_code code,
		      bool complement_final_p,
		      bool complement_op1_p,
		      bool complement_op2_p)
{
  machine_mode mode = GET_MODE (operands[0]);
  machine_mode sub_mode;
  rtx op0, op1, op2;
  int sub_size, regno0, regno1, nregs, i;

  /* If this is DImode, use the specialized version that can run before
     register allocation.  */
  if (mode == DImode && !TARGET_POWERPC64)
    {
      rs6000_split_logical_di (operands, code, complement_final_p,
			       complement_op1_p, complement_op2_p);
      return;
    }

  op0 = operands[0];
  op1 = operands[1];
  op2 = (code == NOT) ? NULL_RTX : operands[2];
  sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
  sub_size = GET_MODE_SIZE (sub_mode);
  regno0 = REGNO (op0);
  regno1 = REGNO (op1);

  gcc_assert (reload_completed);
  gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
  gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));

  nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
  gcc_assert (nregs > 1);

  if (op2 && REG_P (op2))
    gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));

  for (i = 0; i < nregs; i++)
    {
      int offset = i * sub_size;
      rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
      rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
      rtx sub_op2 = ((code == NOT)
		     ? NULL_RTX
		     : simplify_subreg (sub_mode, op2, mode, offset));

      rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
				  complement_final_p, complement_op1_p,
				  complement_op2_p);
    }
}

/* Emit instructions to move SRC to DST.  Called by splitters for
   multi-register moves.  It will emit at most one instruction for
   each register that is accessed; that is, it won't emit li/lis pairs
   (or equivalent for 64-bit code).  One of SRC or DST must be a hard
   register.  */

void
rs6000_split_multireg_move (rtx dst, rtx src)
{
  /* The register number of the first register being moved.  */
  int reg;
  /* The mode that is to be moved.  */
  machine_mode mode;
  /* The mode that the move is being done in, and its size.  */
  machine_mode reg_mode;
  int reg_mode_size;
  /* The number of registers that will be moved.  */
  int nregs;

  reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
  mode = GET_MODE (dst);
  nregs = hard_regno_nregs (reg, mode);

  /* If we have a vector quad register for MMA, and this is a load or store,
     see if we can use vector paired load/stores.  */
  if (mode == XOmode && TARGET_MMA
      && (MEM_P (dst) || MEM_P (src)))
    {
      reg_mode = OOmode;
      nregs /= 2;
    }
  /* If we have a vector pair/quad mode, split it into two/four separate
     vectors.  */
  else if (mode == OOmode || mode == XOmode)
    reg_mode = V1TImode;
  else if (FP_REGNO_P (reg))
    reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
	(TARGET_HARD_FLOAT ? DFmode : SFmode);
  else if (ALTIVEC_REGNO_P (reg))
    reg_mode = V16QImode;
  else
    reg_mode = word_mode;
  reg_mode_size = GET_MODE_SIZE (reg_mode);

  gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));

  /* TDmode residing in FP registers is special, since the ISA requires that
     the lower-numbered word of a register pair is always the most significant
     word, even in little-endian mode.  This does not match the usual subreg
     semantics, so we cannot use simplify_gen_subreg in those cases.  Access
     the appropriate constituent registers "by hand" in little-endian mode.

     Note we do not need to check for destructive overlap here since TDmode
     can only reside in even/odd register pairs.  */
  if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
    {
      rtx p_src, p_dst;
      int i;

      for (i = 0; i < nregs; i++)
	{
	  if (REG_P (src) && FP_REGNO_P (REGNO (src)))
	    p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
	  else
	    p_src = simplify_gen_subreg (reg_mode, src, mode,
					 i * reg_mode_size);

	  if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
	    p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
	  else
	    p_dst = simplify_gen_subreg (reg_mode, dst, mode,
					 i * reg_mode_size);

	  emit_insn (gen_rtx_SET (p_dst, p_src));
	}

      return;
    }

  /* The __vector_pair and __vector_quad modes are multi-register
     modes, so if we have to load or store the registers, we have to be
     careful to properly swap them if we're in little endian mode
     below.  This means the last register gets the first memory
     location.  We also need to be careful of using the right register
     numbers if we are splitting XO to OO.  */
  if (mode == OOmode || mode == XOmode)
    {
      nregs = hard_regno_nregs (reg, mode);
      int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
      if (MEM_P (dst))
	{
	  unsigned offset = 0;
	  unsigned size = GET_MODE_SIZE (reg_mode);

	  /* If we are reading an accumulator register, we have to
	     deprime it before we can access it.  */
	  if (TARGET_MMA
	      && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
	    emit_insn (gen_mma_xxmfacc (src, src));

	  for (int i = 0; i < nregs; i += reg_mode_nregs)
	    {
	      unsigned subreg
		= WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
	      rtx dst2 = adjust_address (dst, reg_mode, offset);
	      rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
	      offset += size;
	      emit_insn (gen_rtx_SET (dst2, src2));
	    }

	  return;
	}

      if (MEM_P (src))
	{
	  unsigned offset = 0;
	  unsigned size = GET_MODE_SIZE (reg_mode);

	  for (int i = 0; i < nregs; i += reg_mode_nregs)
	    {
	      unsigned subreg
		= WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
	      rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
	      rtx src2 = adjust_address (src, reg_mode, offset);
	      offset += size;
	      emit_insn (gen_rtx_SET (dst2, src2));
	    }

	  /* If we are writing an accumulator register, we have to
	     prime it after we've written it.  */
	  if (TARGET_MMA
	      && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
	    emit_insn (gen_mma_xxmtacc (dst, dst));

	  return;
	}

      if (GET_CODE (src) == UNSPEC
	  || GET_CODE (src) == UNSPEC_VOLATILE)
	{
	  gcc_assert (XINT (src, 1) == UNSPEC_VSX_ASSEMBLE
		      || XINT (src, 1) == UNSPECV_MMA_ASSEMBLE);
	  gcc_assert (REG_P (dst));
	  if (GET_MODE (src) == XOmode)
	    gcc_assert (FP_REGNO_P (REGNO (dst)));
	  if (GET_MODE (src) == OOmode)
	    gcc_assert (VSX_REGNO_P (REGNO (dst)));

	  int nvecs = XVECLEN (src, 0);
	  for (int i = 0; i < nvecs; i++)
	    {
	      rtx op;
	      int regno = reg + i;

	      if (WORDS_BIG_ENDIAN)
		{
		  op = XVECEXP (src, 0, i);

		  /* If we are loading an even VSX register and the memory
		     location is adjacent to the next register's memory
		     location (if any), then we can load them both with one
		     LXVP instruction.  */
		  if ((regno & 1) == 0)
		    {
		      rtx op2 = XVECEXP (src, 0, i + 1);
		      if (adjacent_mem_locations (op, op2) == op)
			{
			  op = adjust_address (op, OOmode, 0);
			  /* Skip the next register, since we're going to
			     load it together with this register.  */
			  i++;
			}
		    }
		}
	      else
		{
		  op = XVECEXP (src, 0, nvecs - i - 1);

		  /* If we are loading an even VSX register and the memory
		     location is adjacent to the next register's memory
		     location (if any), then we can load them both with one
		     LXVP instruction.  */
		  if ((regno & 1) == 0)
		    {
		      rtx op2 = XVECEXP (src, 0, nvecs - i - 2);
		      if (adjacent_mem_locations (op2, op) == op2)
			{
			  op = adjust_address (op2, OOmode, 0);
			  /* Skip the next register, since we're going to
			     load it together with this register.  */
			  i++;
			}
		    }
		}

	      rtx dst_i = gen_rtx_REG (GET_MODE (op), regno);
	      emit_insn (gen_rtx_SET (dst_i, op));
	    }

	  /* We are writing an accumulator register, so we have to
	     prime it after we've written it.  */
	  if (GET_MODE (src) == XOmode)
	    emit_insn (gen_mma_xxmtacc (dst, dst));

	  return;
	}

      gcc_assert (REG_P (src) && REG_P (dst));
    }

  /* Register -> register moves can use common code.  */
  if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
    {
      /* If we are reading an accumulator register, we have to
	 deprime it before we can access it.  */
      if (TARGET_MMA
	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
	emit_insn (gen_mma_xxmfacc (src, src));

      /* Move register range backwards, if we might have destructive
	 overlap.  */
      int i;
      /* XO/OO are opaque so cannot use subregs.  */
      if (mode == OOmode || mode == XOmode)
	{
	  for (i = nregs - 1; i >= 0; i--)
	    {
	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
	      emit_insn (gen_rtx_SET (dst_i, src_i));
	    }
	}
      else
	{
	  for (i = nregs - 1; i >= 0; i--)
	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
							 i * reg_mode_size),
				    simplify_gen_subreg (reg_mode, src, mode,
							 i * reg_mode_size)));
	}

      /* If we are writing an accumulator register, we have to
	 prime it after we've written it.  */
      if (TARGET_MMA
	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
	emit_insn (gen_mma_xxmtacc (dst, dst));
    }
  else
    {
      int i, j = -1;
      bool used_update = false;
      rtx restore_basereg = NULL_RTX;

      if (MEM_P (src) && INT_REGNO_P (reg))
	{
	  rtx breg;

	  if (GET_CODE (XEXP (src, 0)) == PRE_INC
	      || GET_CODE (XEXP (src, 0)) == PRE_DEC)
	    {
	      rtx delta_rtx;
	      breg = XEXP (XEXP (src, 0), 0);
	      delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
	      emit_insn (gen_add3_insn (breg, breg, delta_rtx));
	      src = replace_equiv_address (src, breg);
	    }
	  else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
	    {
	      if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
		{
		  rtx basereg = XEXP (XEXP (src, 0), 0);
		  if (TARGET_UPDATE)
		    {
		      rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
		      emit_insn (gen_rtx_SET (ndst,
					      gen_rtx_MEM (reg_mode,
							   XEXP (src, 0))));
		      used_update = true;
		    }
		  else
		    emit_insn (gen_rtx_SET (basereg,
					    XEXP (XEXP (src, 0), 1)));
		  src = replace_equiv_address (src, basereg);
		}
	      else
		{
		  rtx basereg = gen_rtx_REG (Pmode, reg);
		  emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
		  src = replace_equiv_address (src, basereg);
		}
	    }

	  breg = XEXP (src, 0);
	  if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
	    breg = XEXP (breg, 0);

	  /* If the base register we are using to address memory is
	     also a destination reg, then change that register last.  */
	  if (REG_P (breg)
	      && REGNO (breg) >= REGNO (dst)
	      && REGNO (breg) < REGNO (dst) + nregs)
	    j = REGNO (breg) - REGNO (dst);
	}
      else if (MEM_P (dst) && INT_REGNO_P (reg))
	{
	  rtx breg;

	  if (GET_CODE (XEXP (dst, 0)) == PRE_INC
	      || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
	    {
	      rtx delta_rtx;
	      breg = XEXP (XEXP (dst, 0), 0);
	      delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));

	      /* We have to update the breg before doing the store.
		 Use store with update, if available.  */
	      if (TARGET_UPDATE)
		{
		  rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
		  emit_insn (TARGET_32BIT
			     ? (TARGET_POWERPC64
				? gen_movdi_si_update (breg, breg, delta_rtx,
						       nsrc)
				: gen_movsi_si_update (breg, breg, delta_rtx,
						       nsrc))
			     : gen_movdi_di_update (breg, breg, delta_rtx,
						    nsrc));
		  used_update = true;
		}
	      else
		emit_insn (gen_add3_insn (breg, breg, delta_rtx));
	      dst = replace_equiv_address (dst, breg);
	    }
	  else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
		   && GET_CODE (XEXP (dst, 0)) != LO_SUM)
	    {
	      if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
		{
		  rtx basereg = XEXP (XEXP (dst, 0), 0);
		  if (TARGET_UPDATE)
		    {
		      rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
		      emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
							   XEXP (dst, 0)),
					      nsrc));
		      used_update = true;
		    }
		  else
		    emit_insn (gen_rtx_SET (basereg,
					    XEXP (XEXP (dst, 0), 1)));
		  dst = replace_equiv_address (dst, basereg);
		}
	      else
		{
		  rtx basereg = XEXP (XEXP (dst, 0), 0);
		  rtx offsetreg = XEXP (XEXP (dst, 0), 1);
		  gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
			      && REG_P (basereg)
			      && REG_P (offsetreg)
			      && REGNO (basereg) != REGNO (offsetreg));
		  if (REGNO (basereg) == 0)
		    {
		      rtx tmp = offsetreg;
		      offsetreg = basereg;
		      basereg = tmp;
		    }
		  emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
		  restore_basereg = gen_sub3_insn (basereg, basereg,
						   offsetreg);
		  dst = replace_equiv_address (dst, basereg);
		}
	    }
	  else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
	    gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
	}

      /* If we are reading an accumulator register, we have to
	 deprime it before we can access it.  */
      if (TARGET_MMA && REG_P (src)
	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
	emit_insn (gen_mma_xxmfacc (src, src));

      for (i = 0; i < nregs; i++)
	{
	  /* Calculate index to next subword.  */
	  ++j;
	  if (j == nregs)
	    j = 0;

	  /* If compiler already emitted move of first word by
	     store with update, no need to do anything.  */
	  if (j == 0 && used_update)
	    continue;

	  /* XO/OO are opaque so cannot use subregs.  */
	  if (mode == OOmode || mode == XOmode)
	    {
	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
	      emit_insn (gen_rtx_SET (dst_i, src_i));
	    }
	  else
	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
							 j * reg_mode_size),
				    simplify_gen_subreg (reg_mode, src, mode,
							 j * reg_mode_size)));
	}

      /* If we are writing an accumulator register, we have to
	 prime it after we've written it.  */
      if (TARGET_MMA && REG_P (dst)
	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
	emit_insn (gen_mma_xxmtacc (dst, dst));

      if (restore_basereg != NULL_RTX)
	emit_insn (restore_basereg);
    }
}
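
/* Illustrative sketch (not part of the compiler): a __vector_pair (OOmode)
   register-to-register copy from vs0:vs1 to vs2:vs3 is split into two
   V1TImode sets.  Because REGNO (src) < REGNO (dst), the copy runs from the
   highest register downward to sidestep destructive overlap:

	xxlor 3,1,1
	xxlor 2,0,0  */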
/* Return true if the peephole2 can combine a load involving a combination of
   an addis instruction and a load with an offset that can be fused together
   on a power8.  */

bool
fusion_gpr_load_p (rtx addis_reg,	/* register set via addis.  */
		   rtx addis_value,	/* addis value.  */
		   rtx target,		/* target register that is loaded.  */
		   rtx mem)		/* bottom part of the memory addr.  */
{
  rtx addr;
  rtx base_reg;

  /* Validate arguments.  */
  if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
    return false;

  if (!base_reg_operand (target, GET_MODE (target)))
    return false;

  if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
    return false;

  /* Allow sign/zero extension.  */
  if (GET_CODE (mem) == ZERO_EXTEND
      || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
    mem = XEXP (mem, 0);

  if (!MEM_P (mem))
    return false;

  if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
    return false;

  addr = XEXP (mem, 0);			/* either PLUS or LO_SUM.  */
  if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
    return false;

  /* Validate that the register used to load the high value is either the
     register being loaded, or we can safely replace its use.

     This function is only called from the peephole2 pass and we assume that
     there are 2 instructions in the peephole (addis and load), so we want to
     check if the target register was not used in the memory address and the
     register to hold the addis result is dead after the peephole.  */
  if (REGNO (addis_reg) != REGNO (target))
    {
      if (reg_mentioned_p (target, mem))
	return false;

      if (!peep2_reg_dead_p (2, addis_reg))
	return false;

      /* If the target register being loaded is the stack pointer, we must
	 avoid loading any other value into it, even temporarily.  */
      if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
	return false;
    }

  base_reg = XEXP (addr, 0);
  return REGNO (addis_reg) == REGNO (base_reg);
}
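
/* Illustrative sketch (not part of the compiler): the peephole recognizes a
   TOC-relative pair such as

	addis 10,2,.LC0@toc@ha
	lwz   9,.LC0@toc@l(10)

   when r10 is dead afterwards, so the two insns can be kept adjacent and
   fused by the power8 instruction front end.  */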
/* During the peephole2 pass, adjust and expand the insns for a load fusion
   sequence.  We adjust the addis register to use the target register.  If the
   load sign extends, we adjust the code to do the zero extending load, and an
   explicit sign extension later since the fusion only covers zero extending
   loads.

   The operands are:
	operands[0]	register set with addis (to be replaced with target)
	operands[1]	value set via addis
	operands[2]	target register being loaded
	operands[3]	D-form memory reference using operands[0].  */

void
expand_fusion_gpr_load (rtx *operands)
{
  rtx addis_value = operands[1];
  rtx target = operands[2];
  rtx orig_mem = operands[3];
  rtx new_addr, new_mem, orig_addr, offset;
  enum rtx_code plus_or_lo_sum;
  machine_mode target_mode = GET_MODE (target);
  machine_mode extend_mode = target_mode;
  machine_mode ptr_mode = Pmode;
  enum rtx_code extend = UNKNOWN;

  if (GET_CODE (orig_mem) == ZERO_EXTEND
      || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
    {
      extend = GET_CODE (orig_mem);
      orig_mem = XEXP (orig_mem, 0);
      target_mode = GET_MODE (orig_mem);
    }

  gcc_assert (MEM_P (orig_mem));

  orig_addr = XEXP (orig_mem, 0);
  plus_or_lo_sum = GET_CODE (orig_addr);
  gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);

  offset = XEXP (orig_addr, 1);
  new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
  new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);

  if (extend != UNKNOWN)
    new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);

  new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
			    UNSPEC_FUSION_GPR);
  emit_insn (gen_rtx_SET (target, new_mem));

  if (extend == SIGN_EXTEND)
    {
      int sub_off = ((BYTES_BIG_ENDIAN)
		     ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
		     : 0);
      rtx sign_reg
	= simplify_subreg (target_mode, target, extend_mode, sub_off);

      emit_insn (gen_rtx_SET (target,
			      gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
    }

  return;
}
/* Emit the addis instruction that will be part of a fused instruction
   sequence.  */

void
emit_fusion_addis (rtx target, rtx addis_value)
{
  rtx fuse_ops[10];
  const char *addis_str = NULL;

  /* Emit the addis instruction.  */
  fuse_ops[0] = target;
  if (satisfies_constraint_L (addis_value))
    {
      fuse_ops[1] = addis_value;
      addis_str = "lis %0,%v1";
    }

  else if (GET_CODE (addis_value) == PLUS)
    {
      rtx op0 = XEXP (addis_value, 0);
      rtx op1 = XEXP (addis_value, 1);

      if (REG_P (op0) && CONST_INT_P (op1)
	  && satisfies_constraint_L (op1))
	{
	  fuse_ops[1] = op0;
	  fuse_ops[2] = op1;
	  addis_str = "addis %0,%1,%v2";
	}
    }

  else if (GET_CODE (addis_value) == HIGH)
    {
      rtx value = XEXP (addis_value, 0);
      if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
	{
	  fuse_ops[1] = XVECEXP (value, 0, 0);		/* symbol ref.  */
	  fuse_ops[2] = XVECEXP (value, 0, 1);		/* TOC register.  */
	  if (TARGET_ELF)
	    addis_str = "addis %0,%2,%1@toc@ha";

	  else if (TARGET_XCOFF)
	    addis_str = "addis %0,%1@u(%2)";

	  else
	    gcc_unreachable ();
	}

      else if (GET_CODE (value) == PLUS)
	{
	  rtx op0 = XEXP (value, 0);
	  rtx op1 = XEXP (value, 1);

	  if (GET_CODE (op0) == UNSPEC
	      && XINT (op0, 1) == UNSPEC_TOCREL
	      && CONST_INT_P (op1))
	    {
	      fuse_ops[1] = XVECEXP (op0, 0, 0);	/* symbol ref.  */
	      fuse_ops[2] = XVECEXP (op0, 0, 1);	/* TOC register.  */
	      fuse_ops[3] = op1;
	      if (TARGET_ELF)
		addis_str = "addis %0,%2,%1+%3@toc@ha";

	      else if (TARGET_XCOFF)
		addis_str = "addis %0,%1+%3@u(%2)";

	      else
		gcc_unreachable ();
	    }
	}

      else if (satisfies_constraint_L (value))
	{
	  fuse_ops[1] = value;
	  addis_str = "lis %0,%v1";
	}

      else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
	{
	  fuse_ops[1] = value;
	  addis_str = "lis %0,%1@ha";
	}
    }

  if (!addis_str)
    fatal_insn ("Could not generate addis value for fusion", addis_value);

  output_asm_insn (addis_str, fuse_ops);
}
/* Emit a D-form load or store instruction that is the second instruction
   of a fusion sequence.  */

static void
emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset,
		  const char *insn_str)
{
  rtx fuse_ops[10];
  char insn_template[80];

  fuse_ops[0] = load_reg;
  fuse_ops[1] = addis_reg;

  if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
    {
      sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
      fuse_ops[2] = offset;
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (GET_CODE (offset) == UNSPEC
	   && XINT (offset, 1) == UNSPEC_TOCREL)
    {
      if (TARGET_ELF)
	sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);

      else if (TARGET_XCOFF)
	sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);

      else
	gcc_unreachable ();

      fuse_ops[2] = XVECEXP (offset, 0, 0);
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (GET_CODE (offset) == PLUS
	   && GET_CODE (XEXP (offset, 0)) == UNSPEC
	   && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
	   && CONST_INT_P (XEXP (offset, 1)))
    {
      rtx tocrel_unspec = XEXP (offset, 0);
      if (TARGET_ELF)
	sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);

      else if (TARGET_XCOFF)
	sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);

      else
	gcc_unreachable ();

      fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
      fuse_ops[3] = XEXP (offset, 1);
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
    {
      sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
      fuse_ops[2] = offset;
      output_asm_insn (insn_template, fuse_ops);
    }

  else
    fatal_insn ("Unable to generate load/store offset for fusion", offset);
}
/* Given an address, convert it into the addis and load offset parts.
   Addresses created during the peephole2 process look like:
	(lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
		(unspec [(...)] UNSPEC_TOCREL))  */

static void
fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
{
  rtx hi, lo;

  if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
    {
      hi = XEXP (addr, 0);
      lo = XEXP (addr, 1);
    }
  else
    gcc_unreachable ();

  *p_hi = hi;
  *p_lo = lo;
}
/* Return a string to fuse an addis instruction with a gpr load to the same
   register that we loaded up the addis instruction.  The address that is
   used is the logical address that was formed during peephole2:
	(lo_sum (high) (low-part))

   The code is complicated, so we call output_asm_insn directly, and just
   return "".  */

const char *
emit_fusion_gpr_load (rtx target, rtx mem)
{
  rtx addis_value;
  rtx addr;
  rtx load_offset;
  const char *load_str = NULL;
  machine_mode mode;

  if (GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  gcc_assert (REG_P (target) && MEM_P (mem));

  addr = XEXP (mem, 0);
  fusion_split_address (addr, &addis_value, &load_offset);

  /* Now emit the load instruction to the same register.  */
  mode = GET_MODE (mem);
  switch (mode)
    {
    case E_QImode:
      load_str = "lbz";
      break;

    case E_HImode:
      load_str = "lhz";
      break;

    case E_SImode:
    case E_SFmode:
      load_str = "lwz";
      break;

    case E_DImode:
    case E_DFmode:
      gcc_assert (TARGET_POWERPC64);
      load_str = "ld";
      break;

    default:
      fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
    }

  /* Emit the addis instruction.  */
  emit_fusion_addis (target, addis_value);

  /* Emit the D-form load instruction.  */
  emit_fusion_load (target, target, load_offset, load_str);

  return "";
}
/* This is not inside an #ifdef RS6000_GLIBC_ATOMIC_FENV because gengtype
   ignores it then.  */
static GTY(()) tree atomic_hold_decl;
static GTY(()) tree atomic_clear_decl;
static GTY(()) tree atomic_update_decl;

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */
static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
	{
	  atomic_hold_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feholdexcept"),
			  build_function_type_list (void_type_node,
						    double_ptr_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_hold_decl) = 1;
	  DECL_EXTERNAL (atomic_hold_decl) = 1;
	}

      if (atomic_clear_decl == NULL_TREE)
	{
	  atomic_clear_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feclearexcept"),
			  build_function_type_list (void_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_clear_decl) = 1;
	  DECL_EXTERNAL (atomic_clear_decl) = 1;
	}

      tree const_double = build_qualified_type (double_type_node,
						TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
	{
	  atomic_update_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feupdateenv"),
			  build_function_type_list (void_type_node,
						    const_double_ptr,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_update_decl) = 1;
	  DECL_EXTERNAL (atomic_update_decl) = 1;
	}

      tree fenv_var = create_tmp_var_raw (double_type_node);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
			       build4 (TARGET_EXPR, double_type_node, fenv_var,
				       void_node, NULL_TREE, NULL_TREE));

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
				 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }

  tree mffs = rs6000_builtin_decls[RS6000_BIF_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BIF_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var)

     *fenv_var = __builtin_mffs ();
     double fenv_hold;
     *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
     __builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask
    = HOST_WIDE_INT_C (0xffffffff00000007);

  tree fenv_var = create_tmp_var_raw (double_type_node);

  tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
			   NULL_TREE, NULL_TREE);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			      build_int_cst (uint64_type_node,
					     hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
				     build_int_cst (unsigned_type_node, 0xff),
				     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

     double fenv_clear = __builtin_mffs ();
     *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
     __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT clear_exception_mask
    = HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
			    call_mffs, NULL_TREE, NULL_TREE);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node,
				fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
				    fenv_clean_llu,
				    build_int_cst (uint64_type_node,
						   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
				      build_int_cst (unsigned_type_node, 0xff),
				      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var)

     double old_fenv = __builtin_mffs ();
     double fenv_update;
     *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL)
				| (*(uint64_t*)fenv_var & 0x1ff80fff);
     __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask
    = HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask
    = HOST_WIDE_INT_C (0x1ff80fff);

  tree old_fenv = create_tmp_var_raw (double_type_node);
  tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
			     call_mffs, NULL_TREE, NULL_TREE);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
			     build_int_cst (uint64_type_node,
					    update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			     build_int_cst (uint64_type_node,
					    new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
			      old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
				       build_int_cst (unsigned_type_node, 0xff),
				       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}
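
/* Illustrative sketch (not part of the compiler): the HOLD/CLEAR/UPDATE
   trees built above wrap C11 atomic compound assignments on floating point
   types, e.g.

	_Atomic double y;
	y += 1.0;

   so that exception flags raised by discarded iterations of the
   compare-and-exchange retry loop are not observable.  */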
void
rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DFmode);
  rtx_tmp1 = gen_reg_rtx (V2DFmode);

  /* The destination of the vmrgew instruction layout is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after
     the vmrgew instruction will be correct.  */
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
					   GEN_INT (0)));
      emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
					   GEN_INT (3)));
    }
  else
    {
      emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
      emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
    }

  rtx_tmp2 = gen_reg_rtx (V4SFmode);
  rtx_tmp3 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
  emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
  else
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
}
void
rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DImode);
  rtx_tmp1 = gen_reg_rtx (V2DImode);

  /* The destination of the vmrgew instruction layout is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after
     the vmrgew instruction will be correct.  */
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
    }
  else
    {
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
    }

  rtx_tmp2 = gen_reg_rtx (V4SFmode);
  rtx_tmp3 = gen_reg_rtx (V4SFmode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
    }

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
  else
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
}
void
rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
			       rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DFmode);
  rtx_tmp1 = gen_reg_rtx (V2DFmode);

  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));

  rtx_tmp2 = gen_reg_rtx (V4SImode);
  rtx_tmp3 = gen_reg_rtx (V4SImode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
    }

  emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
}
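
/* Illustrative sketch (not part of the compiler): the helpers above expand
   the vec_float2, vec_signed2 and vec_unsigned2 built-ins.  For example,

	vector double a, b;
	vector float f = vec_float2 (a, b);

   yields { (float)a[0], (float)a[1], (float)b[0], (float)b[1] }, built from
   two xxpermdi shuffles, two conversion insns, and a final vmrgew merge.  */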
/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
			  optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return (opt_type == OPTIMIZE_FOR_SPEED
	      && RS6000_RECIP_AUTO_RSQRTE_P (mode1));

    default:
      return true;
    }
}
/* Implement TARGET_CONSTANT_ALIGNMENT.  */

static HOST_WIDE_INT
rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST
      && (STRICT_ALIGNMENT || !optimize_size))
    return MAX (align, BITS_PER_WORD);
  return align;
}
/* Implement TARGET_STARTING_FRAME_OFFSET.  */

static HOST_WIDE_INT
rs6000_starting_frame_offset (void)
{
  if (FRAME_GROWS_DOWNWARD)
    return 0;
  return RS6000_STARTING_FRAME_OFFSET;
}
/* Internal function to return the built-in function id for the complex
   multiply operation for a given mode.  */

static inline built_in_function
complex_multiply_builtin_code (machine_mode mode)
{
  gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT));
  int func = BUILT_IN_COMPLEX_MUL_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
  return (built_in_function) func;
}

/* Internal function to return the built-in function id for the complex divide
   operation for a given mode.  */

static inline built_in_function
complex_divide_builtin_code (machine_mode mode)
{
  gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT));
  int func = BUILT_IN_COMPLEX_DIV_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
  return (built_in_function) func;
}
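
/* Illustrative sketch (not part of the compiler): these helpers are simple
   offset computations, e.g. complex_multiply_builtin_code (KCmode) yields
   BUILT_IN_COMPLEX_MUL_MIN + (KCmode - MIN_MODE_COMPLEX_FLOAT), the id that
   rs6000_mangle_decl_assembler_name below maps to __mulkc3.  */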
/* On 64-bit Linux and Freebsd systems, possibly switch the long double
   library function names from <foo>l to <foo>f128 if the default long double
   type is IEEE 128-bit.  Typically, with the C and C++ languages, the
   standard math.h include file switches the names on systems that support
   long double as IEEE 128-bit, but that doesn't work if the user uses
   __builtin_<foo>l directly.  In the future, glibc will export names like
   __ieee128_sinf128 and we can switch to using those instead of using
   sinf128, which pollutes the user's namespace.

   This will switch the names for Fortran math functions as well (which
   doesn't use math.h).  However, Fortran needs other changes to the compiler
   and library before you can switch the real*16 type at compile time.

   We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name.  We
   only do this transformation if the __float128 type is enabled.  This
   prevents us from doing the transformation on older 32-bit ports that might
   have enabled using IEEE 128-bit floating point as the default long double
   type.

   We also use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change the
   function names used for complex multiply and divide to the appropriate
   names.  */

static tree
rs6000_mangle_decl_assembler_name (tree decl, tree id)
{
  /* Handle complex multiply/divide.  For IEEE 128-bit, use __mulkc3 or
     __divkc3 and for IBM 128-bit use __multc3 and __divtc3.  */
  if (TARGET_FLOAT128_TYPE
      && TREE_CODE (decl) == FUNCTION_DECL
      && DECL_IS_UNDECLARED_BUILTIN (decl)
      && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
    {
      built_in_function id = DECL_FUNCTION_CODE (decl);
      const char *newname = NULL;

      if (id == complex_multiply_builtin_code (KCmode))
	newname = "__mulkc3";

      else if (id == complex_multiply_builtin_code (ICmode))
	newname = "__multc3";

      else if (id == complex_multiply_builtin_code (TCmode))
	newname = (TARGET_IEEEQUAD) ? "__mulkc3" : "__multc3";

      else if (id == complex_divide_builtin_code (KCmode))
	newname = "__divkc3";

      else if (id == complex_divide_builtin_code (ICmode))
	newname = "__divtc3";

      else if (id == complex_divide_builtin_code (TCmode))
	newname = (TARGET_IEEEQUAD) ? "__divkc3" : "__divtc3";

      if (newname)
	{
	  if (TARGET_DEBUG_BUILTIN)
	    fprintf (stderr, "Map complex mul/div => %s\n", newname);

	  return get_identifier (newname);
	}
    }

  /* Map long double built-in functions if long double is IEEE 128-bit.  */
  if (TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
      && TREE_CODE (decl) == FUNCTION_DECL
      && DECL_IS_UNDECLARED_BUILTIN (decl)
      && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
    {
      size_t len = IDENTIFIER_LENGTH (id);
      const char *name = IDENTIFIER_POINTER (id);
      char *newname = NULL;

      /* See if it is one of the built-in functions with an unusual name.  */
      switch (DECL_FUNCTION_CODE (decl))
	{
	case BUILT_IN_DREML:
	  newname = xstrdup ("__remainderieee128");
	  break;

	case BUILT_IN_GAMMAL:
	  newname = xstrdup ("__lgammaieee128");
	  break;

	case BUILT_IN_GAMMAL_R:
	case BUILT_IN_LGAMMAL_R:
	  newname = xstrdup ("__lgammaieee128_r");
	  break;

	case BUILT_IN_NEXTTOWARD:
	  newname = xstrdup ("__nexttoward_to_ieee128");
	  break;

	case BUILT_IN_NEXTTOWARDF:
	  newname = xstrdup ("__nexttowardf_to_ieee128");
	  break;

	case BUILT_IN_NEXTTOWARDL:
	  newname = xstrdup ("__nexttowardieee128");
	  break;

	case BUILT_IN_POW10L:
	  newname = xstrdup ("__exp10ieee128");
	  break;

	case BUILT_IN_SCALBL:
	  newname = xstrdup ("__scalbieee128");
	  break;

	case BUILT_IN_SIGNIFICANDL:
	  newname = xstrdup ("__significandieee128");
	  break;

	case BUILT_IN_SINCOSL:
	  newname = xstrdup ("__sincosieee128");
	  break;

	default:
	  break;
	}

      /* Update the __builtin_*printf and __builtin_*scanf functions.  */
      if (!newname)
	{
	  size_t printf_len = strlen ("printf");
	  size_t scanf_len = strlen ("scanf");
	  size_t printf_chk_len = strlen ("printf_chk");

	  if (len >= printf_len
	      && strcmp (name + len - printf_len, "printf") == 0)
	    newname = xasprintf ("__%sieee128", name);

	  else if (len >= scanf_len
		   && strcmp (name + len - scanf_len, "scanf") == 0)
	    newname = xasprintf ("__isoc99_%sieee128", name);

	  else if (len >= printf_chk_len
		   && strcmp (name + len - printf_chk_len, "printf_chk") == 0)
	    newname = xasprintf ("%sieee128", name);

	  else if (name[len - 1] == 'l')
	    {
	      bool uses_ieee128_p = false;
	      tree type = TREE_TYPE (decl);
	      machine_mode ret_mode = TYPE_MODE (type);

	      /* See if the function returns a IEEE 128-bit floating point
		 type or complex type.  */
	      if (ret_mode == TFmode || ret_mode == TCmode)
		uses_ieee128_p = true;
	      else
		{
		  function_args_iterator args_iter;
		  tree arg;

		  /* See if the function passes a IEEE 128-bit floating point
		     type or complex type.  */
		  FOREACH_FUNCTION_ARGS (type, arg, args_iter)
		    {
		      machine_mode arg_mode = TYPE_MODE (arg);
		      if (arg_mode == TFmode || arg_mode == TCmode)
			{
			  uses_ieee128_p = true;
			  break;
			}
		    }
		}

	      /* If we passed or returned an IEEE 128-bit floating point type,
		 change the name.  Use __<name>ieee128, instead of <name>l.  */
	      if (uses_ieee128_p)
		newname = xasprintf ("__%.*sieee128", (int)(len - 1), name);
	    }
	}

      if (newname)
	{
	  if (TARGET_DEBUG_BUILTIN)
	    fprintf (stderr, "Map %s => %s\n", name, newname);

	  id = get_identifier (newname);
	  free (newname);
	}
    }

  return id;
}
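
/* Illustrative sketch (not part of the compiler): with IEEE 128-bit long
   double in effect, a call to __builtin_sinl is assembled as a call to
   __sinieee128 (the trailing 'l' is stripped and the name is wrapped), and
   __builtin_printf becomes __printfieee128, matching the glibc entry points
   for IEEE 128-bit long double.  */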
/* Predict whether the given loop in gimple will be transformed in the RTL
   doloop_optimize pass.  */

static bool
rs6000_predict_doloop_p (struct loop *loop)
{
  gcc_assert (loop);

  /* On rs6000, targetm.can_use_doloop_p is actually
     can_use_doloop_if_innermost.  Just ensure the loop is innermost.  */
  if (loop->inner != NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Predict doloop failure due to"
			    " loop nesting.\n");
      return false;
    }

  return true;
}

/* Implement TARGET_PREFERRED_DOLOOP_MODE.  */

static machine_mode
rs6000_preferred_doloop_mode (machine_mode)
{
  return word_mode;
}
/* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P.  */

static bool
rs6000_cannot_substitute_mem_equiv_p (rtx mem)
{
  gcc_assert (MEM_P (mem));

  /* curr_insn_transform()'s handling of subregs cannot handle altivec AND:
     type addresses, so don't allow MEMs with those address types to be
     substituted as an equivalent expression.  See PR93974 for details.  */
  if (GET_CODE (XEXP (mem, 0)) == AND)
    return true;

  return false;
}
/* Implement TARGET_INVALID_CONVERSION.  */

static const char *
rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
{
  /* Make sure we're working with the canonical types.  */
  if (TYPE_CANONICAL (fromtype) != NULL_TREE)
    fromtype = TYPE_CANONICAL (fromtype);
  if (TYPE_CANONICAL (totype) != NULL_TREE)
    totype = TYPE_CANONICAL (totype);

  machine_mode frommode = TYPE_MODE (fromtype);
  machine_mode tomode = TYPE_MODE (totype);

  if (frommode != tomode)
    {
      /* Do not allow conversions to/from XOmode and OOmode types.  */
      if (frommode == XOmode)
	return N_("invalid conversion from type %<__vector_quad%>");
      if (tomode == XOmode)
	return N_("invalid conversion to type %<__vector_quad%>");
      if (frommode == OOmode)
	return N_("invalid conversion from type %<__vector_pair%>");
      if (tomode == OOmode)
	return N_("invalid conversion to type %<__vector_pair%>");
    }

  /* Conversion allowed.  */
  return NULL;
}
/* Convert a SFmode constant to the integer bit pattern.  */

long
rs6000_const_f32_to_i32 (rtx operand)
{
  long value;
  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);

  gcc_assert (GET_MODE (operand) == SFmode);
  REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
  return value;
}
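
/* Illustrative sketch (not part of the compiler): for the SFmode constant
   1.0f, rs6000_const_f32_to_i32 returns 0x3f800000, the IEEE 754
   single-precision bit pattern.  */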
void
rs6000_emit_xxspltidp_v2df (rtx dst, long value)
{
  if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
    inform (input_location,
	    "the result for the xxspltidp instruction "
	    "is undefined for subnormal input values");

  emit_insn (gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
}
/* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC.  */

static bool
rs6000_gen_pic_addr_diff_vec (void)
{
  return rs6000_relative_jumptables;
}

void
rs6000_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t";
  char buf[100];

  fprintf (file, "%s", directive);
  ASM_GENERATE_INTERNAL_LABEL (buf, "L", value);
  assemble_name (file, buf);
  fprintf (file, "\n");
}
/* Copy an integer constant to the vector constant structure.  */

static void
constant_int_to_128bit_vector (rtx op,
			       machine_mode mode,
			       size_t byte_num,
			       vec_const_128bit_type *info)
{
  unsigned HOST_WIDE_INT uvalue = UINTVAL (op);
  unsigned bitsize = GET_MODE_BITSIZE (mode);

  for (int shift = bitsize - 8; shift >= 0; shift -= 8)
    info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
}
/* Copy a floating point constant to the vector constant structure.  */

static void
constant_fp_to_128bit_vector (rtx op,
			      machine_mode mode,
			      size_t byte_num,
			      vec_const_128bit_type *info)
{
  unsigned bitsize = GET_MODE_BITSIZE (mode);
  unsigned num_words = bitsize / 32;
  const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op);
  long real_words[VECTOR_128BIT_WORDS];

  /* Make sure we don't overflow the real_words array and that it is
     filled completely.  */
  gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0);

  real_to_target (real_words, rtype, mode);

  /* Iterate over each 32-bit word in the floating point constant.  The
     real_to_target function puts out words in target endian fashion.  We
     need to arrange the order so that the bytes are written in big endian
     order.  */
  for (unsigned num = 0; num < num_words; num++)
    {
      unsigned endian_num = (BYTES_BIG_ENDIAN
			     ? num
			     : num_words - 1 - num);

      unsigned uvalue = real_words[endian_num];
      for (int shift = 32 - 8; shift >= 0; shift -= 8)
	info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
    }

  /* Mark that this constant involves floating point.  */
  info->fp_constant_p = true;
}
/* Convert a vector constant OP with mode MODE to a vector 128-bit constant
   structure INFO.

   Break out the constant out to bytes, half words, words, and double words.
   Return true if we have successfully converted the constant.

   We handle CONST_INT, CONST_DOUBLE, CONST_VECTOR, and VEC_DUPLICATE of
   constants.  Integer and floating point scalar constants are splatted to
   fill out the rest of the vector.  */

bool
vec_const_128bit_to_bytes (rtx op,
			   machine_mode mode,
			   vec_const_128bit_type *info)
{
  /* Initialize the constant structure.  */
  memset ((void *)info, 0, sizeof (vec_const_128bit_type));

  /* Assume CONST_INTs are DImode.  */
  if (mode == VOIDmode)
    mode = CONST_INT_P (op) ? DImode : GET_MODE (op);

  if (mode == VOIDmode)
    return false;

  unsigned size = GET_MODE_SIZE (mode);
  bool splat_p = false;

  if (size > VECTOR_128BIT_BYTES)
    return false;

  /* Set up the bits.  */
  switch (GET_CODE (op))
    {
      /* Integer constants, default to double word.  */
    case CONST_INT:
      constant_int_to_128bit_vector (op, mode, 0, info);
      splat_p = true;
      break;

      /* Floating point constants.  */
    case CONST_DOUBLE:
      /* Fail if the floating point constant is the wrong mode.  */
      if (GET_MODE (op) != mode)
	return false;

      /* SFmode stored as scalars are stored in DFmode format.  */
      if (mode == SFmode)
	{
	  mode = DFmode;
	  size = GET_MODE_SIZE (DFmode);
	}

      constant_fp_to_128bit_vector (op, mode, 0, info);
      splat_p = true;
      break;

      /* Vector constants, iterate over each element.  On little endian
	 systems, we have to reverse the element numbers.  */
    case CONST_VECTOR:
      {
	/* Fail if the vector constant is the wrong mode or size.  */
	if (GET_MODE (op) != mode
	    || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
	  return false;

	machine_mode ele_mode = GET_MODE_INNER (mode);
	size_t ele_size = GET_MODE_SIZE (ele_mode);
	size_t nunits = GET_MODE_NUNITS (mode);

	for (size_t num = 0; num < nunits; num++)
	  {
	    rtx ele = CONST_VECTOR_ELT (op, num);
	    size_t byte_num = (BYTES_BIG_ENDIAN
			       ? num
			       : nunits - 1 - num) * ele_size;

	    if (CONST_INT_P (ele))
	      constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
	    else if (CONST_DOUBLE_P (ele))
	      constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
	    else
	      return false;
	  }

	break;
      }

      /* Treat VEC_DUPLICATE of a constant just like a vector constant.
	 Since we are duplicating the element, we don't have to worry about
	 endian issues.  */
    case VEC_DUPLICATE:
      {
	/* Fail if the vector duplicate is the wrong mode or size.  */
	if (GET_MODE (op) != mode
	    || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
	  return false;

	machine_mode ele_mode = GET_MODE_INNER (mode);
	size_t ele_size = GET_MODE_SIZE (ele_mode);
	rtx ele = XEXP (op, 0);
	size_t nunits = GET_MODE_NUNITS (mode);

	if (!CONST_INT_P (ele) && !CONST_DOUBLE_P (ele))
	  return false;

	for (size_t num = 0; num < nunits; num++)
	  {
	    size_t byte_num = num * ele_size;

	    if (CONST_INT_P (ele))
	      constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
	    else
	      constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
	  }

	break;
      }

      /* Anything else, just return failure.  */
    default:
      return false;
    }

  /* Splat the constant to fill 128 bits if desired.  */
  if (splat_p && size < VECTOR_128BIT_BYTES)
    {
      if ((VECTOR_128BIT_BYTES % size) != 0)
	return false;

      for (size_t offset = size;
	   offset < VECTOR_128BIT_BYTES;
	   offset += size)
	memcpy ((void *) &info->bytes[offset],
		(void *) &info->bytes[0],
		size);
    }

  /* Remember original size.  */
  info->original_size = size;

  /* Determine if the bytes are all the same.  */
  unsigned char first_byte = info->bytes[0];
  info->all_bytes_same = true;
  for (size_t i = 1; i < VECTOR_128BIT_BYTES; i++)
    if (first_byte != info->bytes[i])
      {
	info->all_bytes_same = false;
	break;
      }

  /* Pack half words together & determine if all of the half words are the
     same.  */
  for (size_t i = 0; i < VECTOR_128BIT_HALF_WORDS; i++)
    info->half_words[i] = ((info->bytes[i * 2] << 8)
			   | info->bytes[(i * 2) + 1]);

  unsigned short first_hword = info->half_words[0];
  info->all_half_words_same = true;
  for (size_t i = 1; i < VECTOR_128BIT_HALF_WORDS; i++)
    if (first_hword != info->half_words[i])
      {
	info->all_half_words_same = false;
	break;
      }

  /* Pack words together & determine if all of the words are the same.  */
  for (size_t i = 0; i < VECTOR_128BIT_WORDS; i++)
    info->words[i] = ((info->bytes[i * 4] << 24)
		      | (info->bytes[(i * 4) + 1] << 16)
		      | (info->bytes[(i * 4) + 2] << 8)
		      | info->bytes[(i * 4) + 3]);

  info->all_words_same
    = (info->words[0] == info->words[1]
       && info->words[0] == info->words[2]
       && info->words[0] == info->words[3]);

  /* Pack double words together & determine if all of the double words are
     the same.  */
  for (size_t i = 0; i < VECTOR_128BIT_DOUBLE_WORDS; i++)
    {
      unsigned HOST_WIDE_INT d_word = 0;
      for (size_t j = 0; j < 8; j++)
	d_word = (d_word << 8) | info->bytes[(i * 8) + j];

      info->double_words[i] = d_word;
    }

  info->all_double_words_same
    = (info->double_words[0] == info->double_words[1]);

  return true;
}
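
/* Illustrative sketch (not part of the compiler): for the DImode constant
   0x0102030405060708, the bytes array is filled with 01 02 ... 08 in big
   endian order and then splatted to all 16 bytes, so double_words[0] ==
   double_words[1] == 0x0102030405060708 and all_double_words_same is set.  */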
/* Determine if an IEEE 128-bit constant can be loaded with LXVKQ.  Return
   zero if the LXVKQ instruction cannot be used.  Otherwise return the
   immediate value to be used with the LXVKQ instruction.  */

int
constant_generates_lxvkq (vec_const_128bit_type *vsx_const)
{
  /* The instruction is only supported when power10 code generation, IEEE
     128-bit floating point hardware, and VSX registers are available.  */
  if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
      || !TARGET_VSX)
    return 0;

  /* All of the constants that are generated by LXVKQ have the bottom 3 words
     that are 0.  */
  if (vsx_const->words[1] != 0
      || vsx_const->words[2] != 0
      || vsx_const->words[3] != 0)
    return 0;

  /* See if we have a match for the first word.  */
  switch (vsx_const->words[0])
    {
    case 0x3FFF0000U: return 1;		/* IEEE 128-bit +1.0.  */
    case 0x40000000U: return 2;		/* IEEE 128-bit +2.0.  */
    case 0x40008000U: return 3;		/* IEEE 128-bit +3.0.  */
    case 0x40010000U: return 4;		/* IEEE 128-bit +4.0.  */
    case 0x40014000U: return 5;		/* IEEE 128-bit +5.0.  */
    case 0x40018000U: return 6;		/* IEEE 128-bit +6.0.  */
    case 0x4001C000U: return 7;		/* IEEE 128-bit +7.0.  */
    case 0x7FFF0000U: return 8;		/* IEEE 128-bit +Infinity.  */
    case 0x7FFF8000U: return 9;		/* IEEE 128-bit quiet NaN.  */
    case 0x80000000U: return 16;	/* IEEE 128-bit -0.0.  */
    case 0xBFFF0000U: return 17;	/* IEEE 128-bit -1.0.  */
    case 0xC0000000U: return 18;	/* IEEE 128-bit -2.0.  */
    case 0xC0008000U: return 19;	/* IEEE 128-bit -3.0.  */
    case 0xC0010000U: return 20;	/* IEEE 128-bit -4.0.  */
    case 0xC0014000U: return 21;	/* IEEE 128-bit -5.0.  */
    case 0xC0018000U: return 22;	/* IEEE 128-bit -6.0.  */
    case 0xC001C000U: return 23;	/* IEEE 128-bit -7.0.  */
    case 0xFFFF0000U: return 24;	/* IEEE 128-bit -Infinity.  */

      /* Anything else cannot be loaded.  */
    default:
      return 0;
    }
}
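
/* Illustrative sketch (not part of the compiler): with power10 code
   generation, the IEEE 128-bit constant +1.0 (first word 0x3FFF0000,
   remaining words zero) becomes a single instruction,

	lxvkq 34,1

   instead of a load from the constant pool.  */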
/* Determine if a vector constant can be loaded with XXSPLTIW.  Return zero
   if the XXSPLTIW instruction cannot be used.  Otherwise return the
   immediate value to be used with the XXSPLTIW instruction.  */

int
constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
{
  if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
    return 0;

  if (!vsx_const->all_words_same)
    return 0;

  /* If we can use XXSPLTIB, don't generate XXSPLTIW.  */
  if (vsx_const->all_bytes_same)
    return 0;

  /* See if we can use VSPLTISH or VSPLTISW.  */
  if (vsx_const->all_half_words_same)
    {
      short sign_h_word = vsx_const->half_words[0];
      if (EASY_VECTOR_15 (sign_h_word))
	return 0;
    }

  int sign_word = vsx_const->words[0];
  if (EASY_VECTOR_15 (sign_word))
    return 0;

  return vsx_const->words[0];
}
/* Determine if a vector constant can be loaded with XXSPLTIDP.  Return zero
   if the XXSPLTIDP instruction cannot be used.  Otherwise return the
   immediate value to be used with the XXSPLTIDP instruction.  */

int
constant_generates_xxspltidp (vec_const_128bit_type *vsx_const)
{
  if (!TARGET_SPLAT_FLOAT_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
    return 0;

  /* Reject if the two 64-bit segments are not the same.  */
  if (!vsx_const->all_double_words_same)
    return 0;

  /* If the bytes, half words, or words are all the same, don't use
     XXSPLTIDP.  Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or
     VSPLTISW).  */
  if (vsx_const->all_bytes_same
      || vsx_const->all_half_words_same
      || vsx_const->all_words_same)
    return 0;

  unsigned HOST_WIDE_INT value = vsx_const->double_words[0];

  /* Avoid values that look like DFmode NaN's, except for the normal NaN bit
     pattern and the signalling NaN bit pattern.  Recognize infinity and
     negative infinity.  */

  /* Bit representation of DFmode normal quiet NaN.  */
#define RS6000_CONST_DF_NAN	HOST_WIDE_INT_UC (0x7ff8000000000000)

  /* Bit representation of DFmode normal signaling NaN.  */
#define RS6000_CONST_DF_NANS	HOST_WIDE_INT_UC (0x7ff4000000000000)

  /* Bit representation of DFmode positive infinity.  */
#define RS6000_CONST_DF_INF	HOST_WIDE_INT_UC (0x7ff0000000000000)

  /* Bit representation of DFmode negative infinity.  */
#define RS6000_CONST_DF_NEG_INF	HOST_WIDE_INT_UC (0xfff0000000000000)

  if (value != RS6000_CONST_DF_NAN
      && value != RS6000_CONST_DF_NANS
      && value != RS6000_CONST_DF_INF
      && value != RS6000_CONST_DF_NEG_INF)
    {
      /* The IEEE 754 64-bit floating format has 1 bit for sign, 11 bits for
	 the exponent, and 52 bits for the mantissa (not counting the hidden
	 bit used for normal numbers).  NaN values have the exponent set to
	 all 1 bits, and the mantissa non-zero (mantissa == 0 is infinity).  */

      int df_exponent = (value >> 52) & 0x7ff;
      unsigned HOST_WIDE_INT
	df_mantissa = value & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U);

      if (df_exponent == 0x7ff && df_mantissa != 0)	/* other NaNs.  */
	return 0;

      /* Avoid values that are DFmode subnormal values.  Subnormal numbers
	 have the exponent all 0 bits, and the mantissa non-zero.  If the
	 value is subnormal, then the hidden bit in the mantissa is not
	 set.  */
      if (df_exponent == 0 && df_mantissa != 0)		/* subnormal.  */
	return 0;
    }

  /* Change the representation to DFmode constant.  */
  long df_words[2] = { vsx_const->words[0], vsx_const->words[1] };

  /* real_from_target takes the target words in target order.  */
  if (!BYTES_BIG_ENDIAN)
    std::swap (df_words[0], df_words[1]);

  REAL_VALUE_TYPE rv_type;
  real_from_target (&rv_type, df_words, DFmode);

  const REAL_VALUE_TYPE *rv = &rv_type;

  /* Validate that the number can be stored as a SFmode value.  */
  if (!exact_real_truncate (SFmode, rv))
    return 0;

  /* Validate that the number is not a SFmode subnormal value (exponent is 0,
     mantissa field is non-zero) which is undefined for the XXSPLTIDP
     instruction.  */
  long sf_value;
  real_to_target (&sf_value, rv, SFmode);

  /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent,
     and 23 bits for the mantissa.  Subnormal numbers have the exponent all
     0 bits, and the mantissa non-zero.  */
  long sf_exponent = (sf_value >> 23) & 0xFF;
  long sf_mantissa = sf_value & 0x7FFFFF;

  if (sf_exponent == 0 && sf_mantissa != 0)
    return 0;

  /* Return the immediate to be used.  */
  return sf_value;
}
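
/* Illustrative sketch (not part of the compiler): the V2DF constant
   { 2.5, 2.5 } truncates exactly to the SFmode pattern 0x40200000, so it can
   be loaded with

	xxspltidp 34,1075838976

   (immediate 0x40200000) rather than from the constant pool.  */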
/* Now we have only two opaque types, they are __vector_quad and
   __vector_pair built-in types.  They are target specific and only
   available when MMA is supported.  With MMA supported, uses of these
   types are always valid and this function simply returns false.
   Otherwise it checks whether the given gimple STMT is an assignment,
   asm or call stmt that uses either of these two opaque types
   unexpectedly; if so, it raises an error message and returns true,
   otherwise it returns false.  */

bool
rs6000_opaque_type_invalid_use_p (gimple *stmt)
{
  if (TARGET_MMA)
    return false;

  /* If the given TYPE is one MMA opaque type, emit the corresponding
     error messages and return true, otherwise return false.  */
  auto check_and_error_invalid_use = [](tree type)
    {
      tree mv = TYPE_MAIN_VARIANT (type);
      if (mv == vector_quad_type_node)
	{
	  error ("type %<__vector_quad%> requires the %qs option", "-mmma");
	  return true;
	}
      else if (mv == vector_pair_type_node)
	{
	  error ("type %<__vector_pair%> requires the %qs option", "-mmma");
	  return true;
	}
      return false;
    };

  /* The usage of MMA opaque types is very limited for now,
     to check with gassign, gasm and gcall is enough so far.  */
  if (gassign *ga = dyn_cast<gassign *> (stmt))
    {
      tree lhs = gimple_assign_lhs (ga);
      tree type = TREE_TYPE (lhs);
      if (check_and_error_invalid_use (type))
	return true;
    }
  else if (gasm *gs = dyn_cast<gasm *> (stmt))
    {
      unsigned ninputs = gimple_asm_ninputs (gs);
      for (unsigned i = 0; i < ninputs; i++)
	{
	  tree op = gimple_asm_input_op (gs, i);
	  tree val = TREE_VALUE (op);
	  tree type = TREE_TYPE (val);
	  if (check_and_error_invalid_use (type))
	    return true;
	}

      unsigned noutputs = gimple_asm_noutputs (gs);
      for (unsigned i = 0; i < noutputs; i++)
	{
	  tree op = gimple_asm_output_op (gs, i);
	  tree val = TREE_VALUE (op);
	  tree type = TREE_TYPE (val);
	  if (check_and_error_invalid_use (type))
	    return true;
	}
    }
  else if (gcall *gc = dyn_cast<gcall *> (stmt))
    {
      unsigned nargs = gimple_call_num_args (gc);
      for (unsigned i = 0; i < nargs; i++)
	{
	  tree arg = gimple_call_arg (gc, i);
	  tree type = TREE_TYPE (arg);
	  if (check_and_error_invalid_use (type))
	    return true;
	}
    }

  return false;
}
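
/* Illustrative sketch (not part of the compiler): without -mmma, a gimple
   assignment such as

	__vector_quad acc;
	acc = *ptr;

   has a left-hand side of type __vector_quad and is diagnosed here with
   "type '__vector_quad' requires the '-mmma' option".  */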
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"