1 // SPDX-License-Identifier: GPL-3.0-or-later
2 /* Subroutines used for code generation on IBM RS/6000.
3 Copyright (C) 1991-2024 Free Software Foundation, Inc.
4 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #define IN_TARGET_CODE 1
26 #include "coretypes.h"
36 #include "stringpool.h"
43 #include "diagnostic-core.h"
44 #include "insn-attr.h"
47 #include "fold-const.h"
49 #include "stor-layout.h"
51 #include "print-tree.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
59 #include "sched-int.h"
61 #include "gimple-iterator.h"
62 #include "gimple-fold.h"
63 #include "gimple-walk.h"
65 #include "tree-vectorizer.h"
66 #include "tree-ssa-propagate.h"
68 #include "tm-constrs.h"
69 #include "target-globals.h"
71 #include "tree-vector-builder.h"
73 #include "tree-pass.h"
74 #include "symbol-summary.h"
78 #include "ipa-fnsummary.h"
80 #include "case-cfn-macros.h"
82 #include "rs6000-internal.h"
85 /* This file should be included last. */
86 #include "target-def.h"
88 extern tree
rs6000_builtin_mask_for_load (void);
89 extern tree
rs6000_builtin_md_vectorized_function (tree
, tree
, tree
);
90 extern tree
rs6000_builtin_reciprocal (tree
);
92 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
93 systems will also set long double to be IEEE 128-bit. AIX and Darwin
94 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
95 those systems will not pick up this default. This needs to be after all
96 of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
98 #ifndef TARGET_IEEEQUAD_DEFAULT
99 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
100 #define TARGET_IEEEQUAD_DEFAULT 1
102 #define TARGET_IEEEQUAD_DEFAULT 0
106 /* Don't enable PC-relative addressing if the target does not support it. */
107 #ifndef PCREL_SUPPORTED_BY_OS
108 #define PCREL_SUPPORTED_BY_OS 0
112 /* Counter for labels which are to be placed in .fixup. */
113 int fixuplabelno
= 0;
116 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
119 /* Specify the machine mode that pointers have. After generation of rtl, the
120 compiler makes no further distinction between pointers and any other objects
121 of this machine mode. */
122 scalar_int_mode rs6000_pmode
;
124 /* Track use of r13 in 64bit AIX TLS. */
125 static bool xcoff_tls_exec_model_detected
= false;
127 /* Width in bits of a pointer. */
128 unsigned rs6000_pointer_size
;
130 #ifdef HAVE_AS_GNU_ATTRIBUTE
131 # ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
132 # define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
134 /* Flag whether floating point values have been passed/returned.
135 Note that this doesn't say whether fprs are used, since the
136 Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
137 should be set for soft-float values passed in gprs and ieee128
138 values passed in vsx registers. */
139 bool rs6000_passes_float
= false;
140 bool rs6000_passes_long_double
= false;
141 /* Flag whether vector values have been passed/returned. */
142 bool rs6000_passes_vector
= false;
143 /* Flag whether small (<= 8 byte) structures have been returned. */
144 bool rs6000_returns_struct
= false;
147 /* Value is TRUE if register/mode pair is acceptable. */
148 static bool rs6000_hard_regno_mode_ok_p
149 [NUM_MACHINE_MODES
][FIRST_PSEUDO_REGISTER
];
151 /* Maximum number of registers needed for a given register class and mode. */
152 unsigned char rs6000_class_max_nregs
[NUM_MACHINE_MODES
][LIM_REG_CLASSES
];
154 /* How many registers are needed for a given register and mode. */
155 unsigned char rs6000_hard_regno_nregs
[NUM_MACHINE_MODES
][FIRST_PSEUDO_REGISTER
];
157 /* Map register number to register class. */
158 enum reg_class rs6000_regno_regclass
[FIRST_PSEUDO_REGISTER
];
160 static int dbg_cost_ctrl
;
162 /* Flag to say the TOC is initialized */
163 int toc_initialized
, need_toc_init
;
164 char toc_label_name
[10];
166 /* Cached value of rs6000_variable_issue. This is cached in
167 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
168 static short cached_can_issue_more
;
170 static GTY(()) section
*read_only_data_section
;
171 static GTY(()) section
*private_data_section
;
172 static GTY(()) section
*tls_data_section
;
173 static GTY(()) section
*tls_private_data_section
;
174 static GTY(()) section
*read_only_private_data_section
;
175 static GTY(()) section
*sdata2_section
;
177 section
*toc_section
= 0;
179 /* Describe the vector unit used for modes. */
180 enum rs6000_vector rs6000_vector_unit
[NUM_MACHINE_MODES
];
181 enum rs6000_vector rs6000_vector_mem
[NUM_MACHINE_MODES
];
183 /* Register classes for various constraints that are based on the target
185 enum reg_class rs6000_constraints
[RS6000_CONSTRAINT_MAX
];
187 /* Describe the alignment of a vector. */
188 int rs6000_vector_align
[NUM_MACHINE_MODES
];
190 /* What modes to automatically generate reciprocal divide estimate (fre) and
191 reciprocal sqrt (frsqrte) for. */
192 unsigned char rs6000_recip_bits
[MAX_MACHINE_MODE
];
194 /* Masks to determine which reciprocal estimate instructions to generate
196 enum rs6000_recip_mask
{
197 RECIP_SF_DIV
= 0x001, /* Use divide estimate */
198 RECIP_DF_DIV
= 0x002,
199 RECIP_V4SF_DIV
= 0x004,
200 RECIP_V2DF_DIV
= 0x008,
202 RECIP_SF_RSQRT
= 0x010, /* Use reciprocal sqrt estimate. */
203 RECIP_DF_RSQRT
= 0x020,
204 RECIP_V4SF_RSQRT
= 0x040,
205 RECIP_V2DF_RSQRT
= 0x080,
207 /* Various combination of flags for -mrecip=xxx. */
209 RECIP_ALL
= (RECIP_SF_DIV
| RECIP_DF_DIV
| RECIP_V4SF_DIV
210 | RECIP_V2DF_DIV
| RECIP_SF_RSQRT
| RECIP_DF_RSQRT
211 | RECIP_V4SF_RSQRT
| RECIP_V2DF_RSQRT
),
213 RECIP_HIGH_PRECISION
= RECIP_ALL
,
215 /* On low precision machines like the power5, don't enable double precision
216 reciprocal square root estimate, since it isn't accurate enough. */
217 RECIP_LOW_PRECISION
= (RECIP_ALL
& ~(RECIP_DF_RSQRT
| RECIP_V2DF_RSQRT
))
220 /* -mrecip options. */
223 const char *string
; /* option name */
224 unsigned int mask
; /* mask bits to set */
225 } recip_options
[] = {
226 { "all", RECIP_ALL
},
227 { "none", RECIP_NONE
},
228 { "div", (RECIP_SF_DIV
| RECIP_DF_DIV
| RECIP_V4SF_DIV
230 { "divf", (RECIP_SF_DIV
| RECIP_V4SF_DIV
) },
231 { "divd", (RECIP_DF_DIV
| RECIP_V2DF_DIV
) },
232 { "rsqrt", (RECIP_SF_RSQRT
| RECIP_DF_RSQRT
| RECIP_V4SF_RSQRT
233 | RECIP_V2DF_RSQRT
) },
234 { "rsqrtf", (RECIP_SF_RSQRT
| RECIP_V4SF_RSQRT
) },
235 { "rsqrtd", (RECIP_DF_RSQRT
| RECIP_V2DF_RSQRT
) },
238 /* On PowerPC, we have a limited number of target clones that we care about
239 which means we can use an array to hold the options, rather than having more
240 elaborate data structures to identify each possible variation. Order the
241 clones from the default to the highest ISA. */
243 CLONE_DEFAULT
= 0, /* default clone. */
244 CLONE_ISA_2_05
, /* ISA 2.05 (power6). */
245 CLONE_ISA_2_06
, /* ISA 2.06 (power7). */
246 CLONE_ISA_2_07
, /* ISA 2.07 (power8). */
247 CLONE_ISA_3_00
, /* ISA 3.0 (power9). */
248 CLONE_ISA_3_1
, /* ISA 3.1 (power10). */
252 /* Map compiler ISA bits into HWCAP names. */
254 HOST_WIDE_INT isa_mask
; /* rs6000_isa mask */
255 const char *name
; /* name to use in __builtin_cpu_supports. */
258 static const struct clone_map rs6000_clone_map
[CLONE_MAX
] = {
259 { 0, "" }, /* Default options. */
260 { OPTION_MASK_CMPB
, "arch_2_05" }, /* ISA 2.05 (power6). */
261 { OPTION_MASK_POPCNTD
, "arch_2_06" }, /* ISA 2.06 (power7). */
262 { OPTION_MASK_P8_VECTOR
, "arch_2_07" }, /* ISA 2.07 (power8). */
263 { OPTION_MASK_P9_VECTOR
, "arch_3_00" }, /* ISA 3.0 (power9). */
264 { OPTION_MASK_POWER10
, "arch_3_1" }, /* ISA 3.1 (power10). */
268 /* Newer LIBCs explicitly export this symbol to declare that they provide
269 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
270 reference to this symbol whenever we expand a CPU builtin, so that
271 we never link against an old LIBC. */
272 const char *tcb_verification_symbol
= "__parse_hwcap_and_convert_at_platform";
274 /* True if we have expanded a CPU builtin. */
275 bool cpu_builtin_p
= false;
277 /* Pointer to function (in rs6000-c.cc) that can define or undefine target
278 macros that have changed. Languages that don't support the preprocessor
279 don't link in rs6000-c.cc, so we can't call it directly. */
280 void (*rs6000_target_modify_macros_ptr
) (bool, HOST_WIDE_INT
);
282 /* Simplify register classes into simpler classifications.  We assume
283 GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
284 check for standard register classes (gpr/floating/altivec/vsx) and
285 floating/vector classes (float/altivec/vsx). */
287 enum rs6000_reg_type
{
298 /* Map register class to register type. */
299 static enum rs6000_reg_type reg_class_to_reg_type
[N_REG_CLASSES
];
301 /* First/last register type for the 'normal' register types (i.e. general
302 purpose, floating point, altivec, and VSX registers). */
303 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
305 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
308 /* Register classes we care about in secondary reload or go if legitimate
309 address. We only need to worry about GPR, FPR, and Altivec registers here,
310 along an ANY field that is the OR of the 3 register classes. */
312 enum rs6000_reload_reg_type
{
313 RELOAD_REG_GPR
, /* General purpose registers. */
314 RELOAD_REG_FPR
, /* Traditional floating point regs. */
315 RELOAD_REG_VMX
, /* Altivec (VMX) registers. */
316 RELOAD_REG_ANY
, /* OR of GPR, FPR, Altivec masks. */
320 /* For setting up register classes, loop through the 3 register classes mapping
321 into real registers, and skip the ANY class, which is just an OR of the
323 #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
324 #define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
326 /* Map reload register type to a register in the register class. */
327 struct reload_reg_map_type
{
328 const char *name
; /* Register class name. */
329 int reg
; /* Register in the register class. */
332 static const struct reload_reg_map_type reload_reg_map
[N_RELOAD_REG
] = {
333 { "Gpr", FIRST_GPR_REGNO
}, /* RELOAD_REG_GPR. */
334 { "Fpr", FIRST_FPR_REGNO
}, /* RELOAD_REG_FPR. */
335 { "VMX", FIRST_ALTIVEC_REGNO
}, /* RELOAD_REG_VMX. */
336 { "Any", -1 }, /* RELOAD_REG_ANY. */
339 /* Mask bits for each register class, indexed per mode. Historically the
340 compiler has been more restrictive which types can do PRE_MODIFY instead of
341 PRE_INC and PRE_DEC, so keep track of separate bits for these two. */
342 typedef unsigned char addr_mask_type
;
344 #define RELOAD_REG_VALID 0x01 /* Mode valid in register.. */
345 #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
346 #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
347 #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
348 #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
349 #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
350 #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */
351 #define RELOAD_REG_QUAD_OFFSET 0x80 /* quad offset is limited. */
353 /* Register type masks based on the type, of valid addressing modes. */
354 struct rs6000_reg_addr
{
355 enum insn_code reload_load
; /* INSN to reload for loading. */
356 enum insn_code reload_store
; /* INSN to reload for storing. */
357 enum insn_code reload_fpr_gpr
; /* INSN to move from FPR to GPR. */
358 enum insn_code reload_gpr_vsx
; /* INSN to move from GPR to VSX. */
359 enum insn_code reload_vsx_gpr
; /* INSN to move from VSX to GPR. */
360 addr_mask_type addr_mask
[(int)N_RELOAD_REG
]; /* Valid address masks. */
361 bool scalar_in_vmx_p
; /* Scalar value can go in VMX. */
364 static struct rs6000_reg_addr reg_addr
[NUM_MACHINE_MODES
];
366 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
368 mode_supports_pre_incdec_p (machine_mode mode
)
370 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_PRE_INCDEC
)
374 /* Helper function to say whether a mode supports PRE_MODIFY. */
376 mode_supports_pre_modify_p (machine_mode mode
)
378 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_PRE_MODIFY
)
382 /* Return true if we have D-form addressing in altivec registers. */
384 mode_supports_vmx_dform (machine_mode mode
)
386 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_OFFSET
) != 0);
389 /* Return true if we have D-form addressing in VSX registers. This addressing
390 is more limited than normal d-form addressing in that the offset must be
391 aligned on a 16-byte boundary. */
393 mode_supports_dq_form (machine_mode mode
)
395 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_QUAD_OFFSET
)
399 /* Given that there exists at least one variable that is set (produced)
400 by OUT_INSN and read (consumed) by IN_INSN, return true iff
401 IN_INSN represents one or more memory store operations and none of
402 the variables set by OUT_INSN is used by IN_INSN as the address of a
403 store operation. If either IN_INSN or OUT_INSN does not represent
404 a "single" RTL SET expression (as loosely defined by the
405 implementation of the single_set function) or a PARALLEL with only
406 SETs, CLOBBERs, and USEs inside, this function returns false.
408 This rs6000-specific version of store_data_bypass_p checks for
409 certain conditions that result in assertion failures (and internal
410 compiler errors) in the generic store_data_bypass_p function and
411 returns false rather than calling store_data_bypass_p if one of the
412 problematic conditions is detected. */
415 rs6000_store_data_bypass_p (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
422 in_set
= single_set (in_insn
);
425 if (MEM_P (SET_DEST (in_set
)))
427 out_set
= single_set (out_insn
);
430 out_pat
= PATTERN (out_insn
);
431 if (GET_CODE (out_pat
) == PARALLEL
)
433 for (i
= 0; i
< XVECLEN (out_pat
, 0); i
++)
435 out_exp
= XVECEXP (out_pat
, 0, i
);
436 if ((GET_CODE (out_exp
) == CLOBBER
)
437 || (GET_CODE (out_exp
) == USE
))
439 else if (GET_CODE (out_exp
) != SET
)
448 in_pat
= PATTERN (in_insn
);
449 if (GET_CODE (in_pat
) != PARALLEL
)
452 for (i
= 0; i
< XVECLEN (in_pat
, 0); i
++)
454 in_exp
= XVECEXP (in_pat
, 0, i
);
455 if ((GET_CODE (in_exp
) == CLOBBER
) || (GET_CODE (in_exp
) == USE
))
457 else if (GET_CODE (in_exp
) != SET
)
460 if (MEM_P (SET_DEST (in_exp
)))
462 out_set
= single_set (out_insn
);
465 out_pat
= PATTERN (out_insn
);
466 if (GET_CODE (out_pat
) != PARALLEL
)
468 for (j
= 0; j
< XVECLEN (out_pat
, 0); j
++)
470 out_exp
= XVECEXP (out_pat
, 0, j
);
471 if ((GET_CODE (out_exp
) == CLOBBER
)
472 || (GET_CODE (out_exp
) == USE
))
474 else if (GET_CODE (out_exp
) != SET
)
481 return store_data_bypass_p (out_insn
, in_insn
);
485 /* Processor costs (relative to an add) */
487 const struct processor_costs
*rs6000_cost
;
489 /* Instruction size costs on 32bit processors. */
491 struct processor_costs size32_cost
= {
492 COSTS_N_INSNS (1), /* mulsi */
493 COSTS_N_INSNS (1), /* mulsi_const */
494 COSTS_N_INSNS (1), /* mulsi_const9 */
495 COSTS_N_INSNS (1), /* muldi */
496 COSTS_N_INSNS (1), /* divsi */
497 COSTS_N_INSNS (1), /* divdi */
498 COSTS_N_INSNS (1), /* fp */
499 COSTS_N_INSNS (1), /* dmul */
500 COSTS_N_INSNS (1), /* sdiv */
501 COSTS_N_INSNS (1), /* ddiv */
502 32, /* cache line size */
506 0, /* SF->DF convert */
509 /* Instruction size costs on 64bit processors. */
511 struct processor_costs size64_cost
= {
512 COSTS_N_INSNS (1), /* mulsi */
513 COSTS_N_INSNS (1), /* mulsi_const */
514 COSTS_N_INSNS (1), /* mulsi_const9 */
515 COSTS_N_INSNS (1), /* muldi */
516 COSTS_N_INSNS (1), /* divsi */
517 COSTS_N_INSNS (1), /* divdi */
518 COSTS_N_INSNS (1), /* fp */
519 COSTS_N_INSNS (1), /* dmul */
520 COSTS_N_INSNS (1), /* sdiv */
521 COSTS_N_INSNS (1), /* ddiv */
522 128, /* cache line size */
526 0, /* SF->DF convert */
529 /* Instruction costs on RS64A processors. */
531 struct processor_costs rs64a_cost
= {
532 COSTS_N_INSNS (20), /* mulsi */
533 COSTS_N_INSNS (12), /* mulsi_const */
534 COSTS_N_INSNS (8), /* mulsi_const9 */
535 COSTS_N_INSNS (34), /* muldi */
536 COSTS_N_INSNS (65), /* divsi */
537 COSTS_N_INSNS (67), /* divdi */
538 COSTS_N_INSNS (4), /* fp */
539 COSTS_N_INSNS (4), /* dmul */
540 COSTS_N_INSNS (31), /* sdiv */
541 COSTS_N_INSNS (31), /* ddiv */
542 128, /* cache line size */
546 0, /* SF->DF convert */
549 /* Instruction costs on MPCCORE processors. */
551 struct processor_costs mpccore_cost
= {
552 COSTS_N_INSNS (2), /* mulsi */
553 COSTS_N_INSNS (2), /* mulsi_const */
554 COSTS_N_INSNS (2), /* mulsi_const9 */
555 COSTS_N_INSNS (2), /* muldi */
556 COSTS_N_INSNS (6), /* divsi */
557 COSTS_N_INSNS (6), /* divdi */
558 COSTS_N_INSNS (4), /* fp */
559 COSTS_N_INSNS (5), /* dmul */
560 COSTS_N_INSNS (10), /* sdiv */
561 COSTS_N_INSNS (17), /* ddiv */
562 32, /* cache line size */
566 0, /* SF->DF convert */
569 /* Instruction costs on PPC403 processors. */
571 struct processor_costs ppc403_cost
= {
572 COSTS_N_INSNS (4), /* mulsi */
573 COSTS_N_INSNS (4), /* mulsi_const */
574 COSTS_N_INSNS (4), /* mulsi_const9 */
575 COSTS_N_INSNS (4), /* muldi */
576 COSTS_N_INSNS (33), /* divsi */
577 COSTS_N_INSNS (33), /* divdi */
578 COSTS_N_INSNS (11), /* fp */
579 COSTS_N_INSNS (11), /* dmul */
580 COSTS_N_INSNS (11), /* sdiv */
581 COSTS_N_INSNS (11), /* ddiv */
582 32, /* cache line size */
586 0, /* SF->DF convert */
589 /* Instruction costs on PPC405 processors. */
591 struct processor_costs ppc405_cost
= {
592 COSTS_N_INSNS (5), /* mulsi */
593 COSTS_N_INSNS (4), /* mulsi_const */
594 COSTS_N_INSNS (3), /* mulsi_const9 */
595 COSTS_N_INSNS (5), /* muldi */
596 COSTS_N_INSNS (35), /* divsi */
597 COSTS_N_INSNS (35), /* divdi */
598 COSTS_N_INSNS (11), /* fp */
599 COSTS_N_INSNS (11), /* dmul */
600 COSTS_N_INSNS (11), /* sdiv */
601 COSTS_N_INSNS (11), /* ddiv */
602 32, /* cache line size */
606 0, /* SF->DF convert */
609 /* Instruction costs on PPC440 processors. */
611 struct processor_costs ppc440_cost
= {
612 COSTS_N_INSNS (3), /* mulsi */
613 COSTS_N_INSNS (2), /* mulsi_const */
614 COSTS_N_INSNS (2), /* mulsi_const9 */
615 COSTS_N_INSNS (3), /* muldi */
616 COSTS_N_INSNS (34), /* divsi */
617 COSTS_N_INSNS (34), /* divdi */
618 COSTS_N_INSNS (5), /* fp */
619 COSTS_N_INSNS (5), /* dmul */
620 COSTS_N_INSNS (19), /* sdiv */
621 COSTS_N_INSNS (33), /* ddiv */
622 32, /* cache line size */
626 0, /* SF->DF convert */
629 /* Instruction costs on PPC476 processors. */
631 struct processor_costs ppc476_cost
= {
632 COSTS_N_INSNS (4), /* mulsi */
633 COSTS_N_INSNS (4), /* mulsi_const */
634 COSTS_N_INSNS (4), /* mulsi_const9 */
635 COSTS_N_INSNS (4), /* muldi */
636 COSTS_N_INSNS (11), /* divsi */
637 COSTS_N_INSNS (11), /* divdi */
638 COSTS_N_INSNS (6), /* fp */
639 COSTS_N_INSNS (6), /* dmul */
640 COSTS_N_INSNS (19), /* sdiv */
641 COSTS_N_INSNS (33), /* ddiv */
642 32, /* l1 cache line size */
646 0, /* SF->DF convert */
649 /* Instruction costs on PPC601 processors. */
651 struct processor_costs ppc601_cost
= {
652 COSTS_N_INSNS (5), /* mulsi */
653 COSTS_N_INSNS (5), /* mulsi_const */
654 COSTS_N_INSNS (5), /* mulsi_const9 */
655 COSTS_N_INSNS (5), /* muldi */
656 COSTS_N_INSNS (36), /* divsi */
657 COSTS_N_INSNS (36), /* divdi */
658 COSTS_N_INSNS (4), /* fp */
659 COSTS_N_INSNS (5), /* dmul */
660 COSTS_N_INSNS (17), /* sdiv */
661 COSTS_N_INSNS (31), /* ddiv */
662 32, /* cache line size */
666 0, /* SF->DF convert */
669 /* Instruction costs on PPC603 processors. */
671 struct processor_costs ppc603_cost
= {
672 COSTS_N_INSNS (5), /* mulsi */
673 COSTS_N_INSNS (3), /* mulsi_const */
674 COSTS_N_INSNS (2), /* mulsi_const9 */
675 COSTS_N_INSNS (5), /* muldi */
676 COSTS_N_INSNS (37), /* divsi */
677 COSTS_N_INSNS (37), /* divdi */
678 COSTS_N_INSNS (3), /* fp */
679 COSTS_N_INSNS (4), /* dmul */
680 COSTS_N_INSNS (18), /* sdiv */
681 COSTS_N_INSNS (33), /* ddiv */
682 32, /* cache line size */
686 0, /* SF->DF convert */
689 /* Instruction costs on PPC604 processors. */
691 struct processor_costs ppc604_cost
= {
692 COSTS_N_INSNS (4), /* mulsi */
693 COSTS_N_INSNS (4), /* mulsi_const */
694 COSTS_N_INSNS (4), /* mulsi_const9 */
695 COSTS_N_INSNS (4), /* muldi */
696 COSTS_N_INSNS (20), /* divsi */
697 COSTS_N_INSNS (20), /* divdi */
698 COSTS_N_INSNS (3), /* fp */
699 COSTS_N_INSNS (3), /* dmul */
700 COSTS_N_INSNS (18), /* sdiv */
701 COSTS_N_INSNS (32), /* ddiv */
702 32, /* cache line size */
706 0, /* SF->DF convert */
709 /* Instruction costs on PPC604e processors. */
711 struct processor_costs ppc604e_cost
= {
712 COSTS_N_INSNS (2), /* mulsi */
713 COSTS_N_INSNS (2), /* mulsi_const */
714 COSTS_N_INSNS (2), /* mulsi_const9 */
715 COSTS_N_INSNS (2), /* muldi */
716 COSTS_N_INSNS (20), /* divsi */
717 COSTS_N_INSNS (20), /* divdi */
718 COSTS_N_INSNS (3), /* fp */
719 COSTS_N_INSNS (3), /* dmul */
720 COSTS_N_INSNS (18), /* sdiv */
721 COSTS_N_INSNS (32), /* ddiv */
722 32, /* cache line size */
726 0, /* SF->DF convert */
729 /* Instruction costs on PPC620 processors. */
731 struct processor_costs ppc620_cost
= {
732 COSTS_N_INSNS (5), /* mulsi */
733 COSTS_N_INSNS (4), /* mulsi_const */
734 COSTS_N_INSNS (3), /* mulsi_const9 */
735 COSTS_N_INSNS (7), /* muldi */
736 COSTS_N_INSNS (21), /* divsi */
737 COSTS_N_INSNS (37), /* divdi */
738 COSTS_N_INSNS (3), /* fp */
739 COSTS_N_INSNS (3), /* dmul */
740 COSTS_N_INSNS (18), /* sdiv */
741 COSTS_N_INSNS (32), /* ddiv */
742 128, /* cache line size */
746 0, /* SF->DF convert */
749 /* Instruction costs on PPC630 processors. */
751 struct processor_costs ppc630_cost
= {
752 COSTS_N_INSNS (5), /* mulsi */
753 COSTS_N_INSNS (4), /* mulsi_const */
754 COSTS_N_INSNS (3), /* mulsi_const9 */
755 COSTS_N_INSNS (7), /* muldi */
756 COSTS_N_INSNS (21), /* divsi */
757 COSTS_N_INSNS (37), /* divdi */
758 COSTS_N_INSNS (3), /* fp */
759 COSTS_N_INSNS (3), /* dmul */
760 COSTS_N_INSNS (17), /* sdiv */
761 COSTS_N_INSNS (21), /* ddiv */
762 128, /* cache line size */
766 0, /* SF->DF convert */
769 /* Instruction costs on Cell processor. */
770 /* COSTS_N_INSNS (1) ~ one add. */
772 struct processor_costs ppccell_cost
= {
773 COSTS_N_INSNS (9/2)+2, /* mulsi */
774 COSTS_N_INSNS (6/2), /* mulsi_const */
775 COSTS_N_INSNS (6/2), /* mulsi_const9 */
776 COSTS_N_INSNS (15/2)+2, /* muldi */
777 COSTS_N_INSNS (38/2), /* divsi */
778 COSTS_N_INSNS (70/2), /* divdi */
779 COSTS_N_INSNS (10/2), /* fp */
780 COSTS_N_INSNS (10/2), /* dmul */
781 COSTS_N_INSNS (74/2), /* sdiv */
782 COSTS_N_INSNS (74/2), /* ddiv */
783 128, /* cache line size */
787 0, /* SF->DF convert */
790 /* Instruction costs on PPC750 and PPC7400 processors. */
792 struct processor_costs ppc750_cost
= {
793 COSTS_N_INSNS (5), /* mulsi */
794 COSTS_N_INSNS (3), /* mulsi_const */
795 COSTS_N_INSNS (2), /* mulsi_const9 */
796 COSTS_N_INSNS (5), /* muldi */
797 COSTS_N_INSNS (17), /* divsi */
798 COSTS_N_INSNS (17), /* divdi */
799 COSTS_N_INSNS (3), /* fp */
800 COSTS_N_INSNS (3), /* dmul */
801 COSTS_N_INSNS (17), /* sdiv */
802 COSTS_N_INSNS (31), /* ddiv */
803 32, /* cache line size */
807 0, /* SF->DF convert */
810 /* Instruction costs on PPC7450 processors. */
812 struct processor_costs ppc7450_cost
= {
813 COSTS_N_INSNS (4), /* mulsi */
814 COSTS_N_INSNS (3), /* mulsi_const */
815 COSTS_N_INSNS (3), /* mulsi_const9 */
816 COSTS_N_INSNS (4), /* muldi */
817 COSTS_N_INSNS (23), /* divsi */
818 COSTS_N_INSNS (23), /* divdi */
819 COSTS_N_INSNS (5), /* fp */
820 COSTS_N_INSNS (5), /* dmul */
821 COSTS_N_INSNS (21), /* sdiv */
822 COSTS_N_INSNS (35), /* ddiv */
823 32, /* cache line size */
827 0, /* SF->DF convert */
830 /* Instruction costs on PPC8540 processors. */
832 struct processor_costs ppc8540_cost
= {
833 COSTS_N_INSNS (4), /* mulsi */
834 COSTS_N_INSNS (4), /* mulsi_const */
835 COSTS_N_INSNS (4), /* mulsi_const9 */
836 COSTS_N_INSNS (4), /* muldi */
837 COSTS_N_INSNS (19), /* divsi */
838 COSTS_N_INSNS (19), /* divdi */
839 COSTS_N_INSNS (4), /* fp */
840 COSTS_N_INSNS (4), /* dmul */
841 COSTS_N_INSNS (29), /* sdiv */
842 COSTS_N_INSNS (29), /* ddiv */
843 32, /* cache line size */
846 1, /* prefetch streams /*/
847 0, /* SF->DF convert */
850 /* Instruction costs on E300C2 and E300C3 cores. */
852 struct processor_costs ppce300c2c3_cost
= {
853 COSTS_N_INSNS (4), /* mulsi */
854 COSTS_N_INSNS (4), /* mulsi_const */
855 COSTS_N_INSNS (4), /* mulsi_const9 */
856 COSTS_N_INSNS (4), /* muldi */
857 COSTS_N_INSNS (19), /* divsi */
858 COSTS_N_INSNS (19), /* divdi */
859 COSTS_N_INSNS (3), /* fp */
860 COSTS_N_INSNS (4), /* dmul */
861 COSTS_N_INSNS (18), /* sdiv */
862 COSTS_N_INSNS (33), /* ddiv */
866 1, /* prefetch streams /*/
867 0, /* SF->DF convert */
870 /* Instruction costs on PPCE500MC processors. */
872 struct processor_costs ppce500mc_cost
= {
873 COSTS_N_INSNS (4), /* mulsi */
874 COSTS_N_INSNS (4), /* mulsi_const */
875 COSTS_N_INSNS (4), /* mulsi_const9 */
876 COSTS_N_INSNS (4), /* muldi */
877 COSTS_N_INSNS (14), /* divsi */
878 COSTS_N_INSNS (14), /* divdi */
879 COSTS_N_INSNS (8), /* fp */
880 COSTS_N_INSNS (10), /* dmul */
881 COSTS_N_INSNS (36), /* sdiv */
882 COSTS_N_INSNS (66), /* ddiv */
883 64, /* cache line size */
886 1, /* prefetch streams /*/
887 0, /* SF->DF convert */
890 /* Instruction costs on PPCE500MC64 processors. */
892 struct processor_costs ppce500mc64_cost
= {
893 COSTS_N_INSNS (4), /* mulsi */
894 COSTS_N_INSNS (4), /* mulsi_const */
895 COSTS_N_INSNS (4), /* mulsi_const9 */
896 COSTS_N_INSNS (4), /* muldi */
897 COSTS_N_INSNS (14), /* divsi */
898 COSTS_N_INSNS (14), /* divdi */
899 COSTS_N_INSNS (4), /* fp */
900 COSTS_N_INSNS (10), /* dmul */
901 COSTS_N_INSNS (36), /* sdiv */
902 COSTS_N_INSNS (66), /* ddiv */
903 64, /* cache line size */
906 1, /* prefetch streams /*/
907 0, /* SF->DF convert */
910 /* Instruction costs on PPCE5500 processors. */
912 struct processor_costs ppce5500_cost
= {
913 COSTS_N_INSNS (5), /* mulsi */
914 COSTS_N_INSNS (5), /* mulsi_const */
915 COSTS_N_INSNS (4), /* mulsi_const9 */
916 COSTS_N_INSNS (5), /* muldi */
917 COSTS_N_INSNS (14), /* divsi */
918 COSTS_N_INSNS (14), /* divdi */
919 COSTS_N_INSNS (7), /* fp */
920 COSTS_N_INSNS (10), /* dmul */
921 COSTS_N_INSNS (36), /* sdiv */
922 COSTS_N_INSNS (66), /* ddiv */
923 64, /* cache line size */
926 1, /* prefetch streams /*/
927 0, /* SF->DF convert */
930 /* Instruction costs on PPCE6500 processors. */
932 struct processor_costs ppce6500_cost
= {
933 COSTS_N_INSNS (5), /* mulsi */
934 COSTS_N_INSNS (5), /* mulsi_const */
935 COSTS_N_INSNS (4), /* mulsi_const9 */
936 COSTS_N_INSNS (5), /* muldi */
937 COSTS_N_INSNS (14), /* divsi */
938 COSTS_N_INSNS (14), /* divdi */
939 COSTS_N_INSNS (7), /* fp */
940 COSTS_N_INSNS (10), /* dmul */
941 COSTS_N_INSNS (36), /* sdiv */
942 COSTS_N_INSNS (66), /* ddiv */
943 64, /* cache line size */
946 1, /* prefetch streams /*/
947 0, /* SF->DF convert */
950 /* Instruction costs on AppliedMicro Titan processors. */
952 struct processor_costs titan_cost
= {
953 COSTS_N_INSNS (5), /* mulsi */
954 COSTS_N_INSNS (5), /* mulsi_const */
955 COSTS_N_INSNS (5), /* mulsi_const9 */
956 COSTS_N_INSNS (5), /* muldi */
957 COSTS_N_INSNS (18), /* divsi */
958 COSTS_N_INSNS (18), /* divdi */
959 COSTS_N_INSNS (10), /* fp */
960 COSTS_N_INSNS (10), /* dmul */
961 COSTS_N_INSNS (46), /* sdiv */
962 COSTS_N_INSNS (72), /* ddiv */
963 32, /* cache line size */
966 1, /* prefetch streams /*/
967 0, /* SF->DF convert */
970 /* Instruction costs on POWER4 and POWER5 processors. */
972 struct processor_costs power4_cost
= {
973 COSTS_N_INSNS (3), /* mulsi */
974 COSTS_N_INSNS (2), /* mulsi_const */
975 COSTS_N_INSNS (2), /* mulsi_const9 */
976 COSTS_N_INSNS (4), /* muldi */
977 COSTS_N_INSNS (18), /* divsi */
978 COSTS_N_INSNS (34), /* divdi */
979 COSTS_N_INSNS (3), /* fp */
980 COSTS_N_INSNS (3), /* dmul */
981 COSTS_N_INSNS (17), /* sdiv */
982 COSTS_N_INSNS (17), /* ddiv */
983 128, /* cache line size */
986 8, /* prefetch streams /*/
987 0, /* SF->DF convert */
990 /* Instruction costs on POWER6 processors. */
992 struct processor_costs power6_cost
= {
993 COSTS_N_INSNS (8), /* mulsi */
994 COSTS_N_INSNS (8), /* mulsi_const */
995 COSTS_N_INSNS (8), /* mulsi_const9 */
996 COSTS_N_INSNS (8), /* muldi */
997 COSTS_N_INSNS (22), /* divsi */
998 COSTS_N_INSNS (28), /* divdi */
999 COSTS_N_INSNS (3), /* fp */
1000 COSTS_N_INSNS (3), /* dmul */
1001 COSTS_N_INSNS (13), /* sdiv */
1002 COSTS_N_INSNS (16), /* ddiv */
1003 128, /* cache line size */
1005 2048, /* l2 cache */
1006 16, /* prefetch streams */
1007 0, /* SF->DF convert */
1010 /* Instruction costs on POWER7 processors. */
1012 struct processor_costs power7_cost
= {
1013 COSTS_N_INSNS (2), /* mulsi */
1014 COSTS_N_INSNS (2), /* mulsi_const */
1015 COSTS_N_INSNS (2), /* mulsi_const9 */
1016 COSTS_N_INSNS (2), /* muldi */
1017 COSTS_N_INSNS (18), /* divsi */
1018 COSTS_N_INSNS (34), /* divdi */
1019 COSTS_N_INSNS (3), /* fp */
1020 COSTS_N_INSNS (3), /* dmul */
1021 COSTS_N_INSNS (13), /* sdiv */
1022 COSTS_N_INSNS (16), /* ddiv */
1023 128, /* cache line size */
1026 12, /* prefetch streams */
1027 COSTS_N_INSNS (3), /* SF->DF convert */
1030 /* Instruction costs on POWER8 processors. */
1032 struct processor_costs power8_cost
= {
1033 COSTS_N_INSNS (3), /* mulsi */
1034 COSTS_N_INSNS (3), /* mulsi_const */
1035 COSTS_N_INSNS (3), /* mulsi_const9 */
1036 COSTS_N_INSNS (3), /* muldi */
1037 COSTS_N_INSNS (19), /* divsi */
1038 COSTS_N_INSNS (35), /* divdi */
1039 COSTS_N_INSNS (3), /* fp */
1040 COSTS_N_INSNS (3), /* dmul */
1041 COSTS_N_INSNS (14), /* sdiv */
1042 COSTS_N_INSNS (17), /* ddiv */
1043 128, /* cache line size */
1046 12, /* prefetch streams */
1047 COSTS_N_INSNS (3), /* SF->DF convert */
1050 /* Instruction costs on POWER9 processors. */
1052 struct processor_costs power9_cost
= {
1053 COSTS_N_INSNS (3), /* mulsi */
1054 COSTS_N_INSNS (3), /* mulsi_const */
1055 COSTS_N_INSNS (3), /* mulsi_const9 */
1056 COSTS_N_INSNS (3), /* muldi */
1057 COSTS_N_INSNS (8), /* divsi */
1058 COSTS_N_INSNS (12), /* divdi */
1059 COSTS_N_INSNS (3), /* fp */
1060 COSTS_N_INSNS (3), /* dmul */
1061 COSTS_N_INSNS (13), /* sdiv */
1062 COSTS_N_INSNS (18), /* ddiv */
1063 128, /* cache line size */
1066 8, /* prefetch streams */
1067 COSTS_N_INSNS (3), /* SF->DF convert */
1070 /* Instruction costs on Power10/Power11 processors. */
1072 struct processor_costs power10_cost
= {
1073 COSTS_N_INSNS (2), /* mulsi */
1074 COSTS_N_INSNS (2), /* mulsi_const */
1075 COSTS_N_INSNS (2), /* mulsi_const9 */
1076 COSTS_N_INSNS (2), /* muldi */
1077 COSTS_N_INSNS (6), /* divsi */
1078 COSTS_N_INSNS (6), /* divdi */
1079 COSTS_N_INSNS (2), /* fp */
1080 COSTS_N_INSNS (2), /* dmul */
1081 COSTS_N_INSNS (11), /* sdiv */
1082 COSTS_N_INSNS (13), /* ddiv */
1083 128, /* cache line size */
1086 16, /* prefetch streams */
1087 COSTS_N_INSNS (2), /* SF->DF convert */
1090 /* Instruction costs on POWER A2 processors. */
1092 struct processor_costs ppca2_cost
= {
1093 COSTS_N_INSNS (16), /* mulsi */
1094 COSTS_N_INSNS (16), /* mulsi_const */
1095 COSTS_N_INSNS (16), /* mulsi_const9 */
1096 COSTS_N_INSNS (16), /* muldi */
1097 COSTS_N_INSNS (22), /* divsi */
1098 COSTS_N_INSNS (28), /* divdi */
1099 COSTS_N_INSNS (3), /* fp */
1100 COSTS_N_INSNS (3), /* dmul */
1101 COSTS_N_INSNS (59), /* sdiv */
1102 COSTS_N_INSNS (72), /* ddiv */
1105 2048, /* l2 cache */
1106 16, /* prefetch streams */
1107 0, /* SF->DF convert */
1110 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1111 static tree (*rs6000_veclib_handler
) (combined_fn
, tree
, tree
);
1114 static bool rs6000_debug_legitimate_address_p (machine_mode
, rtx
, bool,
1115 code_helper
= ERROR_MARK
);
1116 static tree
rs6000_handle_longcall_attribute (tree
*, tree
, tree
, int, bool *);
1117 static tree
rs6000_handle_altivec_attribute (tree
*, tree
, tree
, int, bool *);
1118 static tree
rs6000_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
1119 static tree
rs6000_builtin_vectorized_libmass (combined_fn
, tree
, tree
);
1120 static void rs6000_emit_set_long_const (rtx
, HOST_WIDE_INT
, int * = nullptr);
1121 static int rs6000_memory_move_cost (machine_mode
, reg_class_t
, bool);
1122 static bool rs6000_debug_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
1123 static int rs6000_debug_address_cost (rtx
, machine_mode
, addr_space_t
,
1125 static int rs6000_debug_adjust_cost (rtx_insn
*, int, rtx_insn
*, int,
1127 static bool is_microcoded_insn (rtx_insn
*);
1128 static bool is_nonpipeline_insn (rtx_insn
*);
1129 static bool is_cracked_insn (rtx_insn
*);
1130 static bool is_load_insn (rtx
, rtx
*);
1131 static bool is_store_insn (rtx
, rtx
*);
1132 static bool set_to_load_agen (rtx_insn
*,rtx_insn
*);
1133 static bool insn_terminates_group_p (rtx_insn
*, enum group_termination
);
1134 static bool insn_must_be_first_in_group (rtx_insn
*);
1135 static bool insn_must_be_last_in_group (rtx_insn
*);
1136 bool easy_vector_constant (rtx
, machine_mode
);
1137 static rtx
rs6000_debug_legitimize_address (rtx
, rtx
, machine_mode
);
1138 static rtx
rs6000_legitimize_tls_address (rtx
, enum tls_model
);
1140 static tree
get_prev_label (tree
);
1142 static bool rs6000_mode_dependent_address (const_rtx
);
1143 static bool rs6000_debug_mode_dependent_address (const_rtx
);
1144 static bool rs6000_offsettable_memref_p (rtx
, machine_mode
, bool);
1145 static enum reg_class
rs6000_secondary_reload_class (enum reg_class
,
1147 static enum reg_class
rs6000_debug_secondary_reload_class (enum reg_class
,
1150 static enum reg_class
rs6000_preferred_reload_class (rtx
, enum reg_class
);
1151 static enum reg_class
rs6000_debug_preferred_reload_class (rtx
,
1153 static bool rs6000_debug_secondary_memory_needed (machine_mode
,
1156 static bool rs6000_debug_can_change_mode_class (machine_mode
,
1160 static bool (*rs6000_mode_dependent_address_ptr
) (const_rtx
)
1161 = rs6000_mode_dependent_address
;
1163 enum reg_class (*rs6000_secondary_reload_class_ptr
) (enum reg_class
,
1165 = rs6000_secondary_reload_class
;
1167 enum reg_class (*rs6000_preferred_reload_class_ptr
) (rtx
, enum reg_class
)
1168 = rs6000_preferred_reload_class
;
1170 const int INSN_NOT_AVAILABLE
= -1;
1172 static void rs6000_print_isa_options (FILE *, int, const char *,
1174 static HOST_WIDE_INT
rs6000_disable_incompatible_switches (void);
1176 static enum rs6000_reg_type
register_to_reg_type (rtx
, bool *);
1177 static bool rs6000_secondary_reload_move (enum rs6000_reg_type
,
1178 enum rs6000_reg_type
,
1180 secondary_reload_info
*,
1182 rtl_opt_pass
*make_pass_analyze_swaps (gcc::context
*);
1184 /* Hash table stuff for keeping track of TOC entries. */
1186 struct GTY((for_user
)) toc_hash_struct
1188 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1189 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1191 machine_mode key_mode
;
1195 struct toc_hasher
: ggc_ptr_hash
<toc_hash_struct
>
1197 static hashval_t
hash (toc_hash_struct
*);
1198 static bool equal (toc_hash_struct
*, toc_hash_struct
*);
1201 static GTY (()) hash_table
<toc_hasher
> *toc_hash_table
;
1205 /* Default register names. */
1206 char rs6000_reg_names
[][8] =
1209 "0", "1", "2", "3", "4", "5", "6", "7",
1210 "8", "9", "10", "11", "12", "13", "14", "15",
1211 "16", "17", "18", "19", "20", "21", "22", "23",
1212 "24", "25", "26", "27", "28", "29", "30", "31",
1214 "0", "1", "2", "3", "4", "5", "6", "7",
1215 "8", "9", "10", "11", "12", "13", "14", "15",
1216 "16", "17", "18", "19", "20", "21", "22", "23",
1217 "24", "25", "26", "27", "28", "29", "30", "31",
1219 "0", "1", "2", "3", "4", "5", "6", "7",
1220 "8", "9", "10", "11", "12", "13", "14", "15",
1221 "16", "17", "18", "19", "20", "21", "22", "23",
1222 "24", "25", "26", "27", "28", "29", "30", "31",
1224 "lr", "ctr", "ca", "ap",
1226 "0", "1", "2", "3", "4", "5", "6", "7",
1227 /* vrsave vscr sfp */
1228 "vrsave", "vscr", "sfp",
1231 #ifdef TARGET_REGNAMES
1232 static const char alt_reg_names
[][8] =
1235 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1236 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1237 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1238 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1240 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1241 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1242 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1243 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1245 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1246 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1247 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1248 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1250 "lr", "ctr", "ca", "ap",
1252 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1253 /* vrsave vscr sfp */
1254 "vrsave", "vscr", "sfp",
1258 /* Table of valid machine attributes. */
1260 static const attribute_spec rs6000_gnu_attributes
[] =
1262 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1263 affects_type_identity, handler, exclude } */
1264 { "altivec", 1, 1, false, true, false, false,
1265 rs6000_handle_altivec_attribute
, NULL
},
1266 { "longcall", 0, 0, false, true, true, false,
1267 rs6000_handle_longcall_attribute
, NULL
},
1268 { "shortcall", 0, 0, false, true, true, false,
1269 rs6000_handle_longcall_attribute
, NULL
},
1270 { "ms_struct", 0, 0, false, false, false, false,
1271 rs6000_handle_struct_attribute
, NULL
},
1272 { "gcc_struct", 0, 0, false, false, false, false,
1273 rs6000_handle_struct_attribute
, NULL
},
1274 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1275 SUBTARGET_ATTRIBUTE_TABLE
,
1279 static const scoped_attribute_specs rs6000_gnu_attribute_table
=
1281 "gnu", { rs6000_gnu_attributes
}
1284 static const scoped_attribute_specs
*const rs6000_attribute_table
[] =
1286 &rs6000_gnu_attribute_table
1289 #ifndef TARGET_PROFILE_KERNEL
1290 #define TARGET_PROFILE_KERNEL 0
1293 /* Initialize the GCC target structure. */
1294 #undef TARGET_ATTRIBUTE_TABLE
1295 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1296 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1297 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1298 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1299 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1301 #undef TARGET_ASM_ALIGNED_DI_OP
1302 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1304 /* Default unaligned ops are only provided for ELF. Find the ops needed
1305 for non-ELF systems. */
1306 #ifndef OBJECT_FORMAT_ELF
1308 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1310 #undef TARGET_ASM_UNALIGNED_HI_OP
1311 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1312 #undef TARGET_ASM_UNALIGNED_SI_OP
1313 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1314 #undef TARGET_ASM_UNALIGNED_DI_OP
1315 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1318 #undef TARGET_ASM_UNALIGNED_HI_OP
1319 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1320 #undef TARGET_ASM_UNALIGNED_SI_OP
1321 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1322 #undef TARGET_ASM_UNALIGNED_DI_OP
1323 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1324 #undef TARGET_ASM_ALIGNED_DI_OP
1325 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1329 /* This hook deals with fixups for relocatable code and DI-mode objects
1331 #undef TARGET_ASM_INTEGER
1332 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1334 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1335 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1336 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1339 #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
1340 #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
1341 rs6000_print_patchable_function_entry
1343 #undef TARGET_SET_UP_BY_PROLOGUE
1344 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1346 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1347 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1348 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1349 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1350 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1351 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1352 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1353 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1354 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1355 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1356 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1357 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1359 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1360 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1362 #undef TARGET_INTERNAL_ARG_POINTER
1363 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1365 #undef TARGET_HAVE_TLS
1366 #define TARGET_HAVE_TLS HAVE_AS_TLS
1368 #undef TARGET_CANNOT_FORCE_CONST_MEM
1369 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1371 #undef TARGET_DELEGITIMIZE_ADDRESS
1372 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1374 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1375 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1377 #undef TARGET_LEGITIMATE_COMBINED_INSN
1378 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1380 #undef TARGET_ASM_FUNCTION_PROLOGUE
1381 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1382 #undef TARGET_ASM_FUNCTION_EPILOGUE
1383 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1385 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1386 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1388 #undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
1389 #define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec
1391 #undef TARGET_LEGITIMIZE_ADDRESS
1392 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1394 #undef TARGET_SCHED_VARIABLE_ISSUE
1395 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1397 #undef TARGET_SCHED_ISSUE_RATE
1398 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1399 #undef TARGET_SCHED_ADJUST_COST
1400 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1401 #undef TARGET_SCHED_ADJUST_PRIORITY
1402 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1403 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1404 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1405 #undef TARGET_SCHED_INIT
1406 #define TARGET_SCHED_INIT rs6000_sched_init
1407 #undef TARGET_SCHED_FINISH
1408 #define TARGET_SCHED_FINISH rs6000_sched_finish
1409 #undef TARGET_SCHED_REORDER
1410 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1411 #undef TARGET_SCHED_REORDER2
1412 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1414 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1415 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1417 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1418 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1420 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1421 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1422 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1423 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1424 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1425 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1426 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1427 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1429 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1430 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1432 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1433 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1434 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1435 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1436 rs6000_builtin_support_vector_misalignment
1437 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1438 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1439 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1440 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1441 rs6000_builtin_vectorization_cost
1442 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1443 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1444 rs6000_preferred_simd_mode
1445 #undef TARGET_VECTORIZE_CREATE_COSTS
1446 #define TARGET_VECTORIZE_CREATE_COSTS rs6000_vectorize_create_costs
1448 #undef TARGET_LOOP_UNROLL_ADJUST
1449 #define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
1451 #undef TARGET_INIT_BUILTINS
1452 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1453 #undef TARGET_BUILTIN_DECL
1454 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1456 #undef TARGET_FOLD_BUILTIN
1457 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1458 #undef TARGET_GIMPLE_FOLD_BUILTIN
1459 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1461 #undef TARGET_EXPAND_BUILTIN
1462 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1464 #undef TARGET_MANGLE_TYPE
1465 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1467 #undef TARGET_INIT_LIBFUNCS
1468 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1471 #undef TARGET_BINDS_LOCAL_P
1472 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1475 #undef TARGET_MS_BITFIELD_LAYOUT_P
1476 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1478 #undef TARGET_ASM_OUTPUT_MI_THUNK
1479 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1481 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1482 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1484 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1485 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1487 #undef TARGET_REGISTER_MOVE_COST
1488 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1489 #undef TARGET_MEMORY_MOVE_COST
1490 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1491 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1492 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1493 rs6000_ira_change_pseudo_allocno_class
1494 #undef TARGET_CANNOT_COPY_INSN_P
1495 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1496 #undef TARGET_RTX_COSTS
1497 #define TARGET_RTX_COSTS rs6000_rtx_costs
1498 #undef TARGET_ADDRESS_COST
1499 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1500 #undef TARGET_INSN_COST
1501 #define TARGET_INSN_COST rs6000_insn_cost
1503 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1504 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1506 #undef TARGET_PROMOTE_FUNCTION_MODE
1507 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1509 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
1510 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change
1512 #undef TARGET_RETURN_IN_MEMORY
1513 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1515 #undef TARGET_RETURN_IN_MSB
1516 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1518 #undef TARGET_SETUP_INCOMING_VARARGS
1519 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1521 /* Always strict argument naming on rs6000. */
1522 #undef TARGET_STRICT_ARGUMENT_NAMING
1523 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1524 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1525 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1526 #undef TARGET_SPLIT_COMPLEX_ARG
1527 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1528 #undef TARGET_MUST_PASS_IN_STACK
1529 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1530 #undef TARGET_PASS_BY_REFERENCE
1531 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1532 #undef TARGET_ARG_PARTIAL_BYTES
1533 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1534 #undef TARGET_FUNCTION_ARG_ADVANCE
1535 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1536 #undef TARGET_FUNCTION_ARG
1537 #define TARGET_FUNCTION_ARG rs6000_function_arg
1538 #undef TARGET_FUNCTION_ARG_PADDING
1539 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1540 #undef TARGET_FUNCTION_ARG_BOUNDARY
1541 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1543 #undef TARGET_BUILD_BUILTIN_VA_LIST
1544 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1546 #undef TARGET_EXPAND_BUILTIN_VA_START
1547 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1549 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1550 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1552 #undef TARGET_EH_RETURN_FILTER_MODE
1553 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1555 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1556 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1558 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1559 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1561 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
1562 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
1563 rs6000_libgcc_floating_mode_supported_p
1565 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1566 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1568 #undef TARGET_FLOATN_MODE
1569 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1571 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1572 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1574 #undef TARGET_MD_ASM_ADJUST
1575 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1577 #undef TARGET_OPTION_OVERRIDE
1578 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1580 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1581 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1582 rs6000_builtin_vectorized_function
1584 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1585 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1586 rs6000_builtin_md_vectorized_function
1588 #undef TARGET_STACK_PROTECT_GUARD
1589 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1592 #undef TARGET_STACK_PROTECT_FAIL
1593 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1597 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1598 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1601 /* Use a 32-bit anchor range. This leads to sequences like:
1603 addis tmp,anchor,high
1606 where tmp itself acts as an anchor, and can be shared between
1607 accesses to the same 64k page. */
1608 #undef TARGET_MIN_ANCHOR_OFFSET
1609 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1610 #undef TARGET_MAX_ANCHOR_OFFSET
1611 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1612 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1613 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1614 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1615 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1617 #undef TARGET_BUILTIN_RECIPROCAL
1618 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1620 #undef TARGET_SECONDARY_RELOAD
1621 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1622 #undef TARGET_SECONDARY_MEMORY_NEEDED
1623 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1624 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1625 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1627 #undef TARGET_LEGITIMATE_ADDRESS_P
1628 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1630 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1631 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1633 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1634 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1636 #undef TARGET_CAN_ELIMINATE
1637 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1639 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1640 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1642 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1643 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1645 #undef TARGET_TRAMPOLINE_INIT
1646 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1648 #undef TARGET_FUNCTION_VALUE
1649 #define TARGET_FUNCTION_VALUE rs6000_function_value
1651 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1652 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1654 #undef TARGET_OPTION_SAVE
1655 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1657 #undef TARGET_OPTION_RESTORE
1658 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1660 #undef TARGET_OPTION_PRINT
1661 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1663 #undef TARGET_CAN_INLINE_P
1664 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1666 #undef TARGET_SET_CURRENT_FUNCTION
1667 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1669 #undef TARGET_LEGITIMATE_CONSTANT_P
1670 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1672 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1673 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1675 #undef TARGET_CAN_USE_DOLOOP_P
1676 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1678 #undef TARGET_PREDICT_DOLOOP_P
1679 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1681 #undef TARGET_HAVE_COUNT_REG_DECR_P
1682 #define TARGET_HAVE_COUNT_REG_DECR_P true
1684 /* 1000000000 is infinite cost in IVOPTs. */
1685 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1686 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1688 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1689 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1691 #undef TARGET_PREFERRED_DOLOOP_MODE
1692 #define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode
1694 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1695 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1697 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1698 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1699 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1700 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1701 #undef TARGET_UNWIND_WORD_MODE
1702 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1704 #undef TARGET_OFFLOAD_OPTIONS
1705 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1707 #undef TARGET_C_MODE_FOR_SUFFIX
1708 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1710 #undef TARGET_C_MODE_FOR_FLOATING_TYPE
1711 #define TARGET_C_MODE_FOR_FLOATING_TYPE rs6000_c_mode_for_floating_type
1713 #undef TARGET_INVALID_BINARY_OP
1714 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1716 #undef TARGET_OPTAB_SUPPORTED_P
1717 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1719 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1720 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1722 #undef TARGET_COMPARE_VERSION_PRIORITY
1723 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1725 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1726 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1727 rs6000_generate_version_dispatcher_body
1729 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1730 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1731 rs6000_get_function_versions_dispatcher
1733 #undef TARGET_OPTION_FUNCTION_VERSIONS
1734 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1736 #undef TARGET_HARD_REGNO_NREGS
1737 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1738 #undef TARGET_HARD_REGNO_MODE_OK
1739 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1741 #undef TARGET_MODES_TIEABLE_P
1742 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1744 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1745 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1746 rs6000_hard_regno_call_part_clobbered
1748 #undef TARGET_SLOW_UNALIGNED_ACCESS
1749 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1751 #undef TARGET_CAN_CHANGE_MODE_CLASS
1752 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1754 #undef TARGET_CONSTANT_ALIGNMENT
1755 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1757 #undef TARGET_STARTING_FRAME_OFFSET
1758 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1760 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1761 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1763 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1764 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1766 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1767 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1768 rs6000_cannot_substitute_mem_equiv_p
1770 #undef TARGET_INVALID_CONVERSION
1771 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
1773 #undef TARGET_NEED_IPA_FN_TARGET_INFO
1774 #define TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info
1776 #undef TARGET_UPDATE_IPA_FN_TARGET_INFO
1777 #define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info
1779 #undef TARGET_CONST_ANCHOR
1780 #define TARGET_CONST_ANCHOR 0x8000
1782 #undef TARGET_OVERLAP_OP_BY_PIECES_P
1783 #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
1787 /* Processor table. */
1790 const char *const name
; /* Canonical processor name. */
1791 const enum processor_type processor
; /* Processor type enum value. */
1792 const HOST_WIDE_INT target_enable
; /* Target flags to enable. */
1795 static struct rs6000_ptt
const processor_target_table
[] =
1797 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1798 #include "rs6000-cpus.def"
1802 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1806 rs6000_cpu_name_lookup (const char *name
)
1812 for (i
= 0; i
< ARRAY_SIZE (processor_target_table
); i
++)
1813 if (! strcmp (name
, processor_target_table
[i
].name
))
1821 /* Return number of consecutive hard regs needed starting at reg REGNO
1822 to hold something of mode MODE.
1823 This is ordinarily the length in words of a value of mode MODE
1824 but can be less for certain modes in special long registers.
1826 POWER and PowerPC GPRs hold 32 bits worth;
1827 PowerPC64 GPRs and FPRs point register holds 64 bits worth. */
1830 rs6000_hard_regno_nregs_internal (int regno
, machine_mode mode
)
1832 unsigned HOST_WIDE_INT reg_size
;
1834 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1835 128-bit floating point that can go in vector registers, which has VSX
1836 memory addressing. */
1837 if (FP_REGNO_P (regno
))
1838 reg_size
= (VECTOR_MEM_VSX_P (mode
) || VECTOR_ALIGNMENT_P (mode
)
1839 ? UNITS_PER_VSX_WORD
1840 : UNITS_PER_FP_WORD
);
1842 else if (ALTIVEC_REGNO_P (regno
))
1843 reg_size
= UNITS_PER_ALTIVEC_WORD
;
1846 reg_size
= UNITS_PER_WORD
;
1848 return (GET_MODE_SIZE (mode
) + reg_size
- 1) / reg_size
;
1851 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1854 rs6000_hard_regno_mode_ok_uncached (int regno
, machine_mode mode
)
1856 int last_regno
= regno
+ rs6000_hard_regno_nregs
[mode
][regno
] - 1;
1858 if (COMPLEX_MODE_P (mode
))
1859 mode
= GET_MODE_INNER (mode
);
1861 /* Vector pair modes need even/odd VSX register pairs. Only allow vector
1864 return (TARGET_MMA
&& VSX_REGNO_P (regno
) && (regno
& 1) == 0);
1866 /* MMA accumulator modes need FPR registers divisible by 4. */
1868 return (TARGET_MMA
&& FP_REGNO_P (regno
) && (regno
& 3) == 0);
1870 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1871 register combinations, and use PTImode where we need to deal with quad
1872 word memory operations. Don't allow quad words in the argument or frame
1873 pointer registers, just registers 0..31. */
1874 if (mode
== PTImode
)
1875 return (IN_RANGE (regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
)
1876 && IN_RANGE (last_regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
)
1877 && ((regno
& 1) == 0));
1879 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1880 implementations. Don't allow an item to be split between a FP register
1881 and an Altivec register. Allow TImode in all VSX registers if the user
1883 if (TARGET_VSX
&& VSX_REGNO_P (regno
)
1884 && (VECTOR_MEM_VSX_P (mode
)
1885 || VECTOR_ALIGNMENT_P (mode
)
1886 || reg_addr
[mode
].scalar_in_vmx_p
1888 || (TARGET_VADDUQM
&& mode
== V1TImode
)))
1890 if (FP_REGNO_P (regno
))
1891 return FP_REGNO_P (last_regno
);
1893 if (ALTIVEC_REGNO_P (regno
))
1895 if (GET_MODE_SIZE (mode
) < 16 && !reg_addr
[mode
].scalar_in_vmx_p
)
1898 return ALTIVEC_REGNO_P (last_regno
);
1902 /* The GPRs can hold any mode, but values bigger than one register
1903 cannot go past R31. */
1904 if (INT_REGNO_P (regno
))
1905 return INT_REGNO_P (last_regno
);
1907 /* The float registers (except for VSX vector modes) can only hold floating
1908 modes and DImode. */
1909 if (FP_REGNO_P (regno
))
1911 if (VECTOR_ALIGNMENT_P (mode
))
1914 if (SCALAR_FLOAT_MODE_P (mode
)
1915 && (mode
!= TDmode
|| (regno
% 2) == 0)
1916 && FP_REGNO_P (last_regno
))
1919 if (GET_MODE_CLASS (mode
) == MODE_INT
)
1921 if(GET_MODE_SIZE (mode
) == UNITS_PER_FP_WORD
)
1924 if (TARGET_POPCNTD
&& mode
== SImode
)
1927 if (TARGET_P9_VECTOR
&& (mode
== QImode
|| mode
== HImode
))
1934 /* The CR register can only hold CC modes. */
1935 if (CR_REGNO_P (regno
))
1936 return GET_MODE_CLASS (mode
) == MODE_CC
;
1938 if (CA_REGNO_P (regno
))
1939 return mode
== Pmode
|| mode
== SImode
;
1941 /* AltiVec only in AldyVec registers. */
1942 if (ALTIVEC_REGNO_P (regno
))
1943 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
)
1944 || mode
== V1TImode
);
1946 /* We cannot put non-VSX TImode or PTImode anywhere except general register
1947 and it must be able to fit within the register set. */
1949 return GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
;
1952 /* Implement TARGET_HARD_REGNO_NREGS. */
1955 rs6000_hard_regno_nregs_hook (unsigned int regno
, machine_mode mode
)
1957 return rs6000_hard_regno_nregs
[mode
][regno
];
1960 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1963 rs6000_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
1965 return rs6000_hard_regno_mode_ok_p
[mode
][regno
];
1968 /* Implement TARGET_MODES_TIEABLE_P.
1970 PTImode cannot tie with other modes because PTImode is restricted to even
1971 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1974 Similarly, don't allow OOmode (vector pair, restricted to even VSX
1975 registers) or XOmode (vector quad, restricted to FPR registers divisible
1976 by 4) to tie with other modes.
1978 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1979 128-bit floating point on VSX systems ties with other vectors. */
1982 rs6000_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
1984 if (mode1
== PTImode
|| mode1
== OOmode
|| mode1
== XOmode
1985 || mode2
== PTImode
|| mode2
== OOmode
|| mode2
== XOmode
)
1986 return mode1
== mode2
;
1988 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1
))
1989 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2
);
1990 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2
))
1993 if (SCALAR_FLOAT_MODE_P (mode1
))
1994 return SCALAR_FLOAT_MODE_P (mode2
);
1995 if (SCALAR_FLOAT_MODE_P (mode2
))
1998 if (GET_MODE_CLASS (mode1
) == MODE_CC
)
1999 return GET_MODE_CLASS (mode2
) == MODE_CC
;
2000 if (GET_MODE_CLASS (mode2
) == MODE_CC
)
2006 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
2009 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno
,
2014 && GET_MODE_SIZE (mode
) > 4
2015 && INT_REGNO_P (regno
))
2019 && FP_REGNO_P (regno
)
2020 && GET_MODE_SIZE (mode
) > 8
2021 && !FLOAT128_2REG_P (mode
))
2027 /* Print interesting facts about registers. */
2029 rs6000_debug_reg_print (int first_regno
, int last_regno
, const char *reg_name
)
2033 for (r
= first_regno
; r
<= last_regno
; ++r
)
2035 const char *comma
= "";
2038 if (first_regno
== last_regno
)
2039 fprintf (stderr
, "%s:\t", reg_name
);
2041 fprintf (stderr
, "%s%d:\t", reg_name
, r
- first_regno
);
2044 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2045 if (rs6000_hard_regno_mode_ok_p
[m
][r
] && rs6000_hard_regno_nregs
[m
][r
])
2049 fprintf (stderr
, ",\n\t");
2054 if (rs6000_hard_regno_nregs
[m
][r
] > 1)
2055 len
+= fprintf (stderr
, "%s%s/%d", comma
, GET_MODE_NAME (m
),
2056 rs6000_hard_regno_nregs
[m
][r
]);
2058 len
+= fprintf (stderr
, "%s%s", comma
, GET_MODE_NAME (m
));
2063 if (call_used_or_fixed_reg_p (r
))
2067 fprintf (stderr
, ",\n\t");
2072 len
+= fprintf (stderr
, "%s%s", comma
, "call-used");
2080 fprintf (stderr
, ",\n\t");
2085 len
+= fprintf (stderr
, "%s%s", comma
, "fixed");
2091 fprintf (stderr
, ",\n\t");
2095 len
+= fprintf (stderr
, "%sreg-class = %s", comma
,
2096 reg_class_names
[(int)rs6000_regno_regclass
[r
]]);
2101 fprintf (stderr
, ",\n\t");
2105 fprintf (stderr
, "%sregno = %d\n", comma
, r
);
2110 rs6000_debug_vector_unit (enum rs6000_vector v
)
2116 case VECTOR_NONE
: ret
= "none"; break;
2117 case VECTOR_ALTIVEC
: ret
= "altivec"; break;
2118 case VECTOR_VSX
: ret
= "vsx"; break;
2119 case VECTOR_P8_VECTOR
: ret
= "p8_vector"; break;
2120 default: ret
= "unknown"; break;
2126 /* Inner function printing just the address mask for a particular reload
2128 DEBUG_FUNCTION
char *
2129 rs6000_debug_addr_mask (addr_mask_type mask
, bool keep_spaces
)
2134 if ((mask
& RELOAD_REG_VALID
) != 0)
2136 else if (keep_spaces
)
2139 if ((mask
& RELOAD_REG_MULTIPLE
) != 0)
2141 else if (keep_spaces
)
2144 if ((mask
& RELOAD_REG_INDEXED
) != 0)
2146 else if (keep_spaces
)
2149 if ((mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
2151 else if ((mask
& RELOAD_REG_OFFSET
) != 0)
2153 else if (keep_spaces
)
2156 if ((mask
& RELOAD_REG_PRE_INCDEC
) != 0)
2158 else if (keep_spaces
)
2161 if ((mask
& RELOAD_REG_PRE_MODIFY
) != 0)
2163 else if (keep_spaces
)
2166 if ((mask
& RELOAD_REG_AND_M16
) != 0)
2168 else if (keep_spaces
)
2176 /* Print the address masks in a human readble fashion. */
2178 rs6000_debug_print_mode (ssize_t m
)
2183 fprintf (stderr
, "Mode: %-5s", GET_MODE_NAME (m
));
2184 for (rc
= 0; rc
< N_RELOAD_REG
; rc
++)
2185 fprintf (stderr
, " %s: %s", reload_reg_map
[rc
].name
,
2186 rs6000_debug_addr_mask (reg_addr
[m
].addr_mask
[rc
], true));
2188 if ((reg_addr
[m
].reload_store
!= CODE_FOR_nothing
)
2189 || (reg_addr
[m
].reload_load
!= CODE_FOR_nothing
))
2191 fprintf (stderr
, "%*s Reload=%c%c", spaces
, "",
2192 (reg_addr
[m
].reload_store
!= CODE_FOR_nothing
) ? 's' : '*',
2193 (reg_addr
[m
].reload_load
!= CODE_FOR_nothing
) ? 'l' : '*');
2197 spaces
+= strlen (" Reload=sl");
2199 if (reg_addr
[m
].scalar_in_vmx_p
)
2201 fprintf (stderr
, "%*s Upper=y", spaces
, "");
2205 spaces
+= strlen (" Upper=y");
2207 if (rs6000_vector_unit
[m
] != VECTOR_NONE
2208 || rs6000_vector_mem
[m
] != VECTOR_NONE
)
2210 fprintf (stderr
, "%*s vector: arith=%-10s mem=%s",
2212 rs6000_debug_vector_unit (rs6000_vector_unit
[m
]),
2213 rs6000_debug_vector_unit (rs6000_vector_mem
[m
]));
2216 fputs ("\n", stderr
);
2219 #define DEBUG_FMT_ID "%-32s= "
2220 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2221 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2222 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2224 /* Print various interesting information with -mdebug=reg. */
2226 rs6000_debug_reg_global (void)
2228 static const char *const tf
[2] = { "false", "true" };
2229 const char *nl
= (const char *)0;
2232 char costly_num
[20];
2234 char flags_buffer
[40];
2235 const char *costly_str
;
2236 const char *nop_str
;
2237 const char *trace_str
;
2238 const char *abi_str
;
2239 const char *cmodel_str
;
2240 struct cl_target_option cl_opts
;
2242 /* Modes we want tieable information on. */
2243 static const machine_mode print_tieable_modes
[] = {
2282 /* Virtual regs we are interested in. */
2283 const static struct {
2284 int regno
; /* register number. */
2285 const char *name
; /* register name. */
2286 } virtual_regs
[] = {
2287 { STACK_POINTER_REGNUM
, "stack pointer:" },
2288 { TOC_REGNUM
, "toc: " },
2289 { STATIC_CHAIN_REGNUM
, "static chain: " },
2290 { RS6000_PIC_OFFSET_TABLE_REGNUM
, "pic offset: " },
2291 { HARD_FRAME_POINTER_REGNUM
, "hard frame: " },
2292 { ARG_POINTER_REGNUM
, "arg pointer: " },
2293 { FRAME_POINTER_REGNUM
, "frame pointer:" },
2294 { FIRST_PSEUDO_REGISTER
, "first pseudo: " },
2295 { FIRST_VIRTUAL_REGISTER
, "first virtual:" },
2296 { VIRTUAL_INCOMING_ARGS_REGNUM
, "incoming_args:" },
2297 { VIRTUAL_STACK_VARS_REGNUM
, "stack_vars: " },
2298 { VIRTUAL_STACK_DYNAMIC_REGNUM
, "stack_dynamic:" },
2299 { VIRTUAL_OUTGOING_ARGS_REGNUM
, "outgoing_args:" },
2300 { VIRTUAL_CFA_REGNUM
, "cfa (frame): " },
2301 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM
, "stack boundry:" },
2302 { LAST_VIRTUAL_REGISTER
, "last virtual: " },
2305 fputs ("\nHard register information:\n", stderr
);
2306 rs6000_debug_reg_print (FIRST_GPR_REGNO
, LAST_GPR_REGNO
, "gr");
2307 rs6000_debug_reg_print (FIRST_FPR_REGNO
, LAST_FPR_REGNO
, "fp");
2308 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO
,
2311 rs6000_debug_reg_print (LR_REGNO
, LR_REGNO
, "lr");
2312 rs6000_debug_reg_print (CTR_REGNO
, CTR_REGNO
, "ctr");
2313 rs6000_debug_reg_print (CR0_REGNO
, CR7_REGNO
, "cr");
2314 rs6000_debug_reg_print (CA_REGNO
, CA_REGNO
, "ca");
2315 rs6000_debug_reg_print (VRSAVE_REGNO
, VRSAVE_REGNO
, "vrsave");
2316 rs6000_debug_reg_print (VSCR_REGNO
, VSCR_REGNO
, "vscr");
2318 fputs ("\nVirtual/stack/frame registers:\n", stderr
);
2319 for (v
= 0; v
< ARRAY_SIZE (virtual_regs
); v
++)
2320 fprintf (stderr
, "%s regno = %3d\n", virtual_regs
[v
].name
, virtual_regs
[v
].regno
);
2324 "d reg_class = %s\n"
2325 "v reg_class = %s\n"
2326 "wa reg_class = %s\n"
2327 "we reg_class = %s\n"
2328 "wr reg_class = %s\n"
2329 "wx reg_class = %s\n"
2330 "wA reg_class = %s\n"
2332 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_d
]],
2333 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_v
]],
2334 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wa
]],
2335 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_we
]],
2336 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wr
]],
2337 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wx
]],
2338 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wA
]]);
2341 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2342 rs6000_debug_print_mode (m
);
2344 fputs ("\n", stderr
);
2346 for (m1
= 0; m1
< ARRAY_SIZE (print_tieable_modes
); m1
++)
2348 machine_mode mode1
= print_tieable_modes
[m1
];
2349 bool first_time
= true;
2351 nl
= (const char *)0;
2352 for (m2
= 0; m2
< ARRAY_SIZE (print_tieable_modes
); m2
++)
2354 machine_mode mode2
= print_tieable_modes
[m2
];
2355 if (mode1
!= mode2
&& rs6000_modes_tieable_p (mode1
, mode2
))
2359 fprintf (stderr
, "Tieable modes %s:", GET_MODE_NAME (mode1
));
2364 fprintf (stderr
, " %s", GET_MODE_NAME (mode2
));
2369 fputs ("\n", stderr
);
2375 if (rs6000_recip_control
)
2377 fprintf (stderr
, "\nReciprocal mask = 0x%x\n", rs6000_recip_control
);
2379 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2380 if (rs6000_recip_bits
[m
])
2383 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2385 (RS6000_RECIP_AUTO_RE_P (m
)
2387 : (RS6000_RECIP_HAVE_RE_P (m
) ? "have" : "none")),
2388 (RS6000_RECIP_AUTO_RSQRTE_P (m
)
2390 : (RS6000_RECIP_HAVE_RSQRTE_P (m
) ? "have" : "none")));
2393 fputs ("\n", stderr
);
2396 if (rs6000_cpu_index
>= 0)
2398 const char *name
= processor_target_table
[rs6000_cpu_index
].name
;
2400 = processor_target_table
[rs6000_cpu_index
].target_enable
;
2402 sprintf (flags_buffer
, "-mcpu=%s flags", name
);
2403 rs6000_print_isa_options (stderr
, 0, flags_buffer
, flags
);
2406 fprintf (stderr
, DEBUG_FMT_S
, "cpu", "<none>");
2408 if (rs6000_tune_index
>= 0)
2410 const char *name
= processor_target_table
[rs6000_tune_index
].name
;
2412 = processor_target_table
[rs6000_tune_index
].target_enable
;
2414 sprintf (flags_buffer
, "-mtune=%s flags", name
);
2415 rs6000_print_isa_options (stderr
, 0, flags_buffer
, flags
);
2418 fprintf (stderr
, DEBUG_FMT_S
, "tune", "<none>");
2420 cl_target_option_save (&cl_opts
, &global_options
, &global_options_set
);
2421 rs6000_print_isa_options (stderr
, 0, "rs6000_isa_flags",
2424 rs6000_print_isa_options (stderr
, 0, "rs6000_isa_flags_explicit",
2425 rs6000_isa_flags_explicit
);
2427 rs6000_print_isa_options (stderr
, 0, "TARGET_DEFAULT", TARGET_DEFAULT
);
2429 fprintf (stderr
, DEBUG_FMT_S
, "--with-cpu default",
2430 OPTION_TARGET_CPU_DEFAULT
? OPTION_TARGET_CPU_DEFAULT
: "<none>");
2432 switch (rs6000_sched_costly_dep
)
2434 case max_dep_latency
:
2435 costly_str
= "max_dep_latency";
2439 costly_str
= "no_dep_costly";
2442 case all_deps_costly
:
2443 costly_str
= "all_deps_costly";
2446 case true_store_to_load_dep_costly
:
2447 costly_str
= "true_store_to_load_dep_costly";
2450 case store_to_load_dep_costly
:
2451 costly_str
= "store_to_load_dep_costly";
2455 costly_str
= costly_num
;
2456 sprintf (costly_num
, "%d", (int)rs6000_sched_costly_dep
);
2460 fprintf (stderr
, DEBUG_FMT_S
, "sched_costly_dep", costly_str
);
2462 switch (rs6000_sched_insert_nops
)
2464 case sched_finish_regroup_exact
:
2465 nop_str
= "sched_finish_regroup_exact";
2468 case sched_finish_pad_groups
:
2469 nop_str
= "sched_finish_pad_groups";
2472 case sched_finish_none
:
2473 nop_str
= "sched_finish_none";
2478 sprintf (nop_num
, "%d", (int)rs6000_sched_insert_nops
);
2482 fprintf (stderr
, DEBUG_FMT_S
, "sched_insert_nops", nop_str
);
2484 switch (rs6000_sdata
)
2491 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "data");
2495 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "sysv");
2499 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "eabi");
2504 switch (rs6000_traceback
)
2506 case traceback_default
: trace_str
= "default"; break;
2507 case traceback_none
: trace_str
= "none"; break;
2508 case traceback_part
: trace_str
= "part"; break;
2509 case traceback_full
: trace_str
= "full"; break;
2510 default: trace_str
= "unknown"; break;
2513 fprintf (stderr
, DEBUG_FMT_S
, "traceback", trace_str
);
2515 switch (rs6000_current_cmodel
)
2517 case CMODEL_SMALL
: cmodel_str
= "small"; break;
2518 case CMODEL_MEDIUM
: cmodel_str
= "medium"; break;
2519 case CMODEL_LARGE
: cmodel_str
= "large"; break;
2520 default: cmodel_str
= "unknown"; break;
2523 fprintf (stderr
, DEBUG_FMT_S
, "cmodel", cmodel_str
);
2525 switch (rs6000_current_abi
)
2527 case ABI_NONE
: abi_str
= "none"; break;
2528 case ABI_AIX
: abi_str
= "aix"; break;
2529 case ABI_ELFv2
: abi_str
= "ELFv2"; break;
2530 case ABI_V4
: abi_str
= "V4"; break;
2531 case ABI_DARWIN
: abi_str
= "darwin"; break;
2532 default: abi_str
= "unknown"; break;
2535 fprintf (stderr
, DEBUG_FMT_S
, "abi", abi_str
);
2537 if (rs6000_altivec_abi
)
2538 fprintf (stderr
, DEBUG_FMT_S
, "altivec_abi", "true");
2540 if (rs6000_aix_extabi
)
2541 fprintf (stderr
, DEBUG_FMT_S
, "AIX vec-extabi", "true");
2543 if (rs6000_darwin64_abi
)
2544 fprintf (stderr
, DEBUG_FMT_S
, "darwin64_abi", "true");
2546 fprintf (stderr
, DEBUG_FMT_S
, "soft_float",
2547 (TARGET_SOFT_FLOAT
? "true" : "false"));
2549 if (TARGET_LINK_STACK
)
2550 fprintf (stderr
, DEBUG_FMT_S
, "link_stack", "true");
2552 if (TARGET_P8_FUSION
)
2556 strcpy (options
, "power8");
2557 if (TARGET_P8_FUSION_SIGN
)
2558 strcat (options
, ", sign");
2560 fprintf (stderr
, DEBUG_FMT_S
, "fusion", options
);
2563 fprintf (stderr
, DEBUG_FMT_S
, "plt-format",
2564 TARGET_SECURE_PLT
? "secure" : "bss");
2565 fprintf (stderr
, DEBUG_FMT_S
, "struct-return",
2566 aix_struct_return
? "aix" : "sysv");
2567 fprintf (stderr
, DEBUG_FMT_S
, "always_hint", tf
[!!rs6000_always_hint
]);
2568 fprintf (stderr
, DEBUG_FMT_S
, "sched_groups", tf
[!!rs6000_sched_groups
]);
2569 fprintf (stderr
, DEBUG_FMT_S
, "align_branch",
2570 tf
[!!rs6000_align_branch_targets
]);
2571 fprintf (stderr
, DEBUG_FMT_D
, "tls_size", rs6000_tls_size
);
2572 fprintf (stderr
, DEBUG_FMT_D
, "long_double_size",
2573 rs6000_long_double_type_size
);
2574 if (rs6000_long_double_type_size
> 64)
2576 fprintf (stderr
, DEBUG_FMT_S
, "long double type",
2577 TARGET_IEEEQUAD
? "IEEE" : "IBM");
2578 fprintf (stderr
, DEBUG_FMT_S
, "default long double type",
2579 TARGET_IEEEQUAD_DEFAULT
? "IEEE" : "IBM");
2581 fprintf (stderr
, DEBUG_FMT_D
, "sched_restricted_insns_priority",
2582 (int)rs6000_sched_restricted_insns_priority
);
2583 fprintf (stderr
, DEBUG_FMT_D
, "Number of standard builtins",
2586 fprintf (stderr
, DEBUG_FMT_D
, "Enable float128 on VSX",
2587 (int)TARGET_FLOAT128_ENABLE_TYPE
);
2590 fprintf (stderr
, DEBUG_FMT_D
, "VSX easy 64-bit scalar element",
2591 (int)VECTOR_ELEMENT_SCALAR_64BIT
);
2593 if (TARGET_DIRECT_MOVE_128
)
2594 fprintf (stderr
, DEBUG_FMT_D
, "VSX easy 64-bit mfvsrld element",
2595 (int)VECTOR_ELEMENT_MFVSRLD_64BIT
);
2599 /* Update the addr mask bits in reg_addr to help secondary reload and go if
2600 legitimate address support to figure out the appropriate addressing to
2604 rs6000_setup_reg_addr_masks (void)
2606 ssize_t rc
, reg
, m
, nregs
;
2607 addr_mask_type any_addr_mask
, addr_mask
;
2609 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2611 machine_mode m2
= (machine_mode
) m
;
2612 bool complex_p
= false;
2613 bool small_int_p
= (m2
== QImode
|| m2
== HImode
|| m2
== SImode
);
2616 if (COMPLEX_MODE_P (m2
))
2619 m2
= GET_MODE_INNER (m2
);
2622 msize
= GET_MODE_SIZE (m2
);
2624 /* SDmode is special in that we want to access it only via REG+REG
2625 addressing on power7 and above, since we want to use the LFIWZX and
2626 STFIWZX instructions to load it. */
2627 bool indexed_only_p
= (m
== SDmode
&& TARGET_NO_SDMODE_STACK
);
2630 for (rc
= FIRST_RELOAD_REG_CLASS
; rc
<= LAST_RELOAD_REG_CLASS
; rc
++)
2633 reg
= reload_reg_map
[rc
].reg
;
2635 /* Can mode values go in the GPR/FPR/Altivec registers? */
2636 if (reg
>= 0 && rs6000_hard_regno_mode_ok_p
[m
][reg
])
2638 bool small_int_vsx_p
= (small_int_p
2639 && (rc
== RELOAD_REG_FPR
2640 || rc
== RELOAD_REG_VMX
));
2642 nregs
= rs6000_hard_regno_nregs
[m
][reg
];
2643 addr_mask
|= RELOAD_REG_VALID
;
2645 /* Indicate if the mode takes more than 1 physical register. If
2646 it takes a single register, indicate it can do REG+REG
2647 addressing. Small integers in VSX registers can only do
2648 REG+REG addressing. */
2649 if (small_int_vsx_p
)
2650 addr_mask
|= RELOAD_REG_INDEXED
;
2651 else if (nregs
> 1 || m
== BLKmode
|| complex_p
)
2652 addr_mask
|= RELOAD_REG_MULTIPLE
;
2654 addr_mask
|= RELOAD_REG_INDEXED
;
2656 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2657 addressing. If we allow scalars into Altivec registers,
2658 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2660 For VSX systems, we don't allow update addressing for
2661 DFmode/SFmode if those registers can go in both the
2662 traditional floating point registers and Altivec registers.
2663 The load/store instructions for the Altivec registers do not
2664 have update forms. If we allowed update addressing, it seems
2665 to break IV-OPT code using floating point if the index type is
2666 int instead of long (PR target/81550 and target/84042). */
2669 && (rc
== RELOAD_REG_GPR
|| rc
== RELOAD_REG_FPR
)
2671 && !VECTOR_MODE_P (m2
)
2672 && !VECTOR_ALIGNMENT_P (m2
)
2674 && (m
!= E_DFmode
|| !TARGET_VSX
)
2675 && (m
!= E_SFmode
|| !TARGET_P8_VECTOR
)
2676 && !small_int_vsx_p
)
2678 addr_mask
|= RELOAD_REG_PRE_INCDEC
;
2680 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2681 we don't allow PRE_MODIFY for some multi-register
2686 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2690 if (TARGET_POWERPC64
)
2691 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2696 if (TARGET_HARD_FLOAT
)
2697 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2703 /* GPR and FPR registers can do REG+OFFSET addressing, except
2704 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2705 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2706 if ((addr_mask
!= 0) && !indexed_only_p
2708 && (rc
== RELOAD_REG_GPR
2709 || ((msize
== 8 || m2
== SFmode
)
2710 && (rc
== RELOAD_REG_FPR
2711 || (rc
== RELOAD_REG_VMX
&& TARGET_P9_VECTOR
)))))
2712 addr_mask
|= RELOAD_REG_OFFSET
;
2714 /* VSX registers can do REG+OFFSET addresssing if ISA 3.0
2715 instructions are enabled. The offset for 128-bit VSX registers is
2716 only 12-bits. While GPRs can handle the full offset range, VSX
2717 registers can only handle the restricted range. */
2718 else if ((addr_mask
!= 0) && !indexed_only_p
2719 && msize
== 16 && TARGET_P9_VECTOR
2720 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2
)
2721 || (m2
== TImode
&& TARGET_VSX
)))
2723 addr_mask
|= RELOAD_REG_OFFSET
;
2724 if (rc
== RELOAD_REG_FPR
|| rc
== RELOAD_REG_VMX
)
2725 addr_mask
|= RELOAD_REG_QUAD_OFFSET
;
2728 /* Vector pairs can do both indexed and offset loads if the
2729 instructions are enabled, otherwise they can only do offset loads
2730 since it will be broken into two vector moves. Vector quads can
2731 only do offset loads. */
2732 else if ((addr_mask
!= 0) && TARGET_MMA
2733 && (m2
== OOmode
|| m2
== XOmode
))
2735 addr_mask
|= RELOAD_REG_OFFSET
;
2736 if (rc
== RELOAD_REG_FPR
|| rc
== RELOAD_REG_VMX
)
2738 addr_mask
|= RELOAD_REG_QUAD_OFFSET
;
2740 addr_mask
|= RELOAD_REG_INDEXED
;
2744 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2745 addressing on 128-bit types. */
2746 if (rc
== RELOAD_REG_VMX
&& msize
== 16
2747 && (addr_mask
& RELOAD_REG_VALID
) != 0)
2748 addr_mask
|= RELOAD_REG_AND_M16
;
2750 reg_addr
[m
].addr_mask
[rc
] = addr_mask
;
2751 any_addr_mask
|= addr_mask
;
2754 reg_addr
[m
].addr_mask
[RELOAD_REG_ANY
] = any_addr_mask
;
2759 /* Initialize the various global tables that are based on register size. */
2761 rs6000_init_hard_regno_mode_ok (bool global_init_p
)
2767 /* Precalculate REGNO_REG_CLASS. */
2768 rs6000_regno_regclass
[0] = GENERAL_REGS
;
2769 for (r
= 1; r
< 32; ++r
)
2770 rs6000_regno_regclass
[r
] = BASE_REGS
;
2772 for (r
= 32; r
< 64; ++r
)
2773 rs6000_regno_regclass
[r
] = FLOAT_REGS
;
2775 for (r
= 64; HARD_REGISTER_NUM_P (r
); ++r
)
2776 rs6000_regno_regclass
[r
] = NO_REGS
;
2778 for (r
= FIRST_ALTIVEC_REGNO
; r
<= LAST_ALTIVEC_REGNO
; ++r
)
2779 rs6000_regno_regclass
[r
] = ALTIVEC_REGS
;
2781 rs6000_regno_regclass
[CR0_REGNO
] = CR0_REGS
;
2782 for (r
= CR1_REGNO
; r
<= CR7_REGNO
; ++r
)
2783 rs6000_regno_regclass
[r
] = CR_REGS
;
2785 rs6000_regno_regclass
[LR_REGNO
] = LINK_REGS
;
2786 rs6000_regno_regclass
[CTR_REGNO
] = CTR_REGS
;
2787 rs6000_regno_regclass
[CA_REGNO
] = NO_REGS
;
2788 rs6000_regno_regclass
[VRSAVE_REGNO
] = VRSAVE_REGS
;
2789 rs6000_regno_regclass
[VSCR_REGNO
] = VRSAVE_REGS
;
2790 rs6000_regno_regclass
[ARG_POINTER_REGNUM
] = BASE_REGS
;
2791 rs6000_regno_regclass
[FRAME_POINTER_REGNUM
] = BASE_REGS
;
2793 /* Precalculate register class to simpler reload register class. We don't
2794 need all of the register classes that are combinations of different
2795 classes, just the simple ones that have constraint letters. */
2796 for (c
= 0; c
< N_REG_CLASSES
; c
++)
2797 reg_class_to_reg_type
[c
] = NO_REG_TYPE
;
2799 reg_class_to_reg_type
[(int)GENERAL_REGS
] = GPR_REG_TYPE
;
2800 reg_class_to_reg_type
[(int)BASE_REGS
] = GPR_REG_TYPE
;
2801 reg_class_to_reg_type
[(int)VSX_REGS
] = VSX_REG_TYPE
;
2802 reg_class_to_reg_type
[(int)VRSAVE_REGS
] = SPR_REG_TYPE
;
2803 reg_class_to_reg_type
[(int)VSCR_REGS
] = SPR_REG_TYPE
;
2804 reg_class_to_reg_type
[(int)LINK_REGS
] = SPR_REG_TYPE
;
2805 reg_class_to_reg_type
[(int)CTR_REGS
] = SPR_REG_TYPE
;
2806 reg_class_to_reg_type
[(int)LINK_OR_CTR_REGS
] = SPR_REG_TYPE
;
2807 reg_class_to_reg_type
[(int)CR_REGS
] = CR_REG_TYPE
;
2808 reg_class_to_reg_type
[(int)CR0_REGS
] = CR_REG_TYPE
;
2812 reg_class_to_reg_type
[(int)FLOAT_REGS
] = VSX_REG_TYPE
;
2813 reg_class_to_reg_type
[(int)ALTIVEC_REGS
] = VSX_REG_TYPE
;
2817 reg_class_to_reg_type
[(int)FLOAT_REGS
] = FPR_REG_TYPE
;
2818 reg_class_to_reg_type
[(int)ALTIVEC_REGS
] = ALTIVEC_REG_TYPE
;
2821 /* Precalculate the valid memory formats as well as the vector information,
2822 this must be set up before the rs6000_hard_regno_nregs_internal calls
2824 gcc_assert ((int)VECTOR_NONE
== 0);
2825 memset ((void *) &rs6000_vector_unit
[0], '\0', sizeof (rs6000_vector_unit
));
2826 memset ((void *) &rs6000_vector_mem
[0], '\0', sizeof (rs6000_vector_mem
));
2828 gcc_assert ((int)CODE_FOR_nothing
== 0);
2829 memset ((void *) ®_addr
[0], '\0', sizeof (reg_addr
));
2831 gcc_assert ((int)NO_REGS
== 0);
2832 memset ((void *) &rs6000_constraints
[0], '\0', sizeof (rs6000_constraints
));
2834 /* The VSX hardware allows native alignment for vectors, but control whether the compiler
2835 believes it can use native alignment or still uses 128-bit alignment. */
2836 if (TARGET_VSX
&& !TARGET_VSX_ALIGN_128
)
2847 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2848 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2849 if (TARGET_FLOAT128_TYPE
)
2851 rs6000_vector_mem
[KFmode
] = VECTOR_VSX
;
2852 rs6000_vector_align
[KFmode
] = 128;
2854 if (FLOAT128_IEEE_P (TFmode
))
2856 rs6000_vector_mem
[TFmode
] = VECTOR_VSX
;
2857 rs6000_vector_align
[TFmode
] = 128;
2861 /* V2DF mode, VSX only. */
2864 rs6000_vector_unit
[V2DFmode
] = VECTOR_VSX
;
2865 rs6000_vector_mem
[V2DFmode
] = VECTOR_VSX
;
2866 rs6000_vector_align
[V2DFmode
] = align64
;
2869 /* V4SF mode, either VSX or Altivec. */
2872 rs6000_vector_unit
[V4SFmode
] = VECTOR_VSX
;
2873 rs6000_vector_mem
[V4SFmode
] = VECTOR_VSX
;
2874 rs6000_vector_align
[V4SFmode
] = align32
;
2876 else if (TARGET_ALTIVEC
)
2878 rs6000_vector_unit
[V4SFmode
] = VECTOR_ALTIVEC
;
2879 rs6000_vector_mem
[V4SFmode
] = VECTOR_ALTIVEC
;
2880 rs6000_vector_align
[V4SFmode
] = align32
;
2883 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2887 rs6000_vector_unit
[V4SImode
] = VECTOR_ALTIVEC
;
2888 rs6000_vector_unit
[V8HImode
] = VECTOR_ALTIVEC
;
2889 rs6000_vector_unit
[V16QImode
] = VECTOR_ALTIVEC
;
2890 rs6000_vector_align
[V4SImode
] = align32
;
2891 rs6000_vector_align
[V8HImode
] = align32
;
2892 rs6000_vector_align
[V16QImode
] = align32
;
2896 rs6000_vector_mem
[V4SImode
] = VECTOR_VSX
;
2897 rs6000_vector_mem
[V8HImode
] = VECTOR_VSX
;
2898 rs6000_vector_mem
[V16QImode
] = VECTOR_VSX
;
2902 rs6000_vector_mem
[V4SImode
] = VECTOR_ALTIVEC
;
2903 rs6000_vector_mem
[V8HImode
] = VECTOR_ALTIVEC
;
2904 rs6000_vector_mem
[V16QImode
] = VECTOR_ALTIVEC
;
2908 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2909 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2912 rs6000_vector_mem
[V2DImode
] = VECTOR_VSX
;
2913 rs6000_vector_unit
[V2DImode
]
2914 = (TARGET_P8_VECTOR
) ? VECTOR_P8_VECTOR
: VECTOR_NONE
;
2915 rs6000_vector_align
[V2DImode
] = align64
;
2917 rs6000_vector_mem
[V1TImode
] = VECTOR_VSX
;
2918 rs6000_vector_unit
[V1TImode
]
2919 = (TARGET_P8_VECTOR
) ? VECTOR_P8_VECTOR
: VECTOR_NONE
;
2920 rs6000_vector_align
[V1TImode
] = 128;
2923 /* DFmode, see if we want to use the VSX unit. Memory is handled
2924 differently, so don't set rs6000_vector_mem. */
2927 rs6000_vector_unit
[DFmode
] = VECTOR_VSX
;
2928 rs6000_vector_align
[DFmode
] = 64;
2931 /* SFmode, see if we want to use the VSX unit. */
2932 if (TARGET_P8_VECTOR
)
2934 rs6000_vector_unit
[SFmode
] = VECTOR_VSX
;
2935 rs6000_vector_align
[SFmode
] = 32;
2938 /* Allow TImode in VSX register and set the VSX memory macros. */
2941 rs6000_vector_mem
[TImode
] = VECTOR_VSX
;
2942 rs6000_vector_align
[TImode
] = align64
;
2945 /* Add support for vector pairs and vector quad registers. */
2948 rs6000_vector_unit
[OOmode
] = VECTOR_NONE
;
2949 rs6000_vector_mem
[OOmode
] = VECTOR_VSX
;
2950 rs6000_vector_align
[OOmode
] = 256;
2952 rs6000_vector_unit
[XOmode
] = VECTOR_NONE
;
2953 rs6000_vector_mem
[XOmode
] = VECTOR_VSX
;
2954 rs6000_vector_align
[XOmode
] = 512;
2957 /* Register class constraints for the constraints that depend on compile
2958 switches. When the VSX code was added, different constraints were added
2959 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2960 of the VSX registers are used. The register classes for scalar floating
2961 point types is set, based on whether we allow that type into the upper
2962 (Altivec) registers. GCC has register classes to target the Altivec
2963 registers for load/store operations, to select using a VSX memory
2964 operation instead of the traditional floating point operation. The
2967 d - Register class to use with traditional DFmode instructions.
2968 v - Altivec register.
2969 wa - Any VSX register.
2970 wc - Reserved to represent individual CR bits (used in LLVM).
2971 wn - always NO_REGS.
2972 wr - GPR if 64-bit mode is permitted.
2973 wx - Float register if we can do 32-bit int stores. */
2975 if (TARGET_HARD_FLOAT
)
2976 rs6000_constraints
[RS6000_CONSTRAINT_d
] = FLOAT_REGS
;
2978 rs6000_constraints
[RS6000_CONSTRAINT_v
] = ALTIVEC_REGS
;
2980 rs6000_constraints
[RS6000_CONSTRAINT_wa
] = VSX_REGS
;
2982 if (TARGET_POWERPC64
)
2984 rs6000_constraints
[RS6000_CONSTRAINT_wr
] = GENERAL_REGS
;
2985 rs6000_constraints
[RS6000_CONSTRAINT_wA
] = BASE_REGS
;
2989 rs6000_constraints
[RS6000_CONSTRAINT_wx
] = FLOAT_REGS
; /* DImode */
2991 /* Support for new direct moves (ISA 3.0 + 64bit). */
2992 if (TARGET_DIRECT_MOVE_128
)
2993 rs6000_constraints
[RS6000_CONSTRAINT_we
] = VSX_REGS
;
2995 /* Set up the reload helper and direct move functions. */
2996 if (TARGET_VSX
|| TARGET_ALTIVEC
)
3000 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_di_store
;
3001 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_di_load
;
3002 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_di_store
;
3003 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_di_load
;
3004 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_di_store
;
3005 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_di_load
;
3006 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_di_store
;
3007 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_di_load
;
3008 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_di_store
;
3009 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_di_load
;
3010 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_di_store
;
3011 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_di_load
;
3012 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_di_store
;
3013 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_di_load
;
3014 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_di_store
;
3015 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_di_load
;
3016 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_di_store
;
3017 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_di_load
;
3018 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_di_store
;
3019 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_di_load
;
3021 if (FLOAT128_VECTOR_P (KFmode
))
3023 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_di_store
;
3024 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_di_load
;
3027 if (FLOAT128_VECTOR_P (TFmode
))
3029 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_di_store
;
3030 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_di_load
;
3033 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3035 if (TARGET_NO_SDMODE_STACK
)
3037 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_di_store
;
3038 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_di_load
;
3043 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_di_store
;
3044 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_di_load
;
3047 if (TARGET_DIRECT_MOVE
&& !TARGET_DIRECT_MOVE_128
)
3049 reg_addr
[TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxti
;
3050 reg_addr
[V1TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv1ti
;
3051 reg_addr
[V2DFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2df
;
3052 reg_addr
[V2DImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2di
;
3053 reg_addr
[V4SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4sf
;
3054 reg_addr
[V4SImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4si
;
3055 reg_addr
[V8HImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv8hi
;
3056 reg_addr
[V16QImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv16qi
;
3057 reg_addr
[SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxsf
;
3059 reg_addr
[TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprti
;
3060 reg_addr
[V1TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv1ti
;
3061 reg_addr
[V2DFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2df
;
3062 reg_addr
[V2DImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2di
;
3063 reg_addr
[V4SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4sf
;
3064 reg_addr
[V4SImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4si
;
3065 reg_addr
[V8HImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv8hi
;
3066 reg_addr
[V16QImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv16qi
;
3067 reg_addr
[SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprsf
;
3069 if (FLOAT128_VECTOR_P (KFmode
))
3071 reg_addr
[KFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxkf
;
3072 reg_addr
[KFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprkf
;
3075 if (FLOAT128_VECTOR_P (TFmode
))
3077 reg_addr
[TFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxtf
;
3078 reg_addr
[TFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprtf
;
3083 reg_addr
[OOmode
].reload_store
= CODE_FOR_reload_oo_di_store
;
3084 reg_addr
[OOmode
].reload_load
= CODE_FOR_reload_oo_di_load
;
3085 reg_addr
[XOmode
].reload_store
= CODE_FOR_reload_xo_di_store
;
3086 reg_addr
[XOmode
].reload_load
= CODE_FOR_reload_xo_di_load
;
3092 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_si_store
;
3093 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_si_load
;
3094 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_si_store
;
3095 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_si_load
;
3096 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_si_store
;
3097 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_si_load
;
3098 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_si_store
;
3099 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_si_load
;
3100 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_si_store
;
3101 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_si_load
;
3102 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_si_store
;
3103 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_si_load
;
3104 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_si_store
;
3105 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_si_load
;
3106 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_si_store
;
3107 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_si_load
;
3108 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_si_store
;
3109 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_si_load
;
3110 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_si_store
;
3111 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_si_load
;
3113 if (FLOAT128_VECTOR_P (KFmode
))
3115 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_si_store
;
3116 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_si_load
;
3119 if (FLOAT128_IEEE_P (TFmode
))
3121 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_si_store
;
3122 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_si_load
;
3125 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3127 if (TARGET_NO_SDMODE_STACK
)
3129 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_si_store
;
3130 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_si_load
;
3135 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_si_store
;
3136 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_si_load
;
3139 if (TARGET_DIRECT_MOVE
)
3141 reg_addr
[DImode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdi
;
3142 reg_addr
[DDmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdd
;
3143 reg_addr
[DFmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdf
;
3147 reg_addr
[DFmode
].scalar_in_vmx_p
= true;
3148 reg_addr
[DImode
].scalar_in_vmx_p
= true;
3150 if (TARGET_P8_VECTOR
)
3152 reg_addr
[SFmode
].scalar_in_vmx_p
= true;
3153 reg_addr
[SImode
].scalar_in_vmx_p
= true;
3155 if (TARGET_P9_VECTOR
)
3157 reg_addr
[HImode
].scalar_in_vmx_p
= true;
3158 reg_addr
[QImode
].scalar_in_vmx_p
= true;
3163 /* Precalculate HARD_REGNO_NREGS. */
3164 for (r
= 0; HARD_REGISTER_NUM_P (r
); ++r
)
3165 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3166 rs6000_hard_regno_nregs
[m
][r
]
3167 = rs6000_hard_regno_nregs_internal (r
, (machine_mode
) m
);
3169 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3170 for (r
= 0; HARD_REGISTER_NUM_P (r
); ++r
)
3171 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3172 rs6000_hard_regno_mode_ok_p
[m
][r
]
3173 = rs6000_hard_regno_mode_ok_uncached (r
, (machine_mode
) m
);
3175 /* Precalculate CLASS_MAX_NREGS sizes. */
3176 for (c
= 0; c
< LIM_REG_CLASSES
; ++c
)
3180 if (TARGET_VSX
&& VSX_REG_CLASS_P (c
))
3181 reg_size
= UNITS_PER_VSX_WORD
;
3183 else if (c
== ALTIVEC_REGS
)
3184 reg_size
= UNITS_PER_ALTIVEC_WORD
;
3186 else if (c
== FLOAT_REGS
)
3187 reg_size
= UNITS_PER_FP_WORD
;
3190 reg_size
= UNITS_PER_WORD
;
3192 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3194 machine_mode m2
= (machine_mode
)m
;
3195 int reg_size2
= reg_size
;
3197 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3199 if (TARGET_VSX
&& VSX_REG_CLASS_P (c
) && FLOAT128_2REG_P (m
))
3200 reg_size2
= UNITS_PER_FP_WORD
;
3202 rs6000_class_max_nregs
[m
][c
]
3203 = (GET_MODE_SIZE (m2
) + reg_size2
- 1) / reg_size2
;
3207 /* Calculate which modes to automatically generate code to use the
3208 reciprocal divide and square root instructions. In the future, possibly
3209 automatically generate the instructions even if the user did not specify
3210 -mrecip. The older machines double precision reciprocal sqrt estimate is
3211 not accurate enough. */
3212 memset (rs6000_recip_bits
, 0, sizeof (rs6000_recip_bits
));
3214 rs6000_recip_bits
[SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3216 rs6000_recip_bits
[DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3217 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3218 rs6000_recip_bits
[V4SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3219 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3220 rs6000_recip_bits
[V2DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3222 if (TARGET_FRSQRTES
)
3223 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3225 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3226 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3227 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3228 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3229 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3231 if (rs6000_recip_control
)
3233 if (!flag_finite_math_only
)
3234 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3236 if (flag_trapping_math
)
3237 warning (0, "%qs requires %qs or %qs", "-mrecip",
3238 "-fno-trapping-math", "-ffast-math");
3239 if (!flag_reciprocal_math
)
3240 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3242 if (flag_finite_math_only
&& !flag_trapping_math
&& flag_reciprocal_math
)
3244 if (RS6000_RECIP_HAVE_RE_P (SFmode
)
3245 && (rs6000_recip_control
& RECIP_SF_DIV
) != 0)
3246 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3248 if (RS6000_RECIP_HAVE_RE_P (DFmode
)
3249 && (rs6000_recip_control
& RECIP_DF_DIV
) != 0)
3250 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3252 if (RS6000_RECIP_HAVE_RE_P (V4SFmode
)
3253 && (rs6000_recip_control
& RECIP_V4SF_DIV
) != 0)
3254 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3256 if (RS6000_RECIP_HAVE_RE_P (V2DFmode
)
3257 && (rs6000_recip_control
& RECIP_V2DF_DIV
) != 0)
3258 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3260 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode
)
3261 && (rs6000_recip_control
& RECIP_SF_RSQRT
) != 0)
3262 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3264 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode
)
3265 && (rs6000_recip_control
& RECIP_DF_RSQRT
) != 0)
3266 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3268 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode
)
3269 && (rs6000_recip_control
& RECIP_V4SF_RSQRT
) != 0)
3270 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3272 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode
)
3273 && (rs6000_recip_control
& RECIP_V2DF_RSQRT
) != 0)
3274 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3278 /* Update the addr mask bits in reg_addr to help secondary reload and go if
3279 legitimate address support to figure out the appropriate addressing to
3281 rs6000_setup_reg_addr_masks ();
3283 if (global_init_p
|| TARGET_DEBUG_TARGET
)
3285 if (TARGET_DEBUG_REG
)
3286 rs6000_debug_reg_global ();
3288 if (TARGET_DEBUG_COST
|| TARGET_DEBUG_REG
)
3290 "SImode variable mult cost = %d\n"
3291 "SImode constant mult cost = %d\n"
3292 "SImode short constant mult cost = %d\n"
3293 "DImode multipliciation cost = %d\n"
3294 "SImode division cost = %d\n"
3295 "DImode division cost = %d\n"
3296 "Simple fp operation cost = %d\n"
3297 "DFmode multiplication cost = %d\n"
3298 "SFmode division cost = %d\n"
3299 "DFmode division cost = %d\n"
3300 "cache line size = %d\n"
3301 "l1 cache size = %d\n"
3302 "l2 cache size = %d\n"
3303 "simultaneous prefetches = %d\n"
3306 rs6000_cost
->mulsi_const
,
3307 rs6000_cost
->mulsi_const9
,
3315 rs6000_cost
->cache_line_size
,
3316 rs6000_cost
->l1_cache_size
,
3317 rs6000_cost
->l2_cache_size
,
3318 rs6000_cost
->simultaneous_prefetches
);
3323 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
/* NOTE(review): this chunk is a line-mangled extraction; the "static void"
   specifier, the braces, and several condition/continuation lines were
   dropped by the extractor, so only the surviving tokens are annotated.  */
3326 darwin_rs6000_override_options (void)
3328 /* The Darwin ABI always includes AltiVec, can't be (validly) turned
/* Force the AltiVec ABI, VRSAVE usage, and the Darwin ABI identifier on
   unconditionally.  */
3330 rs6000_altivec_abi
= 1;
3331 TARGET_ALTIVEC_VRSAVE
= 1;
3332 rs6000_current_abi
= ABI_DARWIN
;
/* Darwin uses one-byte bool when targeting the Darwin ABI (the rest of
   this condition was dropped by the extractor — TODO confirm).  */
3334 if (DEFAULT_ABI
== ABI_DARWIN
3336 darwin_one_byte_bool
= 1;
/* -m64 needs the 64-bit PowerPC ISA: enable OPTION_MASK_POWERPC64 and
   warn rather than erroring out.  */
3338 if (TARGET_64BIT
&& ! TARGET_POWERPC64
)
3340 rs6000_isa_flags
|= OPTION_MASK_POWERPC64
;
3341 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3344 /* The linkers [ld64] that support 64Bit do not need the JBSR longcall
3345 optimisation, and will not work with the most generic case (where the
3346 symbol is undefined external, but there is no symbol stub). */
3348 rs6000_default_long_calls
= 0;
3350 /* ld_classic is (so far) still used for kernel (static) code, and supports
3351 the JBSR longcall / branch islands. */
3354 rs6000_default_long_calls
= 1;
3356 /* Allow a kext author to do -mkernel -mhard-float. */
/* Only default to soft-float when the user did not choose a float mode
   explicitly on the command line (explicit bit unset).  */
3357 if (! (rs6000_isa_flags_explicit
& OPTION_MASK_SOFT_FLOAT
))
3358 rs6000_isa_flags
|= OPTION_MASK_SOFT_FLOAT
;
3361 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
/* ...but never override an explicit -maltivec/-mno-altivec, and never in
   kernel (-mkernel) or kext (-fapple-kext) builds.  */
3363 if (!flag_mkernel
&& !flag_apple_kext
3365 && ! (rs6000_isa_flags_explicit
& OPTION_MASK_ALTIVEC
))
3366 rs6000_isa_flags
|= OPTION_MASK_ALTIVEC
;
3368 /* Unless the user (not the configurer) has explicitly overridden
3369 it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
3370 G4 unless targeting the kernel. */
/* strverscmp >= 0 means the macOS deployment target is at least 10.5;
   the leading line(s) of this condition were dropped by the extractor.  */
3373 && strverscmp (darwin_macosx_version_min
, "10.5") >= 0
3374 && ! (rs6000_isa_flags_explicit
& OPTION_MASK_ALTIVEC
)
3375 && ! OPTION_SET_P (rs6000_cpu_index
))
3377 rs6000_isa_flags
|= OPTION_MASK_ALTIVEC
;
3382 /* If not otherwise specified by a target, make 'long double' equivalent to
3385 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3386 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3389 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3390 to clobber the XER[CA] bit because clobbering that bit without telling
3391 the compiler worked just fine with versions of GCC before GCC 5, and
3392 breaking a lot of older code in ways that are hard to track down is
3393 not such a great idea. */
/* NOTE(review): the return-type line and the trailing return statement were
   dropped by the extractor; only the surviving tokens appear below.  Most
   parameters are unused here (names commented out), which matches the
   TARGET_MD_ASM_ADJUST hook signature.  */
3396 rs6000_md_asm_adjust (vec
<rtx
> & /*outputs*/, vec
<rtx
> & /*inputs*/,
3397 vec
<machine_mode
> & /*input_modes*/,
3398 vec
<const char *> & /*constraints*/,
3399 vec
<rtx
> &/*uses*/, vec
<rtx
> &clobbers
,
3400 HARD_REG_SET
&clobbered_regs
, location_t
/*loc*/)
/* Record the carry register (CA_REGNO, i.e. XER[CA]) as clobbered by every
   asm: push an SImode REG onto the clobber vector and set its bit in the
   clobbered hard-register set.  */
3402 clobbers
.safe_push (gen_rtx_REG (SImode
, CA_REGNO
));
3403 SET_HARD_REG_BIT (clobbered_regs
, CA_REGNO
);
3407 /* This target function is similar to the hook TARGET_OPTION_OVERRIDE
3408 but is called when the optimize level is changed via an attribute or
3409 pragma or when it is reset at the end of the code affected by the
3410 attribute or pragma. It is not called at the beginning of compilation
3411 when TARGET_OPTION_OVERRIDE is called so if you want to perform these
3412 actions then, you should have TARGET_OPTION_OVERRIDE call
3413 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
/* NOTE(review): line-mangled extraction — the return-type line and braces
   were dropped; code tokens below are kept byte-identical.  */
3416 rs6000_override_options_after_change (void)
3418 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3419 turns -frename-registers on. */
3420 if ((OPTION_SET_P (flag_unroll_loops
) && flag_unroll_loops
)
3421 || (OPTION_SET_P (flag_unroll_all_loops
)
3422 && flag_unroll_all_loops
))
/* Only flip each dependent flag when the user did not set it herself.  */
3424 if (!OPTION_SET_P (unroll_only_small_loops
))
3425 unroll_only_small_loops
= 0;
3426 if (!OPTION_SET_P (flag_rename_registers
))
3427 flag_rename_registers
= 1;
3428 if (!OPTION_SET_P (flag_cunroll_grow_size
))
3429 flag_cunroll_grow_size
= 1;
/* Without explicit unrolling requests, allow code-growing complete
   unrolling only for -fpeel-loops or -O3 and above.  */
3431 else if (!OPTION_SET_P (flag_cunroll_grow_size
))
3432 flag_cunroll_grow_size
= flag_peel_loops
|| optimize
>= 3;
3434 /* If we are inserting ROP-protect instructions, disable shrink wrap. */
3435 if (rs6000_rop_protect
)
3436 flag_shrink_wrap
= 0;
3438 /* One of the late-combine passes runs after register allocation
3439 and can match define_insn_and_splits that were previously used
3440 only before register allocation. Some of those define_insn_and_splits
3441 use gen_reg_rtx unconditionally. Disable late-combine by default
3442 until the define_insn_and_splits are fixed. */
3443 if (!OPTION_SET_P (flag_late_combine_instructions
))
3444 flag_late_combine_instructions
= 0;
3447 #ifdef TARGET_USES_LINUX64_OPT
/* Linux64-specific option overriding: pick ABI, code model, and TOC
   behaviour for 64-bit, and reject 64-bit-only options in 32-bit mode.
   NOTE(review): line-mangled extraction — the return-type line, braces and
   several statements were dropped; code tokens below are byte-identical.  */
3449 rs6000_linux64_override_options ()
/* Default to natural alignment unless the user chose otherwise.  */
3451 if (!OPTION_SET_P (rs6000_alignment_flags
))
3452 rs6000_alignment_flags
= MASK_ALIGN_NATURAL
;
/* 64-bit path: force the AIX-style ABI and reject incompatible options,
   each via the INVALID_64BIT error template.  */
3453 if (rs6000_isa_flags
& OPTION_MASK_64BIT
)
3455 if (DEFAULT_ABI
!= ABI_AIX
)
3457 rs6000_current_abi
= ABI_AIX
;
3458 error (INVALID_64BIT
, "call");
/* "aixdesc" ABI name implies dot-symbol (function-descriptor) naming.  */
3460 dot_symbols
= !strcmp (rs6000_abi_name
, "aixdesc");
3461 if (ELFv2_ABI_CHECK
)
3463 rs6000_current_abi
= ABI_ELFv2
;
3465 error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
3467 if (rs6000_isa_flags
& OPTION_MASK_RELOCATABLE
)
3469 rs6000_isa_flags
&= ~OPTION_MASK_RELOCATABLE
;
3470 error (INVALID_64BIT
, "relocatable");
3472 if (rs6000_isa_flags
& OPTION_MASK_EABI
)
3474 rs6000_isa_flags
&= ~OPTION_MASK_EABI
;
3475 error (INVALID_64BIT
, "eabi");
3477 if (TARGET_PROTOTYPE
)
3479 target_prototype
= 0;
3480 error (INVALID_64BIT
, "prototype");
/* -m64 without a 64-bit-capable CPU: enable the mask anyway so later
   checks are consistent, but diagnose the mismatch.  */
3482 if ((rs6000_isa_flags
& OPTION_MASK_POWERPC64
) == 0)
3484 rs6000_isa_flags
|= OPTION_MASK_POWERPC64
;
3485 error ("%<-m64%> requires a PowerPC64 cpu");
/* Code model defaults to medium for 64-bit unless set explicitly.  */
3487 if (!OPTION_SET_P (rs6000_current_cmodel
))
3488 SET_CMODEL (CMODEL_MEDIUM
);
/* Explicit -m(no-)minimal-toc interacts with the code model: an explicit
   non-small cmodel conflicts; -mminimal-toc forces the small model.  */
3489 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MINIMAL_TOC
) != 0)
3491 if (OPTION_SET_P (rs6000_current_cmodel
)
3492 && rs6000_current_cmodel
!= CMODEL_SMALL
)
3493 error ("%<-mcmodel%> incompatible with other toc options");
3494 if (TARGET_MINIMAL_TOC
)
3495 SET_CMODEL (CMODEL_SMALL
);
3496 else if (TARGET_PCREL
3497 || (PCREL_SUPPORTED_BY_OS
3498 && (rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) == 0))
3499 /* Ignore -mno-minimal-toc. */
3502 SET_CMODEL (CMODEL_SMALL
);
/* Non-small code models: tune what goes into the TOC.  */
3504 if (rs6000_current_cmodel
!= CMODEL_SMALL
)
3506 if (!OPTION_SET_P (TARGET_NO_FP_IN_TOC
))
3507 TARGET_NO_FP_IN_TOC
= rs6000_current_cmodel
== CMODEL_MEDIUM
;
3508 if (!OPTION_SET_P (TARGET_NO_SUM_IN_TOC
))
3509 TARGET_NO_SUM_IN_TOC
= 0;
/* PLT-sequence support is an ELFv2-only feature; warn only when the user
   asked for it explicitly, then turn it off.  */
3511 if (TARGET_PLTSEQ
&& DEFAULT_ABI
!= ABI_ELFv2
)
3513 if (OPTION_SET_P (rs6000_pltseq
))
3514 warning (0, "%qs unsupported for this ABI",
3516 rs6000_pltseq
= false;
/* 32-bit path: reject options that only make sense in 64-bit mode, via
   the INVALID_32BIT error template.  */
3519 else if (TARGET_64BIT
)
3520 error (INVALID_32BIT
, "32");
3523 if (TARGET_PROFILE_KERNEL
)
3526 error (INVALID_32BIT
, "profile-kernel");
3528 if (OPTION_SET_P (rs6000_current_cmodel
))
3530 SET_CMODEL (CMODEL_SMALL
);
3531 error (INVALID_32BIT
, "cmodel");
3537 /* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
3538 This support is only in little endian GLIBC 2.32 or newer. */
/* NOTE(review): the return-type line, braces, and the return statements
   were dropped by the extractor; only the condition survives.  */
3540 glibc_supports_ieee_128bit (void)
/* major * 1000 + minor >= 2032 encodes "glibc version >= 2.32"; the
   feature additionally requires a little-endian target.  */
3543 if (OPTION_GLIBC
&& !BYTES_BIG_ENDIAN
3544 && ((TARGET_GLIBC_MAJOR
* 1000) + TARGET_GLIBC_MINOR
) >= 2032)
3546 #endif /* OPTION_GLIBC.  */
3551 /* Override command line options.
3553 Combine build-specific configuration information with options
3554 specified on the command line to set various state variables which
3555 influence code generation, optimization, and expansion of built-in
3556 functions. Assure that command-line configuration preferences are
3557 compatible with each other and with the build configuration; issue
3558 warnings while adjusting configuration or error messages while
3559 rejecting configuration.
3561 Upon entry to this function:
3563 This function is called once at the beginning of
3564 compilation, and then again at the start and end of compiling
3565 each section of code that has a different configuration, as
3566 indicated, for example, by adding the
3568 __attribute__((__target__("cpu=power9")))
3570 qualifier to a function definition or, for example, by bracketing
3573 #pragma GCC target("altivec")
3577 #pragma GCC reset_options
3579 directives. Parameter global_init_p is true for the initial
3580 invocation, which initializes global variables, and false for all
3581 subsequent invocations.
3584 Various global state information is assumed to be valid. This
3585 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3586 default CPU specified at build configure time, TARGET_DEFAULT,
3587 representing the default set of option flags for the default
3588 target, and OPTION_SET_P (rs6000_isa_flags), representing
3589 which options were requested on the command line.
3591 Upon return from this function:
3593 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3594 was set by name on the command line. Additionally, if certain
3595 attributes are automatically enabled or disabled by this function
3596 in order to assure compatibility between options and
3597 configuration, the flags associated with those attributes are
3598 also set. By setting these "explicit bits", we avoid the risk
3599 that other code might accidentally overwrite these particular
3600 attributes with "default values".
3602 The various bits of rs6000_isa_flags are set to indicate the
3603 target options that have been selected for the most current
3604 compilation efforts. This has the effect of also turning on the
3605 associated TARGET_XXX values since these are macros which are
3606 generally defined to test the corresponding bit of the
3607 rs6000_isa_flags variable.
3609 Various other global variables and fields of global structures
3610 (over 50 in all) are initialized to reflect the desired options
3611 for the most current compilation efforts. */
3614 rs6000_option_override_internal (bool global_init_p
)
3618 HOST_WIDE_INT set_masks
;
3619 HOST_WIDE_INT ignore_masks
;
3622 struct cl_target_option
*main_target_opt
3623 = ((global_init_p
|| target_option_default_node
== NULL
)
3624 ? NULL
: TREE_TARGET_OPTION (target_option_default_node
));
3626 /* Print defaults. */
3627 if ((TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
) && global_init_p
)
3628 rs6000_print_isa_options (stderr
, 0, "TARGET_DEFAULT", TARGET_DEFAULT
);
3630 /* Remember the explicit arguments. */
3632 rs6000_isa_flags_explicit
= OPTION_SET_P (rs6000_isa_flags
);
3634 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3635 library functions, so warn about it. The flag may be useful for
3636 performance studies from time to time though, so don't disable it
3638 if (OPTION_SET_P (rs6000_alignment_flags
)
3639 && rs6000_alignment_flags
== MASK_ALIGN_POWER
3640 && DEFAULT_ABI
== ABI_DARWIN
3642 warning (0, "%qs is not supported for 64-bit Darwin;"
3643 " it is incompatible with the installed C and C++ libraries",
3646 /* Numerous experiments show that IRA based loop pressure
3647 calculation works better for RTL loop invariant motion on targets
3648 with enough (>= 32) registers. It is an expensive optimization.
3649 So it is on only for peak performance. */
3650 if (optimize
>= 3 && global_init_p
3651 && !OPTION_SET_P (flag_ira_loop_pressure
))
3652 flag_ira_loop_pressure
= 1;
3654 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3655 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3656 options were already specified. */
3657 if (flag_sanitize
& SANITIZE_USER_ADDRESS
3658 && !OPTION_SET_P (flag_asynchronous_unwind_tables
))
3659 flag_asynchronous_unwind_tables
= 1;
3661 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3662 loop unroller is active. It is only checked during unrolling, so
3663 we can just set it on by default. */
3664 if (!OPTION_SET_P (flag_variable_expansion_in_unroller
))
3665 flag_variable_expansion_in_unroller
= 1;
3667 /* Set the pointer size. */
3670 rs6000_pmode
= DImode
;
3671 rs6000_pointer_size
= 64;
3675 rs6000_pmode
= SImode
;
3676 rs6000_pointer_size
= 32;
3679 /* Some OSs don't support saving Altivec registers. On those OSs, we don't
3680 touch the OPTION_MASK_ALTIVEC settings; if the user wants it, the user
3681 must explicitly specify it and we won't interfere with the user's
3684 set_masks
= POWERPC_MASKS
;
3685 #ifdef OS_MISSING_ALTIVEC
3686 if (OS_MISSING_ALTIVEC
)
3687 set_masks
&= ~(OPTION_MASK_ALTIVEC
| OPTION_MASK_VSX
3688 | OTHER_VSX_VECTOR_MASKS
);
3691 /* Don't override by the processor default if given explicitly. */
3692 set_masks
&= ~rs6000_isa_flags_explicit
;
3694 /* Without option powerpc64 specified explicitly, we need to ensure
3695 powerpc64 always enabled for 64 bit here, otherwise some following
3696 checks can use unexpected TARGET_POWERPC64 value. */
3697 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_POWERPC64
)
3700 rs6000_isa_flags
|= OPTION_MASK_POWERPC64
;
3701 /* Need to stop powerpc64 from being unset in later processing,
3702 so clear it in set_masks. But as PR108240 shows, to keep it
3703 consistent with before, we want to make this only if 64 bit
3704 is enabled explicitly. This is a hack, revisit this later. */
3705 if (rs6000_isa_flags_explicit
& OPTION_MASK_64BIT
)
3706 set_masks
&= ~OPTION_MASK_POWERPC64
;
3709 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
3710 the cpu in a target attribute or pragma, but did not specify a tuning
3711 option, use the cpu for the tuning option rather than the option specified
3712 with -mtune on the command line. Process a '--with-cpu' configuration
3713 request as an implicit --cpu. */
3714 if (rs6000_cpu_index
>= 0)
3715 cpu_index
= rs6000_cpu_index
;
3716 else if (main_target_opt
!= NULL
&& main_target_opt
->x_rs6000_cpu_index
>= 0)
3717 cpu_index
= main_target_opt
->x_rs6000_cpu_index
;
3718 else if (OPTION_TARGET_CPU_DEFAULT
)
3719 cpu_index
= rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT
);
3721 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3722 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3723 with those from the cpu, except for options that were explicitly set. If
3724 we don't have a cpu, do not override the target bits set in
3728 rs6000_cpu_index
= cpu_index
;
3729 rs6000_isa_flags
&= ~set_masks
;
3730 rs6000_isa_flags
|= (processor_target_table
[cpu_index
].target_enable
3735 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3736 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3737 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3738 to using rs6000_isa_flags, we need to do the initialization here.
3740 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3741 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3742 HOST_WIDE_INT flags
;
3744 flags
= TARGET_DEFAULT
;
3747 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3748 const char *default_cpu
= (!TARGET_POWERPC64
3753 int default_cpu_index
= rs6000_cpu_name_lookup (default_cpu
);
3754 flags
= processor_target_table
[default_cpu_index
].target_enable
;
3756 rs6000_isa_flags
|= (flags
& ~rs6000_isa_flags_explicit
);
3759 /* Don't expect powerpc64 enabled on those OSes with OS_MISSING_POWERPC64,
3760 since they do not save and restore the high half of the GPRs correctly
3761 in all cases. If the user explicitly specifies it, we won't interfere
3762 with the user's specification. */
3763 #ifdef OS_MISSING_POWERPC64
3764 if (OS_MISSING_POWERPC64
3767 && !(rs6000_isa_flags_explicit
& OPTION_MASK_POWERPC64
))
3768 rs6000_isa_flags
&= ~OPTION_MASK_POWERPC64
;
3771 if (rs6000_tune_index
>= 0)
3772 tune_index
= rs6000_tune_index
;
3773 else if (cpu_index
>= 0)
3774 rs6000_tune_index
= tune_index
= cpu_index
;
3778 enum processor_type tune_proc
3779 = (TARGET_POWERPC64
? PROCESSOR_DEFAULT64
: PROCESSOR_DEFAULT
);
3782 for (i
= 0; i
< ARRAY_SIZE (processor_target_table
); i
++)
3783 if (processor_target_table
[i
].processor
== tune_proc
)
3791 rs6000_cpu
= processor_target_table
[cpu_index
].processor
;
3793 rs6000_cpu
= TARGET_POWERPC64
? PROCESSOR_DEFAULT64
: PROCESSOR_DEFAULT
;
3795 gcc_assert (tune_index
>= 0);
3796 rs6000_tune
= processor_target_table
[tune_index
].processor
;
3798 if (rs6000_cpu
== PROCESSOR_PPCE300C2
|| rs6000_cpu
== PROCESSOR_PPCE300C3
3799 || rs6000_cpu
== PROCESSOR_PPCE500MC
|| rs6000_cpu
== PROCESSOR_PPCE500MC64
3800 || rs6000_cpu
== PROCESSOR_PPCE5500
)
3803 error ("AltiVec not supported in this target");
3806 /* If we are optimizing big endian systems for space, use the load/store
3807 multiple instructions. */
3808 if (BYTES_BIG_ENDIAN
&& optimize_size
)
3809 rs6000_isa_flags
|= ~rs6000_isa_flags_explicit
& OPTION_MASK_MULTIPLE
;
3811 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3812 because the hardware doesn't support the instructions used in little
3813 endian mode, and causes an alignment trap. The 750 does not cause an
3814 alignment trap (except when the target is unaligned). */
3816 if (!BYTES_BIG_ENDIAN
&& rs6000_cpu
!= PROCESSOR_PPC750
&& TARGET_MULTIPLE
)
3818 rs6000_isa_flags
&= ~OPTION_MASK_MULTIPLE
;
3819 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MULTIPLE
) != 0)
3820 warning (0, "%qs is not supported on little endian systems",
3824 /* If little-endian, default to -mstrict-align on older processors. */
3825 if (!BYTES_BIG_ENDIAN
3826 && !(processor_target_table
[tune_index
].target_enable
3827 & OPTION_MASK_POWER8
))
3828 rs6000_isa_flags
|= ~rs6000_isa_flags_explicit
& OPTION_MASK_STRICT_ALIGN
;
3830 /* Add some warnings for VSX. */
3833 bool explicit_vsx_p
= rs6000_isa_flags_explicit
& OPTION_MASK_VSX
;
3834 if (!TARGET_HARD_FLOAT
)
3838 if (rs6000_isa_flags_explicit
& OPTION_MASK_SOFT_FLOAT
)
3839 error ("%<-mvsx%> and %<-msoft-float%> are incompatible");
3841 warning (0, N_("%<-mvsx%> requires hardware floating-point"));
3843 rs6000_isa_flags
&= ~OPTION_MASK_VSX
;
3844 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
3846 else if (TARGET_AVOID_XFORM
> 0)
3848 if (explicit_vsx_p
&& OPTION_SET_P (TARGET_AVOID_XFORM
))
3849 error ("%<-mvsx%> and %<-mavoid-indexed-addresses%>"
3850 " are incompatible");
3852 warning (0, N_("%<-mvsx%> needs indexed addressing"));
3853 rs6000_isa_flags
&= ~OPTION_MASK_VSX
;
3854 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
3856 else if (!TARGET_ALTIVEC
3857 && (rs6000_isa_flags_explicit
& OPTION_MASK_ALTIVEC
))
3860 error ("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3862 warning (0, N_("%<-mno-altivec%> disables vsx"));
3863 rs6000_isa_flags
&= ~OPTION_MASK_VSX
;
3864 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
3868 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3869 the -mcpu setting to enable options that conflict. */
3870 if ((!TARGET_HARD_FLOAT
|| !TARGET_ALTIVEC
|| !TARGET_VSX
)
3871 && (rs6000_isa_flags_explicit
& (OPTION_MASK_SOFT_FLOAT
3872 | OPTION_MASK_ALTIVEC
3873 | OPTION_MASK_VSX
)) != 0)
3874 rs6000_isa_flags
&= ~((OPTION_MASK_P8_VECTOR
| OPTION_MASK_CRYPTO
)
3875 & ~rs6000_isa_flags_explicit
);
3877 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
3878 rs6000_print_isa_options (stderr
, 0, "before defaults", rs6000_isa_flags
);
3880 #ifdef XCOFF_DEBUGGING_INFO
3881 /* For AIX default to 64-bit DWARF. */
3882 if (!OPTION_SET_P (dwarf_offset_size
))
3883 dwarf_offset_size
= POINTER_SIZE_UNITS
;
3886 /* Handle explicit -mno-{altivec,vsx} and turn off all of
3887 the options that depend on those flags. */
3888 ignore_masks
= rs6000_disable_incompatible_switches ();
3890 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3891 unless the user explicitly used the -mno-<option> to disable the code. */
3892 if (TARGET_P9_VECTOR
|| TARGET_MODULO
|| TARGET_P9_MISC
)
3893 rs6000_isa_flags
|= (ISA_3_0_MASKS_SERVER
& ~ignore_masks
);
3894 else if (TARGET_P9_MINMAX
)
3898 if (cpu_index
== PROCESSOR_POWER9
)
3900 /* legacy behavior: allow -mcpu=power9 with certain
3901 capabilities explicitly disabled. */
3902 rs6000_isa_flags
|= (ISA_3_0_MASKS_SERVER
& ~ignore_masks
);
3905 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3906 "for <xxx> less than power9", "-mcpu");
3908 else if ((ISA_3_0_MASKS_SERVER
& rs6000_isa_flags_explicit
)
3909 != (ISA_3_0_MASKS_SERVER
& rs6000_isa_flags
3910 & rs6000_isa_flags_explicit
))
3911 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3912 were explicitly cleared. */
3913 error ("%qs incompatible with explicitly disabled options",
3916 rs6000_isa_flags
|= ISA_3_0_MASKS_SERVER
;
3918 else if (TARGET_P8_VECTOR
|| TARGET_POWER8
|| TARGET_CRYPTO
)
3919 rs6000_isa_flags
|= (ISA_2_7_MASKS_SERVER
& ~ignore_masks
);
3920 else if (TARGET_VSX
)
3921 rs6000_isa_flags
|= (ISA_2_6_MASKS_SERVER
& ~ignore_masks
);
3922 else if (TARGET_POPCNTD
)
3923 rs6000_isa_flags
|= (ISA_2_6_MASKS_EMBEDDED
& ~ignore_masks
);
3924 else if (TARGET_DFP
)
3925 rs6000_isa_flags
|= (ISA_2_5_MASKS_SERVER
& ~ignore_masks
);
3926 else if (TARGET_CMPB
)
3927 rs6000_isa_flags
|= (ISA_2_5_MASKS_EMBEDDED
& ~ignore_masks
);
3928 else if (TARGET_FPRND
)
3929 rs6000_isa_flags
|= (ISA_2_4_MASKS
& ~ignore_masks
);
3930 else if (TARGET_POPCNTB
)
3931 rs6000_isa_flags
|= (ISA_2_2_MASKS
& ~ignore_masks
);
3932 else if (TARGET_ALTIVEC
)
3933 rs6000_isa_flags
|= (OPTION_MASK_PPC_GFXOPT
& ~ignore_masks
);
3935 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3936 target attribute or pragma which automatically enables both options,
3937 unless the altivec ABI was set. This is set by default for 64-bit, but
3938 not for 32-bit. Don't move this before the above code using ignore_masks,
3939 since it can reset the cleared VSX/ALTIVEC flag again. */
3940 if (main_target_opt
&& !main_target_opt
->x_rs6000_altivec_abi
)
3942 rs6000_isa_flags
&= ~(OPTION_MASK_VSX
& ~rs6000_isa_flags_explicit
);
3943 /* Don't mask off ALTIVEC if it is enabled by an explicit VSX. */
3945 rs6000_isa_flags
&= ~(OPTION_MASK_ALTIVEC
& ~rs6000_isa_flags_explicit
);
3948 if (TARGET_CRYPTO
&& !TARGET_ALTIVEC
)
3950 if (rs6000_isa_flags_explicit
& OPTION_MASK_CRYPTO
)
3951 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3952 rs6000_isa_flags
&= ~OPTION_MASK_CRYPTO
;
3955 if (!TARGET_FPRND
&& TARGET_VSX
)
3957 if (rs6000_isa_flags_explicit
& OPTION_MASK_FPRND
)
3958 /* TARGET_VSX = 1 implies Power 7 and newer */
3959 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3960 rs6000_isa_flags
&= ~OPTION_MASK_FPRND
;
3963 /* Assert !TARGET_VSX if !TARGET_ALTIVEC and make some adjustments
3964 based on either !TARGET_VSX or !TARGET_ALTIVEC concise. */
3965 gcc_assert (TARGET_ALTIVEC
|| !TARGET_VSX
);
3967 if (TARGET_P8_VECTOR
&& !TARGET_VSX
)
3968 rs6000_isa_flags
&= ~OPTION_MASK_P8_VECTOR
;
3970 if (TARGET_DFP
&& !TARGET_HARD_FLOAT
)
3972 if (rs6000_isa_flags_explicit
& OPTION_MASK_DFP
)
3973 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3974 rs6000_isa_flags
&= ~OPTION_MASK_DFP
;
3977 /* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
3978 silently turn off quad memory mode. */
3979 if ((TARGET_QUAD_MEMORY
|| TARGET_QUAD_MEMORY_ATOMIC
) && !TARGET_POWERPC64
)
3981 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY
) != 0)
3982 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3984 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY_ATOMIC
) != 0)
3985 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3987 rs6000_isa_flags
&= ~(OPTION_MASK_QUAD_MEMORY
3988 | OPTION_MASK_QUAD_MEMORY_ATOMIC
);
3991 /* Non-atomic quad memory load/store are disabled for little endian, since
3992 the words are reversed, but atomic operations can still be done by
3993 swapping the words. */
3994 if (TARGET_QUAD_MEMORY
&& !WORDS_BIG_ENDIAN
)
3996 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY
) != 0)
3997 warning (0, N_("%<-mquad-memory%> is not available in little endian "
4000 rs6000_isa_flags
&= ~OPTION_MASK_QUAD_MEMORY
;
4003 /* Assume if the user asked for normal quad memory instructions, they want
4004 the atomic versions as well, unless they explicity told us not to use quad
4005 word atomic instructions. */
4006 if (TARGET_QUAD_MEMORY
4007 && !TARGET_QUAD_MEMORY_ATOMIC
4008 && ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY_ATOMIC
) == 0))
4009 rs6000_isa_flags
|= OPTION_MASK_QUAD_MEMORY_ATOMIC
;
4011 /* If we can shrink-wrap the TOC register save separately, then use
4012 -msave-toc-indirect unless explicitly disabled. */
4013 if ((rs6000_isa_flags_explicit
& OPTION_MASK_SAVE_TOC_INDIRECT
) == 0
4014 && flag_shrink_wrap_separate
4015 && optimize_function_for_speed_p (cfun
))
4016 rs6000_isa_flags
|= OPTION_MASK_SAVE_TOC_INDIRECT
;
4018 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4019 generating power8 instructions. Power9 does not optimize power8 fusion
4021 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
))
4023 if (processor_target_table
[tune_index
].processor
== PROCESSOR_POWER8
)
4024 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION
;
4026 rs6000_isa_flags
&= ~OPTION_MASK_P8_FUSION
;
4029 /* Setting additional fusion flags turns on base fusion. */
4030 if (!TARGET_P8_FUSION
&& TARGET_P8_FUSION_SIGN
)
4032 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
)
4034 if (TARGET_P8_FUSION_SIGN
)
4035 error ("%qs requires %qs", "-mpower8-fusion-sign",
4038 rs6000_isa_flags
&= ~OPTION_MASK_P8_FUSION
;
4041 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION
;
4044 /* Power8 does not fuse sign extended loads with the addis. If we are
4045 optimizing at high levels for speed, convert a sign extended load into a
4046 zero extending load, and an explicit sign extension. */
4047 if (TARGET_P8_FUSION
4048 && !(rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION_SIGN
)
4049 && optimize_function_for_speed_p (cfun
)
4051 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION_SIGN
;
4053 /* ISA 3.0 vector instructions include ISA 2.07. */
4054 if (TARGET_P9_VECTOR
&& !TARGET_P8_VECTOR
)
4055 rs6000_isa_flags
&= ~OPTION_MASK_P9_VECTOR
;
4057 /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
4058 support. If we only have ISA 2.06 support, and the user did not specify
4059 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4060 but we don't enable the full vectorization support */
4061 if (TARGET_ALLOW_MOVMISALIGN
== -1 && TARGET_P8_VECTOR
&& TARGET_DIRECT_MOVE
)
4062 TARGET_ALLOW_MOVMISALIGN
= 1;
4064 else if (TARGET_ALLOW_MOVMISALIGN
&& !TARGET_VSX
)
4066 if (TARGET_ALLOW_MOVMISALIGN
> 0
4067 && OPTION_SET_P (TARGET_ALLOW_MOVMISALIGN
))
4068 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4070 TARGET_ALLOW_MOVMISALIGN
= 0;
4073 /* Determine when unaligned vector accesses are permitted, and when
4074 they are preferred over masked Altivec loads. Note that if
4075 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4076 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4078 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4082 if (rs6000_isa_flags_explicit
& OPTION_MASK_EFFICIENT_UNALIGNED_VSX
)
4083 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4085 rs6000_isa_flags
&= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX
;
4088 else if (!TARGET_ALLOW_MOVMISALIGN
)
4090 if (rs6000_isa_flags_explicit
& OPTION_MASK_EFFICIENT_UNALIGNED_VSX
)
4091 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
4092 "-mallow-movmisalign");
4094 rs6000_isa_flags
&= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX
;
4098 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
))
4100 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4101 rs6000_isa_flags
|= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
;
4103 rs6000_isa_flags
&= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
;
4106 /* Use long double size to select the appropriate long double. We use
4107 TYPE_PRECISION to differentiate the 3 different long double types. We map
4108 128 into the precision used for TFmode. */
4109 int default_long_double_size
= (RS6000_DEFAULT_LONG_DOUBLE_SIZE
== 64
4113 /* Set long double size before the IEEE 128-bit tests. */
4114 if (!OPTION_SET_P (rs6000_long_double_type_size
))
4116 if (main_target_opt
!= NULL
4117 && (main_target_opt
->x_rs6000_long_double_type_size
4118 != default_long_double_size
))
4119 error ("target attribute or pragma changes %<long double%> size");
4121 rs6000_long_double_type_size
= default_long_double_size
;
4124 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4125 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4126 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4127 those systems will not pick up this default. Warn if the user changes the
4128 default unless -Wno-psabi. */
4129 if (!OPTION_SET_P (rs6000_ieeequad
))
4130 rs6000_ieeequad
= TARGET_IEEEQUAD_DEFAULT
;
4132 else if (TARGET_LONG_DOUBLE_128
)
4134 if (global_options
.x_rs6000_ieeequad
4135 && (!TARGET_POPCNTD
|| !TARGET_VSX
))
4136 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4138 if (rs6000_ieeequad
!= TARGET_IEEEQUAD_DEFAULT
)
4140 /* Determine if the user can change the default long double type at
4141 compilation time. You need GLIBC 2.32 or newer to be able to
4142 change the long double type. Only issue one warning. */
4143 static bool warned_change_long_double
;
4145 if (!warned_change_long_double
&& !glibc_supports_ieee_128bit ())
4147 warned_change_long_double
= true;
4148 if (TARGET_IEEEQUAD
)
4149 warning (OPT_Wpsabi
, "Using IEEE extended precision "
4152 warning (OPT_Wpsabi
, "Using IBM extended precision "
4158 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4159 sytems. In GCC 7, we would enable the IEEE 128-bit floating point
4160 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4161 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4162 the keyword as well as the type. */
4163 TARGET_FLOAT128_TYPE
= TARGET_FLOAT128_ENABLE_TYPE
&& TARGET_VSX
;
4165 /* IEEE 128-bit floating point requires VSX support. */
4166 if (TARGET_FLOAT128_KEYWORD
)
4170 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) != 0)
4171 error ("%qs requires VSX support", "-mfloat128");
4173 TARGET_FLOAT128_TYPE
= 0;
4174 rs6000_isa_flags
&= ~(OPTION_MASK_FLOAT128_KEYWORD
4175 | OPTION_MASK_FLOAT128_HW
);
4177 else if (!TARGET_FLOAT128_TYPE
)
4179 TARGET_FLOAT128_TYPE
= 1;
4180 warning (0, "The %<-mfloat128%> option may not be fully supported");
4184 /* Enable the __float128 keyword under Linux by default. */
4185 if (TARGET_FLOAT128_TYPE
&& !TARGET_FLOAT128_KEYWORD
4186 && (rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) == 0)
4187 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_KEYWORD
;
4189 /* If we have are supporting the float128 type and full ISA 3.0 support,
4190 enable -mfloat128-hardware by default. However, don't enable the
4191 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4192 because sometimes the compiler wants to put things in an integer
4193 container, and if we don't have __int128 support, it is impossible. */
4194 if (TARGET_FLOAT128_TYPE
&& !TARGET_FLOAT128_HW
&& TARGET_64BIT
4195 && (rs6000_isa_flags
& ISA_3_0_MASKS_IEEE
) == ISA_3_0_MASKS_IEEE
4196 && !(rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
))
4197 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_HW
;
4199 if (TARGET_FLOAT128_HW
4200 && (rs6000_isa_flags
& ISA_3_0_MASKS_IEEE
) != ISA_3_0_MASKS_IEEE
)
4202 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4203 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4205 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4208 if (TARGET_FLOAT128_HW
&& !TARGET_64BIT
)
4210 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4211 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4213 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4216 /* Enable -mprefixed by default on power10 systems. */
4217 if (TARGET_POWER10
&& (rs6000_isa_flags_explicit
& OPTION_MASK_PREFIXED
) == 0)
4218 rs6000_isa_flags
|= OPTION_MASK_PREFIXED
;
4220 /* -mprefixed requires -mcpu=power10 (or later). */
4221 else if (TARGET_PREFIXED
&& !TARGET_POWER10
)
4223 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PREFIXED
) != 0)
4224 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4226 rs6000_isa_flags
&= ~OPTION_MASK_PREFIXED
;
4229 /* -mpcrel requires prefixed load/store addressing. */
4230 if (TARGET_PCREL
&& !TARGET_PREFIXED
)
4232 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) != 0)
4233 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4235 rs6000_isa_flags
&= ~OPTION_MASK_PCREL
;
4238 /* Print the options after updating the defaults. */
4239 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4240 rs6000_print_isa_options (stderr
, 0, "after defaults", rs6000_isa_flags
);
4242 /* E500mc does "better" if we inline more aggressively. Respect the
4243 user's opinion, though. */
4244 if (rs6000_block_move_inline_limit
== 0
4245 && (rs6000_tune
== PROCESSOR_PPCE500MC
4246 || rs6000_tune
== PROCESSOR_PPCE500MC64
4247 || rs6000_tune
== PROCESSOR_PPCE5500
4248 || rs6000_tune
== PROCESSOR_PPCE6500
))
4249 rs6000_block_move_inline_limit
= 128;
4251 /* store_one_arg depends on expand_block_move to handle at least the
4252 size of reg_parm_stack_space. */
4253 if (rs6000_block_move_inline_limit
< (TARGET_POWERPC64
? 64 : 32))
4254 rs6000_block_move_inline_limit
= (TARGET_POWERPC64
? 64 : 32);
4258 /* If the appropriate debug option is enabled, replace the target hooks
4259 with debug versions that call the real version and then prints
4260 debugging information. */
4261 if (TARGET_DEBUG_COST
)
4263 targetm
.rtx_costs
= rs6000_debug_rtx_costs
;
4264 targetm
.address_cost
= rs6000_debug_address_cost
;
4265 targetm
.sched
.adjust_cost
= rs6000_debug_adjust_cost
;
4268 if (TARGET_DEBUG_ADDR
)
4270 targetm
.legitimate_address_p
= rs6000_debug_legitimate_address_p
;
4271 targetm
.legitimize_address
= rs6000_debug_legitimize_address
;
4272 rs6000_secondary_reload_class_ptr
4273 = rs6000_debug_secondary_reload_class
;
4274 targetm
.secondary_memory_needed
4275 = rs6000_debug_secondary_memory_needed
;
4276 targetm
.can_change_mode_class
4277 = rs6000_debug_can_change_mode_class
;
4278 rs6000_preferred_reload_class_ptr
4279 = rs6000_debug_preferred_reload_class
;
4280 rs6000_mode_dependent_address_ptr
4281 = rs6000_debug_mode_dependent_address
;
4284 if (rs6000_veclibabi_name
)
4286 if (strcmp (rs6000_veclibabi_name
, "mass") == 0)
4287 rs6000_veclib_handler
= rs6000_builtin_vectorized_libmass
;
4290 error ("unknown vectorization library ABI type in "
4291 "%<-mveclibabi=%s%>", rs6000_veclibabi_name
);
4297 /* Enable Altivec ABI for AIX -maltivec. */
4299 && (TARGET_ALTIVEC
|| TARGET_VSX
)
4300 && !OPTION_SET_P (rs6000_altivec_abi
))
4302 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_altivec_abi
)
4303 error ("target attribute or pragma changes AltiVec ABI");
4305 rs6000_altivec_abi
= 1;
4308 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4309 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4310 be explicitly overridden in either case. */
4313 if (!OPTION_SET_P (rs6000_altivec_abi
)
4314 && (TARGET_64BIT
|| TARGET_ALTIVEC
|| TARGET_VSX
))
4316 if (main_target_opt
!= NULL
&&
4317 !main_target_opt
->x_rs6000_altivec_abi
)
4318 error ("target attribute or pragma changes AltiVec ABI");
4320 rs6000_altivec_abi
= 1;
4324 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4325 So far, the only darwin64 targets are also MACH-O. */
4327 && DEFAULT_ABI
== ABI_DARWIN
4330 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_darwin64_abi
)
4331 error ("target attribute or pragma changes darwin64 ABI");
4334 rs6000_darwin64_abi
= 1;
4335 /* Default to natural alignment, for better performance. */
4336 rs6000_alignment_flags
= MASK_ALIGN_NATURAL
;
4340 /* Place FP constants in the constant pool instead of TOC
4341 if section anchors enabled. */
4342 if (flag_section_anchors
4343 && !OPTION_SET_P (TARGET_NO_FP_IN_TOC
))
4344 TARGET_NO_FP_IN_TOC
= 1;
4346 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4347 rs6000_print_isa_options (stderr
, 0, "before subtarget", rs6000_isa_flags
);
4349 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4350 SUBTARGET_OVERRIDE_OPTIONS
;
4352 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4353 SUBSUBTARGET_OVERRIDE_OPTIONS
;
4355 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4356 SUB3TARGET_OVERRIDE_OPTIONS
;
4359 /* If the ABI has support for PC-relative relocations, enable it by default.
4360 This test depends on the sub-target tests above setting the code model to
4361 medium for ELF v2 systems. */
4362 if (PCREL_SUPPORTED_BY_OS
4363 && (rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) == 0)
4364 rs6000_isa_flags
|= OPTION_MASK_PCREL
;
4366 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4367 after the subtarget override options are done. */
4368 else if (TARGET_PCREL
&& TARGET_CMODEL
!= CMODEL_MEDIUM
)
4370 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) != 0)
4371 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4373 rs6000_isa_flags
&= ~OPTION_MASK_PCREL
;
4376 /* Enable -mmma by default on power10 systems. */
4377 if (TARGET_POWER10
&& (rs6000_isa_flags_explicit
& OPTION_MASK_MMA
) == 0)
4378 rs6000_isa_flags
|= OPTION_MASK_MMA
;
4380 /* Turn off vector pair/mma options on non-power10 systems. */
4381 else if (!TARGET_POWER10
&& TARGET_MMA
)
4383 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MMA
) != 0)
4384 error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4386 rs6000_isa_flags
&= ~OPTION_MASK_MMA
;
4389 /* Enable power10 fusion if we are tuning for power10, even if we aren't
4390 generating power10 instructions. */
4391 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P10_FUSION
))
4393 if (rs6000_tune
== PROCESSOR_POWER10
4394 || rs6000_tune
== PROCESSOR_POWER11
)
4395 rs6000_isa_flags
|= OPTION_MASK_P10_FUSION
;
4397 rs6000_isa_flags
&= ~OPTION_MASK_P10_FUSION
;
4400 /* MMA requires SIMD support as ISA 3.1 claims and our implementation
4401 such as "*movoo" uses vector pair access which use VSX registers.
4402 So make MMA require VSX support here. */
4403 if (TARGET_MMA
&& !TARGET_VSX
)
4405 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MMA
) != 0)
4406 error ("%qs requires %qs", "-mmma", "-mvsx");
4407 rs6000_isa_flags
&= ~OPTION_MASK_MMA
;
4410 if (!TARGET_PCREL
&& TARGET_PCREL_OPT
)
4411 rs6000_isa_flags
&= ~OPTION_MASK_PCREL_OPT
;
4413 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4414 rs6000_print_isa_options (stderr
, 0, "after subtarget", rs6000_isa_flags
);
4416 rs6000_always_hint
= (rs6000_tune
!= PROCESSOR_POWER4
4417 && rs6000_tune
!= PROCESSOR_POWER5
4418 && rs6000_tune
!= PROCESSOR_POWER6
4419 && rs6000_tune
!= PROCESSOR_POWER7
4420 && rs6000_tune
!= PROCESSOR_POWER8
4421 && rs6000_tune
!= PROCESSOR_POWER9
4422 && rs6000_tune
!= PROCESSOR_POWER10
4423 && rs6000_tune
!= PROCESSOR_POWER11
4424 && rs6000_tune
!= PROCESSOR_PPCA2
4425 && rs6000_tune
!= PROCESSOR_CELL
4426 && rs6000_tune
!= PROCESSOR_PPC476
);
4427 rs6000_sched_groups
= (rs6000_tune
== PROCESSOR_POWER4
4428 || rs6000_tune
== PROCESSOR_POWER5
4429 || rs6000_tune
== PROCESSOR_POWER7
4430 || rs6000_tune
== PROCESSOR_POWER8
);
4431 rs6000_align_branch_targets
= (rs6000_tune
== PROCESSOR_POWER4
4432 || rs6000_tune
== PROCESSOR_POWER5
4433 || rs6000_tune
== PROCESSOR_POWER6
4434 || rs6000_tune
== PROCESSOR_POWER7
4435 || rs6000_tune
== PROCESSOR_POWER8
4436 || rs6000_tune
== PROCESSOR_POWER9
4437 || rs6000_tune
== PROCESSOR_POWER10
4438 || rs6000_tune
== PROCESSOR_POWER11
4439 || rs6000_tune
== PROCESSOR_PPCE500MC
4440 || rs6000_tune
== PROCESSOR_PPCE500MC64
4441 || rs6000_tune
== PROCESSOR_PPCE5500
4442 || rs6000_tune
== PROCESSOR_PPCE6500
);
4444 /* Allow debug switches to override the above settings. These are set to -1
4445 in rs6000.opt to indicate the user hasn't directly set the switch. */
4446 if (TARGET_ALWAYS_HINT
>= 0)
4447 rs6000_always_hint
= TARGET_ALWAYS_HINT
;
4449 if (TARGET_SCHED_GROUPS
>= 0)
4450 rs6000_sched_groups
= TARGET_SCHED_GROUPS
;
4452 if (TARGET_ALIGN_BRANCH_TARGETS
>= 0)
4453 rs6000_align_branch_targets
= TARGET_ALIGN_BRANCH_TARGETS
;
4455 rs6000_sched_restricted_insns_priority
4456 = (rs6000_sched_groups
? 1 : 0);
4458 /* Handle -msched-costly-dep option. */
4459 rs6000_sched_costly_dep
4460 = (rs6000_sched_groups
? true_store_to_load_dep_costly
: no_dep_costly
);
4462 if (rs6000_sched_costly_dep_str
)
4464 if (! strcmp (rs6000_sched_costly_dep_str
, "no"))
4465 rs6000_sched_costly_dep
= no_dep_costly
;
4466 else if (! strcmp (rs6000_sched_costly_dep_str
, "all"))
4467 rs6000_sched_costly_dep
= all_deps_costly
;
4468 else if (! strcmp (rs6000_sched_costly_dep_str
, "true_store_to_load"))
4469 rs6000_sched_costly_dep
= true_store_to_load_dep_costly
;
4470 else if (! strcmp (rs6000_sched_costly_dep_str
, "store_to_load"))
4471 rs6000_sched_costly_dep
= store_to_load_dep_costly
;
4473 rs6000_sched_costly_dep
= ((enum rs6000_dependence_cost
)
4474 atoi (rs6000_sched_costly_dep_str
));
4477 /* Handle -minsert-sched-nops option. */
4478 rs6000_sched_insert_nops
4479 = (rs6000_sched_groups
? sched_finish_regroup_exact
: sched_finish_none
);
4481 if (rs6000_sched_insert_nops_str
)
4483 if (! strcmp (rs6000_sched_insert_nops_str
, "no"))
4484 rs6000_sched_insert_nops
= sched_finish_none
;
4485 else if (! strcmp (rs6000_sched_insert_nops_str
, "pad"))
4486 rs6000_sched_insert_nops
= sched_finish_pad_groups
;
4487 else if (! strcmp (rs6000_sched_insert_nops_str
, "regroup_exact"))
4488 rs6000_sched_insert_nops
= sched_finish_regroup_exact
;
4490 rs6000_sched_insert_nops
= ((enum rs6000_nop_insertion
)
4491 atoi (rs6000_sched_insert_nops_str
));
4494 /* Handle stack protector */
4495 if (!OPTION_SET_P (rs6000_stack_protector_guard
))
4496 #ifdef TARGET_THREAD_SSP_OFFSET
4497 rs6000_stack_protector_guard
= SSP_TLS
;
4499 rs6000_stack_protector_guard
= SSP_GLOBAL
;
4502 #ifdef TARGET_THREAD_SSP_OFFSET
4503 rs6000_stack_protector_guard_offset
= TARGET_THREAD_SSP_OFFSET
;
4504 rs6000_stack_protector_guard_reg
= TARGET_64BIT
? 13 : 2;
4507 if (OPTION_SET_P (rs6000_stack_protector_guard_offset_str
))
4510 const char *str
= rs6000_stack_protector_guard_offset_str
;
4513 long offset
= strtol (str
, &endp
, 0);
4514 if (!*str
|| *endp
|| errno
)
4515 error ("%qs is not a valid number in %qs", str
,
4516 "-mstack-protector-guard-offset=");
4518 if (!IN_RANGE (offset
, -0x8000, 0x7fff)
4519 || (TARGET_64BIT
&& (offset
& 3)))
4520 error ("%qs is not a valid offset in %qs", str
,
4521 "-mstack-protector-guard-offset=");
4523 rs6000_stack_protector_guard_offset
= offset
;
4526 if (OPTION_SET_P (rs6000_stack_protector_guard_reg_str
))
4528 const char *str
= rs6000_stack_protector_guard_reg_str
;
4529 int reg
= decode_reg_name (str
);
4531 if (!IN_RANGE (reg
, 1, 31))
4532 error ("%qs is not a valid base register in %qs", str
,
4533 "-mstack-protector-guard-reg=");
4535 rs6000_stack_protector_guard_reg
= reg
;
4538 if (rs6000_stack_protector_guard
== SSP_TLS
4539 && !IN_RANGE (rs6000_stack_protector_guard_reg
, 1, 31))
4540 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4544 #ifdef TARGET_REGNAMES
4545 /* If the user desires alternate register names, copy in the
4546 alternate names now. */
4547 if (TARGET_REGNAMES
)
4548 memcpy (rs6000_reg_names
, alt_reg_names
, sizeof (rs6000_reg_names
));
4551 /* Set aix_struct_return last, after the ABI is determined.
4552 If -maix-struct-return or -msvr4-struct-return was explicitly
4553 used, don't override with the ABI default. */
4554 if (!OPTION_SET_P (aix_struct_return
))
4555 aix_struct_return
= (DEFAULT_ABI
!= ABI_V4
|| DRAFT_V4_STRUCT_RET
);
4558 /* IBM XL compiler defaults to unsigned bitfields. */
4559 if (TARGET_XL_COMPAT
)
4560 flag_signed_bitfields
= 0;
4563 if (TARGET_LONG_DOUBLE_128
&& !TARGET_IEEEQUAD
)
4564 REAL_MODE_FORMAT (TFmode
) = &ibm_extended_format
;
4566 ASM_GENERATE_INTERNAL_LABEL (toc_label_name
, "LCTOC", 1);
4568 /* We can only guarantee the availability of DI pseudo-ops when
4569 assembling for 64-bit targets. */
4572 targetm
.asm_out
.aligned_op
.di
= NULL
;
4573 targetm
.asm_out
.unaligned_op
.di
= NULL
;
4577 /* Set branch target alignment, if not optimizing for size. */
4580 /* Cell wants to be aligned 8byte for dual issue. Titan wants to be
4581 aligned 8byte to avoid misprediction by the branch predictor. */
4582 if (rs6000_tune
== PROCESSOR_TITAN
4583 || rs6000_tune
== PROCESSOR_CELL
)
4585 if (flag_align_functions
&& !str_align_functions
)
4586 str_align_functions
= "8";
4587 if (flag_align_jumps
&& !str_align_jumps
)
4588 str_align_jumps
= "8";
4589 if (flag_align_loops
&& !str_align_loops
)
4590 str_align_loops
= "8";
4592 if (rs6000_align_branch_targets
)
4594 if (flag_align_functions
&& !str_align_functions
)
4595 str_align_functions
= "16";
4596 if (flag_align_jumps
&& !str_align_jumps
)
4597 str_align_jumps
= "16";
4598 if (flag_align_loops
&& !str_align_loops
)
4600 can_override_loop_align
= 1;
4601 str_align_loops
= "16";
4606 /* Arrange to save and restore machine status around nested functions. */
4607 init_machine_status
= rs6000_init_machine_status
;
4609 /* We should always be splitting complex arguments, but we can't break
4610 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4611 if (DEFAULT_ABI
== ABI_V4
|| DEFAULT_ABI
== ABI_DARWIN
)
4612 targetm
.calls
.split_complex_arg
= NULL
;
4614 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4615 if (DEFAULT_ABI
== ABI_AIX
)
4616 targetm
.calls
.custom_function_descriptors
= 0;
4619 /* Initialize rs6000_cost with the appropriate target costs. */
4621 rs6000_cost
= TARGET_POWERPC64
? &size64_cost
: &size32_cost
;
4623 switch (rs6000_tune
)
4625 case PROCESSOR_RS64A
:
4626 rs6000_cost
= &rs64a_cost
;
4629 case PROCESSOR_MPCCORE
:
4630 rs6000_cost
= &mpccore_cost
;
4633 case PROCESSOR_PPC403
:
4634 rs6000_cost
= &ppc403_cost
;
4637 case PROCESSOR_PPC405
:
4638 rs6000_cost
= &ppc405_cost
;
4641 case PROCESSOR_PPC440
:
4642 rs6000_cost
= &ppc440_cost
;
4645 case PROCESSOR_PPC476
:
4646 rs6000_cost
= &ppc476_cost
;
4649 case PROCESSOR_PPC601
:
4650 rs6000_cost
= &ppc601_cost
;
4653 case PROCESSOR_PPC603
:
4654 rs6000_cost
= &ppc603_cost
;
4657 case PROCESSOR_PPC604
:
4658 rs6000_cost
= &ppc604_cost
;
4661 case PROCESSOR_PPC604e
:
4662 rs6000_cost
= &ppc604e_cost
;
4665 case PROCESSOR_PPC620
:
4666 rs6000_cost
= &ppc620_cost
;
4669 case PROCESSOR_PPC630
:
4670 rs6000_cost
= &ppc630_cost
;
4673 case PROCESSOR_CELL
:
4674 rs6000_cost
= &ppccell_cost
;
4677 case PROCESSOR_PPC750
:
4678 case PROCESSOR_PPC7400
:
4679 rs6000_cost
= &ppc750_cost
;
4682 case PROCESSOR_PPC7450
:
4683 rs6000_cost
= &ppc7450_cost
;
4686 case PROCESSOR_PPC8540
:
4687 case PROCESSOR_PPC8548
:
4688 rs6000_cost
= &ppc8540_cost
;
4691 case PROCESSOR_PPCE300C2
:
4692 case PROCESSOR_PPCE300C3
:
4693 rs6000_cost
= &ppce300c2c3_cost
;
4696 case PROCESSOR_PPCE500MC
:
4697 rs6000_cost
= &ppce500mc_cost
;
4700 case PROCESSOR_PPCE500MC64
:
4701 rs6000_cost
= &ppce500mc64_cost
;
4704 case PROCESSOR_PPCE5500
:
4705 rs6000_cost
= &ppce5500_cost
;
4708 case PROCESSOR_PPCE6500
:
4709 rs6000_cost
= &ppce6500_cost
;
4712 case PROCESSOR_TITAN
:
4713 rs6000_cost
= &titan_cost
;
4716 case PROCESSOR_POWER4
:
4717 case PROCESSOR_POWER5
:
4718 rs6000_cost
= &power4_cost
;
4721 case PROCESSOR_POWER6
:
4722 rs6000_cost
= &power6_cost
;
4725 case PROCESSOR_POWER7
:
4726 rs6000_cost
= &power7_cost
;
4729 case PROCESSOR_POWER8
:
4730 rs6000_cost
= &power8_cost
;
4733 case PROCESSOR_POWER9
:
4734 rs6000_cost
= &power9_cost
;
4737 case PROCESSOR_POWER10
:
4738 case PROCESSOR_POWER11
:
4739 rs6000_cost
= &power10_cost
;
4742 case PROCESSOR_PPCA2
:
4743 rs6000_cost
= &ppca2_cost
;
4752 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4753 param_simultaneous_prefetches
,
4754 rs6000_cost
->simultaneous_prefetches
);
4755 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4756 param_l1_cache_size
,
4757 rs6000_cost
->l1_cache_size
);
4758 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4759 param_l1_cache_line_size
,
4760 rs6000_cost
->cache_line_size
);
4761 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4762 param_l2_cache_size
,
4763 rs6000_cost
->l2_cache_size
);
4765 /* Increase loop peeling limits based on performance analysis. */
4766 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4767 param_max_peeled_insns
, 400);
4768 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4769 param_max_completely_peeled_insns
, 400);
4771 /* The lxvl/stxvl instructions don't perform well before Power10. */
4773 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4774 param_vect_partial_vector_usage
, 1);
4776 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4777 param_vect_partial_vector_usage
, 0);
4779 /* Use the 'model' -fsched-pressure algorithm by default. */
4780 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4781 param_sched_pressure_algorithm
,
4782 SCHED_PRESSURE_MODEL
);
4784 /* If using typedef char *va_list, signal that
4785 __builtin_va_start (&ap, 0) can be optimized to
4786 ap = __builtin_next_arg (0). */
4787 if (DEFAULT_ABI
!= ABI_V4
)
4788 targetm
.expand_builtin_va_start
= NULL
;
4791 rs6000_override_options_after_change ();
4793 /* If not explicitly specified via option, decide whether to generate indexed
4794 load/store instructions. A value of -1 indicates that the
4795 initial value of this variable has not been overwritten. During
4796 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4797 if (TARGET_AVOID_XFORM
== -1)
4798 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4799 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4800 need indexed accesses and the type used is the scalar type of the element
4801 being loaded or stored. */
4802 TARGET_AVOID_XFORM
= (rs6000_tune
== PROCESSOR_POWER6
&& TARGET_CMPB
4803 && !TARGET_ALTIVEC
);
4805 /* Set the -mrecip options. */
4806 if (rs6000_recip_name
)
4808 char *p
= ASTRDUP (rs6000_recip_name
);
4810 unsigned int mask
, i
;
4813 while ((q
= strtok (p
, ",")) != NULL
)
4824 if (!strcmp (q
, "default"))
4825 mask
= ((TARGET_RECIP_PRECISION
)
4826 ? RECIP_HIGH_PRECISION
: RECIP_LOW_PRECISION
);
4829 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
4830 if (!strcmp (q
, recip_options
[i
].string
))
4832 mask
= recip_options
[i
].mask
;
4836 if (i
== ARRAY_SIZE (recip_options
))
4838 error ("unknown option for %<%s=%s%>", "-mrecip", q
);
4846 rs6000_recip_control
&= ~mask
;
4848 rs6000_recip_control
|= mask
;
4852 /* We only support ROP protection on certain targets. */
4853 if (rs6000_rop_protect
)
4855 /* Disallow CPU targets we don't support. */
4857 error ("%<-mrop-protect%> requires %<-mcpu=power8%> or later");
4859 /* Disallow ABI targets we don't support. */
4860 if (DEFAULT_ABI
!= ABI_ELFv2
)
4861 error ("%<-mrop-protect%> requires the ELFv2 ABI");
4864 /* Initialize all of the registers. */
4865 rs6000_init_hard_regno_mode_ok (global_init_p
);
4867 /* Save the initial options in case the user does function specific options */
4869 target_option_default_node
= target_option_current_node
4870 = build_target_option_node (&global_options
, &global_options_set
);
4872 /* If not explicitly specified via option, decide whether to generate the
4873 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4874 if (TARGET_LINK_STACK
== -1)
4875 SET_TARGET_LINK_STACK (rs6000_tune
== PROCESSOR_PPC476
&& flag_pic
);
4877 /* Deprecate use of -mno-speculate-indirect-jumps. */
4878 if (!rs6000_speculate_indirect_jumps
)
4879 warning (0, "%qs is deprecated and not recommended in any circumstances",
4880 "-mno-speculate-indirect-jumps");
4885 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4886 define the target cpu type. */
/* Thin wrapper: performs the full option override by delegating to
   rs6000_option_override_internal with global_init_p == true; the
   boolean result is deliberately discarded via the (void) cast.
   NOTE(review): the return-type line ("static void") and the function
   braces appear to have been dropped by extraction — confirm against
   upstream rs6000.cc.  */
4889 rs6000_option_override (void)
4891 (void) rs6000_option_override_internal (true);
4895 /* Implement LOOP_ALIGN. */
/* Compute the alignment to apply to the loop headed by LABEL.
   NOTE(review): the return-type line, braces, the local declarations of
   bb/ninsns, the early "return default" statements and the final
   fall-through return appear to have been dropped by extraction —
   confirm against upstream rs6000.cc.  */
4897 rs6000_loop_align (rtx label
)
4902 /* Don't override loop alignment if -falign-loops was specified. */
4903 if (!can_override_loop_align
)
/* Find the basic block containing LABEL and count the insns of its
   innermost enclosing loop.  */
4906 bb
= BLOCK_FOR_INSN (label
)
;
4907 ninsns
= num_loop_insns(bb
->loop_father
)
;
4909 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
/* Only small loops (5..8 insns) on the listed Power4..Power8 tunings get
   the stronger 2**5 == 32-byte alignment below.  */
4910 if (ninsns
> 4 && ninsns
<= 8
4911 && (rs6000_tune
== PROCESSOR_POWER4
4912 || rs6000_tune
== PROCESSOR_POWER5
4913 || rs6000_tune
== PROCESSOR_POWER6
4914 || rs6000_tune
== PROCESSOR_POWER7
4915 || rs6000_tune
== PROCESSOR_POWER8
))
4916 return align_flags (5);
4921 /* Return true iff, data reference of TYPE can reach vector alignment (16)
4922 after applying N number of iterations. This routine does not determine
4923 how may iterations are required to reach desired alignment. */
/* NOTE(review): the return-type line, braces and the bodies of both
   "if" arms (their return statements) appear to have been dropped by
   extraction — confirm against upstream rs6000.cc.  */
4926 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED
, bool is_packed
)
/* The answer is keyed off the -malign-natural / -malign-power alignment
   mode recorded in rs6000_alignment_flags.  */
4933 if (rs6000_alignment_flags
== MASK_ALIGN_NATURAL
)
4936 if (rs6000_alignment_flags
== MASK_ALIGN_POWER
)
4946 /* Assuming that all other types are naturally aligned. CHECKME! */
4951 /* Return true if the vector misalignment factor is supported by the
/* NOTE(review): the tail of this header comment, the return type, the
   remaining parameters (type, is_packed and misalignment are referenced
   below) and all return statements appear to have been dropped by
   extraction — confirm against upstream rs6000.cc.  */
4954 rs6000_builtin_support_vector_misalignment (machine_mode mode
,
/* With efficient unaligned VSX accesses, misalignment is always fine.  */
4961 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4964 /* Return if movmisalign pattern is not supported for this mode. */
4965 if (optab_handler (movmisalign_optab
, mode
) == CODE_FOR_nothing
)
/* A misalignment of -1 means the factor is unknown at compile time.  */
4968 if (misalignment
== -1)
4970 /* Misalignment factor is unknown at compile time but we know
4971 it's word aligned. */
4972 if (rs6000_vector_alignment_reachable (type
, is_packed
))
4974 int element_size
= TREE_INT_CST_LOW (TYPE_SIZE (type
));
4976 if (element_size
== 64 || element_size
== 32)
4983 /* VSX supports word-aligned vector. */
4984 if (misalignment
% 4 == 0)
4990 /* Implement targetm.vectorize.builtin_vectorization_cost. */
/* Per-statement vectorization cost hook.  NOTE(review): the return type,
   braces, most case labels (e.g. the cheap scalar/vector stmt cases),
   the numeric "return N;" bodies and local declarations (elements,
   elem_type) appear to have been dropped by extraction — confirm
   against upstream rs6000.cc.  */
4992 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
4993 tree vectype
, int misalign
)
/* Dispatch on the kind of statement being costed.  */
4998 switch (type_of_cost
)
5006 case cond_branch_not_taken
:
5010 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5014 /* Power7 has only one permute unit, make it a bit expensive. */
5015 if (TARGET_VSX
&& rs6000_tune
== PROCESSOR_POWER7
)
5020 case vec_promote_demote
:
5021 /* Power7 has only one permute/pack unit, make it a bit expensive. */
5022 if (TARGET_VSX
&& rs6000_tune
== PROCESSOR_POWER7
)
5027 case cond_branch_taken
:
/* Unaligned/gather loads: cost depends on how well the target handles
   misalignment (efficient unaligned VSX vs. movmisalign vs. neither).  */
5030 case unaligned_load
:
5031 case vector_gather_load
:
5032 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5033 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5036 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
5038 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
5039 /* See PR102767, consider V1TI to keep consistency. */
5040 if (elements
== 2 || elements
== 1)
5041 /* Double word aligned. */
5049 /* Double word aligned. */
5053 /* Unknown misalignment. */
5066 /* Misaligned loads are not supported. */
5069 /* Like rs6000_insn_cost, make load insns cost a bit more. */
/* Unaligned/scatter stores: mirror of the load logic above.  */
5072 case unaligned_store
:
5073 case vector_scatter_store
:
5074 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5077 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
5079 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
5080 /* See PR102767, consider V1TI to keep consistency. */
5081 if (elements
== 2 || elements
== 1)
5082 /* Double word aligned. */
5090 /* Double word aligned. */
5094 /* Unknown misalignment. */
5107 /* Misaligned stores are not supported. */
/* vec_construct: estimate the cost of building a vector from scalar
   elements; cost model differs for float vs. integer element types.
   NOTE(review): the case label itself appears dropped by extraction.  */
5113 /* This is a rough approximation assuming non-constant elements
5114 constructed into a vector via element insertion. FIXME:
5115 vec_construct is not granular enough for uniformly good
5116 decisions. If the initialization is a splat, this is
5117 cheaper than we estimate. Improve this someday. */
5118 elem_type
= TREE_TYPE (vectype
);
5119 /* 32-bit vectors loaded into registers are stored as double
5120 precision, so we need 2 permutes, 2 converts, and 1 merge
5121 to construct a vector of short floats from them. */
5122 if (SCALAR_FLOAT_TYPE_P (elem_type
)
5123 && TYPE_PRECISION (elem_type
) == 32)
5125 /* On POWER9, integer vector types are built up in GPRs and then
5126 use a direct move (2 cycles). For POWER8 this is even worse,
5127 as we need two direct moves and a merge, and the direct moves
5129 else if (INTEGRAL_TYPE_P (elem_type
))
/* P9: one insert per element plus one 2-cycle direct move; pre-P9 the
   direct-move path costs 5 extra units instead of 2.  */
5131 if (TARGET_P9_VECTOR
)
5132 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 2;
5134 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 5;
5137 /* V2DFmode doesn't need a direct move. */
5145 /* Implement targetm.vectorize.preferred_simd_mode. */
/* Map a scalar element mode to the preferred 16-byte vector mode:
   element count is 16 / element size.  NOTE(review): the return-type
   line, braces and the fallback return (for when no usable vector mode
   exists) appear to have been dropped by extraction — confirm against
   upstream rs6000.cc.  */
5148 rs6000_preferred_simd_mode (scalar_mode mode
)
5150 opt_machine_mode vmode
= mode_for_vector (mode
, 16 / GET_MODE_SIZE (mode
));
/* Only return the vector mode if it exists and is actually supported by
   one of the enabled vector memory models.  */
5152 if (vmode
.exists () && !VECTOR_MEM_NONE_P (vmode
.require ()))
5153 return vmode
.require ();
/* Target-specific vectorizer cost bookkeeping for rs6000, layered on the
   generic vector_costs interface.  NOTE(review): the public:/protected:
   access specifiers, the opening/closing braces and the trailing
   semicolon of this class appear to have been dropped by extraction —
   confirm against upstream rs6000.cc.  */
5158 class rs6000_cost_data
: public vector_costs
/* Inherit the base-class constructor (vinfo, costing_for_scalar).  */
5161 using vector_costs::vector_costs
;
/* Overridden vector_costs hooks: per-statement cost accumulation and
   final cost adjustment.  */
5163 unsigned int add_stmt_cost (int count
, vect_cost_for_stmt kind
,
5164 stmt_vec_info stmt_info
, slp_tree
, tree vectype
,
5166 vect_cost_model_location where
) override
;
5167 void finish_cost (const vector_costs
*) override
;
/* Internal helpers used by the hooks above.  */
5170 void update_target_cost_per_stmt (vect_cost_for_stmt
, stmt_vec_info
,
5171 vect_cost_model_location
, unsigned int);
5172 void density_test (loop_vec_info
);
5173 void adjust_vect_cost_per_loop (loop_vec_info
);
5174 unsigned int determine_suggested_unroll_factor (loop_vec_info
);
5176 /* Total number of vectorized stmts (loop only). */
5177 unsigned m_nstmts
= 0;
5178 /* Total number of loads (loop only). */
5179 unsigned m_nloads
= 0;
5180 /* Total number of stores (loop only). */
5181 unsigned m_nstores
= 0;
5182 /* Reduction factor for suggesting unroll factor (loop only). */
5183 unsigned m_reduc_factor
= 0;
5184 /* Possible extra penalized cost on vector construction (loop only). */
5185 unsigned m_extra_ctor_cost
= 0;
5186 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5187 instruction is needed by the vectorization. */
5188 bool m_vect_nonmem
= false;
5189 /* If this loop gets vectorized with emulated gather load. */
5190 bool m_gather_load
= false;
5193 /* Test for likely overcommitment of vector hardware resources. If a
5194 loop iteration is relatively large, and too large a percentage of
5195 instructions in the loop are vectorized, the cost model may not
5196 adequately reflect delays from unavailable vector resources.
5197 Penalize the loop body cost for this case. */
/* NOTE(review): the return-type line ("void"), braces, the loop-body
   "continue" for debug/irrelevant stmts, the else-branch accumulating
   not_vec_cost, and the free (bbs) cleanup appear to have been dropped
   by extraction — confirm against upstream rs6000.cc.  */
5200 rs6000_cost_data::density_test (loop_vec_info loop_vinfo
)
5202 /* This density test only cares about the cost of vector version of the
5203 loop, so immediately return if we are passed costing for the scalar
5204 version (namely computing single scalar iteration cost). */
5205 if (m_costing_for_scalar
)
/* Walk every basic block of the loop, splitting statement cost into the
   already-accumulated vectorized cost (vect_body) and the cost of
   statements that were not vectorized.  */
5208 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5209 basic_block
*bbs
= get_loop_body (loop
);
5210 int nbbs
= loop
->num_nodes
;
5211 int vec_cost
= m_costs
[vect_body
], not_vec_cost
= 0;
5213 for (int i
= 0; i
< nbbs
; i
++)
5215 basic_block bb
= bbs
[i
];
5216 gimple_stmt_iterator gsi
;
5218 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
5220 gimple
*stmt
= gsi_stmt (gsi
);
/* Debug statements carry no runtime cost — skipped.  */
5221 if (is_gimple_debug (stmt
))
5224 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (stmt
);
/* Statements neither relevant to vectorization nor part of a pattern
   count toward the non-vectorized side of the density ratio.  */
5226 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5227 && !STMT_VINFO_IN_PATTERN_P (stmt_info
))
/* Percentage of the loop's cost that is vectorized code.  */
5233 int density_pct
= (vec_cost
* 100) / (vec_cost
+ not_vec_cost
);
/* Penalize the vector body cost by rs6000_density_penalty percent when
   both the density and the absolute size exceed their thresholds.  */
5235 if (density_pct
> rs6000_density_pct_threshold
5236 && vec_cost
+ not_vec_cost
> rs6000_density_size_threshold
)
5238 m_costs
[vect_body
] = vec_cost
* (100 + rs6000_density_penalty
) / 100;
5239 if (dump_enabled_p ())
5240 dump_printf_loc (MSG_NOTE
, vect_location
,
5241 "density %d%%, cost %d exceeds threshold, penalizing "
5242 "loop body cost by %u%%\n", density_pct
,
5243 vec_cost
+ not_vec_cost
, rs6000_density_penalty
);
5246 /* Check whether we need to penalize the body cost to account
5247 for excess strided or elementwise loads. */
5248 if (m_extra_ctor_cost
> 0)
5250 gcc_assert (m_nloads
<= m_nstmts
);
5251 unsigned int load_pct
= (m_nloads
* 100) / m_nstmts
;
5253 /* It's likely to be bounded by latency and execution resources
5254 from many scalar loads which are strided or elementwise loads
5255 into a vector if both conditions below are found:
5256 1. there are many loads, it's easy to result in a long wait
5258 2. load has a big proportion of all vectorized statements,
5259 it's not easy to schedule other statements to spread among
5261 One typical case is the innermost loop of the hotspot of SPEC2017
5262 503.bwaves_r without loop interchange. */
5263 if (m_nloads
> (unsigned int) rs6000_density_load_num_threshold
5264 && load_pct
> (unsigned int) rs6000_density_load_pct_threshold
)
5266 m_costs
[vect_body
] += m_extra_ctor_cost
;
5267 if (dump_enabled_p ())
5268 dump_printf_loc (MSG_NOTE
, vect_location
,
5269 "Found %u loads and "
5270 "load pct. %u%% exceed "
5272 "penalizing loop body "
5273 "cost by extra cost %u "
5281 /* Implement targetm.vectorize.create_costs. */
5283 static vector_costs
*
5284 rs6000_vectorize_create_costs (vec_info
*vinfo
, bool costing_for_scalar
)
5286 return new rs6000_cost_data (vinfo
, costing_for_scalar
);
/* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
   For some statement, we would like to further fine-grain tweak the cost on
   top of rs6000_builtin_vectorization_cost handling which doesn't have any
   information on statement operation codes etc.  One typical case here is
   COND_EXPR, it takes the same cost to simple FXU instruction when evaluating
   for scalar cost, but it should be priced more whatever transformed to either
   compare + branch or compare + isel instructions.  */

rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
				  struct _stmt_vec_info *stmt_info)
  /* Only plain GIMPLE assignments costed as scalar statements are
     candidates for the COND_EXPR surcharge below.  */
  if (kind == scalar_stmt && stmt_info && stmt_info->stmt
      && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
    tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
    if (subcode == COND_EXPR)
/* Helper function for add_stmt_cost.  Check each statement cost
   entry, gather information and update the target_cost fields
   accordingly.  KIND/STMT_INFO/WHERE describe the costed statement;
   ORIG_COUNT is the unweighted statement count.  */

rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
					       stmt_vec_info stmt_info,
					       vect_cost_model_location where,
					       unsigned int orig_count)
  /* Check whether we're doing something other than just a copy loop.
     Not all such loops may be profitably vectorized; see
     rs6000_finish_cost.  */
  if (kind == vec_to_scalar
      || kind == vec_promote_demote
      || kind == vec_construct
      || kind == scalar_to_vec
      || (where == vect_body && kind == vector_stmt))
    m_vect_nonmem = true;

  /* Gather some information when we are costing the vectorized instruction
     for the statements located in a loop body.  */
  if (!m_costing_for_scalar
      && is_a <loop_vec_info> (m_vinfo)
      && where == vect_body)
    m_nstmts += orig_count;

    /* Count loads, and remember whether any is an emulated gather
       (used later by the unroll heuristic).  */
    if (kind == scalar_load
	|| kind == vector_load
	|| kind == unaligned_load
	|| kind == vector_gather_load)
      m_nloads += orig_count;
      if (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
	m_gather_load = true;
    else if (kind == scalar_store
	     || kind == vector_store
	     || kind == unaligned_store
	     || kind == vector_scatter_store)
      m_nstores += orig_count;
    else if ((kind == scalar_stmt
	      || kind == vector_stmt
	      || kind == vec_to_scalar)
	     && vect_is_reduction (stmt_info))
      /* Loop body contains normal int or fp operations and epilogue
	 contains vector reduction.  For simplicity, we assume int
	 operation takes one cycle and fp operation takes one more.  */
      tree lhs = gimple_get_lhs (stmt_info->stmt);
      bool is_float = FLOAT_TYPE_P (TREE_TYPE (lhs));
      unsigned int basic_cost = is_float ? 2 : 1;
      m_reduc_factor = MAX (basic_cost * orig_count, m_reduc_factor);

    /* Power processors do not currently have instructions for strided
       and elementwise loads, and instead we must generate multiple
       scalar loads.  This leads to undercounting of the cost.  We
       account for this by scaling the construction cost by the number
       of elements involved, and saving this as extra cost that we may
       or may not need to apply.  When finalizing the cost of the loop,
       the extra penalty is applied when the load density heuristics
       are satisfied.  */
    if (kind == vec_construct && stmt_info
	&& STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
	&& (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
	    || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_STRIDED_SLP))
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int nunits = vect_nunits_for_cost (vectype);
      /* As PR103702 shows, it's possible that vectorizer wants to do
	 costings for only one unit here, it's no need to do any
	 penalization for it, so simply early return here.  */

      /* i386 port adopts nunits * stmt_cost as the penalized cost
	 for this kind of penalization, we used to follow it but
	 found it could result in an unreliable body cost especially
	 for V16QI/V8HI modes.  To make it better, we choose this
	 new heuristic: for each scalar load, we use 2 as penalized
	 cost for the case with 2 nunits and use 1 for the other
	 cases.  It's without much supporting theory, mainly
	 concluded from the broad performance evaluations on Power8,
	 Power9 and Power10.  One possibly related point is that:
	 vector construction for more units would use more insns,
	 it has more chances to schedule them better (even run in
	 parallelly when enough available units at that time), so
	 it seems reasonable not to penalize that much for them.  */
      unsigned int adjusted_cost = (nunits == 2) ? 2 : 1;
      unsigned int extra_cost = nunits * adjusted_cost;
      m_extra_ctor_cost += extra_cost;
/* Cost one statement occurrence: compute the target cost for KIND/VECTYPE,
   apply the per-statement tweak, weight by frequency, accumulate into
   m_costs[WHERE] and update the gathered statistics.  */
rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind,
				 stmt_vec_info stmt_info, slp_tree,
				 tree vectype, int misalign,
				 vect_cost_model_location where)
  unsigned retval = 0;

  if (flag_vect_cost_model)
    int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
    /* Fine-grain tweak on top of the generic builtin cost (COND_EXPR etc).  */
    stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info);
    /* Statements in an inner loop relative to the loop being
       vectorized are weighted more heavily.  The value here is
       arbitrary and could potentially be improved with analysis.  */
    unsigned int orig_count = count;
    retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
    m_costs[where] += retval;

    update_target_cost_per_stmt (kind, stmt_info, where, orig_count);
/* For some target specific vectorization cost which can't be handled per stmt,
   we check the requisite conditions and adjust the vectorization cost
   accordingly if satisfied.  One typical example is to model shift cost for
   vector with length by counting number of required lengths under condition
   LOOP_VINFO_FULLY_WITH_LENGTH_P.  */

rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo)
  if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
    rgroup_controls *rgc;
    unsigned int num_vectors_m1;
    unsigned int shift_cnt = 0;
    /* Count one shift per required length across all rgroups.  */
    FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
      /* Each length needs one shift to fill into bits 0-7.  */
      shift_cnt += num_vectors_m1 + 1;

    /* Charge the shifts to the vector body as scalar statements.  */
    add_stmt_cost (shift_cnt, scalar_stmt, NULL, NULL,
		   NULL_TREE, 0, vect_body);
/* Determine suggested unroll factor by considering some below factors:

    - unroll option/pragma which can disable unrolling for this loop;
    - simple hardware resource model for non memory vector insns;
    - aggressive heuristics when iteration count is unknown:
       - reduction case to break cross iteration dependency;
       - emulated gather load;
    - estimated iteration count when iteration count is unknown.  */

rs6000_cost_data::determine_suggested_unroll_factor (loop_vec_info loop_vinfo)
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Don't unroll if it's specified explicitly not to be unrolled.  */
  if (loop->unroll == 1
      || (OPTION_SET_P (flag_unroll_loops) && !flag_unroll_loops)
      || (OPTION_SET_P (flag_unroll_all_loops) && !flag_unroll_all_loops))

  unsigned int nstmts_nonldst = m_nstmts - m_nloads - m_nstores;
  /* Don't unroll if no vector instructions excepting for memory access.  */
  if (nstmts_nonldst == 0)

  /* Consider breaking cross iteration dependency for reduction.  */
  unsigned int reduc_factor = m_reduc_factor > 1 ? m_reduc_factor : 1;

  /* Use this simple hardware resource model that how many non ld/st
     vector instructions can be issued per cycle.  */
  unsigned int issue_width = rs6000_vect_unroll_issue;
  unsigned int uf = CEIL (reduc_factor * issue_width, nstmts_nonldst);
  uf = MIN ((unsigned int) rs6000_vect_unroll_limit, uf);
  /* Make sure it is power of 2.  */
  uf = 1 << ceil_log2 (uf);

  /* If the iteration count is known, the costing would be exact enough,
     don't worry it could be worse.  */
  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))

  /* Inspired by SPEC2017 parest_r, we want to aggressively unroll the
     loop if either condition is satisfied:
       - reduction factor exceeds the threshold;
       - emulated gather load adopted.  */
  if (reduc_factor > (unsigned int) rs6000_vect_unroll_reduc_threshold

  /* Check if we can conclude it's good to unroll from the estimated
     iteration count.  */
  HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
  unsigned int vf = vect_vf_for_cost (loop_vinfo);
  unsigned int unrolled_vf = vf * uf;
  if (est_niter == -1 || est_niter < unrolled_vf)
    /* When the estimated iteration of this loop is unknown, it's possible
       that we are able to vectorize this loop with the original VF but fail
       to vectorize it with the unrolled VF any more if the actual iteration
       count is in between.  */

    /* Epilogue iteration counts with and without the extra unrolling.  */
    unsigned int epil_niter_unr = est_niter % unrolled_vf;
    unsigned int epil_niter = est_niter % vf;
    /* Even if we have partial vector support, it can be still inefficent
       to calculate the length when the iteration count is unknown, so
       only expect it's good to unroll when the epilogue iteration count
       is not bigger than VF (only one time length calculation).  */
    if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
	&& epil_niter_unr <= vf)
    /* Without partial vector support, conservatively unroll this when
       the epilogue iteration count is less than the original one
       (epilogue execution time wouldn't be longer than before).  */
    else if (!LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
	     && epil_niter_unr <= epil_niter)
/* Finalize loop costing: apply per-loop adjustments, the density penalty,
   the copy-loop-with-versioning penalty, and compute the suggested unroll
   factor, then delegate to the base class.  */
rs6000_cost_data::finish_cost (const vector_costs *scalar_costs)
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (m_vinfo))
    adjust_vect_cost_per_loop (loop_vinfo);
    density_test (loop_vinfo);

    /* Don't vectorize minimum-vectorization-factor, simple copy loops
       that require versioning for any reason.  The vectorization is at
       best a wash inside the loop, and the versioning checks make
       profitability highly unlikely and potentially quite harmful.  */
    && LOOP_VINFO_VECT_FACTOR (loop_vinfo) == 2
    && LOOP_REQUIRES_VERSIONING (loop_vinfo))
    /* Effectively disqualify the loop by making its body prohibitively
       expensive.  */
    m_costs[vect_body] += 10000;

    m_suggested_unroll_factor
      = determine_suggested_unroll_factor (loop_vinfo);

  vector_costs::finish_cost (scalar_costs);
5568 /* Implement targetm.loop_unroll_adjust. */
5571 rs6000_loop_unroll_adjust (unsigned nunroll
, struct loop
*loop
)
5573 if (unroll_only_small_loops
)
5575 /* TODO: These are hardcoded values right now. We probably should use
5577 if (loop
->ninsns
<= 6)
5578 return MIN (4, nunroll
);
5579 if (loop
->ninsns
<= 10)
5580 return MIN (2, nunroll
);
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.

   Implement targetm.vectorize.builtin_vectorized_function.  */

rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
  machine_mode in_mode, out_mode;

  if (TARGET_DEBUG_BUILTIN)
    fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
	     combined_fn_name (combined_fn (fn)),
	     GET_MODE_NAME (TYPE_MODE (type_out)),
	     GET_MODE_NAME (TYPE_MODE (type_in)));

  /* TODO: Should this be gcc_assert?  */
  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE)

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  /* copysign: CPSGN* builtins (V2DF with VSX, V4SF with VSX or AltiVec).  */
  if (VECTOR_UNIT_VSX_P (V2DFmode)
      && out_mode == DFmode && out_n == 2
      && in_mode == DFmode && in_n == 2)
    return rs6000_builtin_decls[RS6000_BIF_CPSGNDP];
  if (VECTOR_UNIT_VSX_P (V4SFmode)
      && out_mode == SFmode && out_n == 4
      && in_mode == SFmode && in_n == 4)
    return rs6000_builtin_decls[RS6000_BIF_CPSGNSP];
  if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
      && out_mode == SFmode && out_n == 4
      && in_mode == SFmode && in_n == 4)
    return rs6000_builtin_decls[RS6000_BIF_COPYSIGN_V4SF];

  /* Round toward +infinity (ceil): xvrdpip/xvrspip/vrfip.  */
  if (VECTOR_UNIT_VSX_P (V2DFmode)
      && out_mode == DFmode && out_n == 2
      && in_mode == DFmode && in_n == 2)
    return rs6000_builtin_decls[RS6000_BIF_XVRDPIP];
  if (VECTOR_UNIT_VSX_P (V4SFmode)
      && out_mode == SFmode && out_n == 4
      && in_mode == SFmode && in_n == 4)
    return rs6000_builtin_decls[RS6000_BIF_XVRSPIP];
  if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
      && out_mode == SFmode && out_n == 4
      && in_mode == SFmode && in_n == 4)
    return rs6000_builtin_decls[RS6000_BIF_VRFIP];

  /* Round toward -infinity (floor): xvrdpim/xvrspim/vrfim.  */
  if (VECTOR_UNIT_VSX_P (V2DFmode)
      && out_mode == DFmode && out_n == 2
      && in_mode == DFmode && in_n == 2)
    return rs6000_builtin_decls[RS6000_BIF_XVRDPIM];
  if (VECTOR_UNIT_VSX_P (V4SFmode)
      && out_mode == SFmode && out_n == 4
      && in_mode == SFmode && in_n == 4)
    return rs6000_builtin_decls[RS6000_BIF_XVRSPIM];
  if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
      && out_mode == SFmode && out_n == 4
      && in_mode == SFmode && in_n == 4)
    return rs6000_builtin_decls[RS6000_BIF_VRFIM];

  /* Fused multiply-add: xvmadd*/vmaddfp.  */
  if (VECTOR_UNIT_VSX_P (V2DFmode)
      && out_mode == DFmode && out_n == 2
      && in_mode == DFmode && in_n == 2)
    return rs6000_builtin_decls[RS6000_BIF_XVMADDDP];
  if (VECTOR_UNIT_VSX_P (V4SFmode)
      && out_mode == SFmode && out_n == 4
      && in_mode == SFmode && in_n == 4)
    return rs6000_builtin_decls[RS6000_BIF_XVMADDSP];
  if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
      && out_mode == SFmode && out_n == 4
      && in_mode == SFmode && in_n == 4)
    return rs6000_builtin_decls[RS6000_BIF_VMADDFP];

  /* Round toward zero (trunc): xvrdpiz/xvrspiz/vrfiz.  */
  if (VECTOR_UNIT_VSX_P (V2DFmode)
      && out_mode == DFmode && out_n == 2
      && in_mode == DFmode && in_n == 2)
    return rs6000_builtin_decls[RS6000_BIF_XVRDPIZ];
  if (VECTOR_UNIT_VSX_P (V4SFmode)
      && out_mode == SFmode && out_n == 4
      && in_mode == SFmode && in_n == 4)
    return rs6000_builtin_decls[RS6000_BIF_XVRSPIZ];
  if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
      && out_mode == SFmode && out_n == 4
      && in_mode == SFmode && in_n == 4)
    return rs6000_builtin_decls[RS6000_BIF_VRFIZ];

  /* Round to nearest (xvrdpi/xvrspi), only under unsafe math since the
     instruction's rounding differs in corner cases.  */
  if (VECTOR_UNIT_VSX_P (V2DFmode)
      && flag_unsafe_math_optimizations
      && out_mode == DFmode && out_n == 2
      && in_mode == DFmode && in_n == 2)
    return rs6000_builtin_decls[RS6000_BIF_XVRDPI];
  if (VECTOR_UNIT_VSX_P (V4SFmode)
      && flag_unsafe_math_optimizations
      && out_mode == SFmode && out_n == 4
      && in_mode == SFmode && in_n == 4)
    return rs6000_builtin_decls[RS6000_BIF_XVRSPI];

  /* Round using current rounding mode (xvrdpic/xvrspic), only when
     trapping math is disabled.  */
  if (VECTOR_UNIT_VSX_P (V2DFmode)
      && !flag_trapping_math
      && out_mode == DFmode && out_n == 2
      && in_mode == DFmode && in_n == 2)
    return rs6000_builtin_decls[RS6000_BIF_XVRDPIC];
  if (VECTOR_UNIT_VSX_P (V4SFmode)
      && !flag_trapping_math
      && out_mode == SFmode && out_n == 4
      && in_mode == SFmode && in_n == 4)
    return rs6000_builtin_decls[RS6000_BIF_XVRSPIC];

  /* Generate calls to libmass if appropriate.  */
  if (rs6000_veclib_handler)
    return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
/* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
   library with vectorized intrinsics.  Builds and returns a decl such as
   "powd2"/"powf4" derived from the scalar builtin's name.  */

rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
  const char *suffix = NULL;
  tree fntype, new_fndecl, bdecl = NULL_TREE;
  machine_mode el_mode, in_mode;

  /* Libmass is suitable for unsafe math only as it does not correctly support
     parts of IEEE with the required precision such as denormals.  Only support
     it if we have VSX to use the simd d2 or f4 functions.
     XXX: Add variable length support.  */
  if (!flag_unsafe_math_optimizations || !TARGET_VSX)

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  /* Input and output element mode/count must agree.  */
  if (el_mode != in_mode

  /* Pick the scalar builtin and the MASS name suffix for the mode.  */
  if (el_mode == DFmode && n == 2)
    bdecl = mathfn_built_in (double_type_node, fn);
    suffix = "d2";				/* pow -> powd2 */
  else if (el_mode == SFmode && n == 4)
    bdecl = mathfn_built_in (float_type_node, fn);
    suffix = "4";				/* powf -> powf4 */

  gcc_assert (suffix != NULL);
  bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));

  /* Form the MASS routine name: strip "__builtin_" and append SUFFIX.  */
  strcpy (name, bname + strlen ("__builtin_"));
  strcat (name, suffix);

  fntype = build_function_type_list (type_out, type_in, NULL);
  else if (n_args == 2)
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  /* The MASS routines are pure math: no virtual operands, read-only.  */
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;
/* Default CPU string for rs6000*_file_start functions; initialized from
   TARGET_CPU_DEFAULT in rs6000_file_start.  */
static const char *rs6000_default_cpu;
#ifdef USING_ELFOS_H
/* The ".machine" string emitted at the top of the assembly output;
   computed by rs6000_machine_from_flags below.  */
const char *rs6000_machine;

/* Map the selected CPU and the enabled ISA flags to the most specific
   ".machine" value the assembler understands.  */
rs6000_machine_from_flags (void)
  /* Freescale e300/e500/e5500/e6500 embedded families.  */
  if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3)
  if (rs6000_cpu == PROCESSOR_PPC8540 || rs6000_cpu == PROCESSOR_PPC8548)
  if (rs6000_cpu == PROCESSOR_PPCE500MC)
  if (rs6000_cpu == PROCESSOR_PPCE500MC64)
  if (rs6000_cpu == PROCESSOR_PPCE5500)
  if (rs6000_cpu == PROCESSOR_PPCE6500)

  /* 400-series embedded cores.  */
  if (rs6000_cpu == PROCESSOR_PPC403)
  if (rs6000_cpu == PROCESSOR_PPC405)
  if (rs6000_cpu == PROCESSOR_PPC440)
  if (rs6000_cpu == PROCESSOR_PPC476)

  if (rs6000_cpu == PROCESSOR_PPCA2)

  if (rs6000_cpu == PROCESSOR_CELL)

  if (rs6000_cpu == PROCESSOR_TITAN)

  /* 500 series and 800 series */
  if (rs6000_cpu == PROCESSOR_MPCCORE)

  /* This (and ppc64 below) are disabled here (for now at least) because
     PROCESSOR_POWERPC, PROCESSOR_POWERPC64, and PROCESSOR_COMMON
     are #define'd as some of these.  Untangling that is a job for later.  */

  /* 600 series and 700 series, "classic" */
  if (rs6000_cpu == PROCESSOR_PPC601 || rs6000_cpu == PROCESSOR_PPC603
      || rs6000_cpu == PROCESSOR_PPC604 || rs6000_cpu == PROCESSOR_PPC604e
      || rs6000_cpu == PROCESSOR_PPC750)

  /* Classic with AltiVec, "G4" */
  if (rs6000_cpu == PROCESSOR_PPC7400 || rs6000_cpu == PROCESSOR_PPC7450)

  /* The older 64-bit CPUs */
  if (rs6000_cpu == PROCESSOR_PPC620 || rs6000_cpu == PROCESSOR_PPC630
      || rs6000_cpu == PROCESSOR_RS64A)

  HOST_WIDE_INT flags = rs6000_isa_flags;

  /* Disable the flags that should never influence the .machine selection.  */
  flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT | OPTION_MASK_ISEL
	     | OPTION_MASK_ALTIVEC);

  /* Otherwise choose the newest ISA level whose distinguishing flags are
     present, from Power11 down to plain 64-bit PowerPC.  */
  if ((flags & (POWER11_MASKS_SERVER & ~ISA_3_1_MASKS_SERVER)) != 0)
  if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
  if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
  if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
  if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
  if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
  if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
  if ((flags & ISA_2_1_MASKS) != 0)
  if ((flags & OPTION_MASK_POWERPC64) != 0)
/* Emit the ".machine" pseudo-op(s) selected by rs6000_machine_from_flags
   into the assembly output.  */
emit_asm_machine (void)
  fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
  /* NOTE(review): presumably guarded by an AltiVec-enabled condition in the
     full source — confirm before relying on unconditional emission.  */
  fprintf (asm_out_file, "\t.machine altivec\n");
/* Do anything needed at the start of the asm file.  */

rs6000_file_start (void)
  const char *start = buffer;
  FILE *file = asm_out_file;

  rs6000_default_cpu = TARGET_CPU_DEFAULT;

  default_file_start ();

  /* With -fverbose-asm, emit a comment summarizing the rs6000 options in
     effect.  START holds the comment-leader prefix and is cleared after
     the first item so subsequent items continue the same comment line.  */
  if (flag_verbose_asm)
    sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);

    if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
      fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);

    if (OPTION_SET_P (rs6000_cpu_index))
      fprintf (file, "%s -mcpu=%s", start,
	       processor_target_table[rs6000_cpu_index].name);

    if (OPTION_SET_P (rs6000_tune_index))
      fprintf (file, "%s -mtune=%s", start,
	       processor_target_table[rs6000_tune_index].name);

    if (PPC405_ERRATUM77)
      fprintf (file, "%s PPC405CR_ERRATUM77", start);

#ifdef USING_ELFOS_H
    /* Report the small-data model and threshold when applicable.  */
    switch (rs6000_sdata)
      case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
      case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
      case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
      case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;

    if (rs6000_sdata && g_switch_value)
      fprintf (file, "%s -G %d", start,

#ifdef USING_ELFOS_H
  /* Select and emit the ".machine" directive for this translation unit.  */
  rs6000_machine = rs6000_machine_from_flags ();
  emit_asm_machine ();

  if (DEFAULT_ABI == ABI_ELFv2)
    fprintf (file, "\t.abiversion 2\n");
/* Return nonzero if this function is known to have a null epilogue.
   Only meaningful after reload, when the stack frame layout is final.  */

direct_return (void)
  if (reload_completed)
    rs6000_stack_t *info = rs6000_stack_info ();

    /* No GPR/FPR/AltiVec registers saved, no LR/CR save, no VRSAVE —
       i.e. nothing for an epilogue to restore.  */
    if (info->first_gp_reg_save == 32
	&& info->first_fp_reg_save == 64
	&& info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
	&& ! info->lr_save_p
	&& ! info->cr_save_p
	&& info->vrsave_size == 0
/* Helper for num_insns_constant.  Calculate number of instructions to
   load VALUE to a single gpr using combinations of addi, addis, ori,
   oris, sldi and rldimi instructions.  */

num_insns_constant_gpr (HOST_WIDE_INT value)
  /* signed constant loadable with addi */
  if (SIGNED_INTEGER_16BIT_P (value))

  /* constant loadable with addis */
  else if ((value & 0xffff) == 0
	   && (value >> 31 == -1 || value >> 31 == 0))

  /* PADDI can support up to 34 bit signed integers.  */
  else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))

  else if (TARGET_POWERPC64)
    /* Ask the constant splitter how many insns a full 64-bit synthesis
       would take (nullptr target means count only, don't emit).  */
    rs6000_emit_set_long_const (nullptr, value, &num_insns);
/* Helper for num_insns_constant.  Allow constants formed by the
   num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
   and handle modes that require multiple gprs.  */

num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
  /* Number of word-sized registers needed to hold MODE.  */
  int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Cost the low word, sign-extended to a full HOST_WIDE_INT.  */
  HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
  int insns = num_insns_constant_gpr (low);
  /* We won't get more than 2 from num_insns_constant_gpr
     except when TARGET_POWERPC64 and mode is DImode or
     wider, so the register mode must be DImode.  */
  && rs6000_is_valid_and_mask (GEN_INT (low), DImode))

  /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
     it all at once would be UB.  */
  value >>= (BITS_PER_WORD - 1);
/* Return the number of instructions it takes to form a constant in as
   many gprs are needed for MODE.  */

num_insns_constant (rtx op, machine_mode mode)
  switch (GET_CODE (op))

    case CONST_WIDE_INT:
      /* Sum the per-element costs of the wide-int pieces.  */
      for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
	insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),

      const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);

      /* 32-bit FP/decimal: one target word holds the whole image.  */
      if (mode == SFmode || mode == SDmode)
	REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
	REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
	/* See the first define_split in rs6000.md handling a
	   const_double_operand.  */

      /* 64-bit FP/decimal: assemble the two 32-bit halves in target
	 endian order into a single 64-bit value.  */
      else if (mode == DFmode || mode == DDmode)
	REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
	REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
	/* See the second (32-bit) and third (64-bit) define_split
	   in rs6000.md handling a const_double_operand.  */
	val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
	val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;

      /* 128-bit FP/decimal: cost each 64-bit half separately.  */
      else if (mode == TFmode || mode == TDmode
	       || mode == KFmode || mode == IFmode)
	REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
	REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
	val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
	val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
	insns = num_insns_constant_multi (val, DImode);
	val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
	val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
	insns += num_insns_constant_multi (val, DImode);

  return num_insns_constant_multi (val, mode);
6182 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6183 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6184 corresponding element of the vector, but for V4SFmode, the
6185 corresponding "float" is interpreted as an SImode integer. */
6188 const_vector_elt_as_int (rtx op
, unsigned int elt
)
6192 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6193 gcc_assert (GET_MODE (op
) != V2DImode
6194 && GET_MODE (op
) != V2DFmode
);
6196 tmp
= CONST_VECTOR_ELT (op
, elt
);
6197 if (GET_MODE (op
) == V4SFmode
)
6198 tmp
= gen_lowpart (SImode
, tmp
);
6199 return INTVAL (tmp
);
/* Return true if OP can be synthesized with a particular vspltisb, vspltish
   or vspltisw instruction.  OP is a CONST_VECTOR.  Which instruction is used
   depends on STEP and COPIES, one of which will be 1.  If COPIES > 1,
   all items are set to the same value and contain COPIES replicas of the
   vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
   operand and the others are set to the value of the operand's msb.  */

vspltis_constant (rtx op, unsigned step, unsigned copies)
  machine_mode mode = GET_MODE (op);
  machine_mode inner = GET_MODE_INNER (mode);

  HOST_WIDE_INT splat_val;
  HOST_WIDE_INT msb_val;

  /* 64-bit and 128-bit element vectors are not handled by vspltis*.  */
  if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)

  nunits = GET_MODE_NUNITS (mode);
  bitsize = GET_MODE_BITSIZE (inner);
  mask = GET_MODE_MASK (inner);

  /* The candidate splat value is the last element in memory order.  */
  val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
  msb_val = val >= 0 ? 0 : -1;

  if (val == 0 && step > 1)
    /* Special case for loading most significant bit with step > 1.
       In that case, match 0s in all but step-1s elements, where match
       EASY_VECTOR_MSB.  */
    for (i = 1; i < nunits; ++i)
      unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
      HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
      if ((i & (step - 1)) == step - 1)
	if (!EASY_VECTOR_MSB (elt_val, inner))

  /* Construct the value to be splatted, if possible.  If not, return 0.
     Each doubling of COPIES halves the effective element size; the two
     halves of the current value must agree for this to be a replica.  */
  for (i = 2; i <= copies; i *= 2)
    HOST_WIDE_INT small_val;
    small_val = splat_val >> bitsize;
    if (splat_val != ((HOST_WIDE_INT)
		      ((unsigned HOST_WIDE_INT) small_val << bitsize)
		      | (small_val & mask)))
    splat_val = small_val;
    inner = smallest_int_mode_for_size (bitsize);

  /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw].  */
  if (EASY_VECTOR_15 (splat_val))

  /* Also check if we can splat, and then add the result to itself.  Do so if
     the value is positive, of if the splat instruction is using OP's mode;
     for splat_val < 0, the splat and the add should use the same mode.  */
  else if (EASY_VECTOR_15_ADD_SELF (splat_val)
	   && (splat_val >= 0 || (step == 1 && copies == 1)))

  /* Also check if are loading up the most significant bit which can be done by
     loading up -1 and shifting the value left by -1.  Only do this for
     step 1 here, for larger steps it is done earlier.  */
  else if (EASY_VECTOR_MSB (splat_val, inner) && step == 1)

  /* Check if VAL is present in every STEP-th element, and the
     other elements are filled with its most significant bit.  */
  for (i = 1; i < nunits; ++i)
    HOST_WIDE_INT desired_val;
    unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
    if ((i & (step - 1)) == 0)
      desired_val = msb_val;

    if (desired_val != const_vector_elt_as_int (op, elt))
/* Like vsplitis_constant, but allow the value to be shifted left with a VSLDOI
   instruction, filling in the bottom elements with 0 or -1.

   Return 0 if the constant cannot be generated with VSLDOI.  Return positive
   for the number of zeroes to shift in, or negative for the number of 0xff
   bytes to shift in.

   OP is a CONST_VECTOR.  */

vspltis_shifted (rtx op)
  machine_mode mode = GET_MODE (op);
  machine_mode inner = GET_MODE_INNER (mode);

  /* Only byte/halfword/word element vectors can use vspltis* + vsldoi.  */
  if (mode != V16QImode && mode != V8HImode && mode != V4SImode)

  /* We need to create pseudo registers to do the shift, so don't recognize
     shift vector constants after reload.  Don't match it even before RA
     after split1 is done, because there won't be further splitting pass
     before RA to do the splitting.  */
  if (!can_create_pseudo_p ()
      || (cfun->curr_properties & PROP_rtl_split_insns))

  nunits = GET_MODE_NUNITS (mode);
  mask = GET_MODE_MASK (inner);

  /* Candidate splat value is the first element in memory order.  */
  val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);

  /* Check if the value can really be the operand of a vspltis[bhw].  */
  if (EASY_VECTOR_15 (val))

  /* Also check if we are loading up the most significant bit which can be done
     by loading up -1 and shifting the value left by -1.  */
  else if (EASY_VECTOR_MSB (val, inner))

  /* Check if VAL is present in every STEP-th element until we find elements
     that are 0 or all 1 bits.  */
  for (i = 1; i < nunits; ++i)
    unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
    HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);

    /* If the value isn't the splat value, check for the remaining elements
       being all zero (shift in zero bytes) ...  */
    for (j = i + 1; j < nunits; ++j)
      unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
      if (const_vector_elt_as_int (op, elt2) != 0)

    return (nunits - i) * GET_MODE_SIZE (inner);

    /* ... or all ones (shift in 0xff bytes, negative return).  */
    else if ((elt_val & mask) == mask)
      for (j = i + 1; j < nunits; ++j)
	unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
	if ((const_vector_elt_as_int (op, elt2) & mask) != mask)

      return -((nunits - i) * GET_MODE_SIZE (inner));

  /* If all elements are equal, we don't need to do VSLDOI.  */
6403 /* Return non-zero (element mode byte size) if OP is of the given MODE
6404 and can be synthesized with a vspltisb, vspltish or vspltisw. */
6407 easy_altivec_constant (rtx op
, machine_mode mode
)
6409 unsigned step
, copies
;
6411 if (mode
== VOIDmode
)
6412 mode
= GET_MODE (op
);
6413 else if (mode
!= GET_MODE (op
))
6416 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6418 if (mode
== V2DFmode
)
6419 return zero_constant (op
, mode
) ? 8 : 0;
6421 else if (mode
== V2DImode
)
6423 if (!CONST_INT_P (CONST_VECTOR_ELT (op
, 0))
6424 || !CONST_INT_P (CONST_VECTOR_ELT (op
, 1)))
6427 if (zero_constant (op
, mode
))
6430 if (INTVAL (CONST_VECTOR_ELT (op
, 0)) == -1
6431 && INTVAL (CONST_VECTOR_ELT (op
, 1)) == -1)
6437 /* V1TImode is a special container for TImode. Ignore for now. */
6438 else if (mode
== V1TImode
)
6441 /* Start with a vspltisw. */
6442 step
= GET_MODE_NUNITS (mode
) / 4;
6445 if (vspltis_constant (op
, step
, copies
))
6448 /* Then try with a vspltish. */
6454 if (vspltis_constant (op
, step
, copies
))
6457 /* And finally a vspltisb. */
6463 if (vspltis_constant (op
, step
, copies
))
6466 if (vspltis_shifted (op
) != 0)
6467 return GET_MODE_SIZE (GET_MODE_INNER (mode
));
6472 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6473 result is OP. Abort if it is not possible. */
6476 gen_easy_altivec_constant (rtx op
)
6478 machine_mode mode
= GET_MODE (op
);
6479 int nunits
= GET_MODE_NUNITS (mode
);
6480 rtx val
= CONST_VECTOR_ELT (op
, BYTES_BIG_ENDIAN
? nunits
- 1 : 0);
6481 unsigned step
= nunits
/ 4;
6482 unsigned copies
= 1;
6484 /* Start with a vspltisw. */
6485 if (vspltis_constant (op
, step
, copies
))
6486 return gen_rtx_VEC_DUPLICATE (V4SImode
, gen_lowpart (SImode
, val
));
6488 /* Then try with a vspltish. */
6494 if (vspltis_constant (op
, step
, copies
))
6495 return gen_rtx_VEC_DUPLICATE (V8HImode
, gen_lowpart (HImode
, val
));
6497 /* And finally a vspltisb. */
6503 if (vspltis_constant (op
, step
, copies
))
6504 return gen_rtx_VEC_DUPLICATE (V16QImode
, gen_lowpart (QImode
, val
));
6509 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6510 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6512 Return the number of instructions needed (1 or 2) into the address pointed
6515 Return the constant that is being split via CONSTANT_PTR. */
6518 xxspltib_constant_p (rtx op
,
6523 size_t nunits
= GET_MODE_NUNITS (mode
);
6525 HOST_WIDE_INT value
;
6528 /* Set the returned values to out of bound values. */
6529 *num_insns_ptr
= -1;
6530 *constant_ptr
= 256;
6532 if (!TARGET_P9_VECTOR
)
6535 if (mode
== VOIDmode
)
6536 mode
= GET_MODE (op
);
6538 else if (mode
!= GET_MODE (op
) && GET_MODE (op
) != VOIDmode
)
6541 /* Handle (vec_duplicate <constant>). */
6542 if (GET_CODE (op
) == VEC_DUPLICATE
)
6544 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6545 && mode
!= V2DImode
)
6548 element
= XEXP (op
, 0);
6549 if (!CONST_INT_P (element
))
6552 value
= INTVAL (element
);
6553 if (!IN_RANGE (value
, -128, 127))
6557 /* Handle (const_vector [...]). */
6558 else if (GET_CODE (op
) == CONST_VECTOR
)
6560 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6561 && mode
!= V2DImode
)
6564 element
= CONST_VECTOR_ELT (op
, 0);
6565 if (!CONST_INT_P (element
))
6568 value
= INTVAL (element
);
6569 if (!IN_RANGE (value
, -128, 127))
6572 for (i
= 1; i
< nunits
; i
++)
6574 element
= CONST_VECTOR_ELT (op
, i
);
6575 if (!CONST_INT_P (element
))
6578 if (value
!= INTVAL (element
))
6583 /* Handle integer constants being loaded into the upper part of the VSX
6584 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6585 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLITIB. */
6586 else if (CONST_INT_P (op
))
6588 if (!SCALAR_INT_MODE_P (mode
))
6591 value
= INTVAL (op
);
6592 if (!IN_RANGE (value
, -128, 127))
6595 if (!IN_RANGE (value
, -1, 0))
6597 if (!(reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_VALID
))
6600 if (EASY_VECTOR_15 (value
))
6608 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6609 sign extend. Special case 0/-1 to allow getting any VSX register instead
6610 of an Altivec register. */
6611 if ((mode
== V4SImode
|| mode
== V8HImode
) && !IN_RANGE (value
, -1, 0)
6612 && EASY_VECTOR_15 (value
))
6615 /* Return # of instructions and the constant byte for XXSPLTIB. */
6616 if (mode
== V16QImode
)
6619 else if (IN_RANGE (value
, -1, 0))
6622 /* Do not generate XXSPLTIB and a sign extend operation if we can generate a
6623 single XXSPLTIW or XXSPLTIDP instruction. */
6624 else if (vsx_prefixed_constant (op
, mode
))
6627 /* Return XXSPLITB followed by a sign extend operation to convert the
6628 constant to V8HImode or V4SImode. */
6632 *constant_ptr
= (int) value
;
6636 /* Return true if OP mode is V2DI and can be synthesized with ISA 2.07
6637 instructions vupkhsw and vspltisw.
6639 Return the constant that is being split via CONSTANT_PTR. */
6642 vspltisw_vupkhsw_constant_p (rtx op
, machine_mode mode
, int *constant_ptr
)
6644 HOST_WIDE_INT value
;
6647 if (!TARGET_P8_VECTOR
)
6650 if (mode
!= V2DImode
)
6653 if (!const_vec_duplicate_p (op
, &elt
))
6656 value
= INTVAL (elt
);
6657 if (value
== 0 || value
== 1
6658 || !EASY_VECTOR_15 (value
))
6662 *constant_ptr
= (int) value
;
6667 output_vec_const_move (rtx
*operands
)
6675 mode
= GET_MODE (dest
);
6679 bool dest_vmx_p
= ALTIVEC_REGNO_P (REGNO (dest
));
6680 int xxspltib_value
= 256;
6683 if (zero_constant (vec
, mode
))
6685 if (TARGET_P9_VECTOR
)
6686 return "xxspltib %x0,0";
6688 else if (dest_vmx_p
)
6689 return "vspltisw %0,0";
6692 return "xxlxor %x0,%x0,%x0";
6695 if (all_ones_constant (vec
, mode
))
6697 if (TARGET_P9_VECTOR
)
6698 return "xxspltib %x0,255";
6700 else if (dest_vmx_p
)
6701 return "vspltisw %0,-1";
6703 else if (TARGET_P8_VECTOR
)
6704 return "xxlorc %x0,%x0,%x0";
6710 vec_const_128bit_type vsx_const
;
6711 if (TARGET_POWER10
&& vec_const_128bit_to_bytes (vec
, mode
, &vsx_const
))
6713 unsigned imm
= constant_generates_lxvkq (&vsx_const
);
6716 operands
[2] = GEN_INT (imm
);
6717 return "lxvkq %x0,%2";
6720 imm
= constant_generates_xxspltiw (&vsx_const
);
6723 operands
[2] = GEN_INT (imm
);
6724 return "xxspltiw %x0,%2";
6727 imm
= constant_generates_xxspltidp (&vsx_const
);
6730 operands
[2] = GEN_INT (imm
);
6731 return "xxspltidp %x0,%2";
6735 if (TARGET_P9_VECTOR
6736 && xxspltib_constant_p (vec
, mode
, &num_insns
, &xxspltib_value
))
6740 operands
[2] = GEN_INT (xxspltib_value
& 0xff);
6741 return "xxspltib %x0,%2";
6752 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest
)));
6753 if (zero_constant (vec
, mode
))
6754 return "vspltisw %0,0";
6756 if (all_ones_constant (vec
, mode
))
6757 return "vspltisw %0,-1";
6759 /* Do we need to construct a value using VSLDOI? */
6760 shift
= vspltis_shifted (vec
);
6764 splat_vec
= gen_easy_altivec_constant (vec
);
6765 gcc_assert (GET_CODE (splat_vec
) == VEC_DUPLICATE
);
6766 operands
[1] = XEXP (splat_vec
, 0);
6767 if (!EASY_VECTOR_15 (INTVAL (operands
[1])))
6770 switch (GET_MODE (splat_vec
))
6773 return "vspltisw %0,%1";
6776 return "vspltish %0,%1";
6779 return "vspltisb %0,%1";
6789 /* Initialize vector TARGET to VALS. */
6792 rs6000_expand_vector_init (rtx target
, rtx vals
)
6794 machine_mode mode
= GET_MODE (target
);
6795 machine_mode inner_mode
= GET_MODE_INNER (mode
);
6796 unsigned int n_elts
= GET_MODE_NUNITS (mode
);
6797 int n_var
= 0, one_var
= -1;
6798 bool all_same
= true, all_const_zero
= true;
6802 for (i
= 0; i
< n_elts
; ++i
)
6804 x
= XVECEXP (vals
, 0, i
);
6805 if (!(CONST_SCALAR_INT_P (x
) || CONST_DOUBLE_P (x
) || CONST_FIXED_P (x
)))
6806 ++n_var
, one_var
= i
;
6807 else if (x
!= CONST0_RTX (inner_mode
))
6808 all_const_zero
= false;
6810 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
6816 rtx const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
6817 bool int_vector_p
= (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
);
6818 if ((int_vector_p
|| TARGET_VSX
) && all_const_zero
)
6820 /* Zero register. */
6821 emit_move_insn (target
, CONST0_RTX (mode
));
6824 else if (int_vector_p
&& easy_vector_constant (const_vec
, mode
))
6826 /* Splat immediate. */
6827 emit_insn (gen_rtx_SET (target
, const_vec
));
6832 /* Load from constant pool. */
6833 emit_move_insn (target
, const_vec
);
6838 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6839 if (VECTOR_MEM_VSX_P (mode
) && (mode
== V2DFmode
|| mode
== V2DImode
))
6843 size_t num_elements
= all_same
? 1 : 2;
6844 for (i
= 0; i
< num_elements
; i
++)
6846 op
[i
] = XVECEXP (vals
, 0, i
);
6847 /* Just in case there is a SUBREG with a smaller mode, do a
6849 if (GET_MODE (op
[i
]) != inner_mode
)
6851 rtx tmp
= gen_reg_rtx (inner_mode
);
6852 convert_move (tmp
, op
[i
], 0);
6855 /* Allow load with splat double word. */
6856 else if (MEM_P (op
[i
]))
6859 op
[i
] = force_reg (inner_mode
, op
[i
]);
6861 else if (!REG_P (op
[i
]))
6862 op
[i
] = force_reg (inner_mode
, op
[i
]);
6867 if (mode
== V2DFmode
)
6868 emit_insn (gen_vsx_splat_v2df (target
, op
[0]));
6870 emit_insn (gen_vsx_splat_v2di (target
, op
[0]));
6874 if (mode
== V2DFmode
)
6875 emit_insn (gen_vsx_concat_v2df (target
, op
[0], op
[1]));
6877 emit_insn (gen_vsx_concat_v2di (target
, op
[0], op
[1]));
6882 /* Special case initializing vector int if we are on 64-bit systems with
6883 direct move or we have the ISA 3.0 instructions. */
6884 if (mode
== V4SImode
&& VECTOR_MEM_VSX_P (V4SImode
)
6885 && TARGET_DIRECT_MOVE_64BIT
)
6889 rtx element0
= XVECEXP (vals
, 0, 0);
6890 if (MEM_P (element0
))
6891 element0
= rs6000_force_indexed_or_indirect_mem (element0
);
6893 element0
= force_reg (SImode
, element0
);
6895 if (TARGET_P9_VECTOR
)
6896 emit_insn (gen_vsx_splat_v4si (target
, element0
));
6899 rtx tmp
= gen_reg_rtx (DImode
);
6900 emit_insn (gen_zero_extendsidi2 (tmp
, element0
));
6901 emit_insn (gen_vsx_splat_v4si_di (target
, tmp
));
6910 for (i
= 0; i
< 4; i
++)
6911 elements
[i
] = force_reg (SImode
, XVECEXP (vals
, 0, i
));
6913 emit_insn (gen_vsx_init_v4si (target
, elements
[0], elements
[1],
6914 elements
[2], elements
[3]));
6919 /* With single precision floating point on VSX, know that internally single
6920 precision is actually represented as a double, and either make 2 V2DF
6921 vectors, and convert these vectors to single precision, or do one
6922 conversion, and splat the result to the other elements. */
6923 if (mode
== V4SFmode
&& VECTOR_MEM_VSX_P (V4SFmode
))
6927 rtx element0
= XVECEXP (vals
, 0, 0);
6929 if (TARGET_P9_VECTOR
)
6931 if (MEM_P (element0
))
6932 element0
= rs6000_force_indexed_or_indirect_mem (element0
);
6934 emit_insn (gen_vsx_splat_v4sf (target
, element0
));
6939 rtx freg
= gen_reg_rtx (V4SFmode
);
6940 rtx sreg
= force_reg (SFmode
, element0
);
6941 rtx cvt
= (TARGET_XSCVDPSPN
6942 ? gen_vsx_xscvdpspn_scalar (freg
, sreg
)
6943 : gen_vsx_xscvdpsp_scalar (freg
, sreg
));
6946 emit_insn (gen_vsx_xxspltw_v4sf_direct (target
, freg
,
6952 if (TARGET_P8_VECTOR
&& TARGET_POWERPC64
)
6958 for (i
= 0; i
< 4; i
++)
6960 tmp_si
[i
] = gen_reg_rtx (SImode
);
6961 tmp_di
[i
] = gen_reg_rtx (DImode
);
6962 mrg_di
[i
] = gen_reg_rtx (DImode
);
6963 tmp_sf
[i
] = force_reg (SFmode
, XVECEXP (vals
, 0, i
));
6964 emit_insn (gen_movsi_from_sf (tmp_si
[i
], tmp_sf
[i
]));
6965 emit_insn (gen_zero_extendsidi2 (tmp_di
[i
], tmp_si
[i
]));
6968 if (!BYTES_BIG_ENDIAN
)
6970 std::swap (tmp_di
[0], tmp_di
[1]);
6971 std::swap (tmp_di
[2], tmp_di
[3]);
6974 emit_insn (gen_ashldi3 (mrg_di
[0], tmp_di
[0], GEN_INT (32)));
6975 emit_insn (gen_iordi3 (mrg_di
[1], mrg_di
[0], tmp_di
[1]));
6976 emit_insn (gen_ashldi3 (mrg_di
[2], tmp_di
[2], GEN_INT (32)));
6977 emit_insn (gen_iordi3 (mrg_di
[3], mrg_di
[2], tmp_di
[3]));
6979 rtx tmp_v2di
= gen_reg_rtx (V2DImode
);
6980 emit_insn (gen_vsx_concat_v2di (tmp_v2di
, mrg_di
[1], mrg_di
[3]));
6981 emit_move_insn (target
, gen_lowpart (V4SFmode
, tmp_v2di
));
6985 rtx dbl_even
= gen_reg_rtx (V2DFmode
);
6986 rtx dbl_odd
= gen_reg_rtx (V2DFmode
);
6987 rtx flt_even
= gen_reg_rtx (V4SFmode
);
6988 rtx flt_odd
= gen_reg_rtx (V4SFmode
);
6989 rtx op0
= force_reg (SFmode
, XVECEXP (vals
, 0, 0));
6990 rtx op1
= force_reg (SFmode
, XVECEXP (vals
, 0, 1));
6991 rtx op2
= force_reg (SFmode
, XVECEXP (vals
, 0, 2));
6992 rtx op3
= force_reg (SFmode
, XVECEXP (vals
, 0, 3));
6994 emit_insn (gen_vsx_concat_v2sf (dbl_even
, op0
, op1
));
6995 emit_insn (gen_vsx_concat_v2sf (dbl_odd
, op2
, op3
));
6996 emit_insn (gen_vsx_xvcvdpsp (flt_even
, dbl_even
));
6997 emit_insn (gen_vsx_xvcvdpsp (flt_odd
, dbl_odd
));
6998 rs6000_expand_extract_even (target
, flt_even
, flt_odd
);
7004 /* Special case initializing vector short/char that are splats if we are on
7005 64-bit systems with direct move. */
7006 if (all_same
&& TARGET_DIRECT_MOVE_64BIT
7007 && (mode
== V16QImode
|| mode
== V8HImode
))
7009 rtx op0
= XVECEXP (vals
, 0, 0);
7010 rtx di_tmp
= gen_reg_rtx (DImode
);
7013 op0
= force_reg (GET_MODE_INNER (mode
), op0
);
7015 if (mode
== V16QImode
)
7017 emit_insn (gen_zero_extendqidi2 (di_tmp
, op0
));
7018 emit_insn (gen_vsx_vspltb_di (target
, di_tmp
));
7022 if (mode
== V8HImode
)
7024 emit_insn (gen_zero_extendhidi2 (di_tmp
, op0
));
7025 emit_insn (gen_vsx_vsplth_di (target
, di_tmp
));
7030 /* Store value to stack temp. Load vector element. Splat. However, splat
7031 of 64-bit items is not supported on Altivec. */
7032 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
7034 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
7035 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0),
7036 XVECEXP (vals
, 0, 0));
7037 x
= gen_rtx_UNSPEC (VOIDmode
,
7038 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
7039 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
7041 gen_rtx_SET (target
, mem
),
7043 x
= gen_rtx_VEC_SELECT (inner_mode
, target
,
7044 gen_rtx_PARALLEL (VOIDmode
,
7045 gen_rtvec (1, const0_rtx
)));
7046 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
7050 /* One field is non-constant. Load constant then overwrite
7054 rtx copy
= copy_rtx (vals
);
7056 /* Load constant part of vector, substitute neighboring value for
7058 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
7059 rs6000_expand_vector_init (target
, copy
);
7061 /* Insert variable. */
7062 rs6000_expand_vector_set (target
, XVECEXP (vals
, 0, one_var
),
7067 if (TARGET_DIRECT_MOVE
&& (mode
== V16QImode
|| mode
== V8HImode
))
7070 /* Force the values into word_mode registers. */
7071 for (i
= 0; i
< n_elts
; i
++)
7073 rtx tmp
= force_reg (inner_mode
, XVECEXP (vals
, 0, i
));
7074 machine_mode tmode
= TARGET_POWERPC64
? DImode
: SImode
;
7075 op
[i
] = simplify_gen_subreg (tmode
, tmp
, inner_mode
, 0);
7078 /* Take unsigned char big endianness on 64bit as example for below
7079 construction, the input values are: A, B, C, D, ..., O, P. */
7081 if (TARGET_DIRECT_MOVE_128
)
7083 /* Move to VSX register with vec_concat, each has 2 values.
7084 eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
7085 vr1[1] = { xxxxxxxC, xxxxxxxD };
7087 vr1[7] = { xxxxxxxO, xxxxxxxP }; */
7089 for (i
= 0; i
< n_elts
/ 2; i
++)
7091 vr1
[i
] = gen_reg_rtx (V2DImode
);
7092 emit_insn (gen_vsx_concat_v2di (vr1
[i
], op
[i
* 2],
7096 /* Pack vectors with 2 values into vectors with 4 values.
7097 eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
7098 vr2[1] = { xxxExxxF, xxxGxxxH };
7099 vr2[1] = { xxxIxxxJ, xxxKxxxL };
7100 vr2[3] = { xxxMxxxN, xxxOxxxP }; */
7102 for (i
= 0; i
< n_elts
/ 4; i
++)
7104 vr2
[i
] = gen_reg_rtx (V4SImode
);
7105 emit_insn (gen_altivec_vpkudum (vr2
[i
], vr1
[i
* 2],
7109 /* Pack vectors with 4 values into vectors with 8 values.
7110 eg: vr3[0] = { xAxBxCxD, xExFxGxH };
7111 vr3[1] = { xIxJxKxL, xMxNxOxP }; */
7113 for (i
= 0; i
< n_elts
/ 8; i
++)
7115 vr3
[i
] = gen_reg_rtx (V8HImode
);
7116 emit_insn (gen_altivec_vpkuwum (vr3
[i
], vr2
[i
* 2],
7120 /* If it's V8HImode, it's done and return it. */
7121 if (mode
== V8HImode
)
7123 emit_insn (gen_rtx_SET (target
, vr3
[0]));
7127 /* Pack vectors with 8 values into 16 values. */
7128 rtx res
= gen_reg_rtx (V16QImode
);
7129 emit_insn (gen_altivec_vpkuhum (res
, vr3
[0], vr3
[1]));
7130 emit_insn (gen_rtx_SET (target
, res
));
7134 rtx (*merge_v16qi
) (rtx
, rtx
, rtx
) = NULL
;
7135 rtx (*merge_v8hi
) (rtx
, rtx
, rtx
) = NULL
;
7136 rtx (*merge_v4si
) (rtx
, rtx
, rtx
) = NULL
;
7139 /* Set up some common gen routines and values. */
7140 if (BYTES_BIG_ENDIAN
)
7142 if (mode
== V16QImode
)
7144 merge_v16qi
= gen_altivec_vmrghb
;
7145 merge_v8hi
= gen_altivec_vmrglh
;
7148 merge_v8hi
= gen_altivec_vmrghh
;
7150 merge_v4si
= gen_altivec_vmrglw
;
7151 perm_idx
= GEN_INT (3);
7155 if (mode
== V16QImode
)
7157 merge_v16qi
= gen_altivec_vmrglb
;
7158 merge_v8hi
= gen_altivec_vmrghh
;
7161 merge_v8hi
= gen_altivec_vmrglh
;
7163 merge_v4si
= gen_altivec_vmrghw
;
7164 perm_idx
= GEN_INT (0);
7167 /* Move to VSX register with direct move.
7168 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
7169 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
7171 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
7173 for (i
= 0; i
< n_elts
; i
++)
7175 vr_qi
[i
] = gen_reg_rtx (V16QImode
);
7176 if (TARGET_POWERPC64
)
7177 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi
[i
], op
[i
]));
7179 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi
[i
], op
[i
]));
7182 /* Merge/move to vector short.
7183 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
7184 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
7186 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
7188 for (i
= 0; i
< 8; i
++)
7191 if (mode
== V16QImode
)
7193 tmp
= gen_reg_rtx (V16QImode
);
7194 emit_insn (merge_v16qi (tmp
, vr_qi
[2 * i
], vr_qi
[2 * i
+ 1]));
7196 vr_hi
[i
] = gen_reg_rtx (V8HImode
);
7197 emit_move_insn (vr_hi
[i
], gen_lowpart (V8HImode
, tmp
));
7200 /* Merge vector short to vector int.
7201 eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
7202 vr_si[1] = { xxxxxxxx, xxxxEFGH };
7204 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
7206 for (i
= 0; i
< 4; i
++)
7208 rtx tmp
= gen_reg_rtx (V8HImode
);
7209 emit_insn (merge_v8hi (tmp
, vr_hi
[2 * i
], vr_hi
[2 * i
+ 1]));
7210 vr_si
[i
] = gen_reg_rtx (V4SImode
);
7211 emit_move_insn (vr_si
[i
], gen_lowpart (V4SImode
, tmp
));
7214 /* Merge vector int to vector long.
7215 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
7216 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
7218 for (i
= 0; i
< 2; i
++)
7220 rtx tmp
= gen_reg_rtx (V4SImode
);
7221 emit_insn (merge_v4si (tmp
, vr_si
[2 * i
], vr_si
[2 * i
+ 1]));
7222 vr_di
[i
] = gen_reg_rtx (V2DImode
);
7223 emit_move_insn (vr_di
[i
], gen_lowpart (V2DImode
, tmp
));
7226 rtx res
= gen_reg_rtx (V2DImode
);
7227 emit_insn (gen_vsx_xxpermdi_v2di (res
, vr_di
[0], vr_di
[1], perm_idx
));
7228 emit_insn (gen_rtx_SET (target
, gen_lowpart (mode
, res
)));
7234 /* Construct the vector in memory one field at a time
7235 and load the whole vector. */
7236 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
7237 for (i
= 0; i
< n_elts
; i
++)
7238 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
7239 i
* GET_MODE_SIZE (inner_mode
)),
7240 XVECEXP (vals
, 0, i
));
7241 emit_move_insn (target
, mem
);
7244 /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
7245 is variable and also counts by vector element size for p9 and above. */
7248 rs6000_expand_vector_set_var_p9 (rtx target
, rtx val
, rtx idx
)
7250 machine_mode mode
= GET_MODE (target
);
7252 gcc_assert (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (idx
));
7254 machine_mode inner_mode
= GET_MODE (val
);
7256 int width
= GET_MODE_SIZE (inner_mode
);
7258 gcc_assert (width
>= 1 && width
<= 8);
7260 int shift
= exact_log2 (width
);
7262 machine_mode idx_mode
= GET_MODE (idx
);
7264 machine_mode shift_mode
;
7265 /* Gen function pointers for shifting left and generation of permutation
7267 rtx (*gen_ashl
) (rtx
, rtx
, rtx
);
7268 rtx (*gen_pcvr1
) (rtx
, rtx
);
7269 rtx (*gen_pcvr2
) (rtx
, rtx
);
7271 if (TARGET_POWERPC64
)
7273 shift_mode
= DImode
;
7274 gen_ashl
= gen_ashldi3
;
7275 gen_pcvr1
= BYTES_BIG_ENDIAN
? gen_altivec_lvsl_reg_di
7276 : gen_altivec_lvsr_reg_di
;
7277 gen_pcvr2
= BYTES_BIG_ENDIAN
? gen_altivec_lvsr_reg_di
7278 : gen_altivec_lvsl_reg_di
;
7282 shift_mode
= SImode
;
7283 gen_ashl
= gen_ashlsi3
;
7284 gen_pcvr1
= BYTES_BIG_ENDIAN
? gen_altivec_lvsl_reg_si
7285 : gen_altivec_lvsr_reg_si
;
7286 gen_pcvr2
= BYTES_BIG_ENDIAN
? gen_altivec_lvsr_reg_si
7287 : gen_altivec_lvsl_reg_si
;
7289 /* Generate the IDX for permute shift, width is the vector element size.
7290 idx = idx * width. */
7291 rtx tmp
= gen_reg_rtx (shift_mode
);
7292 idx
= convert_modes (shift_mode
, idx_mode
, idx
, 1);
7294 emit_insn (gen_ashl (tmp
, idx
, GEN_INT (shift
)));
7296 /* Generate one permutation control vector used for rotating the element
7297 at to-insert position to element zero in target vector. lvsl is
7298 used for big endianness while lvsr is used for little endianness:
7299 lvs[lr] v1,0,idx. */
7300 rtx pcvr1
= gen_reg_rtx (V16QImode
);
7301 emit_insn (gen_pcvr1 (pcvr1
, tmp
));
7303 rtx sub_target
= simplify_gen_subreg (V16QImode
, target
, mode
, 0);
7304 rtx perm1
= gen_altivec_vperm_v8hiv16qi (sub_target
, sub_target
, sub_target
,
7308 /* Insert val into element 0 of target vector. */
7309 rs6000_expand_vector_set (target
, val
, const0_rtx
);
7311 /* Rotate back with a reversed permutation control vector generated from:
7312 lvs[rl] v2,0,idx. */
7313 rtx pcvr2
= gen_reg_rtx (V16QImode
);
7314 emit_insn (gen_pcvr2 (pcvr2
, tmp
));
7316 rtx perm2
= gen_altivec_vperm_v8hiv16qi (sub_target
, sub_target
, sub_target
,
7321 /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
7322 is variable and also counts by vector element size for p7 & p8. */
7325 rs6000_expand_vector_set_var_p7 (rtx target
, rtx val
, rtx idx
)
7327 machine_mode mode
= GET_MODE (target
);
7329 gcc_assert (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (idx
));
7331 machine_mode inner_mode
= GET_MODE (val
);
7332 HOST_WIDE_INT mode_mask
= GET_MODE_MASK (inner_mode
);
7334 int width
= GET_MODE_SIZE (inner_mode
);
7335 gcc_assert (width
>= 1 && width
<= 4);
7337 int shift
= exact_log2 (width
);
7339 machine_mode idx_mode
= GET_MODE (idx
);
7341 machine_mode shift_mode
;
7342 rtx (*gen_ashl
)(rtx
, rtx
, rtx
);
7343 rtx (*gen_add
)(rtx
, rtx
, rtx
);
7344 rtx (*gen_sub
)(rtx
, rtx
, rtx
);
7345 rtx (*gen_lvsl
)(rtx
, rtx
);
7347 if (TARGET_POWERPC64
)
7349 shift_mode
= DImode
;
7350 gen_ashl
= gen_ashldi3
;
7351 gen_add
= gen_adddi3
;
7352 gen_sub
= gen_subdi3
;
7353 gen_lvsl
= gen_altivec_lvsl_reg_di
;
7357 shift_mode
= SImode
;
7358 gen_ashl
= gen_ashlsi3
;
7359 gen_add
= gen_addsi3
;
7360 gen_sub
= gen_subsi3
;
7361 gen_lvsl
= gen_altivec_lvsl_reg_si
;
7364 /* idx = idx * width. */
7365 rtx tmp
= gen_reg_rtx (shift_mode
);
7366 idx
= convert_modes (shift_mode
, idx_mode
, idx
, 1);
7368 emit_insn (gen_ashl (tmp
, idx
, GEN_INT (shift
)));
7370 /* For LE: idx = idx + 8. */
7371 if (!BYTES_BIG_ENDIAN
)
7372 emit_insn (gen_add (tmp
, tmp
, GEN_INT (8)));
7374 emit_insn (gen_sub (tmp
, GEN_INT (24 - width
), tmp
));
7377 DImode: 0xffffffffffffffff0000000000000000
7378 SImode: 0x00000000ffffffff0000000000000000
7379 HImode: 0x000000000000ffff0000000000000000.
7380 QImode: 0x00000000000000ff0000000000000000. */
7381 rtx mask
= gen_reg_rtx (V16QImode
);
7382 rtx mask_v2di
= gen_reg_rtx (V2DImode
);
7383 rtvec v
= rtvec_alloc (2);
7384 if (!BYTES_BIG_ENDIAN
)
7386 RTVEC_ELT (v
, 0) = gen_rtx_CONST_INT (DImode
, 0);
7387 RTVEC_ELT (v
, 1) = gen_rtx_CONST_INT (DImode
, mode_mask
);
7391 RTVEC_ELT (v
, 0) = gen_rtx_CONST_INT (DImode
, mode_mask
);
7392 RTVEC_ELT (v
, 1) = gen_rtx_CONST_INT (DImode
, 0);
7394 emit_insn (gen_vec_initv2didi (mask_v2di
, gen_rtx_PARALLEL (V2DImode
, v
)));
7395 rtx sub_mask
= simplify_gen_subreg (V16QImode
, mask_v2di
, V2DImode
, 0);
7396 emit_insn (gen_rtx_SET (mask
, sub_mask
));
7398 /* mtvsrd[wz] f0,tmp_val. */
7399 rtx tmp_val
= gen_reg_rtx (SImode
);
7400 if (inner_mode
== E_SFmode
)
7401 if (TARGET_DIRECT_MOVE_64BIT
)
7402 emit_insn (gen_movsi_from_sf (tmp_val
, val
));
7405 rtx stack
= rs6000_allocate_stack_temp (SFmode
, false, true);
7406 emit_insn (gen_movsf_hardfloat (stack
, val
));
7407 rtx stack2
= copy_rtx (stack
);
7408 PUT_MODE (stack2
, SImode
);
7409 emit_move_insn (tmp_val
, stack2
);
7412 tmp_val
= force_reg (SImode
, val
);
7414 rtx val_v16qi
= gen_reg_rtx (V16QImode
);
7415 rtx val_v2di
= gen_reg_rtx (V2DImode
);
7416 rtvec vec_val
= rtvec_alloc (2);
7417 if (!BYTES_BIG_ENDIAN
)
7419 RTVEC_ELT (vec_val
, 0) = gen_rtx_CONST_INT (DImode
, 0);
7420 RTVEC_ELT (vec_val
, 1) = tmp_val
;
7424 RTVEC_ELT (vec_val
, 0) = tmp_val
;
7425 RTVEC_ELT (vec_val
, 1) = gen_rtx_CONST_INT (DImode
, 0);
7428 gen_vec_initv2didi (val_v2di
, gen_rtx_PARALLEL (V2DImode
, vec_val
)));
7429 rtx sub_val
= simplify_gen_subreg (V16QImode
, val_v2di
, V2DImode
, 0);
7430 emit_insn (gen_rtx_SET (val_v16qi
, sub_val
));
7432 /* lvsl 13,0,idx. */
7433 rtx pcv
= gen_reg_rtx (V16QImode
);
7434 emit_insn (gen_lvsl (pcv
, tmp
));
7436 /* vperm 1,1,1,13. */
7437 /* vperm 0,0,0,13. */
7438 rtx val_perm
= gen_reg_rtx (V16QImode
);
7439 rtx mask_perm
= gen_reg_rtx (V16QImode
);
7440 emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm
, val_v16qi
, val_v16qi
, pcv
));
7441 emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm
, mask
, mask
, pcv
));
7443 rtx target_v16qi
= simplify_gen_subreg (V16QImode
, target
, mode
, 0);
7445 /* xxsel 34,34,32,33. */
7447 gen_vector_select_v16qi (target_v16qi
, target_v16qi
, val_perm
, mask_perm
));
7450 /* Set field ELT_RTX of TARGET to VAL. */
7453 rs6000_expand_vector_set (rtx target
, rtx val
, rtx elt_rtx
)
7455 machine_mode mode
= GET_MODE (target
);
7456 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7457 rtx reg
= gen_reg_rtx (mode
);
7459 int width
= GET_MODE_SIZE (inner_mode
);
7462 val
= force_reg (GET_MODE (val
), val
);
7464 if (VECTOR_MEM_VSX_P (mode
))
7466 if (!CONST_INT_P (elt_rtx
))
7468 /* For V2DI/V2DF, could leverage the P9 version to generate xxpermdi
7469 when elt_rtx is variable. */
7470 if ((TARGET_P9_VECTOR
&& TARGET_POWERPC64
) || width
== 8)
7472 rs6000_expand_vector_set_var_p9 (target
, val
, elt_rtx
);
7475 else if (TARGET_VSX
)
7477 rs6000_expand_vector_set_var_p7 (target
, val
, elt_rtx
);
7481 gcc_assert (CONST_INT_P (elt_rtx
));
7484 rtx insn
= NULL_RTX
;
7486 if (mode
== V2DFmode
)
7487 insn
= gen_vsx_set_v2df (target
, target
, val
, elt_rtx
);
7489 else if (mode
== V2DImode
)
7490 insn
= gen_vsx_set_v2di (target
, target
, val
, elt_rtx
);
7492 else if (TARGET_P9_VECTOR
&& TARGET_POWERPC64
)
7494 if (mode
== V4SImode
)
7495 insn
= gen_vsx_set_v4si_p9 (target
, target
, val
, elt_rtx
);
7496 else if (mode
== V8HImode
)
7497 insn
= gen_vsx_set_v8hi_p9 (target
, target
, val
, elt_rtx
);
7498 else if (mode
== V16QImode
)
7499 insn
= gen_vsx_set_v16qi_p9 (target
, target
, val
, elt_rtx
);
7500 else if (mode
== V4SFmode
)
7501 insn
= gen_vsx_set_v4sf_p9 (target
, target
, val
, elt_rtx
);
7511 /* Simplify setting single element vectors like V1TImode. */
7512 if (GET_MODE_SIZE (mode
) == GET_MODE_SIZE (inner_mode
)
7513 && INTVAL (elt_rtx
) == 0)
7515 emit_move_insn (target
, gen_lowpart (mode
, val
));
7519 /* Load single variable value. */
7520 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
7521 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0), val
);
7522 x
= gen_rtx_UNSPEC (VOIDmode
,
7523 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
7524 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
7526 gen_rtx_SET (reg
, mem
),
7529 /* Linear sequence. */
7530 mask
= gen_rtx_PARALLEL (V16QImode
, rtvec_alloc (16));
7531 for (i
= 0; i
< 16; ++i
)
7532 XVECEXP (mask
, 0, i
) = GEN_INT (i
);
7534 /* Set permute mask to insert element into target. */
7535 for (i
= 0; i
< width
; ++i
)
7536 XVECEXP (mask
, 0, INTVAL (elt_rtx
) * width
+ i
) = GEN_INT (i
+ 0x10);
7537 x
= gen_rtx_CONST_VECTOR (V16QImode
, XVEC (mask
, 0));
7539 if (BYTES_BIG_ENDIAN
)
7540 x
= gen_rtx_UNSPEC (mode
,
7541 gen_rtvec (3, target
, reg
,
7542 force_reg (V16QImode
, x
)),
7546 if (TARGET_P9_VECTOR
)
7547 x
= gen_rtx_UNSPEC (mode
,
7548 gen_rtvec (3, reg
, target
,
7549 force_reg (V16QImode
, x
)),
7553 /* Invert selector. We prefer to generate VNAND on P8 so
7554 that future fusion opportunities can kick in, but must
7555 generate VNOR elsewhere. */
7556 rtx notx
= gen_rtx_NOT (V16QImode
, force_reg (V16QImode
, x
));
7557 rtx iorx
= (TARGET_P8_VECTOR
7558 ? gen_rtx_IOR (V16QImode
, notx
, notx
)
7559 : gen_rtx_AND (V16QImode
, notx
, notx
));
7560 rtx tmp
= gen_reg_rtx (V16QImode
);
7561 emit_insn (gen_rtx_SET (tmp
, iorx
));
7563 /* Permute with operands reversed and adjusted selector. */
7564 x
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, reg
, target
, tmp
),
7569 emit_insn (gen_rtx_SET (target
, x
));
7572 /* Extract field ELT from VEC into TARGET. */
7575 rs6000_expand_vector_extract (rtx target
, rtx vec
, rtx elt
)
7577 machine_mode mode
= GET_MODE (vec
);
7578 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7581 if (VECTOR_MEM_VSX_P (mode
) && CONST_INT_P (elt
))
7588 emit_move_insn (target
, gen_lowpart (TImode
, vec
));
7591 emit_insn (gen_vsx_extract_v2df (target
, vec
, elt
));
7594 emit_insn (gen_vsx_extract_v2di (target
, vec
, elt
));
7597 emit_insn (gen_vsx_extract_v4sf (target
, vec
, elt
));
7600 if (TARGET_DIRECT_MOVE_64BIT
)
7602 emit_insn (gen_vsx_extract_v16qi (target
, vec
, elt
));
7608 if (TARGET_DIRECT_MOVE_64BIT
)
7610 emit_insn (gen_vsx_extract_v8hi (target
, vec
, elt
));
7616 if (TARGET_DIRECT_MOVE_64BIT
)
7618 emit_insn (gen_vsx_extract_v4si (target
, vec
, elt
));
7624 else if (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (elt
)
7625 && TARGET_DIRECT_MOVE_64BIT
)
7627 if (GET_MODE (elt
) != DImode
)
7629 rtx tmp
= gen_reg_rtx (DImode
);
7630 convert_move (tmp
, elt
, 0);
7633 else if (!REG_P (elt
))
7634 elt
= force_reg (DImode
, elt
);
7639 emit_move_insn (target
, gen_lowpart (TImode
, vec
));
7643 emit_insn (gen_vsx_extract_v2df_var (target
, vec
, elt
));
7647 emit_insn (gen_vsx_extract_v2di_var (target
, vec
, elt
));
7651 emit_insn (gen_vsx_extract_v4sf_var (target
, vec
, elt
));
7655 emit_insn (gen_vsx_extract_v4si_var (target
, vec
, elt
));
7659 emit_insn (gen_vsx_extract_v8hi_var (target
, vec
, elt
));
7663 emit_insn (gen_vsx_extract_v16qi_var (target
, vec
, elt
));
7671 /* Allocate mode-sized buffer. */
7672 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
7674 emit_move_insn (mem
, vec
);
7675 if (CONST_INT_P (elt
))
7677 int modulo_elt
= INTVAL (elt
) % GET_MODE_NUNITS (mode
);
7679 /* Add offset to field within buffer matching vector element. */
7680 mem
= adjust_address_nv (mem
, inner_mode
,
7681 modulo_elt
* GET_MODE_SIZE (inner_mode
));
7682 emit_move_insn (target
, adjust_address_nv (mem
, inner_mode
, 0));
7686 unsigned int ele_size
= GET_MODE_SIZE (inner_mode
);
7687 rtx num_ele_m1
= GEN_INT (GET_MODE_NUNITS (mode
) - 1);
7689 elt
= gen_rtx_AND (Pmode
, elt
, num_ele_m1
);
7691 elt
= gen_rtx_MULT (Pmode
, elt
, GEN_INT (ele_size
));
7692 rtx new_addr
= gen_rtx_PLUS (Pmode
, XEXP (mem
, 0), elt
);
7693 new_addr
= change_address (mem
, inner_mode
, new_addr
);
7694 emit_move_insn (target
, new_addr
);
7698 /* Return the offset within a memory object (MEM) of a vector type to a given
7699 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
7700 the element is constant, we return a constant integer.
7702 Otherwise, we use a base register temporary to calculate the offset after
7703 masking it to fit within the bounds of the vector and scaling it. The
7704 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7705 built-in function. */
7708 get_vector_offset (rtx mem
, rtx element
, rtx base_tmp
, unsigned scalar_size
)
7710 if (CONST_INT_P (element
))
7711 return GEN_INT (INTVAL (element
) * scalar_size
);
7713 /* All insns should use the 'Q' constraint (address is a single register) if
7714 the element number is not a constant. */
7715 gcc_assert (satisfies_constraint_Q (mem
));
7717 /* Mask the element to make sure the element number is between 0 and the
7718 maximum number of elements - 1 so that we don't generate an address
7719 outside the vector. */
7720 rtx num_ele_m1
= GEN_INT (GET_MODE_NUNITS (GET_MODE (mem
)) - 1);
7721 rtx and_op
= gen_rtx_AND (Pmode
, element
, num_ele_m1
);
7722 emit_insn (gen_rtx_SET (base_tmp
, and_op
));
7724 /* Shift the element to get the byte offset from the element number. */
7725 int shift
= exact_log2 (scalar_size
);
7726 gcc_assert (shift
>= 0);
7730 rtx shift_op
= gen_rtx_ASHIFT (Pmode
, base_tmp
, GEN_INT (shift
));
7731 emit_insn (gen_rtx_SET (base_tmp
, shift_op
));
7737 /* Helper function update PC-relative addresses when we are adjusting a memory
7738 address (ADDR) to a vector to point to a scalar field within the vector with
7739 a constant offset (ELEMENT_OFFSET). If the address is not valid, we can
7740 use the base register temporary (BASE_TMP) to form the address. */
7743 adjust_vec_address_pcrel (rtx addr
, rtx element_offset
, rtx base_tmp
)
7745 rtx new_addr
= NULL
;
7747 gcc_assert (CONST_INT_P (element_offset
));
7749 if (GET_CODE (addr
) == CONST
)
7750 addr
= XEXP (addr
, 0);
7752 if (GET_CODE (addr
) == PLUS
)
7754 rtx op0
= XEXP (addr
, 0);
7755 rtx op1
= XEXP (addr
, 1);
7757 if (CONST_INT_P (op1
))
7759 HOST_WIDE_INT offset
7760 = INTVAL (XEXP (addr
, 1)) + INTVAL (element_offset
);
7767 rtx plus
= gen_rtx_PLUS (Pmode
, op0
, GEN_INT (offset
));
7768 new_addr
= gen_rtx_CONST (Pmode
, plus
);
7774 emit_move_insn (base_tmp
, addr
);
7775 new_addr
= gen_rtx_PLUS (Pmode
, base_tmp
, element_offset
);
7779 else if (SYMBOL_REF_P (addr
) || LABEL_REF_P (addr
))
7781 rtx plus
= gen_rtx_PLUS (Pmode
, addr
, element_offset
);
7782 new_addr
= gen_rtx_CONST (Pmode
, plus
);
7791 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7792 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7793 temporary (BASE_TMP) to fixup the address. Return the new memory address
7794 that is valid for reads or writes to a given register (SCALAR_REG).
7796 This function is expected to be called after reload is completed when we are
7797 splitting insns. The temporary BASE_TMP might be set multiple times with
7801 rs6000_adjust_vec_address (rtx scalar_reg
,
7805 machine_mode scalar_mode
)
7807 unsigned scalar_size
= GET_MODE_SIZE (scalar_mode
);
7808 rtx addr
= XEXP (mem
, 0);
7811 gcc_assert (!reg_mentioned_p (base_tmp
, addr
));
7812 gcc_assert (!reg_mentioned_p (base_tmp
, element
));
7814 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7815 gcc_assert (GET_RTX_CLASS (GET_CODE (addr
)) != RTX_AUTOINC
);
7817 /* Calculate what we need to add to the address to get the element
7819 rtx element_offset
= get_vector_offset (mem
, element
, base_tmp
, scalar_size
);
7821 /* Create the new address pointing to the element within the vector. If we
7822 are adding 0, we don't have to change the address. */
7823 if (element_offset
== const0_rtx
)
7826 /* A simple indirect address can be converted into a reg + offset
7828 else if (REG_P (addr
) || SUBREG_P (addr
))
7829 new_addr
= gen_rtx_PLUS (Pmode
, addr
, element_offset
);
7831 /* For references to local static variables, fold a constant offset into the
7833 else if (pcrel_local_address (addr
, Pmode
) && CONST_INT_P (element_offset
))
7834 new_addr
= adjust_vec_address_pcrel (addr
, element_offset
, base_tmp
);
7836 /* Optimize D-FORM addresses with constant offset with a constant element, to
7837 include the element offset in the address directly. */
7838 else if (GET_CODE (addr
) == PLUS
)
7840 rtx op0
= XEXP (addr
, 0);
7841 rtx op1
= XEXP (addr
, 1);
7843 gcc_assert (REG_P (op0
) || SUBREG_P (op0
));
7844 if (CONST_INT_P (op1
) && CONST_INT_P (element_offset
))
7846 /* op0 should never be r0, because r0+offset is not valid. But it
7847 doesn't hurt to make sure it is not r0. */
7848 gcc_assert (reg_or_subregno (op0
) != 0);
7850 /* D-FORM address with constant element number. */
7851 HOST_WIDE_INT offset
= INTVAL (op1
) + INTVAL (element_offset
);
7852 rtx offset_rtx
= GEN_INT (offset
);
7853 new_addr
= gen_rtx_PLUS (Pmode
, op0
, offset_rtx
);
7857 /* If we don't have a D-FORM address with a constant element number,
7858 add the two elements in the current address. Then add the offset.
7860 Previously, we tried to add the offset to OP1 and change the
7861 address to an X-FORM format adding OP0 and BASE_TMP, but it became
7862 complicated because we had to verify that op1 was not GPR0 and we
7863 had a constant element offset (due to the way ADDI is defined).
7864 By doing the add of OP0 and OP1 first, and then adding in the
7865 offset, it has the benefit that if D-FORM instructions are
7866 allowed, the offset is part of the memory access to the vector
7868 emit_insn (gen_rtx_SET (base_tmp
, gen_rtx_PLUS (Pmode
, op0
, op1
)));
7869 new_addr
= gen_rtx_PLUS (Pmode
, base_tmp
, element_offset
);
7875 emit_move_insn (base_tmp
, addr
);
7876 new_addr
= gen_rtx_PLUS (Pmode
, base_tmp
, element_offset
);
7879 /* If the address isn't valid, move the address into the temporary base
7880 register. Some reasons it could not be valid include:
7882 The address offset overflowed the 16 or 34 bit offset size;
7883 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
7884 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
7885 Only X_FORM loads can be done, and the address is D_FORM. */
7887 enum insn_form iform
7888 = address_to_insn_form (new_addr
, scalar_mode
,
7889 reg_to_non_prefixed (scalar_reg
, scalar_mode
));
7891 if (iform
== INSN_FORM_BAD
)
7893 emit_move_insn (base_tmp
, new_addr
);
7894 new_addr
= base_tmp
;
7897 return change_address (mem
, scalar_mode
, new_addr
);
7900 /* Split a variable vec_extract operation into the component instructions. */
7903 rs6000_split_vec_extract_var (rtx dest
, rtx src
, rtx element
, rtx tmp_gpr
,
7906 machine_mode mode
= GET_MODE (src
);
7907 machine_mode scalar_mode
= GET_MODE_INNER (GET_MODE (src
));
7908 unsigned scalar_size
= GET_MODE_SIZE (scalar_mode
);
7909 int byte_shift
= exact_log2 (scalar_size
);
7911 gcc_assert (byte_shift
>= 0);
7913 /* If we are given a memory address, optimize to load just the element. We
7914 don't have to adjust the vector element number on little endian
7918 emit_move_insn (dest
,
7919 rs6000_adjust_vec_address (dest
, src
, element
, tmp_gpr
,
7924 else if (REG_P (src
) || SUBREG_P (src
))
7926 int num_elements
= GET_MODE_NUNITS (mode
);
7927 int bits_in_element
= mode_to_bits (GET_MODE_INNER (mode
));
7928 int bit_shift
= 7 - exact_log2 (num_elements
);
7930 unsigned int dest_regno
= reg_or_subregno (dest
);
7931 unsigned int src_regno
= reg_or_subregno (src
);
7932 unsigned int element_regno
= reg_or_subregno (element
);
7934 gcc_assert (REG_P (tmp_gpr
));
7936 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7937 a general purpose register. */
7938 if (TARGET_P9_VECTOR
7939 && (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
7940 && INT_REGNO_P (dest_regno
)
7941 && ALTIVEC_REGNO_P (src_regno
)
7942 && INT_REGNO_P (element_regno
))
7944 rtx dest_si
= gen_rtx_REG (SImode
, dest_regno
);
7945 rtx element_si
= gen_rtx_REG (SImode
, element_regno
);
7947 if (mode
== V16QImode
)
7948 emit_insn (BYTES_BIG_ENDIAN
7949 ? gen_vextublx (dest_si
, element_si
, src
)
7950 : gen_vextubrx (dest_si
, element_si
, src
));
7952 else if (mode
== V8HImode
)
7954 rtx tmp_gpr_si
= gen_rtx_REG (SImode
, REGNO (tmp_gpr
));
7955 emit_insn (gen_ashlsi3 (tmp_gpr_si
, element_si
, const1_rtx
));
7956 emit_insn (BYTES_BIG_ENDIAN
7957 ? gen_vextuhlx (dest_si
, tmp_gpr_si
, src
)
7958 : gen_vextuhrx (dest_si
, tmp_gpr_si
, src
));
7964 rtx tmp_gpr_si
= gen_rtx_REG (SImode
, REGNO (tmp_gpr
));
7965 emit_insn (gen_ashlsi3 (tmp_gpr_si
, element_si
, const2_rtx
));
7966 emit_insn (BYTES_BIG_ENDIAN
7967 ? gen_vextuwlx (dest_si
, tmp_gpr_si
, src
)
7968 : gen_vextuwrx (dest_si
, tmp_gpr_si
, src
));
7975 gcc_assert (REG_P (tmp_altivec
));
7977 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7978 an XOR, otherwise we need to subtract. The shift amount is so VSLO
7979 will shift the element into the upper position (adding 3 to convert a
7980 byte shift into a bit shift). */
7981 if (scalar_size
== 8)
7983 if (!BYTES_BIG_ENDIAN
)
7985 emit_insn (gen_xordi3 (tmp_gpr
, element
, const1_rtx
));
7991 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7993 emit_insn (gen_rtx_SET (tmp_gpr
,
7994 gen_rtx_AND (DImode
,
7995 gen_rtx_ASHIFT (DImode
,
8002 if (!BYTES_BIG_ENDIAN
)
8004 rtx num_ele_m1
= GEN_INT (num_elements
- 1);
8006 emit_insn (gen_anddi3 (tmp_gpr
, element
, num_ele_m1
));
8007 emit_insn (gen_subdi3 (tmp_gpr
, num_ele_m1
, tmp_gpr
));
8013 emit_insn (gen_ashldi3 (tmp_gpr
, element2
, GEN_INT (bit_shift
)));
8016 /* Get the value into the lower byte of the Altivec register where VSLO
8018 if (TARGET_P9_VECTOR
)
8019 emit_insn (gen_vsx_splat_v2di (tmp_altivec
, tmp_gpr
));
8020 else if (can_create_pseudo_p ())
8021 emit_insn (gen_vsx_concat_v2di (tmp_altivec
, tmp_gpr
, tmp_gpr
));
8024 rtx tmp_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
8025 emit_move_insn (tmp_di
, tmp_gpr
);
8026 emit_insn (gen_vsx_concat_v2di (tmp_altivec
, tmp_di
, tmp_di
));
8029 /* Do the VSLO to get the value into the final location. */
8033 emit_insn (gen_vsx_vslo_v2df (dest
, src
, tmp_altivec
));
8037 emit_insn (gen_vsx_vslo_v2di (dest
, src
, tmp_altivec
));
8042 rtx tmp_altivec_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
8043 rtx tmp_altivec_v4sf
= gen_rtx_REG (V4SFmode
, REGNO (tmp_altivec
));
8044 rtx src_v2di
= gen_rtx_REG (V2DImode
, REGNO (src
));
8045 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di
, src_v2di
,
8048 emit_insn (gen_vsx_xscvspdp_scalar2 (dest
, tmp_altivec_v4sf
));
8056 rtx tmp_altivec_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
8057 rtx src_v2di
= gen_rtx_REG (V2DImode
, REGNO (src
));
8058 rtx tmp_gpr_di
= gen_rtx_REG (DImode
, REGNO (dest
));
8059 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di
, src_v2di
,
8061 emit_move_insn (tmp_gpr_di
, tmp_altivec_di
);
8062 emit_insn (gen_lshrdi3 (tmp_gpr_di
, tmp_gpr_di
,
8063 GEN_INT (64 - bits_in_element
)));
8077 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
8078 selects whether the alignment is abi mandated, optional, or
8079 both abi and optional alignment. */
8082 rs6000_data_alignment (tree type
, unsigned int align
, enum data_align how
)
8084 if (how
!= align_opt
)
8086 if (VECTOR_TYPE_P (type
) && align
< 128)
8090 if (how
!= align_abi
)
8092 if (TREE_CODE (type
) == ARRAY_TYPE
8093 && TYPE_MODE (TREE_TYPE (type
)) == QImode
)
8095 if (align
< BITS_PER_WORD
)
8096 align
= BITS_PER_WORD
;
8103 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
8104 instructions simply ignore the low bits; VSX memory instructions
8105 are aligned to 4 or 8 bytes. */
8108 rs6000_slow_unaligned_access (machine_mode mode
, unsigned int align
)
8110 return (STRICT_ALIGNMENT
8111 || (!TARGET_EFFICIENT_UNALIGNED_VSX
8112 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode
) && align
< 32)
8113 || ((VECTOR_MODE_P (mode
) || VECTOR_ALIGNMENT_P (mode
))
8114 && (int) align
< VECTOR_ALIGN (mode
)))));
8117 /* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints. */
8120 rs6000_special_adjust_field_align (tree type
, unsigned int computed
)
8122 if (computed
<= 32 || TYPE_PACKED (type
))
8125 /* Strip initial arrays. */
8126 while (TREE_CODE (type
) == ARRAY_TYPE
)
8127 type
= TREE_TYPE (type
);
8129 /* If RECORD or UNION, recursively find the first field. */
8130 while (AGGREGATE_TYPE_P (type
))
8132 tree field
= TYPE_FIELDS (type
);
8134 /* Skip all non field decls */
8135 while (field
!= NULL
8136 && (TREE_CODE (field
) != FIELD_DECL
8137 || DECL_FIELD_ABI_IGNORED (field
)))
8138 field
= DECL_CHAIN (field
);
8143 /* A packed field does not contribute any extra alignment. */
8144 if (DECL_PACKED (field
))
8147 type
= TREE_TYPE (field
);
8150 while (TREE_CODE (type
) == ARRAY_TYPE
)
8151 type
= TREE_TYPE (type
);
8154 if (! AGGREGATE_TYPE_P (type
) && type
!= error_mark_node
8155 && (TYPE_MODE (type
) == DFmode
|| TYPE_MODE (type
) == DCmode
))
8156 computed
= MIN (computed
, 32);
8161 /* AIX increases natural record alignment to doubleword if the innermost first
8162 field is an FP double while the FP fields remain word aligned.
8163 Only called if TYPE initially is a RECORD or UNION. */
8166 rs6000_special_round_type_align (tree type
, unsigned int computed
,
8167 unsigned int specified
)
8169 unsigned int align
= MAX (computed
, specified
);
8171 if (TYPE_PACKED (type
) || align
>= 64)
8174 /* If RECORD or UNION, recursively find the first field. */
8177 tree field
= TYPE_FIELDS (type
);
8179 /* Skip all non field decls */
8180 while (field
!= NULL
8181 && (TREE_CODE (field
) != FIELD_DECL
8182 || DECL_FIELD_ABI_IGNORED (field
)))
8183 field
= DECL_CHAIN (field
);
8188 /* A packed field does not contribute any extra alignment. */
8189 if (DECL_PACKED (field
))
8192 type
= TREE_TYPE (field
);
8195 while (TREE_CODE (type
) == ARRAY_TYPE
)
8196 type
= TREE_TYPE (type
);
8197 } while (AGGREGATE_TYPE_P (type
));
8199 if (! AGGREGATE_TYPE_P (type
) && type
!= error_mark_node
8200 && (TYPE_MODE (type
) == DFmode
|| TYPE_MODE (type
) == DCmode
))
8201 align
= MAX (align
, 64);
8206 /* Darwin increases record alignment to the natural alignment of
8210 darwin_rs6000_special_round_type_align (tree type
, unsigned int computed
,
8211 unsigned int specified
)
8213 unsigned int align
= MAX (computed
, specified
);
8215 if (TYPE_PACKED (type
))
8218 /* Find the first field, looking down into aggregates. */
8220 tree field
= TYPE_FIELDS (type
);
8221 /* Skip all non field decls */
8222 while (field
!= NULL
8223 && (TREE_CODE (field
) != FIELD_DECL
8224 || DECL_FIELD_ABI_IGNORED (field
)))
8225 field
= DECL_CHAIN (field
);
8228 /* A packed field does not contribute any extra alignment. */
8229 if (DECL_PACKED (field
))
8231 type
= TREE_TYPE (field
);
8232 while (TREE_CODE (type
) == ARRAY_TYPE
)
8233 type
= TREE_TYPE (type
);
8234 } while (AGGREGATE_TYPE_P (type
));
8236 if (type
!= error_mark_node
&& ! AGGREGATE_TYPE_P (type
)
8237 && ! TYPE_PACKED (type
) && maximum_field_alignment
== 0)
8238 align
= MAX (align
, TYPE_ALIGN (type
));
8243 /* Return 1 for an operand in small memory on V.4/eabi. */
8246 small_data_operand (rtx op ATTRIBUTE_UNUSED
,
8247 machine_mode mode ATTRIBUTE_UNUSED
)
8252 if (rs6000_sdata
== SDATA_NONE
|| rs6000_sdata
== SDATA_DATA
)
8255 if (DEFAULT_ABI
!= ABI_V4
)
8258 if (SYMBOL_REF_P (op
))
8261 else if (GET_CODE (op
) != CONST
8262 || GET_CODE (XEXP (op
, 0)) != PLUS
8263 || !SYMBOL_REF_P (XEXP (XEXP (op
, 0), 0))
8264 || !CONST_INT_P (XEXP (XEXP (op
, 0), 1)))
8269 rtx sum
= XEXP (op
, 0);
8270 HOST_WIDE_INT summand
;
8272 /* We have to be careful here, because it is the referenced address
8273 that must be 32k from _SDA_BASE_, not just the symbol. */
8274 summand
= INTVAL (XEXP (sum
, 1));
8275 if (summand
< 0 || summand
> g_switch_value
)
8278 sym_ref
= XEXP (sum
, 0);
8281 return SYMBOL_REF_SMALL_P (sym_ref
);
8287 /* Return true if either operand is a general purpose register. */
8290 gpr_or_gpr_p (rtx op0
, rtx op1
)
8292 return ((REG_P (op0
) && INT_REGNO_P (REGNO (op0
)))
8293 || (REG_P (op1
) && INT_REGNO_P (REGNO (op1
))));
8296 /* Return true if this is a move direct operation between GPR registers and
8297 floating point/VSX registers. */
8300 direct_move_p (rtx op0
, rtx op1
)
8302 if (!REG_P (op0
) || !REG_P (op1
))
8305 if (!TARGET_DIRECT_MOVE
)
8308 int regno0
= REGNO (op0
);
8309 int regno1
= REGNO (op1
);
8310 if (!HARD_REGISTER_NUM_P (regno0
) || !HARD_REGISTER_NUM_P (regno1
))
8313 if (INT_REGNO_P (regno0
) && VSX_REGNO_P (regno1
))
8316 if (VSX_REGNO_P (regno0
) && INT_REGNO_P (regno1
))
8322 /* Return true if the ADDR is an acceptable address for a quad memory
8323 operation of mode MODE (either LQ/STQ for general purpose registers, or
8324 LXV/STXV for vector registers under ISA 3.0. GPR_P is true if this address
8325 is intended for LQ/STQ. If it is false, the address is intended for the ISA
8326 3.0 LXV/STXV instruction. */
8329 quad_address_p (rtx addr
, machine_mode mode
, bool strict
)
8333 if (GET_MODE_SIZE (mode
) < 16)
8336 if (legitimate_indirect_address_p (addr
, strict
))
8339 if (VECTOR_MODE_P (mode
) && !mode_supports_dq_form (mode
))
8342 /* Is this a valid prefixed address? If the bottom four bits of the offset
8343 are non-zero, we could use a prefixed instruction (which does not have the
8344 DQ-form constraint that the traditional instruction had) instead of
8345 forcing the unaligned offset to a GPR. */
8346 if (address_is_prefixed (addr
, mode
, NON_PREFIXED_DQ
))
8349 if (GET_CODE (addr
) != PLUS
)
8352 op0
= XEXP (addr
, 0);
8353 if (!REG_P (op0
) || !INT_REG_OK_FOR_BASE_P (op0
, strict
))
8356 op1
= XEXP (addr
, 1);
8357 if (!CONST_INT_P (op1
))
8360 return quad_address_offset_p (INTVAL (op1
));
8363 /* Return true if this is a load or store quad operation. This function does
8364 not handle the atomic quad memory instructions. */
8367 quad_load_store_p (rtx op0
, rtx op1
)
8371 if (!TARGET_QUAD_MEMORY
)
8374 else if (REG_P (op0
) && MEM_P (op1
))
8375 ret
= (quad_int_reg_operand (op0
, GET_MODE (op0
))
8376 && quad_memory_operand (op1
, GET_MODE (op1
))
8377 && !reg_overlap_mentioned_p (op0
, op1
));
8379 else if (MEM_P (op0
) && REG_P (op1
))
8380 ret
= (quad_memory_operand (op0
, GET_MODE (op0
))
8381 && quad_int_reg_operand (op1
, GET_MODE (op1
)));
8386 if (TARGET_DEBUG_ADDR
)
8388 fprintf (stderr
, "\n========== quad_load_store, return %s\n",
8389 ret
? "true" : "false");
8390 debug_rtx (gen_rtx_SET (op0
, op1
));
8396 /* Given an address, return a constant offset term if one exists. */
8399 address_offset (rtx op
)
8401 if (GET_CODE (op
) == PRE_INC
8402 || GET_CODE (op
) == PRE_DEC
)
8404 else if (GET_CODE (op
) == PRE_MODIFY
8405 || GET_CODE (op
) == LO_SUM
)
8408 if (GET_CODE (op
) == CONST
)
8411 if (GET_CODE (op
) == PLUS
)
8414 if (CONST_INT_P (op
))
8420 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
8421 the mode. If we can't find (or don't know) the alignment of the symbol
8422 we assume (optimistically) that it's sufficiently aligned [??? maybe we
8423 should be pessimistic]. Offsets are validated in the same way as for
8426 darwin_rs6000_legitimate_lo_sum_const_p (rtx x
, machine_mode mode
)
8428 /* We should not get here with this. */
8429 gcc_checking_assert (! mode_supports_dq_form (mode
));
8431 if (GET_CODE (x
) == CONST
)
8434 /* If we are building PIC code, then any symbol must be wrapped in an
8435 UNSPEC_MACHOPIC_OFFSET so that it will get the picbase subtracted. */
8436 bool machopic_offs_p
= false;
8437 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_MACHOPIC_OFFSET
)
8439 x
= XVECEXP (x
, 0, 0);
8440 machopic_offs_p
= true;
8444 unsigned HOST_WIDE_INT offset
= 0;
8446 if (GET_CODE (x
) == PLUS
)
8449 if (! SYMBOL_REF_P (sym
))
8451 if (!CONST_INT_P (XEXP (x
, 1)))
8453 offset
= INTVAL (XEXP (x
, 1));
8455 else if (SYMBOL_REF_P (x
))
8457 else if (CONST_INT_P (x
))
8458 offset
= INTVAL (x
);
8459 else if (GET_CODE (x
) == LABEL_REF
)
8460 offset
= 0; // We assume code labels are Pmode aligned
8462 return false; // not sure what we have here.
8464 /* If we don't know the alignment of the thing to which the symbol refers,
8465 we assume optimistically it is "enough".
8466 ??? maybe we should be pessimistic instead. */
8471 tree decl
= SYMBOL_REF_DECL (sym
);
8472 /* As noted above, PIC code cannot use a bare SYMBOL_REF. */
8473 if (TARGET_MACHO
&& flag_pic
&& !machopic_offs_p
)
8476 if (MACHO_SYMBOL_INDIRECTION_P (sym
))
8477 /* The decl in an indirection symbol is the original one, which might
8478 be less aligned than the indirection. Our indirections are always
8483 if (decl
&& DECL_ALIGN (decl
))
8484 align
= DECL_ALIGN_UNIT (decl
);
8487 unsigned int extra
= 0;
8493 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8495 if (VECTOR_MEM_VSX_P (mode
))
8498 if (!TARGET_POWERPC64
)
8500 else if ((offset
& 3) || (align
& 3))
8511 if (!TARGET_POWERPC64
)
8513 else if ((offset
& 3) || (align
& 3))
8521 /* We only care if the access(es) would cause a change to the high part. */
8522 offset
= sext_hwi (offset
, 16);
8523 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
8526 /* Return true if the MEM operand is a memory operand suitable for use
8527 with a (full width, possibly multiple) gpr load/store. On
8528 powerpc64 this means the offset must be divisible by 4.
8529 Implements 'Y' constraint.
8531 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8532 a constraint function we know the operand has satisfied a suitable
8535 Offsetting a lo_sum should not be allowed, except where we know by
8536 alignment that a 32k boundary is not crossed. Note that by
8537 "offsetting" here we mean a further offset to access parts of the
8538 MEM. It's fine to have a lo_sum where the inner address is offset
8539 from a sym, since the same sym+offset will appear in the high part
8540 of the address calculation. */
8543 mem_operand_gpr (rtx op
, machine_mode mode
)
8545 unsigned HOST_WIDE_INT offset
;
8547 rtx addr
= XEXP (op
, 0);
8549 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
8551 && (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
8552 && mode_supports_pre_incdec_p (mode
)
8553 && legitimate_indirect_address_p (XEXP (addr
, 0), false))
8556 /* Allow prefixed instructions if supported. If the bottom two bits of the
8557 offset are non-zero, we could use a prefixed instruction (which does not
8558 have the DS-form constraint that the traditional instruction had) instead
8559 of forcing the unaligned offset to a GPR. */
8560 if (address_is_prefixed (addr
, mode
, NON_PREFIXED_DS
))
8563 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
8564 really OK. Doing this early avoids teaching all the other machinery
8566 if (TARGET_MACHO
&& GET_CODE (addr
) == LO_SUM
)
8567 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr
, 1), mode
);
8569 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
8570 if (!rs6000_offsettable_memref_p (op
, mode
, false))
8573 op
= address_offset (addr
);
8577 offset
= INTVAL (op
);
8578 if (TARGET_POWERPC64
&& (offset
& 3) != 0)
8581 extra
= GET_MODE_SIZE (mode
) - UNITS_PER_WORD
;
8585 if (GET_CODE (addr
) == LO_SUM
)
8586 /* For lo_sum addresses, we must allow any offset except one that
8587 causes a wrap, so test only the low 16 bits. */
8588 offset
= sext_hwi (offset
, 16);
8590 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
8593 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8594 enforce an offset divisible by 4 even for 32-bit. */
8597 mem_operand_ds_form (rtx op
, machine_mode mode
)
8599 unsigned HOST_WIDE_INT offset
;
8601 rtx addr
= XEXP (op
, 0);
8603 /* Allow prefixed instructions if supported. If the bottom two bits of the
8604 offset are non-zero, we could use a prefixed instruction (which does not
8605 have the DS-form constraint that the traditional instruction had) instead
8606 of forcing the unaligned offset to a GPR. */
8607 if (address_is_prefixed (addr
, mode
, NON_PREFIXED_DS
))
8610 if (!offsettable_address_p (false, mode
, addr
))
8613 op
= address_offset (addr
);
8617 offset
= INTVAL (op
);
8618 if ((offset
& 3) != 0)
8621 extra
= GET_MODE_SIZE (mode
) - UNITS_PER_WORD
;
8625 if (GET_CODE (addr
) == LO_SUM
)
8626 /* For lo_sum addresses, we must allow any offset except one that
8627 causes a wrap, so test only the low 16 bits. */
8628 offset
= sext_hwi (offset
, 16);
8630 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
8633 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8636 reg_offset_addressing_ok_p (machine_mode mode
)
8650 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8651 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8652 a vector mode, if we want to use the VSX registers to move it around,
8653 we need to restrict ourselves to reg+reg addressing. Similarly for
8654 IEEE 128-bit floating point that is passed in a single vector
8656 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
))
8657 return mode_supports_dq_form (mode
);
8660 /* The vector pair/quad types support offset addressing if the
8661 underlying vectors support offset addressing. */
8667 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8668 addressing for the LFIWZX and STFIWX instructions. */
8669 if (TARGET_NO_SDMODE_STACK
)
8681 virtual_stack_registers_memory_p (rtx op
)
8686 regnum
= REGNO (op
);
8688 else if (GET_CODE (op
) == PLUS
8689 && REG_P (XEXP (op
, 0))
8690 && CONST_INT_P (XEXP (op
, 1)))
8691 regnum
= REGNO (XEXP (op
, 0));
8696 return (regnum
>= FIRST_VIRTUAL_REGISTER
8697 && regnum
<= LAST_VIRTUAL_POINTER_REGISTER
);
8700 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8701 is known to not straddle a 32k boundary. This function is used
8702 to determine whether -mcmodel=medium code can use TOC pointer
8703 relative addressing for OP. This means the alignment of the TOC
8704 pointer must also be taken into account, and unfortunately that is
8707 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8708 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8712 offsettable_ok_by_alignment (rtx op
, HOST_WIDE_INT offset
,
8716 unsigned HOST_WIDE_INT dsize
, dalign
, lsb
, mask
;
8718 if (!SYMBOL_REF_P (op
))
8721 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8723 if (mode_supports_dq_form (mode
))
8726 dsize
= GET_MODE_SIZE (mode
);
8727 decl
= SYMBOL_REF_DECL (op
);
8733 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8734 replacing memory addresses with an anchor plus offset. We
8735 could find the decl by rummaging around in the block->objects
8736 VEC for the given offset but that seems like too much work. */
8737 dalign
= BITS_PER_UNIT
;
8738 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op
)
8739 && SYMBOL_REF_ANCHOR_P (op
)
8740 && SYMBOL_REF_BLOCK (op
) != NULL
)
8742 struct object_block
*block
= SYMBOL_REF_BLOCK (op
);
8744 dalign
= block
->alignment
;
8745 offset
+= SYMBOL_REF_BLOCK_OFFSET (op
);
8747 else if (CONSTANT_POOL_ADDRESS_P (op
))
8749 /* It would be nice to have get_pool_align().. */
8750 machine_mode cmode
= get_pool_mode (op
);
8752 dalign
= GET_MODE_ALIGNMENT (cmode
);
8755 else if (DECL_P (decl
))
8757 dalign
= DECL_ALIGN (decl
);
8761 /* Allow BLKmode when the entire object is known to not
8762 cross a 32k boundary. */
8763 if (!DECL_SIZE_UNIT (decl
))
8766 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl
)))
8769 dsize
= tree_to_uhwi (DECL_SIZE_UNIT (decl
));
8773 dalign
/= BITS_PER_UNIT
;
8774 if (dalign
> POWERPC64_TOC_POINTER_ALIGNMENT
)
8775 dalign
= POWERPC64_TOC_POINTER_ALIGNMENT
;
8776 return dalign
>= dsize
;
8782 /* Find how many bits of the alignment we know for this access. */
8783 dalign
/= BITS_PER_UNIT
;
8784 if (dalign
> POWERPC64_TOC_POINTER_ALIGNMENT
)
8785 dalign
= POWERPC64_TOC_POINTER_ALIGNMENT
;
8787 lsb
= offset
& -offset
;
8791 return dalign
>= dsize
;
8795 constant_pool_expr_p (rtx op
)
8799 split_const (op
, &base
, &offset
);
8800 return (SYMBOL_REF_P (base
)
8801 && CONSTANT_POOL_ADDRESS_P (base
)
8802 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base
), Pmode
));
8805 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
8806 use that as the register to put the HIGH value into if register allocation
8810 create_TOC_reference (rtx symbol
, rtx largetoc_reg
)
8812 rtx tocrel
, tocreg
, hi
;
8814 gcc_assert (TARGET_TOC
);
8816 if (TARGET_DEBUG_ADDR
)
8818 if (SYMBOL_REF_P (symbol
))
8819 fprintf (stderr
, "\ncreate_TOC_reference, (symbol_ref %s)\n",
8823 fprintf (stderr
, "\ncreate_TOC_reference, code %s:\n",
8824 GET_RTX_NAME (GET_CODE (symbol
)));
8829 if (!can_create_pseudo_p ())
8830 df_set_regs_ever_live (TOC_REGISTER
, true);
8832 tocreg
= gen_rtx_REG (Pmode
, TOC_REGISTER
);
8833 tocrel
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, symbol
, tocreg
), UNSPEC_TOCREL
);
8834 if (TARGET_CMODEL
== CMODEL_SMALL
|| can_create_pseudo_p ())
8837 hi
= gen_rtx_HIGH (Pmode
, copy_rtx (tocrel
));
8838 if (largetoc_reg
!= NULL
)
8840 emit_move_insn (largetoc_reg
, hi
);
8843 return gen_rtx_LO_SUM (Pmode
, hi
, tocrel
);
8846 /* These are only used to pass through from print_operand/print_operand_address
8847 to rs6000_output_addr_const_extra over the intervening function
8848 output_addr_const which is not target code. */
8849 static const_rtx tocrel_base_oac
, tocrel_offset_oac
;
8851 /* Return true if OP is a toc pointer relative address (the output
8852 of create_TOC_reference). If STRICT, do not match non-split
8853 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8854 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8855 TOCREL_OFFSET_RET respectively. */
8858 toc_relative_expr_p (const_rtx op
, bool strict
, const_rtx
*tocrel_base_ret
,
8859 const_rtx
*tocrel_offset_ret
)
8864 if (TARGET_CMODEL
!= CMODEL_SMALL
)
8866 /* When strict ensure we have everything tidy. */
8868 && !(GET_CODE (op
) == LO_SUM
8869 && REG_P (XEXP (op
, 0))
8870 && INT_REG_OK_FOR_BASE_P (XEXP (op
, 0), strict
)))
8873 /* When not strict, allow non-split TOC addresses and also allow
8874 (lo_sum (high ..)) TOC addresses created during reload. */
8875 if (GET_CODE (op
) == LO_SUM
)
8879 const_rtx tocrel_base
= op
;
8880 const_rtx tocrel_offset
= const0_rtx
;
8882 if (GET_CODE (op
) == PLUS
&& add_cint_operand (XEXP (op
, 1), GET_MODE (op
)))
8884 tocrel_base
= XEXP (op
, 0);
8885 tocrel_offset
= XEXP (op
, 1);
8888 if (tocrel_base_ret
)
8889 *tocrel_base_ret
= tocrel_base
;
8890 if (tocrel_offset_ret
)
8891 *tocrel_offset_ret
= tocrel_offset
;
8893 return (GET_CODE (tocrel_base
) == UNSPEC
8894 && XINT (tocrel_base
, 1) == UNSPEC_TOCREL
8895 && REG_P (XVECEXP (tocrel_base
, 0, 1))
8896 && REGNO (XVECEXP (tocrel_base
, 0, 1)) == TOC_REGISTER
);
8899 /* Return true if X is a constant pool address, and also for cmodel=medium
8900 if X is a toc-relative address known to be offsettable within MODE. */
8903 legitimate_constant_pool_address_p (const_rtx x
, machine_mode mode
,
8906 const_rtx tocrel_base
, tocrel_offset
;
8907 return (toc_relative_expr_p (x
, strict
, &tocrel_base
, &tocrel_offset
)
8908 && (TARGET_CMODEL
!= CMODEL_MEDIUM
8909 || constant_pool_expr_p (XVECEXP (tocrel_base
, 0, 0))
8911 || offsettable_ok_by_alignment (XVECEXP (tocrel_base
, 0, 0),
8912 INTVAL (tocrel_offset
), mode
)));
8916 legitimate_small_data_p (machine_mode mode
, rtx x
)
8918 return (DEFAULT_ABI
== ABI_V4
8919 && !flag_pic
&& !TARGET_TOC
8920 && (SYMBOL_REF_P (x
) || GET_CODE (x
) == CONST
)
8921 && small_data_operand (x
, mode
));
8925 rs6000_legitimate_offset_address_p (machine_mode mode
, rtx x
,
8926 bool strict
, bool worst_case
)
8928 unsigned HOST_WIDE_INT offset
;
8931 if (GET_CODE (x
) != PLUS
)
8933 if (!REG_P (XEXP (x
, 0)))
8935 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), strict
))
8937 if (mode_supports_dq_form (mode
))
8938 return quad_address_p (x
, mode
, strict
);
8939 if (!reg_offset_addressing_ok_p (mode
))
8940 return virtual_stack_registers_memory_p (x
);
8941 if (legitimate_constant_pool_address_p (x
, mode
, strict
|| lra_in_progress
))
8943 if (!CONST_INT_P (XEXP (x
, 1)))
8946 offset
= INTVAL (XEXP (x
, 1));
8953 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8955 if (VECTOR_MEM_VSX_P (mode
))
8960 if (!TARGET_POWERPC64
)
8962 else if (offset
& 3)
8975 if (!TARGET_POWERPC64
)
8977 else if (offset
& 3)
8985 if (TARGET_PREFIXED
)
8986 return SIGNED_34BIT_OFFSET_EXTRA_P (offset
, extra
);
8988 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
8992 legitimate_indexed_address_p (rtx x
, int strict
)
8996 if (GET_CODE (x
) != PLUS
)
9002 return (REG_P (op0
) && REG_P (op1
)
9003 && ((INT_REG_OK_FOR_BASE_P (op0
, strict
)
9004 && INT_REG_OK_FOR_INDEX_P (op1
, strict
))
9005 || (INT_REG_OK_FOR_BASE_P (op1
, strict
)
9006 && INT_REG_OK_FOR_INDEX_P (op0
, strict
))));
9010 avoiding_indexed_address_p (machine_mode mode
)
9012 unsigned int msize
= GET_MODE_SIZE (mode
);
9014 /* Avoid indexed addressing for modes that have non-indexed load/store
9015 instruction forms. On power10, vector pairs have an indexed
9016 form, but vector quads don't. */
9020 return (TARGET_AVOID_XFORM
&& VECTOR_MEM_NONE_P (mode
));
9024 legitimate_indirect_address_p (rtx x
, int strict
)
9026 return REG_P (x
) && INT_REG_OK_FOR_BASE_P (x
, strict
);
9030 macho_lo_sum_memory_operand (rtx x
, machine_mode mode
)
9032 if (!TARGET_MACHO
|| !flag_pic
9033 || mode
!= SImode
|| !MEM_P (x
))
9037 if (GET_CODE (x
) != LO_SUM
)
9039 if (!REG_P (XEXP (x
, 0)))
9041 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), 0))
9045 return CONSTANT_P (x
);
9049 legitimate_lo_sum_address_p (machine_mode mode
, rtx x
, int strict
)
9051 if (GET_CODE (x
) != LO_SUM
)
9053 if (!REG_P (XEXP (x
, 0)))
9055 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), strict
))
9057 /* quad word addresses are restricted, and we can't use LO_SUM. */
9058 if (mode_supports_dq_form (mode
))
9066 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
)
9068 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
9069 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
9070 recognizes some LO_SUM addresses as valid although this
9071 function says opposite. In most cases, LRA through different
9072 transformations can generate correct code for address reloads.
9073 It cannot manage only some LO_SUM cases. So we need to add
9074 code here saying that some addresses are still valid. */
9075 large_toc_ok
= (lra_in_progress
&& TARGET_CMODEL
!= CMODEL_SMALL
9076 && small_toc_ref (x
, VOIDmode
));
9077 if (TARGET_TOC
&& ! large_toc_ok
)
9079 if (GET_MODE_NUNITS (mode
) != 1)
9081 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
9082 && !(/* ??? Assume floating point reg based on mode? */
9083 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
)))
9086 return CONSTANT_P (x
) || large_toc_ok
;
9088 else if (TARGET_MACHO
)
9090 if (GET_MODE_NUNITS (mode
) != 1)
9092 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
9093 && !(/* see above */
9094 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
)))
9097 if (MACHO_DYNAMIC_NO_PIC_P
|| !flag_pic
)
9098 return CONSTANT_P (x
);
9100 /* Macho-O PIC code from here. */
9101 if (GET_CODE (x
) == CONST
)
9104 /* SYMBOL_REFs need to be wrapped in an UNSPEC_MACHOPIC_OFFSET. */
9105 if (SYMBOL_REF_P (x
))
9108 /* So this is OK if the wrapped object is const. */
9109 if (GET_CODE (x
) == UNSPEC
9110 && XINT (x
, 1) == UNSPEC_MACHOPIC_OFFSET
)
9111 return CONSTANT_P (XVECEXP (x
, 0, 0));
9112 return CONSTANT_P (x
);
9118 /* Try machine-dependent ways of modifying an illegitimate address
9119 to be legitimate. If we find one, return the new, valid address.
9120 This is used from only one place: `memory_address' in explow.cc.
9122 OLDX is the address as it was before break_out_memory_refs was
9123 called. In some cases it is useful to look at this to decide what
9126 It is always safe for this function to do nothing. It exists to
9127 recognize opportunities to optimize the output.
9129 On RS/6000, first check for the sum of a register with a constant
9130 integer that is out of range. If so, generate code to add the
9131 constant with the low-order 16 bits masked to the register and force
9132 this result into another register (this can be done with `cau').
9133 Then generate an address of REG+(CONST&0xffff), allowing for the
9134 possibility of bit 16 being a one.
9136 Then check for the sum of a register and something not constant, try to
9137 load the other things into a register and return the sum. */
9140 rs6000_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
9145 if (!reg_offset_addressing_ok_p (mode
)
9146 || mode_supports_dq_form (mode
))
9148 if (virtual_stack_registers_memory_p (x
))
9151 /* In theory we should not be seeing addresses of the form reg+0,
9152 but just in case it is generated, optimize it away. */
9153 if (GET_CODE (x
) == PLUS
&& XEXP (x
, 1) == const0_rtx
)
9154 return force_reg (Pmode
, XEXP (x
, 0));
9156 /* For TImode with load/store quad, restrict addresses to just a single
9157 pointer, so it works with both GPRs and VSX registers. */
9158 /* Make sure both operands are registers. */
9159 else if (GET_CODE (x
) == PLUS
9160 && (mode
!= TImode
|| !TARGET_VSX
))
9161 return gen_rtx_PLUS (Pmode
,
9162 force_reg (Pmode
, XEXP (x
, 0)),
9163 force_reg (Pmode
, XEXP (x
, 1)));
9165 return force_reg (Pmode
, x
);
9167 if (SYMBOL_REF_P (x
) && !TARGET_MACHO
)
9169 enum tls_model model
= SYMBOL_REF_TLS_MODEL (x
);
9171 return rs6000_legitimize_tls_address (x
, model
);
9183 /* As in legitimate_offset_address_p we do not assume
9184 worst-case. The mode here is just a hint as to the registers
9185 used. A TImode is usually in gprs, but may actually be in
9186 fprs. Leave worst-case scenario for reload to handle via
9187 insn constraints. PTImode is only GPRs. */
9194 if (GET_CODE (x
) == PLUS
9195 && REG_P (XEXP (x
, 0))
9196 && CONST_INT_P (XEXP (x
, 1))
9197 && ((unsigned HOST_WIDE_INT
) (INTVAL (XEXP (x
, 1)) + 0x8000)
9198 >= 0x10000 - extra
))
9200 HOST_WIDE_INT high_int
, low_int
;
9202 low_int
= sext_hwi (INTVAL (XEXP (x
, 1)), 16);
9203 if (low_int
>= 0x8000 - extra
)
9205 high_int
= INTVAL (XEXP (x
, 1)) - low_int
;
9206 sum
= force_operand (gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
9207 gen_int_mode (high_int
, Pmode
)), 0);
9208 return plus_constant (Pmode
, sum
, low_int
);
9210 else if (GET_CODE (x
) == PLUS
9211 && REG_P (XEXP (x
, 0))
9212 && !CONST_INT_P (XEXP (x
, 1))
9213 && GET_MODE_NUNITS (mode
) == 1
9214 && (GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
9215 || (/* ??? Assume floating point reg based on mode? */
9216 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
)))
9217 && !avoiding_indexed_address_p (mode
))
9219 return gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
9220 force_reg (Pmode
, force_operand (XEXP (x
, 1), 0)));
9222 else if ((TARGET_ELF
9224 || !MACHO_DYNAMIC_NO_PIC_P
9228 && TARGET_NO_TOC_OR_PCREL
9231 && !CONST_WIDE_INT_P (x
)
9232 && !CONST_DOUBLE_P (x
)
9234 && GET_MODE_NUNITS (mode
) == 1
9235 && (GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
9236 || (/* ??? Assume floating point reg based on mode? */
9237 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
))))
9239 rtx reg
= gen_reg_rtx (Pmode
);
9241 emit_insn (gen_elf_high (reg
, x
));
9243 emit_insn (gen_macho_high (Pmode
, reg
, x
));
9244 return gen_rtx_LO_SUM (Pmode
, reg
, x
);
9248 && constant_pool_expr_p (x
)
9249 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x
), Pmode
))
9250 return create_TOC_reference (x
, NULL_RTX
);
9255 /* Debug version of rs6000_legitimize_address. */
9257 rs6000_debug_legitimize_address (rtx x
, rtx oldx
, machine_mode mode
)
9263 ret
= rs6000_legitimize_address (x
, oldx
, mode
);
9264 insns
= get_insns ();
9270 "\nrs6000_legitimize_address: mode %s, old code %s, "
9271 "new code %s, modified\n",
9272 GET_MODE_NAME (mode
), GET_RTX_NAME (GET_CODE (x
)),
9273 GET_RTX_NAME (GET_CODE (ret
)));
9275 fprintf (stderr
, "Original address:\n");
9278 fprintf (stderr
, "oldx:\n");
9281 fprintf (stderr
, "New address:\n");
9286 fprintf (stderr
, "Insns added:\n");
9287 debug_rtx_list (insns
, 20);
9293 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9294 GET_MODE_NAME (mode
), GET_RTX_NAME (GET_CODE (x
)));
9305 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9306 We need to emit DTP-relative relocations. */
9308 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
9310 rs6000_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
9315 fputs ("\t.long\t", file
);
9318 fputs (DOUBLE_INT_ASM_OP
, file
);
9323 output_addr_const (file
, x
);
9325 fputs ("@dtprel+0x8000", file
);
9328 /* Return true if X is a symbol that refers to real (rather than emulated)
9332 rs6000_real_tls_symbol_ref_p (rtx x
)
9334 return (SYMBOL_REF_P (x
)
9335 && SYMBOL_REF_TLS_MODEL (x
) >= TLS_MODEL_REAL
);
9338 /* In the name of slightly smaller debug output, and to cater to
9339 general assembler lossage, recognize various UNSPEC sequences
9340 and turn them back into a direct symbol reference. */
9343 rs6000_delegitimize_address (rtx orig_x
)
9347 /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion. It
9348 encodes loading up the high part of the address of a TOC reference along
9349 with a load of a GPR using the same base register used for the load. We
9350 return the original SYMBOL_REF.
9352 (set (reg:INT1 <reg>
9353 (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR)))
9355 UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass. These
9356 UNSPECs include the external SYMBOL_REF along with the value being loaded.
9357 We return the original SYMBOL_REF.
9359 (parallel [(set (reg:DI <base-reg>)
9360 (unspec:DI [(symbol_ref <symbol>)
9361 (const_int <marker>)]
9362 UNSPEC_PCREL_OPT_LD_ADDR))
9363 (set (reg:DI <load-reg>)
9364 (unspec:DI [(const_int 0)]
9365 UNSPEC_PCREL_OPT_LD_DATA))])
9367 UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the
9368 GPR being loaded is the same as the GPR used to hold the external address.
9370 (set (reg:DI <base-reg>)
9371 (unspec:DI [(symbol_ref <symbol>)
9372 (const_int <marker>)]
9373 UNSPEC_PCREL_OPT_LD_SAME_REG))
9375 UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This
9376 UNSPEC include the external SYMBOL_REF along with the value being loaded.
9377 We return the original SYMBOL_REF.
9379 (parallel [(set (reg:DI <base-reg>)
9380 (unspec:DI [(symbol_ref <symbol>)
9381 (const_int <marker>)]
9382 UNSPEC_PCREL_OPT_ST_ADDR))
9383 (use (reg <store-reg>))]) */
9385 if (GET_CODE (orig_x
) == UNSPEC
)
9386 switch (XINT (orig_x
, 1))
9388 case UNSPEC_FUSION_GPR
:
9389 case UNSPEC_PCREL_OPT_LD_ADDR
:
9390 case UNSPEC_PCREL_OPT_LD_SAME_REG
:
9391 case UNSPEC_PCREL_OPT_ST_ADDR
:
9392 orig_x
= XVECEXP (orig_x
, 0, 0);
9399 orig_x
= delegitimize_mem_from_attrs (orig_x
);
9406 if (TARGET_CMODEL
!= CMODEL_SMALL
&& GET_CODE (y
) == LO_SUM
)
9410 if (GET_CODE (y
) == PLUS
9411 && GET_MODE (y
) == Pmode
9412 && CONST_INT_P (XEXP (y
, 1)))
9414 offset
= XEXP (y
, 1);
9418 if (GET_CODE (y
) == UNSPEC
&& XINT (y
, 1) == UNSPEC_TOCREL
)
9420 y
= XVECEXP (y
, 0, 0);
9423 /* Do not associate thread-local symbols with the original
9424 constant pool symbol. */
9427 && CONSTANT_POOL_ADDRESS_P (y
)
9428 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y
)))
9432 if (offset
!= NULL_RTX
)
9433 y
= gen_rtx_PLUS (Pmode
, y
, offset
);
9434 if (!MEM_P (orig_x
))
9437 return replace_equiv_address_nv (orig_x
, y
);
9441 && GET_CODE (orig_x
) == LO_SUM
9442 && GET_CODE (XEXP (orig_x
, 1)) == CONST
)
9444 y
= XEXP (XEXP (orig_x
, 1), 0);
9445 if (GET_CODE (y
) == UNSPEC
&& XINT (y
, 1) == UNSPEC_MACHOPIC_OFFSET
)
9446 return XVECEXP (y
, 0, 0);
9452 /* Return true if X shouldn't be emitted into the debug info.
9453 The linker doesn't like .toc section references from
9454 .debug_* sections, so reject .toc section symbols. */
9457 rs6000_const_not_ok_for_debug_p (rtx x
)
9459 if (GET_CODE (x
) == UNSPEC
)
9461 if (SYMBOL_REF_P (x
)
9462 && CONSTANT_POOL_ADDRESS_P (x
))
9464 rtx c
= get_pool_constant (x
);
9465 machine_mode cmode
= get_pool_mode (x
);
9466 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c
, cmode
))
9473 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9476 rs6000_legitimate_combined_insn (rtx_insn
*insn
)
9478 int icode
= INSN_CODE (insn
);
9480 /* Reject creating doloop insns. Combine should not be allowed
9481 to create these for a number of reasons:
9482 1) In a nested loop, if combine creates one of these in an
9483 outer loop and the register allocator happens to allocate ctr
9484 to the outer loop insn, then the inner loop can't use ctr.
9485 Inner loops ought to be more highly optimized.
9486 2) Combine often wants to create one of these from what was
9487 originally a three insn sequence, first combining the three
9488 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9489 allocated ctr, the splitter takes use back to the three insn
9490 sequence. It's better to stop combine at the two insn
9492 3) Faced with not being able to allocate ctr for ctrsi/crtdi
9493 insns, the register allocator sometimes uses floating point
9494 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9495 jump insn and output reloads are not implemented for jumps,
9496 the ctrsi/ctrdi splitters need to handle all possible cases.
9497 That's a pain, and it gets to be seriously difficult when a
9498 splitter that runs after reload needs memory to transfer from
9499 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9500 for the difficult case. It's better to not create problems
9501 in the first place. */
9502 if (icode
!= CODE_FOR_nothing
9503 && (icode
== CODE_FOR_bdz_si
9504 || icode
== CODE_FOR_bdz_di
9505 || icode
== CODE_FOR_bdnz_si
9506 || icode
== CODE_FOR_bdnz_di
9507 || icode
== CODE_FOR_bdztf_si
9508 || icode
== CODE_FOR_bdztf_di
9509 || icode
== CODE_FOR_bdnztf_si
9510 || icode
== CODE_FOR_bdnztf_di
))
9516 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9518 static GTY(()) rtx rs6000_tls_symbol
;
9520 rs6000_tls_get_addr (void)
9522 if (!rs6000_tls_symbol
)
9523 rs6000_tls_symbol
= init_one_libfunc ("__tls_get_addr");
9525 return rs6000_tls_symbol
;
9528 /* Construct the SYMBOL_REF for TLS GOT references. */
9530 static GTY(()) rtx rs6000_got_symbol
;
9532 rs6000_got_sym (void)
9534 if (!rs6000_got_symbol
)
9536 rs6000_got_symbol
= gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
9537 SYMBOL_REF_FLAGS (rs6000_got_symbol
) |= SYMBOL_FLAG_LOCAL
;
9538 SYMBOL_REF_FLAGS (rs6000_got_symbol
) |= SYMBOL_FLAG_EXTERNAL
;
9541 return rs6000_got_symbol
;
9544 /* AIX Thread-Local Address support. */
9547 rs6000_legitimize_tls_address_aix (rtx addr
, enum tls_model model
)
9549 rtx sym
, mem
, tocref
, tlsreg
, tmpreg
, dest
;
9553 /* Place addr into TOC constant pool. */
9554 sym
= force_const_mem (GET_MODE (addr
), addr
);
9556 /* Output the TOC entry and create the MEM referencing the value. */
9557 if (constant_pool_expr_p (XEXP (sym
, 0))
9558 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym
, 0)), Pmode
))
9560 tocref
= create_TOC_reference (XEXP (sym
, 0), NULL_RTX
);
9561 mem
= gen_const_mem (Pmode
, tocref
);
9562 set_mem_alias_set (mem
, get_TOC_alias_set ());
9567 /* Use global-dynamic for local-dynamic. */
9568 if (model
== TLS_MODEL_GLOBAL_DYNAMIC
9569 || model
== TLS_MODEL_LOCAL_DYNAMIC
)
9571 /* Create new TOC reference for @m symbol. */
9572 name
= XSTR (XVECEXP (XEXP (mem
, 0), 0, 0), 0);
9573 tlsname
= XALLOCAVEC (char, strlen (name
) + 1);
9574 strcpy (tlsname
, "*LCM");
9575 strcat (tlsname
, name
+ 3);
9576 rtx modaddr
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (tlsname
));
9577 SYMBOL_REF_FLAGS (modaddr
) |= SYMBOL_FLAG_LOCAL
;
9578 tocref
= create_TOC_reference (modaddr
, NULL_RTX
);
9579 rtx modmem
= gen_const_mem (Pmode
, tocref
);
9580 set_mem_alias_set (modmem
, get_TOC_alias_set ());
9582 rtx modreg
= gen_reg_rtx (Pmode
);
9583 emit_insn (gen_rtx_SET (modreg
, modmem
));
9585 tmpreg
= gen_reg_rtx (Pmode
);
9586 emit_insn (gen_rtx_SET (tmpreg
, mem
));
9588 dest
= gen_reg_rtx (Pmode
);
9590 emit_insn (gen_tls_get_addrsi (dest
, modreg
, tmpreg
));
9592 emit_insn (gen_tls_get_addrdi (dest
, modreg
, tmpreg
));
9595 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9596 else if (TARGET_32BIT
)
9598 tlsreg
= gen_reg_rtx (SImode
);
9599 emit_insn (gen_tls_get_tpointer (tlsreg
));
9603 tlsreg
= gen_rtx_REG (DImode
, 13);
9604 xcoff_tls_exec_model_detected
= true;
9607 /* Load the TOC value into temporary register. */
9608 tmpreg
= gen_reg_rtx (Pmode
);
9609 emit_insn (gen_rtx_SET (tmpreg
, mem
));
9610 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
9611 gen_rtx_MINUS (Pmode
, addr
, tlsreg
));
9613 /* Add TOC symbol value to TLS pointer. */
9614 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tmpreg
, tlsreg
));
9619 /* Passes the tls arg value for global dynamic and local dynamic
9620 emit_library_call_value in rs6000_legitimize_tls_address to
9621 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
9622 marker relocs put on __tls_get_addr calls. */
9623 static rtx global_tlsarg
;
9625 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9626 this (thread-local) address. */
9629 rs6000_legitimize_tls_address (rtx addr
, enum tls_model model
)
9634 return rs6000_legitimize_tls_address_aix (addr
, model
);
9636 dest
= gen_reg_rtx (Pmode
);
9637 if (model
== TLS_MODEL_LOCAL_EXEC
9638 && (rs6000_tls_size
== 16 || rs6000_pcrel_p ()))
9644 tlsreg
= gen_rtx_REG (Pmode
, 13);
9645 insn
= gen_tls_tprel_64 (dest
, tlsreg
, addr
);
9649 tlsreg
= gen_rtx_REG (Pmode
, 2);
9650 insn
= gen_tls_tprel_32 (dest
, tlsreg
, addr
);
9654 else if (model
== TLS_MODEL_LOCAL_EXEC
&& rs6000_tls_size
== 32)
9658 tmp
= gen_reg_rtx (Pmode
);
9661 tlsreg
= gen_rtx_REG (Pmode
, 13);
9662 insn
= gen_tls_tprel_ha_64 (tmp
, tlsreg
, addr
);
9666 tlsreg
= gen_rtx_REG (Pmode
, 2);
9667 insn
= gen_tls_tprel_ha_32 (tmp
, tlsreg
, addr
);
9671 insn
= gen_tls_tprel_lo_64 (dest
, tmp
, addr
);
9673 insn
= gen_tls_tprel_lo_32 (dest
, tmp
, addr
);
9678 rtx got
, tga
, tmp1
, tmp2
;
9680 /* We currently use relocations like @got@tlsgd for tls, which
9681 means the linker will handle allocation of tls entries, placing
9682 them in the .got section. So use a pointer to the .got section,
9683 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9684 or to secondary GOT sections used by 32-bit -fPIC. */
9685 if (rs6000_pcrel_p ())
9687 else if (TARGET_64BIT
)
9688 got
= gen_rtx_REG (Pmode
, 2);
9692 got
= gen_rtx_REG (Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
9695 rtx gsym
= rs6000_got_sym ();
9696 got
= gen_reg_rtx (Pmode
);
9698 rs6000_emit_move (got
, gsym
, Pmode
);
9703 tmp1
= gen_reg_rtx (Pmode
);
9704 tmp2
= gen_reg_rtx (Pmode
);
9705 mem
= gen_const_mem (Pmode
, tmp1
);
9706 lab
= gen_label_rtx ();
9707 emit_insn (gen_load_toc_v4_PIC_1b (gsym
, lab
));
9708 emit_move_insn (tmp1
, gen_rtx_REG (Pmode
, LR_REGNO
));
9709 if (TARGET_LINK_STACK
)
9710 emit_insn (gen_addsi3 (tmp1
, tmp1
, GEN_INT (4)));
9711 emit_move_insn (tmp2
, mem
);
9712 rtx_insn
*last
= emit_insn (gen_addsi3 (got
, tmp1
, tmp2
));
9713 set_unique_reg_note (last
, REG_EQUAL
, gsym
);
9718 if (model
== TLS_MODEL_GLOBAL_DYNAMIC
)
9720 rtx arg
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, addr
, got
),
9722 tga
= rs6000_tls_get_addr ();
9723 rtx argreg
= gen_rtx_REG (Pmode
, 3);
9724 emit_insn (gen_rtx_SET (argreg
, arg
));
9725 global_tlsarg
= arg
;
9726 emit_library_call_value (tga
, dest
, LCT_CONST
, Pmode
, argreg
, Pmode
);
9727 global_tlsarg
= NULL_RTX
;
9729 /* Make a note so that the result of this call can be CSEd. */
9730 rtvec vec
= gen_rtvec (1, copy_rtx (arg
));
9731 rtx uns
= gen_rtx_UNSPEC (Pmode
, vec
, UNSPEC_TLS_GET_ADDR
);
9732 set_unique_reg_note (get_last_insn (), REG_EQUAL
, uns
);
9734 else if (model
== TLS_MODEL_LOCAL_DYNAMIC
)
9736 rtx arg
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, got
), UNSPEC_TLSLD
);
9737 tga
= rs6000_tls_get_addr ();
9738 tmp1
= gen_reg_rtx (Pmode
);
9739 rtx argreg
= gen_rtx_REG (Pmode
, 3);
9740 emit_insn (gen_rtx_SET (argreg
, arg
));
9741 global_tlsarg
= arg
;
9742 emit_library_call_value (tga
, tmp1
, LCT_CONST
, Pmode
, argreg
, Pmode
);
9743 global_tlsarg
= NULL_RTX
;
9745 /* Make a note so that the result of this call can be CSEd. */
9746 rtvec vec
= gen_rtvec (1, copy_rtx (arg
));
9747 rtx uns
= gen_rtx_UNSPEC (Pmode
, vec
, UNSPEC_TLS_GET_ADDR
);
9748 set_unique_reg_note (get_last_insn (), REG_EQUAL
, uns
);
9750 if (rs6000_tls_size
== 16 || rs6000_pcrel_p ())
9753 insn
= gen_tls_dtprel_64 (dest
, tmp1
, addr
);
9755 insn
= gen_tls_dtprel_32 (dest
, tmp1
, addr
);
9757 else if (rs6000_tls_size
== 32)
9759 tmp2
= gen_reg_rtx (Pmode
);
9761 insn
= gen_tls_dtprel_ha_64 (tmp2
, tmp1
, addr
);
9763 insn
= gen_tls_dtprel_ha_32 (tmp2
, tmp1
, addr
);
9766 insn
= gen_tls_dtprel_lo_64 (dest
, tmp2
, addr
);
9768 insn
= gen_tls_dtprel_lo_32 (dest
, tmp2
, addr
);
9772 tmp2
= gen_reg_rtx (Pmode
);
9774 insn
= gen_tls_got_dtprel_64 (tmp2
, got
, addr
);
9776 insn
= gen_tls_got_dtprel_32 (tmp2
, got
, addr
);
9778 insn
= gen_rtx_SET (dest
, gen_rtx_PLUS (Pmode
, tmp2
, tmp1
));
9784 /* IE, or 64-bit offset LE. */
9785 tmp2
= gen_reg_rtx (Pmode
);
9787 insn
= gen_tls_got_tprel_64 (tmp2
, got
, addr
);
9789 insn
= gen_tls_got_tprel_32 (tmp2
, got
, addr
);
9791 if (rs6000_pcrel_p ())
9794 insn
= gen_tls_tls_pcrel_64 (dest
, tmp2
, addr
);
9796 insn
= gen_tls_tls_pcrel_32 (dest
, tmp2
, addr
);
9798 else if (TARGET_64BIT
)
9799 insn
= gen_tls_tls_64 (dest
, tmp2
, addr
);
9801 insn
= gen_tls_tls_32 (dest
, tmp2
, addr
);
9809 /* Only create the global variable for the stack protect guard if we are using
9810 the global flavor of that guard. */
9812 rs6000_init_stack_protect_guard (void)
9814 if (rs6000_stack_protector_guard
== SSP_GLOBAL
)
9815 return default_stack_protect_guard ();
9820 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9823 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
9825 /* If GET_CODE (x) is HIGH, the 'X' represets the high part of a symbol_ref.
9826 It can not be put into a constant pool. e.g.
9827 (high:DI (unspec:DI [(symbol_ref/u:DI ("*.LC0")..)
9828 (high:DI (symbol_ref:DI ("var")..)). */
9829 if (GET_CODE (x
) == HIGH
)
9832 /* A TLS symbol in the TOC cannot contain a sum. */
9833 if (GET_CODE (x
) == CONST
9834 && GET_CODE (XEXP (x
, 0)) == PLUS
9835 && SYMBOL_REF_P (XEXP (XEXP (x
, 0), 0))
9836 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0)) != 0)
9839 /* Allow AIX TOC TLS symbols in the constant pool,
9840 but not ELF TLS symbols. */
9841 return TARGET_ELF
&& tls_referenced_p (x
);
9844 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9845 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9846 can be addressed relative to the toc pointer. */
9849 use_toc_relative_ref (rtx sym
, machine_mode mode
)
9851 return ((constant_pool_expr_p (sym
)
9852 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym
),
9853 get_pool_mode (sym
)))
9854 || (TARGET_CMODEL
== CMODEL_MEDIUM
9855 && SYMBOL_REF_LOCAL_P (sym
)
9856 && GET_MODE_SIZE (mode
) <= POWERPC64_TOC_POINTER_ALIGNMENT
));
9859 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9860 that is a valid memory address for an instruction.
9861 The MODE argument is the machine mode for the MEM expression
9862 that wants to use this address.
9864 On the RS/6000, there are four valid address: a SYMBOL_REF that
9865 refers to a constant pool entry of an address (or the sum of it
9866 plus a constant), a short (16-bit signed) constant plus a register,
9867 the sum of two registers, or a register indirect, possibly with an
9868 auto-increment. For DFmode, DDmode and DImode with a constant plus
9869 register, we must ensure that both words are addressable or PowerPC64
9870 with offset word aligned.
9872 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9873 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9874 because adjacent memory cells are accessed by adding word-sized offsets
9875 during assembly output. */
9877 rs6000_legitimate_address_p (machine_mode mode
, rtx x
, bool reg_ok_strict
,
9878 code_helper ch
= ERROR_MARK
)
9880 bool reg_offset_p
= reg_offset_addressing_ok_p (mode
);
9881 bool quad_offset_p
= mode_supports_dq_form (mode
);
9883 if (TARGET_ELF
&& RS6000_SYMBOL_REF_TLS_P (x
))
9886 /* lxvl and stxvl doesn't support any addressing modes with PLUS. */
9887 if (ch
.is_internal_fn ()
9888 && (ch
== IFN_LEN_LOAD
|| ch
== IFN_LEN_STORE
)
9889 && GET_CODE (x
) == PLUS
)
9892 /* Handle unaligned altivec lvx/stvx type addresses. */
9893 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
)
9894 && GET_CODE (x
) == AND
9895 && CONST_INT_P (XEXP (x
, 1))
9896 && INTVAL (XEXP (x
, 1)) == -16)
9899 return (legitimate_indirect_address_p (x
, reg_ok_strict
)
9900 || legitimate_indexed_address_p (x
, reg_ok_strict
)
9901 || virtual_stack_registers_memory_p (x
));
9904 if (legitimate_indirect_address_p (x
, reg_ok_strict
))
9907 && (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == PRE_DEC
)
9908 && mode_supports_pre_incdec_p (mode
)
9909 && legitimate_indirect_address_p (XEXP (x
, 0), reg_ok_strict
))
9912 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
9913 if (address_is_prefixed (x
, mode
, NON_PREFIXED_DEFAULT
))
9916 /* Handle restricted vector d-form offsets in ISA 3.0. */
9919 if (quad_address_p (x
, mode
, reg_ok_strict
))
9922 else if (virtual_stack_registers_memory_p (x
))
9925 else if (reg_offset_p
)
9927 if (legitimate_small_data_p (mode
, x
))
9929 if (legitimate_constant_pool_address_p (x
, mode
,
9930 reg_ok_strict
|| lra_in_progress
))
9934 /* For TImode, if we have TImode in VSX registers, only allow register
9935 indirect addresses. This will allow the values to go in either GPRs
9936 or VSX registers without reloading. The vector types would tend to
9937 go into VSX registers, so we allow REG+REG, while TImode seems
9938 somewhat split, in that some uses are GPR based, and some VSX based. */
9939 /* FIXME: We could loosen this by changing the following to
9940 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9941 but currently we cannot allow REG+REG addressing for TImode. See
9942 PR72827 for complete details on how this ends up hoodwinking DSE. */
9943 if (mode
== TImode
&& TARGET_VSX
)
9945 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9948 && GET_CODE (x
) == PLUS
9949 && REG_P (XEXP (x
, 0))
9950 && (XEXP (x
, 0) == virtual_stack_vars_rtx
9951 || XEXP (x
, 0) == arg_pointer_rtx
)
9952 && CONST_INT_P (XEXP (x
, 1)))
9954 if (rs6000_legitimate_offset_address_p (mode
, x
, reg_ok_strict
, false))
9956 if (!FLOAT128_2REG_P (mode
)
9957 && (TARGET_HARD_FLOAT
9959 || (mode
!= DFmode
&& mode
!= DDmode
))
9960 && (TARGET_POWERPC64
|| mode
!= DImode
)
9961 && (mode
!= TImode
|| VECTOR_MEM_VSX_P (TImode
))
9963 && !avoiding_indexed_address_p (mode
)
9964 && legitimate_indexed_address_p (x
, reg_ok_strict
))
9966 if (TARGET_UPDATE
&& GET_CODE (x
) == PRE_MODIFY
9967 && mode_supports_pre_modify_p (mode
)
9968 && legitimate_indirect_address_p (XEXP (x
, 0), reg_ok_strict
)
9969 && (rs6000_legitimate_offset_address_p (mode
, XEXP (x
, 1),
9970 reg_ok_strict
, false)
9971 || (!avoiding_indexed_address_p (mode
)
9972 && legitimate_indexed_address_p (XEXP (x
, 1), reg_ok_strict
)))
9973 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
9975 /* There is no prefixed version of the load/store with update. */
9976 rtx addr
= XEXP (x
, 1);
9977 return !address_is_prefixed (addr
, mode
, NON_PREFIXED_DEFAULT
);
9979 if (reg_offset_p
&& !quad_offset_p
9980 && legitimate_lo_sum_address_p (mode
, x
, reg_ok_strict
))
9985 /* Debug version of rs6000_legitimate_address_p. */
9987 rs6000_debug_legitimate_address_p (machine_mode mode
, rtx x
, bool reg_ok_strict
,
9990 bool ret
= rs6000_legitimate_address_p (mode
, x
, reg_ok_strict
, ch
);
9992 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9993 "strict = %d, reload = %s, code = %s\n",
9994 ret
? "true" : "false",
9995 GET_MODE_NAME (mode
),
9997 (reload_completed
? "after" : "before"),
9998 GET_RTX_NAME (GET_CODE (x
)));
10004 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
10007 rs6000_mode_dependent_address_p (const_rtx addr
,
10008 addr_space_t as ATTRIBUTE_UNUSED
)
10010 return rs6000_mode_dependent_address_ptr (addr
);
10013 /* Go to LABEL if ADDR (a legitimate address expression)
10014 has an effect that depends on the machine mode it is used for.
10016 On the RS/6000 this is true of all integral offsets (since AltiVec
10017 and VSX modes don't allow them) or is a pre-increment or decrement.
10019 ??? Except that due to conceptual problems in offsettable_address_p
10020 we can't really report the problems of integral offsets. So leave
10021 this assuming that the adjustable offset must be valid for the
10022 sub-words of a TFmode operand, which is what we had before. */
10025 rs6000_mode_dependent_address (const_rtx addr
)
10027 switch (GET_CODE (addr
))
10030 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
10031 is considered a legitimate address before reload, so there
10032 are no offset restrictions in that case. Note that this
10033 condition is safe in strict mode because any address involving
10034 virtual_stack_vars_rtx or arg_pointer_rtx would already have
10035 been rejected as illegitimate. */
10036 if (XEXP (addr
, 0) != virtual_stack_vars_rtx
10037 && XEXP (addr
, 0) != arg_pointer_rtx
10038 && CONST_INT_P (XEXP (addr
, 1)))
10040 HOST_WIDE_INT val
= INTVAL (XEXP (addr
, 1));
10041 HOST_WIDE_INT extra
= TARGET_POWERPC64
? 8 : 12;
10042 if (TARGET_PREFIXED
)
10043 return !SIGNED_34BIT_OFFSET_EXTRA_P (val
, extra
);
10045 return !SIGNED_16BIT_OFFSET_EXTRA_P (val
, extra
);
10050 /* Anything in the constant pool is sufficiently aligned that
10051 all bytes have the same high part address. */
10052 return !legitimate_constant_pool_address_p (addr
, QImode
, false);
10054 /* Auto-increment cases are now treated generically in recog.cc. */
10056 return TARGET_UPDATE
;
10058 /* AND is only allowed in Altivec loads. */
10069 /* Debug version of rs6000_mode_dependent_address. */
10071 rs6000_debug_mode_dependent_address (const_rtx addr
)
10073 bool ret
= rs6000_mode_dependent_address (addr
);
10075 fprintf (stderr
, "\nrs6000_mode_dependent_address: ret = %s\n",
10076 ret
? "true" : "false");
10082 /* Implement FIND_BASE_TERM. */
10085 rs6000_find_base_term (rtx op
)
10090 if (GET_CODE (base
) == CONST
)
10091 base
= XEXP (base
, 0);
10092 if (GET_CODE (base
) == PLUS
)
10093 base
= XEXP (base
, 0);
10094 if (GET_CODE (base
) == UNSPEC
)
10095 switch (XINT (base
, 1))
10097 case UNSPEC_TOCREL
:
10098 case UNSPEC_MACHOPIC_OFFSET
:
10099 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
10100 for aliasing purposes. */
10101 return XVECEXP (base
, 0, 0);
10107 /* More elaborate version of recog's offsettable_memref_p predicate
10108 that works around the ??? note of rs6000_mode_dependent_address.
10109 In particular it accepts
10111 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
10113 in 32-bit mode, that the recog predicate rejects. */
10116 rs6000_offsettable_memref_p (rtx op
, machine_mode reg_mode
, bool strict
)
10123 /* First mimic offsettable_memref_p. */
10124 if (offsettable_address_p (strict
, GET_MODE (op
), XEXP (op
, 0)))
10127 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10128 the latter predicate knows nothing about the mode of the memory
10129 reference and, therefore, assumes that it is the largest supported
10130 mode (TFmode). As a consequence, legitimate offsettable memory
10131 references are rejected. rs6000_legitimate_offset_address_p contains
10132 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10133 at least with a little bit of help here given that we know the
10134 actual registers used. */
10135 worst_case
= ((TARGET_POWERPC64
&& GET_MODE_CLASS (reg_mode
) == MODE_INT
)
10136 || GET_MODE_SIZE (reg_mode
) == 4);
10137 return rs6000_legitimate_offset_address_p (GET_MODE (op
), XEXP (op
, 0),
10138 strict
, worst_case
);
/* NOTE(review): lossy extraction -- the `return` lines giving the width for
   each mode class (original lines 10163/10165/10167/...) and the closing
   braces are missing; code kept byte-identical.  The latency table in the
   header comment suggests the dropped returns encode per-unit throughput,
   but that cannot be confirmed from this view.  */
10141 /* Determine the reassociation width to be used in reassociate_bb.
10142 This takes into account how many parallel operations we
10143 can actually do of a given type, and also the latency.
10145 int add/sub 6/cycle
10147 vect add/sub/mul 2/cycle
10148 fp add/sub/mul 2/cycle
10153 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
/* Dispatch on the tuning target; only POWER8..POWER11 get the mode-class
   tests below (other processors' handling was dropped by the extraction).  */
10156 switch (rs6000_tune
)
10158 case PROCESSOR_POWER8
:
10159 case PROCESSOR_POWER9
:
10160 case PROCESSOR_POWER10
:
10161 case PROCESSOR_POWER11
:
10162 if (DECIMAL_FLOAT_MODE_P (mode
))
10164 if (VECTOR_MODE_P (mode
))
10166 if (INTEGRAL_MODE_P (mode
))
10168 if (FLOAT_MODE_P (mode
))
/* NOTE(review): lossy extraction -- braces, the declaration of `i`, and the
   #ifdef/TARGET_64BIT guard presumably around the GPR13 line are missing;
   code kept byte-identical.  Marks registers fixed / call-used / global
   according to the selected target flags and ABI.  */
10177 /* Change register usage conditional on target flags. */
10179 rs6000_conditional_register_usage (void)
10183 if (TARGET_DEBUG_TARGET
)
10184 fprintf (stderr
, "rs6000_conditional_register_usage called\n");
10186 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10188 fixed_regs
[13] = call_used_regs
[13] = 1;
10190 /* Conditionally disable FPRs. */
10191 if (TARGET_SOFT_FLOAT
)
10192 for (i
= 32; i
< 64; i
++)
10193 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10195 /* For non PC-relative code, GPR2 is unavailable for register allocation. */
10196 if (FIXED_R2
&& !rs6000_pcrel_p ())
10199 /* The TOC register is not killed across calls in a way that is
10200 visible to the compiler. */
10201 if (fixed_regs
[2] && (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
))
10202 call_used_regs
[2] = 0;
/* PIC register handling: with -fPIC (flag_pic == 2) on V4 the PIC base is
   only fixed; with -fpic (== 1) it is fixed and call-used.  */
10204 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
== 2)
10205 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10207 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
== 1)
10208 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10209 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10211 if (DEFAULT_ABI
== ABI_DARWIN
&& flag_pic
)
10212 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10213 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10215 if (TARGET_TOC
&& TARGET_MINIMAL_TOC
)
10216 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
/* Without AltiVec/VSX the whole vector register file (plus VRSAVE) is
   unavailable; with it, VSCR is treated as a global register.  */
10218 if (!TARGET_ALTIVEC
&& !TARGET_VSX
)
10220 for (i
= FIRST_ALTIVEC_REGNO
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
10221 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10222 call_used_regs
[VRSAVE_REGNO
] = 1;
10225 if (TARGET_ALTIVEC
|| TARGET_VSX
)
10226 global_regs
[VSCR_REGNO
] = 1;
10228 if (TARGET_ALTIVEC_ABI
)
10230 for (i
= FIRST_ALTIVEC_REGNO
; i
< FIRST_ALTIVEC_REGNO
+ 20; ++i
)
10231 call_used_regs
[i
] = 1;
10233 /* AIX reserves VR20:31 in non-extended ABI mode. */
10234 if (TARGET_XCOFF
&& !rs6000_aix_extabi
)
10235 for (i
= FIRST_ALTIVEC_REGNO
+ 20; i
< FIRST_ALTIVEC_REGNO
+ 32; ++i
)
10236 fixed_regs
[i
] = call_used_regs
[i
] = 1;
/* NOTE(review): lossy extraction -- the switch on MODE, braces, `return`
   statements and the declarations of c/temp/hi/lo/insn/set are missing;
   code kept byte-identical.  Emits a move of CONST_INT SOURCE into DEST,
   splitting into lis/ori (SImode) or delegating to
   rs6000_emit_set_long_const (DImode), and finally attaches a REG_EQUAL
   note when the last emitted insn's source is not already constant.  */
10241 /* Output insns to set DEST equal to the constant SOURCE as a series of
10242 lis, ori and shl instructions and return TRUE. */
10245 rs6000_emit_set_const (rtx dest
, rtx source
)
10247 machine_mode mode
= GET_MODE (dest
);
10252 gcc_checking_assert (CONST_INT_P (source
));
10253 c
= INTVAL (source
);
10258 emit_insn (gen_rtx_SET (dest
, source
));
/* SImode path: build the value as high 16 bits (lis) then IOR in the low
   16 bits (ori), using a scratch pseudo when one can still be created.  */
10262 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (SImode
);
10264 emit_insn (gen_rtx_SET (temp
, GEN_INT (c
& ~(HOST_WIDE_INT
) 0xffff)));
10265 emit_insn (gen_rtx_SET (dest
,
10266 gen_rtx_IOR (SImode
, temp
,
10267 GEN_INT (c
& 0xffff))));
/* DImode on 32-bit: split into two word moves, high word then
   sign-extended low word.  */
10271 if (!TARGET_POWERPC64
)
10275 hi
= operand_subword_force (dest
, WORDS_BIG_ENDIAN
== 0, DImode
);
10276 lo
= operand_subword_force (dest
, WORDS_BIG_ENDIAN
!= 0, DImode
);
10277 emit_move_insn (hi
, GEN_INT (c
>> 32));
10278 c
= sext_hwi (c
, 32);
10279 emit_move_insn (lo
, GEN_INT (c
));
10282 rs6000_emit_set_long_const (dest
, c
);
10286 gcc_unreachable ();
/* Record the constant on the final insn so later passes know DEST == C.  */
10289 insn
= get_last_insn ();
10290 set
= single_set (insn
);
10291 if (! CONSTANT_P (SET_SRC (set
)))
10292 set_unique_reg_note (insn
, REG_EQUAL
, GEN_INT (c
));
/* NOTE(review): lossy extraction -- the return type, braces, the
   `return true;`/`return false;` lines and the case-b assignment to *ROT
   are missing; code kept byte-identical.  */
10297 /* Check if C can be rotated to a negative value which 'lis' instruction is
10298 able to load: 1..1xx0..0. If so, set *ROT to the number by which C is
10299 rotated, and return true. Return false otherwise. */
10302 can_be_rotated_to_negative_lis (HOST_WIDE_INT c
, int *rot
)
10304 /* case a. 1..1xxx0..01..1: up to 15 x's, at least 16 0's. */
10305 int leading_ones
= clz_hwi (~c
);
10306 int tailing_ones
= ctz_hwi (~c
);
/* Zeros strictly between the trailing-ones run and the rest of C.  */
10307 int middle_zeros
= ctz_hwi (c
>> tailing_ones
);
10308 if (middle_zeros
>= 16 && leading_ones
+ tailing_ones
>= 33)
/* Rotating right by tailing_ones brings the lis-loadable pattern into
   place, i.e. a left-rotate of 64 - tailing_ones.  */
10310 *rot
= HOST_BITS_PER_WIDE_INT
- tailing_ones
;
10314 /* case b. xx0..01..1xx: some of 15 x's (and some of 16 0's) are
10315 rotated over the highest bit. */
/* pos_one: leading-zero count of C with its top 16 bits stripped, locating
   the first 1 of the middle ones-run.  */
10316 int pos_one
= clz_hwi ((c
<< 16) >> 16);
10317 middle_zeros
= ctz_hwi (c
>> (HOST_BITS_PER_WIDE_INT
- pos_one
));
10318 int middle_ones
= clz_hwi (~(c
<< pos_one
));
10319 if (middle_zeros
>= 16 && middle_ones
>= 33)
/* NOTE(review): lossy extraction -- the return type, the declaration of `n`,
   braces and return statements are missing; code kept byte-identical.
   The dropped header line (orig. 10329/10330) presumably named the second
   instruction as rotldi.  */
10328 /* Check if value C can be built by 2 instructions: one is 'li or lis',
10331 If so, *SHIFT is set to the shift operand of rotldi(rldicl), and *MASK
10332 is set to the mask operand of rotldi(rldicl), and return true.
10333 Return false otherwise. */
10336 can_be_built_by_li_lis_and_rotldi (HOST_WIDE_INT c
, int *shift
,
10337 HOST_WIDE_INT
*mask
)
10339 /* If C or ~C contains at least 49 successive zeros, then C can be rotated
10340 to/from a positive or negative value that 'li' is able to load. */
10342 if (can_be_rotated_to_lowbits (c
, 15, &n
)
10343 || can_be_rotated_to_lowbits (~c
, 15, &n
)
10344 || can_be_rotated_to_negative_lis (c
, &n
))
/* A pure rotate keeps every bit: full mask, and rotate back by 64 - n.  */
10346 *mask
= HOST_WIDE_INT_M1
;
10347 *shift
= HOST_BITS_PER_WIDE_INT
- n
;
/* NOTE(review): lossy extraction -- return type, declaration of `n`, braces,
   the early-return for lz == 0 and the return statements are missing; code
   kept byte-identical.  */
10354 /* Check if value C can be built by 2 instructions: one is 'li or lis',
10357 If so, *SHIFT is set to the shift operand of rldicl, and *MASK is set to
10358 the mask operand of rldicl, and return true.
10359 Return false otherwise. */
10362 can_be_built_by_li_lis_and_rldicl (HOST_WIDE_INT c
, int *shift
,
10363 HOST_WIDE_INT
*mask
)
10365 /* Leading zeros may be cleaned by rldicl with a mask. Change leading zeros
10366 to ones and then recheck it. */
10367 int lz
= clz_hwi (c
);
10369 /* If lz == 0, the left shift is undefined. */
/* unmask_c: C with its lz leading zeros forced to 1, so the rldicl mask
   can clear them again afterwards.  */
10373 HOST_WIDE_INT unmask_c
10374 = c
| (HOST_WIDE_INT_M1U
<< (HOST_BITS_PER_WIDE_INT
- lz
));
10376 if (can_be_rotated_to_lowbits (~unmask_c
, 15, &n
)
10377 || can_be_rotated_to_negative_lis (unmask_c
, &n
))
/* Mask clears exactly the lz leading bits; shift rotates the li/lis value
   back into position (0 when no rotation was needed).  */
10379 *mask
= HOST_WIDE_INT_M1U
>> lz
;
10380 *shift
= n
== 0 ? 0 : HOST_BITS_PER_WIDE_INT
- n
;
/* NOTE(review): lossy extraction -- return type, declaration of `n`, braces
   and return statements are missing; code kept byte-identical.  Mirror of
   the rldicl variant above, but cleaning trailing zeros instead.  */
10387 /* Check if value C can be built by 2 instructions: one is 'li or lis',
10390 If so, *SHIFT is set to the shift operand of rldicr, and *MASK is set to
10391 the mask operand of rldicr, and return true.
10392 Return false otherwise. */
10395 can_be_built_by_li_lis_and_rldicr (HOST_WIDE_INT c
, int *shift
,
10396 HOST_WIDE_INT
*mask
)
10398 /* Tailing zeros may be cleaned by rldicr with a mask. Change tailing zeros
10399 to ones and then recheck it. */
10400 int tz
= ctz_hwi (c
);
10402 /* If tz == HOST_BITS_PER_WIDE_INT, the left shift is undefined. */
10403 if (tz
>= HOST_BITS_PER_WIDE_INT
)
/* unmask_c: C with its tz trailing zeros forced to 1, to be cleared again
   by the rldicr mask.  */
10406 HOST_WIDE_INT unmask_c
= c
| ((HOST_WIDE_INT_1U
<< tz
) - 1);
10408 if (can_be_rotated_to_lowbits (~unmask_c
, 15, &n
)
10409 || can_be_rotated_to_negative_lis (unmask_c
, &n
))
10411 *mask
= HOST_WIDE_INT_M1U
<< tz
;
10412 *shift
= HOST_BITS_PER_WIDE_INT
- n
;
/* NOTE(review): lossy extraction -- return type, the initialization of
   `ones` (the comment says 49), braces, some *shift assignments and all
   return statements are missing; code kept byte-identical.  Three candidate
   shapes of C are tested in turn.  */
10419 /* Check if value C can be built by 2 instructions: one is 'li', another is
10422 If so, *SHIFT is set to the 'shift' operand of rldic; and *MASK is set
10423 to the mask value about the 'mb' operand of rldic; and return true.
10424 Return false otherwise. */
10427 can_be_built_by_li_and_rldic (HOST_WIDE_INT c
, int *shift
, HOST_WIDE_INT
*mask
)
10429 /* There are 49 successive ones in the negative value of 'li'. */
/* Shape 1: 0..01..1xx0..0 -- the li value shifted left, high bits masked.  */
10432 /* 1..1xx1..1: negative value of li --> 0..01..1xx0..0:
10433 right bits are shifted as 0's, and left 1's(and x's) are cleaned. */
10434 int tz
= ctz_hwi (c
);
10435 int lz
= clz_hwi (c
);
10437 /* If lz == HOST_BITS_PER_WIDE_INT, the left shift is undefined. */
10438 if (lz
>= HOST_BITS_PER_WIDE_INT
)
10441 int middle_ones
= clz_hwi (~(c
<< lz
));
10442 if (tz
+ lz
+ middle_ones
>= ones
10443 && (tz
- lz
) < HOST_BITS_PER_WIDE_INT
10444 && tz
< HOST_BITS_PER_WIDE_INT
)
10446 *mask
= ((1LL << (HOST_BITS_PER_WIDE_INT
- tz
- lz
)) - 1LL) << tz
;
/* Shape 2: 1..1xx0..01..1 -- a run of middle zeros carved out of ones.  */
10451 /* 1..1xx1..1 --> 1..1xx0..01..1: some 1's(following x's) are cleaned. */
10452 int leading_ones
= clz_hwi (~c
);
10453 int tailing_ones
= ctz_hwi (~c
);
10454 int middle_zeros
= ctz_hwi (c
>> tailing_ones
);
10455 if (leading_ones
+ tailing_ones
+ middle_zeros
>= ones
10456 && middle_zeros
< HOST_BITS_PER_WIDE_INT
)
10458 *mask
= ~(((1ULL << middle_zeros
) - 1ULL) << tailing_ones
);
10459 *shift
= tailing_ones
+ middle_zeros
;
/* Shape 3: the zero run wraps around the top of the word.  */
10463 /* xx1..1xx: --> xx0..01..1xx: some 1's(following x's) are cleaned. */
10464 /* Get the position for the first bit of successive 1.
10465 The 24th bit would be in successive 0 or 1. */
10466 HOST_WIDE_INT low_mask
= (HOST_WIDE_INT_1U
<< 24) - HOST_WIDE_INT_1U
;
10467 int pos_first_1
= ((c
& (low_mask
+ 1)) == 0)
10468 ? clz_hwi (c
& low_mask
)
10469 : HOST_BITS_PER_WIDE_INT
- ctz_hwi (~(c
| low_mask
));
10471 /* Make sure the left and right shifts are defined. */
10472 if (!IN_RANGE (pos_first_1
, 1, HOST_BITS_PER_WIDE_INT
-1))
10475 middle_ones
= clz_hwi (~c
<< pos_first_1
);
10476 middle_zeros
= ctz_hwi (c
>> (HOST_BITS_PER_WIDE_INT
- pos_first_1
));
10477 if (pos_first_1
< HOST_BITS_PER_WIDE_INT
10478 && middle_ones
+ middle_zeros
< HOST_BITS_PER_WIDE_INT
10479 && middle_ones
+ middle_zeros
>= ones
)
10481 *mask
= ~(((1ULL << middle_zeros
) - 1LL)
10482 << (HOST_BITS_PER_WIDE_INT
- pos_first_1
))
;
10483 *shift
= HOST_BITS_PER_WIDE_INT
- pos_first_1
+ middle_zeros
;
/* NOTE(review): lossy extraction -- braces, `return` statements, the ud1
   assignment, the lambda's counting branch (the *num_insns increments and
   the src == nullptr test), and the declaration of `shift`/`temp` are
   missing; code kept byte-identical.  This is C++ (lambda with default
   argument), despite the .c hint.  Each `if` below is one strategy for
   materializing the 64-bit constant C, tried roughly cheapest-first.  */
10490 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10491 Output insns to set DEST equal to the constant C as a series of
10492 lis, ori and shl instructions. If NUM_INSNS is not NULL, then
10493 only increase *NUM_INSNS as the number of insns, and do not emit
10497 rs6000_emit_set_long_const (rtx dest
, HOST_WIDE_INT c
, int *num_insns
)
/* ud1..ud4 are the four 16-bit chunks of C, lowest first (ud1's assignment
   was dropped by the extraction).  */
10499 HOST_WIDE_INT ud1
, ud2
, ud3
, ud4
;
10502 ud2
= (c
>> 16) & 0xffff;
10503 ud3
= (c
>> 32) & 0xffff;
10504 ud4
= (c
>> 48) & 0xffff;
10506 /* This lambda is used to emit one insn or just increase the insn count.
10507 When counting the insn number, no need to emit the insn. */
10508 auto count_or_emit_insn
= [&num_insns
] (rtx dest_or_insn
, rtx src
= nullptr) {
10516 emit_move_insn (dest_or_insn
, src
);
10518 emit_insn (dest_or_insn
);
/* 1 insn: pli can load any signed 34-bit immediate directly.  */
10521 if (TARGET_PREFIXED
&& SIGNED_INTEGER_34BIT_P (c
))
10524 count_or_emit_insn (dest
, GEN_INT (c
));
/* 1 insn: C is a sign-extended 16-bit value -> plain li.  */
10528 if ((ud4
== 0xffff && ud3
== 0xffff && ud2
== 0xffff && (ud1
& 0x8000))
10529 || (ud4
== 0 && ud3
== 0 && ud2
== 0 && !(ud1
& 0x8000)))
10532 count_or_emit_insn (dest
, GEN_INT (sext_hwi (ud1
, 16)));
/* Scratch register: reuse DEST when counting or after reload.  */
10537 = (num_insns
|| !can_create_pseudo_p ()) ? dest
: gen_reg_rtx (DImode
);
/* 2 insns: sign-extended 32-bit value -> lis [+ ori].  */
10539 if ((ud4
== 0xffff && ud3
== 0xffff && (ud2
& 0x8000))
10540 || (ud4
== 0 && ud3
== 0 && !(ud2
& 0x8000)))
10543 count_or_emit_insn (ud1
!= 0 ? temp
: dest
,
10544 GEN_INT (sext_hwi (ud2
<< 16, 32)))
;
10546 count_or_emit_insn (dest
, gen_rtx_IOR (DImode
, temp
, GEN_INT (ud1
)));
/* 2 insns: lis of (ud2|0x8000) then xoris to flip the sign bit back.  */
10550 if (ud4
== 0xffff && ud3
== 0xffff && !(ud2
& 0x8000) && ud1
== 0)
10553 count_or_emit_insn (temp
, GEN_INT (sext_hwi ((ud2
| 0x8000) << 16, 32)));
10554 count_or_emit_insn (dest
,
10555 gen_rtx_XOR (DImode
, temp
, GEN_INT (0x80000000)));
/* 2 insns: li then xoris correcting the second halfword.  */
10559 if (ud4
== 0xffff && ud3
== 0xffff && (ud1
& 0x8000))
10562 count_or_emit_insn (temp
, GEN_INT (sext_hwi (ud1
, 16)));
10563 count_or_emit_insn (dest
, gen_rtx_XOR (DImode
, temp
,
10564 GEN_INT ((ud2
^ 0xffff) << 16)));
/* 2 insns: li/lis of a rotated value, then one rotate-and-mask insn.  */
10569 HOST_WIDE_INT mask
;
10570 if (can_be_built_by_li_lis_and_rotldi (c
, &shift
, &mask
)
10571 || can_be_built_by_li_lis_and_rldicl (c
, &shift
, &mask
)
10572 || can_be_built_by_li_lis_and_rldicr (c
, &shift
, &mask
)
10573 || can_be_built_by_li_and_rldic (c
, &shift
, &mask
))
10575 /* li/lis; rldicX */
/* imm is C with the masked-out bits forced to 1, rotated right by SHIFT
   so the initial li/lis loads the pre-rotation value.  */
10576 unsigned HOST_WIDE_INT imm
= (c
| ~mask
);
10577 imm
= (imm
>> shift
) | (imm
<< (HOST_BITS_PER_WIDE_INT
- shift
));
10579 count_or_emit_insn (temp
, GEN_INT (imm
));
10581 temp
= gen_rtx_ROTATE (DImode
, temp
, GEN_INT (shift
));
10582 if (mask
!= HOST_WIDE_INT_M1
)
10583 temp
= gen_rtx_AND (DImode
, temp
, GEN_INT (mask
));
10584 count_or_emit_insn (dest
, temp
);
/* High 32 bits zero: build the low 32 bits, clearing any sign-extension
   spill with a final mask when needed.  */
10589 if (ud3
== 0 && ud4
== 0)
10591 gcc_assert ((ud2
& 0x8000) && ud1
!= 0);
10592 if (!(ud1
& 0x8000))
10595 count_or_emit_insn (temp
, GEN_INT (ud1
));
10596 count_or_emit_insn (dest
,
10597 gen_rtx_IOR (DImode
, temp
, GEN_INT (ud2
<< 16)));
10601 /* lis; ori; rldicl */
10602 count_or_emit_insn (temp
, GEN_INT (sext_hwi (ud2
<< 16, 32)));
10603 count_or_emit_insn (temp
, gen_rtx_IOR (DImode
, temp
, GEN_INT (ud1
)));
10604 count_or_emit_insn (dest
,
10605 gen_rtx_AND (DImode
, temp
, GEN_INT (0xffffffff)));
/* Both halves identical: build one half and duplicate it with rldimi.  */
10609 if (ud1
== ud3
&& ud2
== ud4
)
10611 /* load low 32bits first, e.g. "lis; ori", then "rldimi". */
10612 HOST_WIDE_INT num
= (ud2
<< 16) | ud1
;
10613 rs6000_emit_set_long_const (temp
, sext_hwi (num
, 32), num_insns
);
10615 rtx rldimi
= gen_rotldi3_insert_3 (dest
, temp
, GEN_INT (32), temp
,
10616 GEN_INT (0xffffffff));
10617 count_or_emit_insn (rldimi
);
/* ud4 is just the sign-extension of ud3: build the top, shift, then OR in
   the low halfwords.  */
10621 if ((ud4
== 0xffff && (ud3
& 0x8000)) || (ud4
== 0 && !(ud3
& 0x8000)))
10623 /* li; [ori;] rldicl [;oir]. */
10624 count_or_emit_insn (temp
, GEN_INT (sext_hwi (ud3
<< 16, 32)));
10626 count_or_emit_insn (temp
, gen_rtx_IOR (DImode
, temp
, GEN_INT (ud2
)));
10627 count_or_emit_insn (ud1
!= 0 ? temp
: dest
,
10628 gen_rtx_ASHIFT (DImode
, temp
, GEN_INT (16)));
10630 count_or_emit_insn (dest
, gen_rtx_IOR (DImode
, temp
, GEN_INT (ud1
)));
/* Prefixed-instruction fallbacks: two pli plus a merge.  */
10634 if (TARGET_PREFIXED
)
10636 if (can_create_pseudo_p ())
10638 /* pli A,L; pli B,H; rldimi A,B,32,0. */
10639 rtx temp1
= num_insns
? nullptr : gen_reg_rtx (DImode
);
10640 count_or_emit_insn (temp
, GEN_INT ((ud4
<< 16) | ud3
));
10641 count_or_emit_insn (temp1
, GEN_INT ((ud2
<< 16) | ud1
));
10642 rtx rldimi
= gen_rotldi3_insert_3 (dest
, temp
, GEN_INT (32), temp1
,
10643 GEN_INT (0xffffffff));
10644 count_or_emit_insn (rldimi
);
10648 /* pli A,H; sldi A,32; paddi A,A,L. */
10649 count_or_emit_insn (dest
, GEN_INT ((ud4
<< 16) | ud3
));
10650 count_or_emit_insn (dest
, gen_rtx_ASHIFT (DImode
, dest
, GEN_INT (32)));
/* paddi cannot use r0 as both source and destination base -- presumably
   why FIRST_GPR_REGNO is excluded; TODO confirm against the full source.  */
10652 bool can_use_paddi
= dest
? REGNO (dest
) != FIRST_GPR_REGNO
: false;
10653 /* Use paddi for the low 32 bits. */
10654 if (ud2
!= 0 && ud1
!= 0 && can_use_paddi
)
10655 count_or_emit_insn (dest
, gen_rtx_PLUS (DImode
, dest
,
10656 GEN_INT ((ud2
<< 16) | ud1
)));
10657 /* Use oris, ori for low 32 bits. */
10658 if (ud2
!= 0 && (ud1
== 0 || !can_use_paddi
))
10659 count_or_emit_insn (dest
,
10660 gen_rtx_IOR (DImode
, dest
, GEN_INT (ud2
<< 16)));
10661 if (ud1
!= 0 && (ud2
== 0 || !can_use_paddi
))
10662 count_or_emit_insn (dest
, gen_rtx_IOR (DImode
, dest
, GEN_INT (ud1
)));
/* General 5-insn fallbacks.  */
10666 if (can_create_pseudo_p ())
10668 /* lis HIGH,UD4 ; ori HIGH,UD3 ;
10669 lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0. */
10670 rtx high
= num_insns
? nullptr : gen_reg_rtx (DImode
);
10671 rtx low
= num_insns
? nullptr : gen_reg_rtx (DImode
);
10672 HOST_WIDE_INT num
= (ud2
<< 16) | ud1
;
10673 rs6000_emit_set_long_const (low
, sext_hwi (num
, 32), num_insns
);
10674 num
= (ud4
<< 16) | ud3
;
10675 rs6000_emit_set_long_const (high
, sext_hwi (num
, 32), num_insns
);
10677 rtx rldimi
= gen_rotldi3_insert_3 (dest
, high
, GEN_INT (32), low
,
10678 GEN_INT (0xffffffff));
10679 count_or_emit_insn (rldimi
);
10683 /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ;
10684 oris DEST,UD2 ; ori DEST,UD1. */
10685 count_or_emit_insn (dest
, GEN_INT (sext_hwi (ud4
<< 16, 32)));
10687 count_or_emit_insn (dest
, gen_rtx_IOR (DImode
, dest
, GEN_INT (ud3
)));
10689 count_or_emit_insn (dest
, gen_rtx_ASHIFT (DImode
, dest
, GEN_INT (32)));
10691 count_or_emit_insn (dest
, gen_rtx_IOR (DImode
, dest
, GEN_INT (ud2
<< 16)));
10693 count_or_emit_insn (dest
, gen_rtx_IOR (DImode
, dest
, GEN_INT (ud1
)));
/* NOTE(review): lossy extraction -- the return type, braces and the
   left-hand sides `operands[0]` / `operands[1]` of the two assignments
   (orig. lines 10708 and 10716) are missing; code kept byte-identical.
   For each MEM operand whose address is neither a plain register nor a
   constant-pool address, the address is forced into a register.  */
10698 /* Helper for the following. Get rid of [r+r] memory refs
10699 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10702 rs6000_eliminate_indexed_memrefs (rtx operands
[2])
10704 if (MEM_P (operands
[0])
10705 && !REG_P (XEXP (operands
[0], 0))
10706 && ! legitimate_constant_pool_address_p (XEXP (operands
[0], 0),
10707 GET_MODE (operands
[0]), false))
10709 = replace_equiv_address (operands
[0],
10710 copy_addr_to_reg (XEXP (operands
[0], 0)));
10712 if (MEM_P (operands
[1])
10713 && !REG_P (XEXP (operands
[1], 0))
10714 && ! legitimate_constant_pool_address_p (XEXP (operands
[1], 0),
10715 GET_MODE (operands
[1]), false))
10717 = replace_equiv_address (operands
[1],
10718 copy_addr_to_reg (XEXP (operands
[1], 0)));
/* NOTE(review): lossy extraction -- the return type, the declarations of
   `v`/`i`/`subparts` (subparts is presumably the element count of MODE;
   TODO confirm) and the final return are missing; code kept byte-identical.
   Builds the permutation {n/2..n-1, 0..n/2-1}, i.e. the two vector halves
   swapped.  */
10721 /* Generate a vector of constants to permute MODE for a little-endian
10722 storage operation by swapping the two halves of a vector. */
10724 rs6000_const_vec (machine_mode mode
)
10752 v
= rtvec_alloc (subparts
);
10754 for (i
= 0; i
< subparts
/ 2; ++i
)
10755 RTVEC_ELT (v
, i
) = gen_rtx_CONST_INT (DImode
, i
+ subparts
/ 2);
10756 for (i
= subparts
/ 2; i
< subparts
; ++i
)
10757 RTVEC_ELT (v
, i
) = gen_rtx_CONST_INT (DImode
, i
- subparts
/ 2);
/* NOTE(review): lossy extraction -- the return type, braces, the rotate
   amount argument of gen_rtx_ROTATE (orig. line 10784, presumably 64) and
   an `else` are missing; code kept byte-identical.  Emits a half-swap of
   SOURCE into DEST, either as a 128-bit ROTATE or as a VEC_SELECT with the
   permutation from rs6000_const_vec.  */
10762 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10763 store operation. */
10765 rs6000_emit_le_vsx_permute (rtx dest
, rtx source
, machine_mode mode
)
10767 gcc_assert (!altivec_indexed_or_indirect_operand (dest
, mode
));
10768 gcc_assert (!altivec_indexed_or_indirect_operand (source
, mode
));
10770 /* Scalar permutations are easier to express in integer modes rather than
10771 floating-point modes, so cast them here. We use V1TImode instead
10772 of TImode to ensure that the values don't go through GPRs. */
10773 if (FLOAT128_VECTOR_P (mode
))
10775 dest
= gen_lowpart (V1TImode
, dest
);
10776 source
= gen_lowpart (V1TImode
, source
);
10780 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10782 if (mode
== TImode
|| mode
== V1TImode
)
10783 emit_insn (gen_rtx_SET (dest
, gen_rtx_ROTATE (mode
, source
,
10787 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rs6000_const_vec (mode
));
10788 emit_insn (gen_rtx_SET (dest
, gen_rtx_VEC_SELECT (mode
, source
, par
)));
/* NOTE(review): lossy extraction -- the return type and braces are missing;
   code kept byte-identical.  Two back-to-back half-swap permutes: the first
   models the lxvd2x element swap, the second the correcting xxpermdi.  */
10792 /* Emit a little-endian load from vector memory location SOURCE to VSX
10793 register DEST in mode MODE. The load is done with two permuting
10794 insn's that represent an lxvd2x and xxpermdi. */
10796 rs6000_emit_le_vsx_load (rtx dest
, rtx source
, machine_mode mode
)
10798 /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
10800 if (mode
== TImode
|| mode
== V1TImode
)
10803 dest
= gen_lowpart (V2DImode
, dest
);
10804 source
= adjust_address (source
, V2DImode
, 0);
/* After reload no scratch pseudo is available, so permute in place.  */
10807 rtx tmp
= can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest
) : dest
;
10808 rs6000_emit_le_vsx_permute (tmp
, source
, mode
);
10809 rs6000_emit_le_vsx_permute (dest
, tmp
, mode
);
/* NOTE(review): lossy extraction -- the return type and braces are missing;
   code kept byte-identical.  Mirror of rs6000_emit_le_vsx_load for stores:
   register half-swap (xxpermdi) followed by the swapping store (stxvd2x).  */
10812 /* Emit a little-endian store to vector memory location DEST from VSX
10813 register SOURCE in mode MODE. The store is done with two permuting
10814 insn's that represent an xxpermdi and an stxvd2x. */
10816 rs6000_emit_le_vsx_store (rtx dest
, rtx source
, machine_mode mode
)
10818 /* This should never be called after LRA. */
10819 gcc_assert (can_create_pseudo_p ());
10821 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10823 if (mode
== TImode
|| mode
== V1TImode
)
10826 dest
= adjust_address (dest
, V2DImode
, 0);
10827 source
= gen_lowpart (V2DImode
, source
);
10830 rtx tmp
= gen_reg_rtx_and_attrs (source
);
10831 rs6000_emit_le_vsx_permute (tmp
, source
, mode
);
10832 rs6000_emit_le_vsx_permute (dest
, tmp
, mode
);
/* NOTE(review): lossy extraction -- the return type, braces, a `return`
   after the load branch and an `else` are missing; code kept
   byte-identical.  Dispatches to the LE VSX load or store helper; exactly
   one of SOURCE/DEST is a MEM (enforced by the XOR in the assert).  */
10835 /* Emit a sequence representing a little-endian VSX load or store,
10836 moving data from SOURCE to DEST in mode MODE. This is done
10837 separately from rs6000_emit_move to ensure it is called only
10838 during expand. LE VSX loads and stores introduced later are
10839 handled with a split. The expand-time RTL generation allows
10840 us to optimize away redundant pairs of register-permutes. */
10842 rs6000_emit_le_vsx_move (rtx dest
, rtx source
, machine_mode mode
)
10844 gcc_assert (!BYTES_BIG_ENDIAN
10845 && VECTOR_MEM_VSX_P (mode
)
10846 && !TARGET_P9_VECTOR
10847 && !gpr_or_gpr_p (dest
, source
)
10848 && (MEM_P (source
) ^ MEM_P (dest
)));
10850 if (MEM_P (source
))
10852 gcc_assert (REG_P (dest
) || SUBREG_P (dest
));
10853 rs6000_emit_le_vsx_load (dest
, source
, mode
);
/* Store path: the source must live in a register first.  */
10857 if (!REG_P (source
))
10858 source
= force_reg (mode
, source
);
10859 rs6000_emit_le_vsx_store (dest
, source
, mode
);
/* NOTE(review): lossy extraction -- the return type, braces and the early
   `return true;`/`return false;` bodies of the guard conditions are
   missing; code kept byte-identical.  Predicate used by rs6000_emit_move
   to decide whether an SF/SI subreg move needs no bit conversion.  */
10863 /* Return whether a SFmode or SImode move can be done without converting one
10864 mode to another. This arrises when we have:
10866 (SUBREG:SF (REG:SI ...))
10867 (SUBREG:SI (REG:SF ...))
10869 and one of the values is in a floating point/vector register, where SFmode
10870 scalars are stored in DFmode format. */
10873 valid_sf_si_move (rtx dest
, rtx src
, machine_mode mode
)
10875 if (TARGET_ALLOW_SF_SUBREG
)
10878 if (mode
!= SFmode
&& GET_MODE_CLASS (mode
) != MODE_INT
)
10881 if (!SUBREG_P (src
) || !sf_subreg_operand (src
, mode
))
10884 /*. Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10885 if (SUBREG_P (dest
))
/* Valid when both subregs wrap registers of the same inner mode.  */
10887 rtx dest_subreg
= SUBREG_REG (dest
);
10888 rtx src_subreg
= SUBREG_REG (src
);
10889 return GET_MODE (dest_subreg
) == GET_MODE (src_subreg
);
/* NOTE(review): lossy extraction -- the return type (presumably bool: the
   caller at orig. line 10980 uses the result in a condition), braces and
   the `return true;`/`return false;` lines are missing; code kept
   byte-identical.  Rewrites an SF<->SI subreg move into the dedicated
   bit-conversion insns when direct moves are available and reload has not
   yet run.  */
10896 /* Helper function to change moves with:
10898 (SUBREG:SF (REG:SI)) and
10899 (SUBREG:SI (REG:SF))
10901 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10902 values are stored as DFmode values in the VSX registers. We need to convert
10903 the bits before we can use a direct move or operate on the bits in the
10904 vector register as an integer type.
10906 Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)). */
10909 rs6000_emit_move_si_sf_subreg (rtx dest
, rtx source
, machine_mode mode
)
10911 if (TARGET_DIRECT_MOVE_64BIT
&& !reload_completed
10912 && (!SUBREG_P (dest
) || !sf_subreg_operand (dest
, mode
))
10913 && SUBREG_P (source
) && sf_subreg_operand (source
, mode
))
10915 rtx inner_source
= SUBREG_REG (source
);
10916 machine_mode inner_mode
= GET_MODE (inner_source
);
/* SI destination from an SF register: extract the SF bits as an integer.  */
10918 if (mode
== SImode
&& inner_mode
== SFmode
)
10920 emit_insn (gen_movsi_from_sf (dest
, inner_source
));
/* SF destination from an SI register: reinterpret the integer bits.  */
10924 if (mode
== SFmode
&& inner_mode
== SImode
)
10926 emit_insn (gen_movsf_from_si (dest
, inner_source
));
10934 /* Emit a move from SOURCE to DEST in mode MODE. */
10936 rs6000_emit_move (rtx dest
, rtx source
, machine_mode mode
)
10939 operands
[0] = dest
;
10940 operands
[1] = source
;
10942 if (TARGET_DEBUG_ADDR
)
10945 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10946 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10947 GET_MODE_NAME (mode
),
10950 can_create_pseudo_p ());
10952 fprintf (stderr
, "source:\n");
10953 debug_rtx (source
);
10956 /* Check that we get CONST_WIDE_INT only when we should. */
10957 if (CONST_WIDE_INT_P (operands
[1])
10958 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
10959 gcc_unreachable ();
10961 #ifdef HAVE_AS_GNU_ATTRIBUTE
10962 /* If we use a long double type, set the flags in .gnu_attribute that say
10963 what the long double type is. This is to allow the linker's warning
10964 message for the wrong long double to be useful, even if the function does
10965 not do a call (for example, doing a 128-bit add on power9 if the long
10966 double type is IEEE 128-bit. Do not set this if __ibm128 or __floa128 are
10967 used if they aren't the default long dobule type. */
10968 if (rs6000_gnu_attr
&& (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
|| TARGET_64BIT
))
10970 if (TARGET_LONG_DOUBLE_128
&& (mode
== TFmode
|| mode
== TCmode
))
10971 rs6000_passes_float
= rs6000_passes_long_double
= true;
10973 else if (!TARGET_LONG_DOUBLE_128
&& (mode
== DFmode
|| mode
== DCmode
))
10974 rs6000_passes_float
= rs6000_passes_long_double
= true;
10978 /* See if we need to special case SImode/SFmode SUBREG moves. */
10979 if ((mode
== SImode
|| mode
== SFmode
) && SUBREG_P (source
)
10980 && rs6000_emit_move_si_sf_subreg (dest
, source
, mode
))
10983 /* Check if GCC is setting up a block move that will end up using FP
10984 registers as temporaries. We must make sure this is acceptable. */
10985 if (MEM_P (operands
[0])
10986 && MEM_P (operands
[1])
10988 && (rs6000_slow_unaligned_access (DImode
, MEM_ALIGN (operands
[0]))
10989 || rs6000_slow_unaligned_access (DImode
, MEM_ALIGN (operands
[1])))
10990 && ! (rs6000_slow_unaligned_access (SImode
,
10991 (MEM_ALIGN (operands
[0]) > 32
10992 ? 32 : MEM_ALIGN (operands
[0])))
10993 || rs6000_slow_unaligned_access (SImode
,
10994 (MEM_ALIGN (operands
[1]) > 32
10995 ? 32 : MEM_ALIGN (operands
[1]))))
10996 && ! MEM_VOLATILE_P (operands
[0])
10997 && ! MEM_VOLATILE_P (operands
[1]))
10999 emit_move_insn (adjust_address (operands
[0], SImode
, 0),
11000 adjust_address (operands
[1], SImode
, 0));
11001 emit_move_insn (adjust_address (copy_rtx (operands
[0]), SImode
, 4),
11002 adjust_address (copy_rtx (operands
[1]), SImode
, 4));
11006 if (can_create_pseudo_p () && MEM_P (operands
[0])
11007 && !gpc_reg_operand (operands
[1], mode
))
11008 operands
[1] = force_reg (mode
, operands
[1]);
11010 /* Recognize the case where operand[1] is a reference to thread-local
11011 data and load its address to a register. */
11012 if (tls_referenced_p (operands
[1]))
11014 enum tls_model model
;
11015 rtx tmp
= operands
[1];
11018 if (GET_CODE (tmp
) == CONST
&& GET_CODE (XEXP (tmp
, 0)) == PLUS
)
11020 addend
= XEXP (XEXP (tmp
, 0), 1);
11021 tmp
= XEXP (XEXP (tmp
, 0), 0);
11024 gcc_assert (SYMBOL_REF_P (tmp
));
11025 model
= SYMBOL_REF_TLS_MODEL (tmp
);
11026 gcc_assert (model
!= 0);
11028 tmp
= rs6000_legitimize_tls_address (tmp
, model
);
11031 tmp
= gen_rtx_PLUS (mode
, tmp
, addend
);
11032 tmp
= force_operand (tmp
, operands
[0]);
11037 /* 128-bit constant floating-point values on Darwin should really be loaded
11038 as two parts. However, this premature splitting is a problem when DFmode
11039 values can go into Altivec registers. */
11040 if (TARGET_MACHO
&& CONST_DOUBLE_P (operands
[1]) && FLOAT128_IBM_P (mode
)
11041 && !reg_addr
[DFmode
].scalar_in_vmx_p
)
11043 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
, 0),
11044 simplify_gen_subreg (DFmode
, operands
[1], mode
, 0),
11046 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
,
11047 GET_MODE_SIZE (DFmode
)),
11048 simplify_gen_subreg (DFmode
, operands
[1], mode
,
11049 GET_MODE_SIZE (DFmode
)),
11054 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
11055 p1:SD) if p1 is not of floating point class and p0 is spilled as
11056 we can have no analogous movsd_store for this. */
11057 if (lra_in_progress
&& mode
== DDmode
11058 && REG_P (operands
[0]) && !HARD_REGISTER_P (operands
[0])
11059 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
11060 && SUBREG_P (operands
[1]) && REG_P (SUBREG_REG (operands
[1]))
11061 && GET_MODE (SUBREG_REG (operands
[1])) == SDmode
)
11064 int regno
= REGNO (SUBREG_REG (operands
[1]));
11066 if (!HARD_REGISTER_NUM_P (regno
))
11068 cl
= reg_preferred_class (regno
);
11069 regno
= reg_renumber
[regno
];
11071 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][1];
11073 if (regno
>= 0 && ! FP_REGNO_P (regno
))
11076 operands
[0] = gen_lowpart_SUBREG (SDmode
, operands
[0]);
11077 operands
[1] = SUBREG_REG (operands
[1]);
11080 if (lra_in_progress
11082 && REG_P (operands
[0]) && !HARD_REGISTER_P (operands
[0])
11083 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
11084 && (REG_P (operands
[1])
11085 || (SUBREG_P (operands
[1]) && REG_P (SUBREG_REG (operands
[1])))))
11087 int regno
= reg_or_subregno (operands
[1]);
11090 if (!HARD_REGISTER_NUM_P (regno
))
11092 cl
= reg_preferred_class (regno
);
11093 gcc_assert (cl
!= NO_REGS
);
11094 regno
= reg_renumber
[regno
];
11096 regno
= ira_class_hard_regs
[cl
][0];
11098 if (FP_REGNO_P (regno
))
11100 if (GET_MODE (operands
[0]) != DDmode
)
11101 operands
[0] = gen_rtx_SUBREG (DDmode
, operands
[0], 0);
11102 emit_insn (gen_movsd_store (operands
[0], operands
[1]));
11104 else if (INT_REGNO_P (regno
))
11105 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
11110 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
11111 p:DD)) if p0 is not of floating point class and p1 is spilled as
11112 we can have no analogous movsd_load for this. */
11113 if (lra_in_progress
&& mode
== DDmode
11114 && SUBREG_P (operands
[0]) && REG_P (SUBREG_REG (operands
[0]))
11115 && GET_MODE (SUBREG_REG (operands
[0])) == SDmode
11116 && REG_P (operands
[1]) && !HARD_REGISTER_P (operands
[1])
11117 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
11120 int regno
= REGNO (SUBREG_REG (operands
[0]));
11122 if (!HARD_REGISTER_NUM_P (regno
))
11124 cl
= reg_preferred_class (regno
);
11125 regno
= reg_renumber
[regno
];
11127 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][0];
11129 if (regno
>= 0 && ! FP_REGNO_P (regno
))
11132 operands
[0] = SUBREG_REG (operands
[0]);
11133 operands
[1] = gen_lowpart_SUBREG (SDmode
, operands
[1]);
11136 if (lra_in_progress
11138 && (REG_P (operands
[0])
11139 || (SUBREG_P (operands
[0]) && REG_P (SUBREG_REG (operands
[0]))))
11140 && REG_P (operands
[1]) && !HARD_REGISTER_P (operands
[1])
11141 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
11143 int regno
= reg_or_subregno (operands
[0]);
11146 if (!HARD_REGISTER_NUM_P (regno
))
11148 cl
= reg_preferred_class (regno
);
11149 gcc_assert (cl
!= NO_REGS
);
11150 regno
= reg_renumber
[regno
];
11152 regno
= ira_class_hard_regs
[cl
][0];
11154 if (FP_REGNO_P (regno
))
11156 if (GET_MODE (operands
[1]) != DDmode
)
11157 operands
[1] = gen_rtx_SUBREG (DDmode
, operands
[1], 0);
11158 emit_insn (gen_movsd_load (operands
[0], operands
[1]));
11160 else if (INT_REGNO_P (regno
))
11161 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
11167 /* FIXME: In the long term, this switch statement should go away
11168 and be replaced by a sequence of tests based on things like
11174 if (CONSTANT_P (operands
[1])
11175 && !CONST_INT_P (operands
[1]))
11176 operands
[1] = force_const_mem (mode
, operands
[1]);
11183 if (FLOAT128_2REG_P (mode
))
11184 rs6000_eliminate_indexed_memrefs (operands
);
11191 if (CONSTANT_P (operands
[1])
11192 && ! easy_fp_constant (operands
[1], mode
))
11193 operands
[1] = force_const_mem (mode
, operands
[1]);
11203 if (CONSTANT_P (operands
[1])
11204 && !easy_vector_constant (operands
[1], mode
))
11205 operands
[1] = force_const_mem (mode
, operands
[1]);
11210 if (CONST_INT_P (operands
[1]) && INTVAL (operands
[1]) != 0)
11211 error ("%qs is an opaque type, and you cannot set it to other values",
11212 (mode
== OOmode
) ? "__vector_pair" : "__vector_quad");
11217 /* Use default pattern for address of ELF small data */
11220 && DEFAULT_ABI
== ABI_V4
11221 && (SYMBOL_REF_P (operands
[1])
11222 || GET_CODE (operands
[1]) == CONST
)
11223 && small_data_operand (operands
[1], mode
))
11225 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11229 /* Use the default pattern for loading up PC-relative addresses. */
11230 if (TARGET_PCREL
&& mode
== Pmode
11231 && pcrel_local_or_external_address (operands
[1], Pmode
))
11233 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11237 if (DEFAULT_ABI
== ABI_V4
11238 && mode
== Pmode
&& mode
== SImode
11239 && flag_pic
== 1 && got_operand (operands
[1], mode
))
11241 emit_insn (gen_movsi_got (operands
[0], operands
[1]));
11245 if ((TARGET_ELF
|| DEFAULT_ABI
== ABI_DARWIN
)
11246 && TARGET_NO_TOC_OR_PCREL
11249 && CONSTANT_P (operands
[1])
11250 && GET_CODE (operands
[1]) != HIGH
11251 && !CONST_INT_P (operands
[1]))
11253 rtx target
= (!can_create_pseudo_p ()
11255 : gen_reg_rtx (mode
));
11257 /* If this is a function address on -mcall-aixdesc,
11258 convert it to the address of the descriptor. */
11259 if (DEFAULT_ABI
== ABI_AIX
11260 && SYMBOL_REF_P (operands
[1])
11261 && XSTR (operands
[1], 0)[0] == '.')
11263 const char *name
= XSTR (operands
[1], 0);
11265 while (*name
== '.')
11267 new_ref
= gen_rtx_SYMBOL_REF (Pmode
, name
);
11268 CONSTANT_POOL_ADDRESS_P (new_ref
)
11269 = CONSTANT_POOL_ADDRESS_P (operands
[1]);
11270 SYMBOL_REF_FLAGS (new_ref
) = SYMBOL_REF_FLAGS (operands
[1]);
11271 SYMBOL_REF_USED (new_ref
) = SYMBOL_REF_USED (operands
[1]);
11272 SYMBOL_REF_DATA (new_ref
) = SYMBOL_REF_DATA (operands
[1]);
11273 operands
[1] = new_ref
;
11276 if (DEFAULT_ABI
== ABI_DARWIN
)
11279 /* This is not PIC code, but could require the subset of
11280 indirections used by mdynamic-no-pic. */
11281 if (MACHO_DYNAMIC_NO_PIC_P
)
11283 /* Take care of any required data indirection. */
11284 operands
[1] = rs6000_machopic_legitimize_pic_address (
11285 operands
[1], mode
, operands
[0]);
11286 if (operands
[0] != operands
[1])
11287 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11291 emit_insn (gen_macho_high (Pmode
, target
, operands
[1]));
11292 emit_insn (gen_macho_low (Pmode
, operands
[0],
11293 target
, operands
[1]));
11297 emit_insn (gen_elf_high (target
, operands
[1]));
11298 emit_insn (gen_elf_low (operands
[0], target
, operands
[1]));
11302 /* If this is a SYMBOL_REF that refers to a constant pool entry,
11303 and we have put it in the TOC, we just need to make a TOC-relative
11304 reference to it. */
11306 && SYMBOL_REF_P (operands
[1])
11307 && use_toc_relative_ref (operands
[1], mode
))
11308 operands
[1] = create_TOC_reference (operands
[1], operands
[0]);
11309 else if (mode
== Pmode
11310 && CONSTANT_P (operands
[1])
11311 && GET_CODE (operands
[1]) != HIGH
11312 && ((REG_P (operands
[0])
11313 && FP_REGNO_P (REGNO (operands
[0])))
11314 || !CONST_INT_P (operands
[1])
11315 || (num_insns_constant (operands
[1], mode
)
11316 > (TARGET_CMODEL
!= CMODEL_SMALL
? 3 : 2)))
11317 && !toc_relative_expr_p (operands
[1], false, NULL
, NULL
)
11318 && (TARGET_CMODEL
== CMODEL_SMALL
11319 || can_create_pseudo_p ()
11320 || (REG_P (operands
[0])
11321 && INT_REG_OK_FOR_BASE_P (operands
[0], true))))
11325 /* Darwin uses a special PIC legitimizer. */
11326 if (DEFAULT_ABI
== ABI_DARWIN
&& MACHOPIC_INDIRECT
)
11329 rs6000_machopic_legitimize_pic_address (operands
[1], mode
,
11331 if (operands
[0] != operands
[1])
11332 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11337 /* If we are to limit the number of things we put in the TOC and
11338 this is a symbol plus a constant we can add in one insn,
11339 just put the symbol in the TOC and add the constant. */
11340 if (GET_CODE (operands
[1]) == CONST
11341 && TARGET_NO_SUM_IN_TOC
11342 && GET_CODE (XEXP (operands
[1], 0)) == PLUS
11343 && add_operand (XEXP (XEXP (operands
[1], 0), 1), mode
)
11344 && (GET_CODE (XEXP (XEXP (operands
[1], 0), 0)) == LABEL_REF
11345 || SYMBOL_REF_P (XEXP (XEXP (operands
[1], 0), 0)))
11346 && ! side_effects_p (operands
[0]))
11349 force_const_mem (mode
, XEXP (XEXP (operands
[1], 0), 0));
11350 rtx other
= XEXP (XEXP (operands
[1], 0), 1);
11352 sym
= force_reg (mode
, sym
);
11353 emit_insn (gen_add3_insn (operands
[0], sym
, other
));
11357 operands
[1] = force_const_mem (mode
, operands
[1]);
11360 && SYMBOL_REF_P (XEXP (operands
[1], 0))
11361 && use_toc_relative_ref (XEXP (operands
[1], 0), mode
))
11363 rtx tocref
= create_TOC_reference (XEXP (operands
[1], 0),
11365 operands
[1] = gen_const_mem (mode
, tocref
);
11366 set_mem_alias_set (operands
[1], get_TOC_alias_set ());
11372 if (!VECTOR_MEM_VSX_P (TImode
))
11373 rs6000_eliminate_indexed_memrefs (operands
);
11377 rs6000_eliminate_indexed_memrefs (operands
);
11381 fatal_insn ("bad move", gen_rtx_SET (dest
, source
));
11384 /* Above, we may have called force_const_mem which may have returned
11385 an invalid address. If we can, fix this up; otherwise, reload will
11386 have to deal with it. */
11387 if (MEM_P (operands
[1]))
11388 operands
[1] = validize_mem (operands
[1]);
11390 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11394 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
11396 init_float128_ibm (machine_mode mode
)
11398 if (!TARGET_XL_COMPAT
)
11400 set_optab_libfunc (add_optab
, mode
, "__gcc_qadd");
11401 set_optab_libfunc (sub_optab
, mode
, "__gcc_qsub");
11402 set_optab_libfunc (smul_optab
, mode
, "__gcc_qmul");
11403 set_optab_libfunc (sdiv_optab
, mode
, "__gcc_qdiv");
11405 if (!TARGET_HARD_FLOAT
)
11407 set_optab_libfunc (neg_optab
, mode
, "__gcc_qneg");
11408 set_optab_libfunc (eq_optab
, mode
, "__gcc_qeq");
11409 set_optab_libfunc (ne_optab
, mode
, "__gcc_qne");
11410 set_optab_libfunc (gt_optab
, mode
, "__gcc_qgt");
11411 set_optab_libfunc (ge_optab
, mode
, "__gcc_qge");
11412 set_optab_libfunc (lt_optab
, mode
, "__gcc_qlt");
11413 set_optab_libfunc (le_optab
, mode
, "__gcc_qle");
11414 set_optab_libfunc (unord_optab
, mode
, "__gcc_qunord");
11416 set_conv_libfunc (sext_optab
, mode
, SFmode
, "__gcc_stoq");
11417 set_conv_libfunc (sext_optab
, mode
, DFmode
, "__gcc_dtoq");
11418 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "__gcc_qtos");
11419 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "__gcc_qtod");
11420 set_conv_libfunc (sfix_optab
, SImode
, mode
, "__gcc_qtoi");
11421 set_conv_libfunc (ufix_optab
, SImode
, mode
, "__gcc_qtou");
11422 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "__gcc_itoq");
11423 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "__gcc_utoq");
11428 set_optab_libfunc (add_optab
, mode
, "_xlqadd");
11429 set_optab_libfunc (sub_optab
, mode
, "_xlqsub");
11430 set_optab_libfunc (smul_optab
, mode
, "_xlqmul");
11431 set_optab_libfunc (sdiv_optab
, mode
, "_xlqdiv");
11434 /* Add various conversions for IFmode to use the traditional TFmode
11436 if (mode
== IFmode
)
11438 set_conv_libfunc (sext_optab
, mode
, SDmode
, "__dpd_extendsdtf");
11439 set_conv_libfunc (sext_optab
, mode
, DDmode
, "__dpd_extendddtf");
11440 set_conv_libfunc (trunc_optab
, mode
, TDmode
, "__dpd_trunctdtf");
11441 set_conv_libfunc (trunc_optab
, SDmode
, mode
, "__dpd_trunctfsd");
11442 set_conv_libfunc (trunc_optab
, DDmode
, mode
, "__dpd_trunctfdd");
11443 set_conv_libfunc (sext_optab
, TDmode
, mode
, "__dpd_extendtftd");
11445 set_conv_libfunc (sfix_optab
, DImode
, mode
, "__fixtfdi");
11446 set_conv_libfunc (ufix_optab
, DImode
, mode
, "__fixunstfdi");
11448 set_conv_libfunc (sfloat_optab
, mode
, DImode
, "__floatditf");
11449 set_conv_libfunc (ufloat_optab
, mode
, DImode
, "__floatunditf");
11451 if (TARGET_POWERPC64
)
11453 set_conv_libfunc (sfix_optab
, TImode
, mode
, "__fixtfti");
11454 set_conv_libfunc (ufix_optab
, TImode
, mode
, "__fixunstfti");
11455 set_conv_libfunc (sfloat_optab
, mode
, TImode
, "__floattitf");
11456 set_conv_libfunc (ufloat_optab
, mode
, TImode
, "__floatuntitf");
11461 /* Set up IEEE 128-bit floating point routines. Use different names if the
11462 arguments can be passed in a vector register. The historical PowerPC
11463 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
11464 continue to use that if we aren't using vector registers to pass IEEE
11465 128-bit floating point. */
11468 init_float128_ieee (machine_mode mode
)
11470 if (FLOAT128_VECTOR_P (mode
))
11472 set_optab_libfunc (add_optab
, mode
, "__addkf3");
11473 set_optab_libfunc (sub_optab
, mode
, "__subkf3");
11474 set_optab_libfunc (neg_optab
, mode
, "__negkf2");
11475 set_optab_libfunc (smul_optab
, mode
, "__mulkf3");
11476 set_optab_libfunc (sdiv_optab
, mode
, "__divkf3");
11477 set_optab_libfunc (sqrt_optab
, mode
, "__sqrtkf2");
11478 set_optab_libfunc (abs_optab
, mode
, "__abskf2");
11479 set_optab_libfunc (powi_optab
, mode
, "__powikf2");
11481 set_optab_libfunc (eq_optab
, mode
, "__eqkf2");
11482 set_optab_libfunc (ne_optab
, mode
, "__nekf2");
11483 set_optab_libfunc (gt_optab
, mode
, "__gtkf2");
11484 set_optab_libfunc (ge_optab
, mode
, "__gekf2");
11485 set_optab_libfunc (lt_optab
, mode
, "__ltkf2");
11486 set_optab_libfunc (le_optab
, mode
, "__lekf2");
11487 set_optab_libfunc (unord_optab
, mode
, "__unordkf2");
11489 set_conv_libfunc (sext_optab
, mode
, SFmode
, "__extendsfkf2");
11490 set_conv_libfunc (sext_optab
, mode
, DFmode
, "__extenddfkf2");
11491 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "__trunckfsf2");
11492 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "__trunckfdf2");
11494 set_conv_libfunc (trunc_optab
, mode
, IFmode
, "__trunctfkf2");
11495 if (mode
!= TFmode
&& FLOAT128_IBM_P (TFmode
))
11496 set_conv_libfunc (trunc_optab
, mode
, TFmode
, "__trunctfkf2");
11498 set_conv_libfunc (sext_optab
, IFmode
, mode
, "__extendkftf2");
11499 if (mode
!= TFmode
&& FLOAT128_IBM_P (TFmode
))
11500 set_conv_libfunc (sext_optab
, TFmode
, mode
, "__extendkftf2");
11502 set_conv_libfunc (sext_optab
, mode
, SDmode
, "__dpd_extendsdkf");
11503 set_conv_libfunc (sext_optab
, mode
, DDmode
, "__dpd_extendddkf");
11504 set_conv_libfunc (trunc_optab
, mode
, TDmode
, "__dpd_trunctdkf");
11505 set_conv_libfunc (trunc_optab
, SDmode
, mode
, "__dpd_trunckfsd");
11506 set_conv_libfunc (trunc_optab
, DDmode
, mode
, "__dpd_trunckfdd");
11507 set_conv_libfunc (sext_optab
, TDmode
, mode
, "__dpd_extendkftd");
11509 set_conv_libfunc (sfix_optab
, SImode
, mode
, "__fixkfsi");
11510 set_conv_libfunc (ufix_optab
, SImode
, mode
, "__fixunskfsi");
11511 set_conv_libfunc (sfix_optab
, DImode
, mode
, "__fixkfdi");
11512 set_conv_libfunc (ufix_optab
, DImode
, mode
, "__fixunskfdi");
11514 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "__floatsikf");
11515 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "__floatunsikf");
11516 set_conv_libfunc (sfloat_optab
, mode
, DImode
, "__floatdikf");
11517 set_conv_libfunc (ufloat_optab
, mode
, DImode
, "__floatundikf");
11519 if (TARGET_POWERPC64
)
11521 set_conv_libfunc (sfix_optab
, TImode
, mode
, "__fixkfti_sw");
11522 set_conv_libfunc (ufix_optab
, TImode
, mode
, "__fixunskfti_sw");
11523 set_conv_libfunc (sfloat_optab
, mode
, TImode
, "__floattikf_sw");
11524 set_conv_libfunc (ufloat_optab
, mode
, TImode
, "__floatuntikf_sw");
11530 set_optab_libfunc (add_optab
, mode
, "_q_add");
11531 set_optab_libfunc (sub_optab
, mode
, "_q_sub");
11532 set_optab_libfunc (neg_optab
, mode
, "_q_neg");
11533 set_optab_libfunc (smul_optab
, mode
, "_q_mul");
11534 set_optab_libfunc (sdiv_optab
, mode
, "_q_div");
11535 if (TARGET_PPC_GPOPT
)
11536 set_optab_libfunc (sqrt_optab
, mode
, "_q_sqrt");
11538 set_optab_libfunc (eq_optab
, mode
, "_q_feq");
11539 set_optab_libfunc (ne_optab
, mode
, "_q_fne");
11540 set_optab_libfunc (gt_optab
, mode
, "_q_fgt");
11541 set_optab_libfunc (ge_optab
, mode
, "_q_fge");
11542 set_optab_libfunc (lt_optab
, mode
, "_q_flt");
11543 set_optab_libfunc (le_optab
, mode
, "_q_fle");
11545 set_conv_libfunc (sext_optab
, mode
, SFmode
, "_q_stoq");
11546 set_conv_libfunc (sext_optab
, mode
, DFmode
, "_q_dtoq");
11547 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "_q_qtos");
11548 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "_q_qtod");
11549 set_conv_libfunc (sfix_optab
, SImode
, mode
, "_q_qtoi");
11550 set_conv_libfunc (ufix_optab
, SImode
, mode
, "_q_qtou");
11551 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "_q_itoq");
11552 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "_q_utoq");
11557 rs6000_init_libfuncs (void)
11559 /* __float128 support. */
11560 if (TARGET_FLOAT128_TYPE
)
11562 init_float128_ibm (IFmode
);
11563 init_float128_ieee (KFmode
);
11566 /* AIX/Darwin/64-bit Linux quad floating point routines. */
11567 if (TARGET_LONG_DOUBLE_128
)
11569 if (!TARGET_IEEEQUAD
)
11570 init_float128_ibm (TFmode
);
11572 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
11574 init_float128_ieee (TFmode
);
11578 /* Emit a potentially record-form instruction, setting DST from SRC.
11579 If DOT is 0, that is all; otherwise, set CCREG to the result of the
11580 signed comparison of DST with zero. If DOT is 1, the generated RTL
11581 doesn't care about the DST result; if DOT is 2, it does. If CCREG
11582 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
11583 a separate COMPARE. */
11586 rs6000_emit_dot_insn (rtx dst
, rtx src
, int dot
, rtx ccreg
)
11590 emit_move_insn (dst
, src
);
11594 if (cc_reg_not_cr0_operand (ccreg
, CCmode
))
11596 emit_move_insn (dst
, src
);
11597 emit_move_insn (ccreg
, gen_rtx_COMPARE (CCmode
, dst
, const0_rtx
));
11601 rtx ccset
= gen_rtx_SET (ccreg
, gen_rtx_COMPARE (CCmode
, src
, const0_rtx
));
11604 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, dst
);
11605 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, ccset
, clobber
)));
11609 rtx set
= gen_rtx_SET (dst
, src
);
11610 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, ccset
, set
)));
11615 /* A validation routine: say whether CODE, a condition code, and MODE
11616 match. The other alternatives either don't make sense or should
11617 never be generated. */
11620 validate_condition_mode (enum rtx_code code
, machine_mode mode
)
11622 gcc_assert ((GET_RTX_CLASS (code
) == RTX_COMPARE
11623 || GET_RTX_CLASS (code
) == RTX_COMM_COMPARE
)
11624 && GET_MODE_CLASS (mode
) == MODE_CC
);
11626 /* These don't make sense. */
11627 gcc_assert ((code
!= GT
&& code
!= LT
&& code
!= GE
&& code
!= LE
)
11628 || mode
!= CCUNSmode
);
11630 gcc_assert ((code
!= GTU
&& code
!= LTU
&& code
!= GEU
&& code
!= LEU
)
11631 || mode
== CCUNSmode
);
11633 gcc_assert (mode
== CCFPmode
11634 || (code
!= ORDERED
&& code
!= UNORDERED
11635 && code
!= UNEQ
&& code
!= LTGT
11636 && code
!= UNGT
&& code
!= UNLT
11637 && code
!= UNGE
&& code
!= UNLE
));
11639 /* These are invalid; the information is not there. */
11640 gcc_assert (mode
!= CCEQmode
|| code
== EQ
|| code
== NE
);
11644 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
11645 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
11646 not zero, store there the bit offset (counted from the right) where
11647 the single stretch of 1 bits begins; and similarly for B, the bit
11648 offset where it ends. */
11651 rs6000_is_valid_mask (rtx mask
, int *b
, int *e
, machine_mode mode
)
11653 unsigned HOST_WIDE_INT val
= INTVAL (mask
);
11654 unsigned HOST_WIDE_INT bit
;
11656 int n
= GET_MODE_PRECISION (mode
);
11658 if (mode
!= DImode
&& mode
!= SImode
)
11661 if (INTVAL (mask
) >= 0)
11664 ne
= exact_log2 (bit
);
11665 nb
= exact_log2 (val
+ bit
);
11667 else if (val
+ 1 == 0)
11676 nb
= exact_log2 (bit
);
11677 ne
= exact_log2 (val
+ bit
);
11682 ne
= exact_log2 (bit
);
11683 if (val
+ bit
== 0)
11691 if (nb
< 0 || ne
< 0 || nb
>= n
|| ne
>= n
)
11703 rs6000_is_valid_rotate_dot_mask (rtx mask
, machine_mode mode
)
11706 if (rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
) && nb
>= ne
&& ne
> 0)
11710 /* *rotldi3_mask_dot requires for -m32 -mpowerpc64 that the mask is
11712 return (UINTVAL (mask
) << (63 - nb
)) <= 0x7fffffff;
11718 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
11719 or rldicr instruction, to implement an AND with it in mode MODE. */
11722 rs6000_is_valid_and_mask (rtx mask
, machine_mode mode
)
11726 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
11729 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
11731 if (mode
== DImode
)
11732 return (ne
== 0 || nb
== 63 || (nb
< 32 && ne
<= nb
));
11734 /* For SImode, rlwinm can do everything. */
11735 if (mode
== SImode
)
11736 return (nb
< 32 && ne
< 32);
11741 /* Return the instruction template for an AND with mask in mode MODE, with
11742 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11745 rs6000_insn_for_and_mask (machine_mode mode
, rtx
*operands
, bool dot
)
11749 if (!rs6000_is_valid_mask (operands
[2], &nb
, &ne
, mode
))
11750 gcc_unreachable ();
11752 if (mode
== DImode
&& ne
== 0)
11754 operands
[3] = GEN_INT (63 - nb
);
11756 return "rldicl. %0,%1,0,%3";
11757 return "rldicl %0,%1,0,%3";
11760 if (mode
== DImode
&& nb
== 63)
11762 operands
[3] = GEN_INT (63 - ne
);
11764 return "rldicr. %0,%1,0,%3";
11765 return "rldicr %0,%1,0,%3";
11768 if (nb
< 32 && ne
< 32)
11770 operands
[3] = GEN_INT (31 - nb
);
11771 operands
[4] = GEN_INT (31 - ne
);
11773 return "rlwinm. %0,%1,0,%3,%4";
11774 return "rlwinm %0,%1,0,%3,%4";
11777 gcc_unreachable ();
11780 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
11781 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
11782 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
11785 rs6000_is_valid_shift_mask (rtx mask
, rtx shift
, machine_mode mode
)
11789 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
11792 int n
= GET_MODE_PRECISION (mode
);
11795 if (CONST_INT_P (XEXP (shift
, 1)))
11797 sh
= INTVAL (XEXP (shift
, 1));
11798 if (sh
< 0 || sh
>= n
)
11802 rtx_code code
= GET_CODE (shift
);
11804 /* Convert any shift by 0 to a rotate, to simplify below code. */
11808 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11809 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
11811 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
11817 /* DImode rotates need rld*. */
11818 if (mode
== DImode
&& code
== ROTATE
)
11819 return (nb
== 63 || ne
== 0 || ne
== sh
);
11821 /* SImode rotates need rlw*. */
11822 if (mode
== SImode
&& code
== ROTATE
)
11823 return (nb
< 32 && ne
< 32 && sh
< 32);
11825 /* Wrap-around masks are only okay for rotates. */
11829 /* Variable shifts are only okay for rotates. */
11833 /* Don't allow ASHIFT if the mask is wrong for that. */
11834 if (code
== ASHIFT
&& ne
< sh
)
11837 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
11838 if the mask is wrong for that. */
11839 if (nb
< 32 && ne
< 32 && sh
< 32
11840 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
11843 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
11844 if the mask is wrong for that. */
11845 if (code
== LSHIFTRT
)
11847 if (nb
== 63 || ne
== 0 || ne
== sh
)
11848 return !(code
== LSHIFTRT
&& nb
>= sh
);
11853 /* Return the instruction template for a shift with mask in mode MODE, with
11854 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11857 rs6000_insn_for_shift_mask (machine_mode mode
, rtx
*operands
, bool dot
)
11861 if (!rs6000_is_valid_mask (operands
[3], &nb
, &ne
, mode
))
11862 gcc_unreachable ();
11864 if (mode
== DImode
&& ne
== 0)
11866 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
11867 operands
[2] = GEN_INT (64 - INTVAL (operands
[2]));
11868 operands
[3] = GEN_INT (63 - nb
);
11870 return "rld%I2cl. %0,%1,%2,%3";
11871 return "rld%I2cl %0,%1,%2,%3";
11874 if (mode
== DImode
&& nb
== 63)
11876 operands
[3] = GEN_INT (63 - ne
);
11878 return "rld%I2cr. %0,%1,%2,%3";
11879 return "rld%I2cr %0,%1,%2,%3";
11883 && GET_CODE (operands
[4]) != LSHIFTRT
11884 && CONST_INT_P (operands
[2])
11885 && ne
== INTVAL (operands
[2]))
11887 operands
[3] = GEN_INT (63 - nb
);
11889 return "rld%I2c. %0,%1,%2,%3";
11890 return "rld%I2c %0,%1,%2,%3";
11893 if (nb
< 32 && ne
< 32)
11895 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
11896 operands
[2] = GEN_INT (32 - INTVAL (operands
[2]));
11897 operands
[3] = GEN_INT (31 - nb
);
11898 operands
[4] = GEN_INT (31 - ne
);
11899 /* This insn can also be a 64-bit rotate with mask that really makes
11900 it just a shift right (with mask); the %h below are to adjust for
11901 that situation (shift count is >= 32 in that case). */
11903 return "rlw%I2nm. %0,%1,%h2,%3,%4";
11904 return "rlw%I2nm %0,%1,%h2,%3,%4";
11907 gcc_unreachable ();
11910 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
11911 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
11912 ASHIFT, or LSHIFTRT) in mode MODE. */
11915 rs6000_is_valid_insert_mask (rtx mask
, rtx shift
, machine_mode mode
)
11919 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
11922 int n
= GET_MODE_PRECISION (mode
);
11924 int sh
= INTVAL (XEXP (shift
, 1));
11925 if (sh
< 0 || sh
>= n
)
11928 rtx_code code
= GET_CODE (shift
);
11930 /* Convert any shift by 0 to a rotate, to simplify below code. */
11934 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11935 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
11937 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
11943 /* DImode rotates need rldimi. */
11944 if (mode
== DImode
&& code
== ROTATE
)
11947 /* SImode rotates need rlwimi. */
11948 if (mode
== SImode
&& code
== ROTATE
)
11949 return (nb
< 32 && ne
< 32 && sh
< 32);
11951 /* Wrap-around masks are only okay for rotates. */
11955 /* Don't allow ASHIFT if the mask is wrong for that. */
11956 if (code
== ASHIFT
&& ne
< sh
)
11959 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
11960 if the mask is wrong for that. */
11961 if (nb
< 32 && ne
< 32 && sh
< 32
11962 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
11965 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
11966 if the mask is wrong for that. */
11967 if (code
== LSHIFTRT
)
11970 return !(code
== LSHIFTRT
&& nb
>= sh
);
11975 /* Return the instruction template for an insert with mask in mode MODE, with
11976 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11979 rs6000_insn_for_insert_mask (machine_mode mode
, rtx
*operands
, bool dot
)
11983 if (!rs6000_is_valid_mask (operands
[3], &nb
, &ne
, mode
))
11984 gcc_unreachable ();
11986 /* Prefer rldimi because rlwimi is cracked. */
11987 if (TARGET_POWERPC64
11988 && (!dot
|| mode
== DImode
)
11989 && GET_CODE (operands
[4]) != LSHIFTRT
11990 && ne
== INTVAL (operands
[2]))
11992 operands
[3] = GEN_INT (63 - nb
);
11994 return "rldimi. %0,%1,%2,%3";
11995 return "rldimi %0,%1,%2,%3";
11998 if (nb
< 32 && ne
< 32)
12000 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
12001 operands
[2] = GEN_INT (32 - INTVAL (operands
[2]));
12002 operands
[3] = GEN_INT (31 - nb
);
12003 operands
[4] = GEN_INT (31 - ne
);
12005 return "rlwimi. %0,%1,%2,%3,%4";
12006 return "rlwimi %0,%1,%2,%3,%4";
12009 gcc_unreachable ();
12012 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
12013 using two machine instructions. */
12016 rs6000_is_valid_2insn_and (rtx c
, machine_mode mode
)
12018 /* There are two kinds of AND we can handle with two insns:
12019 1) those we can do with two rl* insn;
12022 We do not handle that last case yet. */
12024 /* If there is just one stretch of ones, we can do it. */
12025 if (rs6000_is_valid_mask (c
, NULL
, NULL
, mode
))
12028 /* Otherwise, fill in the lowest "hole"; if we can do the result with
12029 one insn, we can do the whole thing with two. */
12030 unsigned HOST_WIDE_INT val
= INTVAL (c
);
12031 unsigned HOST_WIDE_INT bit1
= val
& -val
;
12032 unsigned HOST_WIDE_INT bit2
= (val
+ bit1
) & ~val
;
12033 unsigned HOST_WIDE_INT val1
= (val
+ bit1
) & val
;
12034 unsigned HOST_WIDE_INT bit3
= val1
& -val1
;
12035 return rs6000_is_valid_and_mask (GEN_INT (val
+ bit3
- bit2
), mode
);
12038 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
12039 If EXPAND is true, split rotate-and-mask instructions we generate to
12040 their constituent parts as well (this is used during expand); if DOT
12041 is 1, make the last insn a record-form instruction clobbering the
12042 destination GPR and setting the CC reg (from operands[3]); if 2, set
12043 that GPR as well as the CC reg. */
12046 rs6000_emit_2insn_and (machine_mode mode
, rtx
*operands
, bool expand
, int dot
)
12048 gcc_assert (!(expand
&& dot
));
12050 unsigned HOST_WIDE_INT val
= INTVAL (operands
[2]);
12052 /* If it is one stretch of ones, it is DImode; shift left, mask, then
12053 shift right. This generates better code than doing the masks without
12054 shifts, or shifting first right and then left. */
12056 if (rs6000_is_valid_mask (operands
[2], &nb
, &ne
, mode
) && nb
>= ne
)
12058 gcc_assert (mode
== DImode
);
12060 int shift
= 63 - nb
;
12063 rtx tmp1
= gen_reg_rtx (DImode
);
12064 rtx tmp2
= gen_reg_rtx (DImode
);
12065 emit_insn (gen_ashldi3 (tmp1
, operands
[1], GEN_INT (shift
)));
12066 emit_insn (gen_anddi3 (tmp2
, tmp1
, GEN_INT (val
<< shift
)));
12067 emit_insn (gen_lshrdi3 (operands
[0], tmp2
, GEN_INT (shift
)));
12071 rtx tmp
= gen_rtx_ASHIFT (mode
, operands
[1], GEN_INT (shift
));
12072 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (val
<< shift
));
12073 emit_move_insn (operands
[0], tmp
);
12074 tmp
= gen_rtx_LSHIFTRT (mode
, operands
[0], GEN_INT (shift
));
12075 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
12080 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
12081 that does the rest. */
12082 unsigned HOST_WIDE_INT bit1
= val
& -val
;
12083 unsigned HOST_WIDE_INT bit2
= (val
+ bit1
) & ~val
;
12084 unsigned HOST_WIDE_INT val1
= (val
+ bit1
) & val
;
12085 unsigned HOST_WIDE_INT bit3
= val1
& -val1
;
12087 unsigned HOST_WIDE_INT mask1
= -bit3
+ bit2
- 1;
12088 unsigned HOST_WIDE_INT mask2
= val
+ bit3
- bit2
;
12090 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2
), mode
));
12092 /* Two "no-rotate"-and-mask instructions, for SImode. */
12093 if (rs6000_is_valid_and_mask (GEN_INT (mask1
), mode
))
12095 gcc_assert (mode
== SImode
);
12097 rtx reg
= expand
? gen_reg_rtx (mode
) : operands
[0];
12098 rtx tmp
= gen_rtx_AND (mode
, operands
[1], GEN_INT (mask1
));
12099 emit_move_insn (reg
, tmp
);
12100 tmp
= gen_rtx_AND (mode
, reg
, GEN_INT (mask2
));
12101 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
12105 gcc_assert (mode
== DImode
);
12107 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
12108 insns; we have to do the first in SImode, because it wraps. */
12109 if (mask2
<= 0xffffffff
12110 && rs6000_is_valid_and_mask (GEN_INT (mask1
), SImode
))
12112 rtx reg
= expand
? gen_reg_rtx (mode
) : operands
[0];
12113 rtx tmp
= gen_rtx_AND (SImode
, gen_lowpart (SImode
, operands
[1]),
12115 rtx reg_low
= gen_lowpart (SImode
, reg
);
12116 emit_move_insn (reg_low
, tmp
);
12117 tmp
= gen_rtx_AND (mode
, reg
, GEN_INT (mask2
));
12118 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
12122 /* Two rld* insns: rotate, clear the hole in the middle (which now is
12123 at the top end), rotate back and clear the other hole. */
12124 int right
= exact_log2 (bit3
);
12125 int left
= 64 - right
;
12127 /* Rotate the mask too. */
12128 mask1
= (mask1
>> right
) | ((bit2
- 1) << left
);
12132 rtx tmp1
= gen_reg_rtx (DImode
);
12133 rtx tmp2
= gen_reg_rtx (DImode
);
12134 rtx tmp3
= gen_reg_rtx (DImode
);
12135 emit_insn (gen_rotldi3 (tmp1
, operands
[1], GEN_INT (left
)));
12136 emit_insn (gen_anddi3 (tmp2
, tmp1
, GEN_INT (mask1
)));
12137 emit_insn (gen_rotldi3 (tmp3
, tmp2
, GEN_INT (right
)));
12138 emit_insn (gen_anddi3 (operands
[0], tmp3
, GEN_INT (mask2
)));
12142 rtx tmp
= gen_rtx_ROTATE (mode
, operands
[1], GEN_INT (left
));
12143 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (mask1
));
12144 emit_move_insn (operands
[0], tmp
);
12145 tmp
= gen_rtx_ROTATE (mode
, operands
[0], GEN_INT (right
));
12146 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (mask2
));
12147 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
12151 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
12152 for lfq and stfq insns iff the registers are hard registers. */
12155 registers_ok_for_quad_peep (rtx reg1
, rtx reg2
)
12157 /* We might have been passed a SUBREG. */
12158 if (!REG_P (reg1
) || !REG_P (reg2
))
12161 /* We might have been passed non floating point registers. */
12162 if (!FP_REGNO_P (REGNO (reg1
))
12163 || !FP_REGNO_P (REGNO (reg2
)))
12166 return (REGNO (reg1
) == REGNO (reg2
) - 1);
12169 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
12170 addr1 and addr2 must be in consecutive memory locations
12171 (addr2 == addr1 + 8). */
12174 mems_ok_for_quad_peep (rtx mem1
, rtx mem2
)
12177 unsigned int reg1
, reg2
;
12178 int offset1
, offset2
;
12180 /* The mems cannot be volatile. */
12181 if (MEM_VOLATILE_P (mem1
) || MEM_VOLATILE_P (mem2
))
12184 addr1
= XEXP (mem1
, 0);
12185 addr2
= XEXP (mem2
, 0);
12187 /* Extract an offset (if used) from the first addr. */
12188 if (GET_CODE (addr1
) == PLUS
)
12190 /* If not a REG, return zero. */
12191 if (!REG_P (XEXP (addr1
, 0)))
12195 reg1
= REGNO (XEXP (addr1
, 0));
12196 /* The offset must be constant! */
12197 if (!CONST_INT_P (XEXP (addr1
, 1)))
12199 offset1
= INTVAL (XEXP (addr1
, 1));
12202 else if (!REG_P (addr1
))
12206 reg1
= REGNO (addr1
);
12207 /* This was a simple (mem (reg)) expression. Offset is 0. */
12211 /* And now for the second addr. */
12212 if (GET_CODE (addr2
) == PLUS
)
12214 /* If not a REG, return zero. */
12215 if (!REG_P (XEXP (addr2
, 0)))
12219 reg2
= REGNO (XEXP (addr2
, 0));
12220 /* The offset must be constant. */
12221 if (!CONST_INT_P (XEXP (addr2
, 1)))
12223 offset2
= INTVAL (XEXP (addr2
, 1));
12226 else if (!REG_P (addr2
))
12230 reg2
= REGNO (addr2
);
12231 /* This was a simple (mem (reg)) expression. Offset is 0. */
12235 /* Both of these must have the same base register. */
12239 /* The offset for the second addr must be 8 more than the first addr. */
12240 if (offset2
!= offset1
+ 8)
12243 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
12248 /* Implement TARGET_SECONDARY_RELOAD_NEEDED_MODE. For SDmode values we
12249 need to use DDmode, in all other cases we can use the same mode. */
12250 static machine_mode
12251 rs6000_secondary_memory_needed_mode (machine_mode mode
)
12253 if (lra_in_progress
&& mode
== SDmode
)
12258 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
12259 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
12260 only work on the traditional altivec registers, note if an altivec register
12263 static enum rs6000_reg_type
12264 register_to_reg_type (rtx reg
, bool *is_altivec
)
12266 HOST_WIDE_INT regno
;
12267 enum reg_class rclass
;
12269 if (SUBREG_P (reg
))
12270 reg
= SUBREG_REG (reg
);
12273 return NO_REG_TYPE
;
12275 regno
= REGNO (reg
);
12276 if (!HARD_REGISTER_NUM_P (regno
))
12278 if (!lra_in_progress
&& !reload_completed
)
12279 return PSEUDO_REG_TYPE
;
12281 regno
= true_regnum (reg
);
12282 if (regno
< 0 || !HARD_REGISTER_NUM_P (regno
))
12283 return PSEUDO_REG_TYPE
;
12286 gcc_assert (regno
>= 0);
12288 if (is_altivec
&& ALTIVEC_REGNO_P (regno
))
12289 *is_altivec
= true;
12291 rclass
= rs6000_regno_regclass
[regno
];
12292 return reg_class_to_reg_type
[(int)rclass
];
12295 /* Helper function to return the cost of adding a TOC entry address. */
12298 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask
)
12302 if (TARGET_CMODEL
!= CMODEL_SMALL
)
12303 ret
= ((addr_mask
& RELOAD_REG_OFFSET
) == 0) ? 1 : 2;
12306 ret
= (TARGET_MINIMAL_TOC
) ? 6 : 3;
12311 /* Helper function for rs6000_secondary_reload to determine whether the memory
12312 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
12313 needs reloading. Return negative if the memory is not handled by the memory
12314 helper functions and to try a different reload method, 0 if no additional
12315 instructions are need, and positive to give the extra cost for the
12319 rs6000_secondary_reload_memory (rtx addr
,
12320 enum reg_class rclass
,
12323 int extra_cost
= 0;
12324 rtx reg
, and_arg
, plus_arg0
, plus_arg1
;
12325 addr_mask_type addr_mask
;
12326 const char *type
= NULL
;
12327 const char *fail_msg
= NULL
;
12329 if (GPR_REG_CLASS_P (rclass
))
12330 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_GPR
];
12332 else if (rclass
== FLOAT_REGS
)
12333 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
];
12335 else if (rclass
== ALTIVEC_REGS
)
12336 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
];
12338 /* For the combined VSX_REGS, turn off Altivec AND -16. */
12339 else if (rclass
== VSX_REGS
)
12340 addr_mask
= (reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
]
12341 & ~RELOAD_REG_AND_M16
);
12343 /* If the register allocator hasn't made up its mind yet on the register
12344 class to use, settle on defaults to use. */
12345 else if (rclass
== NO_REGS
)
12347 addr_mask
= (reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
]
12348 & ~RELOAD_REG_AND_M16
);
12350 if ((addr_mask
& RELOAD_REG_MULTIPLE
) != 0)
12351 addr_mask
&= ~(RELOAD_REG_INDEXED
12352 | RELOAD_REG_PRE_INCDEC
12353 | RELOAD_REG_PRE_MODIFY
);
12359 /* If the register isn't valid in this register class, just return now. */
12360 if ((addr_mask
& RELOAD_REG_VALID
) == 0)
12362 if (TARGET_DEBUG_ADDR
)
12365 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12366 "not valid in class\n",
12367 GET_MODE_NAME (mode
), reg_class_names
[rclass
]);
12374 switch (GET_CODE (addr
))
12376 /* Does the register class supports auto update forms for this mode? We
12377 don't need a scratch register, since the powerpc only supports
12378 PRE_INC, PRE_DEC, and PRE_MODIFY. */
12381 reg
= XEXP (addr
, 0);
12382 if (!base_reg_operand (addr
, GET_MODE (reg
)))
12384 fail_msg
= "no base register #1";
12388 else if ((addr_mask
& RELOAD_REG_PRE_INCDEC
) == 0)
12396 reg
= XEXP (addr
, 0);
12397 plus_arg1
= XEXP (addr
, 1);
12398 if (!base_reg_operand (reg
, GET_MODE (reg
))
12399 || GET_CODE (plus_arg1
) != PLUS
12400 || !rtx_equal_p (reg
, XEXP (plus_arg1
, 0)))
12402 fail_msg
= "bad PRE_MODIFY";
12406 else if ((addr_mask
& RELOAD_REG_PRE_MODIFY
) == 0)
12413 /* Do we need to simulate AND -16 to clear the bottom address bits used
12414 in VMX load/stores? Only allow the AND for vector sizes. */
12416 and_arg
= XEXP (addr
, 0);
12417 if (GET_MODE_SIZE (mode
) != 16
12418 || !CONST_INT_P (XEXP (addr
, 1))
12419 || INTVAL (XEXP (addr
, 1)) != -16)
12421 fail_msg
= "bad Altivec AND #1";
12425 if (rclass
!= ALTIVEC_REGS
)
12427 if (legitimate_indirect_address_p (and_arg
, false))
12430 else if (legitimate_indexed_address_p (and_arg
, false))
12435 fail_msg
= "bad Altivec AND #2";
12443 /* If this is an indirect address, make sure it is a base register. */
12446 if (!legitimate_indirect_address_p (addr
, false))
12453 /* If this is an indexed address, make sure the register class can handle
12454 indexed addresses for this mode. */
12456 plus_arg0
= XEXP (addr
, 0);
12457 plus_arg1
= XEXP (addr
, 1);
12459 /* (plus (plus (reg) (constant)) (constant)) is generated during
12460 push_reload processing, so handle it now. */
12461 if (GET_CODE (plus_arg0
) == PLUS
&& CONST_INT_P (plus_arg1
))
12463 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12470 /* (plus (plus (reg) (constant)) (reg)) is also generated during
12471 push_reload processing, so handle it now. */
12472 else if (GET_CODE (plus_arg0
) == PLUS
&& REG_P (plus_arg1
))
12474 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
12477 type
= "indexed #2";
12481 else if (!base_reg_operand (plus_arg0
, GET_MODE (plus_arg0
)))
12483 fail_msg
= "no base register #2";
12487 else if (int_reg_operand (plus_arg1
, GET_MODE (plus_arg1
)))
12489 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0
12490 || !legitimate_indexed_address_p (addr
, false))
12497 else if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0
12498 && CONST_INT_P (plus_arg1
))
12500 if (!quad_address_offset_p (INTVAL (plus_arg1
)))
12503 type
= "vector d-form offset";
12507 /* Make sure the register class can handle offset addresses. */
12508 else if (rs6000_legitimate_offset_address_p (mode
, addr
, false, true))
12510 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12513 type
= "offset #2";
12519 fail_msg
= "bad PLUS";
12526 /* Quad offsets are restricted and can't handle normal addresses. */
12527 if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
12530 type
= "vector d-form lo_sum";
12533 else if (!legitimate_lo_sum_address_p (mode
, addr
, false))
12535 fail_msg
= "bad LO_SUM";
12539 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12546 /* Static addresses need to create a TOC entry. */
12550 if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
12553 type
= "vector d-form lo_sum #2";
12559 extra_cost
= rs6000_secondary_reload_toc_costs (addr_mask
);
12563 /* TOC references look like offsetable memory. */
12565 if (TARGET_CMODEL
== CMODEL_SMALL
|| XINT (addr
, 1) != UNSPEC_TOCREL
)
12567 fail_msg
= "bad UNSPEC";
12571 else if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
12574 type
= "vector d-form lo_sum #3";
12577 else if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12580 type
= "toc reference";
12586 fail_msg
= "bad address";
12591 if (TARGET_DEBUG_ADDR
/* && extra_cost != 0 */)
12593 if (extra_cost
< 0)
12595 "rs6000_secondary_reload_memory error: mode = %s, "
12596 "class = %s, addr_mask = '%s', %s\n",
12597 GET_MODE_NAME (mode
),
12598 reg_class_names
[rclass
],
12599 rs6000_debug_addr_mask (addr_mask
, false),
12600 (fail_msg
!= NULL
) ? fail_msg
: "<bad address>");
12604 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12605 "addr_mask = '%s', extra cost = %d, %s\n",
12606 GET_MODE_NAME (mode
),
12607 reg_class_names
[rclass
],
12608 rs6000_debug_addr_mask (addr_mask
, false),
12610 (type
) ? type
: "<none>");
12618 /* Helper function for rs6000_secondary_reload to return true if a move to a
12619 different register classe is really a simple move. */
12622 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type
,
12623 enum rs6000_reg_type from_type
,
12626 int size
= GET_MODE_SIZE (mode
);
12628 /* Add support for various direct moves available. In this function, we only
12629 look at cases where we don't need any extra registers, and one or more
12630 simple move insns are issued. Originally small integers are not allowed
12631 in FPR/VSX registers. Single precision binary floating is not a simple
12632 move because we need to convert to the single precision memory layout.
12633 The 4-byte SDmode can be moved. TDmode values are disallowed since they
12634 need special direct move handling, which we do not support yet. */
12635 if (TARGET_DIRECT_MOVE
12636 && ((to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
12637 || (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
12639 if (TARGET_POWERPC64
)
12641 /* ISA 2.07: MTVSRD or MVFVSRD. */
12645 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
12646 if (size
== 16 && TARGET_P9_VECTOR
&& mode
!= TDmode
)
12650 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12651 if (TARGET_P8_VECTOR
)
12653 if (mode
== SImode
)
12656 if (TARGET_P9_VECTOR
&& (mode
== HImode
|| mode
== QImode
))
12660 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12661 if (mode
== SDmode
)
12665 /* Move to/from SPR. */
12666 else if ((size
== 4 || (TARGET_POWERPC64
&& size
== 8))
12667 && ((to_type
== GPR_REG_TYPE
&& from_type
== SPR_REG_TYPE
)
12668 || (to_type
== SPR_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
12674 /* Direct move helper function for rs6000_secondary_reload, handle all of the
12675 special direct moves that involve allocating an extra register, return the
12676 insn code of the helper function if there is such a function or
12677 CODE_FOR_nothing if not. */
12680 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type
,
12681 enum rs6000_reg_type from_type
,
12683 secondary_reload_info
*sri
,
12687 enum insn_code icode
= CODE_FOR_nothing
;
12689 int size
= GET_MODE_SIZE (mode
);
12691 if (TARGET_POWERPC64
&& size
== 16)
12693 /* Handle moving 128-bit values from GPRs to VSX point registers on
12694 ISA 2.07 (power8, power9) when running in 64-bit mode using
12695 XXPERMDI to glue the two 64-bit values back together. */
12696 if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)
12698 cost
= 3; /* 2 mtvsrd's, 1 xxpermdi. */
12699 icode
= reg_addr
[mode
].reload_vsx_gpr
;
12702 /* Handle moving 128-bit values from VSX point registers to GPRs on
12703 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
12704 bottom 64-bit value. */
12705 else if (to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
12707 cost
= 3; /* 2 mfvsrd's, 1 xxpermdi. */
12708 icode
= reg_addr
[mode
].reload_gpr_vsx
;
12712 else if (TARGET_POWERPC64
&& mode
== SFmode
)
12714 if (to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
12716 cost
= 3; /* xscvdpspn, mfvsrd, and. */
12717 icode
= reg_addr
[mode
].reload_gpr_vsx
;
12720 else if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)
12722 cost
= 2; /* mtvsrz, xscvspdpn. */
12723 icode
= reg_addr
[mode
].reload_vsx_gpr
;
12727 else if (!TARGET_POWERPC64
&& size
== 8)
12729 /* Handle moving 64-bit values from GPRs to floating point registers on
12730 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
12731 32-bit values back together. Altivec register classes must be handled
12732 specially since a different instruction is used, and the secondary
12733 reload support requires a single instruction class in the scratch
12734 register constraint. However, right now TFmode is not allowed in
12735 Altivec registers, so the pattern will never match. */
12736 if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
&& !altivec_p
)
12738 cost
= 3; /* 2 mtvsrwz's, 1 fmrgow. */
12739 icode
= reg_addr
[mode
].reload_fpr_gpr
;
12743 if (icode
!= CODE_FOR_nothing
)
12748 sri
->icode
= icode
;
12749 sri
->extra_cost
= cost
;
12756 /* Return whether a move between two register classes can be done either
12757 directly (simple move) or via a pattern that uses a single extra temporary
12758 (using ISA 2.07's direct move in this case. */
12761 rs6000_secondary_reload_move (enum rs6000_reg_type to_type
,
12762 enum rs6000_reg_type from_type
,
12764 secondary_reload_info
*sri
,
12767 /* Fall back to load/store reloads if either type is not a register. */
12768 if (to_type
== NO_REG_TYPE
|| from_type
== NO_REG_TYPE
)
12771 /* If we haven't allocated registers yet, assume the move can be done for the
12772 standard register types. */
12773 if ((to_type
== PSEUDO_REG_TYPE
&& from_type
== PSEUDO_REG_TYPE
)
12774 || (to_type
== PSEUDO_REG_TYPE
&& IS_STD_REG_TYPE (from_type
))
12775 || (from_type
== PSEUDO_REG_TYPE
&& IS_STD_REG_TYPE (to_type
)))
12778 /* Moves to the same set of registers is a simple move for non-specialized
12780 if (to_type
== from_type
&& IS_STD_REG_TYPE (to_type
))
12783 /* Check whether a simple move can be done directly. */
12784 if (rs6000_secondary_reload_simple_move (to_type
, from_type
, mode
))
12788 sri
->icode
= CODE_FOR_nothing
;
12789 sri
->extra_cost
= 0;
12794 /* Now check if we can do it in a few steps. */
12795 return rs6000_secondary_reload_direct_move (to_type
, from_type
, mode
, sri
,
12799 /* Inform reload about cases where moving X with a mode MODE to a register in
12800 RCLASS requires an extra scratch or immediate register. Return the class
12801 needed for the immediate register.
12803 For VSX and Altivec, we may need a register to convert sp+offset into
12806 For misaligned 64-bit gpr loads and stores we need a register to
12807 convert an offset address to indirect. */
12810 rs6000_secondary_reload (bool in_p
,
12812 reg_class_t rclass_i
,
12814 secondary_reload_info
*sri
)
12816 enum reg_class rclass
= (enum reg_class
) rclass_i
;
12817 reg_class_t ret
= ALL_REGS
;
12818 enum insn_code icode
;
12819 bool default_p
= false;
12820 bool done_p
= false;
12822 /* Allow subreg of memory before/during reload. */
12823 bool memory_p
= (MEM_P (x
)
12824 || (!reload_completed
&& SUBREG_P (x
)
12825 && MEM_P (SUBREG_REG (x
))));
12827 sri
->icode
= CODE_FOR_nothing
;
12828 sri
->t_icode
= CODE_FOR_nothing
;
12829 sri
->extra_cost
= 0;
12831 ? reg_addr
[mode
].reload_load
12832 : reg_addr
[mode
].reload_store
);
12834 if (REG_P (x
) || register_operand (x
, mode
))
12836 enum rs6000_reg_type to_type
= reg_class_to_reg_type
[(int)rclass
];
12837 bool altivec_p
= (rclass
== ALTIVEC_REGS
);
12838 enum rs6000_reg_type from_type
= register_to_reg_type (x
, &altivec_p
);
12841 std::swap (to_type
, from_type
);
12843 /* Can we do a direct move of some sort? */
12844 if (rs6000_secondary_reload_move (to_type
, from_type
, mode
, sri
,
12847 icode
= (enum insn_code
)sri
->icode
;
12854 /* Make sure 0.0 is not reloaded or forced into memory. */
12855 if (x
== CONST0_RTX (mode
) && VSX_REG_CLASS_P (rclass
))
12862 /* If this is a scalar floating point value and we want to load it into the
12863 traditional Altivec registers, do it via a move via a traditional floating
12864 point register, unless we have D-form addressing. Also make sure that
12865 non-zero constants use a FPR. */
12866 if (!done_p
&& reg_addr
[mode
].scalar_in_vmx_p
12867 && !mode_supports_vmx_dform (mode
)
12868 && (rclass
== VSX_REGS
|| rclass
== ALTIVEC_REGS
)
12869 && (memory_p
|| CONST_DOUBLE_P (x
)))
12876 /* Handle reload of load/stores if we have reload helper functions. */
12877 if (!done_p
&& icode
!= CODE_FOR_nothing
&& memory_p
)
12879 int extra_cost
= rs6000_secondary_reload_memory (XEXP (x
, 0), rclass
,
12882 if (extra_cost
>= 0)
12886 if (extra_cost
> 0)
12888 sri
->extra_cost
= extra_cost
;
12889 sri
->icode
= icode
;
12894 /* Handle unaligned loads and stores of integer registers. */
12895 if (!done_p
&& TARGET_POWERPC64
12896 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
12898 && GET_MODE_SIZE (GET_MODE (x
)) >= UNITS_PER_WORD
)
12900 rtx addr
= XEXP (x
, 0);
12901 rtx off
= address_offset (addr
);
12903 if (off
!= NULL_RTX
)
12905 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
12906 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
12908 /* We need a secondary reload when our legitimate_address_p
12909 says the address is good (as otherwise the entire address
12910 will be reloaded), and the offset is not a multiple of
12911 four or we have an address wrap. Address wrap will only
12912 occur for LO_SUMs since legitimate_offset_address_p
12913 rejects addresses for 16-byte mems that will wrap. */
12914 if (GET_CODE (addr
) == LO_SUM
12915 ? (1 /* legitimate_address_p allows any offset for lo_sum */
12916 && ((offset
& 3) != 0
12917 || ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
))
12918 : (offset
+ 0x8000 < 0x10000 - extra
/* legitimate_address_p */
12919 && (offset
& 3) != 0))
12921 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
12923 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_load
12924 : CODE_FOR_reload_di_load
);
12926 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_store
12927 : CODE_FOR_reload_di_store
);
12928 sri
->extra_cost
= 2;
12939 if (!done_p
&& !TARGET_POWERPC64
12940 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
12942 && GET_MODE_SIZE (GET_MODE (x
)) > UNITS_PER_WORD
)
12944 rtx addr
= XEXP (x
, 0);
12945 rtx off
= address_offset (addr
);
12947 if (off
!= NULL_RTX
)
12949 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
12950 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
12952 /* We need a secondary reload when our legitimate_address_p
12953 says the address is good (as otherwise the entire address
12954 will be reloaded), and we have a wrap.
12956 legitimate_lo_sum_address_p allows LO_SUM addresses to
12957 have any offset so test for wrap in the low 16 bits.
12959 legitimate_offset_address_p checks for the range
12960 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
12961 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
12962 [0x7ff4,0x7fff] respectively, so test for the
12963 intersection of these ranges, [0x7ffc,0x7fff] and
12964 [0x7ff4,0x7ff7] respectively.
12966 Note that the address we see here may have been
12967 manipulated by legitimize_reload_address. */
12968 if (GET_CODE (addr
) == LO_SUM
12969 ? ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
12970 : offset
- (0x8000 - extra
) < UNITS_PER_WORD
)
12973 sri
->icode
= CODE_FOR_reload_si_load
;
12975 sri
->icode
= CODE_FOR_reload_si_store
;
12976 sri
->extra_cost
= 2;
12991 ret
= default_secondary_reload (in_p
, x
, rclass
, mode
, sri
);
12993 gcc_assert (ret
!= ALL_REGS
);
12995 if (TARGET_DEBUG_ADDR
)
12998 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
13000 reg_class_names
[ret
],
13001 in_p
? "true" : "false",
13002 reg_class_names
[rclass
],
13003 GET_MODE_NAME (mode
));
13005 if (reload_completed
)
13006 fputs (", after reload", stderr
);
13009 fputs (", done_p not set", stderr
);
13012 fputs (", default secondary reload", stderr
);
13014 if (sri
->icode
!= CODE_FOR_nothing
)
13015 fprintf (stderr
, ", reload func = %s, extra cost = %d",
13016 insn_data
[sri
->icode
].name
, sri
->extra_cost
);
13018 else if (sri
->extra_cost
> 0)
13019 fprintf (stderr
, ", extra cost = %d", sri
->extra_cost
);
13021 fputs ("\n", stderr
);
13028 /* Better tracing for rs6000_secondary_reload_inner. */
13031 rs6000_secondary_reload_trace (int line
, rtx reg
, rtx mem
, rtx scratch
,
13036 gcc_assert (reg
!= NULL_RTX
&& mem
!= NULL_RTX
&& scratch
!= NULL_RTX
);
13038 fprintf (stderr
, "rs6000_secondary_reload_inner:%d, type = %s\n", line
,
13039 store_p
? "store" : "load");
13042 set
= gen_rtx_SET (mem
, reg
);
13044 set
= gen_rtx_SET (reg
, mem
);
13046 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
13047 debug_rtx (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
13050 static void rs6000_secondary_reload_fail (int, rtx
, rtx
, rtx
, bool)
13051 ATTRIBUTE_NORETURN
;
13054 rs6000_secondary_reload_fail (int line
, rtx reg
, rtx mem
, rtx scratch
,
13057 rs6000_secondary_reload_trace (line
, reg
, mem
, scratch
, store_p
);
13058 gcc_unreachable ();
13061 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
13062 reload helper functions. These were identified in
13063 rs6000_secondary_reload_memory, and if reload decided to use the secondary
13064 reload, it calls the insns:
13065 reload_<RELOAD:mode>_<P:mptrsize>_store
13066 reload_<RELOAD:mode>_<P:mptrsize>_load
13068 which in turn calls this function, to do whatever is necessary to create
13069 valid addresses. */
13072 rs6000_secondary_reload_inner (rtx reg
, rtx mem
, rtx scratch
, bool store_p
)
13074 int regno
= true_regnum (reg
);
13075 machine_mode mode
= GET_MODE (reg
);
13076 addr_mask_type addr_mask
;
13079 rtx op_reg
, op0
, op1
;
13084 if (regno
< 0 || !HARD_REGISTER_NUM_P (regno
) || !MEM_P (mem
)
13085 || !base_reg_operand (scratch
, GET_MODE (scratch
)))
13086 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13088 if (IN_RANGE (regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
))
13089 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_GPR
];
13091 else if (IN_RANGE (regno
, FIRST_FPR_REGNO
, LAST_FPR_REGNO
))
13092 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
];
13094 else if (IN_RANGE (regno
, FIRST_ALTIVEC_REGNO
, LAST_ALTIVEC_REGNO
))
13095 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
];
13098 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13100 /* Make sure the mode is valid in this register class. */
13101 if ((addr_mask
& RELOAD_REG_VALID
) == 0)
13102 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13104 if (TARGET_DEBUG_ADDR
)
13105 rs6000_secondary_reload_trace (__LINE__
, reg
, mem
, scratch
, store_p
);
13107 new_addr
= addr
= XEXP (mem
, 0);
13108 switch (GET_CODE (addr
))
13110 /* Does the register class support auto update forms for this mode? If
13111 not, do the update now. We don't need a scratch register, since the
13112 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
13115 op_reg
= XEXP (addr
, 0);
13116 if (!base_reg_operand (op_reg
, Pmode
))
13117 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13119 if ((addr_mask
& RELOAD_REG_PRE_INCDEC
) == 0)
13121 int delta
= GET_MODE_SIZE (mode
);
13122 if (GET_CODE (addr
) == PRE_DEC
)
13124 emit_insn (gen_add2_insn (op_reg
, GEN_INT (delta
)));
13130 op0
= XEXP (addr
, 0);
13131 op1
= XEXP (addr
, 1);
13132 if (!base_reg_operand (op0
, Pmode
)
13133 || GET_CODE (op1
) != PLUS
13134 || !rtx_equal_p (op0
, XEXP (op1
, 0)))
13135 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13137 if ((addr_mask
& RELOAD_REG_PRE_MODIFY
) == 0)
13139 emit_insn (gen_rtx_SET (op0
, op1
));
13144 /* Do we need to simulate AND -16 to clear the bottom address bits used
13145 in VMX load/stores? */
13147 op0
= XEXP (addr
, 0);
13148 op1
= XEXP (addr
, 1);
13149 if ((addr_mask
& RELOAD_REG_AND_M16
) == 0)
13151 if (REG_P (op0
) || SUBREG_P (op0
))
13154 else if (GET_CODE (op1
) == PLUS
)
13156 emit_insn (gen_rtx_SET (scratch
, op1
));
13161 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13163 and_op
= gen_rtx_AND (GET_MODE (scratch
), op_reg
, op1
);
13164 cc_clobber
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (CCmode
));
13165 rv
= gen_rtvec (2, gen_rtx_SET (scratch
, and_op
), cc_clobber
);
13166 emit_insn (gen_rtx_PARALLEL (VOIDmode
, rv
));
13167 new_addr
= scratch
;
13171 /* If this is an indirect address, make sure it is a base register. */
13174 if (!base_reg_operand (addr
, GET_MODE (addr
)))
13176 emit_insn (gen_rtx_SET (scratch
, addr
));
13177 new_addr
= scratch
;
13181 /* If this is an indexed address, make sure the register class can handle
13182 indexed addresses for this mode. */
13184 op0
= XEXP (addr
, 0);
13185 op1
= XEXP (addr
, 1);
13186 if (!base_reg_operand (op0
, Pmode
))
13187 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13189 else if (int_reg_operand (op1
, Pmode
))
13191 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
13193 emit_insn (gen_rtx_SET (scratch
, addr
));
13194 new_addr
= scratch
;
13198 else if (mode_supports_dq_form (mode
) && CONST_INT_P (op1
))
13200 if (((addr_mask
& RELOAD_REG_QUAD_OFFSET
) == 0)
13201 || !quad_address_p (addr
, mode
, false))
13203 emit_insn (gen_rtx_SET (scratch
, addr
));
13204 new_addr
= scratch
;
13208 /* Make sure the register class can handle offset addresses. */
13209 else if (rs6000_legitimate_offset_address_p (mode
, addr
, false, true))
13211 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
13213 emit_insn (gen_rtx_SET (scratch
, addr
));
13214 new_addr
= scratch
;
13219 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13224 op0
= XEXP (addr
, 0);
13225 op1
= XEXP (addr
, 1);
13226 if (!base_reg_operand (op0
, Pmode
))
13227 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13229 else if (int_reg_operand (op1
, Pmode
))
13231 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
13233 emit_insn (gen_rtx_SET (scratch
, addr
));
13234 new_addr
= scratch
;
13238 /* Quad offsets are restricted and can't handle normal addresses. */
13239 else if (mode_supports_dq_form (mode
))
13241 emit_insn (gen_rtx_SET (scratch
, addr
));
13242 new_addr
= scratch
;
13245 /* Make sure the register class can handle offset addresses. */
13246 else if (legitimate_lo_sum_address_p (mode
, addr
, false))
13248 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
13250 emit_insn (gen_rtx_SET (scratch
, addr
));
13251 new_addr
= scratch
;
13256 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13263 rs6000_emit_move (scratch
, addr
, Pmode
);
13264 new_addr
= scratch
;
13268 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13271 /* Adjust the address if it changed. */
13272 if (addr
!= new_addr
)
13274 mem
= replace_equiv_address_nv (mem
, new_addr
);
13275 if (TARGET_DEBUG_ADDR
)
13276 fprintf (stderr
, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
13279 /* Now create the move. */
13281 emit_insn (gen_rtx_SET (mem
, reg
));
13283 emit_insn (gen_rtx_SET (reg
, mem
));
13288 /* Convert reloads involving 64-bit gprs and misaligned offset
13289 addressing, or multiple 32-bit gprs and offsets that are too large,
13290 to use indirect addressing. */
13293 rs6000_secondary_reload_gpr (rtx reg
, rtx mem
, rtx scratch
, bool store_p
)
13295 int regno
= true_regnum (reg
);
13296 enum reg_class rclass
;
13298 rtx scratch_or_premodify
= scratch
;
13300 if (TARGET_DEBUG_ADDR
)
13302 fprintf (stderr
, "\nrs6000_secondary_reload_gpr, type = %s\n",
13303 store_p
? "store" : "load");
13304 fprintf (stderr
, "reg:\n");
13306 fprintf (stderr
, "mem:\n");
13308 fprintf (stderr
, "scratch:\n");
13309 debug_rtx (scratch
);
13312 gcc_assert (regno
>= 0 && HARD_REGISTER_NUM_P (regno
));
13313 gcc_assert (MEM_P (mem
));
13314 rclass
= REGNO_REG_CLASS (regno
);
13315 gcc_assert (rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
);
13316 addr
= XEXP (mem
, 0);
13318 if (GET_CODE (addr
) == PRE_MODIFY
)
13320 gcc_assert (REG_P (XEXP (addr
, 0))
13321 && GET_CODE (XEXP (addr
, 1)) == PLUS
13322 && XEXP (XEXP (addr
, 1), 0) == XEXP (addr
, 0));
13323 scratch_or_premodify
= XEXP (addr
, 0);
13324 addr
= XEXP (addr
, 1);
13326 gcc_assert (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
);
13328 rs6000_emit_move (scratch_or_premodify
, addr
, Pmode
);
13330 mem
= replace_equiv_address_nv (mem
, scratch_or_premodify
);
13332 /* Now create the move. */
13334 emit_insn (gen_rtx_SET (mem
, reg
));
13336 emit_insn (gen_rtx_SET (reg
, mem
));
13341 /* Given an rtx X being reloaded into a reg required to be
13342 in class CLASS, return the class of reg to actually use.
13343 In general this is just CLASS; but on some machines
13344 in some cases it is preferable to use a more restrictive class.
13346 On the RS/6000, we have to return NO_REGS when we want to reload a
13347 floating-point CONST_DOUBLE to force it to be copied to memory.
13349 We also don't want to reload integer values into floating-point
13350 registers if we can at all help it. In fact, this can
13351 cause reload to die, if it tries to generate a reload of CTR
13352 into a FP register and discovers it doesn't have the memory location
13355 ??? Would it be a good idea to have reload do the converse, that is
13356 try to reload floating modes into FP registers if possible?
13359 static enum reg_class
13360 rs6000_preferred_reload_class (rtx x
, enum reg_class rclass
)
13362 machine_mode mode
= GET_MODE (x
);
13363 bool is_constant
= CONSTANT_P (x
);
13365 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
13366 reload class for it. */
13367 if ((rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
13368 && (reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_VALID
) == 0)
13371 if ((rclass
== FLOAT_REGS
|| rclass
== VSX_REGS
)
13372 && (reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
] & RELOAD_REG_VALID
) == 0)
13375 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
13376 the reloading of address expressions using PLUS into floating point
13378 if (TARGET_VSX
&& VSX_REG_CLASS_P (rclass
) && GET_CODE (x
) != PLUS
)
13382 /* Zero is always allowed in all VSX registers. */
13383 if (x
== CONST0_RTX (mode
))
13386 /* If this is a vector constant that can be formed with a few Altivec
13387 instructions, we want altivec registers. */
13388 if (GET_CODE (x
) == CONST_VECTOR
&& easy_vector_constant (x
, mode
))
13389 return ALTIVEC_REGS
;
13391 /* If this is an integer constant that can easily be loaded into
13392 vector registers, allow it. */
13393 if (CONST_INT_P (x
))
13395 HOST_WIDE_INT value
= INTVAL (x
);
13397 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
13398 2.06 can generate it in the Altivec registers with
13402 if (TARGET_P8_VECTOR
)
13404 else if (rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
13405 return ALTIVEC_REGS
;
13410 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
13411 a sign extend in the Altivec registers. */
13412 if (IN_RANGE (value
, -128, 127) && TARGET_P9_VECTOR
13413 && (rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
))
13414 return ALTIVEC_REGS
;
13417 /* Force constant to memory. */
13421 /* D-form addressing can easily reload the value. */
13422 if (mode_supports_vmx_dform (mode
)
13423 || mode_supports_dq_form (mode
))
13426 /* If this is a scalar floating point value and we don't have D-form
13427 addressing, prefer the traditional floating point registers so that we
13428 can use D-form (register+offset) addressing. */
13429 if (rclass
== VSX_REGS
13430 && (mode
== SFmode
|| GET_MODE_SIZE (mode
) == 8))
13433 /* Prefer the Altivec registers if Altivec is handling the vector
13434 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
13436 if (VECTOR_UNIT_ALTIVEC_P (mode
) || VECTOR_MEM_ALTIVEC_P (mode
)
13437 || mode
== V1TImode
)
13438 return ALTIVEC_REGS
;
13443 if (is_constant
|| GET_CODE (x
) == PLUS
)
13445 if (reg_class_subset_p (GENERAL_REGS
, rclass
))
13446 return GENERAL_REGS
;
13447 if (reg_class_subset_p (BASE_REGS
, rclass
))
13452 /* For the vector pair and vector quad modes, prefer their natural register
13453 (VSX or FPR) rather than GPR registers. For other integer types, prefer
13454 the GPR registers. */
13455 if (rclass
== GEN_OR_FLOAT_REGS
)
13457 if (mode
== OOmode
)
13460 if (mode
== XOmode
)
13463 if (GET_MODE_CLASS (mode
) == MODE_INT
)
13464 return GENERAL_REGS
;
13470 /* Debug version of rs6000_preferred_reload_class. */
13471 static enum reg_class
13472 rs6000_debug_preferred_reload_class (rtx x
, enum reg_class rclass
)
13474 enum reg_class ret
= rs6000_preferred_reload_class (x
, rclass
);
13477 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
13479 reg_class_names
[ret
], reg_class_names
[rclass
],
13480 GET_MODE_NAME (GET_MODE (x
)));
13486 /* If we are copying between FP or AltiVec registers and anything else, we need
13487 a memory location. The exception is when we are targeting ppc64 and the
13488 move to/from fpr to gpr instructions are available. Also, under VSX, you
13489 can copy vector registers from the FP register set to the Altivec register
13490 set and vice versa. */
13493 rs6000_secondary_memory_needed (machine_mode mode
,
13494 reg_class_t from_class
,
13495 reg_class_t to_class
)
13497 enum rs6000_reg_type from_type
, to_type
;
13498 bool altivec_p
= ((from_class
== ALTIVEC_REGS
)
13499 || (to_class
== ALTIVEC_REGS
));
13501 /* If a simple/direct move is available, we don't need secondary memory */
13502 from_type
= reg_class_to_reg_type
[(int)from_class
];
13503 to_type
= reg_class_to_reg_type
[(int)to_class
];
13505 if (rs6000_secondary_reload_move (to_type
, from_type
, mode
,
13506 (secondary_reload_info
*)0, altivec_p
))
13509 /* If we have a floating point or vector register class, we need to use
13510 memory to transfer the data. */
13511 if (IS_FP_VECT_REG_TYPE (from_type
) || IS_FP_VECT_REG_TYPE (to_type
))
13517 /* Debug version of rs6000_secondary_memory_needed. */
13519 rs6000_debug_secondary_memory_needed (machine_mode mode
,
13520 reg_class_t from_class
,
13521 reg_class_t to_class
)
13523 bool ret
= rs6000_secondary_memory_needed (mode
, from_class
, to_class
);
13526 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
13527 "to_class = %s, mode = %s\n",
13528 ret
? "true" : "false",
13529 reg_class_names
[from_class
],
13530 reg_class_names
[to_class
],
13531 GET_MODE_NAME (mode
));
13536 /* Return the register class of a scratch register needed to copy IN into
13537 or out of a register in RCLASS in MODE. If it can be done directly,
13538 NO_REGS is returned. */
13540 static enum reg_class
13541 rs6000_secondary_reload_class (enum reg_class rclass
, machine_mode mode
,
13546 if (TARGET_ELF
|| (DEFAULT_ABI
== ABI_DARWIN
13548 && MACHOPIC_INDIRECT
13552 /* We cannot copy a symbolic operand directly into anything
13553 other than BASE_REGS for TARGET_ELF. So indicate that a
13554 register from BASE_REGS is needed as an intermediate
13557 On Darwin, pic addresses require a load from memory, which
13558 needs a base register. */
13559 if (rclass
!= BASE_REGS
13560 && (SYMBOL_REF_P (in
)
13561 || GET_CODE (in
) == HIGH
13562 || GET_CODE (in
) == LABEL_REF
13563 || GET_CODE (in
) == CONST
))
13569 regno
= REGNO (in
);
13570 if (!HARD_REGISTER_NUM_P (regno
))
13572 regno
= true_regnum (in
);
13573 if (!HARD_REGISTER_NUM_P (regno
))
13577 else if (SUBREG_P (in
))
13579 regno
= true_regnum (in
);
13580 if (!HARD_REGISTER_NUM_P (regno
))
13586 /* If we have VSX register moves, prefer moving scalar values between
13587 Altivec registers and GPR by going via an FPR (and then via memory)
13588 instead of reloading the secondary memory address for Altivec moves. */
13590 && GET_MODE_SIZE (mode
) < 16
13591 && !mode_supports_vmx_dform (mode
)
13592 && (((rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
)
13593 && (regno
>= 0 && ALTIVEC_REGNO_P (regno
)))
13594 || ((rclass
== VSX_REGS
|| rclass
== ALTIVEC_REGS
)
13595 && (regno
>= 0 && INT_REGNO_P (regno
)))))
13598 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
13600 if (rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
13601 || (regno
>= 0 && INT_REGNO_P (regno
)))
13604 /* Constants, memory, and VSX registers can go into VSX registers (both the
13605 traditional floating point and the altivec registers). */
13606 if (rclass
== VSX_REGS
13607 && (regno
== -1 || VSX_REGNO_P (regno
)))
13610 /* Constants, memory, and FP registers can go into FP registers. */
13611 if ((regno
== -1 || FP_REGNO_P (regno
))
13612 && (rclass
== FLOAT_REGS
|| rclass
== GEN_OR_FLOAT_REGS
))
13613 return (mode
!= SDmode
|| lra_in_progress
) ? NO_REGS
: GENERAL_REGS
;
13615 /* Memory, and AltiVec registers can go into AltiVec registers. */
13616 if ((regno
== -1 || ALTIVEC_REGNO_P (regno
))
13617 && rclass
== ALTIVEC_REGS
)
13620 /* We can copy among the CR registers. */
13621 if ((rclass
== CR_REGS
|| rclass
== CR0_REGS
)
13622 && regno
>= 0 && CR_REGNO_P (regno
))
13625 /* Otherwise, we need GENERAL_REGS. */
13626 return GENERAL_REGS
;
13629 /* Debug version of rs6000_secondary_reload_class. */
13630 static enum reg_class
13631 rs6000_debug_secondary_reload_class (enum reg_class rclass
,
13632 machine_mode mode
, rtx in
)
13634 enum reg_class ret
= rs6000_secondary_reload_class (rclass
, mode
, in
);
13636 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
13637 "mode = %s, input rtx:\n",
13638 reg_class_names
[ret
], reg_class_names
[rclass
],
13639 GET_MODE_NAME (mode
));
13645 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
13648 rs6000_can_change_mode_class (machine_mode from
,
13650 reg_class_t rclass
)
13652 unsigned from_size
= GET_MODE_SIZE (from
);
13653 unsigned to_size
= GET_MODE_SIZE (to
);
13655 if (from_size
!= to_size
)
13657 enum reg_class xclass
= (TARGET_VSX
) ? VSX_REGS
: FLOAT_REGS
;
13659 if (reg_classes_intersect_p (xclass
, rclass
))
13661 unsigned to_nregs
= hard_regno_nregs (FIRST_FPR_REGNO
, to
);
13662 unsigned from_nregs
= hard_regno_nregs (FIRST_FPR_REGNO
, from
);
13663 bool to_float128_vector_p
= FLOAT128_VECTOR_P (to
);
13664 bool from_float128_vector_p
= FLOAT128_VECTOR_P (from
);
13666 /* Don't allow 64-bit types to overlap with 128-bit types that take a
13667 single register under VSX because the scalar part of the register
13668 is in the upper 64-bits, and not the lower 64-bits. Types like
13669 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
13670 IEEE floating point can't overlap, and neither can small
13673 if (to_float128_vector_p
&& from_float128_vector_p
)
13676 else if (to_float128_vector_p
|| from_float128_vector_p
)
13679 /* TDmode in floating-mode registers must always go into a register
13680 pair with the most significant word in the even-numbered register
13681 to match ISA requirements. In little-endian mode, this does not
13682 match subreg numbering, so we cannot allow subregs. */
13683 if (!BYTES_BIG_ENDIAN
&& (to
== TDmode
|| from
== TDmode
))
13686 /* Allow SD<->DD changes, since SDmode values are stored in
13687 the low half of the DDmode, just like target-independent
13688 code expects. We need to allow at least SD->DD since
13689 rs6000_secondary_memory_needed_mode asks for that change
13690 to be made for SD reloads. */
13691 if ((to
== DDmode
&& from
== SDmode
)
13692 || (to
== SDmode
&& from
== DDmode
))
13695 if (from_size
< 8 || to_size
< 8)
13698 if (from_size
== 8 && (8 * to_nregs
) != to_size
)
13701 if (to_size
== 8 && (8 * from_nregs
) != from_size
)
13710 /* Since the VSX register set includes traditional floating point registers
13711 and altivec registers, just check for the size being different instead of
13712 trying to check whether the modes are vector modes. Otherwise it won't
13713 allow say DF and DI to change classes. For types like TFmode and TDmode
13714 that take 2 64-bit registers, rather than a single 128-bit register, don't
13715 allow subregs of those types to other 128 bit types. */
13716 if (TARGET_VSX
&& VSX_REG_CLASS_P (rclass
))
13718 unsigned num_regs
= (from_size
+ 15) / 16;
13719 if (hard_regno_nregs (FIRST_FPR_REGNO
, to
) > num_regs
13720 || hard_regno_nregs (FIRST_FPR_REGNO
, from
) > num_regs
)
13723 return (from_size
== 8 || from_size
== 16);
13726 if (TARGET_ALTIVEC
&& rclass
== ALTIVEC_REGS
13727 && (ALTIVEC_VECTOR_MODE (from
) + ALTIVEC_VECTOR_MODE (to
)) == 1)
13733 /* Debug version of rs6000_can_change_mode_class. */
13735 rs6000_debug_can_change_mode_class (machine_mode from
,
13737 reg_class_t rclass
)
13739 bool ret
= rs6000_can_change_mode_class (from
, to
, rclass
);
13742 "rs6000_can_change_mode_class, return %s, from = %s, "
13743 "to = %s, rclass = %s\n",
13744 ret
? "true" : "false",
13745 GET_MODE_NAME (from
), GET_MODE_NAME (to
),
13746 reg_class_names
[rclass
]);
13751 /* Return a string to do a move operation of 128 bits of data. */
13754 rs6000_output_move_128bit (rtx operands
[])
13756 rtx dest
= operands
[0];
13757 rtx src
= operands
[1];
13758 machine_mode mode
= GET_MODE (dest
);
13761 bool dest_gpr_p
, dest_fp_p
, dest_vmx_p
, dest_vsx_p
;
13762 bool src_gpr_p
, src_fp_p
, src_vmx_p
, src_vsx_p
;
13766 dest_regno
= REGNO (dest
);
13767 dest_gpr_p
= INT_REGNO_P (dest_regno
);
13768 dest_fp_p
= FP_REGNO_P (dest_regno
);
13769 dest_vmx_p
= ALTIVEC_REGNO_P (dest_regno
);
13770 dest_vsx_p
= dest_fp_p
| dest_vmx_p
;
13775 dest_gpr_p
= dest_fp_p
= dest_vmx_p
= dest_vsx_p
= false;
13780 src_regno
= REGNO (src
);
13781 src_gpr_p
= INT_REGNO_P (src_regno
);
13782 src_fp_p
= FP_REGNO_P (src_regno
);
13783 src_vmx_p
= ALTIVEC_REGNO_P (src_regno
);
13784 src_vsx_p
= src_fp_p
| src_vmx_p
;
13789 src_gpr_p
= src_fp_p
= src_vmx_p
= src_vsx_p
= false;
13792 /* Register moves. */
13793 if (dest_regno
>= 0 && src_regno
>= 0)
13800 if (TARGET_DIRECT_MOVE_128
&& src_vsx_p
)
13801 return (WORDS_BIG_ENDIAN
13802 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
13803 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
13805 else if (TARGET_VSX
&& TARGET_DIRECT_MOVE
&& src_vsx_p
)
13809 else if (TARGET_VSX
&& dest_vsx_p
)
13812 return "xxlor %x0,%x1,%x1";
13814 else if (TARGET_DIRECT_MOVE_128
&& src_gpr_p
)
13815 return (WORDS_BIG_ENDIAN
13816 ? "mtvsrdd %x0,%1,%L1"
13817 : "mtvsrdd %x0,%L1,%1");
13819 else if (TARGET_DIRECT_MOVE
&& src_gpr_p
)
13823 else if (TARGET_ALTIVEC
&& dest_vmx_p
&& src_vmx_p
)
13824 return "vor %0,%1,%1";
13826 else if (dest_fp_p
&& src_fp_p
)
13831 else if (dest_regno
>= 0 && MEM_P (src
))
13835 if (TARGET_QUAD_MEMORY
&& quad_load_store_p (dest
, src
))
13841 else if (TARGET_ALTIVEC
&& dest_vmx_p
13842 && altivec_indexed_or_indirect_operand (src
, mode
))
13843 return "lvx %0,%y1";
13845 else if (TARGET_VSX
&& dest_vsx_p
)
13847 if (mode_supports_dq_form (mode
)
13848 && quad_address_p (XEXP (src
, 0), mode
, true))
13849 return "lxv %x0,%1";
13851 else if (TARGET_P9_VECTOR
)
13852 return "lxvx %x0,%y1";
13854 else if (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
13855 return "lxvw4x %x0,%y1";
13858 return "lxvd2x %x0,%y1";
13861 else if (TARGET_ALTIVEC
&& dest_vmx_p
)
13862 return "lvx %0,%y1";
13864 else if (dest_fp_p
)
13869 else if (src_regno
>= 0 && MEM_P (dest
))
13873 if (TARGET_QUAD_MEMORY
&& quad_load_store_p (dest
, src
))
13874 return "stq %1,%0";
13879 else if (TARGET_ALTIVEC
&& src_vmx_p
13880 && altivec_indexed_or_indirect_operand (dest
, mode
))
13881 return "stvx %1,%y0";
13883 else if (TARGET_VSX
&& src_vsx_p
)
13885 if (mode_supports_dq_form (mode
)
13886 && quad_address_p (XEXP (dest
, 0), mode
, true))
13887 return "stxv %x1,%0";
13889 else if (TARGET_P9_VECTOR
)
13890 return "stxvx %x1,%y0";
13892 else if (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
13893 return "stxvw4x %x1,%y0";
13896 return "stxvd2x %x1,%y0";
13899 else if (TARGET_ALTIVEC
&& src_vmx_p
)
13900 return "stvx %1,%y0";
13907 else if (dest_regno
>= 0
13908 && (CONST_INT_P (src
)
13909 || CONST_WIDE_INT_P (src
)
13910 || CONST_DOUBLE_P (src
)
13911 || GET_CODE (src
) == CONST_VECTOR
))
13916 else if ((dest_vmx_p
&& TARGET_ALTIVEC
)
13917 || (dest_vsx_p
&& TARGET_VSX
))
13918 return output_vec_const_move (operands
);
13921 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest
, src
));
13924 /* Validate a 128-bit move. */
13926 rs6000_move_128bit_ok_p (rtx operands
[])
13928 machine_mode mode
= GET_MODE (operands
[0]);
13929 return (gpc_reg_operand (operands
[0], mode
)
13930 || gpc_reg_operand (operands
[1], mode
));
13933 /* Return true if a 128-bit move needs to be split. */
13935 rs6000_split_128bit_ok_p (rtx operands
[])
13937 if (!reload_completed
)
13940 if (!gpr_or_gpr_p (operands
[0], operands
[1]))
13943 if (quad_load_store_p (operands
[0], operands
[1]))
13950 /* Given a comparison operation, return the bit number in CCR to test. We
13951 know this is a valid comparison.
13953 SCC_P is 1 if this is for an scc. That means that %D will have been
13954 used instead of %C, so the bits will be in different places.
13956 Return -1 if OP isn't a valid comparison for some reason. */
13959 ccr_bit (rtx op
, int scc_p
)
13961 enum rtx_code code
= GET_CODE (op
);
13962 machine_mode cc_mode
;
13967 if (!COMPARISON_P (op
))
13970 reg
= XEXP (op
, 0);
13972 if (!REG_P (reg
) || !CR_REGNO_P (REGNO (reg
)))
13975 cc_mode
= GET_MODE (reg
);
13976 cc_regnum
= REGNO (reg
);
13977 base_bit
= 4 * (cc_regnum
- CR0_REGNO
);
13979 validate_condition_mode (code
, cc_mode
);
13981 /* When generating a sCOND operation, only positive conditions are
14000 return scc_p
? base_bit
+ 3 : base_bit
+ 2;
14002 return base_bit
+ 2;
14003 case GT
: case GTU
: case UNLE
:
14004 return base_bit
+ 1;
14005 case LT
: case LTU
: case UNGE
:
14007 case ORDERED
: case UNORDERED
:
14008 return base_bit
+ 3;
14011 /* If scc, we will have done a cror to put the bit in the
14012 unordered position. So test that bit. For integer, this is ! LT
14013 unless this is an scc insn. */
14014 return scc_p
? base_bit
+ 3 : base_bit
;
14017 return scc_p
? base_bit
+ 3 : base_bit
+ 1;
14024 /* Return the GOT register. */
14027 rs6000_got_register (rtx value ATTRIBUTE_UNUSED
)
14029 /* The second flow pass currently (June 1999) can't update
14030 regs_ever_live without disturbing other parts of the compiler, so
14031 update it here to make the prolog/epilogue code happy. */
14032 if (!can_create_pseudo_p ()
14033 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM
))
14034 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM
, true);
14036 crtl
->uses_pic_offset_table
= 1;
14038 return pic_offset_table_rtx
;
14041 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
14043 /* Write out a function code label. */
14046 rs6000_output_function_entry (FILE *file
, const char *fname
)
14048 if (fname
[0] != '.')
14050 switch (DEFAULT_ABI
)
14053 gcc_unreachable ();
14059 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "L.");
14069 RS6000_OUTPUT_BASENAME (file
, fname
);
14072 /* Print an operand. Recognize special options, documented below. */
14075 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
14076 only introduced by the linker, when applying the sda21
14078 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
14079 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
14081 #define SMALL_DATA_RELOC "sda21"
14082 #define SMALL_DATA_REG 0
14086 print_operand (FILE *file
, rtx x
, int code
)
14089 unsigned HOST_WIDE_INT uval
;
14093 /* %a is output_address. */
14095 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
14099 /* Write the MMA accumulator number associated with VSX register X. */
14100 if (!REG_P (x
) || !FP_REGNO_P (REGNO (x
)) || (REGNO (x
) % 4) != 0)
14101 output_operand_lossage ("invalid %%A value");
14103 fprintf (file
, "%d", (REGNO (x
) - FIRST_FPR_REGNO
) / 4);
14107 /* Like 'J' but get to the GT bit only. */
14108 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
14110 output_operand_lossage ("invalid %%D value");
14114 /* Bit 1 is GT bit. */
14115 i
= 4 * (REGNO (x
) - CR0_REGNO
) + 1;
14117 /* Add one for shift count in rlinm for scc. */
14118 fprintf (file
, "%d", i
+ 1);
14122 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
14125 output_operand_lossage ("invalid %%e value");
14130 if ((uval
& 0xffff) == 0 && uval
!= 0)
14135 /* X is a CR register. Print the number of the EQ bit of the CR */
14136 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
14137 output_operand_lossage ("invalid %%E value");
14139 fprintf (file
, "%d", 4 * (REGNO (x
) - CR0_REGNO
) + 2);
14143 /* X is a CR register. Print the shift count needed to move it
14144 to the high-order four bits. */
14145 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
14146 output_operand_lossage ("invalid %%f value");
14148 fprintf (file
, "%d", 4 * (REGNO (x
) - CR0_REGNO
));
14152 /* Similar, but print the count for the rotate in the opposite
14154 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
14155 output_operand_lossage ("invalid %%F value");
14157 fprintf (file
, "%d", 32 - 4 * (REGNO (x
) - CR0_REGNO
));
14161 /* X is a constant integer. If it is negative, print "m",
14162 otherwise print "z". This is to make an aze or ame insn. */
14163 if (!CONST_INT_P (x
))
14164 output_operand_lossage ("invalid %%G value");
14165 else if (INTVAL (x
) >= 0)
14172 /* If constant, output low-order five bits. Otherwise, write
14175 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 31);
14177 print_operand (file
, x
, 0);
14181 /* If constant, output low-order six bits. Otherwise, write
14184 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 63);
14186 print_operand (file
, x
, 0);
14190 /* Print `i' if this is a constant, else nothing. */
14196 /* Write the bit number in CCR for jump. */
14197 i
= ccr_bit (x
, 0);
14199 output_operand_lossage ("invalid %%j code");
14201 fprintf (file
, "%d", i
);
14205 /* Similar, but add one for shift count in rlinm for scc and pass
14206 scc flag to `ccr_bit'. */
14207 i
= ccr_bit (x
, 1);
14209 output_operand_lossage ("invalid %%J code");
14211 /* If we want bit 31, write a shift count of zero, not 32. */
14212 fprintf (file
, "%d", i
== 31 ? 0 : i
+ 1);
14216 /* X must be a constant. Write the 1's complement of the
14219 output_operand_lossage ("invalid %%k value");
14221 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ~ INTVAL (x
));
14225 /* X must be a symbolic constant on ELF. Write an
14226 expression suitable for an 'addi' that adds in the low 16
14227 bits of the MEM. */
14228 if (GET_CODE (x
) == CONST
)
14230 if (GET_CODE (XEXP (x
, 0)) != PLUS
14231 || (!SYMBOL_REF_P (XEXP (XEXP (x
, 0), 0))
14232 && GET_CODE (XEXP (XEXP (x
, 0), 0)) != LABEL_REF
)
14233 || !CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
14234 output_operand_lossage ("invalid %%K value");
14236 print_operand_address (file
, x
);
14237 fputs ("@l", file
);
14240 /* %l is output_asm_label. */
14243 /* Write second word of DImode or DFmode reference. Works on register
14244 or non-indexed memory only. */
14246 fputs (reg_names
[REGNO (x
) + 1], file
);
14247 else if (MEM_P (x
))
14249 machine_mode mode
= GET_MODE (x
);
14250 /* Handle possible auto-increment. Since it is pre-increment and
14251 we have already done it, we can just use an offset of word. */
14252 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
14253 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14254 output_address (mode
, plus_constant (Pmode
, XEXP (XEXP (x
, 0), 0),
14256 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14257 output_address (mode
, plus_constant (Pmode
, XEXP (XEXP (x
, 0), 0),
14260 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
,
14264 if (small_data_operand (x
, GET_MODE (x
)))
14265 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
14266 reg_names
[SMALL_DATA_REG
]);
14270 case 'N': /* Unused */
14271 /* Write the number of elements in the vector times 4. */
14272 if (GET_CODE (x
) != PARALLEL
)
14273 output_operand_lossage ("invalid %%N value");
14275 fprintf (file
, "%d", XVECLEN (x
, 0) * 4);
14278 case 'O': /* Unused */
14279 /* Similar, but subtract 1 first. */
14280 if (GET_CODE (x
) != PARALLEL
)
14281 output_operand_lossage ("invalid %%O value");
14283 fprintf (file
, "%d", (XVECLEN (x
, 0) - 1) * 4);
14287 /* X is a CONST_INT that is a power of two. Output the logarithm. */
14290 || (i
= exact_log2 (INTVAL (x
))) < 0)
14291 output_operand_lossage ("invalid %%p value");
14293 fprintf (file
, "%d", i
);
14297 /* The operand must be an indirect memory reference. The result
14298 is the register name. */
14299 if (!MEM_P (x
) || !REG_P (XEXP (x
, 0))
14300 || REGNO (XEXP (x
, 0)) >= 32)
14301 output_operand_lossage ("invalid %%P value");
14303 fputs (reg_names
[REGNO (XEXP (x
, 0))], file
);
14307 /* This outputs the logical code corresponding to a boolean
14308 expression. The expression may have one or both operands
14309 negated (if one, only the first one). For condition register
14310 logical operations, it will also treat the negated
14311 CR codes as NOTs, but not handle NOTs of them. */
14313 const char *const *t
= 0;
14315 enum rtx_code code
= GET_CODE (x
);
14316 static const char * const tbl
[3][3] = {
14317 { "and", "andc", "nor" },
14318 { "or", "orc", "nand" },
14319 { "xor", "eqv", "xor" } };
14323 else if (code
== IOR
)
14325 else if (code
== XOR
)
14328 output_operand_lossage ("invalid %%q value");
14330 if (GET_CODE (XEXP (x
, 0)) != NOT
)
14334 if (GET_CODE (XEXP (x
, 1)) == NOT
)
14345 if (! TARGET_MFCRF
)
14351 /* X is a CR register. Print the mask for `mtcrf'. */
14352 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
14353 output_operand_lossage ("invalid %%R value");
14355 fprintf (file
, "%d", 128 >> (REGNO (x
) - CR0_REGNO
));
14359 /* Low 5 bits of 32 - value */
14361 output_operand_lossage ("invalid %%s value");
14363 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, (32 - INTVAL (x
)) & 31);
14367 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
14368 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
14370 output_operand_lossage ("invalid %%t value");
14374 /* Bit 3 is OV bit. */
14375 i
= 4 * (REGNO (x
) - CR0_REGNO
) + 3;
14377 /* If we want bit 31, write a shift count of zero, not 32. */
14378 fprintf (file
, "%d", i
== 31 ? 0 : i
+ 1);
14382 /* Print the symbolic name of a branch target register. */
14383 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
14384 x
= XVECEXP (x
, 0, 0);
14385 if (!REG_P (x
) || (REGNO (x
) != LR_REGNO
14386 && REGNO (x
) != CTR_REGNO
))
14387 output_operand_lossage ("invalid %%T value");
14388 else if (REGNO (x
) == LR_REGNO
)
14389 fputs ("lr", file
);
14391 fputs ("ctr", file
);
14395 /* High-order or low-order 16 bits of constant, whichever is non-zero,
14396 for use in unsigned operand. */
14399 output_operand_lossage ("invalid %%u value");
14404 if ((uval
& 0xffff) == 0)
14407 fprintf (file
, HOST_WIDE_INT_PRINT_HEX
, uval
& 0xffff);
14411 /* High-order 16 bits of constant for use in signed operand. */
14413 output_operand_lossage ("invalid %%v value");
14415 fprintf (file
, HOST_WIDE_INT_PRINT_HEX
,
14416 (INTVAL (x
) >> 16) & 0xffff);
14420 /* Print `u' if this has an auto-increment or auto-decrement. */
14422 && (GET_CODE (XEXP (x
, 0)) == PRE_INC
14423 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
14424 || GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
))
14429 /* Print the trap code for this operand. */
14430 switch (GET_CODE (x
))
14433 fputs ("eq", file
); /* 4 */
14436 fputs ("ne", file
); /* 24 */
14439 fputs ("lt", file
); /* 16 */
14442 fputs ("le", file
); /* 20 */
14445 fputs ("gt", file
); /* 8 */
14448 fputs ("ge", file
); /* 12 */
14451 fputs ("llt", file
); /* 2 */
14454 fputs ("lle", file
); /* 6 */
14457 fputs ("lgt", file
); /* 1 */
14460 fputs ("lge", file
); /* 5 */
14463 output_operand_lossage ("invalid %%V value");
14468 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
14471 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, sext_hwi (INTVAL (x
), 16));
14473 print_operand (file
, x
, 0);
14477 /* X is a FPR or Altivec register used in a VSX context. */
14478 if (!REG_P (x
) || !VSX_REGNO_P (REGNO (x
)))
14479 output_operand_lossage ("invalid %%x value");
14482 int reg
= REGNO (x
);
14483 int vsx_reg
= (FP_REGNO_P (reg
)
14485 : reg
- FIRST_ALTIVEC_REGNO
+ 32);
14487 #ifdef TARGET_REGNAMES
14488 if (TARGET_REGNAMES
)
14489 fprintf (file
, "%%vs%d", vsx_reg
);
14492 fprintf (file
, "%d", vsx_reg
);
14498 && (legitimate_indexed_address_p (XEXP (x
, 0), 0)
14499 || (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
14500 && legitimate_indexed_address_p (XEXP (XEXP (x
, 0), 1), 0))))
14505 /* Like 'L', for third word of TImode/PTImode */
14507 fputs (reg_names
[REGNO (x
) + 2], file
);
14508 else if (MEM_P (x
))
14510 machine_mode mode
= GET_MODE (x
);
14511 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
14512 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14513 output_address (mode
, plus_constant (Pmode
,
14514 XEXP (XEXP (x
, 0), 0), 8));
14515 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14516 output_address (mode
, plus_constant (Pmode
,
14517 XEXP (XEXP (x
, 0), 0), 8));
14519 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
, 8), 0));
14520 if (small_data_operand (x
, GET_MODE (x
)))
14521 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
14522 reg_names
[SMALL_DATA_REG
]);
14527 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
14528 x
= XVECEXP (x
, 0, 1);
14529 /* X is a SYMBOL_REF. Write out the name preceded by a
14530 period and without any trailing data in brackets. Used for function
14531 names. If we are configured for System V (or the embedded ABI) on
14532 the PowerPC, do not emit the period, since those systems do not use
14533 TOCs and the like. */
14534 if (!SYMBOL_REF_P (x
))
14536 output_operand_lossage ("invalid %%z value");
14540 /* For macho, check to see if we need a stub. */
14543 const char *name
= XSTR (x
, 0);
14545 if (darwin_symbol_stubs
14546 && MACHOPIC_INDIRECT
14547 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
14548 name
= machopic_indirection_name (x
, /*stub_p=*/true);
14550 assemble_name (file
, name
);
14552 else if (!DOT_SYMBOLS
)
14553 assemble_name (file
, XSTR (x
, 0));
14555 rs6000_output_function_entry (file
, XSTR (x
, 0));
14559 /* Like 'L', for last word of TImode/PTImode. */
14561 fputs (reg_names
[REGNO (x
) + 3], file
);
14562 else if (MEM_P (x
))
14564 machine_mode mode
= GET_MODE (x
);
14565 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
14566 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14567 output_address (mode
, plus_constant (Pmode
,
14568 XEXP (XEXP (x
, 0), 0), 12));
14569 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14570 output_address (mode
, plus_constant (Pmode
,
14571 XEXP (XEXP (x
, 0), 0), 12));
14573 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
, 12), 0));
14574 if (small_data_operand (x
, GET_MODE (x
)))
14575 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
14576 reg_names
[SMALL_DATA_REG
]);
14580 /* Print AltiVec memory operand. */
14585 gcc_assert (MEM_P (x
));
14589 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x
))
14590 && GET_CODE (tmp
) == AND
14591 && CONST_INT_P (XEXP (tmp
, 1))
14592 && INTVAL (XEXP (tmp
, 1)) == -16)
14593 tmp
= XEXP (tmp
, 0);
14594 else if (VECTOR_MEM_VSX_P (GET_MODE (x
))
14595 && GET_CODE (tmp
) == PRE_MODIFY
)
14596 tmp
= XEXP (tmp
, 1);
14598 fprintf (file
, "0,%s", reg_names
[REGNO (tmp
)]);
14601 if (GET_CODE (tmp
) != PLUS
14602 || !REG_P (XEXP (tmp
, 0))
14603 || !REG_P (XEXP (tmp
, 1)))
14605 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
14609 if (REGNO (XEXP (tmp
, 0)) == 0)
14610 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (tmp
, 1)) ],
14611 reg_names
[ REGNO (XEXP (tmp
, 0)) ]);
14613 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (tmp
, 0)) ],
14614 reg_names
[ REGNO (XEXP (tmp
, 1)) ]);
14621 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
14622 else if (MEM_P (x
))
14624 /* We need to handle PRE_INC and PRE_DEC here, since we need to
14625 know the width from the mode. */
14626 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
)
14627 fprintf (file
, "%d(%s)", GET_MODE_SIZE (GET_MODE (x
)),
14628 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
14629 else if (GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14630 fprintf (file
, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x
)),
14631 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
14632 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14633 output_address (GET_MODE (x
), XEXP (XEXP (x
, 0), 1));
14635 output_address (GET_MODE (x
), XEXP (x
, 0));
14637 else if (toc_relative_expr_p (x
, false,
14638 &tocrel_base_oac
, &tocrel_offset_oac
))
14639 /* This hack along with a corresponding hack in
14640 rs6000_output_addr_const_extra arranges to output addends
14641 where the assembler expects to find them. eg.
14642 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
14643 without this hack would be output as "x@toc+4". We
14645 output_addr_const (file
, CONST_CAST_RTX (tocrel_base_oac
));
14646 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLSGD
)
14647 output_addr_const (file
, XVECEXP (x
, 0, 0));
14648 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
14649 output_addr_const (file
, XVECEXP (x
, 0, 1));
14651 output_addr_const (file
, x
);
14655 if (const char *name
= get_some_local_dynamic_name ())
14656 assemble_name (file
, name
);
14658 output_operand_lossage ("'%%&' used without any "
14659 "local dynamic TLS references");
14663 output_operand_lossage ("invalid %%xn code");
14667 /* Print the address of an operand. */
14670 print_operand_address (FILE *file
, rtx x
)
14673 fprintf (file
, "0(%s)", reg_names
[ REGNO (x
) ]);
14675 /* Is it a PC-relative address? */
14676 else if (TARGET_PCREL
&& pcrel_local_or_external_address (x
, VOIDmode
))
14678 HOST_WIDE_INT offset
;
14680 if (GET_CODE (x
) == CONST
)
14683 if (GET_CODE (x
) == PLUS
)
14685 offset
= INTVAL (XEXP (x
, 1));
14691 output_addr_const (file
, x
);
14694 fprintf (file
, "%+" PRId64
, offset
);
14696 if (SYMBOL_REF_P (x
) && !SYMBOL_REF_LOCAL_P (x
))
14697 fprintf (file
, "@got");
14699 fprintf (file
, "@pcrel");
14701 else if (SYMBOL_REF_P (x
) || GET_CODE (x
) == CONST
14702 || GET_CODE (x
) == LABEL_REF
)
14704 output_addr_const (file
, x
);
14705 if (small_data_operand (x
, GET_MODE (x
)))
14706 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
14707 reg_names
[SMALL_DATA_REG
]);
14710 /* Do not support getting address directly from TOC, emit error.
14711 No more work is needed for !TARGET_TOC. */
14713 output_operand_lossage ("%%a requires an address of memory");
14716 else if (GET_CODE (x
) == PLUS
&& REG_P (XEXP (x
, 0))
14717 && REG_P (XEXP (x
, 1)))
14719 if (REGNO (XEXP (x
, 0)) == 0)
14720 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (x
, 1)) ],
14721 reg_names
[ REGNO (XEXP (x
, 0)) ]);
14723 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (x
, 0)) ],
14724 reg_names
[ REGNO (XEXP (x
, 1)) ]);
14726 else if (GET_CODE (x
) == PLUS
&& REG_P (XEXP (x
, 0))
14727 && CONST_INT_P (XEXP (x
, 1)))
14728 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
"(%s)",
14729 INTVAL (XEXP (x
, 1)), reg_names
[ REGNO (XEXP (x
, 0)) ]);
14731 else if (GET_CODE (x
) == LO_SUM
&& REG_P (XEXP (x
, 0))
14732 && CONSTANT_P (XEXP (x
, 1)))
14734 fprintf (file
, "lo16(");
14735 output_addr_const (file
, XEXP (x
, 1));
14736 fprintf (file
, ")(%s)", reg_names
[ REGNO (XEXP (x
, 0)) ]);
14740 else if (GET_CODE (x
) == LO_SUM
&& REG_P (XEXP (x
, 0))
14741 && CONSTANT_P (XEXP (x
, 1)))
14743 output_addr_const (file
, XEXP (x
, 1));
14744 fprintf (file
, "@l(%s)", reg_names
[ REGNO (XEXP (x
, 0)) ]);
14747 else if (toc_relative_expr_p (x
, false, &tocrel_base_oac
, &tocrel_offset_oac
))
14749 /* This hack along with a corresponding hack in
14750 rs6000_output_addr_const_extra arranges to output addends
14751 where the assembler expects to find them. eg.
14753 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
14754 without this hack would be output as "x@toc+8@l(9)". We
14755 want "x+8@toc@l(9)". */
14756 output_addr_const (file
, CONST_CAST_RTX (tocrel_base_oac
));
14757 if (GET_CODE (x
) == LO_SUM
)
14758 fprintf (file
, "@l(%s)", reg_names
[REGNO (XEXP (x
, 0))]);
14760 fprintf (file
, "(%s)", reg_names
[REGNO (XVECEXP (tocrel_base_oac
, 0, 1))]);
14763 output_addr_const (file
, x
);
14766 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14769 rs6000_output_addr_const_extra (FILE *file
, rtx x
)
14771 if (GET_CODE (x
) == UNSPEC
)
14772 switch (XINT (x
, 1))
14774 case UNSPEC_TOCREL
:
14775 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x
, 0, 0))
14776 && REG_P (XVECEXP (x
, 0, 1))
14777 && REGNO (XVECEXP (x
, 0, 1)) == TOC_REGISTER
);
14778 output_addr_const (file
, XVECEXP (x
, 0, 0));
14779 if (x
== tocrel_base_oac
&& tocrel_offset_oac
!= const0_rtx
)
14781 if (INTVAL (tocrel_offset_oac
) >= 0)
14782 fprintf (file
, "+");
14783 output_addr_const (file
, CONST_CAST_RTX (tocrel_offset_oac
));
14785 if (!TARGET_AIX
|| (TARGET_ELF
&& TARGET_MINIMAL_TOC
))
14788 assemble_name (file
, toc_label_name
);
14791 else if (TARGET_ELF
)
14792 fputs ("@toc", file
);
14796 case UNSPEC_MACHOPIC_OFFSET
:
14797 output_addr_const (file
, XVECEXP (x
, 0, 0));
14799 machopic_output_function_base_name (file
);
14806 /* Target hook for assembling integer objects. The PowerPC version has
14807 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
14808 is defined. It also needs to handle DI-mode objects on 64-bit
14812 rs6000_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
14814 #ifdef RELOCATABLE_NEEDS_FIXUP
14815 /* Special handling for SI values. */
14816 if (RELOCATABLE_NEEDS_FIXUP
&& size
== 4 && aligned_p
)
14818 static int recurse
= 0;
14820 /* For -mrelocatable, we mark all addresses that need to be fixed up in
14821 the .fixup section. Since the TOC section is already relocated, we
14822 don't need to mark it here. We used to skip the text section, but it
14823 should never be valid for relocated addresses to be placed in the text
14825 if (DEFAULT_ABI
== ABI_V4
14826 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
14827 && in_section
!= toc_section
14829 && !CONST_SCALAR_INT_P (x
)
14835 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCP", fixuplabelno
);
14837 ASM_OUTPUT_LABEL (asm_out_file
, buf
);
14838 fprintf (asm_out_file
, "\t.long\t(");
14839 output_addr_const (asm_out_file
, x
);
14840 fprintf (asm_out_file
, ")@fixup\n");
14841 fprintf (asm_out_file
, "\t.section\t\".fixup\",\"aw\"\n");
14842 ASM_OUTPUT_ALIGN (asm_out_file
, 2);
14843 fprintf (asm_out_file
, "\t.long\t");
14844 assemble_name (asm_out_file
, buf
);
14845 fprintf (asm_out_file
, "\n\t.previous\n");
14849 /* Remove initial .'s to turn a -mcall-aixdesc function
14850 address into the address of the descriptor, not the function
14852 else if (SYMBOL_REF_P (x
)
14853 && XSTR (x
, 0)[0] == '.'
14854 && DEFAULT_ABI
== ABI_AIX
)
14856 const char *name
= XSTR (x
, 0);
14857 while (*name
== '.')
14860 fprintf (asm_out_file
, "\t.long\t%s\n", name
);
14864 #endif /* RELOCATABLE_NEEDS_FIXUP */
14865 return default_assemble_integer (x
, size
, aligned_p
);
14868 /* Return a template string for assembly to emit when making an
14869 external call. FUNOP is the call mem argument operand number. */
14871 static const char *
14872 rs6000_call_template_1 (rtx
*operands
, unsigned int funop
, bool sibcall
)
14874 /* -Wformat-overflow workaround, without which gcc thinks that %u
14875 might produce 10 digits. */
14876 gcc_assert (funop
<= MAX_RECOG_OPERANDS
);
14880 if (GET_CODE (operands
[funop
+ 1]) == UNSPEC
)
14882 if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSGD
)
14883 sprintf (arg
, "(%%%u@tlsgd)", funop
+ 1);
14884 else if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSLD
)
14885 sprintf (arg
, "(%%&@tlsld)");
14888 /* The magic 32768 offset here corresponds to the offset of
14889 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
14891 sprintf (z
, "%%z%u%s", funop
,
14892 (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
&& flag_pic
== 2
14895 static char str
[32]; /* 1 spare */
14896 if (rs6000_pcrel_p ())
14897 sprintf (str
, "b%s %s@notoc%s", sibcall
? "" : "l", z
, arg
);
14898 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
14899 sprintf (str
, "b%s %s%s%s", sibcall
? "" : "l", z
, arg
,
14900 sibcall
? "" : "\n\tnop");
14901 else if (DEFAULT_ABI
== ABI_V4
)
14902 sprintf (str
, "b%s %s%s%s", sibcall
? "" : "l", z
, arg
,
14903 flag_pic
? "@plt" : "");
14905 /* If/when we remove the mlongcall opt, we can share the AIX/ELGv2 case. */
14906 else if (DEFAULT_ABI
== ABI_DARWIN
)
14908 /* The cookie is in operand func+2. */
14909 gcc_checking_assert (GET_CODE (operands
[funop
+ 2]) == CONST_INT
);
14910 int cookie
= INTVAL (operands
[funop
+ 2]);
14911 if (cookie
& CALL_LONG
)
14913 tree funname
= get_identifier (XSTR (operands
[funop
], 0));
14914 tree labelname
= get_prev_label (funname
);
14915 gcc_checking_assert (labelname
&& !sibcall
);
14917 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
14918 instruction will reach 'foo', otherwise link as 'bl L42'".
14919 "L42" should be a 'branch island', that will do a far jump to
14920 'foo'. Branch islands are generated in
14921 macho_branch_islands(). */
14922 sprintf (str
, "jbsr %%z%u,%.10s", funop
,
14923 IDENTIFIER_POINTER (labelname
));
14926 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
14928 sprintf (str
, "b%s %s%s", sibcall
? "" : "l", z
, arg
);
14932 gcc_unreachable ();
14937 rs6000_call_template (rtx
*operands
, unsigned int funop
)
14939 return rs6000_call_template_1 (operands
, funop
, false);
14943 rs6000_sibcall_template (rtx
*operands
, unsigned int funop
)
14945 return rs6000_call_template_1 (operands
, funop
, true);
14948 /* As above, for indirect calls. */
14950 static const char *
14951 rs6000_indirect_call_template_1 (rtx
*operands
, unsigned int funop
,
14954 /* -Wformat-overflow workaround, without which gcc thinks that %u
14955 might produce 10 digits. Note that -Wformat-overflow will not
14956 currently warn here for str[], so do not rely on a warning to
14957 ensure str[] is correctly sized. */
14958 gcc_assert (funop
<= MAX_RECOG_OPERANDS
);
14960 /* Currently, funop is either 0 or 1. The maximum string is always
14961 a !speculate 64-bit __tls_get_addr call.
14964 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14965 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14967 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14968 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14975 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14976 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14978 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14979 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14986 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14987 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14989 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14990 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14997 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14998 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
15000 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
15001 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
15005 static char str
[160]; /* 8 spare */
15007 const char *ptrload
= TARGET_64BIT
? "d" : "wz";
15009 if (DEFAULT_ABI
== ABI_AIX
)
15012 ptrload
, funop
+ 3);
15014 /* We don't need the extra code to stop indirect call speculation if
15016 bool speculate
= (TARGET_MACHO
15017 || rs6000_speculate_indirect_jumps
15018 || (REG_P (operands
[funop
])
15019 && REGNO (operands
[funop
]) == LR_REGNO
));
15021 if (TARGET_PLTSEQ
&& GET_CODE (operands
[funop
]) == UNSPEC
)
15023 const char *rel64
= TARGET_64BIT
? "64" : "";
15026 if (GET_CODE (operands
[funop
+ 1]) == UNSPEC
)
15028 if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSGD
)
15029 sprintf (tls
, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
15031 else if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSLD
)
15032 sprintf (tls
, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
15036 const char *notoc
= rs6000_pcrel_p () ? "_NOTOC" : "";
15037 const char *addend
= (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
15038 && flag_pic
== 2 ? "+32768" : "");
15042 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
15043 tls
, rel64
, notoc
, funop
, addend
);
15044 s
+= sprintf (s
, "crset 2\n\t");
15047 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
15048 tls
, rel64
, notoc
, funop
, addend
);
15050 else if (!speculate
)
15051 s
+= sprintf (s
, "crset 2\n\t");
15053 if (rs6000_pcrel_p ())
15056 sprintf (s
, "b%%T%ul", funop
);
15058 sprintf (s
, "beq%%T%ul-", funop
);
15060 else if (DEFAULT_ABI
== ABI_AIX
)
15066 funop
, ptrload
, funop
+ 4);
15071 funop
, ptrload
, funop
+ 4);
15073 else if (DEFAULT_ABI
== ABI_ELFv2
)
15079 funop
, ptrload
, funop
+ 3);
15084 funop
, ptrload
, funop
+ 3);
15091 funop
, sibcall
? "" : "l");
15095 funop
, sibcall
? "" : "l", sibcall
? "\n\tb $" : "");
15101 rs6000_indirect_call_template (rtx
*operands
, unsigned int funop
)
15103 return rs6000_indirect_call_template_1 (operands
, funop
, false);
15107 rs6000_indirect_sibcall_template (rtx
*operands
, unsigned int funop
)
15109 return rs6000_indirect_call_template_1 (operands
, funop
, true);
15113 /* Output indirect call insns. WHICH identifies the type of sequence. */
15115 rs6000_pltseq_template (rtx
*operands
, int which
)
15117 const char *rel64
= TARGET_64BIT
? "64" : "";
15120 if (GET_CODE (operands
[3]) == UNSPEC
)
15122 char off
= which
== RS6000_PLTSEQ_PLT_PCREL34
? '8' : '4';
15123 if (XINT (operands
[3], 1) == UNSPEC_TLSGD
)
15124 sprintf (tls
, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
15126 else if (XINT (operands
[3], 1) == UNSPEC_TLSLD
)
15127 sprintf (tls
, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
15131 gcc_assert (DEFAULT_ABI
== ABI_ELFv2
|| DEFAULT_ABI
== ABI_V4
);
15132 static char str
[96]; /* 10 spare */
15133 char off
= WORDS_BIG_ENDIAN
? '2' : '4';
15134 const char *addend
= (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
15135 && flag_pic
== 2 ? "+32768" : "");
15138 case RS6000_PLTSEQ_TOCSAVE
:
15141 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
15142 TARGET_64BIT
? "d 2,24(1)" : "w 2,12(1)",
15145 case RS6000_PLTSEQ_PLT16_HA
:
15146 if (DEFAULT_ABI
== ABI_V4
&& !flag_pic
)
15149 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
15153 "addis %%0,%%1,0\n\t"
15154 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
15155 tls
, off
, rel64
, addend
);
15157 case RS6000_PLTSEQ_PLT16_LO
:
15159 "l%s %%0,0(%%1)\n\t"
15160 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
15161 TARGET_64BIT
? "d" : "wz",
15162 tls
, off
, rel64
, TARGET_64BIT
? "_DS" : "", addend
);
15164 case RS6000_PLTSEQ_MTCTR
:
15167 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
15168 tls
, rel64
, addend
);
15170 case RS6000_PLTSEQ_PLT_PCREL34
:
15172 "pl%s %%0,0(0),1\n\t"
15173 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
15174 TARGET_64BIT
? "d" : "wz",
15178 gcc_unreachable ();
15184 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
15185 /* Emit an assembler directive to set symbol visibility for DECL to
15186 VISIBILITY_TYPE. */
15189 rs6000_assemble_visibility (tree decl
, int vis
)
15194 /* Functions need to have their entry point symbol visibility set as
15195 well as their descriptor symbol visibility. */
15196 if (DEFAULT_ABI
== ABI_AIX
15198 && TREE_CODE (decl
) == FUNCTION_DECL
)
15200 static const char * const visibility_types
[] = {
15201 NULL
, "protected", "hidden", "internal"
15204 const char *name
, *type
;
15206 name
= ((* targetm
.strip_name_encoding
)
15207 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
))));
15208 type
= visibility_types
[vis
];
15210 fprintf (asm_out_file
, "\t.%s\t%s\n", type
, name
);
15211 fprintf (asm_out_file
, "\t.%s\t.%s\n", type
, name
);
15214 default_assemble_visibility (decl
, vis
);
15218 /* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
15219 entry. If RECORD_P is true and the target supports named sections,
15220 the location of the NOPs will be recorded in a special object section
15221 called "__patchable_function_entries". This routine may be called
15222 twice per function to put NOPs before and after the function
15226 rs6000_print_patchable_function_entry (FILE *file
,
15227 unsigned HOST_WIDE_INT patch_area_size
,
15230 bool global_entry_needed_p
= rs6000_global_entry_point_prologue_needed_p ();
15231 /* For a function which needs global entry point, we will emit the
15232 patchable area before and after local entry point under the control of
15233 cfun->machine->global_entry_emitted, see the handling in function
15234 rs6000_output_function_prologue. */
15235 if (!global_entry_needed_p
|| cfun
->machine
->global_entry_emitted
)
15236 default_print_patchable_function_entry (file
, patch_area_size
, record_p
);
15240 rs6000_reverse_condition (machine_mode mode
, enum rtx_code code
)
15242 /* Reversal of FP compares takes care -- an ordered compare
15243 becomes an unordered compare and vice versa. */
15244 if (mode
== CCFPmode
15245 && (!flag_finite_math_only
15246 || code
== UNLT
|| code
== UNLE
|| code
== UNGT
|| code
== UNGE
15247 || code
== UNEQ
|| code
== LTGT
))
15248 return reverse_condition_maybe_unordered (code
);
15250 return reverse_condition (code
);
15253 /* Check if C (as 64bit integer) can be rotated to a constant which constains
15254 nonzero bits at the LOWBITS low bits only.
15256 Return true if C can be rotated to such constant. If so, *ROT is written
15257 to the number by which C is rotated.
15258 Return false otherwise. */
15261 can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c
, int lowbits
, int *rot
)
15263 int clz
= HOST_BITS_PER_WIDE_INT
- lowbits
;
15265 /* case a. 0..0xxx: already at least clz zeros. */
15266 int lz
= clz_hwi (c
);
15273 /* case b. 0..0xxx0..0: at least clz zeros. */
15274 int tz
= ctz_hwi (c
);
15275 if (lz
+ tz
>= clz
)
15277 *rot
= HOST_BITS_PER_WIDE_INT
- tz
;
15281 /* case c. xx10.....0xx: rotate 'clz - 1' bits first, then check case b.
15282 ^bit -> Vbit, , then zeros are at head or tail.
15283 00...00xxx100, 'clz - 1' >= 'bits of xxxx'. */
15284 const int rot_bits
= lowbits
+ 1;
15285 unsigned HOST_WIDE_INT rc
= (c
>> rot_bits
) | (c
<< (clz
- 1));
15287 if (clz_hwi (rc
) + tz
>= clz
)
15289 *rot
= HOST_BITS_PER_WIDE_INT
- (tz
+ rot_bits
);
15296 /* Check if C (as 64bit integer) can be rotated to a positive 16bits constant
15297 which contains 48bits leading zeros and 16bits of any value. */
15300 can_be_rotated_to_positive_16bits (HOST_WIDE_INT c
)
15303 bool res
= can_be_rotated_to_lowbits (c
, 16, &rot
);
15304 return res
&& rot
> 0;
15307 /* Check if C (as 64bit integer) can be rotated to a negative 15bits constant
15308 which contains 49bits leading ones and 15bits of any value. */
15311 can_be_rotated_to_negative_15bits (HOST_WIDE_INT c
)
15314 bool res
= can_be_rotated_to_lowbits (~c
, 15, &rot
);
15315 return res
&& rot
> 0;
15318 /* Generate a compare for CODE. Return a brand-new rtx that
15319 represents the result of the compare. */
15322 rs6000_generate_compare (rtx cmp
, machine_mode mode
)
15324 machine_mode comp_mode
;
15325 rtx compare_result
;
15326 enum rtx_code code
= GET_CODE (cmp
);
15327 rtx op0
= XEXP (cmp
, 0);
15328 rtx op1
= XEXP (cmp
, 1);
15330 if (!TARGET_FLOAT128_HW
&& FLOAT128_IEEE_P (mode
))
15331 comp_mode
= CCmode
;
15332 else if (FLOAT_MODE_P (mode
))
15333 comp_mode
= CCFPmode
;
15334 else if (code
== GTU
|| code
== LTU
15335 || code
== GEU
|| code
== LEU
)
15336 comp_mode
= CCUNSmode
;
15337 else if ((code
== EQ
|| code
== NE
)
15338 && unsigned_reg_p (op0
)
15339 && (unsigned_reg_p (op1
)
15340 || (CONST_INT_P (op1
) && INTVAL (op1
) != 0)))
15341 /* These are unsigned values, perhaps there will be a later
15342 ordering compare that can be shared with this one. */
15343 comp_mode
= CCUNSmode
;
15345 comp_mode
= CCmode
;
15347 /* If we have an unsigned compare, make sure we don't have a signed value as
15349 if (comp_mode
== CCUNSmode
&& CONST_INT_P (op1
)
15350 && INTVAL (op1
) < 0)
15352 op0
= copy_rtx_if_shared (op0
);
15353 op1
= force_reg (GET_MODE (op0
), op1
);
15354 cmp
= gen_rtx_fmt_ee (code
, GET_MODE (cmp
), op0
, op1
);
15357 /* First, the compare. */
15358 compare_result
= gen_reg_rtx (comp_mode
);
15360 /* IEEE 128-bit support in VSX registers when we do not have hardware
15362 if (!TARGET_FLOAT128_HW
&& FLOAT128_IEEE_P (mode
))
15364 rtx libfunc
= NULL_RTX
;
15365 bool check_nan
= false;
15372 libfunc
= optab_libfunc (eq_optab
, mode
);
15377 libfunc
= optab_libfunc (ge_optab
, mode
);
15382 libfunc
= optab_libfunc (le_optab
, mode
);
15387 libfunc
= optab_libfunc (unord_optab
, mode
);
15388 code
= (code
== UNORDERED
) ? NE
: EQ
;
15394 libfunc
= optab_libfunc (ge_optab
, mode
);
15395 code
= (code
== UNGE
) ? GE
: GT
;
15401 libfunc
= optab_libfunc (le_optab
, mode
);
15402 code
= (code
== UNLE
) ? LE
: LT
;
15408 libfunc
= optab_libfunc (eq_optab
, mode
);
15409 code
= (code
= UNEQ
) ? EQ
: NE
;
15413 gcc_unreachable ();
15416 gcc_assert (libfunc
);
15419 dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
15420 SImode
, op0
, mode
, op1
, mode
);
15422 /* The library signals an exception for signalling NaNs, so we need to
15423 handle isgreater, etc. by first checking isordered. */
15426 rtx ne_rtx
, normal_dest
, unord_dest
;
15427 rtx unord_func
= optab_libfunc (unord_optab
, mode
);
15428 rtx join_label
= gen_label_rtx ();
15429 rtx join_ref
= gen_rtx_LABEL_REF (VOIDmode
, join_label
);
15430 rtx unord_cmp
= gen_reg_rtx (comp_mode
);
15433 /* Test for either value being a NaN. */
15434 gcc_assert (unord_func
);
15435 unord_dest
= emit_library_call_value (unord_func
, NULL_RTX
, LCT_CONST
,
15436 SImode
, op0
, mode
, op1
, mode
);
15438 /* Set value (0) if either value is a NaN, and jump to the join
15440 dest
= gen_reg_rtx (SImode
);
15441 emit_move_insn (dest
, const1_rtx
);
15442 emit_insn (gen_rtx_SET (unord_cmp
,
15443 gen_rtx_COMPARE (comp_mode
, unord_dest
,
15446 ne_rtx
= gen_rtx_NE (comp_mode
, unord_cmp
, const0_rtx
);
15447 emit_jump_insn (gen_rtx_SET (pc_rtx
,
15448 gen_rtx_IF_THEN_ELSE (VOIDmode
, ne_rtx
,
15452 /* Do the normal comparison, knowing that the values are not
15454 normal_dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
15455 SImode
, op0
, mode
, op1
, mode
);
15457 emit_insn (gen_cstoresi4 (dest
,
15458 gen_rtx_fmt_ee (code
, SImode
, normal_dest
,
15460 normal_dest
, const0_rtx
));
15462 /* Join NaN and non-Nan paths. Compare dest against 0. */
15463 emit_label (join_label
);
15467 emit_insn (gen_rtx_SET (compare_result
,
15468 gen_rtx_COMPARE (comp_mode
, dest
, const0_rtx
)));
15473 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
15474 CLOBBERs to match cmptf_internal2 pattern. */
15475 if (comp_mode
== CCFPmode
&& TARGET_XL_COMPAT
15476 && FLOAT128_IBM_P (GET_MODE (op0
))
15477 && TARGET_HARD_FLOAT
)
15478 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
15480 gen_rtx_SET (compare_result
,
15481 gen_rtx_COMPARE (comp_mode
, op0
, op1
)),
15482 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15483 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15484 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15485 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15486 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15487 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15488 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15489 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15490 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (Pmode
)))));
15491 else if (GET_CODE (op1
) == UNSPEC
15492 && XINT (op1
, 1) == UNSPEC_SP_TEST
)
15494 rtx op1b
= XVECEXP (op1
, 0, 0);
15495 comp_mode
= CCEQmode
;
15496 compare_result
= gen_reg_rtx (CCEQmode
);
15498 emit_insn (gen_stack_protect_testdi (compare_result
, op0
, op1b
));
15500 emit_insn (gen_stack_protect_testsi (compare_result
, op0
, op1b
));
15502 else if (mode
== V16QImode
)
15504 gcc_assert (code
== EQ
|| code
== NE
);
15506 rtx result_vector
= gen_reg_rtx (V16QImode
);
15507 rtx cc_bit
= gen_reg_rtx (SImode
);
15508 emit_insn (gen_altivec_vcmpequb_p (result_vector
, op0
, op1
));
15509 emit_insn (gen_cr6_test_for_lt (cc_bit
));
15510 emit_insn (gen_rtx_SET (compare_result
,
15511 gen_rtx_COMPARE (comp_mode
, cc_bit
,
15515 emit_insn (gen_rtx_SET (compare_result
,
15516 gen_rtx_COMPARE (comp_mode
, op0
, op1
)));
15519 validate_condition_mode (code
, GET_MODE (compare_result
));
15521 return gen_rtx_fmt_ee (code
, VOIDmode
, compare_result
, const0_rtx
);
15525 /* Return the diagnostic message string if the binary operation OP is
15526 not permitted on TYPE1 and TYPE2, NULL otherwise. */
15529 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED
,
15533 machine_mode mode1
= TYPE_MODE (type1
);
15534 machine_mode mode2
= TYPE_MODE (type2
);
15536 /* For complex modes, use the inner type. */
15537 if (COMPLEX_MODE_P (mode1
))
15538 mode1
= GET_MODE_INNER (mode1
);
15540 if (COMPLEX_MODE_P (mode2
))
15541 mode2
= GET_MODE_INNER (mode2
);
15543 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
15544 double to intermix unless -mfloat128-convert. */
15545 if (mode1
== mode2
)
15548 if (!TARGET_FLOAT128_CVT
)
15550 if ((FLOAT128_IEEE_P (mode1
) && FLOAT128_IBM_P (mode2
))
15551 || (FLOAT128_IBM_P (mode1
) && FLOAT128_IEEE_P (mode2
)))
15552 return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
15560 /* Expand floating point conversion to/from __float128 and __ibm128. */
15563 rs6000_expand_float128_convert (rtx dest
, rtx src
, bool unsigned_p
)
15565 machine_mode dest_mode
= GET_MODE (dest
);
15566 machine_mode src_mode
= GET_MODE (src
);
15567 convert_optab cvt
= unknown_optab
;
15568 bool do_move
= false;
15569 rtx libfunc
= NULL_RTX
;
15571 typedef rtx (*rtx_2func_t
) (rtx
, rtx
);
15572 rtx_2func_t hw_convert
= (rtx_2func_t
)0;
15576 rtx_2func_t from_df
;
15577 rtx_2func_t from_sf
;
15578 rtx_2func_t from_si_sign
;
15579 rtx_2func_t from_si_uns
;
15580 rtx_2func_t from_di_sign
;
15581 rtx_2func_t from_di_uns
;
15584 rtx_2func_t to_si_sign
;
15585 rtx_2func_t to_si_uns
;
15586 rtx_2func_t to_di_sign
;
15587 rtx_2func_t to_di_uns
;
15588 } hw_conversions
[2] = {
15589 /* convertions to/from KFmode */
15591 gen_extenddfkf2_hw
, /* KFmode <- DFmode. */
15592 gen_extendsfkf2_hw
, /* KFmode <- SFmode. */
15593 gen_float_kfsi2_hw
, /* KFmode <- SImode (signed). */
15594 gen_floatuns_kfsi2_hw
, /* KFmode <- SImode (unsigned). */
15595 gen_float_kfdi2_hw
, /* KFmode <- DImode (signed). */
15596 gen_floatuns_kfdi2_hw
, /* KFmode <- DImode (unsigned). */
15597 gen_trunckfdf2_hw
, /* DFmode <- KFmode. */
15598 gen_trunckfsf2_hw
, /* SFmode <- KFmode. */
15599 gen_fix_kfsi2_hw
, /* SImode <- KFmode (signed). */
15600 gen_fixuns_kfsi2_hw
, /* SImode <- KFmode (unsigned). */
15601 gen_fix_kfdi2_hw
, /* DImode <- KFmode (signed). */
15602 gen_fixuns_kfdi2_hw
, /* DImode <- KFmode (unsigned). */
15605 /* convertions to/from TFmode */
15607 gen_extenddftf2_hw
, /* TFmode <- DFmode. */
15608 gen_extendsftf2_hw
, /* TFmode <- SFmode. */
15609 gen_float_tfsi2_hw
, /* TFmode <- SImode (signed). */
15610 gen_floatuns_tfsi2_hw
, /* TFmode <- SImode (unsigned). */
15611 gen_float_tfdi2_hw
, /* TFmode <- DImode (signed). */
15612 gen_floatuns_tfdi2_hw
, /* TFmode <- DImode (unsigned). */
15613 gen_trunctfdf2_hw
, /* DFmode <- TFmode. */
15614 gen_trunctfsf2_hw
, /* SFmode <- TFmode. */
15615 gen_fix_tfsi2_hw
, /* SImode <- TFmode (signed). */
15616 gen_fixuns_tfsi2_hw
, /* SImode <- TFmode (unsigned). */
15617 gen_fix_tfdi2_hw
, /* DImode <- TFmode (signed). */
15618 gen_fixuns_tfdi2_hw
, /* DImode <- TFmode (unsigned). */
15622 if (dest_mode
== src_mode
)
15623 gcc_unreachable ();
15625 /* Eliminate memory operations. */
15627 src
= force_reg (src_mode
, src
);
15631 rtx tmp
= gen_reg_rtx (dest_mode
);
15632 rs6000_expand_float128_convert (tmp
, src
, unsigned_p
);
15633 rs6000_emit_move (dest
, tmp
, dest_mode
);
15637 /* Convert to IEEE 128-bit floating point. */
15638 if (FLOAT128_IEEE_P (dest_mode
))
15640 if (dest_mode
== KFmode
)
15642 else if (dest_mode
== TFmode
)
15645 gcc_unreachable ();
15651 hw_convert
= hw_conversions
[kf_or_tf
].from_df
;
15656 hw_convert
= hw_conversions
[kf_or_tf
].from_sf
;
15662 if (FLOAT128_IBM_P (src_mode
))
15671 cvt
= ufloat_optab
;
15672 hw_convert
= hw_conversions
[kf_or_tf
].from_si_uns
;
15676 cvt
= sfloat_optab
;
15677 hw_convert
= hw_conversions
[kf_or_tf
].from_si_sign
;
15684 cvt
= ufloat_optab
;
15685 hw_convert
= hw_conversions
[kf_or_tf
].from_di_uns
;
15689 cvt
= sfloat_optab
;
15690 hw_convert
= hw_conversions
[kf_or_tf
].from_di_sign
;
15695 gcc_unreachable ();
15699 /* Convert from IEEE 128-bit floating point. */
15700 else if (FLOAT128_IEEE_P (src_mode
))
15702 if (src_mode
== KFmode
)
15704 else if (src_mode
== TFmode
)
15707 gcc_unreachable ();
15713 hw_convert
= hw_conversions
[kf_or_tf
].to_df
;
15718 hw_convert
= hw_conversions
[kf_or_tf
].to_sf
;
15724 if (FLOAT128_IBM_P (dest_mode
))
15734 hw_convert
= hw_conversions
[kf_or_tf
].to_si_uns
;
15739 hw_convert
= hw_conversions
[kf_or_tf
].to_si_sign
;
15747 hw_convert
= hw_conversions
[kf_or_tf
].to_di_uns
;
15752 hw_convert
= hw_conversions
[kf_or_tf
].to_di_sign
;
15757 gcc_unreachable ();
15761 /* Both IBM format. */
15762 else if (FLOAT128_IBM_P (dest_mode
) && FLOAT128_IBM_P (src_mode
))
15766 gcc_unreachable ();
15768 /* Handle conversion between TFmode/KFmode/IFmode. */
15770 emit_insn (gen_rtx_SET (dest
, gen_rtx_FLOAT_EXTEND (dest_mode
, src
)));
15772 /* Handle conversion if we have hardware support. */
15773 else if (TARGET_FLOAT128_HW
&& hw_convert
)
15774 emit_insn ((hw_convert
) (dest
, src
));
15776 /* Call an external function to do the conversion. */
15777 else if (cvt
!= unknown_optab
)
15779 libfunc
= convert_optab_libfunc (cvt
, dest_mode
, src_mode
);
15780 gcc_assert (libfunc
!= NULL_RTX
);
15782 dest2
= emit_library_call_value (libfunc
, dest
, LCT_CONST
, dest_mode
,
15785 gcc_assert (dest2
!= NULL_RTX
);
15786 if (!rtx_equal_p (dest
, dest2
))
15787 emit_move_insn (dest
, dest2
);
15791 gcc_unreachable ();
15797 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
15798 can be used as that dest register. Return the dest register. */
15801 rs6000_emit_eqne (machine_mode mode
, rtx op1
, rtx op2
, rtx scratch
)
15803 if (op2
== const0_rtx
)
15806 if (GET_CODE (scratch
) == SCRATCH
)
15807 scratch
= gen_reg_rtx (mode
);
15809 if (logical_operand (op2
, mode
))
15810 emit_insn (gen_rtx_SET (scratch
, gen_rtx_XOR (mode
, op1
, op2
)));
15812 emit_insn (gen_rtx_SET (scratch
,
15813 gen_rtx_PLUS (mode
, op1
, negate_rtx (mode
, op2
))));
15818 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
15819 requires this. The result is mode MODE. */
15821 rs6000_emit_fp_cror (rtx_code code
, machine_mode mode
, rtx x
)
15825 if (code
== LTGT
|| code
== LE
|| code
== UNLT
)
15826 cond
[n
++] = gen_rtx_fmt_ee (LT
, mode
, x
, const0_rtx
);
15827 if (code
== LTGT
|| code
== GE
|| code
== UNGT
)
15828 cond
[n
++] = gen_rtx_fmt_ee (GT
, mode
, x
, const0_rtx
);
15829 if (code
== LE
|| code
== GE
|| code
== UNEQ
)
15830 cond
[n
++] = gen_rtx_fmt_ee (EQ
, mode
, x
, const0_rtx
);
15831 if (code
== UNLT
|| code
== UNGT
|| code
== UNEQ
)
15832 cond
[n
++] = gen_rtx_fmt_ee (UNORDERED
, mode
, x
, const0_rtx
);
15834 gcc_assert (n
== 2);
15836 rtx cc
= gen_reg_rtx (CCEQmode
);
15837 rtx logical
= gen_rtx_IOR (mode
, cond
[0], cond
[1]);
15838 emit_insn (gen_cceq_ior_compare (mode
, cc
, logical
, cond
[0], x
, cond
[1], x
));
15844 rs6000_emit_sCOND (machine_mode mode
, rtx operands
[])
15846 rtx condition_rtx
= rs6000_generate_compare (operands
[1], mode
);
15847 rtx_code cond_code
= GET_CODE (condition_rtx
);
15849 if (FLOAT_MODE_P (mode
) && HONOR_NANS (mode
)
15850 && !(FLOAT128_VECTOR_P (mode
) && !TARGET_FLOAT128_HW
))
15852 else if (cond_code
== NE
15853 || cond_code
== GE
|| cond_code
== LE
15854 || cond_code
== GEU
|| cond_code
== LEU
15855 || cond_code
== ORDERED
|| cond_code
== UNGE
|| cond_code
== UNLE
)
15857 rtx not_result
= gen_reg_rtx (CCEQmode
);
15858 rtx not_op
, rev_cond_rtx
;
15859 machine_mode cc_mode
;
15861 cc_mode
= GET_MODE (XEXP (condition_rtx
, 0));
15863 rev_cond_rtx
= gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode
, cond_code
),
15864 SImode
, XEXP (condition_rtx
, 0), const0_rtx
);
15865 not_op
= gen_rtx_COMPARE (CCEQmode
, rev_cond_rtx
, const0_rtx
);
15866 emit_insn (gen_rtx_SET (not_result
, not_op
));
15867 condition_rtx
= gen_rtx_EQ (VOIDmode
, not_result
, const0_rtx
);
15870 machine_mode op_mode
= GET_MODE (XEXP (operands
[1], 0));
15871 if (op_mode
== VOIDmode
)
15872 op_mode
= GET_MODE (XEXP (operands
[1], 1));
15874 if (TARGET_POWERPC64
&& (op_mode
== DImode
|| FLOAT_MODE_P (mode
)))
15876 PUT_MODE (condition_rtx
, DImode
);
15877 convert_move (operands
[0], condition_rtx
, 0);
15881 PUT_MODE (condition_rtx
, SImode
);
15882 emit_insn (gen_rtx_SET (operands
[0], condition_rtx
));
15886 /* Emit a branch of kind CODE to location LOC. */
15889 rs6000_emit_cbranch (machine_mode mode
, rtx operands
[])
15891 rtx condition_rtx
= rs6000_generate_compare (operands
[0], mode
);
15892 rtx loc_ref
= gen_rtx_LABEL_REF (VOIDmode
, operands
[3]);
15893 rtx ite
= gen_rtx_IF_THEN_ELSE (VOIDmode
, condition_rtx
, loc_ref
, pc_rtx
);
15894 emit_jump_insn (gen_rtx_SET (pc_rtx
, ite
));
15897 /* Return the string to output a conditional branch to LABEL, which is
15898 the operand template of the label, or NULL if the branch is really a
15899 conditional return.
15901 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
15902 condition code register and its mode specifies what kind of
15903 comparison we made.
15905 REVERSED is nonzero if we should reverse the sense of the comparison.
15907 INSN is the insn. */
15910 output_cbranch (rtx op
, const char *label
, int reversed
, rtx_insn
*insn
)
15912 static char string
[64];
15913 enum rtx_code code
= GET_CODE (op
);
15914 rtx cc_reg
= XEXP (op
, 0);
15915 machine_mode mode
= GET_MODE (cc_reg
);
15916 int cc_regno
= REGNO (cc_reg
) - CR0_REGNO
;
15917 int need_longbranch
= label
!= NULL
&& get_attr_length (insn
) == 8;
15918 int really_reversed
= reversed
^ need_longbranch
;
15924 validate_condition_mode (code
, mode
);
15926 /* Work out which way this really branches. We could use
15927 reverse_condition_maybe_unordered here always but this
15928 makes the resulting assembler clearer. */
15929 if (really_reversed
)
15931 /* Reversal of FP compares takes care -- an ordered compare
15932 becomes an unordered compare and vice versa. */
15933 if (mode
== CCFPmode
)
15934 code
= reverse_condition_maybe_unordered (code
);
15936 code
= reverse_condition (code
);
15941 /* Not all of these are actually distinct opcodes, but
15942 we distinguish them for clarity of the resulting assembler. */
15943 case NE
: case LTGT
:
15944 ccode
= "ne"; break;
15945 case EQ
: case UNEQ
:
15946 ccode
= "eq"; break;
15948 ccode
= "ge"; break;
15949 case GT
: case GTU
: case UNGT
:
15950 ccode
= "gt"; break;
15952 ccode
= "le"; break;
15953 case LT
: case LTU
: case UNLT
:
15954 ccode
= "lt"; break;
15955 case UNORDERED
: ccode
= "un"; break;
15956 case ORDERED
: ccode
= "nu"; break;
15957 case UNGE
: ccode
= "nl"; break;
15958 case UNLE
: ccode
= "ng"; break;
15960 gcc_unreachable ();
15963 /* Maybe we have a guess as to how likely the branch is. */
15965 note
= find_reg_note (insn
, REG_BR_PROB
, NULL_RTX
);
15966 if (note
!= NULL_RTX
)
15968 /* PROB is the difference from 50%. */
15969 int prob
= profile_probability::from_reg_br_prob_note (XINT (note
, 0))
15970 .to_reg_br_prob_base () - REG_BR_PROB_BASE
/ 2;
15972 /* Only hint for highly probable/improbable branches on newer cpus when
15973 we have real profile data, as static prediction overrides processor
15974 dynamic prediction. For older cpus we may as well always hint, but
15975 assume not taken for branches that are very close to 50% as a
15976 mispredicted taken branch is more expensive than a
15977 mispredicted not-taken branch. */
15978 if (rs6000_always_hint
15979 || (abs (prob
) > REG_BR_PROB_BASE
/ 100 * 48
15980 && (profile_status_for_fn (cfun
) != PROFILE_GUESSED
)
15981 && br_prob_note_reliable_p (note
)))
15983 if (abs (prob
) > REG_BR_PROB_BASE
/ 20
15984 && ((prob
> 0) ^ need_longbranch
))
15992 s
+= sprintf (s
, "b%slr%s ", ccode
, pred
);
15994 s
+= sprintf (s
, "b%s%s ", ccode
, pred
);
15996 /* We need to escape any '%' characters in the reg_names string.
15997 Assume they'd only be the first character.... */
15998 if (reg_names
[cc_regno
+ CR0_REGNO
][0] == '%')
16000 s
+= sprintf (s
, "%s", reg_names
[cc_regno
+ CR0_REGNO
]);
16004 /* If the branch distance was too far, we may have to use an
16005 unconditional branch to go the distance. */
16006 if (need_longbranch
)
16007 s
+= sprintf (s
, ",$+8\n\tb %s", label
);
16009 s
+= sprintf (s
, ",%s", label
);
16015 /* Return insn for VSX or Altivec comparisons. */
16018 rs6000_emit_vector_compare_inner (enum rtx_code code
, rtx op0
, rtx op1
)
16021 machine_mode mode
= GET_MODE (op0
);
16029 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16040 mask
= gen_reg_rtx (mode
);
16041 emit_insn (gen_rtx_SET (mask
, gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
16048 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
16049 DMODE is expected destination mode. This is a recursive function. */
16052 rs6000_emit_vector_compare (enum rtx_code rcode
,
16054 machine_mode dmode
)
16057 bool swap_operands
= false;
16058 bool try_again
= false;
16060 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode
));
16061 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
16063 /* See if the comparison works as is. */
16064 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
);
16072 swap_operands
= true;
16077 swap_operands
= true;
16085 /* Invert condition and try again.
16086 e.g., A != B becomes ~(A==B). */
16088 enum rtx_code rev_code
;
16089 enum insn_code nor_code
;
16092 rev_code
= reverse_condition_maybe_unordered (rcode
);
16093 if (rev_code
== UNKNOWN
)
16096 nor_code
= optab_handler (one_cmpl_optab
, dmode
);
16097 if (nor_code
== CODE_FOR_nothing
)
16100 mask2
= rs6000_emit_vector_compare (rev_code
, op0
, op1
, dmode
);
16104 mask
= gen_reg_rtx (dmode
);
16105 emit_insn (GEN_FCN (nor_code
) (mask
, mask2
));
16113 /* Try GT/GTU/LT/LTU OR EQ */
16116 enum insn_code ior_code
;
16117 enum rtx_code new_code
;
16138 gcc_unreachable ();
16141 ior_code
= optab_handler (ior_optab
, dmode
);
16142 if (ior_code
== CODE_FOR_nothing
)
16145 c_rtx
= rs6000_emit_vector_compare (new_code
, op0
, op1
, dmode
);
16149 eq_rtx
= rs6000_emit_vector_compare (EQ
, op0
, op1
, dmode
);
16153 mask
= gen_reg_rtx (dmode
);
16154 emit_insn (GEN_FCN (ior_code
) (mask
, c_rtx
, eq_rtx
));
16165 std::swap (op0
, op1
);
16167 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
);
16172 /* You only get two chances. */
16176 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
16177 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
16178 operands for the relation operation COND. */
16181 rs6000_emit_vector_cond_expr (rtx dest
, rtx op_true
, rtx op_false
,
16182 rtx cond
, rtx cc_op0
, rtx cc_op1
)
16184 machine_mode dest_mode
= GET_MODE (dest
);
16185 machine_mode mask_mode
= GET_MODE (cc_op0
);
16186 enum rtx_code rcode
= GET_CODE (cond
);
16188 bool invert_move
= false;
16190 if (VECTOR_UNIT_NONE_P (dest_mode
))
16193 gcc_assert (GET_MODE_SIZE (dest_mode
) == GET_MODE_SIZE (mask_mode
)
16194 && GET_MODE_NUNITS (dest_mode
) == GET_MODE_NUNITS (mask_mode
));
16198 /* Swap operands if we can, and fall back to doing the operation as
16199 specified, and doing a NOR to invert the test. */
16205 /* Invert condition and try again.
16206 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
16207 invert_move
= true;
16208 rcode
= reverse_condition_maybe_unordered (rcode
);
16209 if (rcode
== UNKNOWN
)
16215 if (GET_MODE_CLASS (mask_mode
) == MODE_VECTOR_INT
)
16217 /* Invert condition to avoid compound test. */
16218 invert_move
= true;
16219 rcode
= reverse_condition (rcode
);
16228 /* Invert condition to avoid compound test if necessary. */
16229 if (rcode
== GEU
|| rcode
== LEU
)
16231 invert_move
= true;
16232 rcode
= reverse_condition (rcode
);
16240 /* Get the vector mask for the given relational operations. */
16241 mask
= rs6000_emit_vector_compare (rcode
, cc_op0
, cc_op1
, mask_mode
);
16246 if (mask_mode
!= dest_mode
)
16247 mask
= simplify_gen_subreg (dest_mode
, mask
, mask_mode
, 0);
16250 std::swap (op_true
, op_false
);
16252 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
16253 if (GET_MODE_CLASS (dest_mode
) == MODE_VECTOR_INT
16254 && (GET_CODE (op_true
) == CONST_VECTOR
16255 || GET_CODE (op_false
) == CONST_VECTOR
))
16257 rtx constant_0
= CONST0_RTX (dest_mode
);
16258 rtx constant_m1
= CONSTM1_RTX (dest_mode
);
16260 if (op_true
== constant_m1
&& op_false
== constant_0
)
16262 emit_move_insn (dest
, mask
);
16266 else if (op_true
== constant_0
&& op_false
== constant_m1
)
16268 emit_insn (gen_rtx_SET (dest
, gen_rtx_NOT (dest_mode
, mask
)));
16272 /* If we can't use the vector comparison directly, perhaps we can use
16273 the mask for the true or false fields, instead of loading up a
16275 if (op_true
== constant_m1
)
16278 if (op_false
== constant_0
)
16282 if (!REG_P (op_true
) && !SUBREG_P (op_true
))
16283 op_true
= force_reg (dest_mode
, op_true
);
16285 if (!REG_P (op_false
) && !SUBREG_P (op_false
))
16286 op_false
= force_reg (dest_mode
, op_false
);
16288 rtx tmp
= gen_rtx_IOR (dest_mode
,
16289 gen_rtx_AND (dest_mode
, gen_rtx_NOT (dest_mode
, mask
),
16291 gen_rtx_AND (dest_mode
, mask
, op_true
));
16292 emit_insn (gen_rtx_SET (dest
, tmp
));
16296 /* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to emit a
16297 maximum or minimum with "C" semantics.
16299 Unless you use -ffast-math, you can't use these instructions to replace
16300 conditions that implicitly reverse the condition because the comparison
16301 might generate a NaN or signed zer0.
16303 I.e. the following can be replaced all of the time
16304 ret = (op1 > op2) ? op1 : op2 ; generate xsmaxcdp
16305 ret = (op1 >= op2) ? op1 : op2 ; generate xsmaxcdp
16306 ret = (op1 < op2) ? op1 : op2; ; generate xsmincdp
16307 ret = (op1 <= op2) ? op1 : op2; ; generate xsmincdp
16309 The following can be replaced only if -ffast-math is used:
16310 ret = (op1 < op2) ? op2 : op1 ; generate xsmaxcdp
16311 ret = (op1 <= op2) ? op2 : op1 ; generate xsmaxcdp
16312 ret = (op1 > op2) ? op2 : op1; ; generate xsmincdp
16313 ret = (op1 >= op2) ? op2 : op1; ; generate xsmincdp
16315 Move TRUE_COND to DEST if OP of the operands of the last comparison is
16316 nonzero/true, FALSE_COND if it is zero/false.
16318 Return false if we can't generate the appropriate minimum or maximum, and
16319 true if we can did the minimum or maximum. */
16322 rs6000_maybe_emit_maxc_minc (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
16324 enum rtx_code code
= GET_CODE (op
);
16325 rtx op0
= XEXP (op
, 0);
16326 rtx op1
= XEXP (op
, 1);
16327 machine_mode compare_mode
= GET_MODE (op0
);
16328 machine_mode result_mode
= GET_MODE (dest
);
16330 if (result_mode
!= compare_mode
)
16333 /* See the comments of this function, it simply expects GE/GT/LE/LT in
16334 the checks, but for the reversible equivalent UNLT/UNLE/UNGT/UNGE,
16335 we need to do the reversions first to make the following checks
16336 support fewer cases, like:
16338 (a UNLT b) ? op1 : op2 => (a >= b) ? op2 : op1;
16339 (a UNLE b) ? op1 : op2 => (a > b) ? op2 : op1;
16340 (a UNGT b) ? op1 : op2 => (a <= b) ? op2 : op1;
16341 (a UNGE b) ? op1 : op2 => (a < b) ? op2 : op1;
16343 By the way, if we see these UNLT/UNLE/UNGT/UNGE it's guaranteed
16344 that we have 4-way condition codes (LT/GT/EQ/UN), so we do not
16345 have to check for fast-math or the like. */
16346 if (code
== UNGE
|| code
== UNGT
|| code
== UNLE
|| code
== UNLT
)
16348 code
= reverse_condition_maybe_unordered (code
);
16349 std::swap (true_cond
, false_cond
);
16353 if (code
== GE
|| code
== GT
)
16355 else if (code
== LE
|| code
== LT
)
16360 if (rtx_equal_p (op0
, true_cond
) && rtx_equal_p (op1
, false_cond
))
16363 /* Only when NaNs and signed-zeros are not in effect, smax could be
16364 used for `op0 < op1 ? op1 : op0`, and smin could be used for
16365 `op0 > op1 ? op1 : op0`. */
16366 else if (rtx_equal_p (op1
, true_cond
) && rtx_equal_p (op0
, false_cond
)
16367 && !HONOR_NANS (compare_mode
) && !HONOR_SIGNED_ZEROS (compare_mode
))
16373 rs6000_emit_minmax (dest
, max_p
? SMAX
: SMIN
, op0
, op1
);
16377 /* Possibly emit a floating point conditional move by generating a compare that
16378 sets a mask instruction and a XXSEL select instruction.
16380 Move TRUE_COND to DEST if OP of the operands of the last comparison is
16381 nonzero/true, FALSE_COND if it is zero/false.
16383 Return false if the operation cannot be generated, and true if we could
16384 generate the instruction. */
16387 rs6000_maybe_emit_fp_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
16389 enum rtx_code code
= GET_CODE (op
);
16390 rtx op0
= XEXP (op
, 0);
16391 rtx op1
= XEXP (op
, 1);
16392 machine_mode compare_mode
= GET_MODE (op0
);
16393 machine_mode result_mode
= GET_MODE (dest
);
16398 if (!can_create_pseudo_p ())
16401 /* We allow the comparison to be either SFmode/DFmode and the true/false
16402 condition to be either SFmode/DFmode. I.e. we allow:
16407 r = (a == b) ? c : d;
16414 r = (a == b) ? c : d;
16416 but we don't allow intermixing the IEEE 128-bit floating point types with
16417 the 32/64-bit scalar types. */
16419 if (!(compare_mode
== result_mode
16420 || (compare_mode
== SFmode
&& result_mode
== DFmode
)
16421 || (compare_mode
== DFmode
&& result_mode
== SFmode
)))
16434 code
= swap_condition (code
);
16435 std::swap (op0
, op1
);
16442 /* Generate: [(parallel [(set (dest)
16443 (if_then_else (op (cmp1) (cmp2))
16446 (clobber (scratch))])]. */
16448 compare_rtx
= gen_rtx_fmt_ee (code
, CCFPmode
, op0
, op1
);
16449 cmove_rtx
= gen_rtx_SET (dest
,
16450 gen_rtx_IF_THEN_ELSE (result_mode
,
16455 clobber_rtx
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (V2DImode
));
16456 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
16457 gen_rtvec (2, cmove_rtx
, clobber_rtx
)));
16462 /* Helper function to return true if the target has instructions to do a
16463 compare and set mask instruction that can be used with XXSEL to implement a
16464 conditional move. It is also assumed that such a target also supports the
16465 "C" minimum and maximum instructions. */
16468 have_compare_and_set_mask (machine_mode mode
)
16474 return TARGET_P9_MINMAX
;
16478 return TARGET_POWER10
&& TARGET_FLOAT128_HW
&& FLOAT128_IEEE_P (mode
);
16487 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
16488 operands of the last comparison is nonzero/true, FALSE_COND if it
16489 is zero/false. Return 0 if the hardware has no such operation. */
16492 rs6000_emit_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
16494 enum rtx_code code
= GET_CODE (op
);
16495 rtx op0
= XEXP (op
, 0);
16496 rtx op1
= XEXP (op
, 1);
16497 machine_mode compare_mode
= GET_MODE (op0
);
16498 machine_mode result_mode
= GET_MODE (dest
);
16500 bool is_against_zero
;
16502 /* These modes should always match. */
16503 if (GET_MODE (op1
) != compare_mode
16504 /* In the isel case however, we can use a compare immediate, so
16505 op1 may be a small constant. */
16506 && (!TARGET_ISEL
|| !short_cint_operand (op1
, VOIDmode
)))
16508 if (GET_MODE (true_cond
) != result_mode
)
16510 if (GET_MODE (false_cond
) != result_mode
)
16513 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
16515 if (have_compare_and_set_mask (compare_mode
)
16516 && have_compare_and_set_mask (result_mode
))
16518 if (rs6000_maybe_emit_maxc_minc (dest
, op
, true_cond
, false_cond
))
16521 if (rs6000_maybe_emit_fp_cmove (dest
, op
, true_cond
, false_cond
))
16525 /* Don't allow using floating point comparisons for integer results for
16527 if (FLOAT_MODE_P (compare_mode
) && !FLOAT_MODE_P (result_mode
))
16530 /* First, work out if the hardware can do this at all, or
16531 if it's too slow.... */
16532 if (!FLOAT_MODE_P (compare_mode
))
16535 return rs6000_emit_int_cmove (dest
, op
, true_cond
, false_cond
);
16539 is_against_zero
= op1
== CONST0_RTX (compare_mode
);
16541 /* A floating-point subtract might overflow, underflow, or produce
16542 an inexact result, thus changing the floating-point flags, so it
16543 can't be generated if we care about that. It's safe if one side
16544 of the construct is zero, since then no subtract will be
16546 if (SCALAR_FLOAT_MODE_P (compare_mode
)
16547 && flag_trapping_math
&& ! is_against_zero
)
16550 /* Eliminate half of the comparisons by switching operands, this
16551 makes the remaining code simpler. */
16552 if (code
== UNLT
|| code
== UNGT
|| code
== UNORDERED
|| code
== NE
16553 || code
== LTGT
|| code
== LT
|| code
== UNLE
)
16555 code
= reverse_condition_maybe_unordered (code
);
16557 true_cond
= false_cond
;
16561 /* UNEQ and LTGT take four instructions for a comparison with zero,
16562 it'll probably be faster to use a branch here too. */
16563 if (code
== UNEQ
&& HONOR_NANS (compare_mode
))
16566 /* We're going to try to implement comparisons by performing
16567 a subtract, then comparing against zero. Unfortunately,
16568 Inf - Inf is NaN which is not zero, and so if we don't
16569 know that the operand is finite and the comparison
16570 would treat EQ different to UNORDERED, we can't do it. */
16571 if (HONOR_INFINITIES (compare_mode
)
16572 && code
!= GT
&& code
!= UNGE
16573 && (!CONST_DOUBLE_P (op1
)
16574 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1
)))
16575 /* Constructs of the form (a OP b ? a : b) are safe. */
16576 && ((! rtx_equal_p (op0
, false_cond
) && ! rtx_equal_p (op1
, false_cond
))
16577 || (! rtx_equal_p (op0
, true_cond
)
16578 && ! rtx_equal_p (op1
, true_cond
))))
16581 /* At this point we know we can use fsel. */
16583 /* Don't allow compare_mode other than SFmode or DFmode, for others there
16584 is no fsel instruction. */
16585 if (compare_mode
!= SFmode
&& compare_mode
!= DFmode
)
16588 /* Reduce the comparison to a comparison against zero. */
16589 if (! is_against_zero
)
16591 temp
= gen_reg_rtx (compare_mode
);
16592 emit_insn (gen_rtx_SET (temp
, gen_rtx_MINUS (compare_mode
, op0
, op1
)));
16594 op1
= CONST0_RTX (compare_mode
);
16597 /* If we don't care about NaNs we can reduce some of the comparisons
16598 down to faster ones. */
16599 if (! HONOR_NANS (compare_mode
))
16605 true_cond
= false_cond
;
16618 /* Now, reduce everything down to a GE. */
16625 temp
= gen_reg_rtx (compare_mode
);
16626 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
16631 temp
= gen_reg_rtx (compare_mode
);
16632 emit_insn (gen_rtx_SET (temp
, gen_rtx_ABS (compare_mode
, op0
)));
16637 temp
= gen_reg_rtx (compare_mode
);
16638 emit_insn (gen_rtx_SET (temp
,
16639 gen_rtx_NEG (compare_mode
,
16640 gen_rtx_ABS (compare_mode
, op0
))));
16645 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
16646 temp
= gen_reg_rtx (result_mode
);
16647 emit_insn (gen_rtx_SET (temp
,
16648 gen_rtx_IF_THEN_ELSE (result_mode
,
16649 gen_rtx_GE (VOIDmode
,
16651 true_cond
, false_cond
)));
16652 false_cond
= true_cond
;
16655 temp
= gen_reg_rtx (compare_mode
);
16656 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
16661 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
16662 temp
= gen_reg_rtx (result_mode
);
16663 emit_insn (gen_rtx_SET (temp
,
16664 gen_rtx_IF_THEN_ELSE (result_mode
,
16665 gen_rtx_GE (VOIDmode
,
16667 true_cond
, false_cond
)));
16668 true_cond
= false_cond
;
16671 temp
= gen_reg_rtx (compare_mode
);
16672 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
16677 gcc_unreachable ();
16680 emit_insn (gen_rtx_SET (dest
,
16681 gen_rtx_IF_THEN_ELSE (result_mode
,
16682 gen_rtx_GE (VOIDmode
,
16684 true_cond
, false_cond
)));
16688 /* Same as above, but for ints (isel). */
16691 rs6000_emit_int_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
16693 rtx condition_rtx
, cr
;
16694 machine_mode mode
= GET_MODE (dest
);
16695 enum rtx_code cond_code
;
16696 rtx (*isel_func
) (rtx
, rtx
, rtx
, rtx
, rtx
);
16699 if (mode
!= SImode
&& (!TARGET_POWERPC64
|| mode
!= DImode
))
16702 /* PR104335: We now need to expect CC-mode "comparisons"
16703 coming from ifcvt. The following code expects proper
16704 comparisons so better abort here. */
16705 if (GET_MODE_CLASS (GET_MODE (XEXP (op
, 0))) == MODE_CC
)
16708 /* We still have to do the compare, because isel doesn't do a
16709 compare, it just looks at the CRx bits set by a previous compare
16711 condition_rtx
= rs6000_generate_compare (op
, mode
);
16712 cond_code
= GET_CODE (condition_rtx
);
16713 cr
= XEXP (condition_rtx
, 0);
16714 signedp
= GET_MODE (cr
) == CCmode
;
16716 isel_func
= (mode
== SImode
16717 ? (signedp
? gen_isel_cc_si
: gen_isel_ccuns_si
)
16718 : (signedp
? gen_isel_cc_di
: gen_isel_ccuns_di
));
16722 case LT
: case GT
: case LTU
: case GTU
: case EQ
:
16723 /* isel handles these directly. */
16727 /* We need to swap the sense of the comparison. */
16729 std::swap (false_cond
, true_cond
);
16730 PUT_CODE (condition_rtx
, reverse_condition (cond_code
));
16735 false_cond
= force_reg (mode
, false_cond
);
16736 if (true_cond
!= const0_rtx
)
16737 true_cond
= force_reg (mode
, true_cond
);
16739 emit_insn (isel_func (dest
, condition_rtx
, true_cond
, false_cond
, cr
));
16745 rs6000_emit_minmax (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
16747 machine_mode mode
= GET_MODE (op0
);
16751 /* VSX/altivec have direct min/max insns. */
16752 if ((code
== SMAX
|| code
== SMIN
)
16753 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode
)
16754 || (mode
== SFmode
&& VECTOR_UNIT_VSX_P (DFmode
))
16755 || (TARGET_POWER10
&& TARGET_FLOAT128_HW
&& FLOAT128_IEEE_P (mode
))))
16757 emit_insn (gen_rtx_SET (dest
, gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
16761 if (code
== SMAX
|| code
== SMIN
)
16766 if (code
== SMAX
|| code
== UMAX
)
16767 target
= emit_conditional_move (dest
, { c
, op0
, op1
, mode
},
16768 op0
, op1
, mode
, 0);
16770 target
= emit_conditional_move (dest
, { c
, op0
, op1
, mode
},
16771 op1
, op0
, mode
, 0);
16772 gcc_assert (target
);
16773 if (target
!= dest
)
16774 emit_move_insn (dest
, target
);
16777 /* A subroutine of the atomic operation splitters. Jump to LABEL if
16778 COND is true. Mark the jump as unlikely to be taken. */
16781 emit_unlikely_jump (rtx cond
, rtx label
)
16783 rtx x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, cond
, label
, pc_rtx
);
16784 rtx_insn
*insn
= emit_jump_insn (gen_rtx_SET (pc_rtx
, x
));
16785 add_reg_br_prob_note (insn
, profile_probability::very_unlikely ());
16788 /* A subroutine of the atomic operation splitters. Emit a load-locked
16789 instruction in MODE. For QI/HImode, possibly use a pattern than includes
16790 the zero_extend operation. */
16793 emit_load_locked (machine_mode mode
, rtx reg
, rtx mem
)
16795 rtx (*fn
) (rtx
, rtx
) = NULL
;
16800 fn
= gen_load_lockedqi
;
16803 fn
= gen_load_lockedhi
;
16806 if (GET_MODE (mem
) == QImode
)
16807 fn
= gen_load_lockedqi_si
;
16808 else if (GET_MODE (mem
) == HImode
)
16809 fn
= gen_load_lockedhi_si
;
16811 fn
= gen_load_lockedsi
;
16814 fn
= gen_load_lockeddi
;
16817 fn
= gen_load_lockedti
;
16820 gcc_unreachable ();
16822 emit_insn (fn (reg
, mem
));
16825 /* A subroutine of the atomic operation splitters. Emit a store-conditional
16826 instruction in MODE. */
16829 emit_store_conditional (machine_mode mode
, rtx res
, rtx mem
, rtx val
)
16831 rtx (*fn
) (rtx
, rtx
, rtx
) = NULL
;
16836 fn
= gen_store_conditionalqi
;
16839 fn
= gen_store_conditionalhi
;
16842 fn
= gen_store_conditionalsi
;
16845 fn
= gen_store_conditionaldi
;
16848 fn
= gen_store_conditionalti
;
16851 gcc_unreachable ();
16854 /* Emit sync before stwcx. to address PPC405 Erratum. */
16855 if (PPC405_ERRATUM77
)
16856 emit_insn (gen_hwsync ());
16858 emit_insn (fn (res
, mem
, val
));
16861 /* Expand barriers before and after a load_locked/store_cond sequence. */
16864 rs6000_pre_atomic_barrier (rtx mem
, enum memmodel model
)
16866 rtx addr
= XEXP (mem
, 0);
16868 if (!legitimate_indirect_address_p (addr
, reload_completed
)
16869 && !legitimate_indexed_address_p (addr
, reload_completed
))
16871 addr
= force_reg (Pmode
, addr
);
16872 mem
= replace_equiv_address_nv (mem
, addr
);
16877 case MEMMODEL_RELAXED
:
16878 case MEMMODEL_CONSUME
:
16879 case MEMMODEL_ACQUIRE
:
16881 case MEMMODEL_RELEASE
:
16882 case MEMMODEL_ACQ_REL
:
16883 emit_insn (gen_lwsync ());
16885 case MEMMODEL_SEQ_CST
:
16886 emit_insn (gen_hwsync ());
16889 gcc_unreachable ();
16895 rs6000_post_atomic_barrier (enum memmodel model
)
16899 case MEMMODEL_RELAXED
:
16900 case MEMMODEL_CONSUME
:
16901 case MEMMODEL_RELEASE
:
16903 case MEMMODEL_ACQUIRE
:
16904 case MEMMODEL_ACQ_REL
:
16905 case MEMMODEL_SEQ_CST
:
16906 emit_insn (gen_isync ());
16909 gcc_unreachable ();
16913 /* A subroutine of the various atomic expanders. For sub-word operations,
16914 we must adjust things to operate on SImode. Given the original MEM,
16915 return a new aligned memory. Also build and return the quantities by
16916 which to shift and mask. */
16919 rs6000_adjust_atomic_subword (rtx orig_mem
, rtx
*pshift
, rtx
*pmask
)
16921 rtx addr
, align
, shift
, mask
, mem
;
16922 HOST_WIDE_INT shift_mask
;
16923 machine_mode mode
= GET_MODE (orig_mem
);
16925 /* For smaller modes, we have to implement this via SImode. */
16926 shift_mask
= (mode
== QImode
? 0x18 : 0x10);
16928 addr
= XEXP (orig_mem
, 0);
16929 addr
= force_reg (GET_MODE (addr
), addr
);
16931 /* Aligned memory containing subword. Generate a new memory. We
16932 do not want any of the existing MEM_ATTR data, as we're now
16933 accessing memory outside the original object. */
16934 align
= expand_simple_binop (Pmode
, AND
, addr
, GEN_INT (-4),
16935 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16936 mem
= gen_rtx_MEM (SImode
, align
);
16937 MEM_VOLATILE_P (mem
) = MEM_VOLATILE_P (orig_mem
);
16938 if (MEM_ALIAS_SET (orig_mem
) == ALIAS_SET_MEMORY_BARRIER
)
16939 set_mem_alias_set (mem
, ALIAS_SET_MEMORY_BARRIER
);
16941 /* Shift amount for subword relative to aligned word. */
16942 shift
= gen_reg_rtx (SImode
);
16943 addr
= gen_lowpart (SImode
, addr
);
16944 rtx tmp
= gen_reg_rtx (SImode
);
16945 emit_insn (gen_ashlsi3 (tmp
, addr
, GEN_INT (3)));
16946 emit_insn (gen_andsi3 (shift
, tmp
, GEN_INT (shift_mask
)));
16947 if (BYTES_BIG_ENDIAN
)
16948 shift
= expand_simple_binop (SImode
, XOR
, shift
, GEN_INT (shift_mask
),
16949 shift
, 1, OPTAB_LIB_WIDEN
);
16952 /* Mask for insertion. */
16953 mask
= expand_simple_binop (SImode
, ASHIFT
, GEN_INT (GET_MODE_MASK (mode
)),
16954 shift
, NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16960 /* A subroutine of the various atomic expanders. For sub-word operands,
16961 combine OLDVAL and NEWVAL via MASK. Returns a new pseduo. */
16964 rs6000_mask_atomic_subword (rtx oldval
, rtx newval
, rtx mask
)
16968 x
= gen_reg_rtx (SImode
);
16969 emit_insn (gen_rtx_SET (x
, gen_rtx_AND (SImode
,
16970 gen_rtx_NOT (SImode
, mask
),
16973 x
= expand_simple_binop (SImode
, IOR
, newval
, x
, x
, 1, OPTAB_LIB_WIDEN
);
16978 /* A subroutine of the various atomic expanders. For sub-word operands,
16979 extract WIDE to NARROW via SHIFT. */
16982 rs6000_finish_atomic_subword (rtx narrow
, rtx wide
, rtx shift
)
16984 wide
= expand_simple_binop (SImode
, LSHIFTRT
, wide
, shift
,
16985 wide
, 1, OPTAB_LIB_WIDEN
);
16986 emit_move_insn (narrow
, gen_lowpart (GET_MODE (narrow
), wide
));
16989 /* Expand an atomic compare and swap operation. */
16992 rs6000_expand_atomic_compare_and_swap (rtx operands
[])
16994 rtx boolval
, retval
, mem
, oldval
, newval
, cond
;
16995 rtx label1
, label2
, x
, mask
, shift
;
16996 machine_mode mode
, orig_mode
;
16997 enum memmodel mod_s
, mod_f
;
17000 boolval
= operands
[0];
17001 retval
= operands
[1];
17003 oldval
= operands
[3];
17004 newval
= operands
[4];
17005 is_weak
= (INTVAL (operands
[5]) != 0);
17006 mod_s
= memmodel_base (INTVAL (operands
[6]));
17007 mod_f
= memmodel_base (INTVAL (operands
[7]));
17008 orig_mode
= mode
= GET_MODE (mem
);
17010 mask
= shift
= NULL_RTX
;
17011 if (mode
== QImode
|| mode
== HImode
)
17013 /* Before power8, we didn't have access to lbarx/lharx, so generate a
17014 lwarx and shift/mask operations. With power8, we need to do the
17015 comparison in SImode, but the store is still done in QI/HImode. */
17016 oldval
= convert_modes (SImode
, mode
, oldval
, 1);
17018 if (!TARGET_SYNC_HI_QI
)
17020 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
17022 /* Shift and mask OLDVAL into position with the word. */
17023 oldval
= expand_simple_binop (SImode
, ASHIFT
, oldval
, shift
,
17024 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
17026 /* Shift and mask NEWVAL into position within the word. */
17027 newval
= convert_modes (SImode
, mode
, newval
, 1);
17028 newval
= expand_simple_binop (SImode
, ASHIFT
, newval
, shift
,
17029 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
17032 /* Prepare to adjust the return value. */
17033 retval
= gen_reg_rtx (SImode
);
17036 else if (reg_overlap_mentioned_p (retval
, oldval
))
17037 oldval
= copy_to_reg (oldval
);
17039 if (mode
!= TImode
&& !reg_or_short_operand (oldval
, mode
))
17040 oldval
= copy_to_mode_reg (mode
, oldval
);
17042 if (reg_overlap_mentioned_p (retval
, newval
))
17043 newval
= copy_to_reg (newval
);
17045 mem
= rs6000_pre_atomic_barrier (mem
, mod_s
);
17050 label1
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
17051 emit_label (XEXP (label1
, 0));
17053 label2
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
17055 emit_load_locked (mode
, retval
, mem
);
17059 x
= expand_simple_binop (SImode
, AND
, retval
, mask
,
17060 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
17062 cond
= gen_reg_rtx (CCmode
);
17063 /* If we have TImode, synthesize a comparison. */
17064 if (mode
!= TImode
)
17065 x
= gen_rtx_COMPARE (CCmode
, x
, oldval
);
17068 rtx xor1_result
= gen_reg_rtx (DImode
);
17069 rtx xor2_result
= gen_reg_rtx (DImode
);
17070 rtx or_result
= gen_reg_rtx (DImode
);
17071 rtx new_word0
= simplify_gen_subreg (DImode
, x
, TImode
, 0);
17072 rtx new_word1
= simplify_gen_subreg (DImode
, x
, TImode
, 8);
17073 rtx old_word0
= simplify_gen_subreg (DImode
, oldval
, TImode
, 0);
17074 rtx old_word1
= simplify_gen_subreg (DImode
, oldval
, TImode
, 8);
17076 emit_insn (gen_xordi3 (xor1_result
, new_word0
, old_word0
));
17077 emit_insn (gen_xordi3 (xor2_result
, new_word1
, old_word1
));
17078 emit_insn (gen_iordi3 (or_result
, xor1_result
, xor2_result
));
17079 x
= gen_rtx_COMPARE (CCmode
, or_result
, const0_rtx
);
17082 emit_insn (gen_rtx_SET (cond
, x
));
17084 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
17085 emit_unlikely_jump (x
, label2
);
17089 x
= rs6000_mask_atomic_subword (retval
, newval
, mask
);
17091 emit_store_conditional (orig_mode
, cond
, mem
, x
);
17095 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
17096 emit_unlikely_jump (x
, label1
);
17099 if (!is_mm_relaxed (mod_f
))
17100 emit_label (XEXP (label2
, 0));
17102 rs6000_post_atomic_barrier (mod_s
);
17104 if (is_mm_relaxed (mod_f
))
17105 emit_label (XEXP (label2
, 0));
17108 rs6000_finish_atomic_subword (operands
[1], retval
, shift
);
17109 else if (mode
!= GET_MODE (operands
[1]))
17110 convert_move (operands
[1], retval
, 1);
17112 /* In all cases, CR0 contains EQ on success, and NE on failure. */
17113 x
= gen_rtx_EQ (SImode
, cond
, const0_rtx
);
17114 emit_insn (gen_rtx_SET (boolval
, x
));
17117 /* Expand an atomic exchange operation. */
17120 rs6000_expand_atomic_exchange (rtx operands
[])
17122 rtx retval
, mem
, val
, cond
;
17124 enum memmodel model
;
17125 rtx label
, x
, mask
, shift
;
17127 retval
= operands
[0];
17130 model
= memmodel_base (INTVAL (operands
[3]));
17131 mode
= GET_MODE (mem
);
17133 mask
= shift
= NULL_RTX
;
17134 if (!TARGET_SYNC_HI_QI
&& (mode
== QImode
|| mode
== HImode
))
17136 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
17138 /* Shift and mask VAL into position with the word. */
17139 val
= convert_modes (SImode
, mode
, val
, 1);
17140 val
= expand_simple_binop (SImode
, ASHIFT
, val
, shift
,
17141 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
17143 /* Prepare to adjust the return value. */
17144 retval
= gen_reg_rtx (SImode
);
17148 mem
= rs6000_pre_atomic_barrier (mem
, model
);
17150 label
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
17151 emit_label (XEXP (label
, 0));
17153 emit_load_locked (mode
, retval
, mem
);
17157 x
= rs6000_mask_atomic_subword (retval
, val
, mask
);
17159 cond
= gen_reg_rtx (CCmode
);
17160 emit_store_conditional (mode
, cond
, mem
, x
);
17162 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
17163 emit_unlikely_jump (x
, label
);
17165 rs6000_post_atomic_barrier (model
);
17168 rs6000_finish_atomic_subword (operands
[0], retval
, shift
);
17171 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
17172 to perform. MEM is the memory on which to operate. VAL is the second
17173 operand of the binary operator. BEFORE and AFTER are optional locations to
17174 return the value of MEM either before of after the operation. MODEL_RTX
17175 is a CONST_INT containing the memory model to use. */
17178 rs6000_expand_atomic_op (enum rtx_code code
, rtx mem
, rtx val
,
17179 rtx orig_before
, rtx orig_after
, rtx model_rtx
)
17181 enum memmodel model
= memmodel_base (INTVAL (model_rtx
));
17182 machine_mode mode
= GET_MODE (mem
);
17183 machine_mode store_mode
= mode
;
17184 rtx label
, x
, cond
, mask
, shift
;
17185 rtx before
= orig_before
, after
= orig_after
;
17187 mask
= shift
= NULL_RTX
;
17188 /* On power8, we want to use SImode for the operation. On previous systems,
17189 use the operation in a subword and shift/mask to get the proper byte or
17191 if (mode
== QImode
|| mode
== HImode
)
17193 if (TARGET_SYNC_HI_QI
)
17195 val
= convert_modes (SImode
, mode
, val
, 1);
17197 /* Prepare to adjust the return value. */
17198 before
= gen_reg_rtx (SImode
);
17200 after
= gen_reg_rtx (SImode
);
17205 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
17207 /* Shift and mask VAL into position with the word. */
17208 val
= convert_modes (SImode
, mode
, val
, 1);
17209 val
= expand_simple_binop (SImode
, ASHIFT
, val
, shift
,
17210 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
17216 /* We've already zero-extended VAL. That is sufficient to
17217 make certain that it does not affect other bits. */
17222 /* If we make certain that all of the other bits in VAL are
17223 set, that will be sufficient to not affect other bits. */
17224 x
= gen_rtx_NOT (SImode
, mask
);
17225 x
= gen_rtx_IOR (SImode
, x
, val
);
17226 emit_insn (gen_rtx_SET (val
, x
));
17233 /* These will all affect bits outside the field and need
17234 adjustment via MASK within the loop. */
17238 gcc_unreachable ();
17241 /* Prepare to adjust the return value. */
17242 before
= gen_reg_rtx (SImode
);
17244 after
= gen_reg_rtx (SImode
);
17245 store_mode
= mode
= SImode
;
17249 mem
= rs6000_pre_atomic_barrier (mem
, model
);
17251 label
= gen_label_rtx ();
17252 emit_label (label
);
17253 label
= gen_rtx_LABEL_REF (VOIDmode
, label
);
17255 if (before
== NULL_RTX
)
17256 before
= gen_reg_rtx (mode
);
17258 emit_load_locked (mode
, before
, mem
);
17262 x
= expand_simple_binop (mode
, AND
, before
, val
,
17263 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
17264 after
= expand_simple_unop (mode
, NOT
, x
, after
, 1);
17268 after
= expand_simple_binop (mode
, code
, before
, val
,
17269 after
, 1, OPTAB_LIB_WIDEN
);
17275 x
= expand_simple_binop (SImode
, AND
, after
, mask
,
17276 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
17277 x
= rs6000_mask_atomic_subword (before
, x
, mask
);
17279 else if (store_mode
!= mode
)
17280 x
= convert_modes (store_mode
, mode
, x
, 1);
17282 cond
= gen_reg_rtx (CCmode
);
17283 emit_store_conditional (store_mode
, cond
, mem
, x
);
17285 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
17286 emit_unlikely_jump (x
, label
);
17288 rs6000_post_atomic_barrier (model
);
17292 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
17293 then do the calcuations in a SImode register. */
17295 rs6000_finish_atomic_subword (orig_before
, before
, shift
);
17297 rs6000_finish_atomic_subword (orig_after
, after
, shift
);
17299 else if (store_mode
!= mode
)
17301 /* QImode/HImode on machines with lbarx/lharx where we do the native
17302 operation and then do the calcuations in a SImode register. */
17304 convert_move (orig_before
, before
, 1);
17306 convert_move (orig_after
, after
, 1);
17308 else if (orig_after
&& after
!= orig_after
)
17309 emit_move_insn (orig_after
, after
);
17312 static GTY(()) alias_set_type TOC_alias_set
= -1;
17315 get_TOC_alias_set (void)
17317 if (TOC_alias_set
== -1)
17318 TOC_alias_set
= new_alias_set ();
17319 return TOC_alias_set
;
17322 /* The mode the ABI uses for a word. This is not the same as word_mode
17323 for -m32 -mpowerpc64. This is used to implement various target hooks. */
17325 static scalar_int_mode
17326 rs6000_abi_word_mode (void)
17328 return TARGET_32BIT
? SImode
: DImode
;
17331 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
17333 rs6000_offload_options (void)
17336 return xstrdup ("-foffload-abi=lp64");
17338 return xstrdup ("-foffload-abi=ilp32");
17342 /* A quick summary of the various types of 'constant-pool tables'
17345 Target Flags Name One table per
17346 AIX (none) AIX TOC object file
17347 AIX -mfull-toc AIX TOC object file
17348 AIX -mminimal-toc AIX minimal TOC translation unit
17349 SVR4/EABI (none) SVR4 SDATA object file
17350 SVR4/EABI -fpic SVR4 pic object file
17351 SVR4/EABI -fPIC SVR4 PIC translation unit
17352 SVR4/EABI -mrelocatable EABI TOC function
17353 SVR4/EABI -maix AIX TOC object file
17354 SVR4/EABI -maix -mminimal-toc
17355 AIX minimal TOC translation unit
17357 Name Reg. Set by entries contains:
17358 made by addrs? fp? sum?
17360 AIX TOC 2 crt0 as Y option option
17361 AIX minimal TOC 30 prolog gcc Y Y option
17362 SVR4 SDATA 13 crt0 gcc N Y N
17363 SVR4 pic 30 prolog ld Y not yet N
17364 SVR4 PIC 30 prolog gcc Y option option
17365 EABI TOC 30 prolog gcc Y option option
17369 /* Hash functions for the hash table. */
17372 rs6000_hash_constant (rtx k
)
17374 enum rtx_code code
= GET_CODE (k
);
17375 machine_mode mode
= GET_MODE (k
);
17376 unsigned result
= (code
<< 3) ^ mode
;
17377 const char *format
;
17380 format
= GET_RTX_FORMAT (code
);
17381 flen
= strlen (format
);
17387 return result
* 1231 + (unsigned) INSN_UID (XEXP (k
, 0));
17389 case CONST_WIDE_INT
:
17392 flen
= CONST_WIDE_INT_NUNITS (k
);
17393 for (i
= 0; i
< flen
; i
++)
17394 result
= result
* 613 + CONST_WIDE_INT_ELT (k
, i
);
17399 return real_hash (CONST_DOUBLE_REAL_VALUE (k
)) * result
;
17409 for (; fidx
< flen
; fidx
++)
17410 switch (format
[fidx
])
17415 const char *str
= XSTR (k
, fidx
);
17416 len
= strlen (str
);
17417 result
= result
* 613 + len
;
17418 for (i
= 0; i
< len
; i
++)
17419 result
= result
* 613 + (unsigned) str
[i
];
17424 result
= result
* 1231 + rs6000_hash_constant (XEXP (k
, fidx
));
17428 result
= result
* 613 + (unsigned) XINT (k
, fidx
);
17431 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT
))
17432 result
= result
* 613 + (unsigned) XWINT (k
, fidx
);
17436 for (i
= 0; i
< sizeof (HOST_WIDE_INT
) / sizeof (unsigned); i
++)
17437 result
= result
* 613 + (unsigned) (XWINT (k
, fidx
)
17444 gcc_unreachable ();
17451 toc_hasher::hash (toc_hash_struct
*thc
)
17453 return rs6000_hash_constant (thc
->key
) ^ thc
->key_mode
;
17456 /* Compare H1 and H2 for equivalence. */
17459 toc_hasher::equal (toc_hash_struct
*h1
, toc_hash_struct
*h2
)
17464 if (h1
->key_mode
!= h2
->key_mode
)
17467 return rtx_equal_p (r1
, r2
);
17470 /* These are the names given by the C++ front-end to vtables, and
17471 vtable-like objects. Ideally, this logic should not be here;
17472 instead, there should be some programmatic way of inquiring as
17473 to whether or not an object is a vtable. */
17475 #define VTABLE_NAME_P(NAME) \
17476 (startswith (name, "_vt.") \
17477 || startswith (name, "_ZTV") \
17478 || startswith (name, "_ZTT") \
17479 || startswith (name, "_ZTI") \
17480 || startswith (name, "_ZTC"))
17482 #ifdef NO_DOLLAR_IN_LABEL
17483 /* Return a GGC-allocated character string translating dollar signs in
17484 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
17487 rs6000_xcoff_strip_dollar (const char *name
)
17493 q
= (const char *) strchr (name
, '$');
17495 if (q
== 0 || q
== name
)
17498 len
= strlen (name
);
17499 strip
= XALLOCAVEC (char, len
+ 1);
17500 strcpy (strip
, name
);
17501 p
= strip
+ (q
- name
);
17505 p
= strchr (p
+ 1, '$');
17508 return ggc_alloc_string (strip
, len
);
17513 rs6000_output_symbol_ref (FILE *file
, rtx x
)
17515 const char *name
= XSTR (x
, 0);
17517 /* Currently C++ toc references to vtables can be emitted before it
17518 is decided whether the vtable is public or private. If this is
17519 the case, then the linker will eventually complain that there is
17520 a reference to an unknown section. Thus, for vtables only,
17521 we emit the TOC reference to reference the identifier and not the
17523 if (VTABLE_NAME_P (name
))
17525 RS6000_OUTPUT_BASENAME (file
, name
);
17528 assemble_name (file
, name
);
17531 /* Output a TOC entry. We derive the entry name from what is being
17535 output_toc (FILE *file
, rtx x
, int labelno
, machine_mode mode
)
17538 const char *name
= buf
;
17540 HOST_WIDE_INT offset
= 0;
17542 gcc_assert (!TARGET_NO_TOC_OR_PCREL
);
17544 /* When the linker won't eliminate them, don't output duplicate
17545 TOC entries (this happens on AIX if there is any kind of TOC,
17546 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
17548 if (TARGET_TOC
&& GET_CODE (x
) != LABEL_REF
)
17550 struct toc_hash_struct
*h
;
17552 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
17553 time because GGC is not initialized at that point. */
17554 if (toc_hash_table
== NULL
)
17555 toc_hash_table
= hash_table
<toc_hasher
>::create_ggc (1021);
17557 h
= ggc_alloc
<toc_hash_struct
> ();
17559 h
->key_mode
= mode
;
17560 h
->labelno
= labelno
;
17562 toc_hash_struct
**found
= toc_hash_table
->find_slot (h
, INSERT
);
17563 if (*found
== NULL
)
17565 else /* This is indeed a duplicate.
17566 Set this label equal to that label. */
17568 fputs ("\t.set ", file
);
17569 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LC");
17570 fprintf (file
, "%d,", labelno
);
17571 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LC");
17572 fprintf (file
, "%d\n", ((*found
)->labelno
));
17575 if (TARGET_XCOFF
&& SYMBOL_REF_P (x
)
17576 && (SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_GLOBAL_DYNAMIC
17577 || SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
))
17579 fputs ("\t.set ", file
);
17580 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LCM");
17581 fprintf (file
, "%d,", labelno
);
17582 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LCM");
17583 fprintf (file
, "%d\n", ((*found
)->labelno
));
17590 /* If we're going to put a double constant in the TOC, make sure it's
17591 aligned properly when strict alignment is on. */
17592 if ((CONST_DOUBLE_P (x
) || CONST_WIDE_INT_P (x
))
17593 && STRICT_ALIGNMENT
17594 && GET_MODE_BITSIZE (mode
) >= 64
17595 && ! (TARGET_NO_FP_IN_TOC
&& ! TARGET_MINIMAL_TOC
)) {
17596 ASM_OUTPUT_ALIGN (file
, 3);
17599 (*targetm
.asm_out
.internal_label
) (file
, "LC", labelno
);
17601 /* Handle FP constants specially. Note that if we have a minimal
17602 TOC, things we put here aren't actually in the TOC, so we can allow
17604 if (CONST_DOUBLE_P (x
)
17605 && (GET_MODE (x
) == TFmode
|| GET_MODE (x
) == TDmode
17606 || GET_MODE (x
) == IFmode
|| GET_MODE (x
) == KFmode
))
17610 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
17611 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17613 real_to_target (k
, CONST_DOUBLE_REAL_VALUE (x
), GET_MODE (x
));
17617 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17618 fputs (DOUBLE_INT_ASM_OP
, file
);
17620 fprintf (file
, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17621 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
17622 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
17623 fprintf (file
, "0x%lx%08lx,0x%lx%08lx\n",
17624 k
[WORDS_BIG_ENDIAN
? 0 : 1] & 0xffffffff,
17625 k
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffff,
17626 k
[WORDS_BIG_ENDIAN
? 2 : 3] & 0xffffffff,
17627 k
[WORDS_BIG_ENDIAN
? 3 : 2] & 0xffffffff);
17632 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17633 fputs ("\t.long ", file
);
17635 fprintf (file
, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17636 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
17637 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
17638 fprintf (file
, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17639 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
17640 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
17644 else if (CONST_DOUBLE_P (x
)
17645 && (GET_MODE (x
) == DFmode
|| GET_MODE (x
) == DDmode
))
17649 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
17650 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17652 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17656 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17657 fputs (DOUBLE_INT_ASM_OP
, file
);
17659 fprintf (file
, "\t.tc FD_%lx_%lx[TC],",
17660 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17661 fprintf (file
, "0x%lx%08lx\n",
17662 k
[WORDS_BIG_ENDIAN
? 0 : 1] & 0xffffffff,
17663 k
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffff);
17668 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17669 fputs ("\t.long ", file
);
17671 fprintf (file
, "\t.tc FD_%lx_%lx[TC],",
17672 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17673 fprintf (file
, "0x%lx,0x%lx\n",
17674 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17678 else if (CONST_DOUBLE_P (x
)
17679 && (GET_MODE (x
) == SFmode
|| GET_MODE (x
) == SDmode
))
17683 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
17684 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x
), l
);
17686 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
17690 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17691 fputs (DOUBLE_INT_ASM_OP
, file
);
17693 fprintf (file
, "\t.tc FS_%lx[TC],", l
& 0xffffffff);
17694 if (WORDS_BIG_ENDIAN
)
17695 fprintf (file
, "0x%lx00000000\n", l
& 0xffffffff);
17697 fprintf (file
, "0x%lx\n", l
& 0xffffffff);
17702 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17703 fputs ("\t.long ", file
);
17705 fprintf (file
, "\t.tc FS_%lx[TC],", l
& 0xffffffff);
17706 fprintf (file
, "0x%lx\n", l
& 0xffffffff);
17710 else if (GET_MODE (x
) == VOIDmode
&& CONST_INT_P (x
))
17712 unsigned HOST_WIDE_INT low
;
17713 HOST_WIDE_INT high
;
17715 low
= INTVAL (x
) & 0xffffffff;
17716 high
= (HOST_WIDE_INT
) INTVAL (x
) >> 32;
17718 /* TOC entries are always Pmode-sized, so when big-endian
17719 smaller integer constants in the TOC need to be padded.
17720 (This is still a win over putting the constants in
17721 a separate constant pool, because then we'd have
17722 to have both a TOC entry _and_ the actual constant.)
17724 For a 32-bit target, CONST_INT values are loaded and shifted
17725 entirely within `low' and can be stored in one TOC entry. */
17727 /* It would be easy to make this work, but it doesn't now. */
17728 gcc_assert (!TARGET_64BIT
|| POINTER_SIZE
>= GET_MODE_BITSIZE (mode
));
17730 if (WORDS_BIG_ENDIAN
&& POINTER_SIZE
> GET_MODE_BITSIZE (mode
))
17733 low
<<= POINTER_SIZE
- GET_MODE_BITSIZE (mode
);
17734 high
= (HOST_WIDE_INT
) low
>> 32;
17740 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17741 fputs (DOUBLE_INT_ASM_OP
, file
);
17743 fprintf (file
, "\t.tc ID_%lx_%lx[TC],",
17744 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17745 fprintf (file
, "0x%lx%08lx\n",
17746 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17751 if (POINTER_SIZE
< GET_MODE_BITSIZE (mode
))
17753 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17754 fputs ("\t.long ", file
);
17756 fprintf (file
, "\t.tc ID_%lx_%lx[TC],",
17757 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17758 fprintf (file
, "0x%lx,0x%lx\n",
17759 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17763 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17764 fputs ("\t.long ", file
);
17766 fprintf (file
, "\t.tc IS_%lx[TC],", (long) low
& 0xffffffff);
17767 fprintf (file
, "0x%lx\n", (long) low
& 0xffffffff);
17773 if (GET_CODE (x
) == CONST
)
17775 gcc_assert (GET_CODE (XEXP (x
, 0)) == PLUS
17776 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)));
17778 base
= XEXP (XEXP (x
, 0), 0);
17779 offset
= INTVAL (XEXP (XEXP (x
, 0), 1));
17782 switch (GET_CODE (base
))
17785 name
= XSTR (base
, 0);
17789 ASM_GENERATE_INTERNAL_LABEL (buf
, "L",
17790 CODE_LABEL_NUMBER (XEXP (base
, 0)));
17794 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (base
));
17798 gcc_unreachable ();
17801 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17802 fputs (TARGET_32BIT
? "\t.long " : DOUBLE_INT_ASM_OP
, file
);
17805 fputs ("\t.tc ", file
);
17806 RS6000_OUTPUT_BASENAME (file
, name
);
17809 fprintf (file
, ".N" HOST_WIDE_INT_PRINT_UNSIGNED
, - offset
);
17811 fprintf (file
, ".P" HOST_WIDE_INT_PRINT_UNSIGNED
, offset
);
17813 /* Mark large TOC symbols on AIX with [TE] so they are mapped
17814 after other TOC symbols, reducing overflow of small TOC access
17815 to [TC] symbols. */
17816 fputs (TARGET_XCOFF
&& TARGET_CMODEL
!= CMODEL_SMALL
17817 ? "[TE]," : "[TC],", file
);
17820 /* Currently C++ toc references to vtables can be emitted before it
17821 is decided whether the vtable is public or private. If this is
17822 the case, then the linker will eventually complain that there is
17823 a TOC reference to an unknown section. Thus, for vtables only,
17824 we emit the TOC reference to reference the symbol and not the
17826 if (VTABLE_NAME_P (name
))
17828 RS6000_OUTPUT_BASENAME (file
, name
);
17830 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, offset
);
17831 else if (offset
> 0)
17832 fprintf (file
, "+" HOST_WIDE_INT_PRINT_DEC
, offset
);
17835 output_addr_const (file
, x
);
17838 if (TARGET_XCOFF
&& SYMBOL_REF_P (base
))
17840 switch (SYMBOL_REF_TLS_MODEL (base
))
17844 case TLS_MODEL_LOCAL_EXEC
:
17845 fputs ("@le", file
);
17847 case TLS_MODEL_INITIAL_EXEC
:
17848 fputs ("@ie", file
);
17850 /* Use global-dynamic for local-dynamic. */
17851 case TLS_MODEL_GLOBAL_DYNAMIC
:
17852 case TLS_MODEL_LOCAL_DYNAMIC
:
17854 (*targetm
.asm_out
.internal_label
) (file
, "LCM", labelno
);
17855 fputs ("\t.tc .", file
);
17856 RS6000_OUTPUT_BASENAME (file
, name
);
17857 fputs ("[TC],", file
);
17858 output_addr_const (file
, x
);
17859 fputs ("@m", file
);
17862 gcc_unreachable ();
17870 /* Output an assembler pseudo-op to write an ASCII string of N characters
17871 starting at P to FILE.
17873 On the RS/6000, we have to do this using the .byte operation and
17874 write out special characters outside the quoted string.
17875 Also, the assembler is broken; very long strings are truncated,
17876 so we must artificially break them up early. */
17879 output_ascii (FILE *file
, const char *p
, int n
)
17882 int i
, count_string
;
17883 const char *for_string
= "\t.byte \"";
17884 const char *for_decimal
= "\t.byte ";
17885 const char *to_close
= NULL
;
17888 for (i
= 0; i
< n
; i
++)
17891 if (c
>= ' ' && c
< 0177)
17894 fputs (for_string
, file
);
17897 /* Write two quotes to get one. */
17905 for_decimal
= "\"\n\t.byte ";
17909 if (count_string
>= 512)
17911 fputs (to_close
, file
);
17913 for_string
= "\t.byte \"";
17914 for_decimal
= "\t.byte ";
17922 fputs (for_decimal
, file
);
17923 fprintf (file
, "%d", c
);
17925 for_string
= "\n\t.byte \"";
17926 for_decimal
= ", ";
17932 /* Now close the string if we have written one. Then end the line. */
17934 fputs (to_close
, file
);
17937 /* Generate a unique section name for FILENAME for a section type
17938 represented by SECTION_DESC. Output goes into BUF.
17940 SECTION_DESC can be any string, as long as it is different for each
17941 possible section type.
17943 We name the section in the same manner as xlc. The name begins with an
17944 underscore followed by the filename (after stripping any leading directory
17945 names) with the last period replaced by the string SECTION_DESC. If
17946 FILENAME does not contain a period, SECTION_DESC is appended to the end of
17950 rs6000_gen_section_name (char **buf
, const char *filename
,
17951 const char *section_desc
)
17953 const char *q
, *after_last_slash
, *last_period
= 0;
17957 after_last_slash
= filename
;
17958 for (q
= filename
; *q
; q
++)
17961 after_last_slash
= q
+ 1;
17962 else if (*q
== '.')
17966 len
= strlen (after_last_slash
) + strlen (section_desc
) + 2;
17967 *buf
= (char *) xmalloc (len
);
17972 for (q
= after_last_slash
; *q
; q
++)
17974 if (q
== last_period
)
17976 strcpy (p
, section_desc
);
17977 p
+= strlen (section_desc
);
17981 else if (ISALNUM (*q
))
17985 if (last_period
== 0)
17986 strcpy (p
, section_desc
);
17991 /* Emit profile function. */
17994 output_profile_hook (int labelno ATTRIBUTE_UNUSED
)
17996 /* Non-standard profiling for kernels, which just saves LR then calls
17997 _mcount without worrying about arg saves. The idea is to change
17998 the function prologue as little as possible as it isn't easy to
17999 account for arg save/restore code added just for _mcount. */
18000 if (TARGET_PROFILE_KERNEL
)
18003 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
18005 #ifndef NO_PROFILE_COUNTERS
18006 # define NO_PROFILE_COUNTERS 0
18008 if (NO_PROFILE_COUNTERS
)
18009 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
18010 LCT_NORMAL
, VOIDmode
);
18014 const char *label_name
;
18017 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
18018 label_name
= ggc_strdup ((*targetm
.strip_name_encoding
) (buf
));
18019 fun
= gen_rtx_SYMBOL_REF (Pmode
, label_name
);
18021 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
18022 LCT_NORMAL
, VOIDmode
, fun
, Pmode
);
18025 else if (DEFAULT_ABI
== ABI_DARWIN
)
18027 const char *mcount_name
= RS6000_MCOUNT
;
18028 int caller_addr_regno
= LR_REGNO
;
18030 /* Be conservative and always set this, at least for now. */
18031 crtl
->uses_pic_offset_table
= 1;
18034 /* For PIC code, set up a stub and collect the caller's address
18035 from r0, which is where the prologue puts it. */
18036 if (MACHOPIC_INDIRECT
18037 && crtl
->uses_pic_offset_table
)
18038 caller_addr_regno
= 0;
18040 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, mcount_name
),
18041 LCT_NORMAL
, VOIDmode
,
18042 gen_rtx_REG (Pmode
, caller_addr_regno
), Pmode
);
18046 /* Write function profiler code. */
18049 output_function_profiler (FILE *file
, int labelno
)
18053 switch (DEFAULT_ABI
)
18056 gcc_unreachable ();
18061 warning (0, "no profiling of 64-bit code for this ABI");
18064 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
18065 fprintf (file
, "\tmflr %s\n", reg_names
[0]);
18066 if (NO_PROFILE_COUNTERS
)
18068 asm_fprintf (file
, "\tstw %s,4(%s)\n",
18069 reg_names
[0], reg_names
[1]);
18071 else if (TARGET_SECURE_PLT
&& flag_pic
)
18073 if (TARGET_LINK_STACK
)
18076 get_ppc476_thunk_name (name
);
18077 asm_fprintf (file
, "\tbl %s\n", name
);
18080 asm_fprintf (file
, "\tbcl 20,31,1f\n1:\n");
18081 asm_fprintf (file
, "\tstw %s,4(%s)\n",
18082 reg_names
[0], reg_names
[1]);
18083 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
18084 asm_fprintf (file
, "\taddis %s,%s,",
18085 reg_names
[12], reg_names
[12]);
18086 assemble_name (file
, buf
);
18087 asm_fprintf (file
, "-1b@ha\n\tla %s,", reg_names
[0]);
18088 assemble_name (file
, buf
);
18089 asm_fprintf (file
, "-1b@l(%s)\n", reg_names
[12]);
18091 else if (flag_pic
== 1)
18093 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file
);
18094 asm_fprintf (file
, "\tstw %s,4(%s)\n",
18095 reg_names
[0], reg_names
[1]);
18096 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
18097 asm_fprintf (file
, "\tlwz %s,", reg_names
[0]);
18098 assemble_name (file
, buf
);
18099 asm_fprintf (file
, "@got(%s)\n", reg_names
[12]);
18101 else if (flag_pic
> 1)
18103 asm_fprintf (file
, "\tstw %s,4(%s)\n",
18104 reg_names
[0], reg_names
[1]);
18105 /* Now, we need to get the address of the label. */
18106 if (TARGET_LINK_STACK
)
18109 get_ppc476_thunk_name (name
);
18110 asm_fprintf (file
, "\tbl %s\n\tb 1f\n\t.long ", name
);
18111 assemble_name (file
, buf
);
18112 fputs ("-.\n1:", file
);
18113 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
18114 asm_fprintf (file
, "\taddi %s,%s,4\n",
18115 reg_names
[11], reg_names
[11]);
18119 fputs ("\tbcl 20,31,1f\n\t.long ", file
);
18120 assemble_name (file
, buf
);
18121 fputs ("-.\n1:", file
);
18122 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
18124 asm_fprintf (file
, "\tlwz %s,0(%s)\n",
18125 reg_names
[0], reg_names
[11]);
18126 asm_fprintf (file
, "\tadd %s,%s,%s\n",
18127 reg_names
[0], reg_names
[0], reg_names
[11]);
18131 asm_fprintf (file
, "\tlis %s,", reg_names
[12]);
18132 assemble_name (file
, buf
);
18133 fputs ("@ha\n", file
);
18134 asm_fprintf (file
, "\tstw %s,4(%s)\n",
18135 reg_names
[0], reg_names
[1]);
18136 asm_fprintf (file
, "\tla %s,", reg_names
[0]);
18137 assemble_name (file
, buf
);
18138 asm_fprintf (file
, "@l(%s)\n", reg_names
[12]);
18141 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
18142 fprintf (file
, "\tbl %s%s\n",
18143 RS6000_MCOUNT
, flag_pic
? "@plt" : "");
18149 /* Don't do anything, done in output_profile_hook (). */
18156 /* The following variable value is the last issued insn. */
18158 static rtx_insn
*last_scheduled_insn
;
18160 /* The following variable helps to balance issuing of load and
18161 store instructions */
18163 static int load_store_pendulum
;
18165 /* The following variable helps pair divide insns during scheduling. */
18166 static int divide_cnt
;
18167 /* The following variable helps pair and alternate vector and vector load
18168 insns during scheduling. */
18169 static int vec_pairing
;
18172 /* Power4 load update and store update instructions are cracked into a
18173 load or store and an integer insn which are executed in the same cycle.
18174 Branches have their own dispatch slot which does not count against the
18175 GCC issue rate, but it changes the program flow so there are no other
18176 instructions to issue in this cycle. */
18179 rs6000_variable_issue_1 (rtx_insn
*insn
, int more
)
18181 last_scheduled_insn
= insn
;
18182 if (GET_CODE (PATTERN (insn
)) == USE
18183 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18185 cached_can_issue_more
= more
;
18186 return cached_can_issue_more
;
18189 if (insn_terminates_group_p (insn
, current_group
))
18191 cached_can_issue_more
= 0;
18192 return cached_can_issue_more
;
18195 /* If no reservation, but reach here */
18196 if (recog_memoized (insn
) < 0)
18199 if (rs6000_sched_groups
)
18201 if (is_microcoded_insn (insn
))
18202 cached_can_issue_more
= 0;
18203 else if (is_cracked_insn (insn
))
18204 cached_can_issue_more
= more
> 2 ? more
- 2 : 0;
18206 cached_can_issue_more
= more
- 1;
18208 return cached_can_issue_more
;
18211 if (rs6000_tune
== PROCESSOR_CELL
&& is_nonpipeline_insn (insn
))
18214 cached_can_issue_more
= more
- 1;
18215 return cached_can_issue_more
;
18219 rs6000_variable_issue (FILE *stream
, int verbose
, rtx_insn
*insn
, int more
)
18221 int r
= rs6000_variable_issue_1 (insn
, more
);
18223 fprintf (stream
, "// rs6000_variable_issue (more = %d) = %d\n", more
, r
);
18227 /* Adjust the cost of a scheduling dependency. Return the new cost of
18228 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
18231 rs6000_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
, int cost
,
18234 enum attr_type attr_type
;
18236 if (recog_memoized (insn
) < 0 || recog_memoized (dep_insn
) < 0)
18243 /* Data dependency; DEP_INSN writes a register that INSN reads
18244 some cycles later. */
18246 /* Separate a load from a narrower, dependent store. */
18247 if ((rs6000_sched_groups
|| rs6000_tune
== PROCESSOR_POWER9
18248 || rs6000_tune
== PROCESSOR_POWER10
18249 || rs6000_tune
== PROCESSOR_POWER11
)
18250 && GET_CODE (PATTERN (insn
)) == SET
18251 && GET_CODE (PATTERN (dep_insn
)) == SET
18252 && MEM_P (XEXP (PATTERN (insn
), 1))
18253 && MEM_P (XEXP (PATTERN (dep_insn
), 0))
18254 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn
), 1)))
18255 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn
), 0)))))
18258 attr_type
= get_attr_type (insn
);
18263 /* Tell the first scheduling pass about the latency between
18264 a mtctr and bctr (and mtlr and br/blr). The first
18265 scheduling pass will not know about this latency since
18266 the mtctr instruction, which has the latency associated
18267 to it, will be generated by reload. */
18270 /* Leave some extra cycles between a compare and its
18271 dependent branch, to inhibit expensive mispredicts. */
18272 if ((rs6000_tune
== PROCESSOR_PPC603
18273 || rs6000_tune
== PROCESSOR_PPC604
18274 || rs6000_tune
== PROCESSOR_PPC604e
18275 || rs6000_tune
== PROCESSOR_PPC620
18276 || rs6000_tune
== PROCESSOR_PPC630
18277 || rs6000_tune
== PROCESSOR_PPC750
18278 || rs6000_tune
== PROCESSOR_PPC7400
18279 || rs6000_tune
== PROCESSOR_PPC7450
18280 || rs6000_tune
== PROCESSOR_PPCE5500
18281 || rs6000_tune
== PROCESSOR_PPCE6500
18282 || rs6000_tune
== PROCESSOR_POWER4
18283 || rs6000_tune
== PROCESSOR_POWER5
18284 || rs6000_tune
== PROCESSOR_POWER7
18285 || rs6000_tune
== PROCESSOR_POWER8
18286 || rs6000_tune
== PROCESSOR_POWER9
18287 || rs6000_tune
== PROCESSOR_POWER10
18288 || rs6000_tune
== PROCESSOR_POWER11
18289 || rs6000_tune
== PROCESSOR_CELL
)
18290 && recog_memoized (dep_insn
)
18291 && (INSN_CODE (dep_insn
) >= 0))
18293 switch (get_attr_type (dep_insn
))
18296 case TYPE_FPCOMPARE
:
18297 case TYPE_CR_LOGICAL
:
18301 if (get_attr_dot (dep_insn
) == DOT_YES
)
18306 if (get_attr_dot (dep_insn
) == DOT_YES
18307 && get_attr_var_shift (dep_insn
) == VAR_SHIFT_NO
)
18318 if ((rs6000_tune
== PROCESSOR_POWER6
)
18319 && recog_memoized (dep_insn
)
18320 && (INSN_CODE (dep_insn
) >= 0))
18323 if (GET_CODE (PATTERN (insn
)) != SET
)
18324 /* If this happens, we have to extend this to schedule
18325 optimally. Return default for now. */
18328 /* Adjust the cost for the case where the value written
18329 by a fixed point operation is used as the address
18330 gen value on a store. */
18331 switch (get_attr_type (dep_insn
))
18336 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18337 return get_attr_sign_extend (dep_insn
)
18338 == SIGN_EXTEND_YES
? 6 : 4;
18343 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18344 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
18354 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18362 if (get_attr_update (dep_insn
) == UPDATE_YES
18363 && ! rs6000_store_data_bypass_p (dep_insn
, insn
))
18369 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18375 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18376 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
18386 if ((rs6000_tune
== PROCESSOR_POWER6
)
18387 && recog_memoized (dep_insn
)
18388 && (INSN_CODE (dep_insn
) >= 0))
18391 /* Adjust the cost for the case where the value written
18392 by a fixed point instruction is used within the address
18393 gen portion of a subsequent load(u)(x) */
18394 switch (get_attr_type (dep_insn
))
18399 if (set_to_load_agen (dep_insn
, insn
))
18400 return get_attr_sign_extend (dep_insn
)
18401 == SIGN_EXTEND_YES
? 6 : 4;
18406 if (set_to_load_agen (dep_insn
, insn
))
18407 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
18417 if (set_to_load_agen (dep_insn
, insn
))
18425 if (get_attr_update (dep_insn
) == UPDATE_YES
18426 && set_to_load_agen (dep_insn
, insn
))
18432 if (set_to_load_agen (dep_insn
, insn
))
18438 if (set_to_load_agen (dep_insn
, insn
))
18439 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
18452 /* Fall out to return default cost. */
18456 case REG_DEP_OUTPUT
:
18457 /* Output dependency; DEP_INSN writes a register that INSN writes some
18459 if ((rs6000_tune
== PROCESSOR_POWER6
)
18460 && recog_memoized (dep_insn
)
18461 && (INSN_CODE (dep_insn
) >= 0))
18463 attr_type
= get_attr_type (insn
);
18468 case TYPE_FPSIMPLE
:
18469 if (get_attr_type (dep_insn
) == TYPE_FP
18470 || get_attr_type (dep_insn
) == TYPE_FPSIMPLE
)
18477 /* Fall through, no cost for output dependency. */
18481 /* Anti dependency; DEP_INSN reads a register that INSN writes some
18486 gcc_unreachable ();
18492 /* Debug version of rs6000_adjust_cost. */
18495 rs6000_debug_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
,
18496 int cost
, unsigned int dw
)
18498 int ret
= rs6000_adjust_cost (insn
, dep_type
, dep_insn
, cost
, dw
);
18506 default: dep
= "unknown depencency"; break;
18507 case REG_DEP_TRUE
: dep
= "data dependency"; break;
18508 case REG_DEP_OUTPUT
: dep
= "output dependency"; break;
18509 case REG_DEP_ANTI
: dep
= "anti depencency"; break;
18513 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
18514 "%s, insn:\n", ret
, cost
, dep
);
18522 /* The function returns a true if INSN is microcoded.
18523 Return false otherwise. */
18526 is_microcoded_insn (rtx_insn
*insn
)
18528 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18529 || GET_CODE (PATTERN (insn
)) == USE
18530 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18533 if (rs6000_tune
== PROCESSOR_CELL
)
18534 return get_attr_cell_micro (insn
) == CELL_MICRO_ALWAYS
;
18536 if (rs6000_sched_groups
18537 && (rs6000_tune
== PROCESSOR_POWER4
|| rs6000_tune
== PROCESSOR_POWER5
))
18539 enum attr_type type
= get_attr_type (insn
);
18540 if ((type
== TYPE_LOAD
18541 && get_attr_update (insn
) == UPDATE_YES
18542 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
)
18543 || ((type
== TYPE_LOAD
|| type
== TYPE_STORE
)
18544 && get_attr_update (insn
) == UPDATE_YES
18545 && get_attr_indexed (insn
) == INDEXED_YES
)
18546 || type
== TYPE_MFCR
)
18553 /* The function returns true if INSN is cracked into 2 instructions
18554 by the processor (and therefore occupies 2 issue slots). */
18557 is_cracked_insn (rtx_insn
*insn
)
18559 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18560 || GET_CODE (PATTERN (insn
)) == USE
18561 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18564 if (rs6000_sched_groups
18565 && (rs6000_tune
== PROCESSOR_POWER4
|| rs6000_tune
== PROCESSOR_POWER5
))
18567 enum attr_type type
= get_attr_type (insn
);
18568 if ((type
== TYPE_LOAD
18569 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
18570 && get_attr_update (insn
) == UPDATE_NO
)
18571 || (type
== TYPE_LOAD
18572 && get_attr_sign_extend (insn
) == SIGN_EXTEND_NO
18573 && get_attr_update (insn
) == UPDATE_YES
18574 && get_attr_indexed (insn
) == INDEXED_NO
)
18575 || (type
== TYPE_STORE
18576 && get_attr_update (insn
) == UPDATE_YES
18577 && get_attr_indexed (insn
) == INDEXED_NO
)
18578 || ((type
== TYPE_FPLOAD
|| type
== TYPE_FPSTORE
)
18579 && get_attr_update (insn
) == UPDATE_YES
)
18580 || (type
== TYPE_CR_LOGICAL
18581 && get_attr_cr_logical_3op (insn
) == CR_LOGICAL_3OP_YES
)
18582 || (type
== TYPE_EXTS
18583 && get_attr_dot (insn
) == DOT_YES
)
18584 || (type
== TYPE_SHIFT
18585 && get_attr_dot (insn
) == DOT_YES
18586 && get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
18587 || (type
== TYPE_MUL
18588 && get_attr_dot (insn
) == DOT_YES
)
18589 || type
== TYPE_DIV
18590 || (type
== TYPE_INSERT
18591 && get_attr_size (insn
) == SIZE_32
))
18598 /* The function returns true if INSN can be issued only from
18599 the branch slot. */
18602 is_branch_slot_insn (rtx_insn
*insn
)
18604 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18605 || GET_CODE (PATTERN (insn
)) == USE
18606 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18609 if (rs6000_sched_groups
)
18611 enum attr_type type
= get_attr_type (insn
);
18612 if (type
== TYPE_BRANCH
|| type
== TYPE_JMPREG
)
18620 /* The function returns true if out_inst sets a value that is
18621 used in the address generation computation of in_insn */
18623 set_to_load_agen (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
18625 rtx out_set
, in_set
;
18627 /* For performance reasons, only handle the simple case where
18628 both loads are a single_set. */
18629 out_set
= single_set (out_insn
);
18632 in_set
= single_set (in_insn
);
18634 return reg_mentioned_p (SET_DEST (out_set
), SET_SRC (in_set
));
18640 /* Try to determine base/offset/size parts of the given MEM.
18641 Return true if successful, false if all the values couldn't
18644 This function only looks for REG or REG+CONST address forms.
18645 REG+REG address form will return false. */
18648 get_memref_parts (rtx mem
, rtx
*base
, HOST_WIDE_INT
*offset
,
18649 HOST_WIDE_INT
*size
)
18652 if (MEM_SIZE_KNOWN_P (mem
))
18653 *size
= MEM_SIZE (mem
);
18657 addr_rtx
= (XEXP (mem
, 0));
18658 if (GET_CODE (addr_rtx
) == PRE_MODIFY
)
18659 addr_rtx
= XEXP (addr_rtx
, 1);
18662 while (GET_CODE (addr_rtx
) == PLUS
18663 && CONST_INT_P (XEXP (addr_rtx
, 1)))
18665 *offset
+= INTVAL (XEXP (addr_rtx
, 1));
18666 addr_rtx
= XEXP (addr_rtx
, 0);
18668 if (!REG_P (addr_rtx
))
18675 /* If the target storage locations of arguments MEM1 and MEM2 are
18676 adjacent, then return the argument that has the lower address.
18677 Otherwise, return NULL_RTX. */
18680 adjacent_mem_locations (rtx mem1
, rtx mem2
)
18683 HOST_WIDE_INT off1
, size1
, off2
, size2
;
18687 && get_memref_parts (mem1
, ®1
, &off1
, &size1
)
18688 && get_memref_parts (mem2
, ®2
, &off2
, &size2
)
18689 && REGNO (reg1
) == REGNO (reg2
))
18691 if (off1
+ size1
== off2
)
18693 else if (off2
+ size2
== off1
)
18700 /* This function returns true if it can be determined that the two MEM
18701 locations overlap by at least 1 byte based on base reg/offset/size. */
18704 mem_locations_overlap (rtx mem1
, rtx mem2
)
18707 HOST_WIDE_INT off1
, size1
, off2
, size2
;
18709 if (get_memref_parts (mem1
, ®1
, &off1
, &size1
)
18710 && get_memref_parts (mem2
, ®2
, &off2
, &size2
))
18711 return ((REGNO (reg1
) == REGNO (reg2
))
18712 && (((off1
<= off2
) && (off1
+ size1
> off2
))
18713 || ((off2
<= off1
) && (off2
+ size2
> off1
))));
18718 /* A C statement (sans semicolon) to update the integer scheduling
18719 priority INSN_PRIORITY (INSN). Increase the priority to execute the
18720 INSN earlier, reduce the priority to execute INSN later. Do not
18721 define this macro if you do not need to adjust the scheduling
18722 priorities of insns. */
18725 rs6000_adjust_priority (rtx_insn
*insn ATTRIBUTE_UNUSED
, int priority
)
18727 rtx load_mem
, str_mem
;
18728 /* On machines (like the 750) which have asymmetric integer units,
18729 where one integer unit can do multiply and divides and the other
18730 can't, reduce the priority of multiply/divide so it is scheduled
18731 before other integer operations. */
18734 if (! INSN_P (insn
))
18737 if (GET_CODE (PATTERN (insn
)) == USE
)
18740 switch (rs6000_tune
) {
18741 case PROCESSOR_PPC750
:
18742 switch (get_attr_type (insn
))
18749 fprintf (stderr
, "priority was %#x (%d) before adjustment\n",
18750 priority
, priority
);
18751 if (priority
>= 0 && priority
< 0x01000000)
18758 if (insn_must_be_first_in_group (insn
)
18759 && reload_completed
18760 && current_sched_info
->sched_max_insns_priority
18761 && rs6000_sched_restricted_insns_priority
)
18764 /* Prioritize insns that can be dispatched only in the first
18766 if (rs6000_sched_restricted_insns_priority
== 1)
18767 /* Attach highest priority to insn. This means that in
18768 haifa-sched.cc:ready_sort(), dispatch-slot restriction considerations
18769 precede 'priority' (critical path) considerations. */
18770 return current_sched_info
->sched_max_insns_priority
;
18771 else if (rs6000_sched_restricted_insns_priority
== 2)
18772 /* Increase priority of insn by a minimal amount. This means that in
18773 haifa-sched.cc:ready_sort(), only 'priority' (critical path)
18774 considerations precede dispatch-slot restriction considerations. */
18775 return (priority
+ 1);
18778 if (rs6000_tune
== PROCESSOR_POWER6
18779 && ((load_store_pendulum
== -2 && is_load_insn (insn
, &load_mem
))
18780 || (load_store_pendulum
== 2 && is_store_insn (insn
, &str_mem
))))
18781 /* Attach highest priority to insn if the scheduler has just issued two
18782 stores and this instruction is a load, or two loads and this instruction
18783 is a store. Power6 wants loads and stores scheduled alternately
18785 return current_sched_info
->sched_max_insns_priority
;
18790 /* Return true if the instruction is nonpipelined on the Cell. */
18792 is_nonpipeline_insn (rtx_insn
*insn
)
18794 enum attr_type type
;
18795 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18796 || GET_CODE (PATTERN (insn
)) == USE
18797 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18800 type
= get_attr_type (insn
);
18801 if (type
== TYPE_MUL
18802 || type
== TYPE_DIV
18803 || type
== TYPE_SDIV
18804 || type
== TYPE_DDIV
18805 || type
== TYPE_SSQRT
18806 || type
== TYPE_DSQRT
18807 || type
== TYPE_MFCR
18808 || type
== TYPE_MFCRF
18809 || type
== TYPE_MFJMPR
)
18817 /* Return how many instructions the machine can issue per cycle. */
18820 rs6000_issue_rate (void)
18822 /* Unless scheduling for register pressure, use issue rate of 1 for
18823 first scheduling pass to decrease degradation. */
18824 if (!reload_completed
&& !flag_sched_pressure
)
18827 switch (rs6000_tune
) {
18828 case PROCESSOR_RS64A
:
18829 case PROCESSOR_PPC601
: /* ? */
18830 case PROCESSOR_PPC7450
:
18832 case PROCESSOR_PPC440
:
18833 case PROCESSOR_PPC603
:
18834 case PROCESSOR_PPC750
:
18835 case PROCESSOR_PPC7400
:
18836 case PROCESSOR_PPC8540
:
18837 case PROCESSOR_PPC8548
:
18838 case PROCESSOR_CELL
:
18839 case PROCESSOR_PPCE300C2
:
18840 case PROCESSOR_PPCE300C3
:
18841 case PROCESSOR_PPCE500MC
:
18842 case PROCESSOR_PPCE500MC64
:
18843 case PROCESSOR_PPCE5500
:
18844 case PROCESSOR_PPCE6500
:
18845 case PROCESSOR_TITAN
:
18847 case PROCESSOR_PPC476
:
18848 case PROCESSOR_PPC604
:
18849 case PROCESSOR_PPC604e
:
18850 case PROCESSOR_PPC620
:
18851 case PROCESSOR_PPC630
:
18853 case PROCESSOR_POWER4
:
18854 case PROCESSOR_POWER5
:
18855 case PROCESSOR_POWER6
:
18856 case PROCESSOR_POWER7
:
18858 case PROCESSOR_POWER8
:
18860 case PROCESSOR_POWER9
:
18862 case PROCESSOR_POWER10
:
18863 case PROCESSOR_POWER11
:
18870 /* Return how many instructions to look ahead for better insn
18874 rs6000_use_sched_lookahead (void)
18876 switch (rs6000_tune
)
18878 case PROCESSOR_PPC8540
:
18879 case PROCESSOR_PPC8548
:
18882 case PROCESSOR_CELL
:
18883 return (reload_completed
? 8 : 0);
18890 /* We are choosing insn from the ready queue. Return zero if INSN can be
18893 rs6000_use_sched_lookahead_guard (rtx_insn
*insn
, int ready_index
)
18895 if (ready_index
== 0)
18898 if (rs6000_tune
!= PROCESSOR_CELL
)
18901 gcc_assert (insn
!= NULL_RTX
&& INSN_P (insn
));
18903 if (!reload_completed
18904 || is_nonpipeline_insn (insn
)
18905 || is_microcoded_insn (insn
))
18911 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18912 and return true. */
18915 find_mem_ref (rtx pat
, rtx
*mem_ref
)
18920 /* stack_tie does not produce any real memory traffic. */
18921 if (tie_operand (pat
, VOIDmode
))
18930 /* Recursively process the pattern. */
18931 fmt
= GET_RTX_FORMAT (GET_CODE (pat
));
18933 for (i
= GET_RTX_LENGTH (GET_CODE (pat
)) - 1; i
>= 0; i
--)
18937 if (find_mem_ref (XEXP (pat
, i
), mem_ref
))
18940 else if (fmt
[i
] == 'E')
18941 for (j
= XVECLEN (pat
, i
) - 1; j
>= 0; j
--)
18943 if (find_mem_ref (XVECEXP (pat
, i
, j
), mem_ref
))
18951 /* Determine if PAT is a PATTERN of a load insn. */
18954 is_load_insn1 (rtx pat
, rtx
*load_mem
)
18956 if (!pat
|| pat
== NULL_RTX
)
18959 if (GET_CODE (pat
) == SET
)
18961 if (REG_P (SET_DEST (pat
)))
18962 return find_mem_ref (SET_SRC (pat
), load_mem
);
18967 if (GET_CODE (pat
) == PARALLEL
)
18971 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
18972 if (is_load_insn1 (XVECEXP (pat
, 0, i
), load_mem
))
18979 /* Determine if INSN loads from memory. */
18982 is_load_insn (rtx insn
, rtx
*load_mem
)
18984 if (!insn
|| !INSN_P (insn
))
18990 return is_load_insn1 (PATTERN (insn
), load_mem
);
18993 /* Determine if PAT is a PATTERN of a store insn. */
18996 is_store_insn1 (rtx pat
, rtx
*str_mem
)
18998 if (!pat
|| pat
== NULL_RTX
)
19001 if (GET_CODE (pat
) == SET
)
19003 if (REG_P (SET_SRC (pat
)) || SUBREG_P (SET_SRC (pat
)))
19004 return find_mem_ref (SET_DEST (pat
), str_mem
);
19009 if (GET_CODE (pat
) == PARALLEL
)
19013 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
19014 if (is_store_insn1 (XVECEXP (pat
, 0, i
), str_mem
))
19021 /* Determine if INSN stores to memory. */
19024 is_store_insn (rtx insn
, rtx
*str_mem
)
19026 if (!insn
|| !INSN_P (insn
))
19029 return is_store_insn1 (PATTERN (insn
), str_mem
);
19032 /* Return whether TYPE is a Power9 pairable vector instruction type. */
19035 is_power9_pairable_vec_type (enum attr_type type
)
19039 case TYPE_VECSIMPLE
:
19040 case TYPE_VECCOMPLEX
:
19044 case TYPE_VECFLOAT
:
19046 case TYPE_VECDOUBLE
:
19054 /* Returns whether the dependence between INSN and NEXT is considered
19055 costly by the given target. */
19058 rs6000_is_costly_dependence (dep_t dep
, int cost
, int distance
)
19062 rtx load_mem
, str_mem
;
19064 /* If the flag is not enabled - no dependence is considered costly;
19065 allow all dependent insns in the same group.
19066 This is the most aggressive option. */
19067 if (rs6000_sched_costly_dep
== no_dep_costly
)
19070 /* If the flag is set to 1 - a dependence is always considered costly;
19071 do not allow dependent instructions in the same group.
19072 This is the most conservative option. */
19073 if (rs6000_sched_costly_dep
== all_deps_costly
)
19076 insn
= DEP_PRO (dep
);
19077 next
= DEP_CON (dep
);
19079 if (rs6000_sched_costly_dep
== store_to_load_dep_costly
19080 && is_load_insn (next
, &load_mem
)
19081 && is_store_insn (insn
, &str_mem
))
19082 /* Prevent load after store in the same group. */
19085 if (rs6000_sched_costly_dep
== true_store_to_load_dep_costly
19086 && is_load_insn (next
, &load_mem
)
19087 && is_store_insn (insn
, &str_mem
)
19088 && DEP_TYPE (dep
) == REG_DEP_TRUE
19089 && mem_locations_overlap(str_mem
, load_mem
))
19090 /* Prevent load after store in the same group if it is a true
19094 /* The flag is set to X; dependences with latency >= X are considered costly,
19095 and will not be scheduled in the same group. */
19096 if (rs6000_sched_costly_dep
<= max_dep_latency
19097 && ((cost
- distance
) >= (int)rs6000_sched_costly_dep
))
19103 /* Return the next insn after INSN that is found before TAIL is reached,
19104 skipping any "non-active" insns - insns that will not actually occupy
19105 an issue slot. Return NULL_RTX if such an insn is not found. */
19108 get_next_active_insn (rtx_insn
*insn
, rtx_insn
*tail
)
19110 if (insn
== NULL_RTX
|| insn
== tail
)
19115 insn
= NEXT_INSN (insn
);
19116 if (insn
== NULL_RTX
|| insn
== tail
)
19120 || JUMP_P (insn
) || JUMP_TABLE_DATA_P (insn
)
19121 || (NONJUMP_INSN_P (insn
)
19122 && GET_CODE (PATTERN (insn
)) != USE
19123 && GET_CODE (PATTERN (insn
)) != CLOBBER
19124 && INSN_CODE (insn
) != CODE_FOR_stack_tie
))
19130 /* Move instruction at POS to the end of the READY list. */
19133 move_to_end_of_ready (rtx_insn
**ready
, int pos
, int lastpos
)
19139 for (i
= pos
; i
< lastpos
; i
++)
19140 ready
[i
] = ready
[i
+ 1];
19141 ready
[lastpos
] = tmp
;
19144 /* Do Power6 specific sched_reorder2 reordering of ready list. */
19147 power6_sched_reorder2 (rtx_insn
**ready
, int lastpos
)
19149 /* For Power6, we need to handle some special cases to try and keep the
19150 store queue from overflowing and triggering expensive flushes.
19152 This code monitors how load and store instructions are being issued
19153 and skews the ready list one way or the other to increase the likelihood
19154 that a desired instruction is issued at the proper time.
19156 A couple of things are done. First, we maintain a "load_store_pendulum"
19157 to track the current state of load/store issue.
19159 - If the pendulum is at zero, then no loads or stores have been
19160 issued in the current cycle so we do nothing.
19162 - If the pendulum is 1, then a single load has been issued in this
19163 cycle and we attempt to locate another load in the ready list to
19166 - If the pendulum is -2, then two stores have already been
19167 issued in this cycle, so we increase the priority of the first load
19168 in the ready list to increase it's likelihood of being chosen first
19171 - If the pendulum is -1, then a single store has been issued in this
19172 cycle and we attempt to locate another store in the ready list to
19173 issue with it, preferring a store to an adjacent memory location to
19174 facilitate store pairing in the store queue.
19176 - If the pendulum is 2, then two loads have already been
19177 issued in this cycle, so we increase the priority of the first store
19178 in the ready list to increase it's likelihood of being chosen first
19181 - If the pendulum < -2 or > 2, then do nothing.
19183 Note: This code covers the most common scenarios. There exist non
19184 load/store instructions which make use of the LSU and which
19185 would need to be accounted for to strictly model the behavior
19186 of the machine. Those instructions are currently unaccounted
19187 for to help minimize compile time overhead of this code.
19190 rtx load_mem
, str_mem
;
19192 if (is_store_insn (last_scheduled_insn
, &str_mem
))
19193 /* Issuing a store, swing the load_store_pendulum to the left */
19194 load_store_pendulum
--;
19195 else if (is_load_insn (last_scheduled_insn
, &load_mem
))
19196 /* Issuing a load, swing the load_store_pendulum to the right */
19197 load_store_pendulum
++;
19199 return cached_can_issue_more
;
19201 /* If the pendulum is balanced, or there is only one instruction on
19202 the ready list, then all is well, so return. */
19203 if ((load_store_pendulum
== 0) || (lastpos
<= 0))
19204 return cached_can_issue_more
;
19206 if (load_store_pendulum
== 1)
19208 /* A load has been issued in this cycle. Scan the ready list
19209 for another load to issue with it */
19214 if (is_load_insn (ready
[pos
], &load_mem
))
19216 /* Found a load. Move it to the head of the ready list,
19217 and adjust it's priority so that it is more likely to
19219 move_to_end_of_ready (ready
, pos
, lastpos
);
19221 if (!sel_sched_p ()
19222 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
19223 INSN_PRIORITY (ready
[lastpos
])++;
19229 else if (load_store_pendulum
== -2)
19231 /* Two stores have been issued in this cycle. Increase the
19232 priority of the first load in the ready list to favor it for
19233 issuing in the next cycle. */
19238 if (is_load_insn (ready
[pos
], &load_mem
)
19240 && INSN_PRIORITY_KNOWN (ready
[pos
]))
19242 INSN_PRIORITY (ready
[pos
])++;
19244 /* Adjust the pendulum to account for the fact that a load
19245 was found and increased in priority. This is to prevent
19246 increasing the priority of multiple loads */
19247 load_store_pendulum
--;
19254 else if (load_store_pendulum
== -1)
19256 /* A store has been issued in this cycle. Scan the ready list for
19257 another store to issue with it, preferring a store to an adjacent
19259 int first_store_pos
= -1;
19265 if (is_store_insn (ready
[pos
], &str_mem
))
19268 /* Maintain the index of the first store found on the
19270 if (first_store_pos
== -1)
19271 first_store_pos
= pos
;
19273 if (is_store_insn (last_scheduled_insn
, &str_mem2
)
19274 && adjacent_mem_locations (str_mem
, str_mem2
))
19276 /* Found an adjacent store. Move it to the head of the
19277 ready list, and adjust it's priority so that it is
19278 more likely to stay there */
19279 move_to_end_of_ready (ready
, pos
, lastpos
);
19281 if (!sel_sched_p ()
19282 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
19283 INSN_PRIORITY (ready
[lastpos
])++;
19285 first_store_pos
= -1;
19293 if (first_store_pos
>= 0)
19295 /* An adjacent store wasn't found, but a non-adjacent store was,
19296 so move the non-adjacent store to the front of the ready
19297 list, and adjust its priority so that it is more likely to
19299 move_to_end_of_ready (ready
, first_store_pos
, lastpos
);
19300 if (!sel_sched_p ()
19301 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
19302 INSN_PRIORITY (ready
[lastpos
])++;
19305 else if (load_store_pendulum
== 2)
19307 /* Two loads have been issued in this cycle. Increase the priority
19308 of the first store in the ready list to favor it for issuing in
19314 if (is_store_insn (ready
[pos
], &str_mem
)
19316 && INSN_PRIORITY_KNOWN (ready
[pos
]))
19318 INSN_PRIORITY (ready
[pos
])++;
19320 /* Adjust the pendulum to account for the fact that a store
19321 was found and increased in priority. This is to prevent
19322 increasing the priority of multiple stores */
19323 load_store_pendulum
++;
19331 return cached_can_issue_more
;
19334 /* Do Power9 specific sched_reorder2 reordering of ready list. */
19337 power9_sched_reorder2 (rtx_insn
**ready
, int lastpos
)
19340 enum attr_type type
, type2
;
19342 type
= get_attr_type (last_scheduled_insn
);
19344 /* Try to issue fixed point divides back-to-back in pairs so they will be
19345 routed to separate execution units and execute in parallel. */
19346 if (type
== TYPE_DIV
&& divide_cnt
== 0)
19348 /* First divide has been scheduled. */
19351 /* Scan the ready list looking for another divide, if found move it
19352 to the end of the list so it is chosen next. */
19356 if (recog_memoized (ready
[pos
]) >= 0
19357 && get_attr_type (ready
[pos
]) == TYPE_DIV
)
19359 move_to_end_of_ready (ready
, pos
, lastpos
);
19367 /* Last insn was the 2nd divide or not a divide, reset the counter. */
19370 /* The best dispatch throughput for vector and vector load insns can be
19371 achieved by interleaving a vector and vector load such that they'll
19372 dispatch to the same superslice. If this pairing cannot be achieved
19373 then it is best to pair vector insns together and vector load insns
19376 To aid in this pairing, vec_pairing maintains the current state with
19377 the following values:
19379 0 : Initial state, no vecload/vector pairing has been started.
19381 1 : A vecload or vector insn has been issued and a candidate for
19382 pairing has been found and moved to the end of the ready
19384 if (type
== TYPE_VECLOAD
)
19386 /* Issued a vecload. */
19387 if (vec_pairing
== 0)
19389 int vecload_pos
= -1;
19390 /* We issued a single vecload, look for a vector insn to pair it
19391 with. If one isn't found, try to pair another vecload. */
19395 if (recog_memoized (ready
[pos
]) >= 0)
19397 type2
= get_attr_type (ready
[pos
]);
19398 if (is_power9_pairable_vec_type (type2
))
19400 /* Found a vector insn to pair with, move it to the
19401 end of the ready list so it is scheduled next. */
19402 move_to_end_of_ready (ready
, pos
, lastpos
);
19404 return cached_can_issue_more
;
19406 else if (type2
== TYPE_VECLOAD
&& vecload_pos
== -1)
19407 /* Remember position of first vecload seen. */
19412 if (vecload_pos
>= 0)
19414 /* Didn't find a vector to pair with but did find a vecload,
19415 move it to the end of the ready list. */
19416 move_to_end_of_ready (ready
, vecload_pos
, lastpos
);
19418 return cached_can_issue_more
;
19422 else if (is_power9_pairable_vec_type (type
))
19424 /* Issued a vector operation. */
19425 if (vec_pairing
== 0)
19428 /* We issued a single vector insn, look for a vecload to pair it
19429 with. If one isn't found, try to pair another vector. */
19433 if (recog_memoized (ready
[pos
]) >= 0)
19435 type2
= get_attr_type (ready
[pos
]);
19436 if (type2
== TYPE_VECLOAD
)
19438 /* Found a vecload insn to pair with, move it to the
19439 end of the ready list so it is scheduled next. */
19440 move_to_end_of_ready (ready
, pos
, lastpos
);
19442 return cached_can_issue_more
;
19444 else if (is_power9_pairable_vec_type (type2
)
19446 /* Remember position of first vector insn seen. */
19453 /* Didn't find a vecload to pair with but did find a vector
19454 insn, move it to the end of the ready list. */
19455 move_to_end_of_ready (ready
, vec_pos
, lastpos
);
19457 return cached_can_issue_more
;
19462 /* We've either finished a vec/vecload pair, couldn't find an insn to
19463 continue the current pair, or the last insn had nothing to do with
19464 with pairing. In any case, reset the state. */
19468 return cached_can_issue_more
;
19471 /* Determine if INSN is a store to memory that can be fused with a similar
19475 is_fusable_store (rtx_insn
*insn
, rtx
*str_mem
)
19477 /* Insn must be a non-prefixed base+disp form store. */
19478 if (is_store_insn (insn
, str_mem
)
19479 && get_attr_prefixed (insn
) == PREFIXED_NO
19480 && get_attr_update (insn
) == UPDATE_NO
19481 && get_attr_indexed (insn
) == INDEXED_NO
)
19483 /* Further restrictions by mode and size. */
19484 if (!MEM_SIZE_KNOWN_P (*str_mem
))
19487 machine_mode mode
= GET_MODE (*str_mem
);
19488 HOST_WIDE_INT size
= MEM_SIZE (*str_mem
);
19490 if (INTEGRAL_MODE_P (mode
))
19491 /* Must be word or dword size. */
19492 return (size
== 4 || size
== 8);
19493 else if (FLOAT_MODE_P (mode
))
19494 /* Must be dword size. */
19495 return (size
== 8);
19501 /* Do Power10 specific reordering of the ready list. */
19504 power10_sched_reorder (rtx_insn
**ready
, int lastpos
)
19508 /* Do store fusion during sched2 only. */
19509 if (!reload_completed
)
19510 return cached_can_issue_more
;
19512 /* If the prior insn finished off a store fusion pair then simply
19513 reset the counter and return, nothing more to do. */
19514 if (load_store_pendulum
!= 0)
19516 load_store_pendulum
= 0;
19517 return cached_can_issue_more
;
19520 /* Try to pair certain store insns to adjacent memory locations
19521 so that the hardware will fuse them to a single operation. */
19522 if (TARGET_P10_FUSION
&& is_fusable_store (last_scheduled_insn
, &mem1
))
19525 /* A fusable store was just scheduled. Scan the ready list for another
19526 store that it can fuse with. */
19531 /* GPR stores can be ascending or descending offsets, FPR/VSR stores
19532 must be ascending only. */
19533 if (is_fusable_store (ready
[pos
], &mem2
)
19534 && ((INTEGRAL_MODE_P (GET_MODE (mem1
))
19535 && adjacent_mem_locations (mem1
, mem2
))
19536 || (FLOAT_MODE_P (GET_MODE (mem1
))
19537 && (adjacent_mem_locations (mem1
, mem2
) == mem1
))))
19539 /* Found a fusable store. Move it to the end of the ready list
19540 so it is scheduled next. */
19541 move_to_end_of_ready (ready
, pos
, lastpos
);
19543 load_store_pendulum
= -1;
19550 return cached_can_issue_more
;
19553 /* We are about to begin issuing insns for this clock cycle. */
19556 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED
, int sched_verbose
,
19557 rtx_insn
**ready ATTRIBUTE_UNUSED
,
19558 int *pn_ready ATTRIBUTE_UNUSED
,
19559 int clock_var ATTRIBUTE_UNUSED
)
19561 int n_ready
= *pn_ready
;
19564 fprintf (dump
, "// rs6000_sched_reorder :\n");
19566 /* Reorder the ready list, if the second to last ready insn
19567 is a nonepipeline insn. */
19568 if (rs6000_tune
== PROCESSOR_CELL
&& n_ready
> 1)
19570 if (is_nonpipeline_insn (ready
[n_ready
- 1])
19571 && (recog_memoized (ready
[n_ready
- 2]) > 0))
19572 /* Simply swap first two insns. */
19573 std::swap (ready
[n_ready
- 1], ready
[n_ready
- 2]);
19576 if (rs6000_tune
== PROCESSOR_POWER6
)
19577 load_store_pendulum
= 0;
19579 /* Do Power10/Power11 dependent reordering. */
19580 if (last_scheduled_insn
19581 && (rs6000_tune
== PROCESSOR_POWER10
19582 || rs6000_tune
== PROCESSOR_POWER11
))
19583 power10_sched_reorder (ready
, n_ready
- 1);
19585 return rs6000_issue_rate ();
19588 /* Like rs6000_sched_reorder, but called after issuing each insn. */
19591 rs6000_sched_reorder2 (FILE *dump
, int sched_verbose
, rtx_insn
**ready
,
19592 int *pn_ready
, int clock_var ATTRIBUTE_UNUSED
)
19595 fprintf (dump
, "// rs6000_sched_reorder2 :\n");
19597 /* Do Power6 dependent reordering if necessary. */
19598 if (rs6000_tune
== PROCESSOR_POWER6
&& last_scheduled_insn
)
19599 return power6_sched_reorder2 (ready
, *pn_ready
- 1);
19601 /* Do Power9 dependent reordering if necessary. */
19602 if (rs6000_tune
== PROCESSOR_POWER9
&& last_scheduled_insn
19603 && recog_memoized (last_scheduled_insn
) >= 0)
19604 return power9_sched_reorder2 (ready
, *pn_ready
- 1);
19606 /* Do Power10/Power11 dependent reordering. */
19607 if (last_scheduled_insn
19608 && (rs6000_tune
== PROCESSOR_POWER10
19609 || rs6000_tune
== PROCESSOR_POWER11
))
19610 return power10_sched_reorder (ready
, *pn_ready
- 1);
19612 return cached_can_issue_more
;
19615 /* Return whether the presence of INSN causes a dispatch group termination
19616 of group WHICH_GROUP.
19618 If WHICH_GROUP == current_group, this function will return true if INSN
19619 causes the termination of the current group (i.e, the dispatch group to
19620 which INSN belongs). This means that INSN will be the last insn in the
19621 group it belongs to.
19623 If WHICH_GROUP == previous_group, this function will return true if INSN
19624 causes the termination of the previous group (i.e, the dispatch group that
19625 precedes the group to which INSN belongs). This means that INSN will be
19626 the first insn in the group it belongs to). */
19629 insn_terminates_group_p (rtx_insn
*insn
, enum group_termination which_group
)
19636 first
= insn_must_be_first_in_group (insn
);
19637 last
= insn_must_be_last_in_group (insn
);
19642 if (which_group
== current_group
)
19644 else if (which_group
== previous_group
)
19652 insn_must_be_first_in_group (rtx_insn
*insn
)
19654 enum attr_type type
;
19658 || DEBUG_INSN_P (insn
)
19659 || GET_CODE (PATTERN (insn
)) == USE
19660 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
19663 switch (rs6000_tune
)
19665 case PROCESSOR_POWER5
:
19666 if (is_cracked_insn (insn
))
19669 case PROCESSOR_POWER4
:
19670 if (is_microcoded_insn (insn
))
19673 if (!rs6000_sched_groups
)
19676 type
= get_attr_type (insn
);
19683 case TYPE_CR_LOGICAL
:
19696 case PROCESSOR_POWER6
:
19697 type
= get_attr_type (insn
);
19706 case TYPE_FPCOMPARE
:
19717 if (get_attr_dot (insn
) == DOT_NO
19718 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
19723 if (get_attr_size (insn
) == SIZE_32
)
19731 if (get_attr_update (insn
) == UPDATE_YES
)
19739 case PROCESSOR_POWER7
:
19740 type
= get_attr_type (insn
);
19744 case TYPE_CR_LOGICAL
:
19758 if (get_attr_dot (insn
) == DOT_YES
)
19763 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19764 || get_attr_update (insn
) == UPDATE_YES
)
19771 if (get_attr_update (insn
) == UPDATE_YES
)
19779 case PROCESSOR_POWER8
:
19780 type
= get_attr_type (insn
);
19784 case TYPE_CR_LOGICAL
:
19792 case TYPE_VECSTORE
:
19799 if (get_attr_dot (insn
) == DOT_YES
)
19804 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19805 || get_attr_update (insn
) == UPDATE_YES
)
19810 if (get_attr_update (insn
) == UPDATE_YES
19811 && get_attr_indexed (insn
) == INDEXED_YES
)
19827 insn_must_be_last_in_group (rtx_insn
*insn
)
19829 enum attr_type type
;
19833 || DEBUG_INSN_P (insn
)
19834 || GET_CODE (PATTERN (insn
)) == USE
19835 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
19838 switch (rs6000_tune
) {
19839 case PROCESSOR_POWER4
:
19840 case PROCESSOR_POWER5
:
19841 if (is_microcoded_insn (insn
))
19844 if (is_branch_slot_insn (insn
))
19848 case PROCESSOR_POWER6
:
19849 type
= get_attr_type (insn
);
19857 case TYPE_FPCOMPARE
:
19868 if (get_attr_dot (insn
) == DOT_NO
19869 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
19874 if (get_attr_size (insn
) == SIZE_32
)
19882 case PROCESSOR_POWER7
:
19883 type
= get_attr_type (insn
);
19893 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19894 && get_attr_update (insn
) == UPDATE_YES
)
19899 if (get_attr_update (insn
) == UPDATE_YES
19900 && get_attr_indexed (insn
) == INDEXED_YES
)
19908 case PROCESSOR_POWER8
:
19909 type
= get_attr_type (insn
);
19921 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19922 && get_attr_update (insn
) == UPDATE_YES
)
19927 if (get_attr_update (insn
) == UPDATE_YES
19928 && get_attr_indexed (insn
) == INDEXED_YES
)
19943 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
19944 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
19947 is_costly_group (rtx
*group_insns
, rtx next_insn
)
19950 int issue_rate
= rs6000_issue_rate ();
19952 for (i
= 0; i
< issue_rate
; i
++)
19954 sd_iterator_def sd_it
;
19956 rtx insn
= group_insns
[i
];
19961 FOR_EACH_DEP (insn
, SD_LIST_RES_FORW
, sd_it
, dep
)
19963 rtx next
= DEP_CON (dep
);
19965 if (next
== next_insn
19966 && rs6000_is_costly_dependence (dep
, dep_cost (dep
), 0))
19974 /* Utility of the function redefine_groups.
19975 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
19976 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
19977 to keep it "far" (in a separate group) from GROUP_INSNS, following
19978 one of the following schemes, depending on the value of the flag
19979 -minsert_sched_nops = X:
19980 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
19981 in order to force NEXT_INSN into a separate group.
19982 (2) X < sched_finish_regroup_exact: insert exactly X nops.
19983 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
19984 insertion (has a group just ended, how many vacant issue slots remain in the
19985 last group, and how many dispatch groups were encountered so far). */
19988 force_new_group (int sched_verbose
, FILE *dump
, rtx
*group_insns
,
19989 rtx_insn
*next_insn
, bool *group_end
, int can_issue_more
,
19994 int issue_rate
= rs6000_issue_rate ();
19995 bool end
= *group_end
;
19998 if (next_insn
== NULL_RTX
|| DEBUG_INSN_P (next_insn
))
19999 return can_issue_more
;
20001 if (rs6000_sched_insert_nops
> sched_finish_regroup_exact
)
20002 return can_issue_more
;
20004 force
= is_costly_group (group_insns
, next_insn
);
20006 return can_issue_more
;
20008 if (sched_verbose
> 6)
20009 fprintf (dump
,"force: group count = %d, can_issue_more = %d\n",
20010 *group_count
,can_issue_more
);
20012 if (rs6000_sched_insert_nops
== sched_finish_regroup_exact
)
20015 can_issue_more
= 0;
20017 /* Since only a branch can be issued in the last issue_slot, it is
20018 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
20019 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
20020 in this case the last nop will start a new group and the branch
20021 will be forced to the new group. */
20022 if (can_issue_more
&& !is_branch_slot_insn (next_insn
))
20025 /* Do we have a special group ending nop? */
20026 if (rs6000_tune
== PROCESSOR_POWER6
|| rs6000_tune
== PROCESSOR_POWER7
20027 || rs6000_tune
== PROCESSOR_POWER8
)
20029 nop
= gen_group_ending_nop ();
20030 emit_insn_before (nop
, next_insn
);
20031 can_issue_more
= 0;
20034 while (can_issue_more
> 0)
20037 emit_insn_before (nop
, next_insn
);
20045 if (rs6000_sched_insert_nops
< sched_finish_regroup_exact
)
20047 int n_nops
= rs6000_sched_insert_nops
;
20049 /* Nops can't be issued from the branch slot, so the effective
20050 issue_rate for nops is 'issue_rate - 1'. */
20051 if (can_issue_more
== 0)
20052 can_issue_more
= issue_rate
;
20054 if (can_issue_more
== 0)
20056 can_issue_more
= issue_rate
- 1;
20059 for (i
= 0; i
< issue_rate
; i
++)
20061 group_insns
[i
] = 0;
20068 emit_insn_before (nop
, next_insn
);
20069 if (can_issue_more
== issue_rate
- 1) /* new group begins */
20072 if (can_issue_more
== 0)
20074 can_issue_more
= issue_rate
- 1;
20077 for (i
= 0; i
< issue_rate
; i
++)
20079 group_insns
[i
] = 0;
20085 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
20088 /* Is next_insn going to start a new group? */
20091 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
20092 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
20093 || (can_issue_more
< issue_rate
&&
20094 insn_terminates_group_p (next_insn
, previous_group
)));
20095 if (*group_end
&& end
)
20098 if (sched_verbose
> 6)
20099 fprintf (dump
, "done force: group count = %d, can_issue_more = %d\n",
20100 *group_count
, can_issue_more
);
20101 return can_issue_more
;
20104 return can_issue_more
;
20107 /* This function tries to synch the dispatch groups that the compiler "sees"
20108 with the dispatch groups that the processor dispatcher is expected to
20109 form in practice. It tries to achieve this synchronization by forcing the
20110 estimated processor grouping on the compiler (as opposed to the function
20111 'pad_goups' which tries to force the scheduler's grouping on the processor).
20113 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
20114 examines the (estimated) dispatch groups that will be formed by the processor
20115 dispatcher. It marks these group boundaries to reflect the estimated
20116 processor grouping, overriding the grouping that the scheduler had marked.
20117 Depending on the value of the flag '-minsert-sched-nops' this function can
20118 force certain insns into separate groups or force a certain distance between
20119 them by inserting nops, for example, if there exists a "costly dependence"
20122 The function estimates the group boundaries that the processor will form as
20123 follows: It keeps track of how many vacant issue slots are available after
20124 each insn. A subsequent insn will start a new group if one of the following
20126 - no more vacant issue slots remain in the current dispatch group.
20127 - only the last issue slot, which is the branch slot, is vacant, but the next
20128 insn is not a branch.
20129 - only the last 2 or less issue slots, including the branch slot, are vacant,
20130 which means that a cracked insn (which occupies two issue slots) can't be
20131 issued in this group.
20132 - less than 'issue_rate' slots are vacant, and the next insn always needs to
20133 start a new group. */
20136 redefine_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
20139 rtx_insn
*insn
, *next_insn
;
20141 int can_issue_more
;
20144 int group_count
= 0;
20148 issue_rate
= rs6000_issue_rate ();
20149 group_insns
= XALLOCAVEC (rtx
, issue_rate
);
20150 for (i
= 0; i
< issue_rate
; i
++)
20152 group_insns
[i
] = 0;
20154 can_issue_more
= issue_rate
;
20156 insn
= get_next_active_insn (prev_head_insn
, tail
);
20159 while (insn
!= NULL_RTX
)
20161 slot
= (issue_rate
- can_issue_more
);
20162 group_insns
[slot
] = insn
;
20164 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
20165 if (insn_terminates_group_p (insn
, current_group
))
20166 can_issue_more
= 0;
20168 next_insn
= get_next_active_insn (insn
, tail
);
20169 if (next_insn
== NULL_RTX
)
20170 return group_count
+ 1;
20172 /* Is next_insn going to start a new group? */
20174 = (can_issue_more
== 0
20175 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
20176 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
20177 || (can_issue_more
< issue_rate
&&
20178 insn_terminates_group_p (next_insn
, previous_group
)));
20180 can_issue_more
= force_new_group (sched_verbose
, dump
, group_insns
,
20181 next_insn
, &group_end
, can_issue_more
,
20187 can_issue_more
= 0;
20188 for (i
= 0; i
< issue_rate
; i
++)
20190 group_insns
[i
] = 0;
20194 if (GET_MODE (next_insn
) == TImode
&& can_issue_more
)
20195 PUT_MODE (next_insn
, VOIDmode
);
20196 else if (!can_issue_more
&& GET_MODE (next_insn
) != TImode
)
20197 PUT_MODE (next_insn
, TImode
);
20200 if (can_issue_more
== 0)
20201 can_issue_more
= issue_rate
;
20204 return group_count
;
20207 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
20208 dispatch group boundaries that the scheduler had marked. Pad with nops
20209 any dispatch groups which have vacant issue slots, in order to force the
20210 scheduler's grouping on the processor dispatcher. The function
20211 returns the number of dispatch groups found. */
20214 pad_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
20217 rtx_insn
*insn
, *next_insn
;
20220 int can_issue_more
;
20222 int group_count
= 0;
20224 /* Initialize issue_rate. */
20225 issue_rate
= rs6000_issue_rate ();
20226 can_issue_more
= issue_rate
;
20228 insn
= get_next_active_insn (prev_head_insn
, tail
);
20229 next_insn
= get_next_active_insn (insn
, tail
);
20231 while (insn
!= NULL_RTX
)
20234 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
20236 group_end
= (next_insn
== NULL_RTX
|| GET_MODE (next_insn
) == TImode
);
20238 if (next_insn
== NULL_RTX
)
20243 /* If the scheduler had marked group termination at this location
20244 (between insn and next_insn), and neither insn nor next_insn will
20245 force group termination, pad the group with nops to force group
20248 && (rs6000_sched_insert_nops
== sched_finish_pad_groups
)
20249 && !insn_terminates_group_p (insn
, current_group
)
20250 && !insn_terminates_group_p (next_insn
, previous_group
))
20252 if (!is_branch_slot_insn (next_insn
))
20255 while (can_issue_more
)
20258 emit_insn_before (nop
, next_insn
);
20263 can_issue_more
= issue_rate
;
20268 next_insn
= get_next_active_insn (insn
, tail
);
20271 return group_count
;
20274 /* We're beginning a new block. Initialize data structures as necessary. */
20277 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED
,
20278 int sched_verbose ATTRIBUTE_UNUSED
,
20279 int max_ready ATTRIBUTE_UNUSED
)
20281 last_scheduled_insn
= NULL
;
20282 load_store_pendulum
= 0;
20287 /* The following function is called at the end of scheduling BB.
20288 After reload, it inserts nops at insn group bundling. */
20291 rs6000_sched_finish (FILE *dump
, int sched_verbose
)
20296 fprintf (dump
, "=== Finishing schedule.\n");
20298 if (reload_completed
&& rs6000_sched_groups
)
20300 /* Do not run sched_finish hook when selective scheduling enabled. */
20301 if (sel_sched_p ())
20304 if (rs6000_sched_insert_nops
== sched_finish_none
)
20307 if (rs6000_sched_insert_nops
== sched_finish_pad_groups
)
20308 n_groups
= pad_groups (dump
, sched_verbose
,
20309 current_sched_info
->prev_head
,
20310 current_sched_info
->next_tail
);
20312 n_groups
= redefine_groups (dump
, sched_verbose
,
20313 current_sched_info
->prev_head
,
20314 current_sched_info
->next_tail
);
20316 if (sched_verbose
>= 6)
20318 fprintf (dump
, "ngroups = %d\n", n_groups
);
20319 print_rtl (dump
, current_sched_info
->prev_head
);
20320 fprintf (dump
, "Done finish_sched\n");
20325 struct rs6000_sched_context
20327 short cached_can_issue_more
;
20328 rtx_insn
*last_scheduled_insn
;
20329 int load_store_pendulum
;
20334 typedef struct rs6000_sched_context rs6000_sched_context_def
;
20335 typedef rs6000_sched_context_def
*rs6000_sched_context_t
;
20337 /* Allocate store for new scheduling context. */
20339 rs6000_alloc_sched_context (void)
20341 return xmalloc (sizeof (rs6000_sched_context_def
));
20344 /* If CLEAN_P is true then initializes _SC with clean data,
20345 and from the global context otherwise. */
20347 rs6000_init_sched_context (void *_sc
, bool clean_p
)
20349 rs6000_sched_context_t sc
= (rs6000_sched_context_t
) _sc
;
20353 sc
->cached_can_issue_more
= 0;
20354 sc
->last_scheduled_insn
= NULL
;
20355 sc
->load_store_pendulum
= 0;
20356 sc
->divide_cnt
= 0;
20357 sc
->vec_pairing
= 0;
20361 sc
->cached_can_issue_more
= cached_can_issue_more
;
20362 sc
->last_scheduled_insn
= last_scheduled_insn
;
20363 sc
->load_store_pendulum
= load_store_pendulum
;
20364 sc
->divide_cnt
= divide_cnt
;
20365 sc
->vec_pairing
= vec_pairing
;
20369 /* Sets the global scheduling context to the one pointed to by _SC. */
20371 rs6000_set_sched_context (void *_sc
)
20373 rs6000_sched_context_t sc
= (rs6000_sched_context_t
) _sc
;
20375 gcc_assert (sc
!= NULL
);
20377 cached_can_issue_more
= sc
->cached_can_issue_more
;
20378 last_scheduled_insn
= sc
->last_scheduled_insn
;
20379 load_store_pendulum
= sc
->load_store_pendulum
;
20380 divide_cnt
= sc
->divide_cnt
;
20381 vec_pairing
= sc
->vec_pairing
;
20386 rs6000_free_sched_context (void *_sc
)
20388 gcc_assert (_sc
!= NULL
);
20394 rs6000_sched_can_speculate_insn (rtx_insn
*insn
)
20396 switch (get_attr_type (insn
))
20411 /* Length in units of the trampoline for entering a nested function. */
20414 rs6000_trampoline_size (void)
20418 switch (DEFAULT_ABI
)
20421 gcc_unreachable ();
20424 ret
= (TARGET_32BIT
) ? 12 : 24;
20428 gcc_assert (!TARGET_32BIT
);
20434 ret
= (TARGET_32BIT
) ? 40 : 48;
20441 /* Emit RTL insns to initialize the variable parts of a trampoline.
20442 FNADDR is an RTX for the address of the function's pure code.
20443 CXT is an RTX for the static chain value for the function. */
20446 rs6000_trampoline_init (rtx m_tramp
, tree fndecl
, rtx cxt
)
20448 int regsize
= (TARGET_32BIT
) ? 4 : 8;
20449 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
20450 rtx ctx_reg
= force_reg (Pmode
, cxt
);
20451 rtx addr
= force_reg (Pmode
, XEXP (m_tramp
, 0));
20453 switch (DEFAULT_ABI
)
20456 gcc_unreachable ();
20458 /* Under AIX, just build the 3 word function descriptor */
20461 rtx fnmem
, fn_reg
, toc_reg
;
20463 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS
)
20464 error ("you cannot take the address of a nested function if you use "
20465 "the %qs option", "-mno-pointers-to-nested-functions");
20467 fnmem
= gen_const_mem (Pmode
, force_reg (Pmode
, fnaddr
));
20468 fn_reg
= gen_reg_rtx (Pmode
);
20469 toc_reg
= gen_reg_rtx (Pmode
);
20471 /* Macro to shorten the code expansions below. */
20472 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
20474 m_tramp
= replace_equiv_address (m_tramp
, addr
);
20476 emit_move_insn (fn_reg
, MEM_PLUS (fnmem
, 0));
20477 emit_move_insn (toc_reg
, MEM_PLUS (fnmem
, regsize
));
20478 emit_move_insn (MEM_PLUS (m_tramp
, 0), fn_reg
);
20479 emit_move_insn (MEM_PLUS (m_tramp
, regsize
), toc_reg
);
20480 emit_move_insn (MEM_PLUS (m_tramp
, 2*regsize
), ctx_reg
);
20486 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
20490 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__trampoline_setup"),
20491 LCT_NORMAL
, VOIDmode
,
20493 GEN_INT (rs6000_trampoline_size ()), SImode
,
20501 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
20502 identifier as an argument, so the front end shouldn't look it up. */
20505 rs6000_attribute_takes_identifier_p (const_tree attr_id
)
20507 return is_attribute_p ("altivec", attr_id
);
20510 /* Handle the "altivec" attribute. The attribute may have
20511 arguments as follows:
20513 __attribute__((altivec(vector__)))
20514 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
20515 __attribute__((altivec(bool__))) (always followed by 'unsigned')
20517 and may appear more than once (e.g., 'vector bool char') in a
20518 given declaration. */
20521 rs6000_handle_altivec_attribute (tree
*node
,
20522 tree name ATTRIBUTE_UNUSED
,
20524 int flags ATTRIBUTE_UNUSED
,
20525 bool *no_add_attrs
)
20527 tree type
= *node
, result
= NULL_TREE
;
20531 = ((args
&& TREE_CODE (args
) == TREE_LIST
&& TREE_VALUE (args
)
20532 && TREE_CODE (TREE_VALUE (args
)) == IDENTIFIER_NODE
)
20533 ? *IDENTIFIER_POINTER (TREE_VALUE (args
))
20536 while (POINTER_TYPE_P (type
)
20537 || TREE_CODE (type
) == FUNCTION_TYPE
20538 || TREE_CODE (type
) == METHOD_TYPE
20539 || TREE_CODE (type
) == ARRAY_TYPE
)
20540 type
= TREE_TYPE (type
);
20542 mode
= TYPE_MODE (type
);
20544 /* Check for invalid AltiVec type qualifiers. */
20545 if (type
== long_double_type_node
)
20546 error ("use of %<long double%> in AltiVec types is invalid");
20547 else if (type
== boolean_type_node
)
20548 error ("use of boolean types in AltiVec types is invalid");
20549 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
20550 error ("use of %<complex%> in AltiVec types is invalid");
20551 else if (DECIMAL_FLOAT_MODE_P (mode
))
20552 error ("use of decimal floating-point types in AltiVec types is invalid");
20553 else if (!TARGET_VSX
)
20555 if (type
== long_unsigned_type_node
|| type
== long_integer_type_node
)
20558 error ("use of %<long%> in AltiVec types is invalid for "
20559 "64-bit code without %qs", "-mvsx");
20560 else if (rs6000_warn_altivec_long
)
20561 warning (0, "use of %<long%> in AltiVec types is deprecated; "
20564 else if (type
== long_long_unsigned_type_node
20565 || type
== long_long_integer_type_node
)
20566 error ("use of %<long long%> in AltiVec types is invalid without %qs",
20568 else if (type
== double_type_node
)
20569 error ("use of %<double%> in AltiVec types is invalid without %qs",
20573 switch (altivec_type
)
20576 unsigned_p
= TYPE_UNSIGNED (type
);
20580 result
= (unsigned_p
? unsigned_V1TI_type_node
: V1TI_type_node
);
20583 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
20586 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
20589 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
20592 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
20594 case E_SFmode
: result
= V4SF_type_node
; break;
20595 case E_DFmode
: result
= V2DF_type_node
; break;
20596 /* If the user says 'vector int bool', we may be handed the 'bool'
20597 attribute _before_ the 'vector' attribute, and so select the
20598 proper type in the 'b' case below. */
20599 case E_V4SImode
: case E_V8HImode
: case E_V16QImode
: case E_V4SFmode
:
20600 case E_V2DImode
: case E_V2DFmode
:
20608 case E_TImode
: case E_V1TImode
: result
= bool_V1TI_type_node
; break;
20609 case E_DImode
: case E_V2DImode
: result
= bool_V2DI_type_node
; break;
20610 case E_SImode
: case E_V4SImode
: result
= bool_V4SI_type_node
; break;
20611 case E_HImode
: case E_V8HImode
: result
= bool_V8HI_type_node
; break;
20612 case E_QImode
: case E_V16QImode
: result
= bool_V16QI_type_node
;
20619 case E_V8HImode
: result
= pixel_V8HI_type_node
;
20625 /* Propagate qualifiers attached to the element type
20626 onto the vector type. */
20627 if (result
&& result
!= type
&& TYPE_QUALS (type
))
20628 result
= build_qualified_type (result
, TYPE_QUALS (type
));
20630 *no_add_attrs
= true; /* No need to hang on to the attribute. */
20633 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
20638 /* AltiVec defines five built-in scalar types that serve as vector
20639 elements; we must teach the compiler how to mangle them. The 128-bit
20640 floating point mangling is target-specific as well. MMA defines
20641 two built-in types to be used as opaque vector types. */
20643 static const char *
20644 rs6000_mangle_type (const_tree type
)
20646 type
= TYPE_MAIN_VARIANT (type
);
20648 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
20649 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
20650 && TREE_CODE (type
) != OPAQUE_TYPE
)
20653 if (type
== bool_char_type_node
) return "U6__boolc";
20654 if (type
== bool_short_type_node
) return "U6__bools";
20655 if (type
== pixel_type_node
) return "u7__pixel";
20656 if (type
== bool_int_type_node
) return "U6__booli";
20657 if (type
== bool_long_long_type_node
) return "U6__boolx";
20659 if (type
== float128_type_node
|| type
== float64x_type_node
)
20662 if (SCALAR_FLOAT_TYPE_P (type
) && FLOAT128_IBM_P (TYPE_MODE (type
)))
20664 if (SCALAR_FLOAT_TYPE_P (type
) && FLOAT128_IEEE_P (TYPE_MODE (type
)))
20665 return "u9__ieee128";
20667 if (type
== vector_pair_type_node
)
20668 return "u13__vector_pair";
20669 if (type
== vector_quad_type_node
)
20670 return "u13__vector_quad";
20672 /* For all other types, use the default mangling. */
20676 /* Handle a "longcall" or "shortcall" attribute; arguments as in
20677 struct attribute_spec.handler. */
20680 rs6000_handle_longcall_attribute (tree
*node
, tree name
,
20681 tree args ATTRIBUTE_UNUSED
,
20682 int flags ATTRIBUTE_UNUSED
,
20683 bool *no_add_attrs
)
20685 if (TREE_CODE (*node
) != FUNCTION_TYPE
20686 && TREE_CODE (*node
) != FIELD_DECL
20687 && TREE_CODE (*node
) != TYPE_DECL
)
20689 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
20691 *no_add_attrs
= true;
20697 /* Set longcall attributes on all functions declared when
20698 rs6000_default_long_calls is true. */
20700 rs6000_set_default_type_attributes (tree type
)
20702 if (rs6000_default_long_calls
20703 && FUNC_OR_METHOD_TYPE_P (type
))
20704 TYPE_ATTRIBUTES (type
) = tree_cons (get_identifier ("longcall"),
20706 TYPE_ATTRIBUTES (type
));
20709 darwin_set_default_type_attributes (type
);
20713 /* Return a reference suitable for calling a function with the
20714 longcall attribute. */
20717 rs6000_longcall_ref (rtx call_ref
, rtx arg
)
20719 /* System V adds '.' to the internal name, so skip them. */
20720 const char *call_name
= XSTR (call_ref
, 0);
20721 if (*call_name
== '.')
20723 while (*call_name
== '.')
20726 tree node
= get_identifier (call_name
);
20727 call_ref
= gen_rtx_SYMBOL_REF (VOIDmode
, IDENTIFIER_POINTER (node
));
20732 rtx base
= const0_rtx
;
20734 if (rs6000_pcrel_p ())
20736 rtx reg
= gen_rtx_REG (Pmode
, regno
);
20737 rtx u
= gen_rtx_UNSPEC_VOLATILE (Pmode
,
20738 gen_rtvec (3, base
, call_ref
, arg
),
20739 UNSPECV_PLT_PCREL
);
20740 emit_insn (gen_rtx_SET (reg
, u
));
20744 if (DEFAULT_ABI
== ABI_ELFv2
)
20745 base
= gen_rtx_REG (Pmode
, TOC_REGISTER
);
20749 base
= gen_rtx_REG (Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
20752 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
20753 may be used by a function global entry point. For SysV4, r11
20754 is used by __glink_PLTresolve lazy resolver entry. */
20755 rtx reg
= gen_rtx_REG (Pmode
, regno
);
20756 rtx hi
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (3, base
, call_ref
, arg
),
20758 rtx lo
= gen_rtx_UNSPEC_VOLATILE (Pmode
,
20759 gen_rtvec (3, reg
, call_ref
, arg
),
20761 emit_insn (gen_rtx_SET (reg
, hi
));
20762 emit_insn (gen_rtx_SET (reg
, lo
));
20766 return force_reg (Pmode
, call_ref
);
20769 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20770 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
20773 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20774 struct attribute_spec.handler. */
20776 rs6000_handle_struct_attribute (tree
*node
, tree name
,
20777 tree args ATTRIBUTE_UNUSED
,
20778 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
20781 if (DECL_P (*node
))
20783 if (TREE_CODE (*node
) == TYPE_DECL
)
20784 type
= &TREE_TYPE (*node
);
20789 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
20790 || TREE_CODE (*type
) == UNION_TYPE
)))
20792 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
20793 *no_add_attrs
= true;
20796 else if ((is_attribute_p ("ms_struct", name
)
20797 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
20798 || ((is_attribute_p ("gcc_struct", name
)
20799 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
20801 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
20803 *no_add_attrs
= true;
20810 rs6000_ms_bitfield_layout_p (const_tree record_type
)
20812 return (TARGET_USE_MS_BITFIELD_LAYOUT
&&
20813 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
20814 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
20817 #ifdef USING_ELFOS_H
20819 /* A get_unnamed_section callback, used for switching to toc_section. */
20822 rs6000_elf_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED
)
20824 if ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
20825 && TARGET_MINIMAL_TOC
)
20827 if (!toc_initialized
)
20829 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
20830 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20831 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "LCTOC", 0);
20832 fprintf (asm_out_file
, "\t.tc ");
20833 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1[TC],");
20834 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20835 fprintf (asm_out_file
, "\n");
20837 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20838 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20839 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20840 fprintf (asm_out_file
, " = .+32768\n");
20841 toc_initialized
= 1;
20844 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20846 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
20848 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
20849 if (!toc_initialized
)
20851 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20852 toc_initialized
= 1;
20857 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20858 if (!toc_initialized
)
20860 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20861 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20862 fprintf (asm_out_file
, " = .+32768\n");
20863 toc_initialized
= 1;
20868 /* Implement TARGET_ASM_INIT_SECTIONS. */
20871 rs6000_elf_asm_init_sections (void)
20874 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op
, NULL
);
20877 = get_unnamed_section (SECTION_WRITE
, output_section_asm_op
,
20878 SDATA2_SECTION_ASM_OP
);
20881 /* Implement TARGET_SELECT_RTX_SECTION. */
20884 rs6000_elf_select_rtx_section (machine_mode mode
, rtx x
,
20885 unsigned HOST_WIDE_INT align
)
20887 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
))
20888 return toc_section
;
20890 return default_elf_select_rtx_section (mode
, x
, align
);
20893 /* For a SYMBOL_REF, set generic flags and then perform some
20894 target-specific processing.
20896 When the AIX ABI is requested on a non-AIX system, replace the
20897 function name with the real name (with a leading .) rather than the
20898 function descriptor name. This saves a lot of overriding code to
20899 read the prefixes. */
20901 static void rs6000_elf_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
20903 rs6000_elf_encode_section_info (tree decl
, rtx rtl
, int first
)
20905 default_encode_section_info (decl
, rtl
, first
);
20908 && TREE_CODE (decl
) == FUNCTION_DECL
20910 && DEFAULT_ABI
== ABI_AIX
)
20912 rtx sym_ref
= XEXP (rtl
, 0);
20913 size_t len
= strlen (XSTR (sym_ref
, 0));
20914 char *str
= XALLOCAVEC (char, len
+ 2);
20916 memcpy (str
+ 1, XSTR (sym_ref
, 0), len
+ 1);
20917 XSTR (sym_ref
, 0) = ggc_alloc_string (str
, len
+ 1);
/* Return true if SECTION equals TEMPL or begins with TEMPL followed by a
   '.' (i.e. TEMPL names the section or one of its sub-sections).  */

static inline bool
compare_section_name (const char *section, const char *templ)
{
  int len;

  len = strlen (templ);
  return (strncmp (section, templ, len) == 0
	  && (section[len] == 0 || section[len] == '.'));
}
20932 rs6000_elf_in_small_data_p (const_tree decl
)
20934 if (rs6000_sdata
== SDATA_NONE
)
20937 /* We want to merge strings, so we never consider them small data. */
20938 if (TREE_CODE (decl
) == STRING_CST
)
20941 /* Functions are never in the small data area. */
20942 if (TREE_CODE (decl
) == FUNCTION_DECL
)
20945 if (VAR_P (decl
) && DECL_SECTION_NAME (decl
))
20947 const char *section
= DECL_SECTION_NAME (decl
);
20948 if (compare_section_name (section
, ".sdata")
20949 || compare_section_name (section
, ".sdata2")
20950 || compare_section_name (section
, ".gnu.linkonce.s")
20951 || compare_section_name (section
, ".sbss")
20952 || compare_section_name (section
, ".sbss2")
20953 || compare_section_name (section
, ".gnu.linkonce.sb")
20954 || strcmp (section
, ".PPC.EMB.sdata0") == 0
20955 || strcmp (section
, ".PPC.EMB.sbss0") == 0)
20960 /* If we are told not to put readonly data in sdata, then don't. */
20961 if (TREE_READONLY (decl
) && rs6000_sdata
!= SDATA_EABI
20962 && !rs6000_readonly_in_sdata
)
20965 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (decl
));
20968 && size
<= g_switch_value
20969 /* If it's not public, and we're not going to reference it there,
20970 there's no need to put it in the small data section. */
20971 && (rs6000_sdata
!= SDATA_DATA
|| TREE_PUBLIC (decl
)))
20978 #endif /* USING_ELFOS_H */
20980 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
20983 rs6000_use_blocks_for_constant_p (machine_mode mode
, const_rtx x
)
20985 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
);
20988 /* Do not place thread-local symbols refs in the object blocks. */
20991 rs6000_use_blocks_for_decl_p (const_tree decl
)
20993 return !DECL_THREAD_LOCAL_P (decl
);
20996 /* Return a REG that occurs in ADDR with coefficient 1.
20997 ADDR can be effectively incremented by incrementing REG.
20999 r0 is special and we must not select it as an address
21000 register by this routine since our caller will try to
21001 increment the returned register via an "la" instruction. */
21004 find_addr_reg (rtx addr
)
21006 while (GET_CODE (addr
) == PLUS
)
21008 if (REG_P (XEXP (addr
, 0))
21009 && REGNO (XEXP (addr
, 0)) != 0)
21010 addr
= XEXP (addr
, 0);
21011 else if (REG_P (XEXP (addr
, 1))
21012 && REGNO (XEXP (addr
, 1)) != 0)
21013 addr
= XEXP (addr
, 1);
21014 else if (CONSTANT_P (XEXP (addr
, 0)))
21015 addr
= XEXP (addr
, 1);
21016 else if (CONSTANT_P (XEXP (addr
, 1)))
21017 addr
= XEXP (addr
, 0);
21019 gcc_unreachable ();
21021 gcc_assert (REG_P (addr
) && REGNO (addr
) != 0);
21026 rs6000_fatal_bad_address (rtx op
)
21028 fatal_insn ("bad address", op
);
21033 vec
<branch_island
, va_gc
> *branch_islands
;
21035 /* Remember to generate a branch island for far calls to the given
21039 add_compiler_branch_island (tree label_name
, tree function_name
,
21042 branch_island bi
= {function_name
, label_name
, line_number
};
21043 vec_safe_push (branch_islands
, bi
);
21046 /* NO_PREVIOUS_DEF checks in the link list whether the function name is
21047 already there or not. */
21050 no_previous_def (tree function_name
)
21055 FOR_EACH_VEC_SAFE_ELT (branch_islands
, ix
, bi
)
21056 if (function_name
== bi
->function_name
)
21061 /* GET_PREV_LABEL gets the label name from the previous definition of
21065 get_prev_label (tree function_name
)
21070 FOR_EACH_VEC_SAFE_ELT (branch_islands
, ix
, bi
)
21071 if (function_name
== bi
->function_name
)
21072 return bi
->label_name
;
21076 /* Generate external symbol indirection stubs (PIC and non-PIC). */
21079 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
21081 unsigned int length
;
21082 char *symbol_name
, *lazy_ptr_name
;
21083 char *local_label_0
;
21084 static unsigned label
= 0;
21086 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
21087 symb
= (*targetm
.strip_name_encoding
) (symb
);
21089 length
= strlen (symb
);
21090 symbol_name
= XALLOCAVEC (char, length
+ 32);
21091 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
21093 lazy_ptr_name
= XALLOCAVEC (char, length
+ 32);
21094 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name
, symb
, length
);
21098 switch_to_section (darwin_sections
[machopic_picsymbol_stub1_section
]);
21099 fprintf (file
, "\t.align 5\n");
21101 fprintf (file
, "%s:\n", stub
);
21102 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
21105 local_label_0
= XALLOCAVEC (char, 16);
21106 sprintf (local_label_0
, "L%u$spb", label
);
21108 fprintf (file
, "\tmflr r0\n");
21109 fprintf (file
, "\tbcl 20,31,%s\n", local_label_0
);
21110 fprintf (file
, "%s:\n\tmflr r11\n", local_label_0
);
21111 fprintf (file
, "\taddis r11,r11,ha16(%s-%s)\n",
21112 lazy_ptr_name
, local_label_0
);
21113 fprintf (file
, "\tmtlr r0\n");
21114 fprintf (file
, "\t%s r12,lo16(%s-%s)(r11)\n",
21115 (TARGET_64BIT
? "ldu" : "lwzu"),
21116 lazy_ptr_name
, local_label_0
);
21117 fprintf (file
, "\tmtctr r12\n");
21118 fprintf (file
, "\tbctr\n");
21120 else /* mdynamic-no-pic or mkernel. */
21122 switch_to_section (darwin_sections
[machopic_symbol_stub1_section
]);
21123 fprintf (file
, "\t.align 4\n");
21125 fprintf (file
, "%s:\n", stub
);
21126 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
21128 fprintf (file
, "\tlis r11,ha16(%s)\n", lazy_ptr_name
);
21129 fprintf (file
, "\t%s r12,lo16(%s)(r11)\n",
21130 (TARGET_64BIT
? "ldu" : "lwzu"),
21132 fprintf (file
, "\tmtctr r12\n");
21133 fprintf (file
, "\tbctr\n");
21136 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
21137 fprintf (file
, "%s:\n", lazy_ptr_name
);
21138 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
21139 fprintf (file
, "%sdyld_stub_binding_helper\n",
21140 (TARGET_64BIT
? DOUBLE_INT_ASM_OP
: "\t.long\t"));
21143 /* Legitimize PIC addresses. If the address is already
21144 position-independent, we return ORIG. Newly generated
21145 position-independent addresses go into a reg. This is REG if non
21146 zero, otherwise we allocate register(s) as necessary. */
21148 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
21151 rs6000_machopic_legitimize_pic_address (rtx orig
, machine_mode mode
,
21156 if (reg
== NULL
&& !reload_completed
)
21157 reg
= gen_reg_rtx (Pmode
);
21159 if (GET_CODE (orig
) == CONST
)
21163 if (GET_CODE (XEXP (orig
, 0)) == PLUS
21164 && XEXP (XEXP (orig
, 0), 0) == pic_offset_table_rtx
)
21167 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
21169 /* Use a different reg for the intermediate value, as
21170 it will be marked UNCHANGING. */
21171 reg_temp
= !can_create_pseudo_p () ? reg
: gen_reg_rtx (Pmode
);
21172 base
= rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig
, 0), 0),
21175 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig
, 0), 1),
21178 if (CONST_INT_P (offset
))
21180 if (SMALL_INT (offset
))
21181 return plus_constant (Pmode
, base
, INTVAL (offset
));
21182 else if (!reload_completed
)
21183 offset
= force_reg (Pmode
, offset
);
21186 rtx mem
= force_const_mem (Pmode
, orig
);
21187 return machopic_legitimize_pic_address (mem
, Pmode
, reg
);
21190 return gen_rtx_PLUS (Pmode
, base
, offset
);
21193 /* Fall back on generic machopic code. */
21194 return machopic_legitimize_pic_address (orig
, mode
, reg
);
21197 /* Output a .machine directive for the Darwin assembler, and call
21198 the generic start_file routine. */
21201 rs6000_darwin_file_start (void)
21203 static const struct
21207 HOST_WIDE_INT if_set
;
21209 { "ppc64", "ppc64", MASK_64BIT
},
21210 { "970", "ppc970", OPTION_MASK_PPC_GPOPT
| OPTION_MASK_MFCRF \
21211 | MASK_POWERPC64
},
21212 { "power4", "ppc970", 0 },
21213 { "G5", "ppc970", 0 },
21214 { "7450", "ppc7450", 0 },
21215 { "7400", "ppc7400", OPTION_MASK_ALTIVEC
},
21216 { "G4", "ppc7400", 0 },
21217 { "750", "ppc750", 0 },
21218 { "740", "ppc750", 0 },
21219 { "G3", "ppc750", 0 },
21220 { "604e", "ppc604e", 0 },
21221 { "604", "ppc604", 0 },
21222 { "603e", "ppc603", 0 },
21223 { "603", "ppc603", 0 },
21224 { "601", "ppc601", 0 },
21225 { NULL
, "ppc", 0 } };
21226 const char *cpu_id
= "";
21229 rs6000_file_start ();
21230 darwin_file_start ();
21232 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
21234 if (rs6000_default_cpu
!= 0 && rs6000_default_cpu
[0] != '\0')
21235 cpu_id
= rs6000_default_cpu
;
21237 if (OPTION_SET_P (rs6000_cpu_index
))
21238 cpu_id
= processor_target_table
[rs6000_cpu_index
].name
;
21240 /* Look through the mapping array. Pick the first name that either
21241 matches the argument, has a bit set in IF_SET that is also set
21242 in the target flags, or has a NULL name. */
21245 while (mapping
[i
].arg
!= NULL
21246 && strcmp (mapping
[i
].arg
, cpu_id
) != 0
21247 && (mapping
[i
].if_set
& rs6000_isa_flags
) == 0)
21250 fprintf (asm_out_file
, "\t.machine %s\n", mapping
[i
].name
);
21253 #endif /* TARGET_MACHO */
21257 rs6000_elf_reloc_rw_mask (void)
21261 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
21267 /* Record an element in the table of global constructors. SYMBOL is
21268 a SYMBOL_REF of the function to be called; PRIORITY is a number
21269 between 0 and MAX_INIT_PRIORITY.
21271 This differs from default_named_section_asm_out_constructor in
21272 that we have special handling for -mrelocatable. */
21274 static void rs6000_elf_asm_out_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
21276 rs6000_elf_asm_out_constructor (rtx symbol
, int priority
)
21278 const char *section
= ".ctors";
21281 if (priority
!= DEFAULT_INIT_PRIORITY
)
21283 sprintf (buf
, ".ctors.%.5u",
21284 /* Invert the numbering so the linker puts us in the proper
21285 order; constructors are run from right to left, and the
21286 linker sorts in increasing order. */
21287 MAX_INIT_PRIORITY
- priority
);
21291 switch_to_section (get_section (section
, SECTION_WRITE
, NULL
));
21292 assemble_align (POINTER_SIZE
);
21294 if (DEFAULT_ABI
== ABI_V4
21295 && (TARGET_RELOCATABLE
|| flag_pic
> 1))
21297 fputs ("\t.long (", asm_out_file
);
21298 output_addr_const (asm_out_file
, symbol
);
21299 fputs (")@fixup\n", asm_out_file
);
21302 assemble_integer (symbol
, POINTER_SIZE
/ BITS_PER_UNIT
, POINTER_SIZE
, 1);
21305 static void rs6000_elf_asm_out_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
21307 rs6000_elf_asm_out_destructor (rtx symbol
, int priority
)
21309 const char *section
= ".dtors";
21312 if (priority
!= DEFAULT_INIT_PRIORITY
)
21314 sprintf (buf
, ".dtors.%.5u",
21315 /* Invert the numbering so the linker puts us in the proper
21316 order; constructors are run from right to left, and the
21317 linker sorts in increasing order. */
21318 MAX_INIT_PRIORITY
- priority
);
21322 switch_to_section (get_section (section
, SECTION_WRITE
, NULL
));
21323 assemble_align (POINTER_SIZE
);
21325 if (DEFAULT_ABI
== ABI_V4
21326 && (TARGET_RELOCATABLE
|| flag_pic
> 1))
21328 fputs ("\t.long (", asm_out_file
);
21329 output_addr_const (asm_out_file
, symbol
);
21330 fputs (")@fixup\n", asm_out_file
);
21333 assemble_integer (symbol
, POINTER_SIZE
/ BITS_PER_UNIT
, POINTER_SIZE
, 1);
21337 rs6000_elf_declare_function_name (FILE *file
, const char *name
, tree decl
)
21339 if (TARGET_64BIT
&& DEFAULT_ABI
!= ABI_ELFv2
)
21341 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file
);
21342 ASM_OUTPUT_LABEL (file
, name
);
21343 fputs (DOUBLE_INT_ASM_OP
, file
);
21344 rs6000_output_function_entry (file
, name
);
21345 fputs (",.TOC.@tocbase,0\n\t.previous\n", file
);
21348 fputs ("\t.size\t", file
);
21349 assemble_name (file
, name
);
21350 fputs (",24\n\t.type\t.", file
);
21351 assemble_name (file
, name
);
21352 fputs (",@function\n", file
);
21353 if (TREE_PUBLIC (decl
) && ! DECL_WEAK (decl
))
21355 fputs ("\t.globl\t.", file
);
21356 assemble_name (file
, name
);
21361 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
21362 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
21363 rs6000_output_function_entry (file
, name
);
21364 fputs (":\n", file
);
21365 assemble_function_label_final ();
21370 if (DEFAULT_ABI
== ABI_V4
21371 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
21372 && !TARGET_SECURE_PLT
21373 && (!constant_pool_empty_p () || crtl
->profile
)
21374 && (uses_toc
= uses_TOC ()))
21379 switch_to_other_text_partition ();
21380 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
21382 fprintf (file
, "\t.long ");
21383 assemble_name (file
, toc_label_name
);
21386 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
21387 assemble_name (file
, buf
);
21390 switch_to_other_text_partition ();
21393 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
21394 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
21396 if (TARGET_CMODEL
== CMODEL_LARGE
21397 && rs6000_global_entry_point_prologue_needed_p ())
21401 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
21403 fprintf (file
, "\t.quad .TOC.-");
21404 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
21405 assemble_name (file
, buf
);
21409 if (DEFAULT_ABI
== ABI_AIX
)
21411 const char *desc_name
, *orig_name
;
21413 orig_name
= (*targetm
.strip_name_encoding
) (name
);
21414 desc_name
= orig_name
;
21415 while (*desc_name
== '.')
21418 if (TREE_PUBLIC (decl
))
21419 fprintf (file
, "\t.globl %s\n", desc_name
);
21421 fprintf (file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
21422 fprintf (file
, "%s:\n", desc_name
);
21423 fprintf (file
, "\t.long %s\n", orig_name
);
21424 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file
);
21425 fputs ("\t.long 0\n", file
);
21426 fprintf (file
, "\t.previous\n");
21428 ASM_OUTPUT_FUNCTION_LABEL (file
, name
, decl
);
21431 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED
;
21433 rs6000_elf_file_end (void)
21435 #ifdef HAVE_AS_GNU_ATTRIBUTE
21436 /* ??? The value emitted depends on options active at file end.
21437 Assume anyone using #pragma or attributes that might change
21438 options knows what they are doing. */
21439 if ((TARGET_64BIT
|| DEFAULT_ABI
== ABI_V4
)
21440 && rs6000_passes_float
)
21444 if (TARGET_HARD_FLOAT
)
21448 if (rs6000_passes_long_double
)
21450 if (!TARGET_LONG_DOUBLE_128
)
21452 else if (TARGET_IEEEQUAD
)
21457 fprintf (asm_out_file
, "\t.gnu_attribute 4, %d\n", fp
);
21459 if (TARGET_32BIT
&& DEFAULT_ABI
== ABI_V4
)
21461 if (rs6000_passes_vector
)
21462 fprintf (asm_out_file
, "\t.gnu_attribute 8, %d\n",
21463 (TARGET_ALTIVEC_ABI
? 2 : 1));
21464 if (rs6000_returns_struct
)
21465 fprintf (asm_out_file
, "\t.gnu_attribute 12, %d\n",
21466 aix_struct_return
? 2 : 1);
21469 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
21470 if (TARGET_32BIT
|| DEFAULT_ABI
== ABI_ELFv2
)
21471 file_end_indicate_exec_stack ();
21474 if (flag_split_stack
)
21475 file_end_indicate_split_stack ();
21479 /* We have expanded a CPU builtin, so we need to emit a reference to
21480 the special symbol that LIBC uses to declare it supports the
21481 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
21482 switch_to_section (data_section
);
21483 fprintf (asm_out_file
, "\t.align %u\n", TARGET_32BIT
? 2 : 3);
21484 fprintf (asm_out_file
, "\t%s %s\n",
21485 TARGET_32BIT
? ".long" : ".quad", tcb_verification_symbol
);
21492 #ifndef HAVE_XCOFF_DWARF_EXTRAS
21493 #define HAVE_XCOFF_DWARF_EXTRAS 0
21497 /* Names of bss and data sections. These should be unique names for each
21498 compilation unit. */
21500 char *xcoff_bss_section_name
;
21501 char *xcoff_private_data_section_name
;
21502 char *xcoff_private_rodata_section_name
;
21503 char *xcoff_tls_data_section_name
;
21504 char *xcoff_read_only_section_name
;
21506 static enum unwind_info_type
21507 rs6000_xcoff_debug_unwind_info (void)
21513 rs6000_xcoff_asm_output_anchor (rtx symbol
)
21517 sprintf (buffer
, "$ + " HOST_WIDE_INT_PRINT_DEC
,
21518 SYMBOL_REF_BLOCK_OFFSET (symbol
));
21519 fprintf (asm_out_file
, "%s", SET_ASM_OP
);
21520 RS6000_OUTPUT_BASENAME (asm_out_file
, XSTR (symbol
, 0));
21521 fprintf (asm_out_file
, ",");
21522 RS6000_OUTPUT_BASENAME (asm_out_file
, buffer
);
21523 fprintf (asm_out_file
, "\n");
21527 rs6000_xcoff_asm_globalize_label (FILE *stream
, const char *name
)
21529 fputs (GLOBAL_ASM_OP
, stream
);
21530 RS6000_OUTPUT_BASENAME (stream
, name
);
21531 putc ('\n', stream
);
21534 /* A get_unnamed_decl callback, used for read-only sections. PTR
21535 points to the section string variable. */
21538 rs6000_xcoff_output_readonly_section_asm_op (const char *directive
)
21540 fprintf (asm_out_file
, "\t.csect %s[RO],%s\n",
21542 ? xcoff_private_rodata_section_name
21543 : xcoff_read_only_section_name
,
21544 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
21547 /* Likewise for read-write sections. */
21550 rs6000_xcoff_output_readwrite_section_asm_op (const char *)
21552 fprintf (asm_out_file
, "\t.csect %s[RW],%s\n",
21553 xcoff_private_data_section_name
,
21554 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
21558 rs6000_xcoff_output_tls_section_asm_op (const char *directive
)
21560 fprintf (asm_out_file
, "\t.csect %s[TL],%s\n",
21562 ? xcoff_private_data_section_name
21563 : xcoff_tls_data_section_name
,
21564 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
21567 /* A get_unnamed_section callback, used for switching to toc_section. */
21570 rs6000_xcoff_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED
)
21572 if (TARGET_MINIMAL_TOC
)
21574 /* toc_section is always selected at least once from
21575 rs6000_xcoff_file_start, so this is guaranteed to
21576 always be defined once and only once in each file. */
21577 if (!toc_initialized
)
21579 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file
);
21580 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file
);
21581 toc_initialized
= 1;
21583 fprintf (asm_out_file
, "\t.csect toc_table[RW]%s\n",
21584 (TARGET_32BIT
? "" : ",3"));
21587 fputs ("\t.toc\n", asm_out_file
);
21590 /* Implement TARGET_ASM_INIT_SECTIONS. */
21593 rs6000_xcoff_asm_init_sections (void)
21595 read_only_data_section
21596 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
21599 private_data_section
21600 = get_unnamed_section (SECTION_WRITE
,
21601 rs6000_xcoff_output_readwrite_section_asm_op
,
21604 read_only_private_data_section
21605 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
21609 = get_unnamed_section (SECTION_TLS
,
21610 rs6000_xcoff_output_tls_section_asm_op
,
21613 tls_private_data_section
21614 = get_unnamed_section (SECTION_TLS
,
21615 rs6000_xcoff_output_tls_section_asm_op
,
21619 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op
, NULL
);
21621 readonly_data_section
= read_only_data_section
;
21625 rs6000_xcoff_reloc_rw_mask (void)
21631 rs6000_xcoff_asm_named_section (const char *name
, unsigned int flags
,
21632 tree decl ATTRIBUTE_UNUSED
)
21635 static const char * const suffix
[7]
21636 = { "PR", "RO", "RW", "BS", "TL", "UL", "XO" };
21638 if (flags
& SECTION_EXCLUDE
)
21640 else if (flags
& SECTION_DEBUG
)
21642 fprintf (asm_out_file
, "\t.dwsect %s\n", name
);
21645 else if (flags
& SECTION_CODE
)
21647 else if (flags
& SECTION_TLS
)
21649 if (flags
& SECTION_BSS
)
21654 else if (flags
& SECTION_WRITE
)
21656 if (flags
& SECTION_BSS
)
21664 fprintf (asm_out_file
, "\t.csect %s%s[%s],%u\n",
21665 (flags
& SECTION_CODE
) ? "." : "",
21666 name
, suffix
[smclass
], flags
& SECTION_ENTSIZE
);
21669 #define IN_NAMED_SECTION(DECL) \
21670 ((TREE_CODE (DECL) == FUNCTION_DECL || VAR_P (DECL)) \
21671 && DECL_SECTION_NAME (DECL) != NULL)
21674 rs6000_xcoff_select_section (tree decl
, int reloc
,
21675 unsigned HOST_WIDE_INT align
)
21677 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
21679 if (align
> BIGGEST_ALIGNMENT
&& VAR_OR_FUNCTION_DECL_P (decl
))
21681 resolve_unique_section (decl
, reloc
, true);
21682 if (IN_NAMED_SECTION (decl
))
21683 return get_named_section (decl
, NULL
, reloc
);
21686 if (decl_readonly_section (decl
, reloc
))
21688 if (TREE_PUBLIC (decl
))
21689 return read_only_data_section
;
21691 return read_only_private_data_section
;
21696 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL_P (decl
))
21698 if (bss_initializer_p (decl
))
21699 return tls_comm_section
;
21700 else if (TREE_PUBLIC (decl
))
21701 return tls_data_section
;
21703 return tls_private_data_section
;
21707 if (TREE_PUBLIC (decl
))
21708 return data_section
;
21710 return private_data_section
;
21715 rs6000_xcoff_unique_section (tree decl
, int reloc ATTRIBUTE_UNUSED
)
21719 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
21720 name
= (*targetm
.strip_name_encoding
) (name
);
21721 set_decl_section_name (decl
, name
);
21724 /* Select section for constant in constant pool.
21726 On RS/6000, all constants are in the private read-only data area.
21727 However, if this is being placed in the TOC it must be output as a
21731 rs6000_xcoff_select_rtx_section (machine_mode mode
, rtx x
,
21732 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
21734 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
))
21735 return toc_section
;
21737 return read_only_private_data_section
;
21740 /* Remove any trailing [DS] or the like from the symbol name. */
21742 static const char *
21743 rs6000_xcoff_strip_name_encoding (const char *name
)
21748 len
= strlen (name
);
21749 if (name
[len
- 1] == ']')
21750 return ggc_alloc_string (name
, len
- 4);
21755 /* Section attributes. AIX is always PIC. */
21757 static unsigned int
21758 rs6000_xcoff_section_type_flags (tree decl
, const char *name
, int reloc
)
21760 unsigned int align
;
21761 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
21763 if (decl
&& DECL_P (decl
) && VAR_P (decl
) && bss_initializer_p (decl
))
21764 flags
|= SECTION_BSS
;
21766 /* Align to at least UNIT size. */
21767 if (!decl
|| !DECL_P (decl
))
21768 align
= MIN_UNITS_PER_WORD
;
21769 /* Align code CSECT to at least 32 bytes. */
21770 else if ((flags
& SECTION_CODE
) != 0)
21771 align
= MAX ((DECL_ALIGN (decl
) / BITS_PER_UNIT
), 32);
21773 /* Increase alignment of large objects if not already stricter. */
21774 align
= MAX ((DECL_ALIGN (decl
) / BITS_PER_UNIT
),
21775 int_size_in_bytes (TREE_TYPE (decl
)) > MIN_UNITS_PER_WORD
21776 ? UNITS_PER_FP_WORD
: MIN_UNITS_PER_WORD
);
21778 return flags
| (exact_log2 (align
) & SECTION_ENTSIZE
);
21781 /* Output at beginning of assembler file.
21783 Initialize the section names for the RS/6000 at this point.
21785 Specify filename, including full path, to assembler.
21787 We want to go into the TOC section so at least one .toc will be emitted.
21788 Also, in order to output proper .bs/.es pairs, we need at least one static
21789 [RW] section emitted.
21791 Finally, declare mcount when profiling to make the assembler happy. */
21794 rs6000_xcoff_file_start (void)
21796 rs6000_gen_section_name (&xcoff_bss_section_name
,
21797 main_input_filename
, ".bss_");
21798 rs6000_gen_section_name (&xcoff_private_data_section_name
,
21799 main_input_filename
, ".rw_");
21800 rs6000_gen_section_name (&xcoff_private_rodata_section_name
,
21801 main_input_filename
, ".rop_");
21802 rs6000_gen_section_name (&xcoff_read_only_section_name
,
21803 main_input_filename
, ".ro_");
21804 rs6000_gen_section_name (&xcoff_tls_data_section_name
,
21805 main_input_filename
, ".tls_");
21807 fputs ("\t.file\t", asm_out_file
);
21808 output_quoted_string (asm_out_file
, main_input_filename
);
21809 fputc ('\n', asm_out_file
);
21810 if (write_symbols
!= NO_DEBUG
)
21811 switch_to_section (private_data_section
);
21812 switch_to_section (toc_section
);
21813 switch_to_section (text_section
);
21815 fprintf (asm_out_file
, "\t.extern %s\n", RS6000_MCOUNT
);
21816 rs6000_file_start ();
21819 /* Output at end of assembler file.
21820 On the RS/6000, referencing data should automatically pull in text. */
21823 rs6000_xcoff_file_end (void)
21825 switch_to_section (text_section
);
21826 if (xcoff_tls_exec_model_detected
)
21828 /* Add a .ref to __tls_get_addr to force libpthread dependency. */
21829 fputs ("\t.extern __tls_get_addr\n\t.ref __tls_get_addr\n", asm_out_file
);
21831 fputs ("_section_.text:\n", asm_out_file
);
21832 switch_to_section (data_section
);
21833 fputs (TARGET_32BIT
21834 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
21839 struct declare_alias_data
21842 bool function_descriptor
;
21845 /* Declare alias N. A helper function for for_node_and_aliases. */
21848 rs6000_declare_alias (struct symtab_node
*n
, void *d
)
21850 struct declare_alias_data
*data
= (struct declare_alias_data
*)d
;
21851 /* Main symbol is output specially, because varasm machinery does part of
21852 the job for us - we do not need to declare .globl/lglobs and such. */
21853 if (!n
->alias
|| n
->weakref
)
21856 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n
->decl
)))
21859 /* Prevent assemble_alias from trying to use .set pseudo operation
21860 that does not behave as expected by the middle-end. */
21861 TREE_ASM_WRITTEN (n
->decl
) = true;
21863 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n
->decl
));
21864 char *buffer
= (char *) alloca (strlen (name
) + 2);
21866 int dollar_inside
= 0;
21868 strcpy (buffer
, name
);
21869 p
= strchr (buffer
, '$');
21873 p
= strchr (p
+ 1, '$');
21875 if (TREE_PUBLIC (n
->decl
))
21877 if (!RS6000_WEAK
|| !DECL_WEAK (n
->decl
))
21879 if (dollar_inside
) {
21880 if (data
->function_descriptor
)
21881 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21882 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21884 if (data
->function_descriptor
)
21886 fputs ("\t.globl .", data
->file
);
21887 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
21888 putc ('\n', data
->file
);
21890 fputs ("\t.globl ", data
->file
);
21891 assemble_name (data
->file
, buffer
);
21892 putc ('\n', data
->file
);
21894 #ifdef ASM_WEAKEN_DECL
21895 else if (DECL_WEAK (n
->decl
) && !data
->function_descriptor
)
21896 ASM_WEAKEN_DECL (data
->file
, n
->decl
, name
, NULL
);
21903 if (data
->function_descriptor
)
21904 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21905 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21907 if (data
->function_descriptor
)
21909 fputs ("\t.lglobl .", data
->file
);
21910 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
21911 putc ('\n', data
->file
);
21913 fputs ("\t.lglobl ", data
->file
);
21914 assemble_name (data
->file
, buffer
);
21915 putc ('\n', data
->file
);
21917 if (data
->function_descriptor
)
21918 putc ('.', data
->file
);
21919 ASM_OUTPUT_LABEL (data
->file
, buffer
);
21924 #ifdef HAVE_GAS_HIDDEN
21925 /* Helper function to calculate visibility of a DECL
21926 and return the value as a const string. */
21928 static const char *
21929 rs6000_xcoff_visibility (tree decl
)
21931 static const char * const visibility_types
[] = {
21932 "", ",protected", ",hidden", ",internal"
21935 enum symbol_visibility vis
= DECL_VISIBILITY (decl
);
21936 return visibility_types
[vis
];
21941 /* This macro produces the initial definition of a function name.
21942 On the RS/6000, we need to place an extra '.' in the function name and
21943 output the function descriptor.
21944 Dollar signs are converted to underscores.
21946 The csect for the function will have already been created when
21947 text_section was selected. We do have to go back to that csect, however.
21949 The third and fourth parameters to the .function pseudo-op (16 and 044)
21950 are placeholders which no longer have any use.
21952 Because AIX assembler's .set command has unexpected semantics, we output
21953 all aliases as alternative labels in front of the definition. */
21956 rs6000_xcoff_declare_function_name (FILE *file
, const char *name
, tree decl
)
21958 char *buffer
= (char *) alloca (strlen (name
) + 1);
21960 int dollar_inside
= 0;
21961 struct declare_alias_data data
= {file
, false};
21963 strcpy (buffer
, name
);
21964 p
= strchr (buffer
, '$');
21968 p
= strchr (p
+ 1, '$');
21970 if (TREE_PUBLIC (decl
))
21972 if (!RS6000_WEAK
|| !DECL_WEAK (decl
))
21974 if (dollar_inside
) {
21975 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21976 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21978 fputs ("\t.globl .", file
);
21979 RS6000_OUTPUT_BASENAME (file
, buffer
);
21980 #ifdef HAVE_GAS_HIDDEN
21981 fputs (rs6000_xcoff_visibility (decl
), file
);
21988 if (dollar_inside
) {
21989 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21990 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21992 fputs ("\t.lglobl .", file
);
21993 RS6000_OUTPUT_BASENAME (file
, buffer
);
21997 fputs ("\t.csect ", file
);
21998 assemble_name (file
, buffer
);
21999 fputs (TARGET_32BIT
? "\n" : ",3\n", file
);
22001 ASM_OUTPUT_FUNCTION_LABEL (file
, buffer
, decl
);
22003 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
22005 fputs (TARGET_32BIT
? "\t.long ." : "\t.llong .", file
);
22006 RS6000_OUTPUT_BASENAME (file
, buffer
);
22007 fputs (", TOC[tc0], 0\n", file
);
22010 switch_to_section (function_section (decl
));
22012 ASM_OUTPUT_LABEL (file
, buffer
);
22014 data
.function_descriptor
= true;
22015 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
22017 if (!DECL_IGNORED_P (decl
))
22019 if (dwarf_debuginfo_p ())
22021 name
= (*targetm
.strip_name_encoding
) (name
);
22022 fprintf (file
, "\t.function .%s,.%s,2,0\n", name
, name
);
22029 /* Output assembly language to globalize a symbol from a DECL,
22030 possibly with visibility. */
22033 rs6000_xcoff_asm_globalize_decl_name (FILE *stream
, tree decl
)
22035 const char *name
= XSTR (XEXP (DECL_RTL (decl
), 0), 0);
22036 fputs (GLOBAL_ASM_OP
, stream
);
22037 assemble_name (stream
, name
);
22038 #ifdef HAVE_GAS_HIDDEN
22039 fputs (rs6000_xcoff_visibility (decl
), stream
);
22041 putc ('\n', stream
);
22044 /* Output assembly language to define a symbol as COMMON from a DECL,
22045 possibly with visibility. */
22048 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream
,
22049 tree decl ATTRIBUTE_UNUSED
,
22051 unsigned HOST_WIDE_INT size
,
22052 unsigned int align
)
22054 unsigned int align2
= 2;
22057 align
= DATA_ABI_ALIGNMENT (TREE_TYPE (decl
), DECL_ALIGN (decl
));
22060 align2
= floor_log2 (align
/ BITS_PER_UNIT
);
22064 if (! DECL_COMMON (decl
))
22066 /* Forget section. */
22069 /* Globalize TLS BSS. */
22070 if (TREE_PUBLIC (decl
) && DECL_THREAD_LOCAL_P (decl
))
22072 fputs (GLOBAL_ASM_OP
, stream
);
22073 assemble_name (stream
, name
);
22074 fputc ('\n', stream
);
22077 /* Switch to section and skip space. */
22078 fputs ("\t.csect ", stream
);
22079 assemble_name (stream
, name
);
22080 fprintf (stream
, ",%u\n", align2
);
22081 ASM_DECLARE_OBJECT_NAME (stream
, name
, decl
);
22082 ASM_OUTPUT_SKIP (stream
, size
? size
: 1);
22086 if (TREE_PUBLIC (decl
))
22089 "\t.comm %s," HOST_WIDE_INT_PRINT_UNSIGNED
",%u" ,
22090 name
, size
, align2
);
22092 #ifdef HAVE_GAS_HIDDEN
22094 fputs (rs6000_xcoff_visibility (decl
), stream
);
22096 putc ('\n', stream
);
22100 "\t.lcomm %s," HOST_WIDE_INT_PRINT_UNSIGNED
",%s,%u\n",
22101 (*targetm
.strip_name_encoding
) (name
), size
, name
, align2
);
22104 /* This macro produces the initial definition of a object (variable) name.
22105 Because AIX assembler's .set command has unexpected semantics, we output
22106 all aliases as alternative labels in front of the definition. */
22109 rs6000_xcoff_declare_object_name (FILE *file
, const char *name
, tree decl
)
22111 struct declare_alias_data data
= {file
, false};
22112 ASM_OUTPUT_LABEL (file
, name
);
22113 symtab_node::get_create (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
22117 /* Overide the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
22120 rs6000_asm_output_dwarf_pcrel (FILE *file
, int size
, const char *label
)
22122 fputs (integer_asm_op (size
, FALSE
), file
);
22123 assemble_name (file
, label
);
22124 fputs ("-$", file
);
22127 /* Output a symbol offset relative to the dbase for the current object.
22128 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
22131 __gcc_unwind_dbase is embedded in all executables/libraries through
22132 libgcc/config/rs6000/crtdbase.S. */
22135 rs6000_asm_output_dwarf_datarel (FILE *file
, int size
, const char *label
)
22137 fputs (integer_asm_op (size
, FALSE
), file
);
22138 assemble_name (file
, label
);
22139 fputs("-__gcc_unwind_dbase", file
);
22144 rs6000_xcoff_encode_section_info (tree decl
, rtx rtl
, int first
)
22148 const char *symname
;
22150 default_encode_section_info (decl
, rtl
, first
);
22152 /* Careful not to prod global register variables. */
22155 symbol
= XEXP (rtl
, 0);
22156 if (!SYMBOL_REF_P (symbol
))
22159 flags
= SYMBOL_REF_FLAGS (symbol
);
22161 if (VAR_P (decl
) && DECL_THREAD_LOCAL_P (decl
))
22162 flags
&= ~SYMBOL_FLAG_HAS_BLOCK_INFO
;
22164 SYMBOL_REF_FLAGS (symbol
) = flags
;
22166 symname
= XSTR (symbol
, 0);
22168 /* Append CSECT mapping class, unless the symbol already is qualified.
22169 Aliases are implemented as labels, so the symbol name should not add
22170 a mapping class. */
22173 && VAR_OR_FUNCTION_DECL_P (decl
)
22174 && (symtab_node::get (decl
) == NULL
22175 || symtab_node::get (decl
)->alias
== 0)
22176 && symname
[strlen (symname
) - 1] != ']')
22178 const char *smclass
= NULL
;
22180 if (TREE_CODE (decl
) == FUNCTION_DECL
)
22182 else if (DECL_THREAD_LOCAL_P (decl
))
22184 if (bss_initializer_p (decl
))
22186 else if (flag_data_sections
)
22189 else if (DECL_EXTERNAL (decl
))
22191 else if (bss_initializer_p (decl
))
22193 else if (flag_data_sections
)
22195 /* This must exactly match the logic of select section. */
22196 if (decl_readonly_section (decl
, compute_reloc_for_var (decl
)))
22202 if (smclass
!= NULL
)
22204 char *newname
= XALLOCAVEC (char, strlen (symname
) + 5);
22206 strcpy (newname
, symname
);
22207 strcat (newname
, smclass
);
22208 XSTR (symbol
, 0) = ggc_strdup (newname
);
22212 #endif /* HAVE_AS_TLS */
22213 #endif /* TARGET_XCOFF */
22216 rs6000_asm_weaken_decl (FILE *stream
, tree decl
,
22217 const char *name
, const char *val
)
22219 fputs ("\t.weak\t", stream
);
22220 assemble_name (stream
, name
);
22221 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
22222 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
22224 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
22226 fputs (rs6000_xcoff_visibility (decl
), stream
);
22228 fputs ("\n\t.weak\t.", stream
);
22229 RS6000_OUTPUT_BASENAME (stream
, name
);
22231 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
22233 fputs (rs6000_xcoff_visibility (decl
), stream
);
22235 fputc ('\n', stream
);
22239 #ifdef ASM_OUTPUT_DEF
22240 ASM_OUTPUT_DEF (stream
, name
, val
);
22242 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
22243 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
22245 fputs ("\t.set\t.", stream
);
22246 RS6000_OUTPUT_BASENAME (stream
, name
);
22247 fputs (",.", stream
);
22248 RS6000_OUTPUT_BASENAME (stream
, val
);
22249 fputc ('\n', stream
);
22255 /* Return true if INSN should not be copied. */
22258 rs6000_cannot_copy_insn_p (rtx_insn
*insn
)
22260 return recog_memoized (insn
) >= 0
22261 && get_attr_cannot_copy (insn
);
22264 /* Compute a (partial) cost for rtx X. Return true if the complete
22265 cost has been computed, and false if subexpressions should be
22266 scanned. In either case, *TOTAL contains the cost result. */
22269 rs6000_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
22270 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
22272 int code
= GET_CODE (x
);
22276 /* On the RS/6000, if it is valid in the insn, it is free. */
22278 if (((outer_code
== SET
22279 || outer_code
== PLUS
22280 || outer_code
== MINUS
)
22281 && (satisfies_constraint_I (x
)
22282 || satisfies_constraint_L (x
)))
22283 || (outer_code
== AND
22284 && (satisfies_constraint_K (x
)
22286 ? satisfies_constraint_L (x
)
22287 : satisfies_constraint_J (x
))))
22288 || ((outer_code
== IOR
|| outer_code
== XOR
)
22289 && (satisfies_constraint_K (x
)
22291 ? satisfies_constraint_L (x
)
22292 : satisfies_constraint_J (x
))))
22293 || outer_code
== ASHIFT
22294 || outer_code
== ASHIFTRT
22295 || outer_code
== LSHIFTRT
22296 || outer_code
== ROTATE
22297 || outer_code
== ROTATERT
22298 || outer_code
== ZERO_EXTRACT
22299 || (outer_code
== MULT
22300 && satisfies_constraint_I (x
))
22301 || ((outer_code
== DIV
|| outer_code
== UDIV
22302 || outer_code
== MOD
|| outer_code
== UMOD
)
22303 && exact_log2 (INTVAL (x
)) >= 0)
22304 || (outer_code
== COMPARE
22305 && (satisfies_constraint_I (x
)
22306 || satisfies_constraint_K (x
)))
22307 || ((outer_code
== EQ
|| outer_code
== NE
)
22308 && (satisfies_constraint_I (x
)
22309 || satisfies_constraint_K (x
)
22311 ? satisfies_constraint_L (x
)
22312 : satisfies_constraint_J (x
))))
22313 || (outer_code
== GTU
22314 && satisfies_constraint_I (x
))
22315 || (outer_code
== LTU
22316 && satisfies_constraint_P (x
)))
22321 else if ((outer_code
== PLUS
22322 && reg_or_add_cint_operand (x
, mode
))
22323 || (outer_code
== MINUS
22324 && reg_or_sub_cint_operand (x
, mode
))
22325 || ((outer_code
== SET
22326 || outer_code
== IOR
22327 || outer_code
== XOR
)
22329 & ~ (unsigned HOST_WIDE_INT
) 0xffffffff) == 0))
22331 *total
= COSTS_N_INSNS (1);
22337 case CONST_WIDE_INT
:
22341 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22345 /* When optimizing for size, MEM should be slightly more expensive
22346 than generating address, e.g., (plus (reg) (const)).
22347 L1 cache latency is about two instructions. */
22348 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22349 if (rs6000_slow_unaligned_access (mode
, MEM_ALIGN (x
)))
22350 *total
+= COSTS_N_INSNS (100);
22359 if (FLOAT_MODE_P (mode
))
22360 *total
= rs6000_cost
->fp
;
22362 *total
= COSTS_N_INSNS (1);
22366 if (CONST_INT_P (XEXP (x
, 1))
22367 && satisfies_constraint_I (XEXP (x
, 1)))
22369 if (INTVAL (XEXP (x
, 1)) >= -256
22370 && INTVAL (XEXP (x
, 1)) <= 255)
22371 *total
= rs6000_cost
->mulsi_const9
;
22373 *total
= rs6000_cost
->mulsi_const
;
22375 else if (mode
== SFmode
)
22376 *total
= rs6000_cost
->fp
;
22377 else if (FLOAT_MODE_P (mode
))
22378 *total
= rs6000_cost
->dmul
;
22379 else if (mode
== DImode
)
22380 *total
= rs6000_cost
->muldi
;
22382 *total
= rs6000_cost
->mulsi
;
22386 if (mode
== SFmode
)
22387 *total
= rs6000_cost
->fp
;
22389 *total
= rs6000_cost
->dmul
;
22394 if (FLOAT_MODE_P (mode
))
22396 *total
= mode
== DFmode
? rs6000_cost
->ddiv
22397 : rs6000_cost
->sdiv
;
22404 if (CONST_INT_P (XEXP (x
, 1))
22405 && exact_log2 (INTVAL (XEXP (x
, 1))) >= 0)
22407 if (code
== DIV
|| code
== MOD
)
22409 *total
= COSTS_N_INSNS (2);
22412 *total
= COSTS_N_INSNS (1);
22416 if (GET_MODE (XEXP (x
, 1)) == DImode
)
22417 *total
= rs6000_cost
->divdi
;
22419 *total
= rs6000_cost
->divsi
;
22421 /* Add in shift and subtract for MOD unless we have a mod instruction. */
22422 if ((!TARGET_MODULO
22423 || (RS6000_DISABLE_SCALAR_MODULO
&& SCALAR_INT_MODE_P (mode
)))
22424 && (code
== MOD
|| code
== UMOD
))
22425 *total
+= COSTS_N_INSNS (2);
22429 *total
= COSTS_N_INSNS (TARGET_CTZ
? 1 : 4);
22433 *total
= COSTS_N_INSNS (4);
22437 *total
= COSTS_N_INSNS (TARGET_POPCNTD
? 1 : 6);
22441 *total
= COSTS_N_INSNS (TARGET_CMPB
? 2 : 6);
22445 if (outer_code
== AND
|| outer_code
== IOR
|| outer_code
== XOR
)
22448 *total
= COSTS_N_INSNS (1);
22452 if (CONST_INT_P (XEXP (x
, 1)))
22454 rtx left
= XEXP (x
, 0);
22455 rtx_code left_code
= GET_CODE (left
);
22457 /* rotate-and-mask: 1 insn. */
22458 if ((left_code
== ROTATE
22459 || left_code
== ASHIFT
22460 || left_code
== LSHIFTRT
)
22461 && rs6000_is_valid_shift_mask (XEXP (x
, 1), left
, mode
))
22463 *total
= rtx_cost (XEXP (left
, 0), mode
, left_code
, 0, speed
);
22464 if (!CONST_INT_P (XEXP (left
, 1)))
22465 *total
+= rtx_cost (XEXP (left
, 1), SImode
, left_code
, 1, speed
);
22466 *total
+= COSTS_N_INSNS (1);
22470 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
22471 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
22472 if (rs6000_is_valid_and_mask (XEXP (x
, 1), mode
)
22473 || (val
& 0xffff) == val
22474 || (val
& 0xffff0000) == val
22475 || ((val
& 0xffff) == 0 && mode
== SImode
))
22477 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
22478 *total
+= COSTS_N_INSNS (1);
22483 if (rs6000_is_valid_2insn_and (XEXP (x
, 1), mode
))
22485 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
22486 *total
+= COSTS_N_INSNS (2);
22491 *total
= COSTS_N_INSNS (1);
22496 *total
= COSTS_N_INSNS (1);
22502 *total
= COSTS_N_INSNS (1);
22506 /* The EXTSWSLI instruction is a combined instruction. Don't count both
22507 the sign extend and shift separately within the insn. */
22508 if (TARGET_EXTSWSLI
&& mode
== DImode
22509 && GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
22510 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
)
22521 /* Handle mul_highpart. */
22522 if (outer_code
== TRUNCATE
22523 && GET_CODE (XEXP (x
, 0)) == MULT
)
22525 if (mode
== DImode
)
22526 *total
= rs6000_cost
->muldi
;
22528 *total
= rs6000_cost
->mulsi
;
22531 else if (outer_code
== AND
)
22534 *total
= COSTS_N_INSNS (1);
22539 if (MEM_P (XEXP (x
, 0)))
22542 *total
= COSTS_N_INSNS (1);
22548 if (!FLOAT_MODE_P (mode
))
22550 *total
= COSTS_N_INSNS (1);
22556 case UNSIGNED_FLOAT
:
22559 case FLOAT_TRUNCATE
:
22560 *total
= rs6000_cost
->fp
;
22564 if (mode
== DFmode
)
22565 *total
= rs6000_cost
->sfdf_convert
;
22567 *total
= rs6000_cost
->fp
;
22574 *total
= COSTS_N_INSNS (1);
22577 else if (FLOAT_MODE_P (mode
) && TARGET_PPC_GFXOPT
&& TARGET_HARD_FLOAT
)
22579 *total
= rs6000_cost
->fp
;
22588 /* Carry bit requires mode == Pmode.
22589 NEG or PLUS already counted so only add one. */
22591 && (outer_code
== NEG
|| outer_code
== PLUS
))
22593 *total
= COSTS_N_INSNS (1);
22601 if (outer_code
== SET
)
22603 if (XEXP (x
, 1) == const0_rtx
)
22605 *total
= COSTS_N_INSNS (2);
22610 *total
= COSTS_N_INSNS (3);
22615 if (outer_code
== COMPARE
)
22623 if (XINT (x
, 1) == UNSPECV_MMA_XXSETACCZ
)
22637 /* Debug form of r6000_rtx_costs that is selected if -mdebug=cost. */
22640 rs6000_debug_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
22641 int opno
, int *total
, bool speed
)
22643 bool ret
= rs6000_rtx_costs (x
, mode
, outer_code
, opno
, total
, speed
);
22646 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
22647 "opno = %d, total = %d, speed = %s, x:\n",
22648 ret
? "complete" : "scan inner",
22649 GET_MODE_NAME (mode
),
22650 GET_RTX_NAME (outer_code
),
22653 speed
? "true" : "false");
22661 rs6000_insn_cost (rtx_insn
*insn
, bool speed
)
22663 if (recog_memoized (insn
) < 0)
22666 /* If we are optimizing for size, just use the length. */
22668 return get_attr_length (insn
);
22670 /* Use the cost if provided. */
22671 int cost
= get_attr_cost (insn
);
22675 /* If the insn tells us how many insns there are, use that. Otherwise use
22676 the length/4. Adjust the insn length to remove the extra size that
22677 prefixed instructions take. */
22678 int n
= get_attr_num_insns (insn
);
22681 int length
= get_attr_length (insn
);
22682 if (get_attr_prefixed (insn
) == PREFIXED_YES
)
22685 ADJUST_INSN_LENGTH (insn
, adjust
);
22692 enum attr_type type
= get_attr_type (insn
);
22699 cost
= COSTS_N_INSNS (n
+ 1);
22703 switch (get_attr_size (insn
))
22706 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi_const9
;
22709 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi_const
;
22712 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi
;
22715 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->muldi
;
22718 gcc_unreachable ();
22722 switch (get_attr_size (insn
))
22725 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->divsi
;
22728 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->divdi
;
22731 gcc_unreachable ();
22736 cost
= n
* rs6000_cost
->fp
;
22739 cost
= n
* rs6000_cost
->dmul
;
22742 cost
= n
* rs6000_cost
->sdiv
;
22745 cost
= n
* rs6000_cost
->ddiv
;
22752 cost
= COSTS_N_INSNS (n
+ 2);
22756 cost
= COSTS_N_INSNS (n
);
22762 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
22765 rs6000_debug_address_cost (rtx x
, machine_mode mode
,
22766 addr_space_t as
, bool speed
)
22768 int ret
= TARGET_ADDRESS_COST (x
, mode
, as
, speed
);
22770 fprintf (stderr
, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
22771 ret
, speed
? "true" : "false");
22778 /* A C expression returning the cost of moving data from a register of class
22779 CLASS1 to one of CLASS2. */
22782 rs6000_register_move_cost (machine_mode mode
,
22783 reg_class_t from
, reg_class_t to
)
22786 reg_class_t rclass
;
22788 if (TARGET_DEBUG_COST
)
22791 /* If we have VSX, we can easily move between FPR or Altivec registers,
22792 otherwise we can only easily move within classes.
22793 Do this first so we give best-case answers for union classes
22794 containing both gprs and vsx regs. */
22795 HARD_REG_SET to_vsx
, from_vsx
;
22796 to_vsx
= reg_class_contents
[to
] & reg_class_contents
[VSX_REGS
];
22797 from_vsx
= reg_class_contents
[from
] & reg_class_contents
[VSX_REGS
];
22798 if (!hard_reg_set_empty_p (to_vsx
)
22799 && !hard_reg_set_empty_p (from_vsx
)
22801 || hard_reg_set_intersect_p (to_vsx
, from_vsx
)))
22803 int reg
= FIRST_FPR_REGNO
;
22805 || (TEST_HARD_REG_BIT (to_vsx
, FIRST_ALTIVEC_REGNO
)
22806 && TEST_HARD_REG_BIT (from_vsx
, FIRST_ALTIVEC_REGNO
)))
22807 reg
= FIRST_ALTIVEC_REGNO
;
22808 ret
= 2 * hard_regno_nregs (reg
, mode
);
22811 /* Moves from/to GENERAL_REGS. */
22812 else if ((rclass
= from
, reg_classes_intersect_p (to
, GENERAL_REGS
))
22813 || (rclass
= to
, reg_classes_intersect_p (from
, GENERAL_REGS
)))
22815 if (rclass
== FLOAT_REGS
|| rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
22817 if (TARGET_DIRECT_MOVE
)
22819 /* Keep the cost for direct moves above that for within
22820 a register class even if the actual processor cost is
22821 comparable. We do this because a direct move insn
22822 can't be a nop, whereas with ideal register
22823 allocation a move within the same class might turn
22824 out to be a nop. */
22825 if (rs6000_tune
== PROCESSOR_POWER9
22826 || rs6000_tune
== PROCESSOR_POWER10
22827 || rs6000_tune
== PROCESSOR_POWER11
)
22828 ret
= 3 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22830 ret
= 4 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22831 /* SFmode requires a conversion when moving between gprs
22833 if (mode
== SFmode
)
22837 ret
= (rs6000_memory_move_cost (mode
, rclass
, false)
22838 + rs6000_memory_move_cost (mode
, GENERAL_REGS
, false));
22841 /* It's more expensive to move CR_REGS than CR0_REGS because of the
22843 else if (rclass
== CR_REGS
)
22846 /* For those processors that have slow LR/CTR moves, make them more
22847 expensive than memory in order to bias spills to memory .*/
22848 else if ((rs6000_tune
== PROCESSOR_POWER6
22849 || rs6000_tune
== PROCESSOR_POWER7
22850 || rs6000_tune
== PROCESSOR_POWER8
22851 || rs6000_tune
== PROCESSOR_POWER9
)
22852 && reg_class_subset_p (rclass
, SPECIAL_REGS
))
22853 ret
= 6 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22856 /* A move will cost one instruction per GPR moved. */
22857 ret
= 2 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22860 /* Everything else has to go through GENERAL_REGS. */
22862 ret
= (rs6000_register_move_cost (mode
, GENERAL_REGS
, to
)
22863 + rs6000_register_move_cost (mode
, from
, GENERAL_REGS
));
22865 if (TARGET_DEBUG_COST
)
22867 if (dbg_cost_ctrl
== 1)
22869 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22870 ret
, GET_MODE_NAME (mode
), reg_class_names
[from
],
22871 reg_class_names
[to
]);
22878 /* A C expressions returning the cost of moving data of MODE from a register to
22882 rs6000_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
22883 bool in ATTRIBUTE_UNUSED
)
22887 if (TARGET_DEBUG_COST
)
22890 if (reg_classes_intersect_p (rclass
, GENERAL_REGS
))
22891 ret
= 4 * hard_regno_nregs (0, mode
);
22892 else if ((reg_classes_intersect_p (rclass
, FLOAT_REGS
)
22893 || reg_classes_intersect_p (rclass
, VSX_REGS
)))
22894 ret
= 4 * hard_regno_nregs (32, mode
);
22895 else if (reg_classes_intersect_p (rclass
, ALTIVEC_REGS
))
22896 ret
= 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO
, mode
);
22898 ret
= 4 + rs6000_register_move_cost (mode
, rclass
, GENERAL_REGS
);
22900 if (TARGET_DEBUG_COST
)
22902 if (dbg_cost_ctrl
== 1)
22904 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22905 ret
, GET_MODE_NAME (mode
), reg_class_names
[rclass
], in
);
22912 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22914 The register allocator chooses GEN_OR_VSX_REGS for the allocno
22915 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22916 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
22917 move cost between GENERAL_REGS and VSX_REGS low.
22919 It might seem reasonable to use a union class. After all, if usage
22920 of vsr is low and gpr high, it might make sense to spill gpr to vsr
22921 rather than memory. However, in cases where register pressure of
22922 both is high, like the cactus_adm spec test, allowing
22923 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22924 the first scheduling pass. This is partly due to an allocno of
22925 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22926 class, which gives too high a pressure for GENERAL_REGS and too low
22927 for VSX_REGS. So, force a choice of the subclass here.
22929 The best class is also the union if GENERAL_REGS and VSX_REGS have
22930 the same cost. In that case we do use GEN_OR_VSX_REGS as the
22931 allocno class, since trying to narrow down the class by regno mode
22932 is prone to error. For example, SImode is allowed in VSX regs and
22933 in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
22934 it would be wrong to choose an allocno of GENERAL_REGS based on
22938 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED
,
22939 reg_class_t allocno_class
,
22940 reg_class_t best_class
)
22942 switch (allocno_class
)
22944 case GEN_OR_VSX_REGS
:
22945 /* best_class must be a subset of allocno_class. */
22946 gcc_checking_assert (best_class
== GEN_OR_VSX_REGS
22947 || best_class
== GEN_OR_FLOAT_REGS
22948 || best_class
== VSX_REGS
22949 || best_class
== ALTIVEC_REGS
22950 || best_class
== FLOAT_REGS
22951 || best_class
== GENERAL_REGS
22952 || best_class
== BASE_REGS
);
22953 /* Use best_class but choose wider classes when copying from the
22954 wider class to best_class is cheap. This mimics IRA choice
22955 of allocno class. */
22956 if (best_class
== BASE_REGS
)
22957 return GENERAL_REGS
;
22958 if (TARGET_VSX
&& best_class
== FLOAT_REGS
)
22963 if (best_class
== ALTIVEC_REGS
)
22964 return ALTIVEC_REGS
;
22970 return allocno_class
;
22973 /* Load up a constant. If the mode is a vector mode, splat the value across
22974 all of the vector elements. */
22977 rs6000_load_constant_and_splat (machine_mode mode
, REAL_VALUE_TYPE dconst
)
22981 if (mode
== SFmode
|| mode
== DFmode
)
22983 rtx d
= const_double_from_real_value (dconst
, mode
);
22984 reg
= force_reg (mode
, d
);
22986 else if (mode
== V4SFmode
)
22988 rtx d
= const_double_from_real_value (dconst
, SFmode
);
22989 rtvec v
= gen_rtvec (4, d
, d
, d
, d
);
22990 reg
= gen_reg_rtx (mode
);
22991 rs6000_expand_vector_init (reg
, gen_rtx_PARALLEL (mode
, v
));
22993 else if (mode
== V2DFmode
)
22995 rtx d
= const_double_from_real_value (dconst
, DFmode
);
22996 rtvec v
= gen_rtvec (2, d
, d
);
22997 reg
= gen_reg_rtx (mode
);
22998 rs6000_expand_vector_init (reg
, gen_rtx_PARALLEL (mode
, v
));
23001 gcc_unreachable ();
23006 /* Generate an FMA instruction. */
23009 rs6000_emit_madd (rtx target
, rtx m1
, rtx m2
, rtx a
)
23011 machine_mode mode
= GET_MODE (target
);
23014 dst
= expand_ternary_op (mode
, fma_optab
, m1
, m2
, a
, target
, 0);
23015 gcc_assert (dst
!= NULL
);
23018 emit_move_insn (target
, dst
);
23021 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
23024 rs6000_emit_nmsub (rtx dst
, rtx m1
, rtx m2
, rtx a
)
23026 machine_mode mode
= GET_MODE (dst
);
23029 /* This is a tad more complicated, since the fnma_optab is for
23030 a different expression: fma(-m1, m2, a), which is the same
23031 thing except in the case of signed zeros.
23033 Fortunately we know that if FMA is supported that FNMSUB is
23034 also supported in the ISA. Just expand it directly. */
23036 gcc_assert (optab_handler (fma_optab
, mode
) != CODE_FOR_nothing
);
23038 r
= gen_rtx_NEG (mode
, a
);
23039 r
= gen_rtx_FMA (mode
, m1
, m2
, r
);
23040 r
= gen_rtx_NEG (mode
, r
);
23041 emit_insn (gen_rtx_SET (dst
, r
));
23044 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
23045 add a reg_note saying that this was a division. Support both scalar and
23046 vector divide. Assumes no trapping math and finite arguments. */
23049 rs6000_emit_swdiv (rtx dst
, rtx n
, rtx d
, bool note_p
)
23051 machine_mode mode
= GET_MODE (dst
);
23052 rtx one
, x0
, e0
, x1
, xprev
, eprev
, xnext
, enext
, u
, v
;
23055 /* Low precision estimates guarantee 5 bits of accuracy. High
23056 precision estimates guarantee 14 bits of accuracy. SFmode
23057 requires 23 bits of accuracy. DFmode requires 52 bits of
23058 accuracy. Each pass at least doubles the accuracy, leading
23059 to the following. */
23060 int passes
= (TARGET_RECIP_PRECISION
) ? 1 : 3;
23061 if (mode
== DFmode
|| mode
== V2DFmode
)
23064 enum insn_code code
= optab_handler (smul_optab
, mode
);
23065 insn_gen_fn gen_mul
= GEN_FCN (code
);
23067 gcc_assert (code
!= CODE_FOR_nothing
);
23069 one
= rs6000_load_constant_and_splat (mode
, dconst1
);
23071 /* x0 = 1./d estimate */
23072 x0
= gen_reg_rtx (mode
);
23073 emit_insn (gen_rtx_SET (x0
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, d
),
23076 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
23079 /* e0 = 1. - d * x0 */
23080 e0
= gen_reg_rtx (mode
);
23081 rs6000_emit_nmsub (e0
, d
, x0
, one
);
23083 /* x1 = x0 + e0 * x0 */
23084 x1
= gen_reg_rtx (mode
);
23085 rs6000_emit_madd (x1
, e0
, x0
, x0
);
23087 for (i
= 0, xprev
= x1
, eprev
= e0
; i
< passes
- 2;
23088 ++i
, xprev
= xnext
, eprev
= enext
) {
23090 /* enext = eprev * eprev */
23091 enext
= gen_reg_rtx (mode
);
23092 emit_insn (gen_mul (enext
, eprev
, eprev
));
23094 /* xnext = xprev + enext * xprev */
23095 xnext
= gen_reg_rtx (mode
);
23096 rs6000_emit_madd (xnext
, enext
, xprev
, xprev
);
23102 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
23104 /* u = n * xprev */
23105 u
= gen_reg_rtx (mode
);
23106 emit_insn (gen_mul (u
, n
, xprev
));
23108 /* v = n - (d * u) */
23109 v
= gen_reg_rtx (mode
);
23110 rs6000_emit_nmsub (v
, d
, u
, n
);
23112 /* dst = (v * xprev) + u */
23113 rs6000_emit_madd (dst
, v
, xprev
, u
);
23116 add_reg_note (get_last_insn (), REG_EQUAL
, gen_rtx_DIV (mode
, n
, d
));
23119 /* Goldschmidt's Algorithm for single/double-precision floating point
23120 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
23123 rs6000_emit_swsqrt (rtx dst
, rtx src
, bool recip
)
23125 machine_mode mode
= GET_MODE (src
);
23126 rtx e
= gen_reg_rtx (mode
);
23127 rtx g
= gen_reg_rtx (mode
);
23128 rtx h
= gen_reg_rtx (mode
);
23130 /* Low precision estimates guarantee 5 bits of accuracy. High
23131 precision estimates guarantee 14 bits of accuracy. SFmode
23132 requires 23 bits of accuracy. DFmode requires 52 bits of
23133 accuracy. Each pass at least doubles the accuracy, leading
23134 to the following. */
23135 int passes
= (TARGET_RECIP_PRECISION
) ? 1 : 3;
23136 if (mode
== DFmode
|| mode
== V2DFmode
)
23141 enum insn_code code
= optab_handler (smul_optab
, mode
);
23142 insn_gen_fn gen_mul
= GEN_FCN (code
);
23144 gcc_assert (code
!= CODE_FOR_nothing
);
23146 mhalf
= rs6000_load_constant_and_splat (mode
, dconsthalf
);
23148 /* e = rsqrt estimate */
23149 emit_insn (gen_rtx_SET (e
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, src
),
23152 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
23155 rtx zero
= force_reg (mode
, CONST0_RTX (mode
));
23157 if (mode
== SFmode
)
23159 rtx target
= emit_conditional_move (e
, { GT
, src
, zero
, mode
},
23162 emit_move_insn (e
, target
);
23166 rtx cond
= gen_rtx_GT (VOIDmode
, e
, zero
);
23167 rs6000_emit_vector_cond_expr (e
, e
, zero
, cond
, src
, zero
);
23171 /* g = sqrt estimate. */
23172 emit_insn (gen_mul (g
, e
, src
));
23173 /* h = 1/(2*sqrt) estimate. */
23174 emit_insn (gen_mul (h
, e
, mhalf
));
23180 rtx t
= gen_reg_rtx (mode
);
23181 rs6000_emit_nmsub (t
, g
, h
, mhalf
);
23182 /* Apply correction directly to 1/rsqrt estimate. */
23183 rs6000_emit_madd (dst
, e
, t
, e
);
23187 for (i
= 0; i
< passes
; i
++)
23189 rtx t1
= gen_reg_rtx (mode
);
23190 rtx g1
= gen_reg_rtx (mode
);
23191 rtx h1
= gen_reg_rtx (mode
);
23193 rs6000_emit_nmsub (t1
, g
, h
, mhalf
);
23194 rs6000_emit_madd (g1
, g
, t1
, g
);
23195 rs6000_emit_madd (h1
, h
, t1
, h
);
23200 /* Multiply by 2 for 1/rsqrt. */
23201 emit_insn (gen_add3_insn (dst
, h
, h
));
23206 rtx t
= gen_reg_rtx (mode
);
23207 rs6000_emit_nmsub (t
, g
, h
, mhalf
);
23208 rs6000_emit_madd (dst
, g
, t
, g
);
23214 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
23215 (Power7) targets. DST is the target, and SRC is the argument operand. */
23218 rs6000_emit_popcount (rtx dst
, rtx src
)
23220 machine_mode mode
= GET_MODE (dst
);
23223 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
23224 if (TARGET_POPCNTD
)
23226 if (mode
== SImode
)
23227 emit_insn (gen_popcntdsi2 (dst
, src
));
23229 emit_insn (gen_popcntddi2 (dst
, src
));
23233 tmp1
= gen_reg_rtx (mode
);
23235 if (mode
== SImode
)
23237 emit_insn (gen_popcntbsi2 (tmp1
, src
));
23238 tmp2
= expand_mult (SImode
, tmp1
, GEN_INT (0x01010101),
23240 tmp2
= force_reg (SImode
, tmp2
);
23241 emit_insn (gen_lshrsi3 (dst
, tmp2
, GEN_INT (24)));
23245 emit_insn (gen_popcntbdi2 (tmp1
, src
));
23246 tmp2
= expand_mult (DImode
, tmp1
,
23247 GEN_INT ((HOST_WIDE_INT
)
23248 0x01010101 << 32 | 0x01010101),
23250 tmp2
= force_reg (DImode
, tmp2
);
23251 emit_insn (gen_lshrdi3 (dst
, tmp2
, GEN_INT (56)));
23256 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
23257 target, and SRC is the argument operand. */
23260 rs6000_emit_parity (rtx dst
, rtx src
)
23262 machine_mode mode
= GET_MODE (dst
);
23265 tmp
= gen_reg_rtx (mode
);
23267 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
23270 if (mode
== SImode
)
23272 emit_insn (gen_popcntbsi2 (tmp
, src
));
23273 emit_insn (gen_paritysi2_cmpb (dst
, tmp
));
23277 emit_insn (gen_popcntbdi2 (tmp
, src
));
23278 emit_insn (gen_paritydi2_cmpb (dst
, tmp
));
23283 if (mode
== SImode
)
23285 /* Is mult+shift >= shift+xor+shift+xor? */
23286 if (rs6000_cost
->mulsi_const
>= COSTS_N_INSNS (3))
23288 rtx tmp1
, tmp2
, tmp3
, tmp4
;
23290 tmp1
= gen_reg_rtx (SImode
);
23291 emit_insn (gen_popcntbsi2 (tmp1
, src
));
23293 tmp2
= gen_reg_rtx (SImode
);
23294 emit_insn (gen_lshrsi3 (tmp2
, tmp1
, GEN_INT (16)));
23295 tmp3
= gen_reg_rtx (SImode
);
23296 emit_insn (gen_xorsi3 (tmp3
, tmp1
, tmp2
));
23298 tmp4
= gen_reg_rtx (SImode
);
23299 emit_insn (gen_lshrsi3 (tmp4
, tmp3
, GEN_INT (8)));
23300 emit_insn (gen_xorsi3 (tmp
, tmp3
, tmp4
));
23303 rs6000_emit_popcount (tmp
, src
);
23304 emit_insn (gen_andsi3 (dst
, tmp
, const1_rtx
));
23308 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
23309 if (rs6000_cost
->muldi
>= COSTS_N_INSNS (5))
23311 rtx tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
;
23313 tmp1
= gen_reg_rtx (DImode
);
23314 emit_insn (gen_popcntbdi2 (tmp1
, src
));
23316 tmp2
= gen_reg_rtx (DImode
);
23317 emit_insn (gen_lshrdi3 (tmp2
, tmp1
, GEN_INT (32)));
23318 tmp3
= gen_reg_rtx (DImode
);
23319 emit_insn (gen_xordi3 (tmp3
, tmp1
, tmp2
));
23321 tmp4
= gen_reg_rtx (DImode
);
23322 emit_insn (gen_lshrdi3 (tmp4
, tmp3
, GEN_INT (16)));
23323 tmp5
= gen_reg_rtx (DImode
);
23324 emit_insn (gen_xordi3 (tmp5
, tmp3
, tmp4
));
23326 tmp6
= gen_reg_rtx (DImode
);
23327 emit_insn (gen_lshrdi3 (tmp6
, tmp5
, GEN_INT (8)));
23328 emit_insn (gen_xordi3 (tmp
, tmp5
, tmp6
));
23331 rs6000_emit_popcount (tmp
, src
);
23332 emit_insn (gen_anddi3 (dst
, tmp
, const1_rtx
));
23336 /* Expand an Altivec constant permutation for little endian mode.
23337 OP0 and OP1 are the input vectors and TARGET is the output vector.
23338 SEL specifies the constant permutation vector.
23340 There are two issues: First, the two input operands must be
23341 swapped so that together they form a double-wide array in LE
23342 order. Second, the vperm instruction has surprising behavior
23343 in LE mode: it interprets the elements of the source vectors
23344 in BE mode ("left to right") and interprets the elements of
23345 the destination vector in LE mode ("right to left"). To
23346 correct for this, we must subtract each element of the permute
23347 control vector from 31.
23349 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
23350 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
23351 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
23352 serve as the permute control vector. Then, in BE mode,
23356 places the desired result in vr9. However, in LE mode the
23357 vector contents will be
23359 vr10 = 00000003 00000002 00000001 00000000
23360 vr11 = 00000007 00000006 00000005 00000004
23362 The result of the vperm using the same permute control vector is
23364 vr9 = 05000000 07000000 01000000 03000000
23366 That is, the leftmost 4 bytes of vr10 are interpreted as the
23367 source for the rightmost 4 bytes of vr9, and so on.
23369 If we change the permute control vector to
23371 vr12 = {31,20,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
23379 vr9 = 00000006 00000004 00000002 00000000. */
23382 altivec_expand_vec_perm_const_le (rtx target
, rtx op0
, rtx op1
,
23383 const vec_perm_indices
&sel
)
23387 rtx constv
, unspec
;
23389 /* Unpack and adjust the constant selector. */
23390 for (i
= 0; i
< 16; ++i
)
23392 unsigned int elt
= 31 - (sel
[i
] & 31);
23393 perm
[i
] = GEN_INT (elt
);
23396 /* Expand to a permute, swapping the inputs and using the
23397 adjusted selector. */
23399 op0
= force_reg (V16QImode
, op0
);
23401 op1
= force_reg (V16QImode
, op1
);
23403 constv
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, perm
));
23404 constv
= force_reg (V16QImode
, constv
);
23405 unspec
= gen_rtx_UNSPEC (V16QImode
, gen_rtvec (3, op1
, op0
, constv
),
23407 if (!REG_P (target
))
23409 rtx tmp
= gen_reg_rtx (V16QImode
);
23410 emit_move_insn (tmp
, unspec
);
23414 emit_move_insn (target
, unspec
);
23417 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
23418 permute control vector. But here it's not a constant, so we must
23419 generate a vector NAND or NOR to do the adjustment. */
23422 altivec_expand_vec_perm_le (rtx operands
[4])
23424 rtx notx
, iorx
, unspec
;
23425 rtx target
= operands
[0];
23426 rtx op0
= operands
[1];
23427 rtx op1
= operands
[2];
23428 rtx sel
= operands
[3];
23430 rtx norreg
= gen_reg_rtx (V16QImode
);
23431 machine_mode mode
= GET_MODE (target
);
23433 /* Get everything in regs so the pattern matches. */
23435 op0
= force_reg (mode
, op0
);
23437 op1
= force_reg (mode
, op1
);
23439 sel
= force_reg (V16QImode
, sel
);
23440 if (!REG_P (target
))
23441 tmp
= gen_reg_rtx (mode
);
23443 if (TARGET_P9_VECTOR
)
23445 unspec
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op0
, sel
),
23450 /* Invert the selector with a VNAND if available, else a VNOR.
23451 The VNAND is preferred for future fusion opportunities. */
23452 notx
= gen_rtx_NOT (V16QImode
, sel
);
23453 iorx
= (TARGET_P8_VECTOR
23454 ? gen_rtx_IOR (V16QImode
, notx
, notx
)
23455 : gen_rtx_AND (V16QImode
, notx
, notx
));
23456 emit_insn (gen_rtx_SET (norreg
, iorx
));
23458 /* Permute with operands reversed and adjusted selector. */
23459 unspec
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op0
, norreg
),
23463 /* Copy into target, possibly by way of a register. */
23464 if (!REG_P (target
))
23466 emit_move_insn (tmp
, unspec
);
23470 emit_move_insn (target
, unspec
);
23473 /* Expand an Altivec constant permutation. Return true if we match
23474 an efficient implementation; false to fall back to VPERM.
23476 OP0 and OP1 are the input vectors and TARGET is the output vector.
23477 SEL specifies the constant permutation vector. */
23480 altivec_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
,
23481 const vec_perm_indices
&sel
)
23483 struct altivec_perm_insn
{
23484 HOST_WIDE_INT mask
;
23485 enum insn_code impl
;
23486 unsigned char perm
[16];
23488 static const struct altivec_perm_insn patterns
[] = {
23489 {OPTION_MASK_ALTIVEC
,
23490 CODE_FOR_altivec_vpkuhum_direct
,
23491 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}},
23492 {OPTION_MASK_ALTIVEC
,
23493 CODE_FOR_altivec_vpkuwum_direct
,
23494 {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
23495 {OPTION_MASK_ALTIVEC
,
23496 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghb_direct_be
23497 : CODE_FOR_altivec_vmrglb_direct_le
,
23498 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
23499 {OPTION_MASK_ALTIVEC
,
23500 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghh_direct_be
23501 : CODE_FOR_altivec_vmrglh_direct_le
,
23502 {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
23503 {OPTION_MASK_ALTIVEC
,
23504 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghw_direct_v4si_be
23505 : CODE_FOR_altivec_vmrglw_direct_v4si_le
,
23506 {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
23507 {OPTION_MASK_ALTIVEC
,
23508 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglb_direct_be
23509 : CODE_FOR_altivec_vmrghb_direct_le
,
23510 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
23511 {OPTION_MASK_ALTIVEC
,
23512 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglh_direct_be
23513 : CODE_FOR_altivec_vmrghh_direct_le
,
23514 {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
23515 {OPTION_MASK_ALTIVEC
,
23516 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglw_direct_v4si_be
23517 : CODE_FOR_altivec_vmrghw_direct_v4si_le
,
23518 {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}},
23519 {OPTION_MASK_P8_VECTOR
,
23520 BYTES_BIG_ENDIAN
? CODE_FOR_p8_vmrgew_v4sf_direct
23521 : CODE_FOR_p8_vmrgow_v4sf_direct
,
23522 {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}},
23523 {OPTION_MASK_P8_VECTOR
,
23524 BYTES_BIG_ENDIAN
? CODE_FOR_p8_vmrgow_v4sf_direct
23525 : CODE_FOR_p8_vmrgew_v4sf_direct
,
23526 {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}},
23527 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
23528 {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}},
23529 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
23530 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
23531 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
23532 {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}},
23533 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
23534 {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}}};
23536 unsigned int i
, j
, elt
, which
;
23537 unsigned char perm
[16];
23541 /* Unpack the constant selector. */
23542 for (i
= which
= 0; i
< 16; ++i
)
23545 which
|= (elt
< 16 ? 1 : 2);
23549 /* Simplify the constant selector based on operands. */
23553 gcc_unreachable ();
23557 if (!rtx_equal_p (op0
, op1
))
23562 for (i
= 0; i
< 16; ++i
)
23574 /* Look for splat patterns. */
23579 for (i
= 0; i
< 16; ++i
)
23580 if (perm
[i
] != elt
)
23584 if (!BYTES_BIG_ENDIAN
)
23586 emit_insn (gen_altivec_vspltb_direct (target
, op0
, GEN_INT (elt
)));
23592 for (i
= 0; i
< 16; i
+= 2)
23593 if (perm
[i
] != elt
|| perm
[i
+ 1] != elt
+ 1)
23597 int field
= BYTES_BIG_ENDIAN
? elt
/ 2 : 7 - elt
/ 2;
23598 x
= gen_reg_rtx (V8HImode
);
23599 emit_insn (gen_altivec_vsplth_direct (x
, gen_lowpart (V8HImode
, op0
),
23601 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
23608 for (i
= 0; i
< 16; i
+= 4)
23610 || perm
[i
+ 1] != elt
+ 1
23611 || perm
[i
+ 2] != elt
+ 2
23612 || perm
[i
+ 3] != elt
+ 3)
23616 int field
= BYTES_BIG_ENDIAN
? elt
/ 4 : 3 - elt
/ 4;
23617 x
= gen_reg_rtx (V4SImode
);
23618 emit_insn (gen_altivec_vspltw_direct (x
, gen_lowpart (V4SImode
, op0
),
23620 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
23626 /* Look for merge and pack patterns. */
23627 for (j
= 0; j
< ARRAY_SIZE (patterns
); ++j
)
23631 if ((patterns
[j
].mask
& rs6000_isa_flags
) == 0)
23634 elt
= patterns
[j
].perm
[0];
23635 if (perm
[0] == elt
)
23637 else if (perm
[0] == elt
+ 16)
23641 for (i
= 1; i
< 16; ++i
)
23643 elt
= patterns
[j
].perm
[i
];
23645 elt
= (elt
>= 16 ? elt
- 16 : elt
+ 16);
23646 else if (one_vec
&& elt
>= 16)
23648 if (perm
[i
] != elt
)
23653 enum insn_code icode
= patterns
[j
].impl
;
23654 machine_mode omode
= insn_data
[icode
].operand
[0].mode
;
23655 machine_mode imode
= insn_data
[icode
].operand
[1].mode
;
23657 rtx perm_idx
= GEN_INT (0);
23658 if (icode
== CODE_FOR_vsx_xxpermdi_v16qi
)
23675 perm_idx
= GEN_INT (perm_val
);
23678 /* For little-endian, don't use vpkuwum and vpkuhum if the
23679 underlying vector type is not V4SI and V8HI, respectively.
23680 For example, using vpkuwum with a V8HI picks up the even
23681 halfwords (BE numbering) when the even halfwords (LE
23682 numbering) are what we need. */
23683 if (!BYTES_BIG_ENDIAN
23684 && icode
== CODE_FOR_altivec_vpkuwum_direct
23686 && GET_MODE (op0
) != V4SImode
)
23688 && GET_MODE (XEXP (op0
, 0)) != V4SImode
)))
23690 if (!BYTES_BIG_ENDIAN
23691 && icode
== CODE_FOR_altivec_vpkuhum_direct
23693 && GET_MODE (op0
) != V8HImode
)
23695 && GET_MODE (XEXP (op0
, 0)) != V8HImode
)))
23698 /* For little-endian, the two input operands must be swapped
23699 (or swapped back) to ensure proper right-to-left numbering
23701 if (swapped
== BYTES_BIG_ENDIAN
23702 && icode
!= CODE_FOR_vsx_xxpermdi_v16qi
)
23703 std::swap (op0
, op1
);
23704 if (imode
!= V16QImode
)
23706 op0
= gen_lowpart (imode
, op0
);
23707 op1
= gen_lowpart (imode
, op1
);
23709 if (omode
== V16QImode
)
23712 x
= gen_reg_rtx (omode
);
23713 if (icode
== CODE_FOR_vsx_xxpermdi_v16qi
)
23714 emit_insn (GEN_FCN (icode
) (x
, op0
, op1
, perm_idx
));
23716 emit_insn (GEN_FCN (icode
) (x
, op0
, op1
));
23717 if (omode
!= V16QImode
)
23718 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
23723 if (!BYTES_BIG_ENDIAN
)
23725 altivec_expand_vec_perm_const_le (target
, op0
, op1
, sel
);
23732 /* Expand a VSX Permute Doubleword constant permutation.
23733 Return true if we match an efficient implementation. */
23736 rs6000_expand_vec_perm_const_1 (rtx target
, rtx op0
, rtx op1
,
23737 unsigned char perm0
, unsigned char perm1
)
23741 /* If both selectors come from the same operand, fold to single op. */
23742 if ((perm0
& 2) == (perm1
& 2))
23749 /* If both operands are equal, fold to simpler permutation. */
23750 if (rtx_equal_p (op0
, op1
))
23753 perm1
= (perm1
& 1) + 2;
23755 /* If the first selector comes from the second operand, swap. */
23756 else if (perm0
& 2)
23762 std::swap (op0
, op1
);
23764 /* If the second selector does not come from the second operand, fail. */
23765 else if ((perm1
& 2) == 0)
23769 if (target
!= NULL
)
23771 machine_mode vmode
, dmode
;
23774 vmode
= GET_MODE (target
);
23775 gcc_assert (GET_MODE_NUNITS (vmode
) == 2);
23776 dmode
= mode_for_vector (GET_MODE_INNER (vmode
), 4).require ();
23777 x
= gen_rtx_VEC_CONCAT (dmode
, op0
, op1
);
23778 v
= gen_rtvec (2, GEN_INT (perm0
), GEN_INT (perm1
));
23779 x
= gen_rtx_VEC_SELECT (vmode
, x
, gen_rtx_PARALLEL (VOIDmode
, v
));
23780 emit_insn (gen_rtx_SET (target
, x
));
23785 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
23788 rs6000_vectorize_vec_perm_const (machine_mode vmode
, machine_mode op_mode
,
23789 rtx target
, rtx op0
, rtx op1
,
23790 const vec_perm_indices
&sel
)
23792 if (vmode
!= op_mode
)
23795 bool testing_p
= !target
;
23797 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
23798 if (TARGET_ALTIVEC
&& testing_p
)
23803 rtx nop0
= force_reg (vmode
, op0
);
23809 op1
= force_reg (vmode
, op1
);
23811 /* Check for ps_merge* or xxpermdi insns. */
23812 if ((vmode
== V2DFmode
|| vmode
== V2DImode
) && VECTOR_MEM_VSX_P (vmode
))
23816 op0
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 1);
23817 op1
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 2);
23819 if (rs6000_expand_vec_perm_const_1 (target
, op0
, op1
, sel
[0], sel
[1]))
23823 if (TARGET_ALTIVEC
)
23825 /* Force the target-independent code to lower to V16QImode. */
23826 if (vmode
!= V16QImode
)
23828 if (altivec_expand_vec_perm_const (target
, op0
, op1
, sel
))
23835 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
23836 OP0 and OP1 are the input vectors and TARGET is the output vector.
23837 PERM specifies the constant permutation vector. */
23840 rs6000_do_expand_vec_perm (rtx target
, rtx op0
, rtx op1
,
23841 machine_mode vmode
, const vec_perm_builder
&perm
)
23843 rtx x
= expand_vec_perm_const (vmode
, op0
, op1
, perm
, BLKmode
, target
);
23845 emit_move_insn (target
, x
);
23848 /* Expand an extract even operation. */
23851 rs6000_expand_extract_even (rtx target
, rtx op0
, rtx op1
)
23853 machine_mode vmode
= GET_MODE (target
);
23854 unsigned i
, nelt
= GET_MODE_NUNITS (vmode
);
23855 vec_perm_builder
perm (nelt
, nelt
, 1);
23857 for (i
= 0; i
< nelt
; i
++)
23858 perm
.quick_push (i
* 2);
23860 rs6000_do_expand_vec_perm (target
, op0
, op1
, vmode
, perm
);
23863 /* Expand a vector interleave operation. */
23866 rs6000_expand_interleave (rtx target
, rtx op0
, rtx op1
, bool highp
)
23868 machine_mode vmode
= GET_MODE (target
);
23869 unsigned i
, high
, nelt
= GET_MODE_NUNITS (vmode
);
23870 vec_perm_builder
perm (nelt
, nelt
, 1);
23872 high
= (highp
? 0 : nelt
/ 2);
23873 for (i
= 0; i
< nelt
/ 2; i
++)
23875 perm
.quick_push (i
+ high
);
23876 perm
.quick_push (i
+ nelt
+ high
);
23879 rs6000_do_expand_vec_perm (target
, op0
, op1
, vmode
, perm
);
23882 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
23884 rs6000_scale_v2df (rtx tgt
, rtx src
, int scale
)
23886 HOST_WIDE_INT
hwi_scale (scale
);
23887 REAL_VALUE_TYPE r_pow
;
23888 rtvec v
= rtvec_alloc (2);
23890 rtx scale_vec
= gen_reg_rtx (V2DFmode
);
23891 (void)real_powi (&r_pow
, DFmode
, &dconst2
, hwi_scale
);
23892 elt
= const_double_from_real_value (r_pow
, DFmode
);
23893 RTVEC_ELT (v
, 0) = elt
;
23894 RTVEC_ELT (v
, 1) = elt
;
23895 rs6000_expand_vector_init (scale_vec
, gen_rtx_PARALLEL (V2DFmode
, v
));
23896 emit_insn (gen_mulv2df3 (tgt
, src
, scale_vec
));
23899 /* Return an RTX representing where to find the function value of a
23900 function returning MODE. */
23902 rs6000_complex_function_value (machine_mode mode
)
23904 unsigned int regno
;
23906 machine_mode inner
= GET_MODE_INNER (mode
);
23907 unsigned int inner_bytes
= GET_MODE_UNIT_SIZE (mode
);
23909 if (TARGET_FLOAT128_TYPE
23911 || (mode
== TCmode
&& TARGET_IEEEQUAD
)))
23912 regno
= ALTIVEC_ARG_RETURN
;
23914 else if (FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
23915 regno
= FP_ARG_RETURN
;
23919 regno
= GP_ARG_RETURN
;
23921 /* 32-bit is OK since it'll go in r3/r4. */
23922 if (TARGET_32BIT
&& inner_bytes
>= 4)
23923 return gen_rtx_REG (mode
, regno
);
23926 if (inner_bytes
>= 8)
23927 return gen_rtx_REG (mode
, regno
);
23929 r1
= gen_rtx_EXPR_LIST (inner
, gen_rtx_REG (inner
, regno
),
23931 r2
= gen_rtx_EXPR_LIST (inner
, gen_rtx_REG (inner
, regno
+ 1),
23932 GEN_INT (inner_bytes
));
23933 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, r1
, r2
));
23936 /* Return an rtx describing a return value of MODE as a PARALLEL
23937 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
23938 stride REG_STRIDE. */
23941 rs6000_parallel_return (machine_mode mode
,
23942 int n_elts
, machine_mode elt_mode
,
23943 unsigned int regno
, unsigned int reg_stride
)
23945 rtx par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (n_elts
));
23948 for (i
= 0; i
< n_elts
; i
++)
23950 rtx r
= gen_rtx_REG (elt_mode
, regno
);
23951 rtx off
= GEN_INT (i
* GET_MODE_SIZE (elt_mode
));
23952 XVECEXP (par
, 0, i
) = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
23953 regno
+= reg_stride
;
23959 /* Target hook for TARGET_FUNCTION_VALUE.
23961 An integer value is in r3 and a floating-point value is in fp1,
23962 unless -msoft-float. */
23965 rs6000_function_value (const_tree valtype
,
23966 const_tree fn_decl_or_type ATTRIBUTE_UNUSED
,
23967 bool outgoing ATTRIBUTE_UNUSED
)
23970 unsigned int regno
;
23971 machine_mode elt_mode
;
23974 /* Special handling for structs in darwin64. */
23976 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype
), valtype
))
23978 CUMULATIVE_ARGS valcum
;
23982 valcum
.fregno
= FP_ARG_MIN_REG
;
23983 valcum
.vregno
= ALTIVEC_ARG_MIN_REG
;
23984 /* Do a trial code generation as if this were going to be passed as
23985 an argument; if any part goes in memory, we return NULL. */
23986 valret
= rs6000_darwin64_record_arg (&valcum
, valtype
, true, /* retval= */ true);
23989 /* Otherwise fall through to standard ABI rules. */
23992 mode
= TYPE_MODE (valtype
);
23994 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
23995 if (rs6000_discover_homogeneous_aggregate (mode
, valtype
, &elt_mode
, &n_elts
))
23997 int first_reg
, n_regs
;
23999 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode
))
24001 /* _Decimal128 must use even/odd register pairs. */
24002 first_reg
= (elt_mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
24003 n_regs
= (GET_MODE_SIZE (elt_mode
) + 7) >> 3;
24007 first_reg
= ALTIVEC_ARG_RETURN
;
24011 return rs6000_parallel_return (mode
, n_elts
, elt_mode
, first_reg
, n_regs
);
24014 /* Some return value types need be split in -mpowerpc64, 32bit ABI. */
24015 if (TARGET_32BIT
&& TARGET_POWERPC64
)
24024 int count
= GET_MODE_SIZE (mode
) / 4;
24025 return rs6000_parallel_return (mode
, count
, SImode
, GP_ARG_RETURN
, 1);
24028 if ((INTEGRAL_TYPE_P (valtype
)
24029 && GET_MODE_BITSIZE (mode
) < (TARGET_32BIT
? 32 : 64))
24030 || POINTER_TYPE_P (valtype
))
24031 mode
= TARGET_32BIT
? SImode
: DImode
;
24033 if (DECIMAL_FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
24034 /* _Decimal128 must use an even/odd register pair. */
24035 regno
= (mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
24036 else if (SCALAR_FLOAT_TYPE_P (valtype
) && TARGET_HARD_FLOAT
24037 && !FLOAT128_VECTOR_P (mode
))
24038 regno
= FP_ARG_RETURN
;
24039 else if (TREE_CODE (valtype
) == COMPLEX_TYPE
24040 && targetm
.calls
.split_complex_arg
)
24041 return rs6000_complex_function_value (mode
);
24042 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
24043 return register is used in both cases, and we won't see V2DImode/V2DFmode
24044 for pure altivec, combine the two cases. */
24045 else if ((VECTOR_TYPE_P (valtype
) || VECTOR_ALIGNMENT_P (mode
))
24046 && TARGET_ALTIVEC
&& TARGET_ALTIVEC_ABI
24047 && ALTIVEC_OR_VSX_VECTOR_MODE (mode
))
24048 regno
= ALTIVEC_ARG_RETURN
;
24050 regno
= GP_ARG_RETURN
;
24052 return gen_rtx_REG (mode
, regno
);
24055 /* Define how to find the value returned by a library function
24056 assuming the value has mode MODE. */
24058 rs6000_libcall_value (machine_mode mode
)
24060 unsigned int regno
;
24062 /* Long long return value need be split in -mpowerpc64, 32bit ABI. */
24063 if (TARGET_32BIT
&& TARGET_POWERPC64
&& mode
== DImode
)
24064 return rs6000_parallel_return (mode
, 2, SImode
, GP_ARG_RETURN
, 1);
24066 if (DECIMAL_FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
24067 /* _Decimal128 must use an even/odd register pair. */
24068 regno
= (mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
24069 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode
) && TARGET_HARD_FLOAT
)
24070 regno
= FP_ARG_RETURN
;
24071 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
24072 return register is used in both cases, and we won't see V2DImode/V2DFmode
24073 for pure altivec, combine the two cases. */
24074 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode
)
24075 && TARGET_ALTIVEC
&& TARGET_ALTIVEC_ABI
)
24076 regno
= ALTIVEC_ARG_RETURN
;
24077 else if (COMPLEX_MODE_P (mode
) && targetm
.calls
.split_complex_arg
)
24078 return rs6000_complex_function_value (mode
);
24080 regno
= GP_ARG_RETURN
;
24082 return gen_rtx_REG (mode
, regno
);
24085 /* Compute register pressure classes. We implement the target hook to avoid
24086 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
24087 lead to incorrect estimates of number of available registers and therefor
24088 increased register pressure/spill. */
24090 rs6000_compute_pressure_classes (enum reg_class
*pressure_classes
)
24095 pressure_classes
[n
++] = GENERAL_REGS
;
24096 if (TARGET_ALTIVEC
)
24097 pressure_classes
[n
++] = ALTIVEC_REGS
;
24099 pressure_classes
[n
++] = VSX_REGS
;
24102 if (TARGET_HARD_FLOAT
)
24103 pressure_classes
[n
++] = FLOAT_REGS
;
24105 pressure_classes
[n
++] = CR_REGS
;
24106 pressure_classes
[n
++] = SPECIAL_REGS
;
24111 /* Given FROM and TO register numbers, say whether this elimination is allowed.
24112 Frame pointer elimination is automatically handled.
24114 For the RS/6000, if frame pointer elimination is being done, we would like
24115 to convert ap into fp, not sp.
24117 We need r30 if -mminimal-toc was specified, and there are constant pool
24121 rs6000_can_eliminate (const int from
, const int to
)
24123 return (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
24124 ? ! frame_pointer_needed
24125 : from
== RS6000_PIC_OFFSET_TABLE_REGNUM
24126 ? ! TARGET_MINIMAL_TOC
|| TARGET_NO_TOC_OR_PCREL
24127 || constant_pool_empty_p ()
24131 /* Define the offset between two registers, FROM to be eliminated and its
24132 replacement TO, at the start of a routine. */
24134 rs6000_initial_elimination_offset (int from
, int to
)
24136 rs6000_stack_t
*info
= rs6000_stack_info ();
24137 HOST_WIDE_INT offset
;
24139 if (from
== HARD_FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
24140 offset
= info
->push_p
? 0 : -info
->total_size
;
24141 else if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
24143 offset
= info
->push_p
? 0 : -info
->total_size
;
24144 if (FRAME_GROWS_DOWNWARD
)
24145 offset
+= info
->fixed_size
+ info
->vars_size
+ info
->parm_size
;
24147 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
24148 offset
= FRAME_GROWS_DOWNWARD
24149 ? info
->fixed_size
+ info
->vars_size
+ info
->parm_size
24151 else if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
24152 offset
= info
->total_size
;
24153 else if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
24154 offset
= info
->push_p
? info
->total_size
: 0;
24155 else if (from
== RS6000_PIC_OFFSET_TABLE_REGNUM
)
24158 gcc_unreachable ();
24163 /* Fill in sizes of registers used by unwinder. */
24166 rs6000_init_dwarf_reg_sizes_extra (tree address
)
24168 if (TARGET_MACHO
&& ! TARGET_ALTIVEC
)
24171 machine_mode mode
= TYPE_MODE (char_type_node
);
24172 rtx addr
= expand_expr (address
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
24173 rtx mem
= gen_rtx_MEM (BLKmode
, addr
);
24174 rtx value
= gen_int_mode (16, mode
);
24176 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
24177 The unwinder still needs to know the size of Altivec registers. */
24179 for (i
= FIRST_ALTIVEC_REGNO
; i
< LAST_ALTIVEC_REGNO
+1; i
++)
24181 int column
= DWARF_REG_TO_UNWIND_COLUMN
24182 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i
), true));
24183 HOST_WIDE_INT offset
= column
* GET_MODE_SIZE (mode
);
24185 emit_move_insn (adjust_address (mem
, mode
, offset
), value
);
24190 /* Map internal gcc register numbers to debug format register numbers.
24191 FORMAT specifies the type of debug register number to use:
24192 0 -- debug information, except for frame-related sections
24193 1 -- DWARF .debug_frame section
24194 2 -- DWARF .eh_frame section */
24197 rs6000_debugger_regno (unsigned int regno
, unsigned int format
)
24199 /* On some platforms, we use the standard DWARF register
24200 numbering for .debug_info and .debug_frame. */
24201 if ((format
== 0 && dwarf_debuginfo_p ()) || format
== 1)
24203 #ifdef RS6000_USE_DWARF_NUMBERING
24206 if (FP_REGNO_P (regno
))
24207 return regno
- FIRST_FPR_REGNO
+ 32;
24208 if (ALTIVEC_REGNO_P (regno
))
24209 return regno
- FIRST_ALTIVEC_REGNO
+ 1124;
24210 if (regno
== LR_REGNO
)
24212 if (regno
== CTR_REGNO
)
24214 if (regno
== CA_REGNO
)
24215 return 101; /* XER */
24216 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
24217 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
24218 The actual code emitted saves the whole of CR, so we map CR2_REGNO
24219 to the DWARF reg for CR. */
24220 if (format
== 1 && regno
== CR2_REGNO
)
24222 if (CR_REGNO_P (regno
))
24223 return regno
- CR0_REGNO
+ 86;
24224 if (regno
== VRSAVE_REGNO
)
24226 if (regno
== VSCR_REGNO
)
24229 /* These do not make much sense. */
24230 if (regno
== FRAME_POINTER_REGNUM
)
24232 if (regno
== ARG_POINTER_REGNUM
)
24237 gcc_unreachable ();
24241 /* We use the GCC 7 (and before) internal number for non-DWARF debug
24242 information, and also for .eh_frame. */
24243 /* Translate the regnos to their numbers in GCC 7 (and before). */
24246 if (FP_REGNO_P (regno
))
24247 return regno
- FIRST_FPR_REGNO
+ 32;
24248 if (ALTIVEC_REGNO_P (regno
))
24249 return regno
- FIRST_ALTIVEC_REGNO
+ 77;
24250 if (regno
== LR_REGNO
)
24252 if (regno
== CTR_REGNO
)
24254 if (regno
== CA_REGNO
)
24255 return 76; /* XER */
24256 if (CR_REGNO_P (regno
))
24257 return regno
- CR0_REGNO
+ 68;
24258 if (regno
== VRSAVE_REGNO
)
24260 if (regno
== VSCR_REGNO
)
24263 if (regno
== FRAME_POINTER_REGNUM
)
24265 if (regno
== ARG_POINTER_REGNUM
)
24270 gcc_unreachable ();
24273 /* target hook eh_return_filter_mode */
24274 static scalar_int_mode
24275 rs6000_eh_return_filter_mode (void)
24277 return TARGET_32BIT
? SImode
: word_mode
;
24280 /* Target hook for translate_mode_attribute. */
24281 static machine_mode
24282 rs6000_translate_mode_attribute (machine_mode mode
)
24284 if ((FLOAT128_IEEE_P (mode
)
24285 && ieee128_float_type_node
== long_double_type_node
)
24286 || (FLOAT128_IBM_P (mode
)
24287 && ibm128_float_type_node
== long_double_type_node
))
24288 return COMPLEX_MODE_P (mode
) ? E_TCmode
: E_TFmode
;
24292 /* Target hook for scalar_mode_supported_p. */
24294 rs6000_scalar_mode_supported_p (scalar_mode mode
)
24296 /* -m32 does not support TImode. This is the default, from
24297 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
24298 same ABI as for -m32. But default_scalar_mode_supported_p allows
24299 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
24300 for -mpowerpc64. */
24301 if (TARGET_32BIT
&& mode
== TImode
)
24304 if (DECIMAL_FLOAT_MODE_P (mode
))
24305 return default_decimal_float_supported_p ();
24306 else if (TARGET_FLOAT128_TYPE
&& (mode
== KFmode
|| mode
== IFmode
))
24309 return default_scalar_mode_supported_p (mode
);
24312 /* Target hook for libgcc_floating_mode_supported_p. */
24315 rs6000_libgcc_floating_mode_supported_p (scalar_float_mode mode
)
24324 /* We only return true for KFmode if IEEE 128-bit types are supported, and
24325 if long double does not use the IEEE 128-bit format. If long double
24326 uses the IEEE 128-bit format, it will use TFmode and not KFmode.
24327 Because the code will not use KFmode in that case, there will be aborts
24328 because it can't find KFmode in the Floatn types. */
24330 return TARGET_FLOAT128_TYPE
&& !TARGET_IEEEQUAD
;
24337 /* Target hook for vector_mode_supported_p. */
24339 rs6000_vector_mode_supported_p (machine_mode mode
)
24341 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
24342 128-bit, the compiler might try to widen IEEE 128-bit to IBM
24344 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
) && !FLOAT128_IEEE_P (mode
))
24351 /* Target hook for floatn_mode. */
24352 static opt_scalar_float_mode
24353 rs6000_floatn_mode (int n
, bool extended
)
24363 if (TARGET_FLOAT128_TYPE
)
24364 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
24366 return opt_scalar_float_mode ();
24369 return opt_scalar_float_mode ();
24372 /* Those are the only valid _FloatNx types. */
24373 gcc_unreachable ();
24387 if (TARGET_FLOAT128_TYPE
)
24388 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
24390 return opt_scalar_float_mode ();
24393 return opt_scalar_float_mode ();
24399 /* Target hook for c_mode_for_suffix. */
24400 static machine_mode
24401 rs6000_c_mode_for_suffix (char suffix
)
24403 if (TARGET_FLOAT128_TYPE
)
24405 if (suffix
== 'q' || suffix
== 'Q')
24406 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
24408 /* At the moment, we are not defining a suffix for IBM extended double.
24409 If/when the default for -mabi=ieeelongdouble is changed, and we want
24410 to support __ibm128 constants in legacy library code, we may need to
24411 re-evalaute this decision. Currently, c-lex.cc only supports 'w' and
24412 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
24413 __float80 constants. */
24419 /* Implement TARGET_C_MODE_FOR_FLOATING_TYPE. Return TFmode for
24420 TI_LONG_DOUBLE_TYPE which is for long double type, go with the default
24421 one for the others. */
24423 static machine_mode
24424 rs6000_c_mode_for_floating_type (enum tree_index ti
)
24426 if (ti
== TI_LONG_DOUBLE_TYPE
)
24427 return rs6000_long_double_type_size
== 128 ? TFmode
: DFmode
;
24428 return default_mode_for_floating_type (ti
);
24431 /* Target hook for invalid_arg_for_unprototyped_fn. */
24432 static const char *
24433 invalid_arg_for_unprototyped_fn (const_tree typelist
, const_tree funcdecl
, const_tree val
)
24435 return (!rs6000_darwin64_abi
24437 && VECTOR_TYPE_P (TREE_TYPE (val
))
24438 && (funcdecl
== NULL_TREE
24439 || (TREE_CODE (funcdecl
) == FUNCTION_DECL
24440 && DECL_BUILT_IN_CLASS (funcdecl
) != BUILT_IN_MD
24441 && !fndecl_built_in_p (funcdecl
, BUILT_IN_CLASSIFY_TYPE
))))
24442 ? N_("AltiVec argument passed to unprototyped function")
24446 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
24447 setup by using __stack_chk_fail_local hidden function instead of
24448 calling __stack_chk_fail directly. Otherwise it is better to call
24449 __stack_chk_fail directly. */
24451 static tree ATTRIBUTE_UNUSED
24452 rs6000_stack_protect_fail (void)
24454 return (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
&& flag_pic
)
24455 ? default_hidden_stack_protect_fail ()
24456 : default_external_stack_protect_fail ();
24459 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
24462 static unsigned HOST_WIDE_INT
24463 rs6000_asan_shadow_offset (void)
24465 return (unsigned HOST_WIDE_INT
) 1 << (TARGET_64BIT
? 41 : 29);
24469 /* Mask options that we want to support inside of attribute((target)) and
24470 #pragma GCC target operations. Note, we do not include things like
24471 64/32-bit, endianness, hard/soft floating point, etc. that would have
24472 different calling sequences. */
24474 struct rs6000_opt_mask
{
24475 const char *name
; /* option name */
24476 HOST_WIDE_INT mask
; /* mask to set */
24477 bool invert
; /* invert sense of mask */
24478 bool valid_target
; /* option is a target option */
24481 static struct rs6000_opt_mask
const rs6000_opt_masks
[] =
24483 { "altivec", OPTION_MASK_ALTIVEC
, false, true },
24484 { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
,
24486 { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR
,
24488 { "cmpb", OPTION_MASK_CMPB
, false, true },
24489 { "crypto", OPTION_MASK_CRYPTO
, false, true },
24490 { "direct-move", 0, false, true },
24491 { "dlmzb", OPTION_MASK_DLMZB
, false, true },
24492 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX
,
24494 { "float128", OPTION_MASK_FLOAT128_KEYWORD
, false, true },
24495 { "float128-hardware", OPTION_MASK_FLOAT128_HW
, false, true },
24496 { "fprnd", OPTION_MASK_FPRND
, false, true },
24497 { "power10", OPTION_MASK_POWER10
, false, true },
24498 { "power11", OPTION_MASK_POWER11
, false, false },
24499 { "hard-dfp", OPTION_MASK_DFP
, false, true },
24500 { "htm", OPTION_MASK_HTM
, false, true },
24501 { "isel", OPTION_MASK_ISEL
, false, true },
24502 { "mfcrf", OPTION_MASK_MFCRF
, false, true },
24503 { "mfpgpr", 0, false, true },
24504 { "mma", OPTION_MASK_MMA
, false, true },
24505 { "modulo", OPTION_MASK_MODULO
, false, true },
24506 { "mulhw", OPTION_MASK_MULHW
, false, true },
24507 { "multiple", OPTION_MASK_MULTIPLE
, false, true },
24508 { "pcrel", OPTION_MASK_PCREL
, false, true },
24509 { "pcrel-opt", OPTION_MASK_PCREL_OPT
, false, true },
24510 { "popcntb", OPTION_MASK_POPCNTB
, false, true },
24511 { "popcntd", OPTION_MASK_POPCNTD
, false, true },
24512 { "power8-fusion", OPTION_MASK_P8_FUSION
, false, true },
24513 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN
, false, true },
24514 { "power8-vector", OPTION_MASK_P8_VECTOR
, false, true },
24515 { "power9-minmax", OPTION_MASK_P9_MINMAX
, false, true },
24516 { "power9-misc", OPTION_MASK_P9_MISC
, false, true },
24517 { "power9-vector", OPTION_MASK_P9_VECTOR
, false, true },
24518 { "power10-fusion", OPTION_MASK_P10_FUSION
, false, true },
24519 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT
, false, true },
24520 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT
, false, true },
24521 { "prefixed", OPTION_MASK_PREFIXED
, false, true },
24522 { "quad-memory", OPTION_MASK_QUAD_MEMORY
, false, true },
24523 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC
, false, true },
24524 { "recip-precision", OPTION_MASK_RECIP_PRECISION
, false, true },
24525 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT
, false, true },
24526 { "string", 0, false, true },
24527 { "update", OPTION_MASK_NO_UPDATE
, true , true },
24528 { "vsx", OPTION_MASK_VSX
, false, true },
24529 #ifdef OPTION_MASK_64BIT
24531 { "aix64", OPTION_MASK_64BIT
, false, false },
24532 { "aix32", OPTION_MASK_64BIT
, true, false },
24534 { "64", OPTION_MASK_64BIT
, false, false },
24535 { "32", OPTION_MASK_64BIT
, true, false },
24538 #ifdef OPTION_MASK_EABI
24539 { "eabi", OPTION_MASK_EABI
, false, false },
24541 #ifdef OPTION_MASK_LITTLE_ENDIAN
24542 { "little", OPTION_MASK_LITTLE_ENDIAN
, false, false },
24543 { "big", OPTION_MASK_LITTLE_ENDIAN
, true, false },
24545 #ifdef OPTION_MASK_RELOCATABLE
24546 { "relocatable", OPTION_MASK_RELOCATABLE
, false, false },
24548 #ifdef OPTION_MASK_STRICT_ALIGN
24549 { "strict-align", OPTION_MASK_STRICT_ALIGN
, false, false },
24551 { "soft-float", OPTION_MASK_SOFT_FLOAT
, false, false },
24552 { "string", 0, false, false },
/* Option variables that we want to support inside attribute((target)) and
   #pragma GCC target operations.  */

struct rs6000_opt_var {
  const char *name;		/* option name */
  size_t global_offset;		/* offset of the option in global_options.  */
  size_t target_offset;		/* offset of the option in target options.  */
};
24564 static struct rs6000_opt_var
const rs6000_opt_vars
[] =
24567 offsetof (struct gcc_options
, x_TARGET_FRIZ
),
24568 offsetof (struct cl_target_option
, x_TARGET_FRIZ
), },
24569 { "avoid-indexed-addresses",
24570 offsetof (struct gcc_options
, x_TARGET_AVOID_XFORM
),
24571 offsetof (struct cl_target_option
, x_TARGET_AVOID_XFORM
) },
24573 offsetof (struct gcc_options
, x_rs6000_default_long_calls
),
24574 offsetof (struct cl_target_option
, x_rs6000_default_long_calls
), },
24575 { "optimize-swaps",
24576 offsetof (struct gcc_options
, x_rs6000_optimize_swaps
),
24577 offsetof (struct cl_target_option
, x_rs6000_optimize_swaps
), },
24578 { "allow-movmisalign",
24579 offsetof (struct gcc_options
, x_TARGET_ALLOW_MOVMISALIGN
),
24580 offsetof (struct cl_target_option
, x_TARGET_ALLOW_MOVMISALIGN
), },
24582 offsetof (struct gcc_options
, x_TARGET_SCHED_GROUPS
),
24583 offsetof (struct cl_target_option
, x_TARGET_SCHED_GROUPS
), },
24585 offsetof (struct gcc_options
, x_TARGET_ALWAYS_HINT
),
24586 offsetof (struct cl_target_option
, x_TARGET_ALWAYS_HINT
), },
24587 { "align-branch-targets",
24588 offsetof (struct gcc_options
, x_TARGET_ALIGN_BRANCH_TARGETS
),
24589 offsetof (struct cl_target_option
, x_TARGET_ALIGN_BRANCH_TARGETS
), },
24591 offsetof (struct gcc_options
, x_TARGET_SCHED_PROLOG
),
24592 offsetof (struct cl_target_option
, x_TARGET_SCHED_PROLOG
), },
24594 offsetof (struct gcc_options
, x_TARGET_SCHED_PROLOG
),
24595 offsetof (struct cl_target_option
, x_TARGET_SCHED_PROLOG
), },
24596 { "speculate-indirect-jumps",
24597 offsetof (struct gcc_options
, x_rs6000_speculate_indirect_jumps
),
24598 offsetof (struct cl_target_option
, x_rs6000_speculate_indirect_jumps
), },
24601 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
24602 parsing. Return true if there were no errors. */
24605 rs6000_inner_target_options (tree args
, bool attr_p
)
24609 if (args
== NULL_TREE
)
24612 else if (TREE_CODE (args
) == STRING_CST
)
24614 char *p
= ASTRDUP (TREE_STRING_POINTER (args
));
24617 while ((q
= strtok (p
, ",")) != NULL
)
24619 bool error_p
= false;
24620 bool not_valid_p
= false;
24621 const char *cpu_opt
= NULL
;
24624 if (startswith (q
, "cpu="))
24626 int cpu_index
= rs6000_cpu_name_lookup (q
+4);
24627 if (cpu_index
>= 0)
24628 rs6000_cpu_index
= cpu_index
;
24635 else if (startswith (q
, "tune="))
24637 int tune_index
= rs6000_cpu_name_lookup (q
+5);
24638 if (tune_index
>= 0)
24639 rs6000_tune_index
= tune_index
;
24649 bool invert
= false;
24653 if (startswith (r
, "no-"))
24659 for (i
= 0; i
< ARRAY_SIZE (rs6000_opt_masks
); i
++)
24660 if (strcmp (r
, rs6000_opt_masks
[i
].name
) == 0)
24662 HOST_WIDE_INT mask
= rs6000_opt_masks
[i
].mask
;
24664 if (!rs6000_opt_masks
[i
].valid_target
)
24665 not_valid_p
= true;
24669 rs6000_isa_flags_explicit
|= mask
;
24671 /* VSX needs altivec, so -mvsx automagically sets
24672 altivec and disables -mavoid-indexed-addresses. */
24675 if (mask
== OPTION_MASK_VSX
)
24677 if (!(rs6000_isa_flags_explicit
24678 & OPTION_MASK_ALTIVEC
))
24679 mask
|= OPTION_MASK_ALTIVEC
;
24680 if (!OPTION_SET_P (TARGET_AVOID_XFORM
))
24681 TARGET_AVOID_XFORM
= 0;
24685 if (rs6000_opt_masks
[i
].invert
)
24689 rs6000_isa_flags
&= ~mask
;
24691 rs6000_isa_flags
|= mask
;
24696 if (error_p
&& !not_valid_p
)
24698 for (i
= 0; i
< ARRAY_SIZE (rs6000_opt_vars
); i
++)
24699 if (strcmp (r
, rs6000_opt_vars
[i
].name
) == 0)
24701 size_t j
= rs6000_opt_vars
[i
].global_offset
;
24702 *((int *) ((char *) &global_options
+ j
)) = !invert
;
24703 *((int *) ((char *) &global_options_set
+ j
)) = 1;
24705 not_valid_p
= false;
24713 const char *eprefix
, *esuffix
;
24718 eprefix
= "__attribute__((__target__(";
24723 eprefix
= "#pragma GCC target ";
24728 error ("invalid cpu %qs for %s%qs%s", cpu_opt
, eprefix
,
24730 else if (not_valid_p
)
24731 error ("%s%qs%s is not allowed", eprefix
, q
, esuffix
);
24733 error ("%s%qs%s is invalid", eprefix
, q
, esuffix
);
24738 else if (TREE_CODE (args
) == TREE_LIST
)
24742 tree value
= TREE_VALUE (args
);
24745 bool ret2
= rs6000_inner_target_options (value
, attr_p
);
24749 args
= TREE_CHAIN (args
);
24751 while (args
!= NULL_TREE
);
24756 error ("attribute %<target%> argument not a string");
24763 /* Print out the target options as a list for -mdebug=target. */
24766 rs6000_debug_target_options (tree args
, const char *prefix
)
24768 if (args
== NULL_TREE
)
24769 fprintf (stderr
, "%s<NULL>", prefix
);
24771 else if (TREE_CODE (args
) == STRING_CST
)
24773 char *p
= ASTRDUP (TREE_STRING_POINTER (args
));
24776 while ((q
= strtok (p
, ",")) != NULL
)
24779 fprintf (stderr
, "%s\"%s\"", prefix
, q
);
24784 else if (TREE_CODE (args
) == TREE_LIST
)
24788 tree value
= TREE_VALUE (args
);
24791 rs6000_debug_target_options (value
, prefix
);
24794 args
= TREE_CHAIN (args
);
24796 while (args
!= NULL_TREE
);
24800 gcc_unreachable ();
24806 /* Hook to validate attribute((target("..."))). */
24809 rs6000_valid_attribute_p (tree fndecl
,
24810 tree
ARG_UNUSED (name
),
24814 struct cl_target_option cur_target
;
24817 tree new_target
, new_optimize
;
24818 tree func_optimize
;
24820 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
24822 if (TARGET_DEBUG_TARGET
)
24824 tree tname
= DECL_NAME (fndecl
);
24825 fprintf (stderr
, "\n==================== rs6000_valid_attribute_p:\n");
24827 fprintf (stderr
, "function: %.*s\n",
24828 (int) IDENTIFIER_LENGTH (tname
),
24829 IDENTIFIER_POINTER (tname
));
24831 fprintf (stderr
, "function: unknown\n");
24833 fprintf (stderr
, "args:");
24834 rs6000_debug_target_options (args
, " ");
24835 fprintf (stderr
, "\n");
24838 fprintf (stderr
, "flags: 0x%x\n", flags
);
24840 fprintf (stderr
, "--------------------\n");
24843 /* attribute((target("default"))) does nothing, beyond
24844 affecting multi-versioning. */
24845 if (TREE_VALUE (args
)
24846 && TREE_CODE (TREE_VALUE (args
)) == STRING_CST
24847 && TREE_CHAIN (args
) == NULL_TREE
24848 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args
)), "default") == 0)
24851 old_optimize
= build_optimization_node (&global_options
,
24852 &global_options_set
);
24853 func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
24855 /* If the function changed the optimization levels as well as setting target
24856 options, start with the optimizations specified. */
24857 if (func_optimize
&& func_optimize
!= old_optimize
)
24858 cl_optimization_restore (&global_options
, &global_options_set
,
24859 TREE_OPTIMIZATION (func_optimize
));
24861 /* The target attributes may also change some optimization flags, so update
24862 the optimization options if necessary. */
24863 cl_target_option_save (&cur_target
, &global_options
, &global_options_set
);
24864 rs6000_cpu_index
= rs6000_tune_index
= -1;
24865 ret
= rs6000_inner_target_options (args
, true);
24867 /* Set up any additional state. */
24870 ret
= rs6000_option_override_internal (false);
24871 new_target
= build_target_option_node (&global_options
,
24872 &global_options_set
);
24877 new_optimize
= build_optimization_node (&global_options
,
24878 &global_options_set
);
24885 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
24887 if (old_optimize
!= new_optimize
)
24888 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
24891 cl_target_option_restore (&global_options
, &global_options_set
, &cur_target
);
24893 if (old_optimize
!= new_optimize
)
24894 cl_optimization_restore (&global_options
, &global_options_set
,
24895 TREE_OPTIMIZATION (old_optimize
));
24901 /* Hook to validate the current #pragma GCC target and set the state, and
24902 update the macros based on what was changed. If ARGS is NULL, then
24903 POP_TARGET is used to reset the options. */
24906 rs6000_pragma_target_parse (tree args
, tree pop_target
)
24908 tree prev_tree
= build_target_option_node (&global_options
,
24909 &global_options_set
);
24911 struct cl_target_option
*prev_opt
, *cur_opt
;
24912 HOST_WIDE_INT prev_flags
, cur_flags
, diff_flags
;
24914 if (TARGET_DEBUG_TARGET
)
24916 fprintf (stderr
, "\n==================== rs6000_pragma_target_parse\n");
24917 fprintf (stderr
, "args:");
24918 rs6000_debug_target_options (args
, " ");
24919 fprintf (stderr
, "\n");
24923 fprintf (stderr
, "pop_target:\n");
24924 debug_tree (pop_target
);
24927 fprintf (stderr
, "pop_target: <NULL>\n");
24929 fprintf (stderr
, "--------------------\n");
24934 cur_tree
= ((pop_target
)
24936 : target_option_default_node
);
24937 cl_target_option_restore (&global_options
, &global_options_set
,
24938 TREE_TARGET_OPTION (cur_tree
));
24942 rs6000_cpu_index
= rs6000_tune_index
= -1;
24943 if (!rs6000_inner_target_options (args
, false)
24944 || !rs6000_option_override_internal (false)
24945 || (cur_tree
= build_target_option_node (&global_options
,
24946 &global_options_set
))
24949 if (TARGET_DEBUG_BUILTIN
|| TARGET_DEBUG_TARGET
)
24950 fprintf (stderr
, "invalid pragma\n");
24956 target_option_current_node
= cur_tree
;
24957 rs6000_activate_target_options (target_option_current_node
);
24959 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
24960 change the macros that are defined. */
24961 if (rs6000_target_modify_macros_ptr
)
24963 prev_opt
= TREE_TARGET_OPTION (prev_tree
);
24964 prev_flags
= prev_opt
->x_rs6000_isa_flags
;
24966 cur_opt
= TREE_TARGET_OPTION (cur_tree
);
24967 cur_flags
= cur_opt
->x_rs6000_isa_flags
;
24969 diff_flags
= (prev_flags
^ cur_flags
);
24971 if (diff_flags
!= 0)
24973 /* Delete old macros. */
24974 rs6000_target_modify_macros_ptr (false,
24975 prev_flags
& diff_flags
);
24977 /* Define new macros. */
24978 rs6000_target_modify_macros_ptr (true,
24979 cur_flags
& diff_flags
);
24987 /* Remember the last target of rs6000_set_current_function. */
24988 static GTY(()) tree rs6000_previous_fndecl
;
24990 /* Restore target's globals from NEW_TREE and invalidate the
24991 rs6000_previous_fndecl cache. */
24994 rs6000_activate_target_options (tree new_tree
)
24996 cl_target_option_restore (&global_options
, &global_options_set
,
24997 TREE_TARGET_OPTION (new_tree
));
24998 if (TREE_TARGET_GLOBALS (new_tree
))
24999 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
25000 else if (new_tree
== target_option_default_node
)
25001 restore_target_globals (&default_target_globals
);
25003 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
25004 rs6000_previous_fndecl
= NULL_TREE
;
25007 /* Establish appropriate back-end context for processing the function
25008 FNDECL. The argument might be NULL to indicate processing at top
25009 level, outside of any function scope. */
25011 rs6000_set_current_function (tree fndecl
)
25013 if (TARGET_DEBUG_TARGET
)
25015 fprintf (stderr
, "\n==================== rs6000_set_current_function");
25018 fprintf (stderr
, ", fndecl %s (%p)",
25019 (DECL_NAME (fndecl
)
25020 ? IDENTIFIER_POINTER (DECL_NAME (fndecl
))
25021 : "<unknown>"), (void *)fndecl
);
25023 if (rs6000_previous_fndecl
)
25024 fprintf (stderr
, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl
);
25026 fprintf (stderr
, "\n");
25029 /* Only change the context if the function changes. This hook is called
25030 several times in the course of compiling a function, and we don't want to
25031 slow things down too much or call target_reinit when it isn't safe. */
25032 if (fndecl
== rs6000_previous_fndecl
)
25036 if (rs6000_previous_fndecl
== NULL_TREE
)
25037 old_tree
= target_option_current_node
;
25038 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl
))
25039 old_tree
= DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl
);
25041 old_tree
= target_option_default_node
;
25044 if (fndecl
== NULL_TREE
)
25046 if (old_tree
!= target_option_current_node
)
25047 new_tree
= target_option_current_node
;
25049 new_tree
= NULL_TREE
;
25053 new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
25054 if (new_tree
== NULL_TREE
)
25055 new_tree
= target_option_default_node
;
25058 if (TARGET_DEBUG_TARGET
)
25062 fprintf (stderr
, "\nnew fndecl target specific options:\n");
25063 debug_tree (new_tree
);
25068 fprintf (stderr
, "\nold fndecl target specific options:\n");
25069 debug_tree (old_tree
);
25072 if (old_tree
!= NULL_TREE
|| new_tree
!= NULL_TREE
)
25073 fprintf (stderr
, "--------------------\n");
25076 if (new_tree
&& old_tree
!= new_tree
)
25077 rs6000_activate_target_options (new_tree
);
25080 rs6000_previous_fndecl
= fndecl
;
25084 /* Save the current options */
25087 rs6000_function_specific_save (struct cl_target_option
*ptr
,
25088 struct gcc_options
*opts
,
25089 struct gcc_options */
* opts_set */
)
25091 ptr
->x_rs6000_isa_flags
= opts
->x_rs6000_isa_flags
;
25092 ptr
->x_rs6000_isa_flags_explicit
= opts
->x_rs6000_isa_flags_explicit
;
25095 /* Restore the current options */
25098 rs6000_function_specific_restore (struct gcc_options
*opts
,
25099 struct gcc_options */
* opts_set */
,
25100 struct cl_target_option
*ptr
)
25103 opts
->x_rs6000_isa_flags
= ptr
->x_rs6000_isa_flags
;
25104 opts
->x_rs6000_isa_flags_explicit
= ptr
->x_rs6000_isa_flags_explicit
;
25105 (void) rs6000_option_override_internal (false);
25108 /* Print the current options */
25111 rs6000_function_specific_print (FILE *file
, int indent
,
25112 struct cl_target_option
*ptr
)
25114 rs6000_print_isa_options (file
, indent
, "Isa options set",
25115 ptr
->x_rs6000_isa_flags
);
25117 rs6000_print_isa_options (file
, indent
, "Isa options explicit",
25118 ptr
->x_rs6000_isa_flags_explicit
);
25121 /* Helper function to print the current isa or misc options on a line. */
25124 rs6000_print_options_internal (FILE *file
,
25126 const char *string
,
25127 HOST_WIDE_INT flags
,
25128 const char *prefix
,
25129 const struct rs6000_opt_mask
*opts
,
25130 size_t num_elements
)
25133 size_t start_column
= 0;
25135 size_t max_column
= 120;
25136 size_t prefix_len
= strlen (prefix
);
25137 size_t comma_len
= 0;
25138 const char *comma
= "";
25141 start_column
+= fprintf (file
, "%*s", indent
, "");
25145 fprintf (stderr
, DEBUG_FMT_S
, string
, "<none>");
25149 start_column
+= fprintf (stderr
, DEBUG_FMT_WX
, string
, flags
);
25151 /* Print the various mask options. */
25152 cur_column
= start_column
;
25153 for (i
= 0; i
< num_elements
; i
++)
25155 bool invert
= opts
[i
].invert
;
25156 const char *name
= opts
[i
].name
;
25157 const char *no_str
= "";
25158 HOST_WIDE_INT mask
= opts
[i
].mask
;
25159 size_t len
= comma_len
+ prefix_len
+ strlen (name
);
25163 if ((flags
& mask
) == 0)
25166 len
+= strlen ("no-");
25174 if ((flags
& mask
) != 0)
25177 len
+= strlen ("no-");
25184 if (cur_column
> max_column
)
25186 fprintf (stderr
, ", \\\n%*s", (int)start_column
, "");
25187 cur_column
= start_column
+ len
;
25191 fprintf (file
, "%s%s%s%s", comma
, prefix
, no_str
, name
);
25193 comma_len
= strlen (", ");
25196 fputs ("\n", file
);
25199 /* Helper function to print the current isa options on a line. */
25202 rs6000_print_isa_options (FILE *file
, int indent
, const char *string
,
25203 HOST_WIDE_INT flags
)
25205 rs6000_print_options_internal (file
, indent
, string
, flags
, "-m",
25206 &rs6000_opt_masks
[0],
25207 ARRAY_SIZE (rs6000_opt_masks
));
25210 /* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06,
25211 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
25212 -mupper-regs-df, etc.).
25214 This function does not handle explicit options such as the user specifying
25215 -mdirect-move. These are handled in rs6000_option_override_internal, and
25216 the appropriate error is given if needed.
25218 We return a mask of all of the implicit options that should not be enabled
25221 static HOST_WIDE_INT
25222 rs6000_disable_incompatible_switches (void)
25224 HOST_WIDE_INT ignore_masks
= rs6000_isa_flags_explicit
;
25227 static const struct {
25228 const HOST_WIDE_INT no_flag
; /* flag explicitly turned off. */
25229 const HOST_WIDE_INT dep_flags
; /* flags that depend on this option. */
25230 const char *const name
; /* name of the switch. */
25232 { OPTION_MASK_VSX
, OTHER_VSX_VECTOR_MASKS
, "vsx" },
25233 { OPTION_MASK_ALTIVEC
, OTHER_ALTIVEC_MASKS
, "altivec" },
25236 for (i
= 0; i
< ARRAY_SIZE (flags
); i
++)
25238 HOST_WIDE_INT no_flag
= flags
[i
].no_flag
;
25240 if ((rs6000_isa_flags
& no_flag
) == 0
25241 && (rs6000_isa_flags_explicit
& no_flag
) != 0)
25243 HOST_WIDE_INT dep_flags
= flags
[i
].dep_flags
;
25244 HOST_WIDE_INT set_flags
= (rs6000_isa_flags_explicit
25250 for (j
= 0; j
< ARRAY_SIZE (rs6000_opt_masks
); j
++)
25251 if ((set_flags
& rs6000_opt_masks
[j
].mask
) != 0)
25253 set_flags
&= ~rs6000_opt_masks
[j
].mask
;
25254 error ("%<-mno-%s%> turns off %<-m%s%>",
25256 rs6000_opt_masks
[j
].name
);
25259 gcc_assert (!set_flags
);
25262 rs6000_isa_flags
&= ~dep_flags
;
25263 ignore_masks
|= no_flag
| dep_flags
;
25267 return ignore_masks
;
25271 /* Helper function for printing the function name when debugging. */
25273 static const char *
25274 get_decl_name (tree fn
)
25281 name
= DECL_NAME (fn
);
25283 return "<no-name>";
25285 return IDENTIFIER_POINTER (name
);
25288 /* Return the clone id of the target we are compiling code for in a target
25289 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
25290 the priority list for the target clones (ordered from lowest to
25294 rs6000_clone_priority (tree fndecl
)
25296 tree fn_opts
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
25297 HOST_WIDE_INT isa_masks
;
25298 int ret
= CLONE_DEFAULT
;
25299 tree attrs
= lookup_attribute ("target", DECL_ATTRIBUTES (fndecl
));
25300 const char *attrs_str
= NULL
;
25302 attrs
= TREE_VALUE (TREE_VALUE (attrs
));
25303 attrs_str
= TREE_STRING_POINTER (attrs
);
25305 /* Return priority zero for default function. Return the ISA needed for the
25306 function if it is not the default. */
25307 if (strcmp (attrs_str
, "default") != 0)
25309 if (fn_opts
== NULL_TREE
)
25310 fn_opts
= target_option_default_node
;
25312 if (!fn_opts
|| !TREE_TARGET_OPTION (fn_opts
))
25313 isa_masks
= rs6000_isa_flags
;
25315 isa_masks
= TREE_TARGET_OPTION (fn_opts
)->x_rs6000_isa_flags
;
25317 for (ret
= CLONE_MAX
- 1; ret
!= 0; ret
--)
25318 if ((rs6000_clone_map
[ret
].isa_mask
& isa_masks
) != 0)
25322 if (TARGET_DEBUG_TARGET
)
25323 fprintf (stderr
, "rs6000_get_function_version_priority (%s) => %d\n",
25324 get_decl_name (fndecl
), ret
);
25329 /* This compares the priority of target features in function DECL1 and DECL2.
25330 It returns positive value if DECL1 is higher priority, negative value if
25331 DECL2 is higher priority and 0 if they are the same. Note, priorities are
25332 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
25335 rs6000_compare_version_priority (tree decl1
, tree decl2
)
25337 int priority1
= rs6000_clone_priority (decl1
);
25338 int priority2
= rs6000_clone_priority (decl2
);
25339 int ret
= priority1
- priority2
;
25341 if (TARGET_DEBUG_TARGET
)
25342 fprintf (stderr
, "rs6000_compare_version_priority (%s, %s) => %d\n",
25343 get_decl_name (decl1
), get_decl_name (decl2
), ret
);
25348 /* Make a dispatcher declaration for the multi-versioned function DECL.
25349 Calls to DECL function will be replaced with calls to the dispatcher
25350 by the front-end. Returns the decl of the dispatcher function. */
25353 rs6000_get_function_versions_dispatcher (void *decl
)
25355 tree fn
= (tree
) decl
;
25356 struct cgraph_node
*node
= NULL
;
25357 struct cgraph_node
*default_node
= NULL
;
25358 struct cgraph_function_version_info
*node_v
= NULL
;
25359 struct cgraph_function_version_info
*first_v
= NULL
;
25361 tree dispatch_decl
= NULL
;
25363 struct cgraph_function_version_info
*default_version_info
= NULL
;
25364 gcc_assert (fn
!= NULL
&& DECL_FUNCTION_VERSIONED (fn
));
25366 if (TARGET_DEBUG_TARGET
)
25367 fprintf (stderr
, "rs6000_get_function_versions_dispatcher (%s)\n",
25368 get_decl_name (fn
));
25370 node
= cgraph_node::get (fn
);
25371 gcc_assert (node
!= NULL
);
25373 node_v
= node
->function_version ();
25374 gcc_assert (node_v
!= NULL
);
25376 if (node_v
->dispatcher_resolver
!= NULL
)
25377 return node_v
->dispatcher_resolver
;
25379 /* Find the default version and make it the first node. */
25381 /* Go to the beginning of the chain. */
25382 while (first_v
->prev
!= NULL
)
25383 first_v
= first_v
->prev
;
25385 default_version_info
= first_v
;
25386 while (default_version_info
!= NULL
)
25388 const tree decl2
= default_version_info
->this_node
->decl
;
25389 if (is_function_default_version (decl2
))
25391 default_version_info
= default_version_info
->next
;
25394 /* If there is no default node, just return NULL. */
25395 if (default_version_info
== NULL
)
25398 /* Make default info the first node. */
25399 if (first_v
!= default_version_info
)
25401 default_version_info
->prev
->next
= default_version_info
->next
;
25402 if (default_version_info
->next
)
25403 default_version_info
->next
->prev
= default_version_info
->prev
;
25404 first_v
->prev
= default_version_info
;
25405 default_version_info
->next
= first_v
;
25406 default_version_info
->prev
= NULL
;
25409 default_node
= default_version_info
->this_node
;
25411 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
25412 error_at (DECL_SOURCE_LOCATION (default_node
->decl
),
25413 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
25414 "exports hardware capability bits");
25417 if (targetm
.has_ifunc_p ())
25419 struct cgraph_function_version_info
*it_v
= NULL
;
25420 struct cgraph_node
*dispatcher_node
= NULL
;
25421 struct cgraph_function_version_info
*dispatcher_version_info
= NULL
;
25423 /* Right now, the dispatching is done via ifunc. */
25424 dispatch_decl
= make_dispatcher_decl (default_node
->decl
);
25425 TREE_NOTHROW (dispatch_decl
) = TREE_NOTHROW (fn
);
25427 dispatcher_node
= cgraph_node::get_create (dispatch_decl
);
25428 gcc_assert (dispatcher_node
!= NULL
);
25429 dispatcher_node
->dispatcher_function
= 1;
25430 dispatcher_version_info
25431 = dispatcher_node
->insert_new_function_version ();
25432 dispatcher_version_info
->next
= default_version_info
;
25433 dispatcher_node
->definition
= 1;
25435 /* Set the dispatcher for all the versions. */
25436 it_v
= default_version_info
;
25437 while (it_v
!= NULL
)
25439 it_v
->dispatcher_resolver
= dispatch_decl
;
25445 error_at (DECL_SOURCE_LOCATION (default_node
->decl
),
25446 "multiversioning needs %<ifunc%> which is not supported "
25451 return dispatch_decl
;
25454 /* Make the resolver function decl to dispatch the versions of a multi-
25455 versioned function, DEFAULT_DECL. Create an empty basic block in the
25456 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
25460 make_resolver_func (const tree default_decl
,
25461 const tree dispatch_decl
,
25462 basic_block
*empty_bb
)
25464 /* Make the resolver function static. The resolver function returns
25466 tree decl_name
= clone_function_name (default_decl
, "resolver");
25467 const char *resolver_name
= IDENTIFIER_POINTER (decl_name
);
25468 tree type
= build_function_type_list (ptr_type_node
, NULL_TREE
);
25469 tree decl
= build_fn_decl (resolver_name
, type
);
25470 SET_DECL_ASSEMBLER_NAME (decl
, decl_name
);
25472 DECL_NAME (decl
) = decl_name
;
25473 TREE_USED (decl
) = 1;
25474 DECL_ARTIFICIAL (decl
) = 1;
25475 DECL_IGNORED_P (decl
) = 0;
25476 TREE_PUBLIC (decl
) = 0;
25477 DECL_UNINLINABLE (decl
) = 1;
25479 /* Resolver is not external, body is generated. */
25480 DECL_EXTERNAL (decl
) = 0;
25481 DECL_EXTERNAL (dispatch_decl
) = 0;
25483 DECL_CONTEXT (decl
) = NULL_TREE
;
25484 DECL_INITIAL (decl
) = make_node (BLOCK
);
25485 DECL_STATIC_CONSTRUCTOR (decl
) = 0;
25487 if (DECL_COMDAT_GROUP (default_decl
)
25488 || TREE_PUBLIC (default_decl
))
25490 /* In this case, each translation unit with a call to this
25491 versioned function will put out a resolver. Ensure it
25492 is comdat to keep just one copy. */
25493 DECL_COMDAT (decl
) = 1;
25494 make_decl_one_only (decl
, DECL_ASSEMBLER_NAME (decl
));
25497 TREE_PUBLIC (dispatch_decl
) = 0;
25499 /* Build result decl and add to function_decl. */
25500 tree t
= build_decl (UNKNOWN_LOCATION
, RESULT_DECL
, NULL_TREE
, ptr_type_node
);
25501 DECL_CONTEXT (t
) = decl
;
25502 DECL_ARTIFICIAL (t
) = 1;
25503 DECL_IGNORED_P (t
) = 1;
25504 DECL_RESULT (decl
) = t
;
25506 gimplify_function_tree (decl
);
25507 push_cfun (DECL_STRUCT_FUNCTION (decl
));
25508 *empty_bb
= init_lowered_empty_function (decl
, false,
25509 profile_count::uninitialized ());
25511 cgraph_node::add_new_function (decl
, true);
25512 symtab
->call_cgraph_insertion_hooks (cgraph_node::get_create (decl
));
25516 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
25517 DECL_ATTRIBUTES (dispatch_decl
)
25518 = make_attribute ("ifunc", resolver_name
, DECL_ATTRIBUTES (dispatch_decl
));
25520 cgraph_node::create_same_body_alias (dispatch_decl
, decl
);
25525 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
25526 return a pointer to VERSION_DECL if we are running on a machine that
25527 supports the index CLONE_ISA hardware architecture bits. This function will
25528 be called during version dispatch to decide which function version to
25529 execute. It returns the basic block at the end, to which more conditions
25533 add_condition_to_bb (tree function_decl
, tree version_decl
,
25534 int clone_isa
, basic_block new_bb
)
25536 push_cfun (DECL_STRUCT_FUNCTION (function_decl
));
25538 gcc_assert (new_bb
!= NULL
);
25539 gimple_seq gseq
= bb_seq (new_bb
);
25542 tree convert_expr
= build1 (CONVERT_EXPR
, ptr_type_node
,
25543 build_fold_addr_expr (version_decl
));
25544 tree result_var
= create_tmp_var (ptr_type_node
);
25545 gimple
*convert_stmt
= gimple_build_assign (result_var
, convert_expr
);
25546 gimple
*return_stmt
= gimple_build_return (result_var
);
25548 if (clone_isa
== CLONE_DEFAULT
)
25550 gimple_seq_add_stmt (&gseq
, convert_stmt
);
25551 gimple_seq_add_stmt (&gseq
, return_stmt
);
25552 set_bb_seq (new_bb
, gseq
);
25553 gimple_set_bb (convert_stmt
, new_bb
);
25554 gimple_set_bb (return_stmt
, new_bb
);
25559 tree bool_zero
= build_int_cst (bool_int_type_node
, 0);
25560 tree cond_var
= create_tmp_var (bool_int_type_node
);
25561 tree predicate_decl
= rs6000_builtin_decls
[(int) RS6000_BIF_CPU_SUPPORTS
];
25562 const char *arg_str
= rs6000_clone_map
[clone_isa
].name
;
25563 tree predicate_arg
= build_string_literal (strlen (arg_str
) + 1, arg_str
);
25564 gimple
*call_cond_stmt
= gimple_build_call (predicate_decl
, 1, predicate_arg
);
25565 gimple_call_set_lhs (call_cond_stmt
, cond_var
);
25567 gimple_set_block (call_cond_stmt
, DECL_INITIAL (function_decl
));
25568 gimple_set_bb (call_cond_stmt
, new_bb
);
25569 gimple_seq_add_stmt (&gseq
, call_cond_stmt
);
25571 gimple
*if_else_stmt
= gimple_build_cond (NE_EXPR
, cond_var
, bool_zero
,
25572 NULL_TREE
, NULL_TREE
);
25573 gimple_set_block (if_else_stmt
, DECL_INITIAL (function_decl
));
25574 gimple_set_bb (if_else_stmt
, new_bb
);
25575 gimple_seq_add_stmt (&gseq
, if_else_stmt
);
25577 gimple_seq_add_stmt (&gseq
, convert_stmt
);
25578 gimple_seq_add_stmt (&gseq
, return_stmt
);
25579 set_bb_seq (new_bb
, gseq
);
25581 basic_block bb1
= new_bb
;
25582 edge e12
= split_block (bb1
, if_else_stmt
);
25583 basic_block bb2
= e12
->dest
;
25584 e12
->flags
&= ~EDGE_FALLTHRU
;
25585 e12
->flags
|= EDGE_TRUE_VALUE
;
25587 edge e23
= split_block (bb2
, return_stmt
);
25588 gimple_set_bb (convert_stmt
, bb2
);
25589 gimple_set_bb (return_stmt
, bb2
);
25591 basic_block bb3
= e23
->dest
;
25592 make_edge (bb1
, bb3
, EDGE_FALSE_VALUE
);
25595 make_edge (bb2
, EXIT_BLOCK_PTR_FOR_FN (cfun
), 0);
25601 /* This function generates the dispatch function for multi-versioned functions.
25602 DISPATCH_DECL is the function which will contain the dispatch logic.
25603 FNDECLS are the function choices for dispatch, and is a tree chain.
25604 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
25605 code is generated. */
25608 dispatch_function_versions (tree dispatch_decl
,
25610 basic_block
*empty_bb
)
25614 vec
<tree
> *fndecls
;
25615 tree clones
[CLONE_MAX
];
25617 if (TARGET_DEBUG_TARGET
)
25618 fputs ("dispatch_function_versions, top\n", stderr
);
25620 gcc_assert (dispatch_decl
!= NULL
25621 && fndecls_p
!= NULL
25622 && empty_bb
!= NULL
);
25624 /* fndecls_p is actually a vector. */
25625 fndecls
= static_cast<vec
<tree
> *> (fndecls_p
);
25627 /* At least one more version other than the default. */
25628 gcc_assert (fndecls
->length () >= 2);
25630 /* The first version in the vector is the default decl. */
25631 memset ((void *) clones
, '\0', sizeof (clones
));
25632 clones
[CLONE_DEFAULT
] = (*fndecls
)[0];
25634 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
25635 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
25636 __builtin_cpu_support ensures that the TOC fields are setup by requiring a
25637 recent glibc. If we ever need to call __builtin_cpu_init, we would need
25638 to insert the code here to do the call. */
25640 for (ix
= 1; fndecls
->iterate (ix
, &ele
); ++ix
)
25642 int priority
= rs6000_clone_priority (ele
);
25643 if (!clones
[priority
])
25644 clones
[priority
] = ele
;
25647 for (ix
= CLONE_MAX
- 1; ix
>= 0; ix
--)
25650 if (TARGET_DEBUG_TARGET
)
25651 fprintf (stderr
, "dispatch_function_versions, clone %d, %s\n",
25652 ix
, get_decl_name (clones
[ix
]));
25654 *empty_bb
= add_condition_to_bb (dispatch_decl
, clones
[ix
], ix
,
25661 /* Generate the dispatching code body to dispatch multi-versioned function
25662 DECL. The target hook is called to process the "target" attributes and
25663 provide the code to dispatch the right function at run-time. NODE points
25664 to the dispatcher decl whose body will be created. */
25667 rs6000_generate_version_dispatcher_body (void *node_p
)
25670 basic_block empty_bb
;
25671 struct cgraph_node
*node
= (cgraph_node
*) node_p
;
25672 struct cgraph_function_version_info
*ninfo
= node
->function_version ();
25674 if (ninfo
->dispatcher_resolver
)
25675 return ninfo
->dispatcher_resolver
;
25677 /* node is going to be an alias, so remove the finalized bit. */
25678 node
->definition
= false;
25680 /* The first version in the chain corresponds to the default version. */
25681 ninfo
->dispatcher_resolver
= resolver
25682 = make_resolver_func (ninfo
->next
->this_node
->decl
, node
->decl
, &empty_bb
);
25684 if (TARGET_DEBUG_TARGET
)
25685 fprintf (stderr
, "rs6000_get_function_versions_dispatcher, %s\n",
25686 get_decl_name (resolver
));
25688 push_cfun (DECL_STRUCT_FUNCTION (resolver
));
25689 auto_vec
<tree
, 2> fn_ver_vec
;
25691 for (struct cgraph_function_version_info
*vinfo
= ninfo
->next
;
25693 vinfo
= vinfo
->next
)
25695 struct cgraph_node
*version
= vinfo
->this_node
;
25696 /* Check for virtual functions here again, as by this time it should
25697 have been determined if this function needs a vtable index or
25698 not. This happens for methods in derived classes that override
25699 virtual methods in base classes but are not explicitly marked as
25701 if (DECL_VINDEX (version
->decl
))
25702 sorry ("Virtual function multiversioning not supported");
25704 fn_ver_vec
.safe_push (version
->decl
);
25707 dispatch_function_versions (resolver
, &fn_ver_vec
, &empty_bb
);
25708 cgraph_edge::rebuild_edges ();
25713 /* Hook to decide if we need to scan function gimple statements to
25714 collect target specific information for inlining, and update the
25715 corresponding RS6000_FN_TARGET_INFO_* bit in INFO if we are able
25716 to predict which ISA feature is used at this time. Return true
25717 if we need to scan, otherwise return false. */
25720 rs6000_need_ipa_fn_target_info (const_tree decl
,
25721 unsigned int &info ATTRIBUTE_UNUSED
)
25723 tree target
= DECL_FUNCTION_SPECIFIC_TARGET (decl
);
25725 target
= target_option_default_node
;
25726 struct cl_target_option
*opts
= TREE_TARGET_OPTION (target
);
25728 /* See PR102059, we only handle HTM for now, so will only do
25729 the consequent scannings when HTM feature enabled. */
25730 if (opts
->x_rs6000_isa_flags
& OPTION_MASK_HTM
)
25736 /* Hook to update target specific information INFO for inlining by
25737 checking the given STMT. Return false if we don't need to scan
25738 any more, otherwise return true. */
25741 rs6000_update_ipa_fn_target_info (unsigned int &info
, const gimple
*stmt
)
25743 #ifndef HAVE_AS_POWER10_HTM
25744 /* Assume inline asm can use any instruction features. */
25745 if (gimple_code (stmt
) == GIMPLE_ASM
)
25747 const char *asm_str
= gimple_asm_string (as_a
<const gasm
*> (stmt
));
25748 /* Ignore empty inline asm string. */
25749 if (strlen (asm_str
) > 0)
25750 /* Should set any bits we concerned, for now OPTION_MASK_HTM is
25751 the only bit we care about. */
25752 info
|= RS6000_FN_TARGET_INFO_HTM
;
25757 if (gimple_code (stmt
) == GIMPLE_CALL
)
25759 tree fndecl
= gimple_call_fndecl (stmt
);
25760 if (fndecl
&& fndecl_built_in_p (fndecl
, BUILT_IN_MD
))
25762 enum rs6000_gen_builtins fcode
25763 = (enum rs6000_gen_builtins
) DECL_MD_FUNCTION_CODE (fndecl
);
25764 /* HTM bifs definitely exploit HTM insns. */
25765 if (bif_is_htm (rs6000_builtin_info
[fcode
]))
25767 info
|= RS6000_FN_TARGET_INFO_HTM
;
25776 /* Hook to determine if one function can safely inline another. */
25779 rs6000_can_inline_p (tree caller
, tree callee
)
25782 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
25783 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
25785 /* If the caller/callee has option attributes, then use them.
25786 Otherwise, use the command line options. */
25788 callee_tree
= target_option_default_node
;
25790 caller_tree
= target_option_default_node
;
25792 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
25793 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
25795 HOST_WIDE_INT callee_isa
= callee_opts
->x_rs6000_isa_flags
;
25796 HOST_WIDE_INT caller_isa
= caller_opts
->x_rs6000_isa_flags
;
25797 HOST_WIDE_INT explicit_isa
= callee_opts
->x_rs6000_isa_flags_explicit
;
25799 cgraph_node
*callee_node
= cgraph_node::get (callee
);
25800 if (ipa_fn_summaries
&& ipa_fn_summaries
->get (callee_node
) != NULL
)
25802 unsigned int info
= ipa_fn_summaries
->get (callee_node
)->target_info
;
25803 if ((info
& RS6000_FN_TARGET_INFO_HTM
) == 0)
25805 callee_isa
&= ~OPTION_MASK_HTM
;
25806 explicit_isa
&= ~OPTION_MASK_HTM
;
25810 /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
25812 callee_isa
&= ~(OPTION_MASK_P8_FUSION
| OPTION_MASK_P10_FUSION
);
25813 explicit_isa
&= ~(OPTION_MASK_P8_FUSION
| OPTION_MASK_P10_FUSION
);
25815 /* The callee's options must be a subset of the caller's options, i.e.
25816 a vsx function may inline an altivec function, but a no-vsx function
25817 must not inline a vsx function. However, for those options that the
25818 callee has explicitly enabled or disabled, then we must enforce that
25819 the callee's and caller's options match exactly; see PR70010. */
25820 if (((caller_isa
& callee_isa
) == callee_isa
)
25821 && (caller_isa
& explicit_isa
) == (callee_isa
& explicit_isa
))
25824 if (TARGET_DEBUG_TARGET
)
25825 fprintf (stderr
, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
25826 get_decl_name (caller
), get_decl_name (callee
),
25827 (ret
? "can" : "cannot"));
25832 /* Allocate a stack temp and fixup the address so it meets the particular
25833 memory requirements (either offetable or REG+REG addressing). */
25836 rs6000_allocate_stack_temp (machine_mode mode
,
25837 bool offsettable_p
,
25840 rtx stack
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
25841 rtx addr
= XEXP (stack
, 0);
25842 int strict_p
= reload_completed
;
25844 if (!legitimate_indirect_address_p (addr
, strict_p
))
25847 && !rs6000_legitimate_offset_address_p (mode
, addr
, strict_p
, true))
25848 stack
= replace_equiv_address (stack
, copy_addr_to_reg (addr
));
25850 else if (reg_reg_p
&& !legitimate_indexed_address_p (addr
, strict_p
))
25851 stack
= replace_equiv_address (stack
, copy_addr_to_reg (addr
));
25857 /* Given a memory reference, if it is not a reg or reg+reg addressing,
25858 convert to such a form to deal with memory reference instructions
25859 like STFIWX and LDBRX that only take reg+reg addressing. */
25862 rs6000_force_indexed_or_indirect_mem (rtx x
)
25864 machine_mode mode
= GET_MODE (x
);
25866 gcc_assert (MEM_P (x
));
25867 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x
, mode
))
25869 rtx addr
= XEXP (x
, 0);
25870 if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
25872 rtx reg
= XEXP (addr
, 0);
25873 HOST_WIDE_INT size
= GET_MODE_SIZE (GET_MODE (x
));
25874 rtx size_rtx
= GEN_INT ((GET_CODE (addr
) == PRE_DEC
) ? -size
: size
);
25875 gcc_assert (REG_P (reg
));
25876 emit_insn (gen_add3_insn (reg
, reg
, size_rtx
));
25879 else if (GET_CODE (addr
) == PRE_MODIFY
)
25881 rtx reg
= XEXP (addr
, 0);
25882 rtx expr
= XEXP (addr
, 1);
25883 gcc_assert (REG_P (reg
));
25884 gcc_assert (GET_CODE (expr
) == PLUS
);
25885 emit_insn (gen_add3_insn (reg
, XEXP (expr
, 0), XEXP (expr
, 1)));
25889 if (GET_CODE (addr
) == PLUS
)
25891 rtx op0
= XEXP (addr
, 0);
25892 rtx op1
= XEXP (addr
, 1);
25893 op0
= force_reg (Pmode
, op0
);
25894 op1
= force_reg (Pmode
, op1
);
25895 x
= replace_equiv_address (x
, gen_rtx_PLUS (Pmode
, op0
, op1
));
25898 x
= replace_equiv_address (x
, force_reg (Pmode
, addr
));
25904 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
25906 On the RS/6000, all integer constants are acceptable, most won't be valid
25907 for particular insns, though. Only easy FP constants are acceptable. */
25910 rs6000_legitimate_constant_p (machine_mode mode
, rtx x
)
25912 if (TARGET_ELF
&& tls_referenced_p (x
))
25915 if (CONST_DOUBLE_P (x
))
25916 return easy_fp_constant (x
, mode
);
25918 if (GET_CODE (x
) == CONST_VECTOR
)
25919 return easy_vector_constant (x
, mode
);
25925 /* Implement TARGET_PRECOMPUTE_TLS_P.
25927 On the AIX, TLS symbols are in the TOC, which is maintained in the
25928 constant pool. AIX TOC TLS symbols need to be pre-computed, but
25929 must be considered legitimate constants. */
25932 rs6000_aix_precompute_tls_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
25934 return tls_referenced_p (x
);
25939 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
25942 chain_already_loaded (rtx_insn
*last
)
25944 for (; last
!= NULL
; last
= PREV_INSN (last
))
25946 if (NONJUMP_INSN_P (last
))
25948 rtx patt
= PATTERN (last
);
25950 if (GET_CODE (patt
) == SET
)
25952 rtx lhs
= XEXP (patt
, 0);
25954 if (REG_P (lhs
) && REGNO (lhs
) == STATIC_CHAIN_REGNUM
)
25962 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
25965 rs6000_call_aix (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
25967 rtx func
= func_desc
;
25968 rtx toc_reg
= gen_rtx_REG (Pmode
, TOC_REGNUM
);
25969 rtx toc_load
= NULL_RTX
;
25970 rtx toc_restore
= NULL_RTX
;
25972 rtx abi_reg
= NULL_RTX
;
25976 bool is_pltseq_longcall
;
25979 tlsarg
= global_tlsarg
;
25981 /* Handle longcall attributes. */
25982 is_pltseq_longcall
= false;
25983 if ((INTVAL (cookie
) & CALL_LONG
) != 0
25984 && GET_CODE (func_desc
) == SYMBOL_REF
)
25986 func
= rs6000_longcall_ref (func_desc
, tlsarg
);
25988 is_pltseq_longcall
= true;
25991 /* Handle indirect calls. */
25992 if (!SYMBOL_REF_P (func
)
25993 || (DEFAULT_ABI
== ABI_AIX
&& !SYMBOL_REF_FUNCTION_P (func
)))
25995 if (!rs6000_pcrel_p ())
25997 /* Save the TOC into its reserved slot before the call,
25998 and prepare to restore it after the call. */
25999 rtx stack_toc_offset
= GEN_INT (RS6000_TOC_SAVE_SLOT
);
26000 rtx stack_toc_unspec
= gen_rtx_UNSPEC (Pmode
,
26001 gen_rtvec (1, stack_toc_offset
),
26003 toc_restore
= gen_rtx_SET (toc_reg
, stack_toc_unspec
);
26005 /* Can we optimize saving the TOC in the prologue or
26006 do we need to do it at every call? */
26007 if (TARGET_SAVE_TOC_INDIRECT
&& !cfun
->calls_alloca
)
26008 cfun
->machine
->save_toc_in_prologue
= true;
26011 rtx stack_ptr
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
26012 rtx stack_toc_mem
= gen_frame_mem (Pmode
,
26013 gen_rtx_PLUS (Pmode
, stack_ptr
,
26014 stack_toc_offset
));
26015 MEM_VOLATILE_P (stack_toc_mem
) = 1;
26016 if (is_pltseq_longcall
)
26018 rtvec v
= gen_rtvec (3, toc_reg
, func_desc
, tlsarg
);
26019 rtx mark_toc_reg
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
26020 emit_insn (gen_rtx_SET (stack_toc_mem
, mark_toc_reg
));
26023 emit_move_insn (stack_toc_mem
, toc_reg
);
26027 if (DEFAULT_ABI
== ABI_ELFv2
)
26029 /* A function pointer in the ELFv2 ABI is just a plain address, but
26030 the ABI requires it to be loaded into r12 before the call. */
26031 func_addr
= gen_rtx_REG (Pmode
, 12);
26032 emit_move_insn (func_addr
, func
);
26033 abi_reg
= func_addr
;
26034 /* Indirect calls via CTR are strongly preferred over indirect
26035 calls via LR, so move the address there. Needed to mark
26036 this insn for linker plt sequence editing too. */
26037 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
26038 if (is_pltseq_longcall
)
26040 rtvec v
= gen_rtvec (3, abi_reg
, func_desc
, tlsarg
);
26041 rtx mark_func
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
26042 emit_insn (gen_rtx_SET (func_addr
, mark_func
));
26043 v
= gen_rtvec (2, func_addr
, func_desc
);
26044 func_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
26047 emit_move_insn (func_addr
, abi_reg
);
26051 /* A function pointer under AIX is a pointer to a data area whose
26052 first word contains the actual address of the function, whose
26053 second word contains a pointer to its TOC, and whose third word
26054 contains a value to place in the static chain register (r11).
26055 Note that if we load the static chain, our "trampoline" need
26056 not have any executable code. */
26058 /* Load up address of the actual function. */
26059 func
= force_reg (Pmode
, func
);
26060 func_addr
= gen_reg_rtx (Pmode
);
26061 emit_move_insn (func_addr
, gen_rtx_MEM (Pmode
, func
));
26063 /* Indirect calls via CTR are strongly preferred over indirect
26064 calls via LR, so move the address there. */
26065 rtx ctr_reg
= gen_rtx_REG (Pmode
, CTR_REGNO
);
26066 emit_move_insn (ctr_reg
, func_addr
);
26067 func_addr
= ctr_reg
;
26069 /* Prepare to load the TOC of the called function. Note that the
26070 TOC load must happen immediately before the actual call so
26071 that unwinding the TOC registers works correctly. See the
26072 comment in frob_update_context. */
26073 rtx func_toc_offset
= GEN_INT (GET_MODE_SIZE (Pmode
));
26074 rtx func_toc_mem
= gen_rtx_MEM (Pmode
,
26075 gen_rtx_PLUS (Pmode
, func
,
26077 toc_load
= gen_rtx_USE (VOIDmode
, func_toc_mem
);
26079 /* If we have a static chain, load it up. But, if the call was
26080 originally direct, the 3rd word has not been written since no
26081 trampoline has been built, so we ought not to load it, lest we
26082 override a static chain value. */
26083 if (!(GET_CODE (func_desc
) == SYMBOL_REF
26084 && SYMBOL_REF_FUNCTION_P (func_desc
))
26085 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
26086 && !chain_already_loaded (get_current_sequence ()->next
->last
))
26088 rtx sc_reg
= gen_rtx_REG (Pmode
, STATIC_CHAIN_REGNUM
);
26089 rtx func_sc_offset
= GEN_INT (2 * GET_MODE_SIZE (Pmode
));
26090 rtx func_sc_mem
= gen_rtx_MEM (Pmode
,
26091 gen_rtx_PLUS (Pmode
, func
,
26093 emit_move_insn (sc_reg
, func_sc_mem
);
26100 /* No TOC register needed for calls from PC-relative callers. */
26101 if (!rs6000_pcrel_p ())
26102 /* Direct calls use the TOC: for local calls, the callee will
26103 assume the TOC register is set; for non-local calls, the
26104 PLT stub needs the TOC register. */
26109 /* Create the call. */
26110 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
26111 if (value
!= NULL_RTX
)
26112 call
[0] = gen_rtx_SET (value
, call
[0]);
26113 call
[1] = gen_rtx_USE (VOIDmode
, cookie
);
26117 call
[n_call
++] = toc_load
;
26119 call
[n_call
++] = toc_restore
;
26121 call
[n_call
++] = gen_hard_reg_clobber (Pmode
, LR_REGNO
);
26123 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (n_call
, call
));
26124 insn
= emit_call_insn (insn
);
26126 /* Mention all registers defined by the ABI to hold information
26127 as uses in CALL_INSN_FUNCTION_USAGE. */
26129 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), abi_reg
);
26132 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
26135 rs6000_sibcall_aix (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
26139 rtx r12
= NULL_RTX
;
26140 rtx func_addr
= func_desc
;
26143 tlsarg
= global_tlsarg
;
26145 /* Handle longcall attributes. */
26146 if (INTVAL (cookie
) & CALL_LONG
&& SYMBOL_REF_P (func_desc
))
26148 /* PCREL can do a sibling call to a longcall function
26149 because we don't need to restore the TOC register. */
26150 gcc_assert (rs6000_pcrel_p ());
26151 func_desc
= rs6000_longcall_ref (func_desc
, tlsarg
);
26154 gcc_assert (INTVAL (cookie
) == 0);
26156 /* For ELFv2, r12 and CTR need to hold the function address
26157 for an indirect call. */
26158 if (GET_CODE (func_desc
) != SYMBOL_REF
&& DEFAULT_ABI
== ABI_ELFv2
)
26160 r12
= gen_rtx_REG (Pmode
, 12);
26161 emit_move_insn (r12
, func_desc
);
26162 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
26163 emit_move_insn (func_addr
, r12
);
26166 /* Create the call. */
26167 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
26168 if (value
!= NULL_RTX
)
26169 call
[0] = gen_rtx_SET (value
, call
[0]);
26171 call
[1] = simple_return_rtx
;
26173 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (2, call
));
26174 insn
= emit_call_insn (insn
);
26176 /* Note use of the TOC register. */
26177 if (!rs6000_pcrel_p ())
26178 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
),
26179 gen_rtx_REG (Pmode
, TOC_REGNUM
));
26181 /* Note use of r12. */
26183 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), r12
);
26186 /* Expand code to perform a call under the SYSV4 ABI. */
26189 rs6000_call_sysv (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
26191 rtx func
= func_desc
;
26195 rtx abi_reg
= NULL_RTX
;
26199 tlsarg
= global_tlsarg
;
26201 /* Handle longcall attributes. */
26202 if ((INTVAL (cookie
) & CALL_LONG
) != 0
26203 && GET_CODE (func_desc
) == SYMBOL_REF
)
26205 func
= rs6000_longcall_ref (func_desc
, tlsarg
);
26206 /* If the longcall was implemented as an inline PLT call using
26207 PLT unspecs then func will be REG:r11. If not, func will be
26208 a pseudo reg. The inline PLT call sequence supports lazy
26209 linking (and longcalls to functions in dlopen'd libraries).
26210 The other style of longcalls don't. The lazy linking entry
26211 to the dynamic symbol resolver requires r11 be the function
26212 address (as it is for linker generated PLT stubs). Ensure
26213 r11 stays valid to the bctrl by marking r11 used by the call. */
26218 /* Handle indirect calls. */
26219 if (GET_CODE (func
) != SYMBOL_REF
)
26221 func
= force_reg (Pmode
, func
);
26223 /* Indirect calls via CTR are strongly preferred over indirect
26224 calls via LR, so move the address there. That can't be left
26225 to reload because we want to mark every instruction in an
26226 inline PLT call sequence with a reloc, enabling the linker to
26227 edit the sequence back to a direct call when that makes sense. */
26228 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
26231 rtvec v
= gen_rtvec (3, func
, func_desc
, tlsarg
);
26232 rtx mark_func
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
26233 emit_insn (gen_rtx_SET (func_addr
, mark_func
));
26234 v
= gen_rtvec (2, func_addr
, func_desc
);
26235 func_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
26238 emit_move_insn (func_addr
, func
);
26243 /* Create the call. */
26244 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
26245 if (value
!= NULL_RTX
)
26246 call
[0] = gen_rtx_SET (value
, call
[0]);
26248 call
[1] = gen_rtx_USE (VOIDmode
, cookie
);
26250 if (TARGET_SECURE_PLT
26252 && GET_CODE (func_addr
) == SYMBOL_REF
26253 && !SYMBOL_REF_LOCAL_P (func_addr
))
26254 call
[n
++] = gen_rtx_USE (VOIDmode
, pic_offset_table_rtx
);
26256 call
[n
++] = gen_hard_reg_clobber (Pmode
, LR_REGNO
);
26258 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (n
, call
));
26259 insn
= emit_call_insn (insn
);
26261 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), abi_reg
);
26264 /* Expand code to perform a sibling call under the SysV4 ABI. */
26267 rs6000_sibcall_sysv (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
26269 rtx func
= func_desc
;
26273 rtx abi_reg
= NULL_RTX
;
26276 tlsarg
= global_tlsarg
;
26278 /* Handle longcall attributes. */
26279 if ((INTVAL (cookie
) & CALL_LONG
) != 0
26280 && GET_CODE (func_desc
) == SYMBOL_REF
)
26282 func
= rs6000_longcall_ref (func_desc
, tlsarg
);
26283 /* If the longcall was implemented as an inline PLT call using
26284 PLT unspecs then func will be REG:r11. If not, func will be
26285 a pseudo reg. The inline PLT call sequence supports lazy
26286 linking (and longcalls to functions in dlopen'd libraries).
26287 The other style of longcalls don't. The lazy linking entry
26288 to the dynamic symbol resolver requires r11 be the function
26289 address (as it is for linker generated PLT stubs). Ensure
26290 r11 stays valid to the bctr by marking r11 used by the call. */
26295 /* Handle indirect calls. */
26296 if (GET_CODE (func
) != SYMBOL_REF
)
26298 func
= force_reg (Pmode
, func
);
26300 /* Indirect sibcalls must go via CTR. That can't be left to
26301 reload because we want to mark every instruction in an inline
26302 PLT call sequence with a reloc, enabling the linker to edit
26303 the sequence back to a direct call when that makes sense. */
26304 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
26307 rtvec v
= gen_rtvec (3, func
, func_desc
, tlsarg
);
26308 rtx mark_func
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
26309 emit_insn (gen_rtx_SET (func_addr
, mark_func
));
26310 v
= gen_rtvec (2, func_addr
, func_desc
);
26311 func_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
26314 emit_move_insn (func_addr
, func
);
26319 /* Create the call. */
26320 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
26321 if (value
!= NULL_RTX
)
26322 call
[0] = gen_rtx_SET (value
, call
[0]);
26324 call
[1] = gen_rtx_USE (VOIDmode
, cookie
);
26325 call
[2] = simple_return_rtx
;
26327 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (3, call
));
26328 insn
= emit_call_insn (insn
);
26330 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), abi_reg
);
26335 /* Expand code to perform a call under the Darwin ABI.
26336 Modulo handling of mlongcall, this is much the same as sysv.
26337 if/when the longcall optimisation is removed, we could drop this
26338 code and use the sysv case (taking care to avoid the tls stuff).
26340 We can use this for sibcalls too, if needed. */
26343 rs6000_call_darwin_1 (rtx value
, rtx func_desc
, rtx tlsarg
,
26344 rtx cookie
, bool sibcall
)
26346 rtx func
= func_desc
;
26350 int cookie_val
= INTVAL (cookie
);
26351 bool make_island
= false;
26353 /* Handle longcall attributes, there are two cases for Darwin:
26354 1) Newer linkers are capable of synthesising any branch islands needed.
26355 2) We need a helper branch island synthesised by the compiler.
26356 The second case has mostly been retired and we don't use it for m64.
26357 In fact, it's is an optimisation, we could just indirect as sysv does..
26358 ... however, backwards compatibility for now.
26359 If we're going to use this, then we need to keep the CALL_LONG bit set,
26360 so that we can pick up the special insn form later. */
26361 if ((cookie_val
& CALL_LONG
) != 0
26362 && GET_CODE (func_desc
) == SYMBOL_REF
)
26364 /* FIXME: the longcall opt should not hang off this flag, it is most
26365 likely incorrect for kernel-mode code-generation. */
26366 if (darwin_symbol_stubs
&& TARGET_32BIT
)
26367 make_island
= true; /* Do nothing yet, retain the CALL_LONG flag. */
26370 /* The linker is capable of doing this, but the user explicitly
26371 asked for -mlongcall, so we'll do the 'normal' version. */
26372 func
= rs6000_longcall_ref (func_desc
, NULL_RTX
);
26373 cookie_val
&= ~CALL_LONG
; /* Handled, zap it. */
26377 /* Handle indirect calls. */
26378 if (GET_CODE (func
) != SYMBOL_REF
)
26380 func
= force_reg (Pmode
, func
);
26382 /* Indirect calls via CTR are strongly preferred over indirect
26383 calls via LR, and are required for indirect sibcalls, so move
26384 the address there. */
26385 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
26386 emit_move_insn (func_addr
, func
);
26391 /* Create the call. */
26392 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
26393 if (value
!= NULL_RTX
)
26394 call
[0] = gen_rtx_SET (value
, call
[0]);
26396 call
[1] = gen_rtx_USE (VOIDmode
, GEN_INT (cookie_val
));
26399 call
[2] = simple_return_rtx
;
26401 call
[2] = gen_hard_reg_clobber (Pmode
, LR_REGNO
);
26403 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (3, call
));
26404 insn
= emit_call_insn (insn
);
26405 /* Now we have the debug info in the insn, we can set up the branch island
26406 if we're using one. */
26409 tree funname
= get_identifier (XSTR (func_desc
, 0));
26411 if (no_previous_def (funname
))
26413 rtx label_rtx
= gen_label_rtx ();
26414 char *label_buf
, temp_buf
[256];
26415 ASM_GENERATE_INTERNAL_LABEL (temp_buf
, "L",
26416 CODE_LABEL_NUMBER (label_rtx
));
26417 label_buf
= temp_buf
[0] == '*' ? temp_buf
+ 1 : temp_buf
;
26418 tree labelname
= get_identifier (label_buf
);
26419 add_compiler_branch_island (labelname
, funname
,
26420 insn_line ((const rtx_insn
*)insn
));
26427 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED
, rtx func_desc ATTRIBUTE_UNUSED
,
26428 rtx tlsarg ATTRIBUTE_UNUSED
, rtx cookie ATTRIBUTE_UNUSED
)
26431 rs6000_call_darwin_1 (value
, func_desc
, tlsarg
, cookie
, false);
26439 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED
, rtx func_desc ATTRIBUTE_UNUSED
,
26440 rtx tlsarg ATTRIBUTE_UNUSED
, rtx cookie ATTRIBUTE_UNUSED
)
26443 rs6000_call_darwin_1 (value
, func_desc
, tlsarg
, cookie
, true);
26449 /* Return whether we should generate PC-relative code for FNDECL. */
26451 rs6000_fndecl_pcrel_p (const_tree fndecl
)
26453 if (DEFAULT_ABI
!= ABI_ELFv2
)
26456 struct cl_target_option
*opts
= target_opts_for_fn (fndecl
);
26458 return ((opts
->x_rs6000_isa_flags
& OPTION_MASK_PCREL
) != 0
26459 && TARGET_CMODEL
== CMODEL_MEDIUM
);
26462 /* Return whether we should generate PC-relative code for *FN. */
26464 rs6000_function_pcrel_p (struct function
*fn
)
26466 if (DEFAULT_ABI
!= ABI_ELFv2
)
26469 /* Optimize usual case. */
26471 return ((rs6000_isa_flags
& OPTION_MASK_PCREL
) != 0
26472 && TARGET_CMODEL
== CMODEL_MEDIUM
);
26474 return rs6000_fndecl_pcrel_p (fn
->decl
);
26477 /* Return whether we should generate PC-relative code for the current
26482 return (DEFAULT_ABI
== ABI_ELFv2
26483 && (rs6000_isa_flags
& OPTION_MASK_PCREL
) != 0
26484 && TARGET_CMODEL
== CMODEL_MEDIUM
);
26488 /* Given an address (ADDR), a mode (MODE), and what the format of the
26489 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
26490 for the address. */
26493 address_to_insn_form (rtx addr
,
26495 enum non_prefixed_form non_prefixed_format
)
26497 /* Single register is easy. */
26498 if (REG_P (addr
) || SUBREG_P (addr
))
26499 return INSN_FORM_BASE_REG
;
26501 /* If the non prefixed instruction format doesn't support offset addressing,
26502 make sure only indexed addressing is allowed.
26504 We special case SDmode so that the register allocator does not try to move
26505 SDmode through GPR registers, but instead uses the 32-bit integer load and
26506 store instructions for the floating point registers. */
26507 if (non_prefixed_format
== NON_PREFIXED_X
|| (mode
== SDmode
&& TARGET_DFP
))
26509 if (GET_CODE (addr
) != PLUS
)
26510 return INSN_FORM_BAD
;
26512 rtx op0
= XEXP (addr
, 0);
26513 rtx op1
= XEXP (addr
, 1);
26514 if (!REG_P (op0
) && !SUBREG_P (op0
))
26515 return INSN_FORM_BAD
;
26517 if (!REG_P (op1
) && !SUBREG_P (op1
))
26518 return INSN_FORM_BAD
;
26520 return INSN_FORM_X
;
26523 /* Deal with update forms. */
26524 if (GET_RTX_CLASS (GET_CODE (addr
)) == RTX_AUTOINC
)
26525 return INSN_FORM_UPDATE
;
26527 /* Handle PC-relative symbols and labels. Check for both local and
26528 external symbols. Assume labels are always local. TLS symbols
26529 are not PC-relative for rs6000. */
26532 if (LABEL_REF_P (addr
))
26533 return INSN_FORM_PCREL_LOCAL
;
26535 if (SYMBOL_REF_P (addr
) && !SYMBOL_REF_TLS_MODEL (addr
))
26537 if (!SYMBOL_REF_LOCAL_P (addr
))
26538 return INSN_FORM_PCREL_EXTERNAL
;
26540 return INSN_FORM_PCREL_LOCAL
;
26544 if (GET_CODE (addr
) == CONST
)
26545 addr
= XEXP (addr
, 0);
26547 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
26548 if (GET_CODE (addr
) == LO_SUM
)
26549 return INSN_FORM_LO_SUM
;
26551 /* Everything below must be an offset address of some form. */
26552 if (GET_CODE (addr
) != PLUS
)
26553 return INSN_FORM_BAD
;
26555 rtx op0
= XEXP (addr
, 0);
26556 rtx op1
= XEXP (addr
, 1);
26558 /* Check for indexed addresses. */
26559 if (REG_P (op1
) || SUBREG_P (op1
))
26561 if (REG_P (op0
) || SUBREG_P (op0
))
26562 return INSN_FORM_X
;
26564 return INSN_FORM_BAD
;
26567 if (!CONST_INT_P (op1
))
26568 return INSN_FORM_BAD
;
26570 HOST_WIDE_INT offset
= INTVAL (op1
);
26571 if (!SIGNED_INTEGER_34BIT_P (offset
))
26572 return INSN_FORM_BAD
;
26574 /* Check for local and external PC-relative addresses. Labels are always
26575 local. TLS symbols are not PC-relative for rs6000. */
26578 if (LABEL_REF_P (op0
))
26579 return INSN_FORM_PCREL_LOCAL
;
26581 if (SYMBOL_REF_P (op0
) && !SYMBOL_REF_TLS_MODEL (op0
))
26583 if (!SYMBOL_REF_LOCAL_P (op0
))
26584 return INSN_FORM_PCREL_EXTERNAL
;
26586 return INSN_FORM_PCREL_LOCAL
;
26590 /* If it isn't PC-relative, the address must use a base register. */
26591 if (!REG_P (op0
) && !SUBREG_P (op0
))
26592 return INSN_FORM_BAD
;
26594 /* Large offsets must be prefixed. */
26595 if (!SIGNED_INTEGER_16BIT_P (offset
))
26597 if (TARGET_PREFIXED
)
26598 return INSN_FORM_PREFIXED_NUMERIC
;
26600 return INSN_FORM_BAD
;
26603 /* We have a 16-bit offset, see what default instruction format to use. */
26604 if (non_prefixed_format
== NON_PREFIXED_DEFAULT
)
26606 unsigned size
= GET_MODE_SIZE (mode
);
26608 /* On 64-bit systems, assume 64-bit integers need to use DS form
26609 addresses (for LD/STD). VSX vectors need to use DQ form addresses
26610 (for LXV and STXV). TImode is problematical in that its normal usage
26611 is expected to be GPRs where it wants a DS instruction format, but if
26612 it goes into the vector registers, it wants a DQ instruction
26614 if (TARGET_POWERPC64
&& size
>= 8 && GET_MODE_CLASS (mode
) == MODE_INT
)
26615 non_prefixed_format
= NON_PREFIXED_DS
;
26617 else if (TARGET_VSX
&& size
>= 16
26618 && (VECTOR_MODE_P (mode
) || VECTOR_ALIGNMENT_P (mode
)))
26619 non_prefixed_format
= NON_PREFIXED_DQ
;
26622 non_prefixed_format
= NON_PREFIXED_D
;
26625 /* Classify the D/DS/DQ-form addresses. */
26626 switch (non_prefixed_format
)
26628 /* Instruction format D, all 16 bits are valid. */
26629 case NON_PREFIXED_D
:
26630 return INSN_FORM_D
;
26632 /* Instruction format DS, bottom 2 bits must be 0. */
26633 case NON_PREFIXED_DS
:
26634 if ((offset
& 3) == 0)
26635 return INSN_FORM_DS
;
26637 else if (TARGET_PREFIXED
)
26638 return INSN_FORM_PREFIXED_NUMERIC
;
26641 return INSN_FORM_BAD
;
26643 /* Instruction format DQ, bottom 4 bits must be 0. */
26644 case NON_PREFIXED_DQ
:
26645 if ((offset
& 15) == 0)
26646 return INSN_FORM_DQ
;
26648 else if (TARGET_PREFIXED
)
26649 return INSN_FORM_PREFIXED_NUMERIC
;
26652 return INSN_FORM_BAD
;
26658 return INSN_FORM_BAD
;
26661 /* Given address rtx ADDR for a load of MODE, is this legitimate for a
26662 non-prefixed D-form or X-form instruction? NON_PREFIXED_FORMAT is
26663 given NON_PREFIXED_D or NON_PREFIXED_DS to indicate whether we want
26664 a D-form or DS-form instruction. X-form and base_reg are always
26667 address_is_non_pfx_d_or_x (rtx addr
, machine_mode mode
,
26668 enum non_prefixed_form non_prefixed_format
)
26670 enum insn_form result_form
;
26672 result_form
= address_to_insn_form (addr
, mode
, non_prefixed_format
);
26674 switch (non_prefixed_format
)
26676 case NON_PREFIXED_D
:
26677 switch (result_form
)
26682 case INSN_FORM_BASE_REG
:
26688 case NON_PREFIXED_DS
:
26689 switch (result_form
)
26693 case INSN_FORM_BASE_REG
:
26705 /* Return true if an REG with a given MODE is loaded from or stored into a MEM
26706 location uses a non-prefixed D/DS/DQ-form address. This is used to validate
26707 the load or store with the PCREL_OPT optimization to make sure it is an
26708 instruction that can be optimized.
26710 We need to specify the MODE separately from the REG to allow for loads that
26711 include zero/sign/float extension. */
26714 pcrel_opt_valid_mem_p (rtx reg
, machine_mode mode
, rtx mem
)
26716 /* If the instruction is indexed only like LFIWAX/LXSIWAX we cannot do the
26717 PCREL_OPT optimization. */
26718 enum non_prefixed_form non_prefixed
= reg_to_non_prefixed (reg
, mode
);
26719 if (non_prefixed
== NON_PREFIXED_X
)
26722 /* Check if this is a non-prefixed D/DS/DQ-form instruction. */
26723 rtx addr
= XEXP (mem
, 0);
26724 enum insn_form iform
= address_to_insn_form (addr
, mode
, non_prefixed
);
26725 return (iform
== INSN_FORM_BASE_REG
26726 || iform
== INSN_FORM_D
26727 || iform
== INSN_FORM_DS
26728 || iform
== INSN_FORM_DQ
);
26731 /* Helper function to see if we're potentially looking at lfs/stfs.
26732 - PARALLEL containing a SET and a CLOBBER
26734 - SET is from UNSPEC_SI_FROM_SF to MEM:SI
26735 - CLOBBER is a V4SF
26737 - SET is from UNSPEC_SF_FROM_SI to REG:SF
26742 is_lfs_stfs_insn (rtx_insn
*insn
)
26744 rtx pattern
= PATTERN (insn
);
26745 if (GET_CODE (pattern
) != PARALLEL
)
26748 /* This should be a parallel with exactly one set and one clobber. */
26749 if (XVECLEN (pattern
, 0) != 2)
26752 rtx set
= XVECEXP (pattern
, 0, 0);
26753 if (GET_CODE (set
) != SET
)
26756 rtx clobber
= XVECEXP (pattern
, 0, 1);
26757 if (GET_CODE (clobber
) != CLOBBER
)
26760 /* All we care is that the destination of the SET is a mem:SI,
26761 the source should be an UNSPEC_SI_FROM_SF, and the clobber
26762 should be a scratch:V4SF. */
26764 rtx dest
= SET_DEST (set
);
26765 rtx src
= SET_SRC (set
);
26766 rtx scratch
= SET_DEST (clobber
);
26768 if (GET_CODE (src
) != UNSPEC
)
26772 if (XINT (src
, 1) == UNSPEC_SI_FROM_SF
26773 && GET_CODE (dest
) == MEM
&& GET_MODE (dest
) == SImode
26774 && GET_CODE (scratch
) == SCRATCH
&& GET_MODE (scratch
) == V4SFmode
)
26778 if (XINT (src
, 1) == UNSPEC_SF_FROM_SI
26779 && GET_CODE (dest
) == REG
&& GET_MODE (dest
) == SFmode
26780 && GET_CODE (scratch
) == SCRATCH
&& GET_MODE (scratch
) == DImode
)
26786 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
26787 instruction format (D/DS/DQ) used for offset memory. */
26789 enum non_prefixed_form
26790 reg_to_non_prefixed (rtx reg
, machine_mode mode
)
26792 /* If it isn't a register, use the defaults. */
26793 if (!REG_P (reg
) && !SUBREG_P (reg
))
26794 return NON_PREFIXED_DEFAULT
;
26796 unsigned int r
= reg_or_subregno (reg
);
26798 /* If we have a pseudo, use the default instruction format. */
26799 if (!HARD_REGISTER_NUM_P (r
))
26800 return NON_PREFIXED_DEFAULT
;
26802 unsigned size
= GET_MODE_SIZE (mode
);
26804 /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
26805 128-bit floating point, and 128-bit integers. Before power9, only indexed
26806 addressing was available for vectors. */
26807 if (FP_REGNO_P (r
))
26809 if (mode
== SFmode
|| size
== 8 || FLOAT128_2REG_P (mode
))
26810 return NON_PREFIXED_D
;
26813 return NON_PREFIXED_X
;
26815 else if (TARGET_VSX
&& size
>= 16
26816 && (VECTOR_MODE_P (mode
)
26817 || VECTOR_ALIGNMENT_P (mode
)
26818 || mode
== TImode
|| mode
== CTImode
))
26819 return (TARGET_P9_VECTOR
) ? NON_PREFIXED_DQ
: NON_PREFIXED_X
;
26822 return NON_PREFIXED_DEFAULT
;
26825 /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
26826 128-bit floating point, and 128-bit integers. Before power9, only indexed
26827 addressing was available. */
26828 else if (ALTIVEC_REGNO_P (r
))
26830 if (!TARGET_P9_VECTOR
)
26831 return NON_PREFIXED_X
;
26833 if (mode
== SFmode
|| size
== 8 || FLOAT128_2REG_P (mode
))
26834 return NON_PREFIXED_DS
;
26837 return NON_PREFIXED_X
;
26839 else if (TARGET_VSX
&& size
>= 16
26840 && (VECTOR_MODE_P (mode
)
26841 || VECTOR_ALIGNMENT_P (mode
)
26842 || mode
== TImode
|| mode
== CTImode
))
26843 return NON_PREFIXED_DQ
;
26846 return NON_PREFIXED_DEFAULT
;
26849 /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
26850 otherwise. Assume that any other register, such as LR, CRs, etc. will go
26851 through the GPR registers for memory operations. */
26852 else if (TARGET_POWERPC64
&& size
>= 8)
26853 return NON_PREFIXED_DS
;
26855 return NON_PREFIXED_D
;
26859 /* Whether a load instruction is a prefixed instruction. This is called from
26860 the prefixed attribute processing. */
26863 prefixed_load_p (rtx_insn
*insn
)
26865 /* Validate the insn to make sure it is a normal load insn. */
26866 extract_insn_cached (insn
);
26867 if (recog_data
.n_operands
< 2)
26870 rtx reg
= recog_data
.operand
[0];
26871 rtx mem
= recog_data
.operand
[1];
26873 if (!REG_P (reg
) && !SUBREG_P (reg
))
26879 /* Prefixed load instructions do not support update or indexed forms. */
26880 if (get_attr_indexed (insn
) == INDEXED_YES
26881 || get_attr_update (insn
) == UPDATE_YES
)
26884 /* LWA uses the DS format instead of the D format that LWZ uses. */
26885 enum non_prefixed_form non_prefixed
;
26886 machine_mode reg_mode
= GET_MODE (reg
);
26887 machine_mode mem_mode
= GET_MODE (mem
);
26889 if (mem_mode
== SImode
&& reg_mode
== DImode
26890 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
)
26891 non_prefixed
= NON_PREFIXED_DS
;
26894 non_prefixed
= reg_to_non_prefixed (reg
, mem_mode
);
26896 if (non_prefixed
== NON_PREFIXED_X
&& is_lfs_stfs_insn (insn
))
26897 return address_is_prefixed (XEXP (mem
, 0), mem_mode
, NON_PREFIXED_DEFAULT
);
26899 return address_is_prefixed (XEXP (mem
, 0), mem_mode
, non_prefixed
);
26902 /* Whether a store instruction is a prefixed instruction. This is called from
26903 the prefixed attribute processing. */
26906 prefixed_store_p (rtx_insn
*insn
)
26908 /* Validate the insn to make sure it is a normal store insn. */
26909 extract_insn_cached (insn
);
26910 if (recog_data
.n_operands
< 2)
26913 rtx mem
= recog_data
.operand
[0];
26914 rtx reg
= recog_data
.operand
[1];
26916 if (!REG_P (reg
) && !SUBREG_P (reg
))
26922 /* Prefixed store instructions do not support update or indexed forms. */
26923 if (get_attr_indexed (insn
) == INDEXED_YES
26924 || get_attr_update (insn
) == UPDATE_YES
)
26927 machine_mode mem_mode
= GET_MODE (mem
);
26928 rtx addr
= XEXP (mem
, 0);
26929 enum non_prefixed_form non_prefixed
= reg_to_non_prefixed (reg
, mem_mode
);
26931 /* Need to make sure we aren't looking at a stfs which doesn't look
26932 like the other things reg_to_non_prefixed/address_is_prefixed
26934 if (non_prefixed
== NON_PREFIXED_X
&& is_lfs_stfs_insn (insn
))
26935 return address_is_prefixed (addr
, mem_mode
, NON_PREFIXED_DEFAULT
);
26937 return address_is_prefixed (addr
, mem_mode
, non_prefixed
);
26940 /* Whether a load immediate or add instruction is a prefixed instruction. This
26941 is called from the prefixed attribute processing. */
26944 prefixed_paddi_p (rtx_insn
*insn
)
26946 rtx set
= single_set (insn
);
26950 rtx dest
= SET_DEST (set
);
26951 rtx src
= SET_SRC (set
);
26953 if (!REG_P (dest
) && !SUBREG_P (dest
))
26956 /* Is this a load immediate that can't be done with a simple ADDI or
26958 if (CONST_INT_P (src
))
26959 return (satisfies_constraint_eI (src
)
26960 && !satisfies_constraint_I (src
)
26961 && !satisfies_constraint_L (src
));
26963 /* Is this a PADDI instruction that can't be done with a simple ADDI or
26965 if (GET_CODE (src
) == PLUS
)
26967 rtx op1
= XEXP (src
, 1);
26969 return (CONST_INT_P (op1
)
26970 && satisfies_constraint_eI (op1
)
26971 && !satisfies_constraint_I (op1
)
26972 && !satisfies_constraint_L (op1
));
26975 /* If not, is it a load of a PC-relative address? */
26976 if (!TARGET_PCREL
|| GET_MODE (dest
) != Pmode
)
26979 if (!SYMBOL_REF_P (src
) && !LABEL_REF_P (src
) && GET_CODE (src
) != CONST
)
26982 enum insn_form iform
= address_to_insn_form (src
, Pmode
,
26983 NON_PREFIXED_DEFAULT
);
26985 return (iform
== INSN_FORM_PCREL_EXTERNAL
|| iform
== INSN_FORM_PCREL_LOCAL
);
26988 /* Whether the next instruction needs a 'p' prefix issued before the
26989 instruction is printed out. */
26990 static bool prepend_p_to_next_insn
;
26992 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
26993 outputting the assembler code. On the PowerPC, we remember if the current
26994 insn is a prefixed insn where we need to emit a 'p' before the insn.
26996 In addition, if the insn is part of a PC-relative reference to an external
26997 label optimization, this is recorded also. */
26999 rs6000_final_prescan_insn (rtx_insn
*insn
, rtx
[], int)
27001 prepend_p_to_next_insn
= (get_attr_maybe_prefixed (insn
)
27002 == MAYBE_PREFIXED_YES
27003 && get_attr_prefixed (insn
) == PREFIXED_YES
);
27007 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
27008 We use it to emit a 'p' for prefixed insns that is set in
27009 FINAL_PRESCAN_INSN. */
27011 rs6000_asm_output_opcode (FILE *stream
)
27013 if (prepend_p_to_next_insn
)
27015 fprintf (stream
, "p");
27017 /* Reset the flag in the case where there are separate insn lines in the
27018 sequence, so the 'p' is only emitted for the first line. This shows up
27019 when we are doing the PCREL_OPT optimization, in that the label created
27020 with %r<n> would have a leading 'p' printed. */
27021 prepend_p_to_next_insn
= false;
27027 /* Emit the relocation to tie the next instruction to a previous instruction
27028 that loads up an external address. This is used to do the PCREL_OPT
27029 optimization. Note, the label is generated after the PLD of the got
27030 pc-relative address to allow for the assembler to insert NOPs before the PLD
27031 instruction. The operand is a constant integer that is the label
27035 output_pcrel_opt_reloc (rtx label_num
)
27037 rtx operands
[1] = { label_num
};
27038 output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
27042 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
27043 should be adjusted to reflect any required changes. This macro is used when
27044 there is some systematic length adjustment required that would be difficult
27045 to express in the length attribute.
27047 In the PowerPC, we use this to adjust the length of an instruction if one or
27048 more prefixed instructions are generated, using the attribute
27049 num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
27050 hardware requires that a prefied instruciton does not cross a 64-byte
27051 boundary. This means the compiler has to assume the length of the first
27052 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
27053 already set for the non-prefixed instruction, we just need to udpate for the
27057 rs6000_adjust_insn_length (rtx_insn
*insn
, int length
)
27059 if (TARGET_PREFIXED
&& NONJUMP_INSN_P (insn
))
27061 rtx pattern
= PATTERN (insn
);
27062 if (GET_CODE (pattern
) != USE
&& GET_CODE (pattern
) != CLOBBER
27063 && get_attr_prefixed (insn
) == PREFIXED_YES
)
27065 int num_prefixed
= get_attr_max_prefixed_insns (insn
);
27066 length
+= 4 * (num_prefixed
+ 1);
27074 #ifdef HAVE_GAS_HIDDEN
27075 # define USE_HIDDEN_LINKONCE 1
27077 # define USE_HIDDEN_LINKONCE 0
27080 /* Fills in the label name that should be used for a 476 link stack thunk. */
27083 get_ppc476_thunk_name (char name
[32])
27085 gcc_assert (TARGET_LINK_STACK
);
27087 if (USE_HIDDEN_LINKONCE
)
27088 sprintf (name
, "__ppc476.get_thunk");
27090 ASM_GENERATE_INTERNAL_LABEL (name
, "LPPC476_", 0);
27093 /* This function emits the simple thunk routine that is used to preserve
27094 the link stack on the 476 cpu. */
27096 static void rs6000_code_end (void) ATTRIBUTE_UNUSED
;
27098 rs6000_code_end (void)
27103 if (!TARGET_LINK_STACK
)
27106 get_ppc476_thunk_name (name
);
27108 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
, get_identifier (name
),
27109 build_function_type_list (void_type_node
, NULL_TREE
));
27110 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
27111 NULL_TREE
, void_type_node
);
27112 TREE_PUBLIC (decl
) = 1;
27113 TREE_STATIC (decl
) = 1;
27116 if (USE_HIDDEN_LINKONCE
&& !TARGET_XCOFF
)
27118 cgraph_node::create (decl
)->set_comdat_group (DECL_ASSEMBLER_NAME (decl
));
27119 targetm
.asm_out
.unique_section (decl
, 0);
27120 switch_to_section (get_named_section (decl
, NULL
, 0));
27121 DECL_WEAK (decl
) = 1;
27122 ASM_WEAKEN_DECL (asm_out_file
, decl
, name
, 0);
27123 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
27124 targetm
.asm_out
.assemble_visibility (decl
, VISIBILITY_HIDDEN
);
27125 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
27130 switch_to_section (text_section
);
27131 ASM_OUTPUT_LABEL (asm_out_file
, name
);
27134 DECL_INITIAL (decl
) = make_node (BLOCK
);
27135 current_function_decl
= decl
;
27136 allocate_struct_function (decl
, false);
27137 init_function_start (decl
);
27138 first_function_block_is_cold
= false;
27139 /* Make sure unwind info is emitted for the thunk if needed. */
27140 final_start_function (emit_barrier (), asm_out_file
, 1);
27142 fputs ("\tblr\n", asm_out_file
);
27144 final_end_function ();
27145 init_insn_lengths ();
27146 free_after_compilation (cfun
);
27148 current_function_decl
= NULL
;
27151 /* Add r30 to hard reg set if the prologue sets it up and it is not
27152 pic_offset_table_rtx. */
27155 rs6000_set_up_by_prologue (struct hard_reg_set_container
*set
)
27157 if (!TARGET_SINGLE_PIC_BASE
27159 && TARGET_MINIMAL_TOC
27160 && !constant_pool_empty_p ())
27161 add_to_hard_reg_set (&set
->set
, Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
27162 if (cfun
->machine
->split_stack_argp_used
)
27163 add_to_hard_reg_set (&set
->set
, Pmode
, 12);
27165 /* Make sure the hard reg set doesn't include r2, which was possibly added
27166 via PIC_OFFSET_TABLE_REGNUM. */
27168 remove_from_hard_reg_set (&set
->set
, Pmode
, TOC_REGNUM
);
27172 /* Helper function for rs6000_split_logical to emit a logical instruction after
27173 spliting the operation to single GPR registers.
27175 DEST is the destination register.
27176 OP1 and OP2 are the input source registers.
27177 CODE is the base operation (AND, IOR, XOR, NOT).
27178 MODE is the machine mode.
27179 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27180 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27181 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
27184 rs6000_split_logical_inner (rtx dest
,
27187 enum rtx_code code
,
27189 bool complement_final_p
,
27190 bool complement_op1_p
,
27191 bool complement_op2_p
)
27195 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
27196 if (op2
&& CONST_INT_P (op2
)
27197 && (mode
== SImode
|| (mode
== DImode
&& TARGET_POWERPC64
))
27198 && !complement_final_p
&& !complement_op1_p
&& !complement_op2_p
)
27200 HOST_WIDE_INT mask
= GET_MODE_MASK (mode
);
27201 HOST_WIDE_INT value
= INTVAL (op2
) & mask
;
27203 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
27208 emit_insn (gen_rtx_SET (dest
, const0_rtx
));
27212 else if (value
== mask
)
27214 if (!rtx_equal_p (dest
, op1
))
27215 emit_insn (gen_rtx_SET (dest
, op1
));
27220 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
27221 into separate ORI/ORIS or XORI/XORIS instrucitons. */
27222 else if (code
== IOR
|| code
== XOR
)
27226 if (!rtx_equal_p (dest
, op1
))
27227 emit_insn (gen_rtx_SET (dest
, op1
));
27233 if (code
== AND
&& mode
== SImode
27234 && !complement_final_p
&& !complement_op1_p
&& !complement_op2_p
)
27236 emit_insn (gen_andsi3 (dest
, op1
, op2
));
27240 if (complement_op1_p
)
27241 op1
= gen_rtx_NOT (mode
, op1
);
27243 if (complement_op2_p
)
27244 op2
= gen_rtx_NOT (mode
, op2
);
27246 /* For canonical RTL, if only one arm is inverted it is the first. */
27247 if (!complement_op1_p
&& complement_op2_p
)
27248 std::swap (op1
, op2
);
27250 bool_rtx
= ((code
== NOT
)
27251 ? gen_rtx_NOT (mode
, op1
)
27252 : gen_rtx_fmt_ee (code
, mode
, op1
, op2
));
27254 if (complement_final_p
)
27255 bool_rtx
= gen_rtx_NOT (mode
, bool_rtx
);
27257 emit_insn (gen_rtx_SET (dest
, bool_rtx
));
27260 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
27261 operations are split immediately during RTL generation to allow for more
27262 optimizations of the AND/IOR/XOR.
27264 OPERANDS is an array containing the destination and two input operands.
27265 CODE is the base operation (AND, IOR, XOR, NOT).
27266 MODE is the machine mode.
27267 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27268 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27269 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
27270 CLOBBER_REG is either NULL or a scratch register of type CC to allow
27271 formation of the AND instructions. */
27274 rs6000_split_logical_di (rtx operands
[3],
27275 enum rtx_code code
,
27276 bool complement_final_p
,
27277 bool complement_op1_p
,
27278 bool complement_op2_p
)
27280 const HOST_WIDE_INT lower_32bits
= HOST_WIDE_INT_C(0xffffffff);
27281 const HOST_WIDE_INT upper_32bits
= ~ lower_32bits
;
27282 const HOST_WIDE_INT sign_bit
= HOST_WIDE_INT_C(0x80000000);
27283 enum hi_lo
{ hi
= 0, lo
= 1 };
27284 rtx op0_hi_lo
[2], op1_hi_lo
[2], op2_hi_lo
[2];
27287 op0_hi_lo
[hi
] = gen_highpart (SImode
, operands
[0]);
27288 op1_hi_lo
[hi
] = gen_highpart (SImode
, operands
[1]);
27289 op0_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[0]);
27290 op1_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[1]);
27293 op2_hi_lo
[hi
] = op2_hi_lo
[lo
] = NULL_RTX
;
27296 if (!CONST_INT_P (operands
[2]))
27298 op2_hi_lo
[hi
] = gen_highpart_mode (SImode
, DImode
, operands
[2]);
27299 op2_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[2]);
27303 HOST_WIDE_INT value
= INTVAL (operands
[2]);
27304 HOST_WIDE_INT value_hi_lo
[2];
27306 gcc_assert (!complement_final_p
);
27307 gcc_assert (!complement_op1_p
);
27308 gcc_assert (!complement_op2_p
);
27310 value_hi_lo
[hi
] = value
>> 32;
27311 value_hi_lo
[lo
] = value
& lower_32bits
;
27313 for (i
= 0; i
< 2; i
++)
27315 HOST_WIDE_INT sub_value
= value_hi_lo
[i
];
27317 if (sub_value
& sign_bit
)
27318 sub_value
|= upper_32bits
;
27320 op2_hi_lo
[i
] = GEN_INT (sub_value
);
27322 /* If this is an AND instruction, check to see if we need to load
27323 the value in a register. */
27324 if (code
== AND
&& sub_value
!= -1 && sub_value
!= 0
27325 && !and_operand (op2_hi_lo
[i
], SImode
))
27326 op2_hi_lo
[i
] = force_reg (SImode
, op2_hi_lo
[i
]);
27331 for (i
= 0; i
< 2; i
++)
27333 /* Split large IOR/XOR operations. */
27334 if ((code
== IOR
|| code
== XOR
)
27335 && CONST_INT_P (op2_hi_lo
[i
])
27336 && !complement_final_p
27337 && !complement_op1_p
27338 && !complement_op2_p
27339 && !logical_const_operand (op2_hi_lo
[i
], SImode
))
27341 HOST_WIDE_INT value
= INTVAL (op2_hi_lo
[i
]);
27342 HOST_WIDE_INT hi_16bits
= value
& HOST_WIDE_INT_C(0xffff0000);
27343 HOST_WIDE_INT lo_16bits
= value
& HOST_WIDE_INT_C(0x0000ffff);
27344 rtx tmp
= gen_reg_rtx (SImode
);
27346 /* Make sure the constant is sign extended. */
27347 if ((hi_16bits
& sign_bit
) != 0)
27348 hi_16bits
|= upper_32bits
;
27350 rs6000_split_logical_inner (tmp
, op1_hi_lo
[i
], GEN_INT (hi_16bits
),
27351 code
, SImode
, false, false, false);
27353 rs6000_split_logical_inner (op0_hi_lo
[i
], tmp
, GEN_INT (lo_16bits
),
27354 code
, SImode
, false, false, false);
27357 rs6000_split_logical_inner (op0_hi_lo
[i
], op1_hi_lo
[i
], op2_hi_lo
[i
],
27358 code
, SImode
, complement_final_p
,
27359 complement_op1_p
, complement_op2_p
);
27365 /* Split the insns that make up boolean operations operating on multiple GPR
27366 registers. The boolean MD patterns ensure that the inputs either are
27367 exactly the same as the output registers, or there is no overlap.
27369 OPERANDS is an array containing the destination and two input operands.
27370 CODE is the base operation (AND, IOR, XOR, NOT).
27371 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27372 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27373 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
27376 rs6000_split_logical (rtx operands
[3],
27377 enum rtx_code code
,
27378 bool complement_final_p
,
27379 bool complement_op1_p
,
27380 bool complement_op2_p
)
27382 machine_mode mode
= GET_MODE (operands
[0]);
27383 machine_mode sub_mode
;
27385 int sub_size
, regno0
, regno1
, nregs
, i
;
27387 /* If this is DImode, use the specialized version that can run before
27388 register allocation. */
27389 if (mode
== DImode
&& !TARGET_POWERPC64
)
27391 rs6000_split_logical_di (operands
, code
, complement_final_p
,
27392 complement_op1_p
, complement_op2_p
);
27398 op2
= (code
== NOT
) ? NULL_RTX
: operands
[2];
27399 sub_mode
= (TARGET_POWERPC64
) ? DImode
: SImode
;
27400 sub_size
= GET_MODE_SIZE (sub_mode
);
27401 regno0
= REGNO (op0
);
27402 regno1
= REGNO (op1
);
27404 gcc_assert (reload_completed
);
27405 gcc_assert (IN_RANGE (regno0
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
27406 gcc_assert (IN_RANGE (regno1
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
27408 nregs
= rs6000_hard_regno_nregs
[(int)mode
][regno0
];
27409 gcc_assert (nregs
> 1);
27411 if (op2
&& REG_P (op2
))
27412 gcc_assert (IN_RANGE (REGNO (op2
), FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
27414 for (i
= 0; i
< nregs
; i
++)
27416 int offset
= i
* sub_size
;
27417 rtx sub_op0
= simplify_subreg (sub_mode
, op0
, mode
, offset
);
27418 rtx sub_op1
= simplify_subreg (sub_mode
, op1
, mode
, offset
);
27419 rtx sub_op2
= ((code
== NOT
)
27421 : simplify_subreg (sub_mode
, op2
, mode
, offset
));
27423 rs6000_split_logical_inner (sub_op0
, sub_op1
, sub_op2
, code
, sub_mode
,
27424 complement_final_p
, complement_op1_p
,
27431 /* Emit instructions to move SRC to DST. Called by splitters for
27432 multi-register moves. It will emit at most one instruction for
27433 each register that is accessed; that is, it won't emit li/lis pairs
27434 (or equivalent for 64-bit code). One of SRC or DST must be a hard
27438 rs6000_split_multireg_move (rtx dst
, rtx src
)
27440 /* The register number of the first register being moved. */
27442 /* The mode that is to be moved. */
27444 /* The mode that the move is being done in, and its size. */
27445 machine_mode reg_mode
;
27447 /* The number of registers that will be moved. */
27450 reg
= REG_P (dst
) ? REGNO (dst
) : REGNO (src
);
27451 mode
= GET_MODE (dst
);
27452 nregs
= hard_regno_nregs (reg
, mode
);
27454 /* If we have a vector quad register for MMA, and this is a load or store,
27455 see if we can use vector paired load/stores. */
27456 if (mode
== XOmode
&& TARGET_MMA
27457 && (MEM_P (dst
) || MEM_P (src
)))
27462 /* If we have a vector pair/quad mode, split it into two/four separate
27464 else if (mode
== OOmode
|| mode
== XOmode
)
27465 reg_mode
= V1TImode
;
27466 else if (FP_REGNO_P (reg
))
27467 reg_mode
= DECIMAL_FLOAT_MODE_P (mode
) ? DDmode
:
27468 (TARGET_HARD_FLOAT
? DFmode
: SFmode
);
27469 else if (ALTIVEC_REGNO_P (reg
))
27470 reg_mode
= V16QImode
;
27472 reg_mode
= word_mode
;
27473 reg_mode_size
= GET_MODE_SIZE (reg_mode
);
27475 gcc_assert (reg_mode_size
* nregs
== GET_MODE_SIZE (mode
));
27477 /* TDmode residing in FP registers is special, since the ISA requires that
27478 the lower-numbered word of a register pair is always the most significant
27479 word, even in little-endian mode. This does not match the usual subreg
27480 semantics, so we cannnot use simplify_gen_subreg in those cases. Access
27481 the appropriate constituent registers "by hand" in little-endian mode.
27483 Note we do not need to check for destructive overlap here since TDmode
27484 can only reside in even/odd register pairs. */
27485 if (FP_REGNO_P (reg
) && DECIMAL_FLOAT_MODE_P (mode
) && !BYTES_BIG_ENDIAN
)
27490 for (i
= 0; i
< nregs
; i
++)
27492 if (REG_P (src
) && FP_REGNO_P (REGNO (src
)))
27493 p_src
= gen_rtx_REG (reg_mode
, REGNO (src
) + nregs
- 1 - i
);
27495 p_src
= simplify_gen_subreg (reg_mode
, src
, mode
,
27496 i
* reg_mode_size
);
27498 if (REG_P (dst
) && FP_REGNO_P (REGNO (dst
)))
27499 p_dst
= gen_rtx_REG (reg_mode
, REGNO (dst
) + nregs
- 1 - i
);
27501 p_dst
= simplify_gen_subreg (reg_mode
, dst
, mode
,
27502 i
* reg_mode_size
);
27504 emit_insn (gen_rtx_SET (p_dst
, p_src
));
27510 /* The __vector_pair and __vector_quad modes are multi-register
27511 modes, so if we have to load or store the registers, we have to be
27512 careful to properly swap them if we're in little endian mode
27513 below. This means the last register gets the first memory
27514 location. We also need to be careful of using the right register
27515 numbers if we are splitting XO to OO. */
27516 if (mode
== OOmode
|| mode
== XOmode
)
27518 nregs
= hard_regno_nregs (reg
, mode
);
27519 int reg_mode_nregs
= hard_regno_nregs (reg
, reg_mode
);
27522 unsigned offset
= 0;
27523 unsigned size
= GET_MODE_SIZE (reg_mode
);
27525 /* If we are reading an accumulator register, we have to
27526 deprime it before we can access it. */
27528 && GET_MODE (src
) == XOmode
&& FP_REGNO_P (REGNO (src
)))
27529 emit_insn (gen_mma_xxmfacc (src
, src
));
27531 for (int i
= 0; i
< nregs
; i
+= reg_mode_nregs
)
27534 = WORDS_BIG_ENDIAN
? i
: (nregs
- reg_mode_nregs
- i
);
27535 rtx dst2
= adjust_address (dst
, reg_mode
, offset
);
27536 rtx src2
= gen_rtx_REG (reg_mode
, reg
+ subreg
);
27538 emit_insn (gen_rtx_SET (dst2
, src2
));
27546 unsigned offset
= 0;
27547 unsigned size
= GET_MODE_SIZE (reg_mode
);
27549 for (int i
= 0; i
< nregs
; i
+= reg_mode_nregs
)
27552 = WORDS_BIG_ENDIAN
? i
: (nregs
- reg_mode_nregs
- i
);
27553 rtx dst2
= gen_rtx_REG (reg_mode
, reg
+ subreg
);
27554 rtx src2
= adjust_address (src
, reg_mode
, offset
);
27556 emit_insn (gen_rtx_SET (dst2
, src2
));
27559 /* If we are writing an accumulator register, we have to
27560 prime it after we've written it. */
27562 && GET_MODE (dst
) == XOmode
&& FP_REGNO_P (REGNO (dst
)))
27563 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27568 if (GET_CODE (src
) == UNSPEC
27569 || GET_CODE (src
) == UNSPEC_VOLATILE
)
27571 gcc_assert (XINT (src
, 1) == UNSPEC_VSX_ASSEMBLE
27572 || XINT (src
, 1) == UNSPECV_MMA_ASSEMBLE
);
27573 gcc_assert (REG_P (dst
));
27574 if (GET_MODE (src
) == XOmode
)
27575 gcc_assert (FP_REGNO_P (REGNO (dst
)));
27576 if (GET_MODE (src
) == OOmode
)
27577 gcc_assert (VSX_REGNO_P (REGNO (dst
)));
27579 int nvecs
= XVECLEN (src
, 0);
27580 for (int i
= 0; i
< nvecs
; i
++)
27583 int regno
= reg
+ i
;
27585 if (WORDS_BIG_ENDIAN
)
27587 op
= XVECEXP (src
, 0, i
);
27589 /* If we are loading an even VSX register and the memory location
27590 is adjacent to the next register's memory location (if any),
27591 then we can load them both with one LXVP instruction. */
27592 if ((regno
& 1) == 0)
27594 rtx op2
= XVECEXP (src
, 0, i
+ 1);
27595 if (adjacent_mem_locations (op
, op2
) == op
)
27597 op
= adjust_address (op
, OOmode
, 0);
27598 /* Skip the next register, since we're going to
27599 load it together with this register. */
27606 op
= XVECEXP (src
, 0, nvecs
- i
- 1);
27608 /* If we are loading an even VSX register and the memory location
27609 is adjacent to the next register's memory location (if any),
27610 then we can load them both with one LXVP instruction. */
27611 if ((regno
& 1) == 0)
27613 rtx op2
= XVECEXP (src
, 0, nvecs
- i
- 2);
27614 if (adjacent_mem_locations (op2
, op
) == op2
)
27616 op
= adjust_address (op2
, OOmode
, 0);
27617 /* Skip the next register, since we're going to
27618 load it together with this register. */
27624 rtx dst_i
= gen_rtx_REG (GET_MODE (op
), regno
);
27625 emit_insn (gen_rtx_SET (dst_i
, op
));
27628 /* We are writing an accumulator register, so we have to
27629 prime it after we've written it. */
27630 if (GET_MODE (src
) == XOmode
)
27631 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27636 /* Register -> register moves can use common code. */
27639 if (REG_P (src
) && REG_P (dst
) && (REGNO (src
) < REGNO (dst
)))
27641 /* If we are reading an accumulator register, we have to
27642 deprime it before we can access it. */
27644 && GET_MODE (src
) == XOmode
&& FP_REGNO_P (REGNO (src
)))
27645 emit_insn (gen_mma_xxmfacc (src
, src
));
27647 /* Move register range backwards, if we might have destructive
27650 /* XO/OO are opaque so cannot use subregs. */
27651 if (mode
== OOmode
|| mode
== XOmode
)
27653 for (i
= nregs
- 1; i
>= 0; i
--)
27655 rtx dst_i
= gen_rtx_REG (reg_mode
, REGNO (dst
) + i
);
27656 rtx src_i
= gen_rtx_REG (reg_mode
, REGNO (src
) + i
);
27657 emit_insn (gen_rtx_SET (dst_i
, src_i
));
27662 for (i
= nregs
- 1; i
>= 0; i
--)
27663 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode
, dst
, mode
,
27664 i
* reg_mode_size
),
27665 simplify_gen_subreg (reg_mode
, src
, mode
,
27666 i
* reg_mode_size
)));
27669 /* If we are writing an accumulator register, we have to
27670 prime it after we've written it. */
27672 && GET_MODE (dst
) == XOmode
&& FP_REGNO_P (REGNO (dst
)))
27673 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27679 bool used_update
= false;
27680 rtx restore_basereg
= NULL_RTX
;
27682 if (MEM_P (src
) && INT_REGNO_P (reg
))
27686 if (GET_CODE (XEXP (src
, 0)) == PRE_INC
27687 || GET_CODE (XEXP (src
, 0)) == PRE_DEC
)
27690 breg
= XEXP (XEXP (src
, 0), 0);
27691 delta_rtx
= (GET_CODE (XEXP (src
, 0)) == PRE_INC
27692 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src
)))
27693 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src
))));
27694 emit_insn (gen_add3_insn (breg
, breg
, delta_rtx
));
27695 src
= replace_equiv_address (src
, breg
);
27697 else if (! rs6000_offsettable_memref_p (src
, reg_mode
, true))
27699 if (GET_CODE (XEXP (src
, 0)) == PRE_MODIFY
)
27701 rtx basereg
= XEXP (XEXP (src
, 0), 0);
27704 rtx ndst
= simplify_gen_subreg (reg_mode
, dst
, mode
, 0);
27705 emit_insn (gen_rtx_SET (ndst
,
27706 gen_rtx_MEM (reg_mode
,
27708 used_update
= true;
27711 emit_insn (gen_rtx_SET (basereg
,
27712 XEXP (XEXP (src
, 0), 1)));
27713 src
= replace_equiv_address (src
, basereg
);
27717 rtx basereg
= gen_rtx_REG (Pmode
, reg
);
27718 emit_insn (gen_rtx_SET (basereg
, XEXP (src
, 0)));
27719 src
= replace_equiv_address (src
, basereg
);
27723 breg
= XEXP (src
, 0);
27724 if (GET_CODE (breg
) == PLUS
|| GET_CODE (breg
) == LO_SUM
)
27725 breg
= XEXP (breg
, 0);
27727 /* If the base register we are using to address memory is
27728 also a destination reg, then change that register last. */
27730 && REGNO (breg
) >= REGNO (dst
)
27731 && REGNO (breg
) < REGNO (dst
) + nregs
)
27732 j
= REGNO (breg
) - REGNO (dst
);
27734 else if (MEM_P (dst
) && INT_REGNO_P (reg
))
27738 if (GET_CODE (XEXP (dst
, 0)) == PRE_INC
27739 || GET_CODE (XEXP (dst
, 0)) == PRE_DEC
)
27742 breg
= XEXP (XEXP (dst
, 0), 0);
27743 delta_rtx
= (GET_CODE (XEXP (dst
, 0)) == PRE_INC
27744 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst
)))
27745 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst
))));
27747 /* We have to update the breg before doing the store.
27748 Use store with update, if available. */
27752 rtx nsrc
= simplify_gen_subreg (reg_mode
, src
, mode
, 0);
27753 emit_insn (TARGET_32BIT
27754 ? (TARGET_POWERPC64
27755 ? gen_movdi_si_update (breg
, breg
, delta_rtx
, nsrc
)
27756 : gen_movsi_si_update (breg
, breg
, delta_rtx
, nsrc
))
27757 : gen_movdi_di_update (breg
, breg
, delta_rtx
, nsrc
));
27758 used_update
= true;
27761 emit_insn (gen_add3_insn (breg
, breg
, delta_rtx
));
27762 dst
= replace_equiv_address (dst
, breg
);
27764 else if (!rs6000_offsettable_memref_p (dst
, reg_mode
, true)
27765 && GET_CODE (XEXP (dst
, 0)) != LO_SUM
)
27767 if (GET_CODE (XEXP (dst
, 0)) == PRE_MODIFY
)
27769 rtx basereg
= XEXP (XEXP (dst
, 0), 0);
27772 rtx nsrc
= simplify_gen_subreg (reg_mode
, src
, mode
, 0);
27773 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode
,
27776 used_update
= true;
27779 emit_insn (gen_rtx_SET (basereg
,
27780 XEXP (XEXP (dst
, 0), 1)));
27781 dst
= replace_equiv_address (dst
, basereg
);
27785 rtx basereg
= XEXP (XEXP (dst
, 0), 0);
27786 rtx offsetreg
= XEXP (XEXP (dst
, 0), 1);
27787 gcc_assert (GET_CODE (XEXP (dst
, 0)) == PLUS
27789 && REG_P (offsetreg
)
27790 && REGNO (basereg
) != REGNO (offsetreg
));
27791 if (REGNO (basereg
) == 0)
27793 rtx tmp
= offsetreg
;
27794 offsetreg
= basereg
;
27797 emit_insn (gen_add3_insn (basereg
, basereg
, offsetreg
));
27798 restore_basereg
= gen_sub3_insn (basereg
, basereg
, offsetreg
);
27799 dst
= replace_equiv_address (dst
, basereg
);
27802 else if (GET_CODE (XEXP (dst
, 0)) != LO_SUM
)
27803 gcc_assert (rs6000_offsettable_memref_p (dst
, reg_mode
, true));
27806 /* If we are reading an accumulator register, we have to
27807 deprime it before we can access it. */
27808 if (TARGET_MMA
&& REG_P (src
)
27809 && GET_MODE (src
) == XOmode
&& FP_REGNO_P (REGNO (src
)))
27810 emit_insn (gen_mma_xxmfacc (src
, src
));
27812 for (i
= 0; i
< nregs
; i
++)
27814 /* Calculate index to next subword. */
27819 /* If compiler already emitted move of first word by
27820 store with update, no need to do anything. */
27821 if (j
== 0 && used_update
)
27824 /* XO/OO are opaque so cannot use subregs. */
27825 if (mode
== OOmode
|| mode
== XOmode
)
27827 rtx dst_i
= gen_rtx_REG (reg_mode
, REGNO (dst
) + j
);
27828 rtx src_i
= gen_rtx_REG (reg_mode
, REGNO (src
) + j
);
27829 emit_insn (gen_rtx_SET (dst_i
, src_i
));
27832 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode
, dst
, mode
,
27833 j
* reg_mode_size
),
27834 simplify_gen_subreg (reg_mode
, src
, mode
,
27835 j
* reg_mode_size
)));
27838 /* If we are writing an accumulator register, we have to
27839 prime it after we've written it. */
27840 if (TARGET_MMA
&& REG_P (dst
)
27841 && GET_MODE (dst
) == XOmode
&& FP_REGNO_P (REGNO (dst
)))
27842 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27844 if (restore_basereg
!= NULL_RTX
)
27845 emit_insn (restore_basereg
);
27849 /* Return true if the peephole2 can combine a load involving a combination of
27850 an addis instruction and a load with an offset that can be fused together on
27854 fusion_gpr_load_p (rtx addis_reg
, /* register set via addis. */
27855 rtx addis_value
, /* addis value. */
27856 rtx target
, /* target register that is loaded. */
27857 rtx mem
) /* bottom part of the memory addr. */
27862 /* Validate arguments. */
27863 if (!base_reg_operand (addis_reg
, GET_MODE (addis_reg
)))
27866 if (!base_reg_operand (target
, GET_MODE (target
)))
27869 if (!fusion_gpr_addis (addis_value
, GET_MODE (addis_value
)))
27872 /* Allow sign/zero extension. */
27873 if (GET_CODE (mem
) == ZERO_EXTEND
27874 || (GET_CODE (mem
) == SIGN_EXTEND
&& TARGET_P8_FUSION_SIGN
))
27875 mem
= XEXP (mem
, 0);
27880 if (!fusion_gpr_mem_load (mem
, GET_MODE (mem
)))
27883 addr
= XEXP (mem
, 0); /* either PLUS or LO_SUM. */
27884 if (GET_CODE (addr
) != PLUS
&& GET_CODE (addr
) != LO_SUM
)
27887 /* Validate that the register used to load the high value is either the
27888 register being loaded, or we can safely replace its use.
27890 This function is only called from the peephole2 pass and we assume that
27891 there are 2 instructions in the peephole (addis and load), so we want to
27892 check if the target register was not used in the memory address and the
27893 register to hold the addis result is dead after the peephole. */
27894 if (REGNO (addis_reg
) != REGNO (target
))
27896 if (reg_mentioned_p (target
, mem
))
27899 if (!peep2_reg_dead_p (2, addis_reg
))
27902 /* If the target register being loaded is the stack pointer, we must
27903 avoid loading any other value into it, even temporarily. */
27904 if (REG_P (target
) && REGNO (target
) == STACK_POINTER_REGNUM
)
27908 base_reg
= XEXP (addr
, 0);
27909 return REGNO (addis_reg
) == REGNO (base_reg
);
27912 /* During the peephole2 pass, adjust and expand the insns for a load fusion
27913 sequence. We adjust the addis register to use the target register. If the
27914 load sign extends, we adjust the code to do the zero extending load, and an
27915 explicit sign extension later since the fusion only covers zero extending
27919 operands[0] register set with addis (to be replaced with target)
27920 operands[1] value set via addis
27921 operands[2] target register being loaded
27922 operands[3] D-form memory reference using operands[0]. */
27925 expand_fusion_gpr_load (rtx
*operands
)
27927 rtx addis_value
= operands
[1];
27928 rtx target
= operands
[2];
27929 rtx orig_mem
= operands
[3];
27930 rtx new_addr
, new_mem
, orig_addr
, offset
;
27931 enum rtx_code plus_or_lo_sum
;
27932 machine_mode target_mode
= GET_MODE (target
);
27933 machine_mode extend_mode
= target_mode
;
27934 machine_mode ptr_mode
= Pmode
;
27935 enum rtx_code extend
= UNKNOWN
;
27937 if (GET_CODE (orig_mem
) == ZERO_EXTEND
27938 || (TARGET_P8_FUSION_SIGN
&& GET_CODE (orig_mem
) == SIGN_EXTEND
))
27940 extend
= GET_CODE (orig_mem
);
27941 orig_mem
= XEXP (orig_mem
, 0);
27942 target_mode
= GET_MODE (orig_mem
);
27945 gcc_assert (MEM_P (orig_mem
));
27947 orig_addr
= XEXP (orig_mem
, 0);
27948 plus_or_lo_sum
= GET_CODE (orig_addr
);
27949 gcc_assert (plus_or_lo_sum
== PLUS
|| plus_or_lo_sum
== LO_SUM
);
27951 offset
= XEXP (orig_addr
, 1);
27952 new_addr
= gen_rtx_fmt_ee (plus_or_lo_sum
, ptr_mode
, addis_value
, offset
);
27953 new_mem
= replace_equiv_address_nv (orig_mem
, new_addr
, false);
27955 if (extend
!= UNKNOWN
)
27956 new_mem
= gen_rtx_fmt_e (ZERO_EXTEND
, extend_mode
, new_mem
);
27958 new_mem
= gen_rtx_UNSPEC (extend_mode
, gen_rtvec (1, new_mem
),
27959 UNSPEC_FUSION_GPR
);
27960 emit_insn (gen_rtx_SET (target
, new_mem
));
27962 if (extend
== SIGN_EXTEND
)
27964 int sub_off
= ((BYTES_BIG_ENDIAN
)
27965 ? GET_MODE_SIZE (extend_mode
) - GET_MODE_SIZE (target_mode
)
27968 = simplify_subreg (target_mode
, target
, extend_mode
, sub_off
);
27970 emit_insn (gen_rtx_SET (target
,
27971 gen_rtx_SIGN_EXTEND (extend_mode
, sign_reg
)));
27977 /* Emit the addis instruction that will be part of a fused instruction
27981 emit_fusion_addis (rtx target
, rtx addis_value
)
27984 const char *addis_str
= NULL
;
27986 /* Emit the addis instruction. */
27987 fuse_ops
[0] = target
;
27988 if (satisfies_constraint_L (addis_value
))
27990 fuse_ops
[1] = addis_value
;
27991 addis_str
= "lis %0,%v1";
27994 else if (GET_CODE (addis_value
) == PLUS
)
27996 rtx op0
= XEXP (addis_value
, 0);
27997 rtx op1
= XEXP (addis_value
, 1);
27999 if (REG_P (op0
) && CONST_INT_P (op1
)
28000 && satisfies_constraint_L (op1
))
28004 addis_str
= "addis %0,%1,%v2";
28008 else if (GET_CODE (addis_value
) == HIGH
)
28010 rtx value
= XEXP (addis_value
, 0);
28011 if (GET_CODE (value
) == UNSPEC
&& XINT (value
, 1) == UNSPEC_TOCREL
)
28013 fuse_ops
[1] = XVECEXP (value
, 0, 0); /* symbol ref. */
28014 fuse_ops
[2] = XVECEXP (value
, 0, 1); /* TOC register. */
28016 addis_str
= "addis %0,%2,%1@toc@ha";
28018 else if (TARGET_XCOFF
)
28019 addis_str
= "addis %0,%1@u(%2)";
28022 gcc_unreachable ();
28025 else if (GET_CODE (value
) == PLUS
)
28027 rtx op0
= XEXP (value
, 0);
28028 rtx op1
= XEXP (value
, 1);
28030 if (GET_CODE (op0
) == UNSPEC
28031 && XINT (op0
, 1) == UNSPEC_TOCREL
28032 && CONST_INT_P (op1
))
28034 fuse_ops
[1] = XVECEXP (op0
, 0, 0); /* symbol ref. */
28035 fuse_ops
[2] = XVECEXP (op0
, 0, 1); /* TOC register. */
28038 addis_str
= "addis %0,%2,%1+%3@toc@ha";
28040 else if (TARGET_XCOFF
)
28041 addis_str
= "addis %0,%1+%3@u(%2)";
28044 gcc_unreachable ();
28048 else if (satisfies_constraint_L (value
))
28050 fuse_ops
[1] = value
;
28051 addis_str
= "lis %0,%v1";
28054 else if (TARGET_ELF
&& !TARGET_POWERPC64
&& CONSTANT_P (value
))
28056 fuse_ops
[1] = value
;
28057 addis_str
= "lis %0,%1@ha";
28062 fatal_insn ("Could not generate addis value for fusion", addis_value
);
28064 output_asm_insn (addis_str
, fuse_ops
);
28067 /* Emit a D-form load or store instruction that is the second instruction
28068 of a fusion sequence. */
28071 emit_fusion_load (rtx load_reg
, rtx addis_reg
, rtx offset
, const char *insn_str
)
28074 char insn_template
[80];
28076 fuse_ops
[0] = load_reg
;
28077 fuse_ops
[1] = addis_reg
;
28079 if (CONST_INT_P (offset
) && satisfies_constraint_I (offset
))
28081 sprintf (insn_template
, "%s %%0,%%2(%%1)", insn_str
);
28082 fuse_ops
[2] = offset
;
28083 output_asm_insn (insn_template
, fuse_ops
);
28086 else if (GET_CODE (offset
) == UNSPEC
28087 && XINT (offset
, 1) == UNSPEC_TOCREL
)
28090 sprintf (insn_template
, "%s %%0,%%2@toc@l(%%1)", insn_str
);
28092 else if (TARGET_XCOFF
)
28093 sprintf (insn_template
, "%s %%0,%%2@l(%%1)", insn_str
);
28096 gcc_unreachable ();
28098 fuse_ops
[2] = XVECEXP (offset
, 0, 0);
28099 output_asm_insn (insn_template
, fuse_ops
);
28102 else if (GET_CODE (offset
) == PLUS
28103 && GET_CODE (XEXP (offset
, 0)) == UNSPEC
28104 && XINT (XEXP (offset
, 0), 1) == UNSPEC_TOCREL
28105 && CONST_INT_P (XEXP (offset
, 1)))
28107 rtx tocrel_unspec
= XEXP (offset
, 0);
28109 sprintf (insn_template
, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str
);
28111 else if (TARGET_XCOFF
)
28112 sprintf (insn_template
, "%s %%0,%%2+%%3@l(%%1)", insn_str
);
28115 gcc_unreachable ();
28117 fuse_ops
[2] = XVECEXP (tocrel_unspec
, 0, 0);
28118 fuse_ops
[3] = XEXP (offset
, 1);
28119 output_asm_insn (insn_template
, fuse_ops
);
28122 else if (TARGET_ELF
&& !TARGET_POWERPC64
&& CONSTANT_P (offset
))
28124 sprintf (insn_template
, "%s %%0,%%2@l(%%1)", insn_str
);
28126 fuse_ops
[2] = offset
;
28127 output_asm_insn (insn_template
, fuse_ops
);
28131 fatal_insn ("Unable to generate load/store offset for fusion", offset
);
28136 /* Given an address, convert it into the addis and load offset parts. Addresses
28137 created during the peephole2 process look like:
28138 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
28139 (unspec [(...)] UNSPEC_TOCREL)) */
28142 fusion_split_address (rtx addr
, rtx
*p_hi
, rtx
*p_lo
)
28146 if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
)
28148 hi
= XEXP (addr
, 0);
28149 lo
= XEXP (addr
, 1);
28152 gcc_unreachable ();
28158 /* Return a string to fuse an addis instruction with a gpr load to the same
28159 register that we loaded up the addis instruction. The address that is used
28160 is the logical address that was formed during peephole2:
28161 (lo_sum (high) (low-part))
28163 The code is complicated, so we call output_asm_insn directly, and just
28167 emit_fusion_gpr_load (rtx target
, rtx mem
)
28172 const char *load_str
= NULL
;
28175 if (GET_CODE (mem
) == ZERO_EXTEND
)
28176 mem
= XEXP (mem
, 0);
28178 gcc_assert (REG_P (target
) && MEM_P (mem
));
28180 addr
= XEXP (mem
, 0);
28181 fusion_split_address (addr
, &addis_value
, &load_offset
);
28183 /* Now emit the load instruction to the same register. */
28184 mode
= GET_MODE (mem
);
28202 gcc_assert (TARGET_POWERPC64
);
28207 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target
, mem
));
28210 /* Emit the addis instruction. */
28211 emit_fusion_addis (target
, addis_value
);
28213 /* Emit the D-form load instruction. */
28214 emit_fusion_load (target
, target
, load_offset
, load_str
);
28219 /* This is not inside an #ifdef RS6000_GLIBC_ATOMIC_FENV because gengtype
28220 ignores it then. */
28221 static GTY(()) tree atomic_hold_decl
;
28222 static GTY(()) tree atomic_clear_decl
;
28223 static GTY(()) tree atomic_update_decl
;
28225 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
28227 rs6000_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
28229 if (!TARGET_HARD_FLOAT
)
28231 #ifdef RS6000_GLIBC_ATOMIC_FENV
28232 if (atomic_hold_decl
== NULL_TREE
)
28235 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
28236 get_identifier ("__atomic_feholdexcept"),
28237 build_function_type_list (void_type_node
,
28238 double_ptr_type_node
,
28240 TREE_PUBLIC (atomic_hold_decl
) = 1;
28241 DECL_EXTERNAL (atomic_hold_decl
) = 1;
28244 if (atomic_clear_decl
== NULL_TREE
)
28247 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
28248 get_identifier ("__atomic_feclearexcept"),
28249 build_function_type_list (void_type_node
,
28251 TREE_PUBLIC (atomic_clear_decl
) = 1;
28252 DECL_EXTERNAL (atomic_clear_decl
) = 1;
28255 tree const_double
= build_qualified_type (double_type_node
,
28257 tree const_double_ptr
= build_pointer_type (const_double
);
28258 if (atomic_update_decl
== NULL_TREE
)
28261 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
28262 get_identifier ("__atomic_feupdateenv"),
28263 build_function_type_list (void_type_node
,
28266 TREE_PUBLIC (atomic_update_decl
) = 1;
28267 DECL_EXTERNAL (atomic_update_decl
) = 1;
28270 tree fenv_var
= create_tmp_var_raw (double_type_node
);
28271 TREE_ADDRESSABLE (fenv_var
) = 1;
28272 tree fenv_addr
= build1 (ADDR_EXPR
, double_ptr_type_node
,
28273 build4 (TARGET_EXPR
, double_type_node
, fenv_var
,
28274 void_node
, NULL_TREE
, NULL_TREE
));
28276 *hold
= build_call_expr (atomic_hold_decl
, 1, fenv_addr
);
28277 *clear
= build_call_expr (atomic_clear_decl
, 0);
28278 *update
= build_call_expr (atomic_update_decl
, 1,
28279 fold_convert (const_double_ptr
, fenv_addr
));
28284 tree mffs
= rs6000_builtin_decls
[RS6000_BIF_MFFS
];
28285 tree mtfsf
= rs6000_builtin_decls
[RS6000_BIF_MTFSF
];
28286 tree call_mffs
= build_call_expr (mffs
, 0);
28288 /* Generates the equivalent of feholdexcept (&fenv_var)
28290 *fenv_var = __builtin_mffs ();
28292 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
28293 __builtin_mtfsf (0xff, fenv_hold); */
28295 /* Mask to clear everything except for the rounding modes and non-IEEE
28296 arithmetic flag. */
28297 const unsigned HOST_WIDE_INT hold_exception_mask
28298 = HOST_WIDE_INT_C (0xffffffff00000007);
28300 tree fenv_var
= create_tmp_var_raw (double_type_node
);
28302 tree hold_mffs
= build4 (TARGET_EXPR
, double_type_node
, fenv_var
, call_mffs
,
28303 NULL_TREE
, NULL_TREE
);
28305 tree fenv_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, fenv_var
);
28306 tree fenv_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, fenv_llu
,
28307 build_int_cst (uint64_type_node
,
28308 hold_exception_mask
));
28310 tree fenv_hold_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
28313 tree hold_mtfsf
= build_call_expr (mtfsf
, 2,
28314 build_int_cst (unsigned_type_node
, 0xff),
28317 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, hold_mffs
, hold_mtfsf
);
28319 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
28321 double fenv_clear = __builtin_mffs ();
28322 *(uint64_t)&fenv_clear &= 0xffffffff00000000LL;
28323 __builtin_mtfsf (0xff, fenv_clear); */
28325 /* Mask to clear everything except for the rounding modes and non-IEEE
28326 arithmetic flag. */
28327 const unsigned HOST_WIDE_INT clear_exception_mask
28328 = HOST_WIDE_INT_C (0xffffffff00000000);
28330 tree fenv_clear
= create_tmp_var_raw (double_type_node
);
28332 tree clear_mffs
= build4 (TARGET_EXPR
, double_type_node
, fenv_clear
,
28333 call_mffs
, NULL_TREE
, NULL_TREE
);
28335 tree fenv_clean_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, fenv_clear
);
28336 tree fenv_clear_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
,
28338 build_int_cst (uint64_type_node
,
28339 clear_exception_mask
));
28341 tree fenv_clear_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
28342 fenv_clear_llu_and
);
28344 tree clear_mtfsf
= build_call_expr (mtfsf
, 2,
28345 build_int_cst (unsigned_type_node
, 0xff),
28348 *clear
= build2 (COMPOUND_EXPR
, void_type_node
, clear_mffs
, clear_mtfsf
);
28350 /* Generates the equivalent of feupdateenv (&fenv_var)
28352 double old_fenv = __builtin_mffs ();
28353 double fenv_update;
28354 *(uint64_t*)&fenv_update = (*(uint64_t*)&old & 0xffffffff1fffff00LL) |
28355 (*(uint64_t*)fenv_var 0x1ff80fff);
28356 __builtin_mtfsf (0xff, fenv_update); */
28358 const unsigned HOST_WIDE_INT update_exception_mask
28359 = HOST_WIDE_INT_C (0xffffffff1fffff00);
28360 const unsigned HOST_WIDE_INT new_exception_mask
28361 = HOST_WIDE_INT_C (0x1ff80fff);
28363 tree old_fenv
= create_tmp_var_raw (double_type_node
);
28364 tree update_mffs
= build4 (TARGET_EXPR
, double_type_node
, old_fenv
,
28365 call_mffs
, NULL_TREE
, NULL_TREE
);
28367 tree old_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, old_fenv
);
28368 tree old_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, old_llu
,
28369 build_int_cst (uint64_type_node
,
28370 update_exception_mask
));
28372 tree new_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, fenv_llu
,
28373 build_int_cst (uint64_type_node
,
28374 new_exception_mask
));
28376 tree new_llu_mask
= build2 (BIT_IOR_EXPR
, uint64_type_node
,
28377 old_llu_and
, new_llu_and
);
28379 tree fenv_update_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
28382 tree update_mtfsf
= build_call_expr (mtfsf
, 2,
28383 build_int_cst (unsigned_type_node
, 0xff),
28384 fenv_update_mtfsf
);
28386 *update
= build2 (COMPOUND_EXPR
, void_type_node
, update_mffs
, update_mtfsf
);
28390 rs6000_generate_float2_double_code (rtx dst
, rtx src1
, rtx src2
)
28392 rtx rtx_tmp0
, rtx_tmp1
, rtx_tmp2
, rtx_tmp3
;
28394 rtx_tmp0
= gen_reg_rtx (V2DFmode
);
28395 rtx_tmp1
= gen_reg_rtx (V2DFmode
);
28397 /* The destination of the vmrgew instruction layout is:
28398 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
28399 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28400 vmrgew instruction will be correct. */
28401 if (BYTES_BIG_ENDIAN
)
28403 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0
, src1
, src2
,
28405 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1
, src1
, src2
,
28410 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0
, src1
, src2
, GEN_INT (3)));
28411 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1
, src1
, src2
, GEN_INT (0)));
28414 rtx_tmp2
= gen_reg_rtx (V4SFmode
);
28415 rtx_tmp3
= gen_reg_rtx (V4SFmode
);
28417 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2
, rtx_tmp0
));
28418 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3
, rtx_tmp1
));
28420 if (BYTES_BIG_ENDIAN
)
28421 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp2
, rtx_tmp3
));
28423 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp3
, rtx_tmp2
));
28427 rs6000_generate_float2_code (bool signed_convert
, rtx dst
, rtx src1
, rtx src2
)
28429 rtx rtx_tmp0
, rtx_tmp1
, rtx_tmp2
, rtx_tmp3
;
28431 rtx_tmp0
= gen_reg_rtx (V2DImode
);
28432 rtx_tmp1
= gen_reg_rtx (V2DImode
);
28434 /* The destination of the vmrgew instruction layout is:
28435 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
28436 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28437 vmrgew instruction will be correct. */
28438 if (BYTES_BIG_ENDIAN
)
28440 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0
, src1
, src2
, GEN_INT (0)));
28441 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1
, src1
, src2
, GEN_INT (3)));
28445 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0
, src1
, src2
, GEN_INT (3)));
28446 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1
, src1
, src2
, GEN_INT (0)));
28449 rtx_tmp2
= gen_reg_rtx (V4SFmode
);
28450 rtx_tmp3
= gen_reg_rtx (V4SFmode
);
28452 if (signed_convert
)
28454 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2
, rtx_tmp0
));
28455 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3
, rtx_tmp1
));
28459 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2
, rtx_tmp0
));
28460 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3
, rtx_tmp1
));
28463 if (BYTES_BIG_ENDIAN
)
28464 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp2
, rtx_tmp3
));
28466 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp3
, rtx_tmp2
));
28470 rs6000_generate_vsigned2_code (bool signed_convert
, rtx dst
, rtx src1
,
28473 rtx rtx_tmp0
, rtx_tmp1
, rtx_tmp2
, rtx_tmp3
;
28475 rtx_tmp0
= gen_reg_rtx (V2DFmode
);
28476 rtx_tmp1
= gen_reg_rtx (V2DFmode
);
28478 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0
, src1
, src2
, GEN_INT (0)));
28479 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1
, src1
, src2
, GEN_INT (3)));
28481 rtx_tmp2
= gen_reg_rtx (V4SImode
);
28482 rtx_tmp3
= gen_reg_rtx (V4SImode
);
28484 if (signed_convert
)
28486 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2
, rtx_tmp0
));
28487 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3
, rtx_tmp1
));
28491 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2
, rtx_tmp0
));
28492 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3
, rtx_tmp1
));
28495 emit_insn (gen_p8_vmrgew_v4si (dst
, rtx_tmp2
, rtx_tmp3
));
28498 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
28501 rs6000_optab_supported_p (int op
, machine_mode mode1
, machine_mode
,
28502 optimization_type opt_type
)
28507 return (opt_type
== OPTIMIZE_FOR_SPEED
28508 && RS6000_RECIP_AUTO_RSQRTE_P (mode1
));
28515 /* Implement TARGET_CONSTANT_ALIGNMENT. */
28517 static HOST_WIDE_INT
28518 rs6000_constant_alignment (const_tree exp
, HOST_WIDE_INT align
)
28520 if (TREE_CODE (exp
) == STRING_CST
28521 && (STRICT_ALIGNMENT
|| !optimize_size
))
28522 return MAX (align
, BITS_PER_WORD
);
28526 /* Implement TARGET_STARTING_FRAME_OFFSET. */
28528 static HOST_WIDE_INT
28529 rs6000_starting_frame_offset (void)
28531 if (FRAME_GROWS_DOWNWARD
)
28533 return RS6000_STARTING_FRAME_OFFSET
;
28536 /* Internal function to return the built-in function id for the complex
28537 multiply operation for a given mode. */
28539 static inline built_in_function
28540 complex_multiply_builtin_code (machine_mode mode
)
28542 gcc_assert (IN_RANGE (mode
, MIN_MODE_COMPLEX_FLOAT
, MAX_MODE_COMPLEX_FLOAT
));
28543 int func
= BUILT_IN_COMPLEX_MUL_MIN
+ mode
- MIN_MODE_COMPLEX_FLOAT
;
28544 return (built_in_function
) func
;
28547 /* Internal function to return the built-in function id for the complex divide
28548 operation for a given mode. */
28550 static inline built_in_function
28551 complex_divide_builtin_code (machine_mode mode
)
28553 gcc_assert (IN_RANGE (mode
, MIN_MODE_COMPLEX_FLOAT
, MAX_MODE_COMPLEX_FLOAT
));
28554 int func
= BUILT_IN_COMPLEX_DIV_MIN
+ mode
- MIN_MODE_COMPLEX_FLOAT
;
28555 return (built_in_function
) func
;
28558 /* On 64-bit Linux and Freebsd systems, possibly switch the long double library
28559 function names from <foo>l to <foo>f128 if the default long double type is
28560 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
28561 include file switches the names on systems that support long double as IEEE
28562 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
28563 In the future, glibc will export names like __ieee128_sinf128 and we can
28564 switch to using those instead of using sinf128, which pollutes the user's
28567 This will switch the names for Fortran math functions as well (which doesn't
28568 use math.h). However, Fortran needs other changes to the compiler and
28569 library before you can switch the real*16 type at compile time.
28571 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
28572 only do this transformation if the __float128 type is enabled. This
28573 prevents us from doing the transformation on older 32-bit ports that might
28574 have enabled using IEEE 128-bit floating point as the default long double
28577 We also use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change the
28578 function names used for complex multiply and divide to the appropriate
28582 rs6000_mangle_decl_assembler_name (tree decl
, tree id
)
28584 /* Handle complex multiply/divide. For IEEE 128-bit, use __mulkc3 or
28585 __divkc3 and for IBM 128-bit use __multc3 and __divtc3. */
28586 if (TARGET_FLOAT128_TYPE
28587 && TREE_CODE (decl
) == FUNCTION_DECL
28588 && DECL_IS_UNDECLARED_BUILTIN (decl
)
28589 && DECL_BUILT_IN_CLASS (decl
) == BUILT_IN_NORMAL
)
28591 built_in_function id
= DECL_FUNCTION_CODE (decl
);
28592 const char *newname
= NULL
;
28594 if (id
== complex_multiply_builtin_code (KCmode
))
28595 newname
= "__mulkc3";
28597 else if (id
== complex_multiply_builtin_code (ICmode
))
28598 newname
= "__multc3";
28600 else if (id
== complex_multiply_builtin_code (TCmode
))
28601 newname
= (TARGET_IEEEQUAD
) ? "__mulkc3" : "__multc3";
28603 else if (id
== complex_divide_builtin_code (KCmode
))
28604 newname
= "__divkc3";
28606 else if (id
== complex_divide_builtin_code (ICmode
))
28607 newname
= "__divtc3";
28609 else if (id
== complex_divide_builtin_code (TCmode
))
28610 newname
= (TARGET_IEEEQUAD
) ? "__divkc3" : "__divtc3";
28614 if (TARGET_DEBUG_BUILTIN
)
28615 fprintf (stderr
, "Map complex mul/div => %s\n", newname
);
28617 return get_identifier (newname
);
28621 /* Map long double built-in functions if long double is IEEE 128-bit. */
28622 if (TARGET_FLOAT128_TYPE
&& TARGET_IEEEQUAD
&& TARGET_LONG_DOUBLE_128
28623 && TREE_CODE (decl
) == FUNCTION_DECL
28624 && DECL_IS_UNDECLARED_BUILTIN (decl
)
28625 && DECL_BUILT_IN_CLASS (decl
) == BUILT_IN_NORMAL
)
28627 size_t len
= IDENTIFIER_LENGTH (id
);
28628 const char *name
= IDENTIFIER_POINTER (id
);
28629 char *newname
= NULL
;
28631 /* See if it is one of the built-in functions with an unusual name. */
28632 switch (DECL_FUNCTION_CODE (decl
))
28634 case BUILT_IN_DREML
:
28635 newname
= xstrdup ("__remainderieee128");
28638 case BUILT_IN_GAMMAL
:
28639 newname
= xstrdup ("__lgammaieee128");
28642 case BUILT_IN_GAMMAL_R
:
28643 case BUILT_IN_LGAMMAL_R
:
28644 newname
= xstrdup ("__lgammaieee128_r");
28647 case BUILT_IN_NEXTTOWARD
:
28648 newname
= xstrdup ("__nexttoward_to_ieee128");
28651 case BUILT_IN_NEXTTOWARDF
:
28652 newname
= xstrdup ("__nexttowardf_to_ieee128");
28655 case BUILT_IN_NEXTTOWARDL
:
28656 newname
= xstrdup ("__nexttowardieee128");
28659 case BUILT_IN_POW10L
:
28660 newname
= xstrdup ("__exp10ieee128");
28663 case BUILT_IN_SCALBL
:
28664 newname
= xstrdup ("__scalbieee128");
28667 case BUILT_IN_SIGNIFICANDL
:
28668 newname
= xstrdup ("__significandieee128");
28671 case BUILT_IN_SINCOSL
:
28672 newname
= xstrdup ("__sincosieee128");
28679 /* Update the __builtin_*printf and __builtin_*scanf functions. */
28682 size_t printf_len
= strlen ("printf");
28683 size_t scanf_len
= strlen ("scanf");
28684 size_t printf_chk_len
= strlen ("printf_chk");
28686 if (len
>= printf_len
28687 && strcmp (name
+ len
- printf_len
, "printf") == 0)
28688 newname
= xasprintf ("__%sieee128", name
);
28690 else if (len
>= scanf_len
28691 && strcmp (name
+ len
- scanf_len
, "scanf") == 0)
28692 newname
= xasprintf ("__isoc99_%sieee128", name
);
28694 else if (len
>= printf_chk_len
28695 && strcmp (name
+ len
- printf_chk_len
, "printf_chk") == 0)
28696 newname
= xasprintf ("%sieee128", name
);
28698 else if (name
[len
- 1] == 'l')
28700 bool uses_ieee128_p
= false;
28701 tree type
= TREE_TYPE (decl
);
28702 machine_mode ret_mode
= TYPE_MODE (type
);
28704 /* See if the function returns a IEEE 128-bit floating point type or
28706 if (ret_mode
== TFmode
|| ret_mode
== TCmode
)
28707 uses_ieee128_p
= true;
28710 function_args_iterator args_iter
;
28713 /* See if the function passes a IEEE 128-bit floating point type
28714 or complex type. */
28715 FOREACH_FUNCTION_ARGS (type
, arg
, args_iter
)
28717 machine_mode arg_mode
= TYPE_MODE (arg
);
28718 if (arg_mode
== TFmode
|| arg_mode
== TCmode
)
28720 uses_ieee128_p
= true;
28726 /* If we passed or returned an IEEE 128-bit floating point type,
28727 change the name. Use __<name>ieee128, instead of <name>l. */
28728 if (uses_ieee128_p
)
28729 newname
= xasprintf ("__%.*sieee128", (int)(len
- 1), name
);
28735 if (TARGET_DEBUG_BUILTIN
)
28736 fprintf (stderr
, "Map %s => %s\n", name
, newname
);
28738 id
= get_identifier (newname
);
28746 /* Predict whether the given loop in gimple will be transformed in the RTL
28747 doloop_optimize pass. */
28750 rs6000_predict_doloop_p (struct loop
*loop
)
28754 /* On rs6000, targetm.can_use_doloop_p is actually
28755 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
28756 if (loop
->inner
!= NULL
)
28758 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
28759 fprintf (dump_file
, "Predict doloop failure due to"
28760 " loop nesting.\n");
28767 /* Implement TARGET_PREFERRED_DOLOOP_MODE. */
28769 static machine_mode
28770 rs6000_preferred_doloop_mode (machine_mode
)
28775 /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
28778 rs6000_cannot_substitute_mem_equiv_p (rtx mem
)
28780 gcc_assert (MEM_P (mem
));
28782 /* curr_insn_transform()'s handling of subregs cannot handle altivec AND:
28783 type addresses, so don't allow MEMs with those address types to be
28784 substituted as an equivalent expression. See PR93974 for details. */
28785 if (GET_CODE (XEXP (mem
, 0)) == AND
)
28791 /* Implement TARGET_INVALID_CONVERSION. */
28793 static const char *
28794 rs6000_invalid_conversion (const_tree fromtype
, const_tree totype
)
28796 /* Make sure we're working with the canonical types. */
28797 if (TYPE_CANONICAL (fromtype
) != NULL_TREE
)
28798 fromtype
= TYPE_CANONICAL (fromtype
);
28799 if (TYPE_CANONICAL (totype
) != NULL_TREE
)
28800 totype
= TYPE_CANONICAL (totype
);
28802 machine_mode frommode
= TYPE_MODE (fromtype
);
28803 machine_mode tomode
= TYPE_MODE (totype
);
28805 if (frommode
!= tomode
)
28807 /* Do not allow conversions to/from XOmode and OOmode types. */
28808 if (frommode
== XOmode
)
28809 return N_("invalid conversion from type %<__vector_quad%>");
28810 if (tomode
== XOmode
)
28811 return N_("invalid conversion to type %<__vector_quad%>");
28812 if (frommode
== OOmode
)
28813 return N_("invalid conversion from type %<__vector_pair%>");
28814 if (tomode
== OOmode
)
28815 return N_("invalid conversion to type %<__vector_pair%>");
28818 /* Conversion allowed. */
28822 /* Convert a SFmode constant to the integer bit pattern. */
28825 rs6000_const_f32_to_i32 (rtx operand
)
28828 const struct real_value
*rv
= CONST_DOUBLE_REAL_VALUE (operand
);
28830 gcc_assert (GET_MODE (operand
) == SFmode
);
28831 REAL_VALUE_TO_TARGET_SINGLE (*rv
, value
);
28836 rs6000_emit_xxspltidp_v2df (rtx dst
, long value
)
28838 if (((value
& 0x7F800000) == 0) && ((value
& 0x7FFFFF) != 0))
28839 inform (input_location
,
28840 "the result for the xxspltidp instruction "
28841 "is undefined for subnormal input values");
28842 emit_insn( gen_xxspltidp_v2df_inst (dst
, GEN_INT (value
)));
28845 /* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC. */
28848 rs6000_gen_pic_addr_diff_vec (void)
28850 return rs6000_relative_jumptables
;
28854 rs6000_output_addr_vec_elt (FILE *file
, int value
)
28856 const char *directive
= TARGET_64BIT
? DOUBLE_INT_ASM_OP
: "\t.long\t";
28859 fprintf (file
, "%s", directive
);
28860 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", value
);
28861 assemble_name (file
, buf
);
28862 fprintf (file
, "\n");
28866 /* Copy an integer constant to the vector constant structure. */
28869 constant_int_to_128bit_vector (rtx op
,
28872 vec_const_128bit_type
*info
)
28874 unsigned HOST_WIDE_INT uvalue
= UINTVAL (op
);
28875 unsigned bitsize
= GET_MODE_BITSIZE (mode
);
28877 for (int shift
= bitsize
- 8; shift
>= 0; shift
-= 8)
28878 info
->bytes
[byte_num
++] = (uvalue
>> shift
) & 0xff;
28881 /* Copy a floating point constant to the vector constant structure. */
28884 constant_fp_to_128bit_vector (rtx op
,
28887 vec_const_128bit_type
*info
)
28889 unsigned bitsize
= GET_MODE_BITSIZE (mode
);
28890 unsigned num_words
= bitsize
/ 32;
28891 const REAL_VALUE_TYPE
*rtype
= CONST_DOUBLE_REAL_VALUE (op
);
28892 long real_words
[VECTOR_128BIT_WORDS
];
28894 /* Make sure we don't overflow the real_words array and that it is
28895 filled completely. */
28896 gcc_assert (num_words
<= VECTOR_128BIT_WORDS
&& (bitsize
% 32) == 0);
28898 real_to_target (real_words
, rtype
, mode
);
28900 /* Iterate over each 32-bit word in the floating point constant. The
28901 real_to_target function puts out words in target endian fashion. We need
28902 to arrange the order so that the bytes are written in big endian order. */
28903 for (unsigned num
= 0; num
< num_words
; num
++)
28905 unsigned endian_num
= (BYTES_BIG_ENDIAN
28907 : num_words
- 1 - num
);
28909 unsigned uvalue
= real_words
[endian_num
];
28910 for (int shift
= 32 - 8; shift
>= 0; shift
-= 8)
28911 info
->bytes
[byte_num
++] = (uvalue
>> shift
) & 0xff;
28914 /* Mark that this constant involves floating point. */
28915 info
->fp_constant_p
= true;
28918 /* Convert a vector constant OP with mode MODE to a vector 128-bit constant
28921 Break out the constant out to bytes, half words, words, and double words.
28922 Return true if we have successfully converted the constant.
28924 We handle CONST_INT, CONST_DOUBLE, CONST_VECTOR, and VEC_DUPLICATE of
28925 constants. Integer and floating point scalar constants are splatted to fill
28929 vec_const_128bit_to_bytes (rtx op
,
28931 vec_const_128bit_type
*info
)
28933 /* Initialize the constant structure. */
28934 memset ((void *)info
, 0, sizeof (vec_const_128bit_type
));
28936 /* Assume CONST_INTs are DImode. */
28937 if (mode
== VOIDmode
)
28938 mode
= CONST_INT_P (op
) ? DImode
: GET_MODE (op
);
28940 if (mode
== VOIDmode
)
28943 unsigned size
= GET_MODE_SIZE (mode
);
28944 bool splat_p
= false;
28946 if (size
> VECTOR_128BIT_BYTES
)
28949 /* Set up the bits. */
28950 switch (GET_CODE (op
))
28952 /* Integer constants, default to double word. */
28955 constant_int_to_128bit_vector (op
, mode
, 0, info
);
28960 /* Floating point constants. */
28963 /* Fail if the floating point constant is the wrong mode. */
28964 if (GET_MODE (op
) != mode
)
28967 /* SFmode stored as scalars are stored in DFmode format. */
28968 if (mode
== SFmode
)
28971 size
= GET_MODE_SIZE (DFmode
);
28974 constant_fp_to_128bit_vector (op
, mode
, 0, info
);
28979 /* Vector constants, iterate over each element. On little endian
28980 systems, we have to reverse the element numbers. */
28983 /* Fail if the vector constant is the wrong mode or size. */
28984 if (GET_MODE (op
) != mode
28985 || GET_MODE_SIZE (mode
) != VECTOR_128BIT_BYTES
)
28988 machine_mode ele_mode
= GET_MODE_INNER (mode
);
28989 size_t ele_size
= GET_MODE_SIZE (ele_mode
);
28990 size_t nunits
= GET_MODE_NUNITS (mode
);
28992 for (size_t num
= 0; num
< nunits
; num
++)
28994 rtx ele
= CONST_VECTOR_ELT (op
, num
);
28995 size_t byte_num
= (BYTES_BIG_ENDIAN
28997 : nunits
- 1 - num
) * ele_size
;
28999 if (CONST_INT_P (ele
))
29000 constant_int_to_128bit_vector (ele
, ele_mode
, byte_num
, info
);
29001 else if (CONST_DOUBLE_P (ele
))
29002 constant_fp_to_128bit_vector (ele
, ele_mode
, byte_num
, info
);
29010 /* Treat VEC_DUPLICATE of a constant just like a vector constant.
29011 Since we are duplicating the element, we don't have to worry about
29013 case VEC_DUPLICATE
:
29015 /* Fail if the vector duplicate is the wrong mode or size. */
29016 if (GET_MODE (op
) != mode
29017 || GET_MODE_SIZE (mode
) != VECTOR_128BIT_BYTES
)
29020 machine_mode ele_mode
= GET_MODE_INNER (mode
);
29021 size_t ele_size
= GET_MODE_SIZE (ele_mode
);
29022 rtx ele
= XEXP (op
, 0);
29023 size_t nunits
= GET_MODE_NUNITS (mode
);
29025 if (!CONST_INT_P (ele
) && !CONST_DOUBLE_P (ele
))
29028 for (size_t num
= 0; num
< nunits
; num
++)
29030 size_t byte_num
= num
* ele_size
;
29032 if (CONST_INT_P (ele
))
29033 constant_int_to_128bit_vector (ele
, ele_mode
, byte_num
, info
);
29035 constant_fp_to_128bit_vector (ele
, ele_mode
, byte_num
, info
);
29041 /* Any thing else, just return failure. */
29046 /* Splat the constant to fill 128 bits if desired. */
29047 if (splat_p
&& size
< VECTOR_128BIT_BYTES
)
29049 if ((VECTOR_128BIT_BYTES
% size
) != 0)
29052 for (size_t offset
= size
;
29053 offset
< VECTOR_128BIT_BYTES
;
29055 memcpy ((void *) &info
->bytes
[offset
],
29056 (void *) &info
->bytes
[0],
29060 /* Remember original size. */
29061 info
->original_size
= size
;
29063 /* Determine if the bytes are all the same. */
29064 unsigned char first_byte
= info
->bytes
[0];
29065 info
->all_bytes_same
= true;
29066 for (size_t i
= 1; i
< VECTOR_128BIT_BYTES
; i
++)
29067 if (first_byte
!= info
->bytes
[i
])
29069 info
->all_bytes_same
= false;
29073 /* Pack half words together & determine if all of the half words are the
29075 for (size_t i
= 0; i
< VECTOR_128BIT_HALF_WORDS
; i
++)
29076 info
->half_words
[i
] = ((info
->bytes
[i
* 2] << 8)
29077 | info
->bytes
[(i
* 2) + 1]);
29079 unsigned short first_hword
= info
->half_words
[0];
29080 info
->all_half_words_same
= true;
29081 for (size_t i
= 1; i
< VECTOR_128BIT_HALF_WORDS
; i
++)
29082 if (first_hword
!= info
->half_words
[i
])
29084 info
->all_half_words_same
= false;
29088 /* Pack words together & determine if all of the words are the same. */
29089 for (size_t i
= 0; i
< VECTOR_128BIT_WORDS
; i
++)
29090 info
->words
[i
] = ((info
->bytes
[i
* 4] << 24)
29091 | (info
->bytes
[(i
* 4) + 1] << 16)
29092 | (info
->bytes
[(i
* 4) + 2] << 8)
29093 | info
->bytes
[(i
* 4) + 3]);
29095 info
->all_words_same
29096 = (info
->words
[0] == info
->words
[1]
29097 && info
->words
[0] == info
->words
[2]
29098 && info
->words
[0] == info
->words
[3]);
29100 /* Pack double words together & determine if all of the double words are the
29102 for (size_t i
= 0; i
< VECTOR_128BIT_DOUBLE_WORDS
; i
++)
29104 unsigned HOST_WIDE_INT d_word
= 0;
29105 for (size_t j
= 0; j
< 8; j
++)
29106 d_word
= (d_word
<< 8) | info
->bytes
[(i
* 8) + j
];
29108 info
->double_words
[i
] = d_word
;
29111 info
->all_double_words_same
29112 = (info
->double_words
[0] == info
->double_words
[1]);
29117 /* Determine if an IEEE 128-bit constant can be loaded with LXVKQ. Return zero
29118 if the LXVKQ instruction cannot be used. Otherwise return the immediate
29119 value to be used with the LXVKQ instruction. */
29122 constant_generates_lxvkq (vec_const_128bit_type
*vsx_const
)
29124 /* Is the instruction supported with power10 code generation, IEEE 128-bit
29125 floating point hardware and VSX registers are available. */
29126 if (!TARGET_IEEE128_CONSTANT
|| !TARGET_FLOAT128_HW
|| !TARGET_POWER10
29130 /* All of the constants that are generated by LXVKQ have the bottom 3 words
29132 if (vsx_const
->words
[1] != 0
29133 || vsx_const
->words
[2] != 0
29134 || vsx_const
->words
[3] != 0)
29137 /* See if we have a match for the first word. */
29138 switch (vsx_const
->words
[0])
29140 case 0x3FFF0000U
: return 1; /* IEEE 128-bit +1.0. */
29141 case 0x40000000U
: return 2; /* IEEE 128-bit +2.0. */
29142 case 0x40008000U
: return 3; /* IEEE 128-bit +3.0. */
29143 case 0x40010000U
: return 4; /* IEEE 128-bit +4.0. */
29144 case 0x40014000U
: return 5; /* IEEE 128-bit +5.0. */
29145 case 0x40018000U
: return 6; /* IEEE 128-bit +6.0. */
29146 case 0x4001C000U
: return 7; /* IEEE 128-bit +7.0. */
29147 case 0x7FFF0000U
: return 8; /* IEEE 128-bit +Infinity. */
29148 case 0x7FFF8000U
: return 9; /* IEEE 128-bit quiet NaN. */
29149 case 0x80000000U
: return 16; /* IEEE 128-bit -0.0. */
29150 case 0xBFFF0000U
: return 17; /* IEEE 128-bit -1.0. */
29151 case 0xC0000000U
: return 18; /* IEEE 128-bit -2.0. */
29152 case 0xC0008000U
: return 19; /* IEEE 128-bit -3.0. */
29153 case 0xC0010000U
: return 20; /* IEEE 128-bit -4.0. */
29154 case 0xC0014000U
: return 21; /* IEEE 128-bit -5.0. */
29155 case 0xC0018000U
: return 22; /* IEEE 128-bit -6.0. */
29156 case 0xC001C000U
: return 23; /* IEEE 128-bit -7.0. */
29157 case 0xFFFF0000U
: return 24; /* IEEE 128-bit -Infinity. */
29159 /* anything else cannot be loaded. */
29167 /* Determine if a vector constant can be loaded with XXSPLTIW. Return zero if
29168 the XXSPLTIW instruction cannot be used. Otherwise return the immediate
29169 value to be used with the XXSPLTIW instruction. */
29172 constant_generates_xxspltiw (vec_const_128bit_type
*vsx_const
)
29174 if (!TARGET_SPLAT_WORD_CONSTANT
|| !TARGET_PREFIXED
|| !TARGET_VSX
)
29177 if (!vsx_const
->all_words_same
)
29180 /* If we can use XXSPLTIB, don't generate XXSPLTIW. */
29181 if (vsx_const
->all_bytes_same
)
29184 /* See if we can use VSPLTISH or VSPLTISW. */
29185 if (vsx_const
->all_half_words_same
)
29187 short sign_h_word
= vsx_const
->half_words
[0];
29188 if (EASY_VECTOR_15 (sign_h_word
))
29192 int sign_word
= vsx_const
->words
[0];
29193 if (EASY_VECTOR_15 (sign_word
))
29196 return vsx_const
->words
[0];
29199 /* Determine if a vector constant can be loaded with XXSPLTIDP. Return zero if
29200 the XXSPLTIDP instruction cannot be used. Otherwise return the immediate
29201 value to be used with the XXSPLTIDP instruction. */
29204 constant_generates_xxspltidp (vec_const_128bit_type
*vsx_const
)
29206 if (!TARGET_SPLAT_FLOAT_CONSTANT
|| !TARGET_PREFIXED
|| !TARGET_VSX
)
29209 /* Reject if the two 64-bit segments are not the same. */
29210 if (!vsx_const
->all_double_words_same
)
29213 /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP.
29214 Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW). */
29215 if (vsx_const
->all_bytes_same
29216 || vsx_const
->all_half_words_same
29217 || vsx_const
->all_words_same
)
29220 unsigned HOST_WIDE_INT value
= vsx_const
->double_words
[0];
29222 /* Avoid values that look like DFmode NaN's, except for the normal NaN bit
29223 pattern and the signalling NaN bit pattern. Recognize infinity and
29224 negative infinity. */
29226 /* Bit representation of DFmode normal quiet NaN. */
29227 #define RS6000_CONST_DF_NAN HOST_WIDE_INT_UC (0x7ff8000000000000)
29229 /* Bit representation of DFmode normal signaling NaN. */
29230 #define RS6000_CONST_DF_NANS HOST_WIDE_INT_UC (0x7ff4000000000000)
29232 /* Bit representation of DFmode positive infinity. */
29233 #define RS6000_CONST_DF_INF HOST_WIDE_INT_UC (0x7ff0000000000000)
29235 /* Bit representation of DFmode negative infinity. */
29236 #define RS6000_CONST_DF_NEG_INF HOST_WIDE_INT_UC (0xfff0000000000000)
29238 if (value
!= RS6000_CONST_DF_NAN
29239 && value
!= RS6000_CONST_DF_NANS
29240 && value
!= RS6000_CONST_DF_INF
29241 && value
!= RS6000_CONST_DF_NEG_INF
)
29243 /* The IEEE 754 64-bit floating format has 1 bit for sign, 11 bits for
29244 the exponent, and 52 bits for the mantissa (not counting the hidden
29245 bit used for normal numbers). NaN values have the exponent set to all
29246 1 bits, and the mantissa non-zero (mantissa == 0 is infinity). */
29248 int df_exponent
= (value
>> 52) & 0x7ff;
29249 unsigned HOST_WIDE_INT
29250 df_mantissa
= value
& ((HOST_WIDE_INT_1U
<< 52) - HOST_WIDE_INT_1U
);
29252 if (df_exponent
== 0x7ff && df_mantissa
!= 0) /* other NaNs. */
29255 /* Avoid values that are DFmode subnormal values. Subnormal numbers have
29256 the exponent all 0 bits, and the mantissa non-zero. If the value is
29257 subnormal, then the hidden bit in the mantissa is not set. */
29258 if (df_exponent
== 0 && df_mantissa
!= 0) /* subnormal. */
29262 /* Change the representation to DFmode constant. */
29263 long df_words
[2] = { vsx_const
->words
[0], vsx_const
->words
[1] };
29265 /* real_from_target takes the target words in target order. */
29266 if (!BYTES_BIG_ENDIAN
)
29267 std::swap (df_words
[0], df_words
[1]);
29269 REAL_VALUE_TYPE rv_type
;
29270 real_from_target (&rv_type
, df_words
, DFmode
);
29272 const REAL_VALUE_TYPE
*rv
= &rv_type
;
29274 /* Validate that the number can be stored as a SFmode value. */
29275 if (!exact_real_truncate (SFmode
, rv
))
29278 /* Validate that the number is not a SFmode subnormal value (exponent is 0,
29279 mantissa field is non-zero) which is undefined for the XXSPLTIDP
29282 real_to_target (&sf_value
, rv
, SFmode
);
29284 /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent,
29285 and 23 bits for the mantissa. Subnormal numbers have the exponent all
29286 0 bits, and the mantissa non-zero. */
29287 long sf_exponent
= (sf_value
>> 23) & 0xFF;
29288 long sf_mantissa
= sf_value
& 0x7FFFFF;
29290 if (sf_exponent
== 0 && sf_mantissa
!= 0)
29293 /* Return the immediate to be used. */
29297 /* Now we have only two opaque types, they are __vector_quad and
29298 __vector_pair built-in types. They are target specific and
29299 only available when MMA is supported. With MMA supported, it
29300 simply returns true, otherwise it checks if the given gimple
29301 STMT is an assignment, asm or call stmt and uses either of
29302 these two opaque types unexpectedly, if yes, it would raise
29303 an error message and returns true, otherwise it returns false. */
29306 rs6000_opaque_type_invalid_use_p (gimple
*stmt
)
29311 /* If the given TYPE is one MMA opaque type, emit the corresponding
29312 error messages and return true, otherwise return false. */
29313 auto check_and_error_invalid_use
= [](tree type
)
29315 tree mv
= TYPE_MAIN_VARIANT (type
);
29316 if (mv
== vector_quad_type_node
)
29318 error ("type %<__vector_quad%> requires the %qs option", "-mmma");
29321 else if (mv
== vector_pair_type_node
)
29323 error ("type %<__vector_pair%> requires the %qs option", "-mmma");
29331 /* The usage of MMA opaque types is very limited for now,
29332 to check with gassign, gasm and gcall is enough so far. */
29333 if (gassign
*ga
= dyn_cast
<gassign
*> (stmt
))
29335 tree lhs
= gimple_assign_lhs (ga
);
29336 tree type
= TREE_TYPE (lhs
);
29337 if (check_and_error_invalid_use (type
))
29340 else if (gasm
*gs
= dyn_cast
<gasm
*> (stmt
))
29342 unsigned ninputs
= gimple_asm_ninputs (gs
);
29343 for (unsigned i
= 0; i
< ninputs
; i
++)
29345 tree op
= gimple_asm_input_op (gs
, i
);
29346 tree val
= TREE_VALUE (op
);
29347 tree type
= TREE_TYPE (val
);
29348 if (check_and_error_invalid_use (type
))
29351 unsigned noutputs
= gimple_asm_noutputs (gs
);
29352 for (unsigned i
= 0; i
< noutputs
; i
++)
29354 tree op
= gimple_asm_output_op (gs
, i
);
29355 tree val
= TREE_VALUE (op
);
29356 tree type
= TREE_TYPE (val
);
29357 if (check_and_error_invalid_use (type
))
29361 else if (gcall
*gc
= dyn_cast
<gcall
*> (stmt
))
29363 unsigned nargs
= gimple_call_num_args (gc
);
29364 for (unsigned i
= 0; i
< nargs
; i
++)
29366 tree arg
= gimple_call_arg (gc
, i
);
29367 tree type
= TREE_TYPE (arg
);
29368 if (check_and_error_invalid_use (type
))
29377 struct gcc_target targetm
= TARGET_INITIALIZER
;
29379 #include "gt-rs6000.h"