1 // SPDX-License-Identifier: GPL-3.0-or-later
2 /* Subroutines used for code generation on IBM RS/6000.
3 Copyright (C) 1991-2023 Free Software Foundation, Inc.
4 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #define IN_TARGET_CODE 1
26 #include "coretypes.h"
36 #include "stringpool.h"
43 #include "diagnostic-core.h"
44 #include "insn-attr.h"
47 #include "fold-const.h"
49 #include "stor-layout.h"
51 #include "print-tree.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
59 #include "sched-int.h"
61 #include "gimple-iterator.h"
62 #include "gimple-fold.h"
63 #include "gimple-walk.h"
65 #include "tree-vectorizer.h"
66 #include "tree-ssa-propagate.h"
68 #include "tm-constrs.h"
69 #include "target-globals.h"
71 #include "tree-vector-builder.h"
73 #include "tree-pass.h"
74 #include "symbol-summary.h"
76 #include "ipa-fnsummary.h"
78 #include "case-cfn-macros.h"
80 #include "rs6000-internal.h"
83 /* This file should be included last. */
84 #include "target-def.h"
86 extern tree
rs6000_builtin_mask_for_load (void);
87 extern tree
rs6000_builtin_md_vectorized_function (tree
, tree
, tree
);
88 extern tree
rs6000_builtin_reciprocal (tree
);
/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif
/* Don't enable PC-relative addressing if the target does not support it.  */
#ifndef PCREL_SUPPORTED_BY_OS
#define PCREL_SUPPORTED_BY_OS 0
#endif
110 /* Counter for labels which are to be placed in .fixup. */
111 int fixuplabelno
= 0;
114 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
117 /* Specify the machine mode that pointers have. After generation of rtl, the
118 compiler makes no further distinction between pointers and any other objects
119 of this machine mode. */
120 scalar_int_mode rs6000_pmode
;
122 /* Track use of r13 in 64bit AIX TLS. */
123 static bool xcoff_tls_exec_model_detected
= false;
125 /* Width in bits of a pointer. */
126 unsigned rs6000_pointer_size
;
#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif
145 /* Value is TRUE if register/mode pair is acceptable. */
146 static bool rs6000_hard_regno_mode_ok_p
147 [NUM_MACHINE_MODES
][FIRST_PSEUDO_REGISTER
];
149 /* Maximum number of registers needed for a given register class and mode. */
150 unsigned char rs6000_class_max_nregs
[NUM_MACHINE_MODES
][LIM_REG_CLASSES
];
152 /* How many registers are needed for a given register and mode. */
153 unsigned char rs6000_hard_regno_nregs
[NUM_MACHINE_MODES
][FIRST_PSEUDO_REGISTER
];
155 /* Map register number to register class. */
156 enum reg_class rs6000_regno_regclass
[FIRST_PSEUDO_REGISTER
];
158 static int dbg_cost_ctrl
;
160 /* Flag to say the TOC is initialized */
161 int toc_initialized
, need_toc_init
;
162 char toc_label_name
[10];
164 /* Cached value of rs6000_variable_issue. This is cached in
165 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
166 static short cached_can_issue_more
;
168 static GTY(()) section
*read_only_data_section
;
169 static GTY(()) section
*private_data_section
;
170 static GTY(()) section
*tls_data_section
;
171 static GTY(()) section
*tls_private_data_section
;
172 static GTY(()) section
*read_only_private_data_section
;
173 static GTY(()) section
*sdata2_section
;
175 section
*toc_section
= 0;
177 /* Describe the vector unit used for modes. */
178 enum rs6000_vector rs6000_vector_unit
[NUM_MACHINE_MODES
];
179 enum rs6000_vector rs6000_vector_mem
[NUM_MACHINE_MODES
];
181 /* Register classes for various constraints that are based on the target
183 enum reg_class rs6000_constraints
[RS6000_CONSTRAINT_MAX
];
185 /* Describe the alignment of a vector. */
186 int rs6000_vector_align
[NUM_MACHINE_MODES
];
188 /* What modes to automatically generate reciprocal divide estimate (fre) and
189 reciprocal sqrt (frsqrte) for. */
190 unsigned char rs6000_recip_bits
[MAX_MACHINE_MODE
];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask
{
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combination of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
218 /* -mrecip options. */
221 const char *string
; /* option name */
222 unsigned int mask
; /* mask bits to set */
223 } recip_options
[] = {
224 { "all", RECIP_ALL
},
225 { "none", RECIP_NONE
},
226 { "div", (RECIP_SF_DIV
| RECIP_DF_DIV
| RECIP_V4SF_DIV
228 { "divf", (RECIP_SF_DIV
| RECIP_V4SF_DIV
) },
229 { "divd", (RECIP_DF_DIV
| RECIP_V2DF_DIV
) },
230 { "rsqrt", (RECIP_SF_RSQRT
| RECIP_DF_RSQRT
| RECIP_V4SF_RSQRT
231 | RECIP_V2DF_RSQRT
) },
232 { "rsqrtf", (RECIP_SF_RSQRT
| RECIP_V4SF_RSQRT
) },
233 { "rsqrtd", (RECIP_DF_RSQRT
| RECIP_V2DF_RSQRT
) },
/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT		= 0,		/* default clone.  */
  CLONE_ISA_2_05,			/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,			/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,			/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,			/* ISA 3.0 (power9).  */
  CLONE_ISA_3_1,			/* ISA 3.1 (power10).  */
  CLONE_MAX
};
250 /* Map compiler ISA bits into HWCAP names. */
252 HOST_WIDE_INT isa_mask
; /* rs6000_isa mask */
253 const char *name
; /* name to use in __builtin_cpu_supports. */
256 static const struct clone_map rs6000_clone_map
[CLONE_MAX
] = {
257 { 0, "" }, /* Default options. */
258 { OPTION_MASK_CMPB
, "arch_2_05" }, /* ISA 2.05 (power6). */
259 { OPTION_MASK_POPCNTD
, "arch_2_06" }, /* ISA 2.06 (power7). */
260 { OPTION_MASK_P8_VECTOR
, "arch_2_07" }, /* ISA 2.07 (power8). */
261 { OPTION_MASK_P9_VECTOR
, "arch_3_00" }, /* ISA 3.0 (power9). */
262 { OPTION_MASK_POWER10
, "arch_3_1" }, /* ISA 3.1 (power10). */
266 /* Newer LIBCs explicitly export this symbol to declare that they provide
267 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
268 reference to this symbol whenever we expand a CPU builtin, so that
269 we never link against an old LIBC. */
270 const char *tcb_verification_symbol
= "__parse_hwcap_and_convert_at_platform";
272 /* True if we have expanded a CPU builtin. */
273 bool cpu_builtin_p
= false;
275 /* Pointer to function (in rs6000-c.cc) that can define or undefine target
276 macros that have changed. Languages that don't support the preprocessor
277 don't link in rs6000-c.cc, so we can't call it directly. */
278 void (*rs6000_target_modify_macros_ptr
) (bool, HOST_WIDE_INT
);
280 /* Simplfy register classes into simpler classifications. We assume
281 GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
282 check for standard register classes (gpr/floating/altivec/vsx) and
283 floating/vector classes (float/altivec/vsx). */
285 enum rs6000_reg_type
{
297 /* Map register class to register type. */
298 static enum rs6000_reg_type reg_class_to_reg_type
[N_REG_CLASSES
];
300 /* First/last register type for the 'normal' register types (i.e. general
301 purpose, floating point, altivec, and VSX registers). */
302 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
304 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or go if legitimate
   address.  We only need to worry about GPR, FPR, Altivec, and DMR registers
   here, along an ANY field that is the OR of the 4 register classes.  */
enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,			/* General purpose registers.  */
  RELOAD_REG_FPR,			/* Traditional floating point regs.  */
  RELOAD_REG_VMX,			/* Altivec (VMX) registers.  */
  RELOAD_REG_DMR,			/* DMR registers.  */
  RELOAD_REG_ANY,			/* OR of GPR/FPR/VMX/DMR masks.  */
  N_RELOAD_REG				/* Number of reload register types.  */
};

/* For setting up register classes, loop through the 4 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_DMR
326 /* Map reload register type to a register in the register class. */
327 struct reload_reg_map_type
{
328 const char *name
; /* Register class name. */
329 int reg
; /* Register in the register class. */
332 static const struct reload_reg_map_type reload_reg_map
[N_RELOAD_REG
] = {
333 { "Gpr", FIRST_GPR_REGNO
}, /* RELOAD_REG_GPR. */
334 { "Fpr", FIRST_FPR_REGNO
}, /* RELOAD_REG_FPR. */
335 { "VMX", FIRST_ALTIVEC_REGNO
}, /* RELOAD_REG_VMX. */
336 { "DMR", FIRST_DMR_REGNO
}, /* RELOAD_REG_DMR. */
337 { "Any", -1 }, /* RELOAD_REG_ANY. */
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive which types can do PRE_MODIFY instead of
   PRE_INC and PRE_DEC, so keep track of separate bits for these two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */
354 /* Register type masks based on the type, of valid addressing modes. */
355 struct rs6000_reg_addr
{
356 enum insn_code reload_load
; /* INSN to reload for loading. */
357 enum insn_code reload_store
; /* INSN to reload for storing. */
358 enum insn_code reload_fpr_gpr
; /* INSN to move from FPR to GPR. */
359 enum insn_code reload_gpr_vsx
; /* INSN to move from GPR to VSX. */
360 enum insn_code reload_vsx_gpr
; /* INSN to move from VSX to GPR. */
361 addr_mask_type addr_mask
[(int)N_RELOAD_REG
]; /* Valid address masks. */
362 bool scalar_in_vmx_p
; /* Scalar value can go in VMX. */
365 static struct rs6000_reg_addr reg_addr
[NUM_MACHINE_MODES
];
367 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
369 mode_supports_pre_incdec_p (machine_mode mode
)
371 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_PRE_INCDEC
)
375 /* Helper function to say whether a mode supports PRE_MODIFY. */
377 mode_supports_pre_modify_p (machine_mode mode
)
379 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_PRE_MODIFY
)
383 /* Return true if we have D-form addressing in altivec registers. */
385 mode_supports_vmx_dform (machine_mode mode
)
387 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_OFFSET
) != 0);
390 /* Return true if we have D-form addressing in VSX registers. This addressing
391 is more limited than normal d-form addressing in that the offset must be
392 aligned on a 16-byte boundary. */
394 mode_supports_dq_form (machine_mode mode
)
396 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_QUAD_OFFSET
)
400 /* Given that there exists at least one variable that is set (produced)
401 by OUT_INSN and read (consumed) by IN_INSN, return true iff
402 IN_INSN represents one or more memory store operations and none of
403 the variables set by OUT_INSN is used by IN_INSN as the address of a
404 store operation. If either IN_INSN or OUT_INSN does not represent
405 a "single" RTL SET expression (as loosely defined by the
406 implementation of the single_set function) or a PARALLEL with only
407 SETs, CLOBBERs, and USEs inside, this function returns false.
409 This rs6000-specific version of store_data_bypass_p checks for
410 certain conditions that result in assertion failures (and internal
411 compiler errors) in the generic store_data_bypass_p function and
412 returns false rather than calling store_data_bypass_p if one of the
413 problematic conditions is detected. */
416 rs6000_store_data_bypass_p (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
423 in_set
= single_set (in_insn
);
426 if (MEM_P (SET_DEST (in_set
)))
428 out_set
= single_set (out_insn
);
431 out_pat
= PATTERN (out_insn
);
432 if (GET_CODE (out_pat
) == PARALLEL
)
434 for (i
= 0; i
< XVECLEN (out_pat
, 0); i
++)
436 out_exp
= XVECEXP (out_pat
, 0, i
);
437 if ((GET_CODE (out_exp
) == CLOBBER
)
438 || (GET_CODE (out_exp
) == USE
))
440 else if (GET_CODE (out_exp
) != SET
)
449 in_pat
= PATTERN (in_insn
);
450 if (GET_CODE (in_pat
) != PARALLEL
)
453 for (i
= 0; i
< XVECLEN (in_pat
, 0); i
++)
455 in_exp
= XVECEXP (in_pat
, 0, i
);
456 if ((GET_CODE (in_exp
) == CLOBBER
) || (GET_CODE (in_exp
) == USE
))
458 else if (GET_CODE (in_exp
) != SET
)
461 if (MEM_P (SET_DEST (in_exp
)))
463 out_set
= single_set (out_insn
);
466 out_pat
= PATTERN (out_insn
);
467 if (GET_CODE (out_pat
) != PARALLEL
)
469 for (j
= 0; j
< XVECLEN (out_pat
, 0); j
++)
471 out_exp
= XVECEXP (out_pat
, 0, j
);
472 if ((GET_CODE (out_exp
) == CLOBBER
)
473 || (GET_CODE (out_exp
) == USE
))
475 else if (GET_CODE (out_exp
) != SET
)
482 return store_data_bypass_p (out_insn
, in_insn
);
/* Processor costs (relative to an add).  */
const struct processor_costs *rs6000_cost;
490 /* Instruction size costs on 32bit processors. */
492 struct processor_costs size32_cost
= {
493 COSTS_N_INSNS (1), /* mulsi */
494 COSTS_N_INSNS (1), /* mulsi_const */
495 COSTS_N_INSNS (1), /* mulsi_const9 */
496 COSTS_N_INSNS (1), /* muldi */
497 COSTS_N_INSNS (1), /* divsi */
498 COSTS_N_INSNS (1), /* divdi */
499 COSTS_N_INSNS (1), /* fp */
500 COSTS_N_INSNS (1), /* dmul */
501 COSTS_N_INSNS (1), /* sdiv */
502 COSTS_N_INSNS (1), /* ddiv */
503 32, /* cache line size */
507 0, /* SF->DF convert */
510 /* Instruction size costs on 64bit processors. */
512 struct processor_costs size64_cost
= {
513 COSTS_N_INSNS (1), /* mulsi */
514 COSTS_N_INSNS (1), /* mulsi_const */
515 COSTS_N_INSNS (1), /* mulsi_const9 */
516 COSTS_N_INSNS (1), /* muldi */
517 COSTS_N_INSNS (1), /* divsi */
518 COSTS_N_INSNS (1), /* divdi */
519 COSTS_N_INSNS (1), /* fp */
520 COSTS_N_INSNS (1), /* dmul */
521 COSTS_N_INSNS (1), /* sdiv */
522 COSTS_N_INSNS (1), /* ddiv */
523 128, /* cache line size */
527 0, /* SF->DF convert */
530 /* Instruction costs on RS64A processors. */
532 struct processor_costs rs64a_cost
= {
533 COSTS_N_INSNS (20), /* mulsi */
534 COSTS_N_INSNS (12), /* mulsi_const */
535 COSTS_N_INSNS (8), /* mulsi_const9 */
536 COSTS_N_INSNS (34), /* muldi */
537 COSTS_N_INSNS (65), /* divsi */
538 COSTS_N_INSNS (67), /* divdi */
539 COSTS_N_INSNS (4), /* fp */
540 COSTS_N_INSNS (4), /* dmul */
541 COSTS_N_INSNS (31), /* sdiv */
542 COSTS_N_INSNS (31), /* ddiv */
543 128, /* cache line size */
547 0, /* SF->DF convert */
550 /* Instruction costs on MPCCORE processors. */
552 struct processor_costs mpccore_cost
= {
553 COSTS_N_INSNS (2), /* mulsi */
554 COSTS_N_INSNS (2), /* mulsi_const */
555 COSTS_N_INSNS (2), /* mulsi_const9 */
556 COSTS_N_INSNS (2), /* muldi */
557 COSTS_N_INSNS (6), /* divsi */
558 COSTS_N_INSNS (6), /* divdi */
559 COSTS_N_INSNS (4), /* fp */
560 COSTS_N_INSNS (5), /* dmul */
561 COSTS_N_INSNS (10), /* sdiv */
562 COSTS_N_INSNS (17), /* ddiv */
563 32, /* cache line size */
567 0, /* SF->DF convert */
570 /* Instruction costs on PPC403 processors. */
572 struct processor_costs ppc403_cost
= {
573 COSTS_N_INSNS (4), /* mulsi */
574 COSTS_N_INSNS (4), /* mulsi_const */
575 COSTS_N_INSNS (4), /* mulsi_const9 */
576 COSTS_N_INSNS (4), /* muldi */
577 COSTS_N_INSNS (33), /* divsi */
578 COSTS_N_INSNS (33), /* divdi */
579 COSTS_N_INSNS (11), /* fp */
580 COSTS_N_INSNS (11), /* dmul */
581 COSTS_N_INSNS (11), /* sdiv */
582 COSTS_N_INSNS (11), /* ddiv */
583 32, /* cache line size */
587 0, /* SF->DF convert */
590 /* Instruction costs on PPC405 processors. */
592 struct processor_costs ppc405_cost
= {
593 COSTS_N_INSNS (5), /* mulsi */
594 COSTS_N_INSNS (4), /* mulsi_const */
595 COSTS_N_INSNS (3), /* mulsi_const9 */
596 COSTS_N_INSNS (5), /* muldi */
597 COSTS_N_INSNS (35), /* divsi */
598 COSTS_N_INSNS (35), /* divdi */
599 COSTS_N_INSNS (11), /* fp */
600 COSTS_N_INSNS (11), /* dmul */
601 COSTS_N_INSNS (11), /* sdiv */
602 COSTS_N_INSNS (11), /* ddiv */
603 32, /* cache line size */
607 0, /* SF->DF convert */
610 /* Instruction costs on PPC440 processors. */
612 struct processor_costs ppc440_cost
= {
613 COSTS_N_INSNS (3), /* mulsi */
614 COSTS_N_INSNS (2), /* mulsi_const */
615 COSTS_N_INSNS (2), /* mulsi_const9 */
616 COSTS_N_INSNS (3), /* muldi */
617 COSTS_N_INSNS (34), /* divsi */
618 COSTS_N_INSNS (34), /* divdi */
619 COSTS_N_INSNS (5), /* fp */
620 COSTS_N_INSNS (5), /* dmul */
621 COSTS_N_INSNS (19), /* sdiv */
622 COSTS_N_INSNS (33), /* ddiv */
623 32, /* cache line size */
627 0, /* SF->DF convert */
630 /* Instruction costs on PPC476 processors. */
632 struct processor_costs ppc476_cost
= {
633 COSTS_N_INSNS (4), /* mulsi */
634 COSTS_N_INSNS (4), /* mulsi_const */
635 COSTS_N_INSNS (4), /* mulsi_const9 */
636 COSTS_N_INSNS (4), /* muldi */
637 COSTS_N_INSNS (11), /* divsi */
638 COSTS_N_INSNS (11), /* divdi */
639 COSTS_N_INSNS (6), /* fp */
640 COSTS_N_INSNS (6), /* dmul */
641 COSTS_N_INSNS (19), /* sdiv */
642 COSTS_N_INSNS (33), /* ddiv */
643 32, /* l1 cache line size */
647 0, /* SF->DF convert */
650 /* Instruction costs on PPC601 processors. */
652 struct processor_costs ppc601_cost
= {
653 COSTS_N_INSNS (5), /* mulsi */
654 COSTS_N_INSNS (5), /* mulsi_const */
655 COSTS_N_INSNS (5), /* mulsi_const9 */
656 COSTS_N_INSNS (5), /* muldi */
657 COSTS_N_INSNS (36), /* divsi */
658 COSTS_N_INSNS (36), /* divdi */
659 COSTS_N_INSNS (4), /* fp */
660 COSTS_N_INSNS (5), /* dmul */
661 COSTS_N_INSNS (17), /* sdiv */
662 COSTS_N_INSNS (31), /* ddiv */
663 32, /* cache line size */
667 0, /* SF->DF convert */
670 /* Instruction costs on PPC603 processors. */
672 struct processor_costs ppc603_cost
= {
673 COSTS_N_INSNS (5), /* mulsi */
674 COSTS_N_INSNS (3), /* mulsi_const */
675 COSTS_N_INSNS (2), /* mulsi_const9 */
676 COSTS_N_INSNS (5), /* muldi */
677 COSTS_N_INSNS (37), /* divsi */
678 COSTS_N_INSNS (37), /* divdi */
679 COSTS_N_INSNS (3), /* fp */
680 COSTS_N_INSNS (4), /* dmul */
681 COSTS_N_INSNS (18), /* sdiv */
682 COSTS_N_INSNS (33), /* ddiv */
683 32, /* cache line size */
687 0, /* SF->DF convert */
690 /* Instruction costs on PPC604 processors. */
692 struct processor_costs ppc604_cost
= {
693 COSTS_N_INSNS (4), /* mulsi */
694 COSTS_N_INSNS (4), /* mulsi_const */
695 COSTS_N_INSNS (4), /* mulsi_const9 */
696 COSTS_N_INSNS (4), /* muldi */
697 COSTS_N_INSNS (20), /* divsi */
698 COSTS_N_INSNS (20), /* divdi */
699 COSTS_N_INSNS (3), /* fp */
700 COSTS_N_INSNS (3), /* dmul */
701 COSTS_N_INSNS (18), /* sdiv */
702 COSTS_N_INSNS (32), /* ddiv */
703 32, /* cache line size */
707 0, /* SF->DF convert */
710 /* Instruction costs on PPC604e processors. */
712 struct processor_costs ppc604e_cost
= {
713 COSTS_N_INSNS (2), /* mulsi */
714 COSTS_N_INSNS (2), /* mulsi_const */
715 COSTS_N_INSNS (2), /* mulsi_const9 */
716 COSTS_N_INSNS (2), /* muldi */
717 COSTS_N_INSNS (20), /* divsi */
718 COSTS_N_INSNS (20), /* divdi */
719 COSTS_N_INSNS (3), /* fp */
720 COSTS_N_INSNS (3), /* dmul */
721 COSTS_N_INSNS (18), /* sdiv */
722 COSTS_N_INSNS (32), /* ddiv */
723 32, /* cache line size */
727 0, /* SF->DF convert */
730 /* Instruction costs on PPC620 processors. */
732 struct processor_costs ppc620_cost
= {
733 COSTS_N_INSNS (5), /* mulsi */
734 COSTS_N_INSNS (4), /* mulsi_const */
735 COSTS_N_INSNS (3), /* mulsi_const9 */
736 COSTS_N_INSNS (7), /* muldi */
737 COSTS_N_INSNS (21), /* divsi */
738 COSTS_N_INSNS (37), /* divdi */
739 COSTS_N_INSNS (3), /* fp */
740 COSTS_N_INSNS (3), /* dmul */
741 COSTS_N_INSNS (18), /* sdiv */
742 COSTS_N_INSNS (32), /* ddiv */
743 128, /* cache line size */
747 0, /* SF->DF convert */
750 /* Instruction costs on PPC630 processors. */
752 struct processor_costs ppc630_cost
= {
753 COSTS_N_INSNS (5), /* mulsi */
754 COSTS_N_INSNS (4), /* mulsi_const */
755 COSTS_N_INSNS (3), /* mulsi_const9 */
756 COSTS_N_INSNS (7), /* muldi */
757 COSTS_N_INSNS (21), /* divsi */
758 COSTS_N_INSNS (37), /* divdi */
759 COSTS_N_INSNS (3), /* fp */
760 COSTS_N_INSNS (3), /* dmul */
761 COSTS_N_INSNS (17), /* sdiv */
762 COSTS_N_INSNS (21), /* ddiv */
763 128, /* cache line size */
767 0, /* SF->DF convert */
770 /* Instruction costs on Cell processor. */
771 /* COSTS_N_INSNS (1) ~ one add. */
773 struct processor_costs ppccell_cost
= {
774 COSTS_N_INSNS (9/2)+2, /* mulsi */
775 COSTS_N_INSNS (6/2), /* mulsi_const */
776 COSTS_N_INSNS (6/2), /* mulsi_const9 */
777 COSTS_N_INSNS (15/2)+2, /* muldi */
778 COSTS_N_INSNS (38/2), /* divsi */
779 COSTS_N_INSNS (70/2), /* divdi */
780 COSTS_N_INSNS (10/2), /* fp */
781 COSTS_N_INSNS (10/2), /* dmul */
782 COSTS_N_INSNS (74/2), /* sdiv */
783 COSTS_N_INSNS (74/2), /* ddiv */
784 128, /* cache line size */
788 0, /* SF->DF convert */
791 /* Instruction costs on PPC750 and PPC7400 processors. */
793 struct processor_costs ppc750_cost
= {
794 COSTS_N_INSNS (5), /* mulsi */
795 COSTS_N_INSNS (3), /* mulsi_const */
796 COSTS_N_INSNS (2), /* mulsi_const9 */
797 COSTS_N_INSNS (5), /* muldi */
798 COSTS_N_INSNS (17), /* divsi */
799 COSTS_N_INSNS (17), /* divdi */
800 COSTS_N_INSNS (3), /* fp */
801 COSTS_N_INSNS (3), /* dmul */
802 COSTS_N_INSNS (17), /* sdiv */
803 COSTS_N_INSNS (31), /* ddiv */
804 32, /* cache line size */
808 0, /* SF->DF convert */
811 /* Instruction costs on PPC7450 processors. */
813 struct processor_costs ppc7450_cost
= {
814 COSTS_N_INSNS (4), /* mulsi */
815 COSTS_N_INSNS (3), /* mulsi_const */
816 COSTS_N_INSNS (3), /* mulsi_const9 */
817 COSTS_N_INSNS (4), /* muldi */
818 COSTS_N_INSNS (23), /* divsi */
819 COSTS_N_INSNS (23), /* divdi */
820 COSTS_N_INSNS (5), /* fp */
821 COSTS_N_INSNS (5), /* dmul */
822 COSTS_N_INSNS (21), /* sdiv */
823 COSTS_N_INSNS (35), /* ddiv */
824 32, /* cache line size */
828 0, /* SF->DF convert */
831 /* Instruction costs on PPC8540 processors. */
833 struct processor_costs ppc8540_cost
= {
834 COSTS_N_INSNS (4), /* mulsi */
835 COSTS_N_INSNS (4), /* mulsi_const */
836 COSTS_N_INSNS (4), /* mulsi_const9 */
837 COSTS_N_INSNS (4), /* muldi */
838 COSTS_N_INSNS (19), /* divsi */
839 COSTS_N_INSNS (19), /* divdi */
840 COSTS_N_INSNS (4), /* fp */
841 COSTS_N_INSNS (4), /* dmul */
842 COSTS_N_INSNS (29), /* sdiv */
843 COSTS_N_INSNS (29), /* ddiv */
844 32, /* cache line size */
847 1, /* prefetch streams /*/
848 0, /* SF->DF convert */
851 /* Instruction costs on E300C2 and E300C3 cores. */
853 struct processor_costs ppce300c2c3_cost
= {
854 COSTS_N_INSNS (4), /* mulsi */
855 COSTS_N_INSNS (4), /* mulsi_const */
856 COSTS_N_INSNS (4), /* mulsi_const9 */
857 COSTS_N_INSNS (4), /* muldi */
858 COSTS_N_INSNS (19), /* divsi */
859 COSTS_N_INSNS (19), /* divdi */
860 COSTS_N_INSNS (3), /* fp */
861 COSTS_N_INSNS (4), /* dmul */
862 COSTS_N_INSNS (18), /* sdiv */
863 COSTS_N_INSNS (33), /* ddiv */
867 1, /* prefetch streams /*/
868 0, /* SF->DF convert */
871 /* Instruction costs on PPCE500MC processors. */
873 struct processor_costs ppce500mc_cost
= {
874 COSTS_N_INSNS (4), /* mulsi */
875 COSTS_N_INSNS (4), /* mulsi_const */
876 COSTS_N_INSNS (4), /* mulsi_const9 */
877 COSTS_N_INSNS (4), /* muldi */
878 COSTS_N_INSNS (14), /* divsi */
879 COSTS_N_INSNS (14), /* divdi */
880 COSTS_N_INSNS (8), /* fp */
881 COSTS_N_INSNS (10), /* dmul */
882 COSTS_N_INSNS (36), /* sdiv */
883 COSTS_N_INSNS (66), /* ddiv */
884 64, /* cache line size */
887 1, /* prefetch streams /*/
888 0, /* SF->DF convert */
891 /* Instruction costs on PPCE500MC64 processors. */
893 struct processor_costs ppce500mc64_cost
= {
894 COSTS_N_INSNS (4), /* mulsi */
895 COSTS_N_INSNS (4), /* mulsi_const */
896 COSTS_N_INSNS (4), /* mulsi_const9 */
897 COSTS_N_INSNS (4), /* muldi */
898 COSTS_N_INSNS (14), /* divsi */
899 COSTS_N_INSNS (14), /* divdi */
900 COSTS_N_INSNS (4), /* fp */
901 COSTS_N_INSNS (10), /* dmul */
902 COSTS_N_INSNS (36), /* sdiv */
903 COSTS_N_INSNS (66), /* ddiv */
904 64, /* cache line size */
907 1, /* prefetch streams /*/
908 0, /* SF->DF convert */
911 /* Instruction costs on PPCE5500 processors. */
913 struct processor_costs ppce5500_cost
= {
914 COSTS_N_INSNS (5), /* mulsi */
915 COSTS_N_INSNS (5), /* mulsi_const */
916 COSTS_N_INSNS (4), /* mulsi_const9 */
917 COSTS_N_INSNS (5), /* muldi */
918 COSTS_N_INSNS (14), /* divsi */
919 COSTS_N_INSNS (14), /* divdi */
920 COSTS_N_INSNS (7), /* fp */
921 COSTS_N_INSNS (10), /* dmul */
922 COSTS_N_INSNS (36), /* sdiv */
923 COSTS_N_INSNS (66), /* ddiv */
924 64, /* cache line size */
927 1, /* prefetch streams /*/
928 0, /* SF->DF convert */
931 /* Instruction costs on PPCE6500 processors. */
933 struct processor_costs ppce6500_cost
= {
934 COSTS_N_INSNS (5), /* mulsi */
935 COSTS_N_INSNS (5), /* mulsi_const */
936 COSTS_N_INSNS (4), /* mulsi_const9 */
937 COSTS_N_INSNS (5), /* muldi */
938 COSTS_N_INSNS (14), /* divsi */
939 COSTS_N_INSNS (14), /* divdi */
940 COSTS_N_INSNS (7), /* fp */
941 COSTS_N_INSNS (10), /* dmul */
942 COSTS_N_INSNS (36), /* sdiv */
943 COSTS_N_INSNS (66), /* ddiv */
944 64, /* cache line size */
947 1, /* prefetch streams /*/
948 0, /* SF->DF convert */
951 /* Instruction costs on AppliedMicro Titan processors. */
953 struct processor_costs titan_cost
= {
954 COSTS_N_INSNS (5), /* mulsi */
955 COSTS_N_INSNS (5), /* mulsi_const */
956 COSTS_N_INSNS (5), /* mulsi_const9 */
957 COSTS_N_INSNS (5), /* muldi */
958 COSTS_N_INSNS (18), /* divsi */
959 COSTS_N_INSNS (18), /* divdi */
960 COSTS_N_INSNS (10), /* fp */
961 COSTS_N_INSNS (10), /* dmul */
962 COSTS_N_INSNS (46), /* sdiv */
963 COSTS_N_INSNS (72), /* ddiv */
964 32, /* cache line size */
967 1, /* prefetch streams /*/
968 0, /* SF->DF convert */
971 /* Instruction costs on POWER4 and POWER5 processors. */
973 struct processor_costs power4_cost
= {
974 COSTS_N_INSNS (3), /* mulsi */
975 COSTS_N_INSNS (2), /* mulsi_const */
976 COSTS_N_INSNS (2), /* mulsi_const9 */
977 COSTS_N_INSNS (4), /* muldi */
978 COSTS_N_INSNS (18), /* divsi */
979 COSTS_N_INSNS (34), /* divdi */
980 COSTS_N_INSNS (3), /* fp */
981 COSTS_N_INSNS (3), /* dmul */
982 COSTS_N_INSNS (17), /* sdiv */
983 COSTS_N_INSNS (17), /* ddiv */
984 128, /* cache line size */
987 8, /* prefetch streams /*/
988 0, /* SF->DF convert */
991 /* Instruction costs on POWER6 processors. */
993 struct processor_costs power6_cost
= {
994 COSTS_N_INSNS (8), /* mulsi */
995 COSTS_N_INSNS (8), /* mulsi_const */
996 COSTS_N_INSNS (8), /* mulsi_const9 */
997 COSTS_N_INSNS (8), /* muldi */
998 COSTS_N_INSNS (22), /* divsi */
999 COSTS_N_INSNS (28), /* divdi */
1000 COSTS_N_INSNS (3), /* fp */
1001 COSTS_N_INSNS (3), /* dmul */
1002 COSTS_N_INSNS (13), /* sdiv */
1003 COSTS_N_INSNS (16), /* ddiv */
1004 128, /* cache line size */
1006 2048, /* l2 cache */
1007 16, /* prefetch streams */
1008 0, /* SF->DF convert */
1011 /* Instruction costs on POWER7 processors. */
1013 struct processor_costs power7_cost
= {
1014 COSTS_N_INSNS (2), /* mulsi */
1015 COSTS_N_INSNS (2), /* mulsi_const */
1016 COSTS_N_INSNS (2), /* mulsi_const9 */
1017 COSTS_N_INSNS (2), /* muldi */
1018 COSTS_N_INSNS (18), /* divsi */
1019 COSTS_N_INSNS (34), /* divdi */
1020 COSTS_N_INSNS (3), /* fp */
1021 COSTS_N_INSNS (3), /* dmul */
1022 COSTS_N_INSNS (13), /* sdiv */
1023 COSTS_N_INSNS (16), /* ddiv */
1024 128, /* cache line size */
1027 12, /* prefetch streams */
1028 COSTS_N_INSNS (3), /* SF->DF convert */
1031 /* Instruction costs on POWER8 processors. */
1033 struct processor_costs power8_cost
= {
1034 COSTS_N_INSNS (3), /* mulsi */
1035 COSTS_N_INSNS (3), /* mulsi_const */
1036 COSTS_N_INSNS (3), /* mulsi_const9 */
1037 COSTS_N_INSNS (3), /* muldi */
1038 COSTS_N_INSNS (19), /* divsi */
1039 COSTS_N_INSNS (35), /* divdi */
1040 COSTS_N_INSNS (3), /* fp */
1041 COSTS_N_INSNS (3), /* dmul */
1042 COSTS_N_INSNS (14), /* sdiv */
1043 COSTS_N_INSNS (17), /* ddiv */
1044 128, /* cache line size */
1047 12, /* prefetch streams */
1048 COSTS_N_INSNS (3), /* SF->DF convert */
1051 /* Instruction costs on POWER9 processors. */
1053 struct processor_costs power9_cost
= {
1054 COSTS_N_INSNS (3), /* mulsi */
1055 COSTS_N_INSNS (3), /* mulsi_const */
1056 COSTS_N_INSNS (3), /* mulsi_const9 */
1057 COSTS_N_INSNS (3), /* muldi */
1058 COSTS_N_INSNS (8), /* divsi */
1059 COSTS_N_INSNS (12), /* divdi */
1060 COSTS_N_INSNS (3), /* fp */
1061 COSTS_N_INSNS (3), /* dmul */
1062 COSTS_N_INSNS (13), /* sdiv */
1063 COSTS_N_INSNS (18), /* ddiv */
1064 128, /* cache line size */
1067 8, /* prefetch streams */
1068 COSTS_N_INSNS (3), /* SF->DF convert */
1071 /* Instruction costs on POWER10 processors. */
1073 struct processor_costs power10_cost
= {
1074 COSTS_N_INSNS (2), /* mulsi */
1075 COSTS_N_INSNS (2), /* mulsi_const */
1076 COSTS_N_INSNS (2), /* mulsi_const9 */
1077 COSTS_N_INSNS (2), /* muldi */
1078 COSTS_N_INSNS (6), /* divsi */
1079 COSTS_N_INSNS (6), /* divdi */
1080 COSTS_N_INSNS (2), /* fp */
1081 COSTS_N_INSNS (2), /* dmul */
1082 COSTS_N_INSNS (11), /* sdiv */
1083 COSTS_N_INSNS (13), /* ddiv */
1084 128, /* cache line size */
1087 16, /* prefetch streams */
1088 COSTS_N_INSNS (2), /* SF->DF convert */
1091 /* Instruction costs on Future processors. At the moment, this is a copy of
1092 the power10 costs, but it is expected to change over time.. */
/* Kept as a separate table (rather than aliasing power10_cost) so the
   values can diverge once real "future" hardware data exists.
   NOTE(review): extraction dropped duplicated member lines (l1/l2 cache,
   DF->SF convert, closing brace) -- restore from the canonical source.  */
1094 struct processor_costs future_cost
= {
1095 COSTS_N_INSNS (2), /* mulsi */
1096 COSTS_N_INSNS (2), /* mulsi_const */
1097 COSTS_N_INSNS (2), /* mulsi_const9 */
1098 COSTS_N_INSNS (2), /* muldi */
1099 COSTS_N_INSNS (6), /* divsi */
1100 COSTS_N_INSNS (6), /* divdi */
1101 COSTS_N_INSNS (2), /* fp */
1102 COSTS_N_INSNS (2), /* dmul */
1103 COSTS_N_INSNS (11), /* sdiv */
1104 COSTS_N_INSNS (13), /* ddiv */
1105 128, /* cache line size */
1108 16, /* prefetch streams */
1109 COSTS_N_INSNS (2), /* SF->DF convert */
1112 /* Instruction costs on POWER A2 processors. */
/* The in-order A2 core: very slow integer multiply/divide and FP divide
   compared with the server cores above.  A 0 for SF->DF means the
   conversion is treated as free by the cost model.
   NOTE(review): extraction dropped duplicated member lines (cache line
   size, l1 cache, DF->SF convert, closing brace) -- restore from the
   canonical source.  */
1114 struct processor_costs ppca2_cost
= {
1115 COSTS_N_INSNS (16), /* mulsi */
1116 COSTS_N_INSNS (16), /* mulsi_const */
1117 COSTS_N_INSNS (16), /* mulsi_const9 */
1118 COSTS_N_INSNS (16), /* muldi */
1119 COSTS_N_INSNS (22), /* divsi */
1120 COSTS_N_INSNS (28), /* divdi */
1121 COSTS_N_INSNS (3), /* fp */
1122 COSTS_N_INSNS (3), /* dmul */
1123 COSTS_N_INSNS (59), /* sdiv */
1124 COSTS_N_INSNS (72), /* ddiv */
1127 2048, /* l2 cache */
1128 16, /* prefetch streams */
1129 0, /* SF->DF convert */
1132 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1133 static tree (*rs6000_veclib_handler
) (combined_fn
, tree
, tree
);
1136 static bool rs6000_debug_legitimate_address_p (machine_mode
, rtx
, bool);
1137 static tree
rs6000_handle_longcall_attribute (tree
*, tree
, tree
, int, bool *);
1138 static tree
rs6000_handle_altivec_attribute (tree
*, tree
, tree
, int, bool *);
1139 static tree
rs6000_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
1140 static tree
rs6000_builtin_vectorized_libmass (combined_fn
, tree
, tree
);
1141 static void rs6000_emit_set_long_const (rtx
, HOST_WIDE_INT
);
1142 static int rs6000_memory_move_cost (machine_mode
, reg_class_t
, bool);
1143 static bool rs6000_debug_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
1144 static int rs6000_debug_address_cost (rtx
, machine_mode
, addr_space_t
,
1146 static int rs6000_debug_adjust_cost (rtx_insn
*, int, rtx_insn
*, int,
1148 static bool is_microcoded_insn (rtx_insn
*);
1149 static bool is_nonpipeline_insn (rtx_insn
*);
1150 static bool is_cracked_insn (rtx_insn
*);
1151 static bool is_load_insn (rtx
, rtx
*);
1152 static bool is_store_insn (rtx
, rtx
*);
1153 static bool set_to_load_agen (rtx_insn
*,rtx_insn
*);
1154 static bool insn_terminates_group_p (rtx_insn
*, enum group_termination
);
1155 static bool insn_must_be_first_in_group (rtx_insn
*);
1156 static bool insn_must_be_last_in_group (rtx_insn
*);
1157 bool easy_vector_constant (rtx
, machine_mode
);
1158 static rtx
rs6000_debug_legitimize_address (rtx
, rtx
, machine_mode
);
1159 static rtx
rs6000_legitimize_tls_address (rtx
, enum tls_model
);
1161 static tree
get_prev_label (tree
);
1163 static bool rs6000_mode_dependent_address (const_rtx
);
1164 static bool rs6000_debug_mode_dependent_address (const_rtx
);
1165 static bool rs6000_offsettable_memref_p (rtx
, machine_mode
, bool);
1166 static enum reg_class
rs6000_secondary_reload_class (enum reg_class
,
1168 static enum reg_class
rs6000_debug_secondary_reload_class (enum reg_class
,
1171 static enum reg_class
rs6000_preferred_reload_class (rtx
, enum reg_class
);
1172 static enum reg_class
rs6000_debug_preferred_reload_class (rtx
,
1174 static bool rs6000_debug_secondary_memory_needed (machine_mode
,
1177 static bool rs6000_debug_can_change_mode_class (machine_mode
,
1181 static bool (*rs6000_mode_dependent_address_ptr
) (const_rtx
)
1182 = rs6000_mode_dependent_address
;
1184 enum reg_class (*rs6000_secondary_reload_class_ptr
) (enum reg_class
,
1186 = rs6000_secondary_reload_class
;
1188 enum reg_class (*rs6000_preferred_reload_class_ptr
) (rtx
, enum reg_class
)
1189 = rs6000_preferred_reload_class
;
1191 const int INSN_NOT_AVAILABLE
= -1;
1193 static void rs6000_print_isa_options (FILE *, int, const char *,
1195 static HOST_WIDE_INT
rs6000_disable_incompatible_switches (void);
1197 static enum rs6000_reg_type
register_to_reg_type (rtx
, bool *);
1198 static bool rs6000_secondary_reload_move (enum rs6000_reg_type
,
1199 enum rs6000_reg_type
,
1201 secondary_reload_info
*,
1203 rtl_opt_pass
*make_pass_analyze_swaps (gcc::context
*);
1205 /* Hash table stuff for keeping track of TOC entries. */
/* toc_hash_struct: one entry per distinct TOC (table-of-contents) constant;
   the GTY((for_user)) marker registers it with the GC for user-managed hash
   tables.  NOTE(review): the rtx `key' member and other fields were dropped
   by the extraction -- only key_mode survives below.  */
1207 struct GTY((for_user
)) toc_hash_struct
1209 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1210 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1212 machine_mode key_mode
;
/* Hasher traits for hash_table: pointer hashing over GC-allocated
   toc_hash_struct entries.  */
1216 struct toc_hasher
: ggc_ptr_hash
<toc_hash_struct
>
1218 static hashval_t
hash (toc_hash_struct
*);
1219 static bool equal (toc_hash_struct
*, toc_hash_struct
*);
/* The GC-rooted table itself; lazily created when the first TOC entry is
   emitted.  */
1222 static GTY (()) hash_table
<toc_hasher
> *toc_hash_table
;
1226 /* Default register names. */
/* One 8-byte name slot per hard register, in hard-regno order:
   GPRs 0-31, FPRs 0-31, Altivec VRs 0-31, lr/ctr/ca/ap, CRs 0-7,
   vrsave/vscr/sfp, then DMR registers 0-7.
   NOTE(review): the array's opening brace lines were dropped by the
   extraction -- restore from the canonical source.  */
1227 char rs6000_reg_names
[][8] =
1230 "0", "1", "2", "3", "4", "5", "6", "7",
1231 "8", "9", "10", "11", "12", "13", "14", "15",
1232 "16", "17", "18", "19", "20", "21", "22", "23",
1233 "24", "25", "26", "27", "28", "29", "30", "31",
1235 "0", "1", "2", "3", "4", "5", "6", "7",
1236 "8", "9", "10", "11", "12", "13", "14", "15",
1237 "16", "17", "18", "19", "20", "21", "22", "23",
1238 "24", "25", "26", "27", "28", "29", "30", "31",
1240 "0", "1", "2", "3", "4", "5", "6", "7",
1241 "8", "9", "10", "11", "12", "13", "14", "15",
1242 "16", "17", "18", "19", "20", "21", "22", "23",
1243 "24", "25", "26", "27", "28", "29", "30", "31",
1245 "lr", "ctr", "ca", "ap",
1247 "0", "1", "2", "3", "4", "5", "6", "7",
1248 /* vrsave vscr sfp */
1249 "vrsave", "vscr", "sfp",
1251 "0", "1", "2", "3", "4", "5", "6", "7",
1254 #ifdef TARGET_REGNAMES
/* Alternate (prefixed) register names, selected by -mregnames; same layout
   as rs6000_reg_names but with %r/%f/%v/%cr/%dmr prefixes where the
   assembler accepts them.
   NOTE(review): the array's opening brace lines were dropped by the
   extraction -- restore from the canonical source.  */
1255 static const char alt_reg_names
[][8] =
1258 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1259 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1260 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1261 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1263 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1264 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1265 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1266 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1268 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1269 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1270 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1271 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1273 "lr", "ctr", "ca", "ap",
1275 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1276 /* vrsave vscr sfp */
1277 "vrsave", "vscr", "sfp",
1279 "%dmr0", "%dmr1", "%dmr2", "%dmr3", "%dmr4", "%dmr5", "%dmr6", "%dmr7",
1283 /* Table of valid machine attributes. */
/* Terminated by the all-NULL sentinel entry; "shortcall" deliberately
   shares rs6000_handle_longcall_attribute with "longcall" (the handler
   distinguishes them), and ms_struct/gcc_struct share
   rs6000_handle_struct_attribute likewise.  */
1285 static const struct attribute_spec rs6000_attribute_table
[] =
1287 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1288 affects_type_identity, handler, exclude } */
1289 { "altivec", 1, 1, false, true, false, false,
1290 rs6000_handle_altivec_attribute
, NULL
},
1291 { "longcall", 0, 0, false, true, true, false,
1292 rs6000_handle_longcall_attribute
, NULL
},
1293 { "shortcall", 0, 0, false, true, true, false,
1294 rs6000_handle_longcall_attribute
, NULL
},
1295 { "ms_struct", 0, 0, false, false, false, false,
1296 rs6000_handle_struct_attribute
, NULL
},
1297 { "gcc_struct", 0, 0, false, false, false, false,
1298 rs6000_handle_struct_attribute
, NULL
},
1299 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1300 SUBTARGET_ATTRIBUTE_TABLE
,
1302 { NULL
, 0, 0, false, false, false, false, NULL
, NULL
}
1305 #ifndef TARGET_PROFILE_KERNEL
1306 #define TARGET_PROFILE_KERNEL 0
1309 /* Initialize the GCC target structure. */
1310 #undef TARGET_ATTRIBUTE_TABLE
1311 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1312 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1313 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1314 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1315 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1317 #undef TARGET_ASM_ALIGNED_DI_OP
1318 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1320 /* Default unaligned ops are only provided for ELF. Find the ops needed
1321 for non-ELF systems. */
1322 #ifndef OBJECT_FORMAT_ELF
1324 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1326 #undef TARGET_ASM_UNALIGNED_HI_OP
1327 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1328 #undef TARGET_ASM_UNALIGNED_SI_OP
1329 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1330 #undef TARGET_ASM_UNALIGNED_DI_OP
1331 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1334 #undef TARGET_ASM_UNALIGNED_HI_OP
1335 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1336 #undef TARGET_ASM_UNALIGNED_SI_OP
1337 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1338 #undef TARGET_ASM_UNALIGNED_DI_OP
1339 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1340 #undef TARGET_ASM_ALIGNED_DI_OP
1341 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1345 /* This hook deals with fixups for relocatable code and DI-mode objects
1347 #undef TARGET_ASM_INTEGER
1348 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1350 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1351 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1352 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1355 #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
1356 #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
1357 rs6000_print_patchable_function_entry
1359 #undef TARGET_SET_UP_BY_PROLOGUE
1360 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1362 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1363 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1364 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1365 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1366 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1367 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1368 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1369 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1370 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1371 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1372 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1373 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1375 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1376 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1378 #undef TARGET_INTERNAL_ARG_POINTER
1379 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1381 #undef TARGET_HAVE_TLS
1382 #define TARGET_HAVE_TLS HAVE_AS_TLS
1384 #undef TARGET_CANNOT_FORCE_CONST_MEM
1385 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1387 #undef TARGET_DELEGITIMIZE_ADDRESS
1388 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1390 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1391 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1393 #undef TARGET_LEGITIMATE_COMBINED_INSN
1394 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1396 #undef TARGET_ASM_FUNCTION_PROLOGUE
1397 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1398 #undef TARGET_ASM_FUNCTION_EPILOGUE
1399 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1401 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1402 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1404 #undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
1405 #define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec
1407 #undef TARGET_LEGITIMIZE_ADDRESS
1408 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1410 #undef TARGET_SCHED_VARIABLE_ISSUE
1411 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1413 #undef TARGET_SCHED_ISSUE_RATE
1414 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1415 #undef TARGET_SCHED_ADJUST_COST
1416 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1417 #undef TARGET_SCHED_ADJUST_PRIORITY
1418 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1419 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1420 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1421 #undef TARGET_SCHED_INIT
1422 #define TARGET_SCHED_INIT rs6000_sched_init
1423 #undef TARGET_SCHED_FINISH
1424 #define TARGET_SCHED_FINISH rs6000_sched_finish
1425 #undef TARGET_SCHED_REORDER
1426 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1427 #undef TARGET_SCHED_REORDER2
1428 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1430 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1431 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1433 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1434 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1436 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1437 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1438 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1439 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1440 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1441 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1442 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1443 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1445 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1446 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1448 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1449 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1450 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1451 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1452 rs6000_builtin_support_vector_misalignment
1453 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1454 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1455 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1456 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1457 rs6000_builtin_vectorization_cost
1458 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1459 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1460 rs6000_preferred_simd_mode
1461 #undef TARGET_VECTORIZE_CREATE_COSTS
1462 #define TARGET_VECTORIZE_CREATE_COSTS rs6000_vectorize_create_costs
1464 #undef TARGET_LOOP_UNROLL_ADJUST
1465 #define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
1467 #undef TARGET_INIT_BUILTINS
1468 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1469 #undef TARGET_BUILTIN_DECL
1470 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1472 #undef TARGET_FOLD_BUILTIN
1473 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1474 #undef TARGET_GIMPLE_FOLD_BUILTIN
1475 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1477 #undef TARGET_EXPAND_BUILTIN
1478 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1480 #undef TARGET_MANGLE_TYPE
1481 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1483 #undef TARGET_INIT_LIBFUNCS
1484 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1487 #undef TARGET_BINDS_LOCAL_P
1488 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1491 #undef TARGET_MS_BITFIELD_LAYOUT_P
1492 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1494 #undef TARGET_ASM_OUTPUT_MI_THUNK
1495 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1497 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1498 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1500 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1501 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1503 #undef TARGET_REGISTER_MOVE_COST
1504 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1505 #undef TARGET_MEMORY_MOVE_COST
1506 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1507 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1508 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1509 rs6000_ira_change_pseudo_allocno_class
1510 #undef TARGET_CANNOT_COPY_INSN_P
1511 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1512 #undef TARGET_RTX_COSTS
1513 #define TARGET_RTX_COSTS rs6000_rtx_costs
1514 #undef TARGET_ADDRESS_COST
1515 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1516 #undef TARGET_INSN_COST
1517 #define TARGET_INSN_COST rs6000_insn_cost
1519 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1520 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1522 #undef TARGET_PROMOTE_FUNCTION_MODE
1523 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1525 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
1526 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change
1528 #undef TARGET_RETURN_IN_MEMORY
1529 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1531 #undef TARGET_RETURN_IN_MSB
1532 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1534 #undef TARGET_SETUP_INCOMING_VARARGS
1535 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1537 /* Always strict argument naming on rs6000. */
1538 #undef TARGET_STRICT_ARGUMENT_NAMING
1539 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1540 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1541 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1542 #undef TARGET_SPLIT_COMPLEX_ARG
1543 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1544 #undef TARGET_MUST_PASS_IN_STACK
1545 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1546 #undef TARGET_PASS_BY_REFERENCE
1547 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1548 #undef TARGET_ARG_PARTIAL_BYTES
1549 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1550 #undef TARGET_FUNCTION_ARG_ADVANCE
1551 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1552 #undef TARGET_FUNCTION_ARG
1553 #define TARGET_FUNCTION_ARG rs6000_function_arg
1554 #undef TARGET_FUNCTION_ARG_PADDING
1555 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1556 #undef TARGET_FUNCTION_ARG_BOUNDARY
1557 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1559 #undef TARGET_BUILD_BUILTIN_VA_LIST
1560 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1562 #undef TARGET_EXPAND_BUILTIN_VA_START
1563 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1565 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1566 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1568 #undef TARGET_EH_RETURN_FILTER_MODE
1569 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1571 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1572 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1574 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1575 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1577 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
1578 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
1579 rs6000_libgcc_floating_mode_supported_p
1581 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1582 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1584 #undef TARGET_FLOATN_MODE
1585 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1587 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1588 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1590 #undef TARGET_MD_ASM_ADJUST
1591 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1593 #undef TARGET_OPTION_OVERRIDE
1594 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1596 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1597 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1598 rs6000_builtin_vectorized_function
1600 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1601 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1602 rs6000_builtin_md_vectorized_function
1604 #undef TARGET_STACK_PROTECT_GUARD
1605 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1608 #undef TARGET_STACK_PROTECT_FAIL
1609 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1613 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1614 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1617 /* Use a 32-bit anchor range. This leads to sequences like:
1619 addis tmp,anchor,high
1622 where tmp itself acts as an anchor, and can be shared between
1623 accesses to the same 64k page. */
1624 #undef TARGET_MIN_ANCHOR_OFFSET
1625 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1626 #undef TARGET_MAX_ANCHOR_OFFSET
1627 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1628 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1629 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1630 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1631 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1633 #undef TARGET_BUILTIN_RECIPROCAL
1634 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1636 #undef TARGET_SECONDARY_RELOAD
1637 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1638 #undef TARGET_SECONDARY_MEMORY_NEEDED
1639 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1640 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1641 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1643 #undef TARGET_LEGITIMATE_ADDRESS_P
1644 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1646 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1647 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1649 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1650 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1652 #undef TARGET_CAN_ELIMINATE
1653 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1655 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1656 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1658 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1659 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1661 #undef TARGET_TRAMPOLINE_INIT
1662 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1664 #undef TARGET_FUNCTION_VALUE
1665 #define TARGET_FUNCTION_VALUE rs6000_function_value
1667 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1668 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1670 #undef TARGET_OPTION_SAVE
1671 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1673 #undef TARGET_OPTION_RESTORE
1674 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1676 #undef TARGET_OPTION_PRINT
1677 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1679 #undef TARGET_CAN_INLINE_P
1680 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1682 #undef TARGET_SET_CURRENT_FUNCTION
1683 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1685 #undef TARGET_LEGITIMATE_CONSTANT_P
1686 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1688 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1689 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1691 #undef TARGET_CAN_USE_DOLOOP_P
1692 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1694 #undef TARGET_PREDICT_DOLOOP_P
1695 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1697 #undef TARGET_HAVE_COUNT_REG_DECR_P
1698 #define TARGET_HAVE_COUNT_REG_DECR_P true
1700 /* 1000000000 is infinite cost in IVOPTs. */
1701 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1702 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1704 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1705 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1707 #undef TARGET_PREFERRED_DOLOOP_MODE
1708 #define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode
1710 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1711 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1713 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1714 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1715 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1716 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1717 #undef TARGET_UNWIND_WORD_MODE
1718 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1720 #undef TARGET_OFFLOAD_OPTIONS
1721 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1723 #undef TARGET_C_MODE_FOR_SUFFIX
1724 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1726 #undef TARGET_INVALID_BINARY_OP
1727 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1729 #undef TARGET_OPTAB_SUPPORTED_P
1730 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1732 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1733 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1735 #undef TARGET_COMPARE_VERSION_PRIORITY
1736 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1738 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1739 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1740 rs6000_generate_version_dispatcher_body
1742 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1743 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1744 rs6000_get_function_versions_dispatcher
1746 #undef TARGET_OPTION_FUNCTION_VERSIONS
1747 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1749 #undef TARGET_HARD_REGNO_NREGS
1750 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1751 #undef TARGET_HARD_REGNO_MODE_OK
1752 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1754 #undef TARGET_MODES_TIEABLE_P
1755 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1757 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1758 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1759 rs6000_hard_regno_call_part_clobbered
1761 #undef TARGET_SLOW_UNALIGNED_ACCESS
1762 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1764 #undef TARGET_CAN_CHANGE_MODE_CLASS
1765 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1767 #undef TARGET_CONSTANT_ALIGNMENT
1768 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1770 #undef TARGET_STARTING_FRAME_OFFSET
1771 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1773 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1774 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1776 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1777 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1779 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1780 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1781 rs6000_cannot_substitute_mem_equiv_p
1783 #undef TARGET_INVALID_CONVERSION
1784 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
1786 #undef TARGET_NEED_IPA_FN_TARGET_INFO
1787 #define TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info
1789 #undef TARGET_UPDATE_IPA_FN_TARGET_INFO
1790 #define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info
1793 /* Processor table. */
/* struct rs6000_ptt describes one -mcpu=/-mtune= candidate; the table body
   is generated by expanding RS6000_CPU over rs6000-cpus.def, so adding a
   CPU only requires editing that .def file.  */
1796 const char *const name
; /* Canonical processor name. */
1797 const enum processor_type processor
; /* Processor type enum value. */
1798 const HOST_WIDE_INT target_enable
; /* Target flags to enable. */
1801 static struct rs6000_ptt
const processor_target_table
[] =
1803 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1804 #include "rs6000-cpus.def"
1808 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1812 rs6000_cpu_name_lookup (const char *name
)
1818 for (i
= 0; i
< ARRAY_SIZE (processor_target_table
); i
++)
1819 if (! strcmp (name
, processor_target_table
[i
].name
))
1827 /* Return number of consecutive hard regs needed starting at reg REGNO
1828 to hold something of mode MODE.
1829 This is ordinarily the length in words of a value of mode MODE
1830 but can be less for certain modes in special long registers.
1832 POWER and PowerPC GPRs hold 32 bits worth;
1833 PowerPC64 GPRs and FPRs point register holds 64 bits worth. */
/* Computes ceil(mode size / per-register width), picking the register
   width from the register class of REGNO (VSX/FPR, Altivec, DMR, or GPR).
   NOTE(review): extraction dropped original line 1846 inside the FPR
   conditional expression -- confirm the full condition against the
   canonical source before relying on this text.  */
1836 rs6000_hard_regno_nregs_internal (int regno
, machine_mode mode
)
1838 unsigned HOST_WIDE_INT reg_size
;
1840 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1841 128-bit floating point that can go in vector registers, which has VSX
1842 memory addressing. */
1843 if (FP_REGNO_P (regno
))
1844 reg_size
= (VECTOR_MEM_VSX_P (mode
)
1845 || VECTOR_ALIGNMENT_P (mode
)
1847 ? UNITS_PER_VSX_WORD
1848 : UNITS_PER_FP_WORD
);
1850 else if (ALTIVEC_REGNO_P (regno
))
1851 reg_size
= UNITS_PER_ALTIVEC_WORD
;
1853 else if (DMR_REGNO_P (regno
))
1854 reg_size
= UNITS_PER_DMR_WORD
;
/* Default: general-purpose registers hold one machine word each.  */
1857 reg_size
= UNITS_PER_WORD
;
/* Round up so partial words still consume a whole register.  */
1859 return (GET_MODE_SIZE (mode
) + reg_size
- 1) / reg_size
;
1862 /* Value is 1 if hard register REGNO can hold a value of machine-mode
/* Full (uncached) predicate behind TARGET_HARD_REGNO_MODE_OK; its results
   are memoized into rs6000_hard_regno_mode_ok_p at option-override time.
   NOTE(review): the extraction dropped many structurally-duplicated lines
   (braces, blank lines, bare `return true;'/`return false;' statements),
   so several conditionals below appear without their consequents -- restore
   from the canonical source before compiling.  */
1865 rs6000_hard_regno_mode_ok_uncached (int regno
, machine_mode mode
)
/* Last hard register the value would occupy, used for range checks.  */
1867 int last_regno
= regno
+ rs6000_hard_regno_nregs
[mode
][regno
] - 1;
/* Complex modes are validated via their component (inner) mode.  */
1869 if (COMPLEX_MODE_P (mode
))
1870 mode
= GET_MODE_INNER (mode
)
;
1872 /* Vector pair modes need even/odd VSX register pairs. Only allow vector
1875 return (TARGET_MMA
&& VSX_REGNO_P (regno
) && (regno
& 1) == 0);
1877 /* On ISA 3.1 (power10), MMA accumulator modes need FPR registers divisible
1880 If dense math is enabled, allow all VSX registers plus the dense math
1881 registers. We need to make sure we don't cross between the boundary of
1882 FPRs and traditional Altiviec registers. */
1885 if (TARGET_MMA
&& !TARGET_DENSE_MATH
)
1886 return (FP_REGNO_P (regno
) && (regno
& 3) == 0);
1888 else if (TARGET_DENSE_MATH
)
1890 if (DMR_REGNO_P (regno
))
1893 if (FP_REGNO_P (regno
))
1894 return ((regno
& 1) == 0 && regno
<= LAST_FPR_REGNO
- 3);
1896 if (ALTIVEC_REGNO_P (regno
))
1897 return ((regno
& 1) == 0 && regno
<= LAST_ALTIVEC_REGNO
- 3);
1904 /* Dense math register modes need DMR registers or VSX registers divisible by
1905 2. We need to make sure we don't cross between the boundary of FPRs and
1906 traditional Altiviec registers. */
1907 if (mode
== TDOmode
)
1909 if (!TARGET_DENSE_MATH
)
1912 if (DMR_REGNO_P (regno
))
1915 if (FP_REGNO_P (regno
))
1916 return ((regno
& 1) == 0 && regno
<= LAST_FPR_REGNO
- 7);
1918 if (ALTIVEC_REGNO_P (regno
))
1919 return ((regno
& 1) == 0 && regno
<= LAST_ALTIVEC_REGNO
- 7);
1924 /* No other types other than XOmode or TDOmode can go in DMRs. */
1925 if (DMR_REGNO_P (regno
))
1928 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1929 register combinations, and use PTImode where we need to deal with quad
1930 word memory operations. Don't allow quad words in the argument or frame
1931 pointer registers, just registers 0..31. */
1932 if (mode
== PTImode
)
1933 return (IN_RANGE (regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
)
1934 && IN_RANGE (last_regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
)
1935 && ((regno
& 1) == 0));
1937 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1938 implementations. Don't allow an item to be split between a FP register
1939 and an Altivec register. Allow TImode in all VSX registers if the user
1941 if (TARGET_VSX
&& VSX_REGNO_P (regno
)
1942 && (VECTOR_MEM_VSX_P (mode
)
1943 || VECTOR_ALIGNMENT_P (mode
)
1944 || reg_addr
[mode
].scalar_in_vmx_p
1946 || (TARGET_VADDUQM
&& mode
== V1TImode
)))
/* Keep the value wholly within one register bank (FPR vs Altivec).  */
1948 if (FP_REGNO_P (regno
))
1949 return FP_REGNO_P (last_regno
);
1951 if (ALTIVEC_REGNO_P (regno
))
1953 if (GET_MODE_SIZE (mode
) < 16 && !reg_addr
[mode
].scalar_in_vmx_p
)
1956 return ALTIVEC_REGNO_P (last_regno
);
1960 /* The GPRs can hold any mode, but values bigger than one register
1961 cannot go past R31. */
1962 if (INT_REGNO_P (regno
))
1963 return INT_REGNO_P (last_regno
);
1965 /* The float registers (except for VSX vector modes) can only hold floating
1966 modes and DImode. */
1967 if (FP_REGNO_P (regno
))
1969 if (VECTOR_ALIGNMENT_P (mode
))
1972 if (SCALAR_FLOAT_MODE_P (mode
)
1973 && (mode
!= TDmode
|| (regno
% 2) == 0)
1974 && FP_REGNO_P (last_regno
))
1977 if (GET_MODE_CLASS (mode
) == MODE_INT
)
1979 if(GET_MODE_SIZE (mode
) == UNITS_PER_FP_WORD
)
/* ISA 2.07/3.0 added direct moves letting small integers live in FPRs.  */
1982 if (TARGET_P8_VECTOR
&& (mode
== SImode
))
1985 if (TARGET_P9_VECTOR
&& (mode
== QImode
|| mode
== HImode
))
1992 /* The CR register can only hold CC modes. */
1993 if (CR_REGNO_P (regno
))
1994 return GET_MODE_CLASS (mode
) == MODE_CC
;
1996 if (CA_REGNO_P (regno
))
1997 return mode
== Pmode
|| mode
== SImode
;
1999 /* AltiVec only in AldyVec registers. */
2000 if (ALTIVEC_REGNO_P (regno
))
2001 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
)
2002 || mode
== V1TImode
);
2004 /* We cannot put non-VSX TImode or PTImode anywhere except general register
2005 and it must be able to fit within the register set. */
2007 return GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
;
2010 /* Implement TARGET_HARD_REGNO_NREGS. */
2013 rs6000_hard_regno_nregs_hook (unsigned int regno
, machine_mode mode
)
2015 return rs6000_hard_regno_nregs
[mode
][regno
];
2018 /* Implement TARGET_HARD_REGNO_MODE_OK. */
2021 rs6000_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
2023 return rs6000_hard_regno_mode_ok_p
[mode
][regno
];
2026 /* Implement TARGET_MODES_TIEABLE_P.
2028 PTImode cannot tie with other modes because PTImode is restricted to even
2029 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
2032 Similarly, don't allow OOmode (vector pair), XOmode (vector quad), or
2033 TDOmode (dmr register) to pair with anything else. Vector pairs are
2034 restricted to even/odd VSX registers. Without dense math, vector quads are
2035 limited to FPR registers divisible by 4. With dense math, vector quads are
2036 limited to even VSX registers or DMR registers.
2038 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
2039 128-bit floating point on VSX systems ties with other vectors. */
2042 rs6000_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
2044 if (mode1
== PTImode
|| mode1
== OOmode
|| mode1
== XOmode
2045 || mode1
== TDOmode
|| mode2
== PTImode
|| mode2
== OOmode
2046 || mode2
== XOmode
|| mode2
== TDOmode
)
2047 return mode1
== mode2
;
2049 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1
))
2050 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2
);
2051 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2
))
2054 if (SCALAR_FLOAT_MODE_P (mode1
))
2055 return SCALAR_FLOAT_MODE_P (mode2
);
2056 if (SCALAR_FLOAT_MODE_P (mode2
))
2059 if (GET_MODE_CLASS (mode1
) == MODE_CC
)
2060 return GET_MODE_CLASS (mode2
) == MODE_CC
;
2061 if (GET_MODE_CLASS (mode2
) == MODE_CC
)
2067 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
/* Reports whether a call preserves only part of a register holding MODE:
   GPRs wider than 4 bytes, and FPRs holding values wider than 8 bytes
   (except the 2-register 128-bit float layout).
   NOTE(review): the extraction dropped the leading target-flag conditions
   and the return statements of both if-blocks (duplicated lines) --
   restore from the canonical source; the surviving fragments alone do not
   determine the guards.  */
2070 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno
,
2075 && GET_MODE_SIZE (mode
) > 4
2076 && INT_REGNO_P (regno
))
2080 && FP_REGNO_P (regno
)
2081 && GET_MODE_SIZE (mode
) > 8
2082 && !FLOAT128_2REG_P (mode
))
2088 /* Print interesting facts about registers. */
/* Debug dump (for -mdebug=reg): for each hard register in
   [first_regno, last_regno], prints to stderr the modes it can hold (with
   register counts), call-used/fixed status, register class, and number.
   NOTE(review): extraction dropped braces, `len' bookkeeping lines and
   wrap checks (duplicated lines) -- restore from the canonical source.  */
2090 rs6000_debug_reg_print (int first_regno
, int last_regno
, const char *reg_name
)
2094 for (r
= first_regno
; r
<= last_regno
; ++r
)
2096 const char *comma
= "";
/* Single-register ranges print just the name; ranges append an index.  */
2099 if (first_regno
== last_regno
)
2100 fprintf (stderr
, "%s:\t", reg_name
);
2102 fprintf (stderr
, "%s%d:\t", reg_name
, r
- first_regno
);
/* List every machine mode this register can hold.  */
2105 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2106 if (rs6000_hard_regno_mode_ok_p
[m
][r
] && rs6000_hard_regno_nregs
[m
][r
])
2110 fprintf (stderr
, ",\n\t");
/* Multi-register modes are printed as MODE/NREGS.  */
2115 if (rs6000_hard_regno_nregs
[m
][r
] > 1)
2116 len
+= fprintf (stderr
, "%s%s/%d", comma
, GET_MODE_NAME (m
),
2117 rs6000_hard_regno_nregs
[m
][r
]);
2119 len
+= fprintf (stderr
, "%s%s", comma
, GET_MODE_NAME (m
));
2124 if (call_used_or_fixed_reg_p (r
))
2128 fprintf (stderr
, ",\n\t");
2133 len
+= fprintf (stderr
, "%s%s", comma
, "call-used");
2141 fprintf (stderr
, ",\n\t");
2146 len
+= fprintf (stderr
, "%s%s", comma
, "fixed");
2152 fprintf (stderr
, ",\n\t");
2156 len
+= fprintf (stderr
, "%sreg-class = %s", comma
,
2157 reg_class_names
[(int)rs6000_regno_regclass
[r
]]);
2162 fprintf (stderr
, ",\n\t");
2166 fprintf (stderr
, "%sregno = %d\n", comma
, r
);
2171 rs6000_debug_vector_unit (enum rs6000_vector v
)
2177 case VECTOR_NONE
: ret
= "none"; break;
2178 case VECTOR_ALTIVEC
: ret
= "altivec"; break;
2179 case VECTOR_VSX
: ret
= "vsx"; break;
2180 case VECTOR_P8_VECTOR
: ret
= "p8_vector"; break;
2181 default: ret
= "unknown"; break;
2187 /* Inner function printing just the address mask for a particular reload
/* Formats an addr_mask_type bit mask as a short string of flag characters
   for -mdebug=reg; when KEEP_SPACES is true, absent flags are padded with
   spaces so columns line up across modes.
   NOTE(review): the extraction dropped the character-append statements
   after every condition below (duplicated lines), along with the buffer
   declaration and return -- restore from the canonical source.  */
2189 DEBUG_FUNCTION
char *
2190 rs6000_debug_addr_mask (addr_mask_type mask
, bool keep_spaces
)
2195 if ((mask
& RELOAD_REG_VALID
) != 0)
2197 else if (keep_spaces
)
2200 if ((mask
& RELOAD_REG_MULTIPLE
) != 0)
2202 else if (keep_spaces
)
2205 if ((mask
& RELOAD_REG_INDEXED
) != 0)
2207 else if (keep_spaces
)
/* Quad-offset subsumes plain offset, so it is tested first.  */
2210 if ((mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
2212 else if ((mask
& RELOAD_REG_OFFSET
) != 0)
2214 else if (keep_spaces
)
2217 if ((mask
& RELOAD_REG_PRE_INCDEC
) != 0)
2219 else if (keep_spaces
)
2222 if ((mask
& RELOAD_REG_PRE_MODIFY
) != 0)
2224 else if (keep_spaces
)
2227 if ((mask
& RELOAD_REG_AND_M16
) != 0)
2229 else if (keep_spaces
)
2237 /* Print the address masks in a human readble fashion. */
2239 rs6000_debug_print_mode (ssize_t m
)
2244 fprintf (stderr
, "Mode: %-5s", GET_MODE_NAME (m
));
2245 for (rc
= 0; rc
< N_RELOAD_REG
; rc
++)
2246 fprintf (stderr
, " %s: %s", reload_reg_map
[rc
].name
,
2247 rs6000_debug_addr_mask (reg_addr
[m
].addr_mask
[rc
], true));
2249 if ((reg_addr
[m
].reload_store
!= CODE_FOR_nothing
)
2250 || (reg_addr
[m
].reload_load
!= CODE_FOR_nothing
))
2252 fprintf (stderr
, "%*s Reload=%c%c", spaces
, "",
2253 (reg_addr
[m
].reload_store
!= CODE_FOR_nothing
) ? 's' : '*',
2254 (reg_addr
[m
].reload_load
!= CODE_FOR_nothing
) ? 'l' : '*');
2258 spaces
+= strlen (" Reload=sl");
2260 if (reg_addr
[m
].scalar_in_vmx_p
)
2262 fprintf (stderr
, "%*s Upper=y", spaces
, "");
2266 spaces
+= strlen (" Upper=y");
2268 if (rs6000_vector_unit
[m
] != VECTOR_NONE
2269 || rs6000_vector_mem
[m
] != VECTOR_NONE
)
2271 fprintf (stderr
, "%*s vector: arith=%-10s mem=%s",
2273 rs6000_debug_vector_unit (rs6000_vector_unit
[m
]),
2274 rs6000_debug_vector_unit (rs6000_vector_mem
[m
]));
2277 fputs ("\n", stderr
);
2280 #define DEBUG_FMT_ID "%-32s= "
2281 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2282 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2283 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2285 /* Print various interesting information with -mdebug=reg. */
2287 rs6000_debug_reg_global (void)
2289 static const char *const tf
[2] = { "false", "true" };
2290 const char *nl
= (const char *)0;
2293 char costly_num
[20];
2295 char flags_buffer
[40];
2296 const char *costly_str
;
2297 const char *nop_str
;
2298 const char *trace_str
;
2299 const char *abi_str
;
2300 const char *cmodel_str
;
2301 struct cl_target_option cl_opts
;
2303 /* Modes we want tieable information on. */
2304 static const machine_mode print_tieable_modes
[] = {
2344 /* Virtual regs we are interested in. */
2345 const static struct {
2346 int regno
; /* register number. */
2347 const char *name
; /* register name. */
2348 } virtual_regs
[] = {
2349 { STACK_POINTER_REGNUM
, "stack pointer:" },
2350 { TOC_REGNUM
, "toc: " },
2351 { STATIC_CHAIN_REGNUM
, "static chain: " },
2352 { RS6000_PIC_OFFSET_TABLE_REGNUM
, "pic offset: " },
2353 { HARD_FRAME_POINTER_REGNUM
, "hard frame: " },
2354 { ARG_POINTER_REGNUM
, "arg pointer: " },
2355 { FRAME_POINTER_REGNUM
, "frame pointer:" },
2356 { FIRST_PSEUDO_REGISTER
, "first pseudo: " },
2357 { FIRST_VIRTUAL_REGISTER
, "first virtual:" },
2358 { VIRTUAL_INCOMING_ARGS_REGNUM
, "incoming_args:" },
2359 { VIRTUAL_STACK_VARS_REGNUM
, "stack_vars: " },
2360 { VIRTUAL_STACK_DYNAMIC_REGNUM
, "stack_dynamic:" },
2361 { VIRTUAL_OUTGOING_ARGS_REGNUM
, "outgoing_args:" },
2362 { VIRTUAL_CFA_REGNUM
, "cfa (frame): " },
2363 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM
, "stack boundry:" },
2364 { LAST_VIRTUAL_REGISTER
, "last virtual: " },
2367 fputs ("\nHard register information:\n", stderr
);
2368 rs6000_debug_reg_print (FIRST_GPR_REGNO
, LAST_GPR_REGNO
, "gr");
2369 rs6000_debug_reg_print (FIRST_FPR_REGNO
, LAST_FPR_REGNO
, "fp");
2370 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO
,
2373 rs6000_debug_reg_print (FIRST_DMR_REGNO
, LAST_DMR_REGNO
, "dmr");
2374 rs6000_debug_reg_print (LR_REGNO
, LR_REGNO
, "lr");
2375 rs6000_debug_reg_print (CTR_REGNO
, CTR_REGNO
, "ctr");
2376 rs6000_debug_reg_print (CR0_REGNO
, CR7_REGNO
, "cr");
2377 rs6000_debug_reg_print (CA_REGNO
, CA_REGNO
, "ca");
2378 rs6000_debug_reg_print (VRSAVE_REGNO
, VRSAVE_REGNO
, "vrsave");
2379 rs6000_debug_reg_print (VSCR_REGNO
, VSCR_REGNO
, "vscr");
2381 fputs ("\nVirtual/stack/frame registers:\n", stderr
);
2382 for (v
= 0; v
< ARRAY_SIZE (virtual_regs
); v
++)
2383 fprintf (stderr
, "%s regno = %3d\n", virtual_regs
[v
].name
, virtual_regs
[v
].regno
);
2387 "d reg_class = %s\n"
2388 "v reg_class = %s\n"
2389 "wa reg_class = %s\n"
2390 "we reg_class = %s\n"
2391 "wr reg_class = %s\n"
2392 "wx reg_class = %s\n"
2393 "wA reg_class = %s\n"
2394 "wD reg_class = %s\n"
2396 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_d
]],
2397 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_v
]],
2398 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wa
]],
2399 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_we
]],
2400 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wr
]],
2401 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wx
]],
2402 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wA
]],
2403 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wD
]]);
2406 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2407 rs6000_debug_print_mode (m
);
2409 fputs ("\n", stderr
);
2411 for (m1
= 0; m1
< ARRAY_SIZE (print_tieable_modes
); m1
++)
2413 machine_mode mode1
= print_tieable_modes
[m1
];
2414 bool first_time
= true;
2416 nl
= (const char *)0;
2417 for (m2
= 0; m2
< ARRAY_SIZE (print_tieable_modes
); m2
++)
2419 machine_mode mode2
= print_tieable_modes
[m2
];
2420 if (mode1
!= mode2
&& rs6000_modes_tieable_p (mode1
, mode2
))
2424 fprintf (stderr
, "Tieable modes %s:", GET_MODE_NAME (mode1
));
2429 fprintf (stderr
, " %s", GET_MODE_NAME (mode2
));
2434 fputs ("\n", stderr
);
2440 if (rs6000_recip_control
)
2442 fprintf (stderr
, "\nReciprocal mask = 0x%x\n", rs6000_recip_control
);
2444 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2445 if (rs6000_recip_bits
[m
])
2448 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2450 (RS6000_RECIP_AUTO_RE_P (m
)
2452 : (RS6000_RECIP_HAVE_RE_P (m
) ? "have" : "none")),
2453 (RS6000_RECIP_AUTO_RSQRTE_P (m
)
2455 : (RS6000_RECIP_HAVE_RSQRTE_P (m
) ? "have" : "none")));
2458 fputs ("\n", stderr
);
2461 if (rs6000_cpu_index
>= 0)
2463 const char *name
= processor_target_table
[rs6000_cpu_index
].name
;
2465 = processor_target_table
[rs6000_cpu_index
].target_enable
;
2467 sprintf (flags_buffer
, "-mcpu=%s flags", name
);
2468 rs6000_print_isa_options (stderr
, 0, flags_buffer
, flags
);
2471 fprintf (stderr
, DEBUG_FMT_S
, "cpu", "<none>");
2473 if (rs6000_tune_index
>= 0)
2475 const char *name
= processor_target_table
[rs6000_tune_index
].name
;
2477 = processor_target_table
[rs6000_tune_index
].target_enable
;
2479 sprintf (flags_buffer
, "-mtune=%s flags", name
);
2480 rs6000_print_isa_options (stderr
, 0, flags_buffer
, flags
);
2483 fprintf (stderr
, DEBUG_FMT_S
, "tune", "<none>");
2485 cl_target_option_save (&cl_opts
, &global_options
, &global_options_set
);
2486 rs6000_print_isa_options (stderr
, 0, "rs6000_isa_flags",
2489 rs6000_print_isa_options (stderr
, 0, "rs6000_isa_flags_explicit",
2490 rs6000_isa_flags_explicit
);
2492 rs6000_print_isa_options (stderr
, 0, "TARGET_DEFAULT", TARGET_DEFAULT
);
2494 fprintf (stderr
, DEBUG_FMT_S
, "--with-cpu default",
2495 OPTION_TARGET_CPU_DEFAULT
? OPTION_TARGET_CPU_DEFAULT
: "<none>");
2497 switch (rs6000_sched_costly_dep
)
2499 case max_dep_latency
:
2500 costly_str
= "max_dep_latency";
2504 costly_str
= "no_dep_costly";
2507 case all_deps_costly
:
2508 costly_str
= "all_deps_costly";
2511 case true_store_to_load_dep_costly
:
2512 costly_str
= "true_store_to_load_dep_costly";
2515 case store_to_load_dep_costly
:
2516 costly_str
= "store_to_load_dep_costly";
2520 costly_str
= costly_num
;
2521 sprintf (costly_num
, "%d", (int)rs6000_sched_costly_dep
);
2525 fprintf (stderr
, DEBUG_FMT_S
, "sched_costly_dep", costly_str
);
2527 switch (rs6000_sched_insert_nops
)
2529 case sched_finish_regroup_exact
:
2530 nop_str
= "sched_finish_regroup_exact";
2533 case sched_finish_pad_groups
:
2534 nop_str
= "sched_finish_pad_groups";
2537 case sched_finish_none
:
2538 nop_str
= "sched_finish_none";
2543 sprintf (nop_num
, "%d", (int)rs6000_sched_insert_nops
);
2547 fprintf (stderr
, DEBUG_FMT_S
, "sched_insert_nops", nop_str
);
2549 switch (rs6000_sdata
)
2556 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "data");
2560 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "sysv");
2564 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "eabi");
2569 switch (rs6000_traceback
)
2571 case traceback_default
: trace_str
= "default"; break;
2572 case traceback_none
: trace_str
= "none"; break;
2573 case traceback_part
: trace_str
= "part"; break;
2574 case traceback_full
: trace_str
= "full"; break;
2575 default: trace_str
= "unknown"; break;
2578 fprintf (stderr
, DEBUG_FMT_S
, "traceback", trace_str
);
2580 switch (rs6000_current_cmodel
)
2582 case CMODEL_SMALL
: cmodel_str
= "small"; break;
2583 case CMODEL_MEDIUM
: cmodel_str
= "medium"; break;
2584 case CMODEL_LARGE
: cmodel_str
= "large"; break;
2585 default: cmodel_str
= "unknown"; break;
2588 fprintf (stderr
, DEBUG_FMT_S
, "cmodel", cmodel_str
);
2590 switch (rs6000_current_abi
)
2592 case ABI_NONE
: abi_str
= "none"; break;
2593 case ABI_AIX
: abi_str
= "aix"; break;
2594 case ABI_ELFv2
: abi_str
= "ELFv2"; break;
2595 case ABI_V4
: abi_str
= "V4"; break;
2596 case ABI_DARWIN
: abi_str
= "darwin"; break;
2597 default: abi_str
= "unknown"; break;
2600 fprintf (stderr
, DEBUG_FMT_S
, "abi", abi_str
);
2602 if (rs6000_altivec_abi
)
2603 fprintf (stderr
, DEBUG_FMT_S
, "altivec_abi", "true");
2605 if (rs6000_aix_extabi
)
2606 fprintf (stderr
, DEBUG_FMT_S
, "AIX vec-extabi", "true");
2608 if (rs6000_darwin64_abi
)
2609 fprintf (stderr
, DEBUG_FMT_S
, "darwin64_abi", "true");
2611 fprintf (stderr
, DEBUG_FMT_S
, "soft_float",
2612 (TARGET_SOFT_FLOAT
? "true" : "false"));
2614 if (TARGET_LINK_STACK
)
2615 fprintf (stderr
, DEBUG_FMT_S
, "link_stack", "true");
2617 if (TARGET_P8_FUSION
)
2621 strcpy (options
, "power8");
2622 if (TARGET_P8_FUSION_SIGN
)
2623 strcat (options
, ", sign");
2625 fprintf (stderr
, DEBUG_FMT_S
, "fusion", options
);
2628 fprintf (stderr
, DEBUG_FMT_S
, "plt-format",
2629 TARGET_SECURE_PLT
? "secure" : "bss");
2630 fprintf (stderr
, DEBUG_FMT_S
, "struct-return",
2631 aix_struct_return
? "aix" : "sysv");
2632 fprintf (stderr
, DEBUG_FMT_S
, "always_hint", tf
[!!rs6000_always_hint
]);
2633 fprintf (stderr
, DEBUG_FMT_S
, "sched_groups", tf
[!!rs6000_sched_groups
]);
2634 fprintf (stderr
, DEBUG_FMT_S
, "align_branch",
2635 tf
[!!rs6000_align_branch_targets
]);
2636 fprintf (stderr
, DEBUG_FMT_D
, "tls_size", rs6000_tls_size
);
2637 fprintf (stderr
, DEBUG_FMT_D
, "long_double_size",
2638 rs6000_long_double_type_size
);
2639 if (rs6000_long_double_type_size
> 64)
2641 fprintf (stderr
, DEBUG_FMT_S
, "long double type",
2642 TARGET_IEEEQUAD
? "IEEE" : "IBM");
2643 fprintf (stderr
, DEBUG_FMT_S
, "default long double type",
2644 TARGET_IEEEQUAD_DEFAULT
? "IEEE" : "IBM");
2646 fprintf (stderr
, DEBUG_FMT_D
, "sched_restricted_insns_priority",
2647 (int)rs6000_sched_restricted_insns_priority
);
2648 fprintf (stderr
, DEBUG_FMT_D
, "Number of standard builtins",
2651 fprintf (stderr
, DEBUG_FMT_D
, "Enable float128 on VSX",
2652 (int)TARGET_FLOAT128_ENABLE_TYPE
);
2655 fprintf (stderr
, DEBUG_FMT_D
, "VSX easy 64-bit scalar element",
2656 (int)VECTOR_ELEMENT_SCALAR_64BIT
);
2658 if (TARGET_DIRECT_MOVE_128
)
2659 fprintf (stderr
, DEBUG_FMT_D
, "VSX easy 64-bit mfvsrld element",
2660 (int)VECTOR_ELEMENT_MFVSRLD_64BIT
);
2664 /* Update the addr mask bits in reg_addr to help secondary reload and go if
2665 legitimate address support to figure out the appropriate addressing to
2669 rs6000_setup_reg_addr_masks (void)
2671 ssize_t rc
, reg
, m
, nregs
;
2672 addr_mask_type any_addr_mask
, addr_mask
;
2674 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2676 machine_mode m2
= (machine_mode
) m
;
2677 bool complex_p
= false;
2678 bool small_int_p
= (m2
== QImode
|| m2
== HImode
|| m2
== SImode
);
2681 if (COMPLEX_MODE_P (m2
))
2684 m2
= GET_MODE_INNER (m2
);
2687 msize
= GET_MODE_SIZE (m2
);
2689 /* SDmode is special in that we want to access it only via REG+REG
2690 addressing on power7 and above, since we want to use the LFIWZX and
2691 STFIWZX instructions to load it. */
2692 bool indexed_only_p
= (m
== SDmode
&& TARGET_NO_SDMODE_STACK
);
2695 for (rc
= FIRST_RELOAD_REG_CLASS
; rc
<= LAST_RELOAD_REG_CLASS
; rc
++)
2698 reg
= reload_reg_map
[rc
].reg
;
2700 /* Special case DMR registers. */
2701 if (rc
== RELOAD_REG_DMR
)
2703 if (TARGET_DENSE_MATH
&& (m2
== XOmode
|| m2
== TDOmode
))
2705 addr_mask
= RELOAD_REG_VALID
;
2706 reg_addr
[m
].addr_mask
[rc
] = addr_mask
;
2707 any_addr_mask
|= addr_mask
;
2710 reg_addr
[m
].addr_mask
[rc
] = 0;
2715 /* Can mode values go in the GPR/FPR/Altivec registers? */
2716 if (reg
>= 0 && rs6000_hard_regno_mode_ok_p
[m
][reg
])
2718 bool small_int_vsx_p
= (small_int_p
2719 && (rc
== RELOAD_REG_FPR
2720 || rc
== RELOAD_REG_VMX
));
2722 nregs
= rs6000_hard_regno_nregs
[m
][reg
];
2723 addr_mask
|= RELOAD_REG_VALID
;
2725 /* Indicate if the mode takes more than 1 physical register. If
2726 it takes a single register, indicate it can do REG+REG
2727 addressing. Small integers in VSX registers can only do
2728 REG+REG addressing. */
2729 if (small_int_vsx_p
)
2730 addr_mask
|= RELOAD_REG_INDEXED
;
2731 else if (nregs
> 1 || m
== BLKmode
|| complex_p
)
2732 addr_mask
|= RELOAD_REG_MULTIPLE
;
2734 addr_mask
|= RELOAD_REG_INDEXED
;
2736 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2737 addressing. If we allow scalars into Altivec registers,
2738 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2740 For VSX systems, we don't allow update addressing for
2741 DFmode/SFmode if those registers can go in both the
2742 traditional floating point registers and Altivec registers.
2743 The load/store instructions for the Altivec registers do not
2744 have update forms. If we allowed update addressing, it seems
2745 to break IV-OPT code using floating point if the index type is
2746 int instead of long (PR target/81550 and target/84042). */
2749 && (rc
== RELOAD_REG_GPR
|| rc
== RELOAD_REG_FPR
)
2751 && !VECTOR_MODE_P (m2
)
2752 && !VECTOR_ALIGNMENT_P (m2
)
2754 && (m
!= E_DFmode
|| !TARGET_VSX
)
2755 && (m
!= E_SFmode
|| !TARGET_P8_VECTOR
)
2756 && !small_int_vsx_p
)
2758 addr_mask
|= RELOAD_REG_PRE_INCDEC
;
2760 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2761 we don't allow PRE_MODIFY for some multi-register
2766 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2770 if (TARGET_POWERPC64
)
2771 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2776 if (TARGET_HARD_FLOAT
)
2777 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2783 /* GPR and FPR registers can do REG+OFFSET addressing, except
2784 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2785 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2786 if ((addr_mask
!= 0) && !indexed_only_p
2788 && (rc
== RELOAD_REG_GPR
2789 || ((msize
== 8 || m2
== SFmode
)
2790 && (rc
== RELOAD_REG_FPR
2791 || (rc
== RELOAD_REG_VMX
&& TARGET_P9_VECTOR
)))))
2792 addr_mask
|= RELOAD_REG_OFFSET
;
2794 /* VSX registers can do REG+OFFSET addresssing if ISA 3.0
2795 instructions are enabled. The offset for 128-bit VSX registers is
2796 only 12-bits. While GPRs can handle the full offset range, VSX
2797 registers can only handle the restricted range. */
2798 else if ((addr_mask
!= 0) && !indexed_only_p
2799 && msize
== 16 && TARGET_P9_VECTOR
2800 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2
)
2801 || (m2
== TImode
&& TARGET_VSX
)))
2803 addr_mask
|= RELOAD_REG_OFFSET
;
2804 if (rc
== RELOAD_REG_FPR
|| rc
== RELOAD_REG_VMX
)
2805 addr_mask
|= RELOAD_REG_QUAD_OFFSET
;
2808 /* Vector pairs can do both indexed and offset loads if the
2809 instructions are enabled, otherwise they can only do offset loads
2810 since it will be broken into two vector moves. Vector quads and
2811 1,024 bit DMR values can only do offset loads. */
2812 else if ((addr_mask
!= 0) && TARGET_MMA
2813 && (m2
== OOmode
|| m2
== XOmode
|| m2
== TDOmode
))
2815 addr_mask
|= RELOAD_REG_OFFSET
;
2816 if (rc
== RELOAD_REG_FPR
|| rc
== RELOAD_REG_VMX
)
2818 addr_mask
|= RELOAD_REG_QUAD_OFFSET
;
2820 addr_mask
|= RELOAD_REG_INDEXED
;
2824 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2825 addressing on 128-bit types. */
2826 if (rc
== RELOAD_REG_VMX
&& msize
== 16
2827 && (addr_mask
& RELOAD_REG_VALID
) != 0)
2828 addr_mask
|= RELOAD_REG_AND_M16
;
2830 reg_addr
[m
].addr_mask
[rc
] = addr_mask
;
2831 any_addr_mask
|= addr_mask
;
2834 reg_addr
[m
].addr_mask
[RELOAD_REG_ANY
] = any_addr_mask
;
2839 /* Initialize the various global tables that are based on register size. */
2841 rs6000_init_hard_regno_mode_ok (bool global_init_p
)
2847 /* Precalculate REGNO_REG_CLASS. */
2848 rs6000_regno_regclass
[0] = GENERAL_REGS
;
2849 for (r
= 1; r
< 32; ++r
)
2850 rs6000_regno_regclass
[r
] = BASE_REGS
;
2852 for (r
= 32; r
< 64; ++r
)
2853 rs6000_regno_regclass
[r
] = FLOAT_REGS
;
2855 for (r
= 64; HARD_REGISTER_NUM_P (r
); ++r
)
2856 rs6000_regno_regclass
[r
] = NO_REGS
;
2858 for (r
= FIRST_ALTIVEC_REGNO
; r
<= LAST_ALTIVEC_REGNO
; ++r
)
2859 rs6000_regno_regclass
[r
] = ALTIVEC_REGS
;
2861 rs6000_regno_regclass
[CR0_REGNO
] = CR0_REGS
;
2862 for (r
= CR1_REGNO
; r
<= CR7_REGNO
; ++r
)
2863 rs6000_regno_regclass
[r
] = CR_REGS
;
2865 for (r
= FIRST_DMR_REGNO
; r
<= LAST_DMR_REGNO
; ++r
)
2866 rs6000_regno_regclass
[r
] = DM_REGS
;
2868 rs6000_regno_regclass
[LR_REGNO
] = LINK_REGS
;
2869 rs6000_regno_regclass
[CTR_REGNO
] = CTR_REGS
;
2870 rs6000_regno_regclass
[CA_REGNO
] = NO_REGS
;
2871 rs6000_regno_regclass
[VRSAVE_REGNO
] = VRSAVE_REGS
;
2872 rs6000_regno_regclass
[VSCR_REGNO
] = VRSAVE_REGS
;
2873 rs6000_regno_regclass
[ARG_POINTER_REGNUM
] = BASE_REGS
;
2874 rs6000_regno_regclass
[FRAME_POINTER_REGNUM
] = BASE_REGS
;
2876 /* Precalculate register class to simpler reload register class. We don't
2877 need all of the register classes that are combinations of different
2878 classes, just the simple ones that have constraint letters. */
2879 for (c
= 0; c
< N_REG_CLASSES
; c
++)
2880 reg_class_to_reg_type
[c
] = NO_REG_TYPE
;
2882 reg_class_to_reg_type
[(int)GENERAL_REGS
] = GPR_REG_TYPE
;
2883 reg_class_to_reg_type
[(int)BASE_REGS
] = GPR_REG_TYPE
;
2884 reg_class_to_reg_type
[(int)VSX_REGS
] = VSX_REG_TYPE
;
2885 reg_class_to_reg_type
[(int)VRSAVE_REGS
] = SPR_REG_TYPE
;
2886 reg_class_to_reg_type
[(int)VSCR_REGS
] = SPR_REG_TYPE
;
2887 reg_class_to_reg_type
[(int)LINK_REGS
] = SPR_REG_TYPE
;
2888 reg_class_to_reg_type
[(int)CTR_REGS
] = SPR_REG_TYPE
;
2889 reg_class_to_reg_type
[(int)LINK_OR_CTR_REGS
] = SPR_REG_TYPE
;
2890 reg_class_to_reg_type
[(int)CR_REGS
] = CR_REG_TYPE
;
2891 reg_class_to_reg_type
[(int)CR0_REGS
] = CR_REG_TYPE
;
2892 reg_class_to_reg_type
[(int)DM_REGS
] = DMR_REG_TYPE
;
2896 reg_class_to_reg_type
[(int)FLOAT_REGS
] = VSX_REG_TYPE
;
2897 reg_class_to_reg_type
[(int)ALTIVEC_REGS
] = VSX_REG_TYPE
;
2901 reg_class_to_reg_type
[(int)FLOAT_REGS
] = FPR_REG_TYPE
;
2902 reg_class_to_reg_type
[(int)ALTIVEC_REGS
] = ALTIVEC_REG_TYPE
;
2905 /* Precalculate the valid memory formats as well as the vector information,
2906 this must be set up before the rs6000_hard_regno_nregs_internal calls
2908 gcc_assert ((int)VECTOR_NONE
== 0);
2909 memset ((void *) &rs6000_vector_unit
[0], '\0', sizeof (rs6000_vector_unit
));
2910 memset ((void *) &rs6000_vector_mem
[0], '\0', sizeof (rs6000_vector_mem
));
2912 gcc_assert ((int)CODE_FOR_nothing
== 0);
2913 memset ((void *) ®_addr
[0], '\0', sizeof (reg_addr
));
2915 gcc_assert ((int)NO_REGS
== 0);
2916 memset ((void *) &rs6000_constraints
[0], '\0', sizeof (rs6000_constraints
));
2918 /* The VSX hardware allows native alignment for vectors, but control whether the compiler
2919 believes it can use native alignment or still uses 128-bit alignment. */
2920 if (TARGET_VSX
&& !TARGET_VSX_ALIGN_128
)
2931 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2932 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2933 if (TARGET_FLOAT128_TYPE
)
2935 rs6000_vector_mem
[KFmode
] = VECTOR_VSX
;
2936 rs6000_vector_align
[KFmode
] = 128;
2938 if (FLOAT128_IEEE_P (TFmode
))
2940 rs6000_vector_mem
[TFmode
] = VECTOR_VSX
;
2941 rs6000_vector_align
[TFmode
] = 128;
2945 /* V2DF mode, VSX only. */
2948 rs6000_vector_unit
[V2DFmode
] = VECTOR_VSX
;
2949 rs6000_vector_mem
[V2DFmode
] = VECTOR_VSX
;
2950 rs6000_vector_align
[V2DFmode
] = align64
;
2953 /* V4SF mode, either VSX or Altivec. */
2956 rs6000_vector_unit
[V4SFmode
] = VECTOR_VSX
;
2957 rs6000_vector_mem
[V4SFmode
] = VECTOR_VSX
;
2958 rs6000_vector_align
[V4SFmode
] = align32
;
2960 else if (TARGET_ALTIVEC
)
2962 rs6000_vector_unit
[V4SFmode
] = VECTOR_ALTIVEC
;
2963 rs6000_vector_mem
[V4SFmode
] = VECTOR_ALTIVEC
;
2964 rs6000_vector_align
[V4SFmode
] = align32
;
2967 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2971 rs6000_vector_unit
[V4SImode
] = VECTOR_ALTIVEC
;
2972 rs6000_vector_unit
[V8HImode
] = VECTOR_ALTIVEC
;
2973 rs6000_vector_unit
[V16QImode
] = VECTOR_ALTIVEC
;
2974 rs6000_vector_align
[V4SImode
] = align32
;
2975 rs6000_vector_align
[V8HImode
] = align32
;
2976 rs6000_vector_align
[V16QImode
] = align32
;
2980 rs6000_vector_mem
[V4SImode
] = VECTOR_VSX
;
2981 rs6000_vector_mem
[V8HImode
] = VECTOR_VSX
;
2982 rs6000_vector_mem
[V16QImode
] = VECTOR_VSX
;
2986 rs6000_vector_mem
[V4SImode
] = VECTOR_ALTIVEC
;
2987 rs6000_vector_mem
[V8HImode
] = VECTOR_ALTIVEC
;
2988 rs6000_vector_mem
[V16QImode
] = VECTOR_ALTIVEC
;
2992 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2993 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2996 rs6000_vector_mem
[V2DImode
] = VECTOR_VSX
;
2997 rs6000_vector_unit
[V2DImode
]
2998 = (TARGET_P8_VECTOR
) ? VECTOR_P8_VECTOR
: VECTOR_NONE
;
2999 rs6000_vector_align
[V2DImode
] = align64
;
3001 rs6000_vector_mem
[V1TImode
] = VECTOR_VSX
;
3002 rs6000_vector_unit
[V1TImode
]
3003 = (TARGET_P8_VECTOR
) ? VECTOR_P8_VECTOR
: VECTOR_NONE
;
3004 rs6000_vector_align
[V1TImode
] = 128;
3007 /* DFmode, see if we want to use the VSX unit. Memory is handled
3008 differently, so don't set rs6000_vector_mem. */
3011 rs6000_vector_unit
[DFmode
] = VECTOR_VSX
;
3012 rs6000_vector_align
[DFmode
] = 64;
3015 /* SFmode, see if we want to use the VSX unit. */
3016 if (TARGET_P8_VECTOR
)
3018 rs6000_vector_unit
[SFmode
] = VECTOR_VSX
;
3019 rs6000_vector_align
[SFmode
] = 32;
3022 /* Allow TImode in VSX register and set the VSX memory macros. */
3025 rs6000_vector_mem
[TImode
] = VECTOR_VSX
;
3026 rs6000_vector_align
[TImode
] = align64
;
3029 /* Add support for vector pairs and vector quad registers. */
3032 rs6000_vector_unit
[OOmode
] = VECTOR_NONE
;
3033 rs6000_vector_mem
[OOmode
] = VECTOR_VSX
;
3034 rs6000_vector_align
[OOmode
] = 256;
3036 rs6000_vector_unit
[XOmode
] = VECTOR_NONE
;
3037 rs6000_vector_mem
[XOmode
] = VECTOR_VSX
;
3038 rs6000_vector_align
[XOmode
] = 512;
3041 /* Add support for 1,024 bit DMR registers. */
3042 if (TARGET_DENSE_MATH
)
3044 rs6000_vector_unit
[TDOmode
] = VECTOR_NONE
;
3045 rs6000_vector_mem
[TDOmode
] = VECTOR_VSX
;
3046 rs6000_vector_align
[TDOmode
] = 512;
3049 /* Register class constraints for the constraints that depend on compile
3050 switches. When the VSX code was added, different constraints were added
3051 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3052 of the VSX registers are used. The register classes for scalar floating
3053 point types is set, based on whether we allow that type into the upper
3054 (Altivec) registers. GCC has register classes to target the Altivec
3055 registers for load/store operations, to select using a VSX memory
3056 operation instead of the traditional floating point operation. The
3059 d - Register class to use with traditional DFmode instructions.
3060 v - Altivec register.
3061 wa - Any VSX register.
3062 wc - Reserved to represent individual CR bits (used in LLVM).
3063 wn - always NO_REGS.
3064 wr - GPR if 64-bit mode is permitted.
3065 wx - Float register if we can do 32-bit int stores. */
3067 if (TARGET_HARD_FLOAT
)
3068 rs6000_constraints
[RS6000_CONSTRAINT_d
] = FLOAT_REGS
;
3070 rs6000_constraints
[RS6000_CONSTRAINT_v
] = ALTIVEC_REGS
;
3072 rs6000_constraints
[RS6000_CONSTRAINT_wa
] = VSX_REGS
;
3074 if (TARGET_POWERPC64
)
3076 rs6000_constraints
[RS6000_CONSTRAINT_wr
] = GENERAL_REGS
;
3077 rs6000_constraints
[RS6000_CONSTRAINT_wA
] = BASE_REGS
;
3081 rs6000_constraints
[RS6000_CONSTRAINT_wx
] = FLOAT_REGS
; /* DImode */
3083 /* Support for new direct moves (ISA 3.0 + 64bit). */
3084 if (TARGET_DIRECT_MOVE_128
)
3085 rs6000_constraints
[RS6000_CONSTRAINT_we
] = VSX_REGS
;
3087 /* Support for the accumulator registers, either FPR registers (aka original
3088 mma) or DMR registers (dense math). */
3089 if (TARGET_DENSE_MATH
)
3090 rs6000_constraints
[RS6000_CONSTRAINT_wD
] = DM_REGS
;
3091 else if (TARGET_MMA
)
3092 rs6000_constraints
[RS6000_CONSTRAINT_wD
] = FLOAT_REGS
;
3094 /* Set up the reload helper and direct move functions. */
3095 if (TARGET_VSX
|| TARGET_ALTIVEC
)
3099 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_di_store
;
3100 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_di_load
;
3101 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_di_store
;
3102 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_di_load
;
3103 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_di_store
;
3104 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_di_load
;
3105 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_di_store
;
3106 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_di_load
;
3107 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_di_store
;
3108 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_di_load
;
3109 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_di_store
;
3110 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_di_load
;
3111 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_di_store
;
3112 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_di_load
;
3113 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_di_store
;
3114 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_di_load
;
3115 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_di_store
;
3116 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_di_load
;
3117 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_di_store
;
3118 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_di_load
;
3120 if (FLOAT128_VECTOR_P (KFmode
))
3122 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_di_store
;
3123 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_di_load
;
3126 if (FLOAT128_VECTOR_P (TFmode
))
3128 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_di_store
;
3129 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_di_load
;
3132 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3134 if (TARGET_NO_SDMODE_STACK
)
3136 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_di_store
;
3137 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_di_load
;
3142 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_di_store
;
3143 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_di_load
;
3146 if (TARGET_DIRECT_MOVE
&& !TARGET_DIRECT_MOVE_128
)
3148 reg_addr
[TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxti
;
3149 reg_addr
[V1TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv1ti
;
3150 reg_addr
[V2DFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2df
;
3151 reg_addr
[V2DImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2di
;
3152 reg_addr
[V4SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4sf
;
3153 reg_addr
[V4SImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4si
;
3154 reg_addr
[V8HImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv8hi
;
3155 reg_addr
[V16QImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv16qi
;
3156 reg_addr
[SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxsf
;
3158 reg_addr
[TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprti
;
3159 reg_addr
[V1TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv1ti
;
3160 reg_addr
[V2DFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2df
;
3161 reg_addr
[V2DImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2di
;
3162 reg_addr
[V4SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4sf
;
3163 reg_addr
[V4SImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4si
;
3164 reg_addr
[V8HImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv8hi
;
3165 reg_addr
[V16QImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv16qi
;
3166 reg_addr
[SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprsf
;
3168 if (FLOAT128_VECTOR_P (KFmode
))
3170 reg_addr
[KFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxkf
;
3171 reg_addr
[KFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprkf
;
3174 if (FLOAT128_VECTOR_P (TFmode
))
3176 reg_addr
[TFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxtf
;
3177 reg_addr
[TFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprtf
;
3182 reg_addr
[OOmode
].reload_store
= CODE_FOR_reload_oo_di_store
;
3183 reg_addr
[OOmode
].reload_load
= CODE_FOR_reload_oo_di_load
;
3184 reg_addr
[XOmode
].reload_store
= CODE_FOR_reload_xo_di_store
;
3185 reg_addr
[XOmode
].reload_load
= CODE_FOR_reload_xo_di_load
;
3191 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_si_store
;
3192 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_si_load
;
3193 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_si_store
;
3194 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_si_load
;
3195 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_si_store
;
3196 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_si_load
;
3197 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_si_store
;
3198 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_si_load
;
3199 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_si_store
;
3200 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_si_load
;
3201 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_si_store
;
3202 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_si_load
;
3203 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_si_store
;
3204 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_si_load
;
3205 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_si_store
;
3206 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_si_load
;
3207 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_si_store
;
3208 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_si_load
;
3209 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_si_store
;
3210 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_si_load
;
3212 if (FLOAT128_VECTOR_P (KFmode
))
3214 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_si_store
;
3215 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_si_load
;
3218 if (FLOAT128_IEEE_P (TFmode
))
3220 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_si_store
;
3221 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_si_load
;
3224 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3226 if (TARGET_NO_SDMODE_STACK
)
3228 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_si_store
;
3229 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_si_load
;
3234 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_si_store
;
3235 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_si_load
;
3238 if (TARGET_DIRECT_MOVE
)
3240 reg_addr
[DImode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdi
;
3241 reg_addr
[DDmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdd
;
3242 reg_addr
[DFmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdf
;
3246 reg_addr
[DFmode
].scalar_in_vmx_p
= true;
3247 reg_addr
[DImode
].scalar_in_vmx_p
= true;
3249 if (TARGET_P8_VECTOR
)
3251 reg_addr
[SFmode
].scalar_in_vmx_p
= true;
3252 reg_addr
[SImode
].scalar_in_vmx_p
= true;
3254 if (TARGET_P9_VECTOR
)
3256 reg_addr
[HImode
].scalar_in_vmx_p
= true;
3257 reg_addr
[QImode
].scalar_in_vmx_p
= true;
3262 if (TARGET_DENSE_MATH
)
3264 reg_addr
[TDOmode
].reload_load
= CODE_FOR_reload_dmr_from_memory
;
3265 reg_addr
[TDOmode
].reload_store
= CODE_FOR_reload_dmr_to_memory
;
3268 /* Precalculate HARD_REGNO_NREGS. */
3269 for (r
= 0; HARD_REGISTER_NUM_P (r
); ++r
)
3270 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3271 rs6000_hard_regno_nregs
[m
][r
]
3272 = rs6000_hard_regno_nregs_internal (r
, (machine_mode
) m
);
3274 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3275 for (r
= 0; HARD_REGISTER_NUM_P (r
); ++r
)
3276 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3277 rs6000_hard_regno_mode_ok_p
[m
][r
]
3278 = rs6000_hard_regno_mode_ok_uncached (r
, (machine_mode
) m
);
3280 /* Precalculate CLASS_MAX_NREGS sizes. */
3281 for (c
= 0; c
< LIM_REG_CLASSES
; ++c
)
3285 if (TARGET_VSX
&& VSX_REG_CLASS_P (c
))
3286 reg_size
= UNITS_PER_VSX_WORD
;
3288 else if (c
== ALTIVEC_REGS
)
3289 reg_size
= UNITS_PER_ALTIVEC_WORD
;
3291 else if (c
== FLOAT_REGS
)
3292 reg_size
= UNITS_PER_FP_WORD
;
3295 reg_size
= UNITS_PER_WORD
;
3297 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3299 machine_mode m2
= (machine_mode
)m
;
3300 int reg_size2
= reg_size
;
3302 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3304 if (TARGET_VSX
&& VSX_REG_CLASS_P (c
) && FLOAT128_2REG_P (m
))
3305 reg_size2
= UNITS_PER_FP_WORD
;
3307 rs6000_class_max_nregs
[m
][c
]
3308 = (GET_MODE_SIZE (m2
) + reg_size2
- 1) / reg_size2
;
3312 /* Calculate which modes to automatically generate code to use a the
3313 reciprocal divide and square root instructions. In the future, possibly
3314 automatically generate the instructions even if the user did not specify
3315 -mrecip. The older machines double precision reciprocal sqrt estimate is
3316 not accurate enough. */
3317 memset (rs6000_recip_bits
, 0, sizeof (rs6000_recip_bits
));
3319 rs6000_recip_bits
[SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3321 rs6000_recip_bits
[DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3322 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3323 rs6000_recip_bits
[V4SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3324 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3325 rs6000_recip_bits
[V2DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3327 if (TARGET_FRSQRTES
)
3328 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3330 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3331 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3332 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3333 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3334 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3336 if (rs6000_recip_control
)
3338 if (!flag_finite_math_only
)
3339 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3341 if (flag_trapping_math
)
3342 warning (0, "%qs requires %qs or %qs", "-mrecip",
3343 "-fno-trapping-math", "-ffast-math");
3344 if (!flag_reciprocal_math
)
3345 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3347 if (flag_finite_math_only
&& !flag_trapping_math
&& flag_reciprocal_math
)
3349 if (RS6000_RECIP_HAVE_RE_P (SFmode
)
3350 && (rs6000_recip_control
& RECIP_SF_DIV
) != 0)
3351 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3353 if (RS6000_RECIP_HAVE_RE_P (DFmode
)
3354 && (rs6000_recip_control
& RECIP_DF_DIV
) != 0)
3355 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3357 if (RS6000_RECIP_HAVE_RE_P (V4SFmode
)
3358 && (rs6000_recip_control
& RECIP_V4SF_DIV
) != 0)
3359 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3361 if (RS6000_RECIP_HAVE_RE_P (V2DFmode
)
3362 && (rs6000_recip_control
& RECIP_V2DF_DIV
) != 0)
3363 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3365 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode
)
3366 && (rs6000_recip_control
& RECIP_SF_RSQRT
) != 0)
3367 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3369 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode
)
3370 && (rs6000_recip_control
& RECIP_DF_RSQRT
) != 0)
3371 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3373 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode
)
3374 && (rs6000_recip_control
& RECIP_V4SF_RSQRT
) != 0)
3375 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3377 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode
)
3378 && (rs6000_recip_control
& RECIP_V2DF_RSQRT
) != 0)
3379 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3383 /* Update the addr mask bits in reg_addr to help secondary reload and go if
3384 legitimate address support to figure out the appropriate addressing to
3386 rs6000_setup_reg_addr_masks ();
3388 if (global_init_p
|| TARGET_DEBUG_TARGET
)
3390 if (TARGET_DEBUG_REG
)
3391 rs6000_debug_reg_global ();
3393 if (TARGET_DEBUG_COST
|| TARGET_DEBUG_REG
)
3395 "SImode variable mult cost = %d\n"
3396 "SImode constant mult cost = %d\n"
3397 "SImode short constant mult cost = %d\n"
3398 "DImode multipliciation cost = %d\n"
3399 "SImode division cost = %d\n"
3400 "DImode division cost = %d\n"
3401 "Simple fp operation cost = %d\n"
3402 "DFmode multiplication cost = %d\n"
3403 "SFmode division cost = %d\n"
3404 "DFmode division cost = %d\n"
3405 "cache line size = %d\n"
3406 "l1 cache size = %d\n"
3407 "l2 cache size = %d\n"
3408 "simultaneous prefetches = %d\n"
3411 rs6000_cost
->mulsi_const
,
3412 rs6000_cost
->mulsi_const9
,
3420 rs6000_cost
->cache_line_size
,
3421 rs6000_cost
->l1_cache_size
,
3422 rs6000_cost
->l2_cache_size
,
3423 rs6000_cost
->simultaneous_prefetches
);
3428 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3431 darwin_rs6000_override_options (void)
3433 /* The Darwin ABI always includes AltiVec, can't be (validly) turned
3435 rs6000_altivec_abi
= 1;
3436 TARGET_ALTIVEC_VRSAVE
= 1;
3437 rs6000_current_abi
= ABI_DARWIN
;
3439 if (DEFAULT_ABI
== ABI_DARWIN
3441 darwin_one_byte_bool
= 1;
3443 if (TARGET_64BIT
&& ! TARGET_POWERPC64
)
3445 rs6000_isa_flags
|= OPTION_MASK_POWERPC64
;
3446 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3449 /* The linkers [ld64] that support 64Bit do not need the JBSR longcall
3450 optimisation, and will not work with the most generic case (where the
3451 symbol is undefined external, but there is no symbl stub). */
3453 rs6000_default_long_calls
= 0;
3455 /* ld_classic is (so far) still used for kernel (static) code, and supports
3456 the JBSR longcall / branch islands. */
3459 rs6000_default_long_calls
= 1;
3461 /* Allow a kext author to do -mkernel -mhard-float. */
3462 if (! (rs6000_isa_flags_explicit
& OPTION_MASK_SOFT_FLOAT
))
3463 rs6000_isa_flags
|= OPTION_MASK_SOFT_FLOAT
;
3466 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3468 if (!flag_mkernel
&& !flag_apple_kext
3470 && ! (rs6000_isa_flags_explicit
& OPTION_MASK_ALTIVEC
))
3471 rs6000_isa_flags
|= OPTION_MASK_ALTIVEC
;
3473 /* Unless the user (not the configurer) has explicitly overridden
3474 it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
3475 G4 unless targeting the kernel. */
3478 && strverscmp (darwin_macosx_version_min
, "10.5") >= 0
3479 && ! (rs6000_isa_flags_explicit
& OPTION_MASK_ALTIVEC
)
3480 && ! OPTION_SET_P (rs6000_cpu_index
))
3482 rs6000_isa_flags
|= OPTION_MASK_ALTIVEC
;
/* If not otherwise specified by a target, make 'long double' equivalent to
   'double'.  */

#ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
#define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
#endif
3494 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3495 to clobber the XER[CA] bit because clobbering that bit without telling
3496 the compiler worked just fine with versions of GCC before GCC 5, and
3497 breaking a lot of older code in ways that are hard to track down is
3498 not such a great idea. */
3501 rs6000_md_asm_adjust (vec
<rtx
> & /*outputs*/, vec
<rtx
> & /*inputs*/,
3502 vec
<machine_mode
> & /*input_modes*/,
3503 vec
<const char *> & /*constraints*/, vec
<rtx
> &clobbers
,
3504 HARD_REG_SET
&clobbered_regs
, location_t
/*loc*/)
3506 clobbers
.safe_push (gen_rtx_REG (SImode
, CA_REGNO
));
3507 SET_HARD_REG_BIT (clobbered_regs
, CA_REGNO
);
3511 /* This target function is similar to the hook TARGET_OPTION_OVERRIDE
3512 but is called when the optimize level is changed via an attribute or
3513 pragma or when it is reset at the end of the code affected by the
3514 attribute or pragma. It is not called at the beginning of compilation
3515 when TARGET_OPTION_OVERRIDE is called so if you want to perform these
3516 actions then, you should have TARGET_OPTION_OVERRIDE call
3517 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
3520 rs6000_override_options_after_change (void)
3522 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3523 turns -frename-registers on. */
3524 if ((OPTION_SET_P (flag_unroll_loops
) && flag_unroll_loops
)
3525 || (OPTION_SET_P (flag_unroll_all_loops
)
3526 && flag_unroll_all_loops
))
3528 if (!OPTION_SET_P (unroll_only_small_loops
))
3529 unroll_only_small_loops
= 0;
3530 if (!OPTION_SET_P (flag_rename_registers
))
3531 flag_rename_registers
= 1;
3532 if (!OPTION_SET_P (flag_cunroll_grow_size
))
3533 flag_cunroll_grow_size
= 1;
3535 else if (!OPTION_SET_P (flag_cunroll_grow_size
))
3536 flag_cunroll_grow_size
= flag_peel_loops
|| optimize
>= 3;
3538 /* If we are inserting ROP-protect instructions, disable shrink wrap. */
3539 if (rs6000_rop_protect
)
3540 flag_shrink_wrap
= 0;
#ifdef TARGET_USES_LINUX64_OPT
/* Validate and adjust ABI-related options for 64-bit Linux/FreeBSD.
   NOTE(review): upstream defines INVALID_64BIT / INVALID_32BIT just before
   this function; the guarded fallback below restores them in case the
   extraction dropped those lines -- confirm against the full file.  */
#ifndef INVALID_64BIT
#define INVALID_64BIT "%<-m%s%> not supported in this configuration"
#define INVALID_32BIT INVALID_64BIT
#endif

static void
rs6000_linux64_override_options ()
{
  if (!OPTION_SET_P (rs6000_alignment_flags))
    rs6000_alignment_flags = MASK_ALIGN_NATURAL;
  if (rs6000_isa_flags & OPTION_MASK_64BIT)
    {
      /* 64-bit code requires the AIX-style (function descriptor / ELF)
	 calling convention.  */
      if (DEFAULT_ABI != ABI_AIX)
	{
	  rs6000_current_abi = ABI_AIX;
	  error (INVALID_64BIT, "call");
	}
      dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
      if (ELFv2_ABI_CHECK)
	{
	  rs6000_current_abi = ABI_ELFv2;
	  if (dot_symbols)
	    error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
	}
      if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
	{
	  rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
	  error (INVALID_64BIT, "relocatable");
	}
      if (rs6000_isa_flags & OPTION_MASK_EABI)
	{
	  rs6000_isa_flags &= ~OPTION_MASK_EABI;
	  error (INVALID_64BIT, "eabi");
	}
      if (TARGET_PROTOTYPE)
	{
	  target_prototype = 0;
	  error (INVALID_64BIT, "prototype");
	}
      if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
	{
	  rs6000_isa_flags |= OPTION_MASK_POWERPC64;
	  error ("%<-m64%> requires a PowerPC64 cpu");
	}
      if (!OPTION_SET_P (rs6000_current_cmodel))
	SET_CMODEL (CMODEL_MEDIUM);
      if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
	{
	  if (OPTION_SET_P (rs6000_current_cmodel)
	      && rs6000_current_cmodel != CMODEL_SMALL)
	    error ("%<-mcmodel%> incompatible with other toc options");
	  if (TARGET_MINIMAL_TOC)
	    SET_CMODEL (CMODEL_SMALL);
	  else if (TARGET_PCREL
		   || (PCREL_SUPPORTED_BY_OS
		       && (rs6000_isa_flags_explicit
			   & OPTION_MASK_PCREL) == 0))
	    /* Ignore -mno-minimal-toc.  */
	    ;
	  else
	    SET_CMODEL (CMODEL_SMALL);
	}
      if (rs6000_current_cmodel != CMODEL_SMALL)
	{
	  if (!OPTION_SET_P (TARGET_NO_FP_IN_TOC))
	    TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
	  if (!OPTION_SET_P (TARGET_NO_SUM_IN_TOC))
	    TARGET_NO_SUM_IN_TOC = 0;
	}
      if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
	{
	  if (OPTION_SET_P (rs6000_pltseq))
	    warning (0, "%qs unsupported for this ABI",
		     "-mpltseq");
	  rs6000_pltseq = false;
	}
    }
  else if (TARGET_64BIT)
    error (INVALID_32BIT, "32");
  else
    {
      if (TARGET_PROFILE_KERNEL)
	{
	  /* Kernel profiling requires 64-bit code.  */
	  profile_kernel = 0;
	  error (INVALID_32BIT, "profile-kernel");
	}
      if (OPTION_SET_P (rs6000_current_cmodel))
	{
	  SET_CMODEL (CMODEL_SMALL);
	  error (INVALID_32BIT, "cmodel");
	}
    }
}
#endif
/* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
   This support is only in little endian GLIBC 2.32 or newer.  */
static bool
glibc_supports_ieee_128bit (void)
{
#ifdef OPTION_GLIBC
  /* Encode the glibc version as major * 1000 + minor, so 2.32 -> 2032.  */
  if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
      && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
    return true;
#endif /* OPTION_GLIBC.  */

  return false;
}
3647 /* Override command line options.
3649 Combine build-specific configuration information with options
3650 specified on the command line to set various state variables which
3651 influence code generation, optimization, and expansion of built-in
3652 functions. Assure that command-line configuration preferences are
3653 compatible with each other and with the build configuration; issue
3654 warnings while adjusting configuration or error messages while
3655 rejecting configuration.
3657 Upon entry to this function:
3659 This function is called once at the beginning of
3660 compilation, and then again at the start and end of compiling
3661 each section of code that has a different configuration, as
3662 indicated, for example, by adding the
3664 __attribute__((__target__("cpu=power9")))
3666 qualifier to a function definition or, for example, by bracketing
3669 #pragma GCC target("altivec")
3673 #pragma GCC reset_options
3675 directives. Parameter global_init_p is true for the initial
3676 invocation, which initializes global variables, and false for all
3677 subsequent invocations.
3680 Various global state information is assumed to be valid. This
3681 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3682 default CPU specified at build configure time, TARGET_DEFAULT,
3683 representing the default set of option flags for the default
3684 target, and OPTION_SET_P (rs6000_isa_flags), representing
3685 which options were requested on the command line.
3687 Upon return from this function:
3689 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3690 was set by name on the command line. Additionally, if certain
3691 attributes are automatically enabled or disabled by this function
3692 in order to assure compatibility between options and
3693 configuration, the flags associated with those attributes are
3694 also set. By setting these "explicit bits", we avoid the risk
3695 that other code might accidentally overwrite these particular
3696 attributes with "default values".
3698 The various bits of rs6000_isa_flags are set to indicate the
3699 target options that have been selected for the most current
3700 compilation efforts. This has the effect of also turning on the
3701 associated TARGET_XXX values since these are macros which are
3702 generally defined to test the corresponding bit of the
3703 rs6000_isa_flags variable.
3705 Various other global variables and fields of global structures
3706 (over 50 in all) are initialized to reflect the desired options
3707 for the most current compilation efforts. */
3710 rs6000_option_override_internal (bool global_init_p
)
3714 HOST_WIDE_INT set_masks
;
3715 HOST_WIDE_INT ignore_masks
;
3718 struct cl_target_option
*main_target_opt
3719 = ((global_init_p
|| target_option_default_node
== NULL
)
3720 ? NULL
: TREE_TARGET_OPTION (target_option_default_node
));
3722 /* Print defaults. */
3723 if ((TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
) && global_init_p
)
3724 rs6000_print_isa_options (stderr
, 0, "TARGET_DEFAULT", TARGET_DEFAULT
);
3726 /* Remember the explicit arguments. */
3728 rs6000_isa_flags_explicit
= OPTION_SET_P (rs6000_isa_flags
);
3730 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3731 library functions, so warn about it. The flag may be useful for
3732 performance studies from time to time though, so don't disable it
3734 if (OPTION_SET_P (rs6000_alignment_flags
)
3735 && rs6000_alignment_flags
== MASK_ALIGN_POWER
3736 && DEFAULT_ABI
== ABI_DARWIN
3738 warning (0, "%qs is not supported for 64-bit Darwin;"
3739 " it is incompatible with the installed C and C++ libraries",
3742 /* Numerous experiment shows that IRA based loop pressure
3743 calculation works better for RTL loop invariant motion on targets
3744 with enough (>= 32) registers. It is an expensive optimization.
3745 So it is on only for peak performance. */
3746 if (optimize
>= 3 && global_init_p
3747 && !OPTION_SET_P (flag_ira_loop_pressure
))
3748 flag_ira_loop_pressure
= 1;
3750 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3751 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3752 options were already specified. */
3753 if (flag_sanitize
& SANITIZE_USER_ADDRESS
3754 && !OPTION_SET_P (flag_asynchronous_unwind_tables
))
3755 flag_asynchronous_unwind_tables
= 1;
3757 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3758 loop unroller is active. It is only checked during unrolling, so
3759 we can just set it on by default. */
3760 if (!OPTION_SET_P (flag_variable_expansion_in_unroller
))
3761 flag_variable_expansion_in_unroller
= 1;
3763 /* Set the pointer size. */
3766 rs6000_pmode
= DImode
;
3767 rs6000_pointer_size
= 64;
3771 rs6000_pmode
= SImode
;
3772 rs6000_pointer_size
= 32;
3775 /* Some OSs don't support saving Altivec registers. On those OSs, we don't
3776 touch the OPTION_MASK_ALTIVEC settings; if the user wants it, the user
3777 must explicitly specify it and we won't interfere with the user's
3780 set_masks
= POWERPC_MASKS
;
3781 #ifdef OS_MISSING_ALTIVEC
3782 if (OS_MISSING_ALTIVEC
)
3783 set_masks
&= ~(OPTION_MASK_ALTIVEC
| OPTION_MASK_VSX
3784 | OTHER_VSX_VECTOR_MASKS
);
3787 /* Don't override by the processor default if given explicitly. */
3788 set_masks
&= ~rs6000_isa_flags_explicit
;
3790 /* Without option powerpc64 specified explicitly, we need to ensure
3791 powerpc64 always enabled for 64 bit here, otherwise some following
3792 checks can use unexpected TARGET_POWERPC64 value. Meanwhile, we
3793 need to ensure set_masks doesn't have OPTION_MASK_POWERPC64 on,
3794 otherwise later processing can clear it. */
3795 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_POWERPC64
)
3798 rs6000_isa_flags
|= OPTION_MASK_POWERPC64
;
3799 set_masks
&= ~OPTION_MASK_POWERPC64
;
3802 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
3803 the cpu in a target attribute or pragma, but did not specify a tuning
3804 option, use the cpu for the tuning option rather than the option specified
3805 with -mtune on the command line. Process a '--with-cpu' configuration
3806 request as an implicit --cpu. */
3807 if (rs6000_cpu_index
>= 0)
3808 cpu_index
= rs6000_cpu_index
;
3809 else if (main_target_opt
!= NULL
&& main_target_opt
->x_rs6000_cpu_index
>= 0)
3810 cpu_index
= main_target_opt
->x_rs6000_cpu_index
;
3811 else if (OPTION_TARGET_CPU_DEFAULT
)
3812 cpu_index
= rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT
);
3814 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3815 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3816 with those from the cpu, except for options that were explicitly set. If
3817 we don't have a cpu, do not override the target bits set in
3821 rs6000_cpu_index
= cpu_index
;
3822 rs6000_isa_flags
&= ~set_masks
;
3823 rs6000_isa_flags
|= (processor_target_table
[cpu_index
].target_enable
3828 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3829 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3830 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3831 to using rs6000_isa_flags, we need to do the initialization here.
3833 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3834 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3835 HOST_WIDE_INT flags
;
3837 flags
= TARGET_DEFAULT
;
3840 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3841 const char *default_cpu
= (!TARGET_POWERPC64
3846 int default_cpu_index
= rs6000_cpu_name_lookup (default_cpu
);
3847 flags
= processor_target_table
[default_cpu_index
].target_enable
;
3849 rs6000_isa_flags
|= (flags
& ~rs6000_isa_flags_explicit
);
3852 /* Don't expect powerpc64 enabled on those OSes with OS_MISSING_POWERPC64,
3853 since they do not save and restore the high half of the GPRs correctly
3854 in all cases. If the user explicitly specifies it, we won't interfere
3855 with the user's specification. */
3856 #ifdef OS_MISSING_POWERPC64
3857 if (OS_MISSING_POWERPC64
3860 && !(rs6000_isa_flags_explicit
& OPTION_MASK_POWERPC64
))
3861 rs6000_isa_flags
&= ~OPTION_MASK_POWERPC64
;
3864 if (rs6000_tune_index
>= 0)
3865 tune_index
= rs6000_tune_index
;
3866 else if (cpu_index
>= 0)
3867 rs6000_tune_index
= tune_index
= cpu_index
;
3871 enum processor_type tune_proc
3872 = (TARGET_POWERPC64
? PROCESSOR_DEFAULT64
: PROCESSOR_DEFAULT
);
3875 for (i
= 0; i
< ARRAY_SIZE (processor_target_table
); i
++)
3876 if (processor_target_table
[i
].processor
== tune_proc
)
3884 rs6000_cpu
= processor_target_table
[cpu_index
].processor
;
3886 rs6000_cpu
= TARGET_POWERPC64
? PROCESSOR_DEFAULT64
: PROCESSOR_DEFAULT
;
3888 gcc_assert (tune_index
>= 0);
3889 rs6000_tune
= processor_target_table
[tune_index
].processor
;
3891 if (rs6000_cpu
== PROCESSOR_PPCE300C2
|| rs6000_cpu
== PROCESSOR_PPCE300C3
3892 || rs6000_cpu
== PROCESSOR_PPCE500MC
|| rs6000_cpu
== PROCESSOR_PPCE500MC64
3893 || rs6000_cpu
== PROCESSOR_PPCE5500
)
3896 error ("AltiVec not supported in this target");
3899 /* If we are optimizing big endian systems for space, use the load/store
3900 multiple instructions. */
3901 if (BYTES_BIG_ENDIAN
&& optimize_size
)
3902 rs6000_isa_flags
|= ~rs6000_isa_flags_explicit
& OPTION_MASK_MULTIPLE
;
3904 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3905 because the hardware doesn't support the instructions used in little
3906 endian mode, and causes an alignment trap. The 750 does not cause an
3907 alignment trap (except when the target is unaligned). */
3909 if (!BYTES_BIG_ENDIAN
&& rs6000_cpu
!= PROCESSOR_PPC750
&& TARGET_MULTIPLE
)
3911 rs6000_isa_flags
&= ~OPTION_MASK_MULTIPLE
;
3912 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MULTIPLE
) != 0)
3913 warning (0, "%qs is not supported on little endian systems",
3917 /* If little-endian, default to -mstrict-align on older processors.
3918 Testing for direct_move matches power8 and later. */
3919 if (!BYTES_BIG_ENDIAN
3920 && !(processor_target_table
[tune_index
].target_enable
3921 & OPTION_MASK_DIRECT_MOVE
))
3922 rs6000_isa_flags
|= ~rs6000_isa_flags_explicit
& OPTION_MASK_STRICT_ALIGN
;
3924 /* Add some warnings for VSX. */
3927 const char *msg
= NULL
;
3928 if (!TARGET_HARD_FLOAT
)
3930 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
3931 msg
= N_("%<-mvsx%> requires hardware floating point");
3934 rs6000_isa_flags
&= ~ OPTION_MASK_VSX
;
3935 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
3938 else if (TARGET_AVOID_XFORM
> 0)
3939 msg
= N_("%<-mvsx%> needs indexed addressing");
3940 else if (!TARGET_ALTIVEC
&& (rs6000_isa_flags_explicit
3941 & OPTION_MASK_ALTIVEC
))
3943 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
3944 msg
= N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3946 msg
= N_("%<-mno-altivec%> disables vsx");
3952 rs6000_isa_flags
&= ~ OPTION_MASK_VSX
;
3953 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
3957 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3958 the -mcpu setting to enable options that conflict. */
3959 if ((!TARGET_HARD_FLOAT
|| !TARGET_ALTIVEC
|| !TARGET_VSX
)
3960 && (rs6000_isa_flags_explicit
& (OPTION_MASK_SOFT_FLOAT
3961 | OPTION_MASK_ALTIVEC
3962 | OPTION_MASK_VSX
)) != 0)
3963 rs6000_isa_flags
&= ~((OPTION_MASK_P8_VECTOR
| OPTION_MASK_CRYPTO
3964 | OPTION_MASK_DIRECT_MOVE
)
3965 & ~rs6000_isa_flags_explicit
);
3967 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
3968 rs6000_print_isa_options (stderr
, 0, "before defaults", rs6000_isa_flags
);
3970 #ifdef XCOFF_DEBUGGING_INFO
3971 /* For AIX default to 64-bit DWARF. */
3972 if (!OPTION_SET_P (dwarf_offset_size
))
3973 dwarf_offset_size
= POINTER_SIZE_UNITS
;
3976 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3977 off all of the options that depend on those flags. */
3978 ignore_masks
= rs6000_disable_incompatible_switches ();
3980 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3981 unless the user explicitly used the -mno-<option> to disable the code. */
3982 if (TARGET_P9_VECTOR
|| TARGET_MODULO
|| TARGET_P9_MISC
)
3983 rs6000_isa_flags
|= (ISA_3_0_MASKS_SERVER
& ~ignore_masks
);
3984 else if (TARGET_P9_MINMAX
)
3988 if (cpu_index
== PROCESSOR_POWER9
)
3990 /* legacy behavior: allow -mcpu=power9 with certain
3991 capabilities explicitly disabled. */
3992 rs6000_isa_flags
|= (ISA_3_0_MASKS_SERVER
& ~ignore_masks
);
3995 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3996 "for <xxx> less than power9", "-mcpu");
3998 else if ((ISA_3_0_MASKS_SERVER
& rs6000_isa_flags_explicit
)
3999 != (ISA_3_0_MASKS_SERVER
& rs6000_isa_flags
4000 & rs6000_isa_flags_explicit
))
4001 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
4002 were explicitly cleared. */
4003 error ("%qs incompatible with explicitly disabled options",
4006 rs6000_isa_flags
|= ISA_3_0_MASKS_SERVER
;
4008 else if (TARGET_P8_VECTOR
|| TARGET_DIRECT_MOVE
|| TARGET_CRYPTO
)
4009 rs6000_isa_flags
|= (ISA_2_7_MASKS_SERVER
& ~ignore_masks
);
4010 else if (TARGET_VSX
)
4011 rs6000_isa_flags
|= (ISA_2_6_MASKS_SERVER
& ~ignore_masks
);
4012 else if (TARGET_POPCNTD
)
4013 rs6000_isa_flags
|= (ISA_2_6_MASKS_EMBEDDED
& ~ignore_masks
);
4014 else if (TARGET_DFP
)
4015 rs6000_isa_flags
|= (ISA_2_5_MASKS_SERVER
& ~ignore_masks
);
4016 else if (TARGET_CMPB
)
4017 rs6000_isa_flags
|= (ISA_2_5_MASKS_EMBEDDED
& ~ignore_masks
);
4018 else if (TARGET_FPRND
)
4019 rs6000_isa_flags
|= (ISA_2_4_MASKS
& ~ignore_masks
);
4020 else if (TARGET_POPCNTB
)
4021 rs6000_isa_flags
|= (ISA_2_2_MASKS
& ~ignore_masks
);
4022 else if (TARGET_ALTIVEC
)
4023 rs6000_isa_flags
|= (OPTION_MASK_PPC_GFXOPT
& ~ignore_masks
);
4025 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4026 target attribute or pragma which automatically enables both options,
4027 unless the altivec ABI was set. This is set by default for 64-bit, but
4028 not for 32-bit. Don't move this before the above code using ignore_masks,
4029 since it can reset the cleared VSX/ALTIVEC flag again. */
4030 if (main_target_opt
&& !main_target_opt
->x_rs6000_altivec_abi
)
4031 rs6000_isa_flags
&= ~((OPTION_MASK_VSX
| OPTION_MASK_ALTIVEC
)
4032 & ~rs6000_isa_flags_explicit
);
4034 if (TARGET_CRYPTO
&& !TARGET_ALTIVEC
)
4036 if (rs6000_isa_flags_explicit
& OPTION_MASK_CRYPTO
)
4037 error ("%qs requires %qs", "-mcrypto", "-maltivec");
4038 rs6000_isa_flags
&= ~OPTION_MASK_CRYPTO
;
4041 if (!TARGET_FPRND
&& TARGET_VSX
)
4043 if (rs6000_isa_flags_explicit
& OPTION_MASK_FPRND
)
4044 /* TARGET_VSX = 1 implies Power 7 and newer */
4045 error ("%qs requires %qs", "-mvsx", "-mfprnd");
4046 rs6000_isa_flags
&= ~OPTION_MASK_FPRND
;
4049 if (TARGET_DIRECT_MOVE
&& !TARGET_VSX
)
4051 if (rs6000_isa_flags_explicit
& OPTION_MASK_DIRECT_MOVE
)
4052 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
4053 rs6000_isa_flags
&= ~OPTION_MASK_DIRECT_MOVE
;
4056 if (TARGET_P8_VECTOR
&& !TARGET_ALTIVEC
)
4058 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
4059 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
4060 rs6000_isa_flags
&= ~OPTION_MASK_P8_VECTOR
;
4063 if (TARGET_P8_VECTOR
&& !TARGET_VSX
)
4065 if ((rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
4066 && (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
))
4067 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
4068 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
) == 0)
4070 rs6000_isa_flags
&= ~OPTION_MASK_P8_VECTOR
;
4071 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
4072 rs6000_isa_flags_explicit
|= OPTION_MASK_P8_VECTOR
;
4076 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
4078 rs6000_isa_flags
|= OPTION_MASK_VSX
;
4079 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
4083 if (TARGET_DFP
&& !TARGET_HARD_FLOAT
)
4085 if (rs6000_isa_flags_explicit
& OPTION_MASK_DFP
)
4086 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
4087 rs6000_isa_flags
&= ~OPTION_MASK_DFP
;
4090 /* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
4091 silently turn off quad memory mode. */
4092 if ((TARGET_QUAD_MEMORY
|| TARGET_QUAD_MEMORY_ATOMIC
) && !TARGET_POWERPC64
)
4094 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY
) != 0)
4095 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
4097 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY_ATOMIC
) != 0)
4098 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
4100 rs6000_isa_flags
&= ~(OPTION_MASK_QUAD_MEMORY
4101 | OPTION_MASK_QUAD_MEMORY_ATOMIC
);
4104 /* Non-atomic quad memory load/store are disabled for little endian, since
4105 the words are reversed, but atomic operations can still be done by
4106 swapping the words. */
4107 if (TARGET_QUAD_MEMORY
&& !WORDS_BIG_ENDIAN
)
4109 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY
) != 0)
4110 warning (0, N_("%<-mquad-memory%> is not available in little endian "
4113 rs6000_isa_flags
&= ~OPTION_MASK_QUAD_MEMORY
;
4116 /* Assume if the user asked for normal quad memory instructions, they want
4117 the atomic versions as well, unless they explicity told us not to use quad
4118 word atomic instructions. */
4119 if (TARGET_QUAD_MEMORY
4120 && !TARGET_QUAD_MEMORY_ATOMIC
4121 && ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY_ATOMIC
) == 0))
4122 rs6000_isa_flags
|= OPTION_MASK_QUAD_MEMORY_ATOMIC
;
4124 /* If we can shrink-wrap the TOC register save separately, then use
4125 -msave-toc-indirect unless explicitly disabled. */
4126 if ((rs6000_isa_flags_explicit
& OPTION_MASK_SAVE_TOC_INDIRECT
) == 0
4127 && flag_shrink_wrap_separate
4128 && optimize_function_for_speed_p (cfun
))
4129 rs6000_isa_flags
|= OPTION_MASK_SAVE_TOC_INDIRECT
;
4131 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4132 generating power8 instructions. Power9 does not optimize power8 fusion
4134 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
))
4136 if (processor_target_table
[tune_index
].processor
== PROCESSOR_POWER8
)
4137 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION
;
4139 rs6000_isa_flags
&= ~OPTION_MASK_P8_FUSION
;
4142 /* Setting additional fusion flags turns on base fusion. */
4143 if (!TARGET_P8_FUSION
&& TARGET_P8_FUSION_SIGN
)
4145 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
)
4147 if (TARGET_P8_FUSION_SIGN
)
4148 error ("%qs requires %qs", "-mpower8-fusion-sign",
4151 rs6000_isa_flags
&= ~OPTION_MASK_P8_FUSION
;
4154 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION
;
4157 /* Power8 does not fuse sign extended loads with the addis. If we are
4158 optimizing at high levels for speed, convert a sign extended load into a
4159 zero extending load, and an explicit sign extension. */
4160 if (TARGET_P8_FUSION
4161 && !(rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION_SIGN
)
4162 && optimize_function_for_speed_p (cfun
)
4164 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION_SIGN
;
4166 /* ISA 3.0 vector instructions include ISA 2.07. */
4167 if (TARGET_P9_VECTOR
&& !TARGET_P8_VECTOR
)
4169 /* We prefer to not mention undocumented options in
4170 error messages. However, if users have managed to select
4171 power9-vector without selecting power8-vector, they
4172 already know about undocumented flags. */
4173 if ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
) &&
4174 (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
))
4175 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4176 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
) == 0)
4178 rs6000_isa_flags
&= ~OPTION_MASK_P9_VECTOR
;
4179 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
4180 rs6000_isa_flags_explicit
|= OPTION_MASK_P9_VECTOR
;
4184 /* OPTION_MASK_P9_VECTOR is explicit and
4185 OPTION_MASK_P8_VECTOR is not explicit. */
4186 rs6000_isa_flags
|= OPTION_MASK_P8_VECTOR
;
4187 rs6000_isa_flags_explicit
|= OPTION_MASK_P8_VECTOR
;
4191 /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
4192 support. If we only have ISA 2.06 support, and the user did not specify
4193 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4194 but we don't enable the full vectorization support */
4195 if (TARGET_ALLOW_MOVMISALIGN
== -1 && TARGET_P8_VECTOR
&& TARGET_DIRECT_MOVE
)
4196 TARGET_ALLOW_MOVMISALIGN
= 1;
4198 else if (TARGET_ALLOW_MOVMISALIGN
&& !TARGET_VSX
)
4200 if (TARGET_ALLOW_MOVMISALIGN
> 0
4201 && OPTION_SET_P (TARGET_ALLOW_MOVMISALIGN
))
4202 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4204 TARGET_ALLOW_MOVMISALIGN
= 0;
4207 /* Determine when unaligned vector accesses are permitted, and when
4208 they are preferred over masked Altivec loads. Note that if
4209 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4210 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4212 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4216 if (rs6000_isa_flags_explicit
& OPTION_MASK_EFFICIENT_UNALIGNED_VSX
)
4217 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4219 rs6000_isa_flags
&= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX
;
4222 else if (!TARGET_ALLOW_MOVMISALIGN
)
4224 if (rs6000_isa_flags_explicit
& OPTION_MASK_EFFICIENT_UNALIGNED_VSX
)
4225 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
4226 "-mallow-movmisalign");
4228 rs6000_isa_flags
&= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX
;
4232 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
))
4234 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4235 rs6000_isa_flags
|= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
;
4237 rs6000_isa_flags
&= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
;
4240 /* Use long double size to select the appropriate long double. We use
4241 TYPE_PRECISION to differentiate the 3 different long double types. We map
4242 128 into the precision used for TFmode. */
4243 int default_long_double_size
= (RS6000_DEFAULT_LONG_DOUBLE_SIZE
== 64
4245 : FLOAT_PRECISION_TFmode
);
4247 /* Set long double size before the IEEE 128-bit tests. */
4248 if (!OPTION_SET_P (rs6000_long_double_type_size
))
4250 if (main_target_opt
!= NULL
4251 && (main_target_opt
->x_rs6000_long_double_type_size
4252 != default_long_double_size
))
4253 error ("target attribute or pragma changes %<long double%> size");
4255 rs6000_long_double_type_size
= default_long_double_size
;
4257 else if (rs6000_long_double_type_size
== FLOAT_PRECISION_TFmode
)
4258 ; /* The option value can be seen when cl_target_option_restore is called. */
4259 else if (rs6000_long_double_type_size
== 128)
4260 rs6000_long_double_type_size
= FLOAT_PRECISION_TFmode
;
4262 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4263 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4264 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4265 those systems will not pick up this default. Warn if the user changes the
4266 default unless -Wno-psabi. */
4267 if (!OPTION_SET_P (rs6000_ieeequad
))
4268 rs6000_ieeequad
= TARGET_IEEEQUAD_DEFAULT
;
4270 else if (TARGET_LONG_DOUBLE_128
)
4272 if (global_options
.x_rs6000_ieeequad
4273 && (!TARGET_POPCNTD
|| !TARGET_VSX
))
4274 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4276 if (rs6000_ieeequad
!= TARGET_IEEEQUAD_DEFAULT
)
4278 /* Determine if the user can change the default long double type at
4279 compilation time. You need GLIBC 2.32 or newer to be able to
4280 change the long double type. Only issue one warning. */
4281 static bool warned_change_long_double
;
4283 if (!warned_change_long_double
&& !glibc_supports_ieee_128bit ())
4285 warned_change_long_double
= true;
4286 if (TARGET_IEEEQUAD
)
4287 warning (OPT_Wpsabi
, "Using IEEE extended precision "
4290 warning (OPT_Wpsabi
, "Using IBM extended precision "
4296 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4297 sytems. In GCC 7, we would enable the IEEE 128-bit floating point
4298 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4299 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4300 the keyword as well as the type. */
4301 TARGET_FLOAT128_TYPE
= TARGET_FLOAT128_ENABLE_TYPE
&& TARGET_VSX
;
4303 /* IEEE 128-bit floating point requires VSX support. */
4304 if (TARGET_FLOAT128_KEYWORD
)
4308 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) != 0)
4309 error ("%qs requires VSX support", "-mfloat128");
4311 TARGET_FLOAT128_TYPE
= 0;
4312 rs6000_isa_flags
&= ~(OPTION_MASK_FLOAT128_KEYWORD
4313 | OPTION_MASK_FLOAT128_HW
);
4315 else if (!TARGET_FLOAT128_TYPE
)
4317 TARGET_FLOAT128_TYPE
= 1;
4318 warning (0, "The %<-mfloat128%> option may not be fully supported");
4322 /* Enable the __float128 keyword under Linux by default. */
4323 if (TARGET_FLOAT128_TYPE
&& !TARGET_FLOAT128_KEYWORD
4324 && (rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) == 0)
4325 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_KEYWORD
;
4327 /* If we have are supporting the float128 type and full ISA 3.0 support,
4328 enable -mfloat128-hardware by default. However, don't enable the
4329 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4330 because sometimes the compiler wants to put things in an integer
4331 container, and if we don't have __int128 support, it is impossible. */
4332 if (TARGET_FLOAT128_TYPE
&& !TARGET_FLOAT128_HW
&& TARGET_64BIT
4333 && (rs6000_isa_flags
& ISA_3_0_MASKS_IEEE
) == ISA_3_0_MASKS_IEEE
4334 && !(rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
))
4335 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_HW
;
4337 if (TARGET_FLOAT128_HW
4338 && (rs6000_isa_flags
& ISA_3_0_MASKS_IEEE
) != ISA_3_0_MASKS_IEEE
)
4340 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4341 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4343 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4346 if (TARGET_FLOAT128_HW
&& !TARGET_64BIT
)
4348 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4349 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4351 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4354 /* Enable -mprefixed by default on power10 systems. */
4355 if (TARGET_POWER10
&& (rs6000_isa_flags_explicit
& OPTION_MASK_PREFIXED
) == 0)
4356 rs6000_isa_flags
|= OPTION_MASK_PREFIXED
;
4358 /* -mprefixed requires -mcpu=power10 (or later). */
4359 else if (TARGET_PREFIXED
&& !TARGET_POWER10
)
4361 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PREFIXED
) != 0)
4362 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4364 rs6000_isa_flags
&= ~OPTION_MASK_PREFIXED
;
4367 /* -mpcrel requires prefixed load/store addressing. */
4368 if (TARGET_PCREL
&& !TARGET_PREFIXED
)
4370 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) != 0)
4371 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4373 rs6000_isa_flags
&= ~OPTION_MASK_PCREL
;
4376 /* Print the options after updating the defaults. */
4377 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4378 rs6000_print_isa_options (stderr
, 0, "after defaults", rs6000_isa_flags
);
4380 /* E500mc does "better" if we inline more aggressively. Respect the
4381 user's opinion, though. */
4382 if (rs6000_block_move_inline_limit
== 0
4383 && (rs6000_tune
== PROCESSOR_PPCE500MC
4384 || rs6000_tune
== PROCESSOR_PPCE500MC64
4385 || rs6000_tune
== PROCESSOR_PPCE5500
4386 || rs6000_tune
== PROCESSOR_PPCE6500
))
4387 rs6000_block_move_inline_limit
= 128;
4389 /* store_one_arg depends on expand_block_move to handle at least the
4390 size of reg_parm_stack_space. */
4391 if (rs6000_block_move_inline_limit
< (TARGET_POWERPC64
? 64 : 32))
4392 rs6000_block_move_inline_limit
= (TARGET_POWERPC64
? 64 : 32);
4396 /* If the appropriate debug option is enabled, replace the target hooks
4397 with debug versions that call the real version and then prints
4398 debugging information. */
4399 if (TARGET_DEBUG_COST
)
4401 targetm
.rtx_costs
= rs6000_debug_rtx_costs
;
4402 targetm
.address_cost
= rs6000_debug_address_cost
;
4403 targetm
.sched
.adjust_cost
= rs6000_debug_adjust_cost
;
4406 if (TARGET_DEBUG_ADDR
)
4408 targetm
.legitimate_address_p
= rs6000_debug_legitimate_address_p
;
4409 targetm
.legitimize_address
= rs6000_debug_legitimize_address
;
4410 rs6000_secondary_reload_class_ptr
4411 = rs6000_debug_secondary_reload_class
;
4412 targetm
.secondary_memory_needed
4413 = rs6000_debug_secondary_memory_needed
;
4414 targetm
.can_change_mode_class
4415 = rs6000_debug_can_change_mode_class
;
4416 rs6000_preferred_reload_class_ptr
4417 = rs6000_debug_preferred_reload_class
;
4418 rs6000_mode_dependent_address_ptr
4419 = rs6000_debug_mode_dependent_address
;
4422 if (rs6000_veclibabi_name
)
4424 if (strcmp (rs6000_veclibabi_name
, "mass") == 0)
4425 rs6000_veclib_handler
= rs6000_builtin_vectorized_libmass
;
4428 error ("unknown vectorization library ABI type in "
4429 "%<-mveclibabi=%s%>", rs6000_veclibabi_name
);
4435 /* Enable Altivec ABI for AIX -maltivec. */
4437 && (TARGET_ALTIVEC
|| TARGET_VSX
)
4438 && !OPTION_SET_P (rs6000_altivec_abi
))
4440 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_altivec_abi
)
4441 error ("target attribute or pragma changes AltiVec ABI");
4443 rs6000_altivec_abi
= 1;
4446 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4447 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4448 be explicitly overridden in either case. */
4451 if (!OPTION_SET_P (rs6000_altivec_abi
)
4452 && (TARGET_64BIT
|| TARGET_ALTIVEC
|| TARGET_VSX
))
4454 if (main_target_opt
!= NULL
&&
4455 !main_target_opt
->x_rs6000_altivec_abi
)
4456 error ("target attribute or pragma changes AltiVec ABI");
4458 rs6000_altivec_abi
= 1;
4462 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4463 So far, the only darwin64 targets are also MACH-O. */
4465 && DEFAULT_ABI
== ABI_DARWIN
4468 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_darwin64_abi
)
4469 error ("target attribute or pragma changes darwin64 ABI");
4472 rs6000_darwin64_abi
= 1;
4473 /* Default to natural alignment, for better performance. */
4474 rs6000_alignment_flags
= MASK_ALIGN_NATURAL
;
4478 /* Place FP constants in the constant pool instead of TOC
4479 if section anchors enabled. */
4480 if (flag_section_anchors
4481 && !OPTION_SET_P (TARGET_NO_FP_IN_TOC
))
4482 TARGET_NO_FP_IN_TOC
= 1;
4484 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4485 rs6000_print_isa_options (stderr
, 0, "before subtarget", rs6000_isa_flags
);
4487 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4488 SUBTARGET_OVERRIDE_OPTIONS
;
4490 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4491 SUBSUBTARGET_OVERRIDE_OPTIONS
;
4493 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4494 SUB3TARGET_OVERRIDE_OPTIONS
;
4497 /* If the ABI has support for PC-relative relocations, enable it by default.
4498 This test depends on the sub-target tests above setting the code model to
4499 medium for ELF v2 systems. */
4500 if (PCREL_SUPPORTED_BY_OS
4501 && (rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) == 0)
4502 rs6000_isa_flags
|= OPTION_MASK_PCREL
;
4504 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4505 after the subtarget override options are done. */
4506 else if (TARGET_PCREL
&& TARGET_CMODEL
!= CMODEL_MEDIUM
)
4508 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) != 0)
4509 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4511 rs6000_isa_flags
&= ~OPTION_MASK_PCREL
;
4514 /* Enable -mmma by default on power10 systems. */
4515 if (TARGET_POWER10
&& (rs6000_isa_flags_explicit
& OPTION_MASK_MMA
) == 0)
4516 rs6000_isa_flags
|= OPTION_MASK_MMA
;
4518 /* Turn off vector pair/mma options on non-power10 systems. */
4519 else if (!TARGET_POWER10
&& TARGET_MMA
)
4521 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MMA
) != 0)
4522 error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4524 rs6000_isa_flags
&= ~OPTION_MASK_MMA
;
4527 /* Enable power10 fusion if we are tuning for power10, even if we aren't
4528 generating power10 instructions. */
4529 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P10_FUSION
))
4531 if (rs6000_tune
== PROCESSOR_POWER10
)
4532 rs6000_isa_flags
|= OPTION_MASK_P10_FUSION
;
4534 rs6000_isa_flags
&= ~OPTION_MASK_P10_FUSION
;
4537 /* MMA requires SIMD support as ISA 3.1 claims and our implementation
4538 such as "*movoo" uses vector pair access which use VSX registers.
4539 So make MMA require VSX support here. */
4540 if (TARGET_MMA
&& !TARGET_VSX
)
4542 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MMA
) != 0)
4543 error ("%qs requires %qs", "-mmma", "-mvsx");
4544 rs6000_isa_flags
&= ~OPTION_MASK_MMA
;
4547 if (!TARGET_PCREL
&& TARGET_PCREL_OPT
)
4548 rs6000_isa_flags
&= ~OPTION_MASK_PCREL_OPT
;
4550 /* Dense math requires MMA. */
4551 if (TARGET_DENSE_MATH
&& !TARGET_MMA
)
4553 if ((rs6000_isa_flags_explicit
& OPTION_MASK_DENSE_MATH
) != 0)
4554 error ("%qs requires %qs", "-mdense-math", "-mmma");
4555 rs6000_isa_flags
&= ~OPTION_MASK_DENSE_MATH
;
4558 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4559 rs6000_print_isa_options (stderr
, 0, "after subtarget", rs6000_isa_flags
);
4561 rs6000_always_hint
= (rs6000_tune
!= PROCESSOR_POWER4
4562 && rs6000_tune
!= PROCESSOR_POWER5
4563 && rs6000_tune
!= PROCESSOR_POWER6
4564 && rs6000_tune
!= PROCESSOR_POWER7
4565 && rs6000_tune
!= PROCESSOR_POWER8
4566 && rs6000_tune
!= PROCESSOR_POWER9
4567 && rs6000_tune
!= PROCESSOR_POWER10
4568 && rs6000_tune
!= PROCESSOR_FUTURE
4569 && rs6000_tune
!= PROCESSOR_PPCA2
4570 && rs6000_tune
!= PROCESSOR_CELL
4571 && rs6000_tune
!= PROCESSOR_PPC476
);
4572 rs6000_sched_groups
= (rs6000_tune
== PROCESSOR_POWER4
4573 || rs6000_tune
== PROCESSOR_POWER5
4574 || rs6000_tune
== PROCESSOR_POWER7
4575 || rs6000_tune
== PROCESSOR_POWER8
);
4576 rs6000_align_branch_targets
= (rs6000_tune
== PROCESSOR_POWER4
4577 || rs6000_tune
== PROCESSOR_POWER5
4578 || rs6000_tune
== PROCESSOR_POWER6
4579 || rs6000_tune
== PROCESSOR_POWER7
4580 || rs6000_tune
== PROCESSOR_POWER8
4581 || rs6000_tune
== PROCESSOR_POWER9
4582 || rs6000_tune
== PROCESSOR_POWER10
4583 || rs6000_tune
== PROCESSOR_FUTURE
4584 || rs6000_tune
== PROCESSOR_PPCE500MC
4585 || rs6000_tune
== PROCESSOR_PPCE500MC64
4586 || rs6000_tune
== PROCESSOR_PPCE5500
4587 || rs6000_tune
== PROCESSOR_PPCE6500
);
4589 /* Allow debug switches to override the above settings. These are set to -1
4590 in rs6000.opt to indicate the user hasn't directly set the switch. */
4591 if (TARGET_ALWAYS_HINT
>= 0)
4592 rs6000_always_hint
= TARGET_ALWAYS_HINT
;
4594 if (TARGET_SCHED_GROUPS
>= 0)
4595 rs6000_sched_groups
= TARGET_SCHED_GROUPS
;
4597 if (TARGET_ALIGN_BRANCH_TARGETS
>= 0)
4598 rs6000_align_branch_targets
= TARGET_ALIGN_BRANCH_TARGETS
;
4600 rs6000_sched_restricted_insns_priority
4601 = (rs6000_sched_groups
? 1 : 0);
4603 /* Handle -msched-costly-dep option. */
4604 rs6000_sched_costly_dep
4605 = (rs6000_sched_groups
? true_store_to_load_dep_costly
: no_dep_costly
);
4607 if (rs6000_sched_costly_dep_str
)
4609 if (! strcmp (rs6000_sched_costly_dep_str
, "no"))
4610 rs6000_sched_costly_dep
= no_dep_costly
;
4611 else if (! strcmp (rs6000_sched_costly_dep_str
, "all"))
4612 rs6000_sched_costly_dep
= all_deps_costly
;
4613 else if (! strcmp (rs6000_sched_costly_dep_str
, "true_store_to_load"))
4614 rs6000_sched_costly_dep
= true_store_to_load_dep_costly
;
4615 else if (! strcmp (rs6000_sched_costly_dep_str
, "store_to_load"))
4616 rs6000_sched_costly_dep
= store_to_load_dep_costly
;
4618 rs6000_sched_costly_dep
= ((enum rs6000_dependence_cost
)
4619 atoi (rs6000_sched_costly_dep_str
));
4622 /* Handle -minsert-sched-nops option. */
4623 rs6000_sched_insert_nops
4624 = (rs6000_sched_groups
? sched_finish_regroup_exact
: sched_finish_none
);
4626 if (rs6000_sched_insert_nops_str
)
4628 if (! strcmp (rs6000_sched_insert_nops_str
, "no"))
4629 rs6000_sched_insert_nops
= sched_finish_none
;
4630 else if (! strcmp (rs6000_sched_insert_nops_str
, "pad"))
4631 rs6000_sched_insert_nops
= sched_finish_pad_groups
;
4632 else if (! strcmp (rs6000_sched_insert_nops_str
, "regroup_exact"))
4633 rs6000_sched_insert_nops
= sched_finish_regroup_exact
;
4635 rs6000_sched_insert_nops
= ((enum rs6000_nop_insertion
)
4636 atoi (rs6000_sched_insert_nops_str
));
4639 /* Handle stack protector */
4640 if (!OPTION_SET_P (rs6000_stack_protector_guard
))
4641 #ifdef TARGET_THREAD_SSP_OFFSET
4642 rs6000_stack_protector_guard
= SSP_TLS
;
4644 rs6000_stack_protector_guard
= SSP_GLOBAL
;
4647 #ifdef TARGET_THREAD_SSP_OFFSET
4648 rs6000_stack_protector_guard_offset
= TARGET_THREAD_SSP_OFFSET
;
4649 rs6000_stack_protector_guard_reg
= TARGET_64BIT
? 13 : 2;
4652 if (OPTION_SET_P (rs6000_stack_protector_guard_offset_str
))
4655 const char *str
= rs6000_stack_protector_guard_offset_str
;
4658 long offset
= strtol (str
, &endp
, 0);
4659 if (!*str
|| *endp
|| errno
)
4660 error ("%qs is not a valid number in %qs", str
,
4661 "-mstack-protector-guard-offset=");
4663 if (!IN_RANGE (offset
, -0x8000, 0x7fff)
4664 || (TARGET_64BIT
&& (offset
& 3)))
4665 error ("%qs is not a valid offset in %qs", str
,
4666 "-mstack-protector-guard-offset=");
4668 rs6000_stack_protector_guard_offset
= offset
;
4671 if (OPTION_SET_P (rs6000_stack_protector_guard_reg_str
))
4673 const char *str
= rs6000_stack_protector_guard_reg_str
;
4674 int reg
= decode_reg_name (str
);
4676 if (!IN_RANGE (reg
, 1, 31))
4677 error ("%qs is not a valid base register in %qs", str
,
4678 "-mstack-protector-guard-reg=");
4680 rs6000_stack_protector_guard_reg
= reg
;
4683 if (rs6000_stack_protector_guard
== SSP_TLS
4684 && !IN_RANGE (rs6000_stack_protector_guard_reg
, 1, 31))
4685 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4689 #ifdef TARGET_REGNAMES
4690 /* If the user desires alternate register names, copy in the
4691 alternate names now. */
4692 if (TARGET_REGNAMES
)
4693 memcpy (rs6000_reg_names
, alt_reg_names
, sizeof (rs6000_reg_names
));
4696 /* Set aix_struct_return last, after the ABI is determined.
4697 If -maix-struct-return or -msvr4-struct-return was explicitly
4698 used, don't override with the ABI default. */
4699 if (!OPTION_SET_P (aix_struct_return
))
4700 aix_struct_return
= (DEFAULT_ABI
!= ABI_V4
|| DRAFT_V4_STRUCT_RET
);
4703 /* IBM XL compiler defaults to unsigned bitfields. */
4704 if (TARGET_XL_COMPAT
)
4705 flag_signed_bitfields
= 0;
4708 if (TARGET_LONG_DOUBLE_128
&& !TARGET_IEEEQUAD
)
4709 REAL_MODE_FORMAT (TFmode
) = &ibm_extended_format
;
4711 ASM_GENERATE_INTERNAL_LABEL (toc_label_name
, "LCTOC", 1);
4713 /* We can only guarantee the availability of DI pseudo-ops when
4714 assembling for 64-bit targets. */
4717 targetm
.asm_out
.aligned_op
.di
= NULL
;
4718 targetm
.asm_out
.unaligned_op
.di
= NULL
;
4722 /* Set branch target alignment, if not optimizing for size. */
4725 /* Cell wants to be aligned 8byte for dual issue. Titan wants to be
4726 aligned 8byte to avoid misprediction by the branch predictor. */
4727 if (rs6000_tune
== PROCESSOR_TITAN
4728 || rs6000_tune
== PROCESSOR_CELL
)
4730 if (flag_align_functions
&& !str_align_functions
)
4731 str_align_functions
= "8";
4732 if (flag_align_jumps
&& !str_align_jumps
)
4733 str_align_jumps
= "8";
4734 if (flag_align_loops
&& !str_align_loops
)
4735 str_align_loops
= "8";
4737 if (rs6000_align_branch_targets
)
4739 if (flag_align_functions
&& !str_align_functions
)
4740 str_align_functions
= "16";
4741 if (flag_align_jumps
&& !str_align_jumps
)
4742 str_align_jumps
= "16";
4743 if (flag_align_loops
&& !str_align_loops
)
4745 can_override_loop_align
= 1;
4746 str_align_loops
= "16";
4751 /* Arrange to save and restore machine status around nested functions. */
4752 init_machine_status
= rs6000_init_machine_status
;
4754 /* We should always be splitting complex arguments, but we can't break
4755 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4756 if (DEFAULT_ABI
== ABI_V4
|| DEFAULT_ABI
== ABI_DARWIN
)
4757 targetm
.calls
.split_complex_arg
= NULL
;
4759 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4760 if (DEFAULT_ABI
== ABI_AIX
)
4761 targetm
.calls
.custom_function_descriptors
= 0;
4764 /* Initialize rs6000_cost with the appropriate target costs. */
4766 rs6000_cost
= TARGET_POWERPC64
? &size64_cost
: &size32_cost
;
4768 switch (rs6000_tune
)
4770 case PROCESSOR_RS64A
:
4771 rs6000_cost
= &rs64a_cost
;
4774 case PROCESSOR_MPCCORE
:
4775 rs6000_cost
= &mpccore_cost
;
4778 case PROCESSOR_PPC403
:
4779 rs6000_cost
= &ppc403_cost
;
4782 case PROCESSOR_PPC405
:
4783 rs6000_cost
= &ppc405_cost
;
4786 case PROCESSOR_PPC440
:
4787 rs6000_cost
= &ppc440_cost
;
4790 case PROCESSOR_PPC476
:
4791 rs6000_cost
= &ppc476_cost
;
4794 case PROCESSOR_PPC601
:
4795 rs6000_cost
= &ppc601_cost
;
4798 case PROCESSOR_PPC603
:
4799 rs6000_cost
= &ppc603_cost
;
4802 case PROCESSOR_PPC604
:
4803 rs6000_cost
= &ppc604_cost
;
4806 case PROCESSOR_PPC604e
:
4807 rs6000_cost
= &ppc604e_cost
;
4810 case PROCESSOR_PPC620
:
4811 rs6000_cost
= &ppc620_cost
;
4814 case PROCESSOR_PPC630
:
4815 rs6000_cost
= &ppc630_cost
;
4818 case PROCESSOR_CELL
:
4819 rs6000_cost
= &ppccell_cost
;
4822 case PROCESSOR_PPC750
:
4823 case PROCESSOR_PPC7400
:
4824 rs6000_cost
= &ppc750_cost
;
4827 case PROCESSOR_PPC7450
:
4828 rs6000_cost
= &ppc7450_cost
;
4831 case PROCESSOR_PPC8540
:
4832 case PROCESSOR_PPC8548
:
4833 rs6000_cost
= &ppc8540_cost
;
4836 case PROCESSOR_PPCE300C2
:
4837 case PROCESSOR_PPCE300C3
:
4838 rs6000_cost
= &ppce300c2c3_cost
;
4841 case PROCESSOR_PPCE500MC
:
4842 rs6000_cost
= &ppce500mc_cost
;
4845 case PROCESSOR_PPCE500MC64
:
4846 rs6000_cost
= &ppce500mc64_cost
;
4849 case PROCESSOR_PPCE5500
:
4850 rs6000_cost
= &ppce5500_cost
;
4853 case PROCESSOR_PPCE6500
:
4854 rs6000_cost
= &ppce6500_cost
;
4857 case PROCESSOR_TITAN
:
4858 rs6000_cost
= &titan_cost
;
4861 case PROCESSOR_POWER4
:
4862 case PROCESSOR_POWER5
:
4863 rs6000_cost
= &power4_cost
;
4866 case PROCESSOR_POWER6
:
4867 rs6000_cost
= &power6_cost
;
4870 case PROCESSOR_POWER7
:
4871 rs6000_cost
= &power7_cost
;
4874 case PROCESSOR_POWER8
:
4875 rs6000_cost
= &power8_cost
;
4878 case PROCESSOR_POWER9
:
4879 rs6000_cost
= &power9_cost
;
4882 case PROCESSOR_POWER10
:
4883 rs6000_cost
= &power10_cost
;
4886 case PROCESSOR_FUTURE
:
4887 rs6000_cost
= &future_cost
;
4890 case PROCESSOR_PPCA2
:
4891 rs6000_cost
= &ppca2_cost
;
4900 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4901 param_simultaneous_prefetches
,
4902 rs6000_cost
->simultaneous_prefetches
);
4903 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4904 param_l1_cache_size
,
4905 rs6000_cost
->l1_cache_size
);
4906 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4907 param_l1_cache_line_size
,
4908 rs6000_cost
->cache_line_size
);
4909 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4910 param_l2_cache_size
,
4911 rs6000_cost
->l2_cache_size
);
4913 /* Increase loop peeling limits based on performance analysis. */
4914 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4915 param_max_peeled_insns
, 400);
4916 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4917 param_max_completely_peeled_insns
, 400);
4919 /* The lxvl/stxvl instructions don't perform well before Power10. */
4921 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4922 param_vect_partial_vector_usage
, 1);
4924 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4925 param_vect_partial_vector_usage
, 0);
4927 /* Use the 'model' -fsched-pressure algorithm by default. */
4928 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4929 param_sched_pressure_algorithm
,
4930 SCHED_PRESSURE_MODEL
);
4932 /* If using typedef char *va_list, signal that
4933 __builtin_va_start (&ap, 0) can be optimized to
4934 ap = __builtin_next_arg (0). */
4935 if (DEFAULT_ABI
!= ABI_V4
)
4936 targetm
.expand_builtin_va_start
= NULL
;
4939 rs6000_override_options_after_change ();
4941 /* If not explicitly specified via option, decide whether to generate indexed
4942 load/store instructions. A value of -1 indicates that the
4943 initial value of this variable has not been overwritten. During
4944 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4945 if (TARGET_AVOID_XFORM
== -1)
4946 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4947 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4948 need indexed accesses and the type used is the scalar type of the element
4949 being loaded or stored. */
4950 TARGET_AVOID_XFORM
= (rs6000_tune
== PROCESSOR_POWER6
&& TARGET_CMPB
4951 && !TARGET_ALTIVEC
);
4953 /* Set the -mrecip options. */
4954 if (rs6000_recip_name
)
4956 char *p
= ASTRDUP (rs6000_recip_name
);
4958 unsigned int mask
, i
;
4961 while ((q
= strtok (p
, ",")) != NULL
)
4972 if (!strcmp (q
, "default"))
4973 mask
= ((TARGET_RECIP_PRECISION
)
4974 ? RECIP_HIGH_PRECISION
: RECIP_LOW_PRECISION
);
4977 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
4978 if (!strcmp (q
, recip_options
[i
].string
))
4980 mask
= recip_options
[i
].mask
;
4984 if (i
== ARRAY_SIZE (recip_options
))
4986 error ("unknown option for %<%s=%s%>", "-mrecip", q
);
4994 rs6000_recip_control
&= ~mask
;
4996 rs6000_recip_control
|= mask
;
5000 /* Initialize all of the registers. */
5001 rs6000_init_hard_regno_mode_ok (global_init_p
);
5003 /* Save the initial options in case the user does function specific options */
5005 target_option_default_node
= target_option_current_node
5006 = build_target_option_node (&global_options
, &global_options_set
);
5008 /* If not explicitly specified via option, decide whether to generate the
5009 extra blr's required to preserve the link stack on some cpus (eg, 476). */
5010 if (TARGET_LINK_STACK
== -1)
5011 SET_TARGET_LINK_STACK (rs6000_tune
== PROCESSOR_PPC476
&& flag_pic
);
5013 /* Deprecate use of -mno-speculate-indirect-jumps. */
5014 if (!rs6000_speculate_indirect_jumps
)
5015 warning (0, "%qs is deprecated and not recommended in any circumstances",
5016 "-mno-speculate-indirect-jumps");
5021 /* Implement TARGET_OPTION_OVERRIDE.  On the RS/6000 this is used to
5022 define the target cpu type. */
/* Thin wrapper over rs6000_option_override_internal, called with
   global_init_p == true so the full one-time option processing runs.
   The internal routine's return value is deliberately discarded via
   the (void) cast.
   NOTE(review): the return-type line and braces are not visible in this
   extract -- confirm against the full source.  */
5025 rs6000_option_override (void)
5027 (void) rs6000_option_override_internal (true);
5031 /* Implement LOOP_ALIGN. */
/* Return the alignment to use for the loop headed by LABEL.  On
   POWER4..POWER8 tunings, small loops (more than 4 but at most 8 insns)
   are given 2^5 = 32-byte alignment so the whole loop fits in one icache
   sector; in all other situations the caller's default alignment is kept.
   NOTE(review): the default-return path and enclosing braces are not
   visible in this extract -- confirm against the full source.  */
5033 rs6000_loop_align (rtx label
)
5038 /* Don't override loop alignment if -falign-loops was specified. */
/* can_override_loop_align is only set when the target itself chose the
   loop alignment (see the option-override code); respect an explicit
   user setting otherwise.  */
5039 if (!can_override_loop_align
)
/* Find the basic block containing LABEL and count the insns of its
   enclosing loop.  */
5042 bb
= BLOCK_FOR_INSN (label
);
5043 ninsns
= num_loop_insns(bb
->loop_father
);
5045 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
5046 if (ninsns
> 4 && ninsns
<= 8
5047 && (rs6000_tune
== PROCESSOR_POWER4
5048 || rs6000_tune
== PROCESSOR_POWER5
5049 || rs6000_tune
== PROCESSOR_POWER6
5050 || rs6000_tune
== PROCESSOR_POWER7
5051 || rs6000_tune
== PROCESSOR_POWER8
))
/* align_flags (5) == 2^5 = 32-byte alignment.  */
5052 return align_flags (5);
5057 /* Return true iff, data reference of TYPE can reach vector alignment (16)
5058 after applying N number of iterations. This routine does not determine
5059 how may iterations are required to reach desired alignment. */
/* Decision is keyed off rs6000_alignment_flags: natural alignment
   (MASK_ALIGN_NATURAL) and power alignment (MASK_ALIGN_POWER) are
   handled as separate cases below.  TYPE itself is unused here except
   under the ALIGN_POWER case in the full source; IS_PACKED indicates a
   packed data reference.
   NOTE(review): the bodies of both branches were dropped by extraction
   -- confirm the returned values against the full source.  */
5062 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED
, bool is_packed
)
5069 if (rs6000_alignment_flags
== MASK_ALIGN_NATURAL
)
5072 if (rs6000_alignment_flags
== MASK_ALIGN_POWER
)
5082 /* Assuming that all other types are naturally aligned. CHECKME! */
5087 /* Return true if the vector misalignment factor is supported by the
/* Implements targetm.vectorize.support_vector_misalignment.  Answers
   whether a vector access of MODE with the given misalignment (in
   bytes; -1 if unknown at compile time) can be handled.  Trivially true
   when the CPU does unaligned VSX accesses efficiently; otherwise the
   movmisalign optab must exist for MODE, and the misalignment must be
   provably word (4-byte) aligned.  */
5090 rs6000_builtin_support_vector_misalignment (machine_mode mode
,
/* Efficient unaligned VSX hardware handles any misalignment.  */
5097 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5100 /* Return if movmisalign pattern is not supported for this mode. */
5101 if (optab_handler (movmisalign_optab
, mode
) == CODE_FOR_nothing
)
/* misalignment == -1 means the factor is unknown at compile time.  */
5104 if (misalignment
== -1)
5106 /* Misalignment factor is unknown at compile time but we know
5107 it's word aligned. */
5108 if (rs6000_vector_alignment_reachable (type
, is_packed
))
5110 int element_size
= TREE_INT_CST_LOW (TYPE_SIZE (type
));
/* 64- or 32-bit elements guarantee at least word alignment.  */
5112 if (element_size
== 64 || element_size
== 32)
5119 /* VSX supports word-aligned vector. */
5120 if (misalignment
% 4 == 0)
5126 /* Implement targetm.vectorize.builtin_vectorization_cost. */
/* Return a relative cost estimate for one vectorizer statement of kind
   TYPE_OF_COST operating on VECTYPE with byte misalignment MISALIGN.
   Costs are tuned per processor: Power7's single permute unit makes
   permutes/pack-unpack pricier; targets with efficient unaligned VSX
   treat unaligned accesses like aligned ones; otherwise unaligned
   load/store cost depends on element count and known misalignment.
   NOTE(review): the numeric `return` lines for most cases and the
   `break`s were dropped by extraction -- confirm exact cost values
   against the full source.  */
5128 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
5129 tree vectype
, int misalign
)
5134 switch (type_of_cost
)
5142 case cond_branch_not_taken
:
5146 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5150 /* Power7 has only one permute unit, make it a bit expensive. */
5151 if (TARGET_VSX
&& rs6000_tune
== PROCESSOR_POWER7
)
5156 case vec_promote_demote
:
5157 /* Power7 has only one permute/pack unit, make it a bit expensive. */
5158 if (TARGET_VSX
&& rs6000_tune
== PROCESSOR_POWER7
)
5163 case cond_branch_taken
:
5166 case unaligned_load
:
5167 case vector_gather_load
:
5168 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5169 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
/* Without efficient unaligned VSX, cost depends on whether
   movmisalign is usable and on the vector's element count.  */
5172 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
5174 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
5175 /* See PR102767, consider V1TI to keep consistency. */
5176 if (elements
== 2 || elements
== 1)
5177 /* Double word aligned. */
5185 /* Double word aligned. */
5189 /* Unknown misalignment. */
5202 /* Misaligned loads are not supported. */
5205 /* Like rs6000_insn_cost, make load insns cost a bit more. */
/* Stores mirror the unaligned-load logic above.  */
5208 case unaligned_store
:
5209 case vector_scatter_store
:
5210 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5213 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
5215 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
5216 /* See PR102767, consider V1TI to keep consistency. */
5217 if (elements
== 2 || elements
== 1)
5218 /* Double word aligned. */
5226 /* Double word aligned. */
5230 /* Unknown misalignment. */
5243 /* Misaligned stores are not supported. */
5249 /* This is a rough approximation assuming non-constant elements
5250 constructed into a vector via element insertion. FIXME:
5251 vec_construct is not granular enough for uniformly good
5252 decisions. If the initialization is a splat, this is
5253 cheaper than we estimate. Improve this someday. */
5254 elem_type
= TREE_TYPE (vectype
);
5255 /* 32-bit vectors loaded into registers are stored as double
5256 precision, so we need 2 permutes, 2 converts, and 1 merge
5257 to construct a vector of short floats from them. */
5258 if (SCALAR_FLOAT_TYPE_P (elem_type
)
5259 && TYPE_PRECISION (elem_type
) == 32)
5261 /* On POWER9, integer vector types are built up in GPRs and then
5262 use a direct move (2 cycles). For POWER8 this is even worse,
5263 as we need two direct moves and a merge, and the direct moves
5265 else if (INTEGRAL_TYPE_P (elem_type
))
/* ISA 3.0: one insert per element (subparts - 1) plus one direct
   move costed at 2; pre-3.0 costs 5 for the move/merge sequence.  */
5267 if (TARGET_P9_VECTOR
)
5268 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 2;
5270 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 5;
5273 /* V2DFmode doesn't need a direct move. */
5281 /* Implement targetm.vectorize.preferred_simd_mode. */
/* For scalar MODE, return the 128-bit (16-byte) vector mode holding
   16 / GET_MODE_SIZE (mode) elements, provided such a mode exists and
   has real vector-memory support (VECTOR_MEM_NONE_P is false).
   NOTE(review): the fallback return for the no-vector case was dropped
   by extraction -- confirm against the full source.  */
5284 rs6000_preferred_simd_mode (scalar_mode mode
)
5286 opt_machine_mode vmode
= mode_for_vector (mode
, 16 / GET_MODE_SIZE (mode
));
5288 if (vmode
.exists () && !VECTOR_MEM_NONE_P (vmode
.require ()))
5289 return vmode
.require ();
/* Per-invocation vectorizer cost model for rs6000: accumulates statement
   counts and costs during costing of one loop (or SLP instance), then
   adjusts the totals in finish_cost (density penalty, unroll-factor
   suggestion, vector-construction penalties).  Inherits the generic
   accounting from vector_costs.  */
5294 class rs6000_cost_data
: public vector_costs
5297 using vector_costs::vector_costs
;
/* Hook: record COUNT statements of kind KIND, updating both the base
   accumulator and the rs6000-specific counters below.  */
5299 unsigned int add_stmt_cost (int count
, vect_cost_for_stmt kind
,
5300 stmt_vec_info stmt_info
, slp_tree
, tree vectype
,
5302 vect_cost_model_location where
) override
;
/* Hook: finalize the accumulated costs (applies the density test and
   per-loop adjustments declared below).  */
5303 void finish_cost (const vector_costs
*) override
;
/* Internal helpers used by the two hooks above.  */
5306 void update_target_cost_per_stmt (vect_cost_for_stmt
, stmt_vec_info
,
5307 vect_cost_model_location
, unsigned int);
5308 void density_test (loop_vec_info
);
5309 void adjust_vect_cost_per_loop (loop_vec_info
);
5310 unsigned int determine_suggested_unroll_factor (loop_vec_info
);
5312 /* Total number of vectorized stmts (loop only). */
5313 unsigned m_nstmts
= 0;
5314 /* Total number of loads (loop only). */
5315 unsigned m_nloads
= 0;
5316 /* Total number of stores (loop only). */
5317 unsigned m_nstores
= 0;
5318 /* Reduction factor for suggesting unroll factor (loop only). */
5319 unsigned m_reduc_factor
= 0;
5320 /* Possible extra penalized cost on vector construction (loop only). */
5321 unsigned m_extra_ctor_cost
= 0;
5322 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5323 instruction is needed by the vectorization. */
5324 bool m_vect_nonmem
= false;
5325 /* If this loop gets vectorized with emulated gather load. */
5326 bool m_gather_load
= false;
5329 /* Test for likely overcommitment of vector hardware resources. If a
5330 loop iteration is relatively large, and too large a percentage of
5331 instructions in the loop are vectorized, the cost model may not
5332 adequately reflect delays from unavailable vector resources.
5333 Penalize the loop body cost for this case. */
5336 rs6000_cost_data::density_test (loop_vec_info loop_vinfo
)
5338 /* This density test only cares about the cost of vector version of the
5339 loop, so immediately return if we are passed costing for the scalar
5340 version (namely computing single scalar iteration cost). */
5341 if (m_costing_for_scalar
)
5344 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5345 basic_block
*bbs
= get_loop_body (loop
);
5346 int nbbs
= loop
->num_nodes
;
5347 int vec_cost
= m_costs
[vect_body
], not_vec_cost
= 0;
5349 for (int i
= 0; i
< nbbs
; i
++)
5351 basic_block bb
= bbs
[i
];
5352 gimple_stmt_iterator gsi
;
5354 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
5356 gimple
*stmt
= gsi_stmt (gsi
);
5357 if (is_gimple_debug (stmt
))
5360 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (stmt
);
5362 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5363 && !STMT_VINFO_IN_PATTERN_P (stmt_info
))
5369 int density_pct
= (vec_cost
* 100) / (vec_cost
+ not_vec_cost
);
5371 if (density_pct
> rs6000_density_pct_threshold
5372 && vec_cost
+ not_vec_cost
> rs6000_density_size_threshold
)
5374 m_costs
[vect_body
] = vec_cost
* (100 + rs6000_density_penalty
) / 100;
5375 if (dump_enabled_p ())
5376 dump_printf_loc (MSG_NOTE
, vect_location
,
5377 "density %d%%, cost %d exceeds threshold, penalizing "
5378 "loop body cost by %u%%\n", density_pct
,
5379 vec_cost
+ not_vec_cost
, rs6000_density_penalty
);
5382 /* Check whether we need to penalize the body cost to account
5383 for excess strided or elementwise loads. */
5384 if (m_extra_ctor_cost
> 0)
5386 gcc_assert (m_nloads
<= m_nstmts
);
5387 unsigned int load_pct
= (m_nloads
* 100) / m_nstmts
;
5389 /* It's likely to be bounded by latency and execution resources
5390 from many scalar loads which are strided or elementwise loads
5391 into a vector if both conditions below are found:
5392 1. there are many loads, it's easy to result in a long wait
5394 2. load has a big proportion of all vectorized statements,
5395 it's not easy to schedule other statements to spread among
5397 One typical case is the innermost loop of the hotspot of SPEC2017
5398 503.bwaves_r without loop interchange. */
5399 if (m_nloads
> (unsigned int) rs6000_density_load_num_threshold
5400 && load_pct
> (unsigned int) rs6000_density_load_pct_threshold
)
5402 m_costs
[vect_body
] += m_extra_ctor_cost
;
5403 if (dump_enabled_p ())
5404 dump_printf_loc (MSG_NOTE
, vect_location
,
5405 "Found %u loads and "
5406 "load pct. %u%% exceed "
5408 "penalizing loop body "
5409 "cost by extra cost %u "
5417 /* Implement targetm.vectorize.create_costs. */
5419 static vector_costs
*
5420 rs6000_vectorize_create_costs (vec_info
*vinfo
, bool costing_for_scalar
)
5422 return new rs6000_cost_data (vinfo
, costing_for_scalar
);
5425 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5426 For some statement, we would like to further fine-grain tweak the cost on
5427 top of rs6000_builtin_vectorization_cost handling which doesn't have any
5428 information on statement operation codes etc. One typical case here is
5429 COND_EXPR, it takes the same cost to simple FXU instruction when evaluating
5430 for scalar cost, but it should be priced more whatever transformed to either
5431 compare + branch or compare + isel instructions. */
5434 rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind
,
5435 struct _stmt_vec_info
*stmt_info
)
5437 if (kind
== scalar_stmt
&& stmt_info
&& stmt_info
->stmt
5438 && gimple_code (stmt_info
->stmt
) == GIMPLE_ASSIGN
)
5440 tree_code subcode
= gimple_assign_rhs_code (stmt_info
->stmt
);
5441 if (subcode
== COND_EXPR
)
5448 /* Helper function for add_stmt_cost. Check each statement cost
5449 entry, gather information and update the target_cost fields
5452 rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind
,
5453 stmt_vec_info stmt_info
,
5454 vect_cost_model_location where
,
5455 unsigned int orig_count
)
5458 /* Check whether we're doing something other than just a copy loop.
5459 Not all such loops may be profitably vectorized; see
5460 rs6000_finish_cost. */
5461 if (kind
== vec_to_scalar
5463 || kind
== vec_promote_demote
5464 || kind
== vec_construct
5465 || kind
== scalar_to_vec
5466 || (where
== vect_body
&& kind
== vector_stmt
))
5467 m_vect_nonmem
= true;
5469 /* Gather some information when we are costing the vectorized instruction
5470 for the statements located in a loop body. */
5471 if (!m_costing_for_scalar
5472 && is_a
<loop_vec_info
> (m_vinfo
)
5473 && where
== vect_body
)
5475 m_nstmts
+= orig_count
;
5477 if (kind
== scalar_load
5478 || kind
== vector_load
5479 || kind
== unaligned_load
5480 || kind
== vector_gather_load
)
5482 m_nloads
+= orig_count
;
5483 if (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
5484 m_gather_load
= true;
5486 else if (kind
== scalar_store
5487 || kind
== vector_store
5488 || kind
== unaligned_store
5489 || kind
== vector_scatter_store
)
5490 m_nstores
+= orig_count
;
5491 else if ((kind
== scalar_stmt
5492 || kind
== vector_stmt
5493 || kind
== vec_to_scalar
)
5495 && vect_is_reduction (stmt_info
))
5497 /* Loop body contains normal int or fp operations and epilogue
5498 contains vector reduction. For simplicity, we assume int
5499 operation takes one cycle and fp operation takes one more. */
5500 tree lhs
= gimple_get_lhs (stmt_info
->stmt
);
5501 bool is_float
= FLOAT_TYPE_P (TREE_TYPE (lhs
));
5502 unsigned int basic_cost
= is_float
? 2 : 1;
5503 m_reduc_factor
= MAX (basic_cost
* orig_count
, m_reduc_factor
);
5506 /* Power processors do not currently have instructions for strided
5507 and elementwise loads, and instead we must generate multiple
5508 scalar loads. This leads to undercounting of the cost. We
5509 account for this by scaling the construction cost by the number
5510 of elements involved, and saving this as extra cost that we may
5511 or may not need to apply. When finalizing the cost of the loop,
5512 the extra penalty is applied when the load density heuristics
5514 if (kind
== vec_construct
&& stmt_info
5515 && STMT_VINFO_TYPE (stmt_info
) == load_vec_info_type
5516 && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) == VMAT_ELEMENTWISE
5517 || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) == VMAT_STRIDED_SLP
))
5519 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5520 unsigned int nunits
= vect_nunits_for_cost (vectype
);
5521 /* As PR103702 shows, it's possible that vectorizer wants to do
5522 costings for only one unit here, it's no need to do any
5523 penalization for it, so simply early return here. */
5526 /* i386 port adopts nunits * stmt_cost as the penalized cost
5527 for this kind of penalization, we used to follow it but
5528 found it could result in an unreliable body cost especially
5529 for V16QI/V8HI modes. To make it better, we choose this
5530 new heuristic: for each scalar load, we use 2 as penalized
5531 cost for the case with 2 nunits and use 1 for the other
5532 cases. It's without much supporting theory, mainly
5533 concluded from the broad performance evaluations on Power8,
5534 Power9 and Power10. One possibly related point is that:
5535 vector construction for more units would use more insns,
5536 it has more chances to schedule them better (even run in
5537 parallelly when enough available units at that time), so
5538 it seems reasonable not to penalize that much for them. */
5539 unsigned int adjusted_cost
= (nunits
== 2) ? 2 : 1;
5540 unsigned int extra_cost
= nunits
* adjusted_cost
;
5541 m_extra_ctor_cost
+= extra_cost
;
5547 rs6000_cost_data::add_stmt_cost (int count
, vect_cost_for_stmt kind
,
5548 stmt_vec_info stmt_info
, slp_tree
,
5549 tree vectype
, int misalign
,
5550 vect_cost_model_location where
)
5552 unsigned retval
= 0;
5554 if (flag_vect_cost_model
)
5556 int stmt_cost
= rs6000_builtin_vectorization_cost (kind
, vectype
,
5558 stmt_cost
+= rs6000_adjust_vect_cost_per_stmt (kind
, stmt_info
);
5559 /* Statements in an inner loop relative to the loop being
5560 vectorized are weighted more heavily. The value here is
5561 arbitrary and could potentially be improved with analysis. */
5562 unsigned int orig_count
= count
;
5563 retval
= adjust_cost_for_freq (stmt_info
, where
, count
* stmt_cost
);
5564 m_costs
[where
] += retval
;
5566 update_target_cost_per_stmt (kind
, stmt_info
, where
, orig_count
);
5572 /* For some target specific vectorization cost which can't be handled per stmt,
5573 we check the requisite conditions and adjust the vectorization cost
5574 accordingly if satisfied. One typical example is to model shift cost for
5575 vector with length by counting number of required lengths under condition
5576 LOOP_VINFO_FULLY_WITH_LENGTH_P. */
5579 rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo
)
5581 if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
))
5583 rgroup_controls
*rgc
;
5584 unsigned int num_vectors_m1
;
5585 unsigned int shift_cnt
= 0;
5586 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo
), num_vectors_m1
, rgc
)
5588 /* Each length needs one shift to fill into bits 0-7. */
5589 shift_cnt
+= num_vectors_m1
+ 1;
5591 add_stmt_cost (shift_cnt
, scalar_stmt
, NULL
, NULL
,
5592 NULL_TREE
, 0, vect_body
);
5596 /* Determine suggested unroll factor by considering some below factors:
5598 - unroll option/pragma which can disable unrolling for this loop;
5599 - simple hardware resource model for non memory vector insns;
5600 - aggressive heuristics when iteration count is unknown:
5601 - reduction case to break cross iteration dependency;
5602 - emulated gather load;
5603 - estimated iteration count when iteration count is unknown;
5608 rs6000_cost_data::determine_suggested_unroll_factor (loop_vec_info loop_vinfo
)
5610 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5612 /* Don't unroll if it's specified explicitly not to be unrolled. */
5613 if (loop
->unroll
== 1
5614 || (OPTION_SET_P (flag_unroll_loops
) && !flag_unroll_loops
)
5615 || (OPTION_SET_P (flag_unroll_all_loops
) && !flag_unroll_all_loops
))
5618 unsigned int nstmts_nonldst
= m_nstmts
- m_nloads
- m_nstores
;
5619 /* Don't unroll if no vector instructions excepting for memory access. */
5620 if (nstmts_nonldst
== 0)
5623 /* Consider breaking cross iteration dependency for reduction. */
5624 unsigned int reduc_factor
= m_reduc_factor
> 1 ? m_reduc_factor
: 1;
5626 /* Use this simple hardware resource model that how many non ld/st
5627 vector instructions can be issued per cycle. */
5628 unsigned int issue_width
= rs6000_vect_unroll_issue
;
5629 unsigned int uf
= CEIL (reduc_factor
* issue_width
, nstmts_nonldst
);
5630 uf
= MIN ((unsigned int) rs6000_vect_unroll_limit
, uf
);
5631 /* Make sure it is power of 2. */
5632 uf
= 1 << ceil_log2 (uf
);
5634 /* If the iteration count is known, the costing would be exact enough,
5635 don't worry it could be worse. */
5636 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
))
5639 /* Inspired by SPEC2017 parest_r, we want to aggressively unroll the
5640 loop if either condition is satisfied:
5641 - reduction factor exceeds the threshold;
5642 - emulated gather load adopted. */
5643 if (reduc_factor
> (unsigned int) rs6000_vect_unroll_reduc_threshold
5647 /* Check if we can conclude it's good to unroll from the estimated
5649 HOST_WIDE_INT est_niter
= get_estimated_loop_iterations_int (loop
);
5650 unsigned int vf
= vect_vf_for_cost (loop_vinfo
);
5651 unsigned int unrolled_vf
= vf
* uf
;
5652 if (est_niter
== -1 || est_niter
< unrolled_vf
)
5653 /* When the estimated iteration of this loop is unknown, it's possible
5654 that we are able to vectorize this loop with the original VF but fail
5655 to vectorize it with the unrolled VF any more if the actual iteration
5656 count is in between. */
5660 unsigned int epil_niter_unr
= est_niter
% unrolled_vf
;
5661 unsigned int epil_niter
= est_niter
% vf
;
5662 /* Even if we have partial vector support, it can be still inefficent
5663 to calculate the length when the iteration count is unknown, so
5664 only expect it's good to unroll when the epilogue iteration count
5665 is not bigger than VF (only one time length calculation). */
5666 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
5667 && epil_niter_unr
<= vf
)
5669 /* Without partial vector support, conservatively unroll this when
5670 the epilogue iteration count is less than the original one
5671 (epilogue execution time wouldn't be longer than before). */
5672 else if (!LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
5673 && epil_niter_unr
<= epil_niter
)
5681 rs6000_cost_data::finish_cost (const vector_costs
*scalar_costs
)
5683 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (m_vinfo
))
5685 adjust_vect_cost_per_loop (loop_vinfo
);
5686 density_test (loop_vinfo
);
5688 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5689 that require versioning for any reason. The vectorization is at
5690 best a wash inside the loop, and the versioning checks make
5691 profitability highly unlikely and potentially quite harmful. */
5693 && LOOP_VINFO_VECT_FACTOR (loop_vinfo
) == 2
5694 && LOOP_REQUIRES_VERSIONING (loop_vinfo
))
5695 m_costs
[vect_body
] += 10000;
5697 m_suggested_unroll_factor
5698 = determine_suggested_unroll_factor (loop_vinfo
);
5701 vector_costs::finish_cost (scalar_costs
);
5704 /* Implement targetm.loop_unroll_adjust. */
5707 rs6000_loop_unroll_adjust (unsigned nunroll
, struct loop
*loop
)
5709 if (unroll_only_small_loops
)
5711 /* TODO: These are hardcoded values right now. We probably should use
5713 if (loop
->ninsns
<= 6)
5714 return MIN (4, nunroll
);
5715 if (loop
->ninsns
<= 10)
5716 return MIN (2, nunroll
);
5724 /* Returns a function decl for a vectorized version of the builtin function
5725 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5726 if it is not available.
5728 Implement targetm.vectorize.builtin_vectorized_function. */
5731 rs6000_builtin_vectorized_function (unsigned int fn
, tree type_out
,
5734 machine_mode in_mode
, out_mode
;
5737 if (TARGET_DEBUG_BUILTIN
)
5738 fprintf (stderr
, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5739 combined_fn_name (combined_fn (fn
)),
5740 GET_MODE_NAME (TYPE_MODE (type_out
)),
5741 GET_MODE_NAME (TYPE_MODE (type_in
)));
5743 /* TODO: Should this be gcc_assert? */
5744 if (TREE_CODE (type_out
) != VECTOR_TYPE
5745 || TREE_CODE (type_in
) != VECTOR_TYPE
)
5748 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
5749 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
5750 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
5751 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
5756 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5757 && out_mode
== DFmode
&& out_n
== 2
5758 && in_mode
== DFmode
&& in_n
== 2)
5759 return rs6000_builtin_decls
[RS6000_BIF_CPSGNDP
];
5760 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5761 && out_mode
== SFmode
&& out_n
== 4
5762 && in_mode
== SFmode
&& in_n
== 4)
5763 return rs6000_builtin_decls
[RS6000_BIF_CPSGNSP
];
5764 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5765 && out_mode
== SFmode
&& out_n
== 4
5766 && in_mode
== SFmode
&& in_n
== 4)
5767 return rs6000_builtin_decls
[RS6000_BIF_COPYSIGN_V4SF
];
5770 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5771 && out_mode
== DFmode
&& out_n
== 2
5772 && in_mode
== DFmode
&& in_n
== 2)
5773 return rs6000_builtin_decls
[RS6000_BIF_XVRDPIP
];
5774 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5775 && out_mode
== SFmode
&& out_n
== 4
5776 && in_mode
== SFmode
&& in_n
== 4)
5777 return rs6000_builtin_decls
[RS6000_BIF_XVRSPIP
];
5778 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5779 && out_mode
== SFmode
&& out_n
== 4
5780 && in_mode
== SFmode
&& in_n
== 4)
5781 return rs6000_builtin_decls
[RS6000_BIF_VRFIP
];
5784 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5785 && out_mode
== DFmode
&& out_n
== 2
5786 && in_mode
== DFmode
&& in_n
== 2)
5787 return rs6000_builtin_decls
[RS6000_BIF_XVRDPIM
];
5788 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5789 && out_mode
== SFmode
&& out_n
== 4
5790 && in_mode
== SFmode
&& in_n
== 4)
5791 return rs6000_builtin_decls
[RS6000_BIF_XVRSPIM
];
5792 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5793 && out_mode
== SFmode
&& out_n
== 4
5794 && in_mode
== SFmode
&& in_n
== 4)
5795 return rs6000_builtin_decls
[RS6000_BIF_VRFIM
];
5798 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5799 && out_mode
== DFmode
&& out_n
== 2
5800 && in_mode
== DFmode
&& in_n
== 2)
5801 return rs6000_builtin_decls
[RS6000_BIF_XVMADDDP
];
5802 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5803 && out_mode
== SFmode
&& out_n
== 4
5804 && in_mode
== SFmode
&& in_n
== 4)
5805 return rs6000_builtin_decls
[RS6000_BIF_XVMADDSP
];
5806 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5807 && out_mode
== SFmode
&& out_n
== 4
5808 && in_mode
== SFmode
&& in_n
== 4)
5809 return rs6000_builtin_decls
[RS6000_BIF_VMADDFP
];
5812 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5813 && out_mode
== DFmode
&& out_n
== 2
5814 && in_mode
== DFmode
&& in_n
== 2)
5815 return rs6000_builtin_decls
[RS6000_BIF_XVRDPIZ
];
5816 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5817 && out_mode
== SFmode
&& out_n
== 4
5818 && in_mode
== SFmode
&& in_n
== 4)
5819 return rs6000_builtin_decls
[RS6000_BIF_XVRSPIZ
];
5820 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5821 && out_mode
== SFmode
&& out_n
== 4
5822 && in_mode
== SFmode
&& in_n
== 4)
5823 return rs6000_builtin_decls
[RS6000_BIF_VRFIZ
];
5826 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5827 && flag_unsafe_math_optimizations
5828 && out_mode
== DFmode
&& out_n
== 2
5829 && in_mode
== DFmode
&& in_n
== 2)
5830 return rs6000_builtin_decls
[RS6000_BIF_XVRDPI
];
5831 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5832 && flag_unsafe_math_optimizations
5833 && out_mode
== SFmode
&& out_n
== 4
5834 && in_mode
== SFmode
&& in_n
== 4)
5835 return rs6000_builtin_decls
[RS6000_BIF_XVRSPI
];
5838 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5839 && !flag_trapping_math
5840 && out_mode
== DFmode
&& out_n
== 2
5841 && in_mode
== DFmode
&& in_n
== 2)
5842 return rs6000_builtin_decls
[RS6000_BIF_XVRDPIC
];
5843 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5844 && !flag_trapping_math
5845 && out_mode
== SFmode
&& out_n
== 4
5846 && in_mode
== SFmode
&& in_n
== 4)
5847 return rs6000_builtin_decls
[RS6000_BIF_XVRSPIC
];
5853 /* Generate calls to libmass if appropriate. */
5854 if (rs6000_veclib_handler
)
5855 return rs6000_veclib_handler (combined_fn (fn
), type_out
, type_in
);
5860 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5861 library with vectorized intrinsics. */
5864 rs6000_builtin_vectorized_libmass (combined_fn fn
, tree type_out
,
5868 const char *suffix
= NULL
;
5869 tree fntype
, new_fndecl
, bdecl
= NULL_TREE
;
5872 machine_mode el_mode
, in_mode
;
5875 /* Libmass is suitable for unsafe math only as it does not correctly support
5876 parts of IEEE with the required precision such as denormals. Only support
5877 it if we have VSX to use the simd d2 or f4 functions.
5878 XXX: Add variable length support. */
5879 if (!flag_unsafe_math_optimizations
|| !TARGET_VSX
)
5882 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
5883 n
= TYPE_VECTOR_SUBPARTS (type_out
);
5884 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
5885 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
5886 if (el_mode
!= in_mode
5922 if (el_mode
== DFmode
&& n
== 2)
5924 bdecl
= mathfn_built_in (double_type_node
, fn
);
5925 suffix
= "d2"; /* pow -> powd2 */
5927 else if (el_mode
== SFmode
&& n
== 4)
5929 bdecl
= mathfn_built_in (float_type_node
, fn
);
5930 suffix
= "4"; /* powf -> powf4 */
5942 gcc_assert (suffix
!= NULL
);
5943 bname
= IDENTIFIER_POINTER (DECL_NAME (bdecl
));
5947 strcpy (name
, bname
+ strlen ("__builtin_"));
5948 strcat (name
, suffix
);
5951 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
5952 else if (n_args
== 2)
5953 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
5957 /* Build a function declaration for the vectorized function. */
5958 new_fndecl
= build_decl (BUILTINS_LOCATION
,
5959 FUNCTION_DECL
, get_identifier (name
), fntype
);
5960 TREE_PUBLIC (new_fndecl
) = 1;
5961 DECL_EXTERNAL (new_fndecl
) = 1;
5962 DECL_IS_NOVOPS (new_fndecl
) = 1;
5963 TREE_READONLY (new_fndecl
) = 1;
5969 /* Default CPU string for rs6000*_file_start functions. */
5970 static const char *rs6000_default_cpu
;
5972 #ifdef USING_ELFOS_H
5973 const char *rs6000_machine
;
5976 rs6000_machine_from_flags (void)
5979 if (rs6000_cpu
== PROCESSOR_PPCE300C2
|| rs6000_cpu
== PROCESSOR_PPCE300C3
)
5981 if (rs6000_cpu
== PROCESSOR_PPC8540
|| rs6000_cpu
== PROCESSOR_PPC8548
)
5983 if (rs6000_cpu
== PROCESSOR_PPCE500MC
)
5985 if (rs6000_cpu
== PROCESSOR_PPCE500MC64
)
5987 if (rs6000_cpu
== PROCESSOR_PPCE5500
)
5989 if (rs6000_cpu
== PROCESSOR_PPCE6500
)
5993 if (rs6000_cpu
== PROCESSOR_PPC403
)
5995 if (rs6000_cpu
== PROCESSOR_PPC405
)
5997 if (rs6000_cpu
== PROCESSOR_PPC440
)
5999 if (rs6000_cpu
== PROCESSOR_PPC476
)
6003 if (rs6000_cpu
== PROCESSOR_PPCA2
)
6007 if (rs6000_cpu
== PROCESSOR_CELL
)
6011 if (rs6000_cpu
== PROCESSOR_TITAN
)
6014 /* 500 series and 800 series */
6015 if (rs6000_cpu
== PROCESSOR_MPCCORE
)
6019 /* This (and ppc64 below) are disabled here (for now at least) because
6020 PROCESSOR_POWERPC, PROCESSOR_POWERPC64, and PROCESSOR_COMMON
6021 are #define'd as some of these. Untangling that is a job for later. */
6023 /* 600 series and 700 series, "classic" */
6024 if (rs6000_cpu
== PROCESSOR_PPC601
|| rs6000_cpu
== PROCESSOR_PPC603
6025 || rs6000_cpu
== PROCESSOR_PPC604
|| rs6000_cpu
== PROCESSOR_PPC604e
6026 || rs6000_cpu
== PROCESSOR_PPC750
)
6030 /* Classic with AltiVec, "G4" */
6031 if (rs6000_cpu
== PROCESSOR_PPC7400
|| rs6000_cpu
== PROCESSOR_PPC7450
)
6035 /* The older 64-bit CPUs */
6036 if (rs6000_cpu
== PROCESSOR_PPC620
|| rs6000_cpu
== PROCESSOR_PPC630
6037 || rs6000_cpu
== PROCESSOR_RS64A
)
6041 HOST_WIDE_INT flags
= rs6000_isa_flags
;
6043 /* Disable the flags that should never influence the .machine selection. */
6044 flags
&= ~(OPTION_MASK_PPC_GFXOPT
| OPTION_MASK_PPC_GPOPT
| OPTION_MASK_ISEL
);
6046 if ((flags
& (ISA_FUTURE_MASKS
& ~ISA_3_1_MASKS_SERVER
)) != 0)
6048 if ((flags
& (ISA_3_1_MASKS_SERVER
& ~ISA_3_0_MASKS_SERVER
)) != 0)
6050 if ((flags
& (ISA_3_0_MASKS_SERVER
& ~ISA_2_7_MASKS_SERVER
)) != 0)
6052 if ((flags
& (ISA_2_7_MASKS_SERVER
& ~ISA_2_6_MASKS_SERVER
)) != 0)
6054 if ((flags
& (ISA_2_6_MASKS_SERVER
& ~ISA_2_5_MASKS_SERVER
)) != 0)
6056 if ((flags
& (ISA_2_5_MASKS_SERVER
& ~ISA_2_4_MASKS
)) != 0)
6058 if ((flags
& (ISA_2_4_MASKS
& ~ISA_2_1_MASKS
)) != 0)
6060 if ((flags
& ISA_2_1_MASKS
) != 0)
6062 if ((flags
& OPTION_MASK_POWERPC64
) != 0)
6068 emit_asm_machine (void)
6070 fprintf (asm_out_file
, "\t.machine %s\n", rs6000_machine
);
6074 /* Do anything needed at the start of the asm file. */
6077 rs6000_file_start (void)
6080 const char *start
= buffer
;
6081 FILE *file
= asm_out_file
;
6083 rs6000_default_cpu
= TARGET_CPU_DEFAULT
;
6085 default_file_start ();
6087 if (flag_verbose_asm
)
6089 sprintf (buffer
, "\n%s rs6000/powerpc options:", ASM_COMMENT_START
);
6091 if (rs6000_default_cpu
!= 0 && rs6000_default_cpu
[0] != '\0')
6093 fprintf (file
, "%s --with-cpu=%s", start
, rs6000_default_cpu
);
6097 if (OPTION_SET_P (rs6000_cpu_index
))
6099 fprintf (file
, "%s -mcpu=%s", start
,
6100 processor_target_table
[rs6000_cpu_index
].name
);
6104 if (OPTION_SET_P (rs6000_tune_index
))
6106 fprintf (file
, "%s -mtune=%s", start
,
6107 processor_target_table
[rs6000_tune_index
].name
);
6111 if (PPC405_ERRATUM77
)
6113 fprintf (file
, "%s PPC405CR_ERRATUM77", start
);
6117 #ifdef USING_ELFOS_H
6118 switch (rs6000_sdata
)
6120 case SDATA_NONE
: fprintf (file
, "%s -msdata=none", start
); start
= ""; break;
6121 case SDATA_DATA
: fprintf (file
, "%s -msdata=data", start
); start
= ""; break;
6122 case SDATA_SYSV
: fprintf (file
, "%s -msdata=sysv", start
); start
= ""; break;
6123 case SDATA_EABI
: fprintf (file
, "%s -msdata=eabi", start
); start
= ""; break;
6126 if (rs6000_sdata
&& g_switch_value
)
6128 fprintf (file
, "%s -G %d", start
,
6138 #ifdef USING_ELFOS_H
6139 rs6000_machine
= rs6000_machine_from_flags ();
6140 emit_asm_machine ();
6143 if (DEFAULT_ABI
== ABI_ELFv2
)
6144 fprintf (file
, "\t.abiversion 2\n");
6148 /* Return nonzero if this function is known to have a null epilogue. */
6151 direct_return (void)
6153 if (reload_completed
)
6155 rs6000_stack_t
*info
= rs6000_stack_info ();
6157 if (info
->first_gp_reg_save
== 32
6158 && info
->first_fp_reg_save
== 64
6159 && info
->first_altivec_reg_save
== LAST_ALTIVEC_REGNO
+ 1
6160 && ! info
->lr_save_p
6161 && ! info
->cr_save_p
6162 && info
->vrsave_size
== 0
6170 /* Helper for num_insns_constant. Calculate number of instructions to
6171 load VALUE to a single gpr using combinations of addi, addis, ori,
6172 oris, sldi and rldimi instructions. */
6175 num_insns_constant_gpr (HOST_WIDE_INT value
)
6177 /* signed constant loadable with addi */
6178 if (SIGNED_INTEGER_16BIT_P (value
))
6181 /* constant loadable with addis */
6182 else if ((value
& 0xffff) == 0
6183 && (value
>> 31 == -1 || value
>> 31 == 0))
6186 /* PADDI can support up to 34 bit signed integers. */
6187 else if (TARGET_PREFIXED
&& SIGNED_INTEGER_34BIT_P (value
))
6190 else if (TARGET_POWERPC64
)
6192 HOST_WIDE_INT low
= sext_hwi (value
, 32);
6193 HOST_WIDE_INT high
= value
>> 31;
6195 if (high
== 0 || high
== -1)
6200 if (low
== 0 || low
== high
)
6201 return num_insns_constant_gpr (high
) + 1;
6203 return num_insns_constant_gpr (low
) + 1;
6205 return (num_insns_constant_gpr (high
)
6206 + num_insns_constant_gpr (low
) + 1);
6213 /* Helper for num_insns_constant. Allow constants formed by the
6214 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
6215 and handle modes that require multiple gprs. */
6218 num_insns_constant_multi (HOST_WIDE_INT value
, machine_mode mode
)
6220 int nregs
= (GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6224 HOST_WIDE_INT low
= sext_hwi (value
, BITS_PER_WORD
);
6225 int insns
= num_insns_constant_gpr (low
);
6227 /* We won't get more than 2 from num_insns_constant_gpr
6228 except when TARGET_POWERPC64 and mode is DImode or
6229 wider, so the register mode must be DImode. */
6230 && rs6000_is_valid_and_mask (GEN_INT (low
), DImode
))
6233 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
6234 it all at once would be UB. */
6235 value
>>= (BITS_PER_WORD
- 1);
6241 /* Return the number of instructions it takes to form a constant in as
6242 many gprs are needed for MODE. */
6245 num_insns_constant (rtx op
, machine_mode mode
)
6249 switch (GET_CODE (op
))
6255 case CONST_WIDE_INT
:
6258 for (int i
= 0; i
< CONST_WIDE_INT_NUNITS (op
); i
++)
6259 insns
+= num_insns_constant_multi (CONST_WIDE_INT_ELT (op
, i
),
6266 const struct real_value
*rv
= CONST_DOUBLE_REAL_VALUE (op
);
6268 if (mode
== SFmode
|| mode
== SDmode
)
6273 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv
, l
);
6275 REAL_VALUE_TO_TARGET_SINGLE (*rv
, l
);
6276 /* See the first define_split in rs6000.md handling a
6277 const_double_operand. */
6281 else if (mode
== DFmode
|| mode
== DDmode
)
6286 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv
, l
);
6288 REAL_VALUE_TO_TARGET_DOUBLE (*rv
, l
);
6290 /* See the second (32-bit) and third (64-bit) define_split
6291 in rs6000.md handling a const_double_operand. */
6292 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 0 : 1] << 32;
6293 val
|= l
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffffUL
;
6296 else if (mode
== TFmode
|| mode
== TDmode
6297 || mode
== KFmode
|| mode
== IFmode
)
6303 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv
, l
);
6305 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv
, l
);
6307 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 0 : 3] << 32;
6308 val
|= l
[WORDS_BIG_ENDIAN
? 1 : 2] & 0xffffffffUL
;
6309 insns
= num_insns_constant_multi (val
, DImode
);
6310 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 2 : 1] << 32;
6311 val
|= l
[WORDS_BIG_ENDIAN
? 3 : 0] & 0xffffffffUL
;
6312 insns
+= num_insns_constant_multi (val
, DImode
);
6324 return num_insns_constant_multi (val
, mode
);
6327 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6328 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6329 corresponding element of the vector, but for V4SFmode, the
6330 corresponding "float" is interpreted as an SImode integer. */
6333 const_vector_elt_as_int (rtx op
, unsigned int elt
)
6337 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6338 gcc_assert (GET_MODE (op
) != V2DImode
6339 && GET_MODE (op
) != V2DFmode
);
6341 tmp
= CONST_VECTOR_ELT (op
, elt
);
6342 if (GET_MODE (op
) == V4SFmode
)
6343 tmp
= gen_lowpart (SImode
, tmp
);
6344 return INTVAL (tmp
);
6347 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6348 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6349 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6350 all items are set to the same value and contain COPIES replicas of the
6351 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6352 operand and the others are set to the value of the operand's msb. */
6355 vspltis_constant (rtx op
, unsigned step
, unsigned copies
)
6357 machine_mode mode
= GET_MODE (op
);
6358 machine_mode inner
= GET_MODE_INNER (mode
);
6366 HOST_WIDE_INT splat_val
;
6367 HOST_WIDE_INT msb_val
;
6369 if (mode
== V2DImode
|| mode
== V2DFmode
|| mode
== V1TImode
)
6372 nunits
= GET_MODE_NUNITS (mode
);
6373 bitsize
= GET_MODE_BITSIZE (inner
);
6374 mask
= GET_MODE_MASK (inner
);
6376 val
= const_vector_elt_as_int (op
, BYTES_BIG_ENDIAN
? nunits
- 1 : 0);
6378 msb_val
= val
>= 0 ? 0 : -1;
6380 if (val
== 0 && step
> 1)
6382 /* Special case for loading most significant bit with step > 1.
6383 In that case, match 0s in all but step-1s elements, where match
6385 for (i
= 1; i
< nunits
; ++i
)
6387 unsigned elt
= BYTES_BIG_ENDIAN
? nunits
- 1 - i
: i
;
6388 HOST_WIDE_INT elt_val
= const_vector_elt_as_int (op
, elt
);
6389 if ((i
& (step
- 1)) == step
- 1)
6391 if (!EASY_VECTOR_MSB (elt_val
, inner
))
6401 /* Construct the value to be splatted, if possible. If not, return 0. */
6402 for (i
= 2; i
<= copies
; i
*= 2)
6404 HOST_WIDE_INT small_val
;
6406 small_val
= splat_val
>> bitsize
;
6408 if (splat_val
!= ((HOST_WIDE_INT
)
6409 ((unsigned HOST_WIDE_INT
) small_val
<< bitsize
)
6410 | (small_val
& mask
)))
6412 splat_val
= small_val
;
6413 inner
= smallest_int_mode_for_size (bitsize
);
6416 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6417 if (EASY_VECTOR_15 (splat_val
))
6420 /* Also check if we can splat, and then add the result to itself. Do so if
6421 the value is positive, of if the splat instruction is using OP's mode;
6422 for splat_val < 0, the splat and the add should use the same mode. */
6423 else if (EASY_VECTOR_15_ADD_SELF (splat_val
)
6424 && (splat_val
>= 0 || (step
== 1 && copies
== 1)))
6427 /* Also check if are loading up the most significant bit which can be done by
6428 loading up -1 and shifting the value left by -1. Only do this for
6429 step 1 here, for larger steps it is done earlier. */
6430 else if (EASY_VECTOR_MSB (splat_val
, inner
) && step
== 1)
6436 /* Check if VAL is present in every STEP-th element, and the
6437 other elements are filled with its most significant bit. */
6438 for (i
= 1; i
< nunits
; ++i
)
6440 HOST_WIDE_INT desired_val
;
6441 unsigned elt
= BYTES_BIG_ENDIAN
? nunits
- 1 - i
: i
;
6442 if ((i
& (step
- 1)) == 0)
6445 desired_val
= msb_val
;
6447 if (desired_val
!= const_vector_elt_as_int (op
, elt
))
6454 /* Like vsplitis_constant, but allow the value to be shifted left with a VSLDOI
6455 instruction, filling in the bottom elements with 0 or -1.
6457 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6458 for the number of zeroes to shift in, or negative for the number of 0xff
6461 OP is a CONST_VECTOR. */
6464 vspltis_shifted (rtx op
)
6466 machine_mode mode
= GET_MODE (op
);
6467 machine_mode inner
= GET_MODE_INNER (mode
);
6475 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
)
6478 /* We need to create pseudo registers to do the shift, so don't recognize
6479 shift vector constants after reload. Don't match it even before RA
6480 after split1 is done, because there won't be further splitting pass
6481 before RA to do the splitting. */
6482 if (!can_create_pseudo_p ()
6483 || (cfun
->curr_properties
& PROP_rtl_split_insns
))
6486 nunits
= GET_MODE_NUNITS (mode
);
6487 mask
= GET_MODE_MASK (inner
);
6489 val
= const_vector_elt_as_int (op
, BYTES_BIG_ENDIAN
? 0 : nunits
- 1);
6491 /* Check if the value can really be the operand of a vspltis[bhw]. */
6492 if (EASY_VECTOR_15 (val
))
6495 /* Also check if we are loading up the most significant bit which can be done
6496 by loading up -1 and shifting the value left by -1. */
6497 else if (EASY_VECTOR_MSB (val
, inner
))
6503 /* Check if VAL is present in every STEP-th element until we find elements
6504 that are 0 or all 1 bits. */
6505 for (i
= 1; i
< nunits
; ++i
)
6507 unsigned elt
= BYTES_BIG_ENDIAN
? i
: nunits
- 1 - i
;
6508 HOST_WIDE_INT elt_val
= const_vector_elt_as_int (op
, elt
);
6510 /* If the value isn't the splat value, check for the remaining elements
6516 for (j
= i
+1; j
< nunits
; ++j
)
6518 unsigned elt2
= BYTES_BIG_ENDIAN
? j
: nunits
- 1 - j
;
6519 if (const_vector_elt_as_int (op
, elt2
) != 0)
6523 return (nunits
- i
) * GET_MODE_SIZE (inner
);
6526 else if ((elt_val
& mask
) == mask
)
6528 for (j
= i
+1; j
< nunits
; ++j
)
6530 unsigned elt2
= BYTES_BIG_ENDIAN
? j
: nunits
- 1 - j
;
6531 if ((const_vector_elt_as_int (op
, elt2
) & mask
) != mask
)
6535 return -((nunits
- i
) * GET_MODE_SIZE (inner
));
6543 /* If all elements are equal, we don't need to do VSLDOI. */
6548 /* Return non-zero (element mode byte size) if OP is of the given MODE
6549 and can be synthesized with a vspltisb, vspltish or vspltisw. */
6552 easy_altivec_constant (rtx op
, machine_mode mode
)
6554 unsigned step
, copies
;
6556 if (mode
== VOIDmode
)
6557 mode
= GET_MODE (op
);
6558 else if (mode
!= GET_MODE (op
))
6561 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6563 if (mode
== V2DFmode
)
6564 return zero_constant (op
, mode
) ? 8 : 0;
6566 else if (mode
== V2DImode
)
6568 if (!CONST_INT_P (CONST_VECTOR_ELT (op
, 0))
6569 || !CONST_INT_P (CONST_VECTOR_ELT (op
, 1)))
6572 if (zero_constant (op
, mode
))
6575 if (INTVAL (CONST_VECTOR_ELT (op
, 0)) == -1
6576 && INTVAL (CONST_VECTOR_ELT (op
, 1)) == -1)
6582 /* V1TImode is a special container for TImode. Ignore for now. */
6583 else if (mode
== V1TImode
)
6586 /* Start with a vspltisw. */
6587 step
= GET_MODE_NUNITS (mode
) / 4;
6590 if (vspltis_constant (op
, step
, copies
))
6593 /* Then try with a vspltish. */
6599 if (vspltis_constant (op
, step
, copies
))
6602 /* And finally a vspltisb. */
6608 if (vspltis_constant (op
, step
, copies
))
6611 if (vspltis_shifted (op
) != 0)
6612 return GET_MODE_SIZE (GET_MODE_INNER (mode
));
6617 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6618 result is OP. Abort if it is not possible. */
6621 gen_easy_altivec_constant (rtx op
)
6623 machine_mode mode
= GET_MODE (op
);
6624 int nunits
= GET_MODE_NUNITS (mode
);
6625 rtx val
= CONST_VECTOR_ELT (op
, BYTES_BIG_ENDIAN
? nunits
- 1 : 0);
6626 unsigned step
= nunits
/ 4;
6627 unsigned copies
= 1;
6629 /* Start with a vspltisw. */
6630 if (vspltis_constant (op
, step
, copies
))
6631 return gen_rtx_VEC_DUPLICATE (V4SImode
, gen_lowpart (SImode
, val
));
6633 /* Then try with a vspltish. */
6639 if (vspltis_constant (op
, step
, copies
))
6640 return gen_rtx_VEC_DUPLICATE (V8HImode
, gen_lowpart (HImode
, val
));
6642 /* And finally a vspltisb. */
6648 if (vspltis_constant (op
, step
, copies
))
6649 return gen_rtx_VEC_DUPLICATE (V16QImode
, gen_lowpart (QImode
, val
));
6654 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6655 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6657 Return the number of instructions needed (1 or 2) into the address pointed
6660 Return the constant that is being split via CONSTANT_PTR. */
6663 xxspltib_constant_p (rtx op
,
6668 size_t nunits
= GET_MODE_NUNITS (mode
);
6670 HOST_WIDE_INT value
;
6673 /* Set the returned values to out of bound values. */
6674 *num_insns_ptr
= -1;
6675 *constant_ptr
= 256;
6677 if (!TARGET_P9_VECTOR
)
6680 if (mode
== VOIDmode
)
6681 mode
= GET_MODE (op
);
6683 else if (mode
!= GET_MODE (op
) && GET_MODE (op
) != VOIDmode
)
6686 /* Handle (vec_duplicate <constant>). */
6687 if (GET_CODE (op
) == VEC_DUPLICATE
)
6689 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6690 && mode
!= V2DImode
)
6693 element
= XEXP (op
, 0);
6694 if (!CONST_INT_P (element
))
6697 value
= INTVAL (element
);
6698 if (!IN_RANGE (value
, -128, 127))
6702 /* Handle (const_vector [...]). */
6703 else if (GET_CODE (op
) == CONST_VECTOR
)
6705 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6706 && mode
!= V2DImode
)
6709 element
= CONST_VECTOR_ELT (op
, 0);
6710 if (!CONST_INT_P (element
))
6713 value
= INTVAL (element
);
6714 if (!IN_RANGE (value
, -128, 127))
6717 for (i
= 1; i
< nunits
; i
++)
6719 element
= CONST_VECTOR_ELT (op
, i
);
6720 if (!CONST_INT_P (element
))
6723 if (value
!= INTVAL (element
))
6728 /* Handle integer constants being loaded into the upper part of the VSX
6729 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6730 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLITIB. */
6731 else if (CONST_INT_P (op
))
6733 if (!SCALAR_INT_MODE_P (mode
))
6736 value
= INTVAL (op
);
6737 if (!IN_RANGE (value
, -128, 127))
6740 if (!IN_RANGE (value
, -1, 0))
6742 if (!(reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_VALID
))
6745 if (EASY_VECTOR_15 (value
))
6753 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6754 sign extend. Special case 0/-1 to allow getting any VSX register instead
6755 of an Altivec register. */
6756 if ((mode
== V4SImode
|| mode
== V8HImode
) && !IN_RANGE (value
, -1, 0)
6757 && EASY_VECTOR_15 (value
))
6760 /* Return # of instructions and the constant byte for XXSPLTIB. */
6761 if (mode
== V16QImode
)
6764 else if (IN_RANGE (value
, -1, 0))
6767 /* Do not generate XXSPLTIB and a sign extend operation if we can generate a
6768 single XXSPLTIW or XXSPLTIDP instruction. */
6769 else if (vsx_prefixed_constant (op
, mode
))
6772 /* Return XXSPLITB followed by a sign extend operation to convert the
6773 constant to V8HImode or V4SImode. */
6777 *constant_ptr
= (int) value
;
6782 output_vec_const_move (rtx
*operands
)
6790 mode
= GET_MODE (dest
);
6794 bool dest_vmx_p
= ALTIVEC_REGNO_P (REGNO (dest
));
6795 int xxspltib_value
= 256;
6798 if (zero_constant (vec
, mode
))
6800 if (TARGET_P9_VECTOR
)
6801 return "xxspltib %x0,0";
6803 else if (dest_vmx_p
)
6804 return "vspltisw %0,0";
6807 return "xxlxor %x0,%x0,%x0";
6810 if (all_ones_constant (vec
, mode
))
6812 if (TARGET_P9_VECTOR
)
6813 return "xxspltib %x0,255";
6815 else if (dest_vmx_p
)
6816 return "vspltisw %0,-1";
6818 else if (TARGET_P8_VECTOR
)
6819 return "xxlorc %x0,%x0,%x0";
6825 vec_const_128bit_type vsx_const
;
6826 if (TARGET_POWER10
&& vec_const_128bit_to_bytes (vec
, mode
, &vsx_const
))
6828 unsigned imm
= constant_generates_lxvkq (&vsx_const
);
6831 operands
[2] = GEN_INT (imm
);
6832 return "lxvkq %x0,%2";
6835 imm
= constant_generates_xxspltiw (&vsx_const
);
6838 operands
[2] = GEN_INT (imm
);
6839 return "xxspltiw %x0,%2";
6842 imm
= constant_generates_xxspltidp (&vsx_const
);
6845 operands
[2] = GEN_INT (imm
);
6846 return "xxspltidp %x0,%2";
6850 if (TARGET_P9_VECTOR
6851 && xxspltib_constant_p (vec
, mode
, &num_insns
, &xxspltib_value
))
6855 operands
[2] = GEN_INT (xxspltib_value
& 0xff);
6856 return "xxspltib %x0,%2";
6867 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest
)));
6868 if (zero_constant (vec
, mode
))
6869 return "vspltisw %0,0";
6871 if (all_ones_constant (vec
, mode
))
6872 return "vspltisw %0,-1";
6874 /* Do we need to construct a value using VSLDOI? */
6875 shift
= vspltis_shifted (vec
);
6879 splat_vec
= gen_easy_altivec_constant (vec
);
6880 gcc_assert (GET_CODE (splat_vec
) == VEC_DUPLICATE
);
6881 operands
[1] = XEXP (splat_vec
, 0);
6882 if (!EASY_VECTOR_15 (INTVAL (operands
[1])))
6885 switch (GET_MODE (splat_vec
))
6888 return "vspltisw %0,%1";
6891 return "vspltish %0,%1";
6894 return "vspltisb %0,%1";
6904 /* Initialize vector TARGET to VALS. */
6907 rs6000_expand_vector_init (rtx target
, rtx vals
)
6909 machine_mode mode
= GET_MODE (target
);
6910 machine_mode inner_mode
= GET_MODE_INNER (mode
);
6911 unsigned int n_elts
= GET_MODE_NUNITS (mode
);
6912 int n_var
= 0, one_var
= -1;
6913 bool all_same
= true, all_const_zero
= true;
6917 for (i
= 0; i
< n_elts
; ++i
)
6919 x
= XVECEXP (vals
, 0, i
);
6920 if (!(CONST_SCALAR_INT_P (x
) || CONST_DOUBLE_P (x
) || CONST_FIXED_P (x
)))
6921 ++n_var
, one_var
= i
;
6922 else if (x
!= CONST0_RTX (inner_mode
))
6923 all_const_zero
= false;
6925 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
6931 rtx const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
6932 bool int_vector_p
= (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
);
6933 if ((int_vector_p
|| TARGET_VSX
) && all_const_zero
)
6935 /* Zero register. */
6936 emit_move_insn (target
, CONST0_RTX (mode
));
6939 else if (int_vector_p
&& easy_vector_constant (const_vec
, mode
))
6941 /* Splat immediate. */
6942 emit_insn (gen_rtx_SET (target
, const_vec
));
6947 /* Load from constant pool. */
6948 emit_move_insn (target
, const_vec
);
6953 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6954 if (VECTOR_MEM_VSX_P (mode
) && (mode
== V2DFmode
|| mode
== V2DImode
))
6958 size_t num_elements
= all_same
? 1 : 2;
6959 for (i
= 0; i
< num_elements
; i
++)
6961 op
[i
] = XVECEXP (vals
, 0, i
);
6962 /* Just in case there is a SUBREG with a smaller mode, do a
6964 if (GET_MODE (op
[i
]) != inner_mode
)
6966 rtx tmp
= gen_reg_rtx (inner_mode
);
6967 convert_move (tmp
, op
[i
], 0);
6970 /* Allow load with splat double word. */
6971 else if (MEM_P (op
[i
]))
6974 op
[i
] = force_reg (inner_mode
, op
[i
]);
6976 else if (!REG_P (op
[i
]))
6977 op
[i
] = force_reg (inner_mode
, op
[i
]);
6982 if (mode
== V2DFmode
)
6983 emit_insn (gen_vsx_splat_v2df (target
, op
[0]));
6985 emit_insn (gen_vsx_splat_v2di (target
, op
[0]));
6989 if (mode
== V2DFmode
)
6990 emit_insn (gen_vsx_concat_v2df (target
, op
[0], op
[1]));
6992 emit_insn (gen_vsx_concat_v2di (target
, op
[0], op
[1]));
6997 /* Special case initializing vector int if we are on 64-bit systems with
6998 direct move or we have the ISA 3.0 instructions. */
6999 if (mode
== V4SImode
&& VECTOR_MEM_VSX_P (V4SImode
)
7000 && TARGET_DIRECT_MOVE_64BIT
)
7004 rtx element0
= XVECEXP (vals
, 0, 0);
7005 if (MEM_P (element0
))
7006 element0
= rs6000_force_indexed_or_indirect_mem (element0
);
7008 element0
= force_reg (SImode
, element0
);
7010 if (TARGET_P9_VECTOR
)
7011 emit_insn (gen_vsx_splat_v4si (target
, element0
));
7014 rtx tmp
= gen_reg_rtx (DImode
);
7015 emit_insn (gen_zero_extendsidi2 (tmp
, element0
));
7016 emit_insn (gen_vsx_splat_v4si_di (target
, tmp
));
7025 for (i
= 0; i
< 4; i
++)
7026 elements
[i
] = force_reg (SImode
, XVECEXP (vals
, 0, i
));
7028 emit_insn (gen_vsx_init_v4si (target
, elements
[0], elements
[1],
7029 elements
[2], elements
[3]));
7034 /* With single precision floating point on VSX, know that internally single
7035 precision is actually represented as a double, and either make 2 V2DF
7036 vectors, and convert these vectors to single precision, or do one
7037 conversion, and splat the result to the other elements. */
7038 if (mode
== V4SFmode
&& VECTOR_MEM_VSX_P (V4SFmode
))
7042 rtx element0
= XVECEXP (vals
, 0, 0);
7044 if (TARGET_P9_VECTOR
)
7046 if (MEM_P (element0
))
7047 element0
= rs6000_force_indexed_or_indirect_mem (element0
);
7049 emit_insn (gen_vsx_splat_v4sf (target
, element0
));
7054 rtx freg
= gen_reg_rtx (V4SFmode
);
7055 rtx sreg
= force_reg (SFmode
, element0
);
7056 rtx cvt
= (TARGET_XSCVDPSPN
7057 ? gen_vsx_xscvdpspn_scalar (freg
, sreg
)
7058 : gen_vsx_xscvdpsp_scalar (freg
, sreg
));
7061 emit_insn (gen_vsx_xxspltw_v4sf_direct (target
, freg
,
7067 if (TARGET_P8_VECTOR
&& TARGET_POWERPC64
)
7073 for (i
= 0; i
< 4; i
++)
7075 tmp_si
[i
] = gen_reg_rtx (SImode
);
7076 tmp_di
[i
] = gen_reg_rtx (DImode
);
7077 mrg_di
[i
] = gen_reg_rtx (DImode
);
7078 tmp_sf
[i
] = force_reg (SFmode
, XVECEXP (vals
, 0, i
));
7079 emit_insn (gen_movsi_from_sf (tmp_si
[i
], tmp_sf
[i
]));
7080 emit_insn (gen_zero_extendsidi2 (tmp_di
[i
], tmp_si
[i
]));
7083 if (!BYTES_BIG_ENDIAN
)
7085 std::swap (tmp_di
[0], tmp_di
[1]);
7086 std::swap (tmp_di
[2], tmp_di
[3]);
7089 emit_insn (gen_ashldi3 (mrg_di
[0], tmp_di
[0], GEN_INT (32)));
7090 emit_insn (gen_iordi3 (mrg_di
[1], mrg_di
[0], tmp_di
[1]));
7091 emit_insn (gen_ashldi3 (mrg_di
[2], tmp_di
[2], GEN_INT (32)));
7092 emit_insn (gen_iordi3 (mrg_di
[3], mrg_di
[2], tmp_di
[3]));
7094 rtx tmp_v2di
= gen_reg_rtx (V2DImode
);
7095 emit_insn (gen_vsx_concat_v2di (tmp_v2di
, mrg_di
[1], mrg_di
[3]));
7096 emit_move_insn (target
, gen_lowpart (V4SFmode
, tmp_v2di
));
7100 rtx dbl_even
= gen_reg_rtx (V2DFmode
);
7101 rtx dbl_odd
= gen_reg_rtx (V2DFmode
);
7102 rtx flt_even
= gen_reg_rtx (V4SFmode
);
7103 rtx flt_odd
= gen_reg_rtx (V4SFmode
);
7104 rtx op0
= force_reg (SFmode
, XVECEXP (vals
, 0, 0));
7105 rtx op1
= force_reg (SFmode
, XVECEXP (vals
, 0, 1));
7106 rtx op2
= force_reg (SFmode
, XVECEXP (vals
, 0, 2));
7107 rtx op3
= force_reg (SFmode
, XVECEXP (vals
, 0, 3));
7109 emit_insn (gen_vsx_concat_v2sf (dbl_even
, op0
, op1
));
7110 emit_insn (gen_vsx_concat_v2sf (dbl_odd
, op2
, op3
));
7111 emit_insn (gen_vsx_xvcvdpsp (flt_even
, dbl_even
));
7112 emit_insn (gen_vsx_xvcvdpsp (flt_odd
, dbl_odd
));
7113 rs6000_expand_extract_even (target
, flt_even
, flt_odd
);
7119 /* Special case initializing vector short/char that are splats if we are on
7120 64-bit systems with direct move. */
7121 if (all_same
&& TARGET_DIRECT_MOVE_64BIT
7122 && (mode
== V16QImode
|| mode
== V8HImode
))
7124 rtx op0
= XVECEXP (vals
, 0, 0);
7125 rtx di_tmp
= gen_reg_rtx (DImode
);
7128 op0
= force_reg (GET_MODE_INNER (mode
), op0
);
7130 if (mode
== V16QImode
)
7132 emit_insn (gen_zero_extendqidi2 (di_tmp
, op0
));
7133 emit_insn (gen_vsx_vspltb_di (target
, di_tmp
));
7137 if (mode
== V8HImode
)
7139 emit_insn (gen_zero_extendhidi2 (di_tmp
, op0
));
7140 emit_insn (gen_vsx_vsplth_di (target
, di_tmp
));
7145 /* Store value to stack temp. Load vector element. Splat. However, splat
7146 of 64-bit items is not supported on Altivec. */
7147 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
7149 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
7150 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0),
7151 XVECEXP (vals
, 0, 0));
7152 x
= gen_rtx_UNSPEC (VOIDmode
,
7153 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
7154 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
7156 gen_rtx_SET (target
, mem
),
7158 x
= gen_rtx_VEC_SELECT (inner_mode
, target
,
7159 gen_rtx_PARALLEL (VOIDmode
,
7160 gen_rtvec (1, const0_rtx
)));
7161 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
7165 /* One field is non-constant. Load constant then overwrite
7169 rtx copy
= copy_rtx (vals
);
7171 /* Load constant part of vector, substitute neighboring value for
7173 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
7174 rs6000_expand_vector_init (target
, copy
);
7176 /* Insert variable. */
7177 rs6000_expand_vector_set (target
, XVECEXP (vals
, 0, one_var
),
7182 if (TARGET_DIRECT_MOVE
&& (mode
== V16QImode
|| mode
== V8HImode
))
7185 /* Force the values into word_mode registers. */
7186 for (i
= 0; i
< n_elts
; i
++)
7188 rtx tmp
= force_reg (inner_mode
, XVECEXP (vals
, 0, i
));
7189 machine_mode tmode
= TARGET_POWERPC64
? DImode
: SImode
;
7190 op
[i
] = simplify_gen_subreg (tmode
, tmp
, inner_mode
, 0);
7193 /* Take unsigned char big endianness on 64bit as example for below
7194 construction, the input values are: A, B, C, D, ..., O, P. */
7196 if (TARGET_DIRECT_MOVE_128
)
7198 /* Move to VSX register with vec_concat, each has 2 values.
7199 eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
7200 vr1[1] = { xxxxxxxC, xxxxxxxD };
7202 vr1[7] = { xxxxxxxO, xxxxxxxP }; */
7204 for (i
= 0; i
< n_elts
/ 2; i
++)
7206 vr1
[i
] = gen_reg_rtx (V2DImode
);
7207 emit_insn (gen_vsx_concat_v2di (vr1
[i
], op
[i
* 2],
7211 /* Pack vectors with 2 values into vectors with 4 values.
7212 eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
7213 vr2[1] = { xxxExxxF, xxxGxxxH };
7214 vr2[1] = { xxxIxxxJ, xxxKxxxL };
7215 vr2[3] = { xxxMxxxN, xxxOxxxP }; */
7217 for (i
= 0; i
< n_elts
/ 4; i
++)
7219 vr2
[i
] = gen_reg_rtx (V4SImode
);
7220 emit_insn (gen_altivec_vpkudum (vr2
[i
], vr1
[i
* 2],
7224 /* Pack vectors with 4 values into vectors with 8 values.
7225 eg: vr3[0] = { xAxBxCxD, xExFxGxH };
7226 vr3[1] = { xIxJxKxL, xMxNxOxP }; */
7228 for (i
= 0; i
< n_elts
/ 8; i
++)
7230 vr3
[i
] = gen_reg_rtx (V8HImode
);
7231 emit_insn (gen_altivec_vpkuwum (vr3
[i
], vr2
[i
* 2],
7235 /* If it's V8HImode, it's done and return it. */
7236 if (mode
== V8HImode
)
7238 emit_insn (gen_rtx_SET (target
, vr3
[0]));
7242 /* Pack vectors with 8 values into 16 values. */
7243 rtx res
= gen_reg_rtx (V16QImode
);
7244 emit_insn (gen_altivec_vpkuhum (res
, vr3
[0], vr3
[1]));
7245 emit_insn (gen_rtx_SET (target
, res
));
7249 rtx (*merge_v16qi
) (rtx
, rtx
, rtx
) = NULL
;
7250 rtx (*merge_v8hi
) (rtx
, rtx
, rtx
) = NULL
;
7251 rtx (*merge_v4si
) (rtx
, rtx
, rtx
) = NULL
;
7254 /* Set up some common gen routines and values. */
7255 if (BYTES_BIG_ENDIAN
)
7257 if (mode
== V16QImode
)
7259 merge_v16qi
= gen_altivec_vmrghb
;
7260 merge_v8hi
= gen_altivec_vmrglh
;
7263 merge_v8hi
= gen_altivec_vmrghh
;
7265 merge_v4si
= gen_altivec_vmrglw
;
7266 perm_idx
= GEN_INT (3);
7270 if (mode
== V16QImode
)
7272 merge_v16qi
= gen_altivec_vmrglb
;
7273 merge_v8hi
= gen_altivec_vmrghh
;
7276 merge_v8hi
= gen_altivec_vmrglh
;
7278 merge_v4si
= gen_altivec_vmrghw
;
7279 perm_idx
= GEN_INT (0);
7282 /* Move to VSX register with direct move.
7283 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
7284 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
7286 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
7288 for (i
= 0; i
< n_elts
; i
++)
7290 vr_qi
[i
] = gen_reg_rtx (V16QImode
);
7291 if (TARGET_POWERPC64
)
7292 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi
[i
], op
[i
]));
7294 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi
[i
], op
[i
]));
7297 /* Merge/move to vector short.
7298 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
7299 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
7301 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
7303 for (i
= 0; i
< 8; i
++)
7306 if (mode
== V16QImode
)
7308 tmp
= gen_reg_rtx (V16QImode
);
7309 emit_insn (merge_v16qi (tmp
, vr_qi
[2 * i
], vr_qi
[2 * i
+ 1]));
7311 vr_hi
[i
] = gen_reg_rtx (V8HImode
);
7312 emit_move_insn (vr_hi
[i
], gen_lowpart (V8HImode
, tmp
));
7315 /* Merge vector short to vector int.
7316 eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
7317 vr_si[1] = { xxxxxxxx, xxxxEFGH };
7319 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
7321 for (i
= 0; i
< 4; i
++)
7323 rtx tmp
= gen_reg_rtx (V8HImode
);
7324 emit_insn (merge_v8hi (tmp
, vr_hi
[2 * i
], vr_hi
[2 * i
+ 1]));
7325 vr_si
[i
] = gen_reg_rtx (V4SImode
);
7326 emit_move_insn (vr_si
[i
], gen_lowpart (V4SImode
, tmp
));
7329 /* Merge vector int to vector long.
7330 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
7331 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
7333 for (i
= 0; i
< 2; i
++)
7335 rtx tmp
= gen_reg_rtx (V4SImode
);
7336 emit_insn (merge_v4si (tmp
, vr_si
[2 * i
], vr_si
[2 * i
+ 1]));
7337 vr_di
[i
] = gen_reg_rtx (V2DImode
);
7338 emit_move_insn (vr_di
[i
], gen_lowpart (V2DImode
, tmp
));
7341 rtx res
= gen_reg_rtx (V2DImode
);
7342 emit_insn (gen_vsx_xxpermdi_v2di (res
, vr_di
[0], vr_di
[1], perm_idx
));
7343 emit_insn (gen_rtx_SET (target
, gen_lowpart (mode
, res
)));
7349 /* Construct the vector in memory one field at a time
7350 and load the whole vector. */
7351 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
7352 for (i
= 0; i
< n_elts
; i
++)
7353 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
7354 i
* GET_MODE_SIZE (inner_mode
)),
7355 XVECEXP (vals
, 0, i
));
7356 emit_move_insn (target
, mem
);
7359 /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
7360 is variable and also counts by vector element size for p9 and above. */
7363 rs6000_expand_vector_set_var_p9 (rtx target
, rtx val
, rtx idx
)
7365 machine_mode mode
= GET_MODE (target
);
7367 gcc_assert (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (idx
));
7369 machine_mode inner_mode
= GET_MODE (val
);
7371 int width
= GET_MODE_SIZE (inner_mode
);
7373 gcc_assert (width
>= 1 && width
<= 8);
7375 int shift
= exact_log2 (width
);
7377 machine_mode idx_mode
= GET_MODE (idx
);
7379 machine_mode shift_mode
;
7380 rtx (*gen_ashl
)(rtx
, rtx
, rtx
);
7381 rtx (*gen_lvsl
)(rtx
, rtx
);
7382 rtx (*gen_lvsr
)(rtx
, rtx
);
7384 if (TARGET_POWERPC64
)
7386 shift_mode
= DImode
;
7387 gen_ashl
= gen_ashldi3
;
7388 gen_lvsl
= gen_altivec_lvsl_reg_di
;
7389 gen_lvsr
= gen_altivec_lvsr_reg_di
;
7393 shift_mode
= SImode
;
7394 gen_ashl
= gen_ashlsi3
;
7395 gen_lvsl
= gen_altivec_lvsl_reg_si
;
7396 gen_lvsr
= gen_altivec_lvsr_reg_si
;
7398 /* Generate the IDX for permute shift, width is the vector element size.
7399 idx = idx * width. */
7400 rtx tmp
= gen_reg_rtx (shift_mode
);
7401 idx
= convert_modes (shift_mode
, idx_mode
, idx
, 1);
7403 emit_insn (gen_ashl (tmp
, idx
, GEN_INT (shift
)));
7405 /* lvsr v1,0,idx. */
7406 rtx pcvr
= gen_reg_rtx (V16QImode
);
7407 emit_insn (gen_lvsr (pcvr
, tmp
));
7409 /* lvsl v2,0,idx. */
7410 rtx pcvl
= gen_reg_rtx (V16QImode
);
7411 emit_insn (gen_lvsl (pcvl
, tmp
));
7413 rtx sub_target
= simplify_gen_subreg (V16QImode
, target
, mode
, 0);
7416 = gen_altivec_vperm_v8hiv16qi (sub_target
, sub_target
, sub_target
, pcvr
);
7419 rs6000_expand_vector_set (target
, val
, const0_rtx
);
7422 = gen_altivec_vperm_v8hiv16qi (sub_target
, sub_target
, sub_target
, pcvl
);
7426 /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
7427 is variable and also counts by vector element size for p7 & p8. */
7430 rs6000_expand_vector_set_var_p7 (rtx target
, rtx val
, rtx idx
)
7432 machine_mode mode
= GET_MODE (target
);
7434 gcc_assert (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (idx
));
7436 machine_mode inner_mode
= GET_MODE (val
);
7437 HOST_WIDE_INT mode_mask
= GET_MODE_MASK (inner_mode
);
7439 int width
= GET_MODE_SIZE (inner_mode
);
7440 gcc_assert (width
>= 1 && width
<= 4);
7442 int shift
= exact_log2 (width
);
7444 machine_mode idx_mode
= GET_MODE (idx
);
7446 machine_mode shift_mode
;
7447 rtx (*gen_ashl
)(rtx
, rtx
, rtx
);
7448 rtx (*gen_add
)(rtx
, rtx
, rtx
);
7449 rtx (*gen_sub
)(rtx
, rtx
, rtx
);
7450 rtx (*gen_lvsl
)(rtx
, rtx
);
7452 if (TARGET_POWERPC64
)
7454 shift_mode
= DImode
;
7455 gen_ashl
= gen_ashldi3
;
7456 gen_add
= gen_adddi3
;
7457 gen_sub
= gen_subdi3
;
7458 gen_lvsl
= gen_altivec_lvsl_reg_di
;
7462 shift_mode
= SImode
;
7463 gen_ashl
= gen_ashlsi3
;
7464 gen_add
= gen_addsi3
;
7465 gen_sub
= gen_subsi3
;
7466 gen_lvsl
= gen_altivec_lvsl_reg_si
;
7469 /* idx = idx * width. */
7470 rtx tmp
= gen_reg_rtx (shift_mode
);
7471 idx
= convert_modes (shift_mode
, idx_mode
, idx
, 1);
7473 emit_insn (gen_ashl (tmp
, idx
, GEN_INT (shift
)));
7475 /* For LE: idx = idx + 8. */
7476 if (!BYTES_BIG_ENDIAN
)
7477 emit_insn (gen_add (tmp
, tmp
, GEN_INT (8)));
7479 emit_insn (gen_sub (tmp
, GEN_INT (24 - width
), tmp
));
7482 DImode: 0xffffffffffffffff0000000000000000
7483 SImode: 0x00000000ffffffff0000000000000000
7484 HImode: 0x000000000000ffff0000000000000000.
7485 QImode: 0x00000000000000ff0000000000000000. */
7486 rtx mask
= gen_reg_rtx (V16QImode
);
7487 rtx mask_v2di
= gen_reg_rtx (V2DImode
);
7488 rtvec v
= rtvec_alloc (2);
7489 if (!BYTES_BIG_ENDIAN
)
7491 RTVEC_ELT (v
, 0) = gen_rtx_CONST_INT (DImode
, 0);
7492 RTVEC_ELT (v
, 1) = gen_rtx_CONST_INT (DImode
, mode_mask
);
7496 RTVEC_ELT (v
, 0) = gen_rtx_CONST_INT (DImode
, mode_mask
);
7497 RTVEC_ELT (v
, 1) = gen_rtx_CONST_INT (DImode
, 0);
7499 emit_insn (gen_vec_initv2didi (mask_v2di
, gen_rtx_PARALLEL (V2DImode
, v
)));
7500 rtx sub_mask
= simplify_gen_subreg (V16QImode
, mask_v2di
, V2DImode
, 0);
7501 emit_insn (gen_rtx_SET (mask
, sub_mask
));
7503 /* mtvsrd[wz] f0,tmp_val. */
7504 rtx tmp_val
= gen_reg_rtx (SImode
);
7505 if (inner_mode
== E_SFmode
)
7506 if (TARGET_DIRECT_MOVE_64BIT
)
7507 emit_insn (gen_movsi_from_sf (tmp_val
, val
));
7510 rtx stack
= rs6000_allocate_stack_temp (SFmode
, false, true);
7511 emit_insn (gen_movsf_hardfloat (stack
, val
));
7512 rtx stack2
= copy_rtx (stack
);
7513 PUT_MODE (stack2
, SImode
);
7514 emit_move_insn (tmp_val
, stack2
);
7517 tmp_val
= force_reg (SImode
, val
);
7519 rtx val_v16qi
= gen_reg_rtx (V16QImode
);
7520 rtx val_v2di
= gen_reg_rtx (V2DImode
);
7521 rtvec vec_val
= rtvec_alloc (2);
7522 if (!BYTES_BIG_ENDIAN
)
7524 RTVEC_ELT (vec_val
, 0) = gen_rtx_CONST_INT (DImode
, 0);
7525 RTVEC_ELT (vec_val
, 1) = tmp_val
;
7529 RTVEC_ELT (vec_val
, 0) = tmp_val
;
7530 RTVEC_ELT (vec_val
, 1) = gen_rtx_CONST_INT (DImode
, 0);
7533 gen_vec_initv2didi (val_v2di
, gen_rtx_PARALLEL (V2DImode
, vec_val
)));
7534 rtx sub_val
= simplify_gen_subreg (V16QImode
, val_v2di
, V2DImode
, 0);
7535 emit_insn (gen_rtx_SET (val_v16qi
, sub_val
));
7537 /* lvsl 13,0,idx. */
7538 rtx pcv
= gen_reg_rtx (V16QImode
);
7539 emit_insn (gen_lvsl (pcv
, tmp
));
7541 /* vperm 1,1,1,13. */
7542 /* vperm 0,0,0,13. */
7543 rtx val_perm
= gen_reg_rtx (V16QImode
);
7544 rtx mask_perm
= gen_reg_rtx (V16QImode
);
7545 emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm
, val_v16qi
, val_v16qi
, pcv
));
7546 emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm
, mask
, mask
, pcv
));
7548 rtx target_v16qi
= simplify_gen_subreg (V16QImode
, target
, mode
, 0);
7550 /* xxsel 34,34,32,33. */
7552 gen_vector_select_v16qi (target_v16qi
, target_v16qi
, val_perm
, mask_perm
));
7555 /* Set field ELT_RTX of TARGET to VAL. */
7558 rs6000_expand_vector_set (rtx target
, rtx val
, rtx elt_rtx
)
7560 machine_mode mode
= GET_MODE (target
);
7561 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7562 rtx reg
= gen_reg_rtx (mode
);
7564 int width
= GET_MODE_SIZE (inner_mode
);
7567 val
= force_reg (GET_MODE (val
), val
);
7569 if (VECTOR_MEM_VSX_P (mode
))
7571 if (!CONST_INT_P (elt_rtx
))
7573 /* For V2DI/V2DF, could leverage the P9 version to generate xxpermdi
7574 when elt_rtx is variable. */
7575 if ((TARGET_P9_VECTOR
&& TARGET_POWERPC64
) || width
== 8)
7577 rs6000_expand_vector_set_var_p9 (target
, val
, elt_rtx
);
7580 else if (TARGET_VSX
)
7582 rs6000_expand_vector_set_var_p7 (target
, val
, elt_rtx
);
7586 gcc_assert (CONST_INT_P (elt_rtx
));
7589 rtx insn
= NULL_RTX
;
7591 if (mode
== V2DFmode
)
7592 insn
= gen_vsx_set_v2df (target
, target
, val
, elt_rtx
);
7594 else if (mode
== V2DImode
)
7595 insn
= gen_vsx_set_v2di (target
, target
, val
, elt_rtx
);
7597 else if (TARGET_P9_VECTOR
&& TARGET_POWERPC64
)
7599 if (mode
== V4SImode
)
7600 insn
= gen_vsx_set_v4si_p9 (target
, target
, val
, elt_rtx
);
7601 else if (mode
== V8HImode
)
7602 insn
= gen_vsx_set_v8hi_p9 (target
, target
, val
, elt_rtx
);
7603 else if (mode
== V16QImode
)
7604 insn
= gen_vsx_set_v16qi_p9 (target
, target
, val
, elt_rtx
);
7605 else if (mode
== V4SFmode
)
7606 insn
= gen_vsx_set_v4sf_p9 (target
, target
, val
, elt_rtx
);
7616 /* Simplify setting single element vectors like V1TImode. */
7617 if (GET_MODE_SIZE (mode
) == GET_MODE_SIZE (inner_mode
)
7618 && INTVAL (elt_rtx
) == 0)
7620 emit_move_insn (target
, gen_lowpart (mode
, val
));
7624 /* Load single variable value. */
7625 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
7626 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0), val
);
7627 x
= gen_rtx_UNSPEC (VOIDmode
,
7628 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
7629 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
7631 gen_rtx_SET (reg
, mem
),
7634 /* Linear sequence. */
7635 mask
= gen_rtx_PARALLEL (V16QImode
, rtvec_alloc (16));
7636 for (i
= 0; i
< 16; ++i
)
7637 XVECEXP (mask
, 0, i
) = GEN_INT (i
);
7639 /* Set permute mask to insert element into target. */
7640 for (i
= 0; i
< width
; ++i
)
7641 XVECEXP (mask
, 0, INTVAL (elt_rtx
) * width
+ i
) = GEN_INT (i
+ 0x10);
7642 x
= gen_rtx_CONST_VECTOR (V16QImode
, XVEC (mask
, 0));
7644 if (BYTES_BIG_ENDIAN
)
7645 x
= gen_rtx_UNSPEC (mode
,
7646 gen_rtvec (3, target
, reg
,
7647 force_reg (V16QImode
, x
)),
7651 if (TARGET_P9_VECTOR
)
7652 x
= gen_rtx_UNSPEC (mode
,
7653 gen_rtvec (3, reg
, target
,
7654 force_reg (V16QImode
, x
)),
7658 /* Invert selector. We prefer to generate VNAND on P8 so
7659 that future fusion opportunities can kick in, but must
7660 generate VNOR elsewhere. */
7661 rtx notx
= gen_rtx_NOT (V16QImode
, force_reg (V16QImode
, x
));
7662 rtx iorx
= (TARGET_P8_VECTOR
7663 ? gen_rtx_IOR (V16QImode
, notx
, notx
)
7664 : gen_rtx_AND (V16QImode
, notx
, notx
));
7665 rtx tmp
= gen_reg_rtx (V16QImode
);
7666 emit_insn (gen_rtx_SET (tmp
, iorx
));
7668 /* Permute with operands reversed and adjusted selector. */
7669 x
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, reg
, target
, tmp
),
7674 emit_insn (gen_rtx_SET (target
, x
));
7677 /* Extract field ELT from VEC into TARGET. */
7680 rs6000_expand_vector_extract (rtx target
, rtx vec
, rtx elt
)
7682 machine_mode mode
= GET_MODE (vec
);
7683 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7686 if (VECTOR_MEM_VSX_P (mode
) && CONST_INT_P (elt
))
7693 emit_move_insn (target
, gen_lowpart (TImode
, vec
));
7696 emit_insn (gen_vsx_extract_v2df (target
, vec
, elt
));
7699 emit_insn (gen_vsx_extract_v2di (target
, vec
, elt
));
7702 emit_insn (gen_vsx_extract_v4sf (target
, vec
, elt
));
7705 if (TARGET_DIRECT_MOVE_64BIT
)
7707 emit_insn (gen_vsx_extract_v16qi (target
, vec
, elt
));
7713 if (TARGET_DIRECT_MOVE_64BIT
)
7715 emit_insn (gen_vsx_extract_v8hi (target
, vec
, elt
));
7721 if (TARGET_DIRECT_MOVE_64BIT
)
7723 emit_insn (gen_vsx_extract_v4si (target
, vec
, elt
));
7729 else if (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (elt
)
7730 && TARGET_DIRECT_MOVE_64BIT
)
7732 if (GET_MODE (elt
) != DImode
)
7734 rtx tmp
= gen_reg_rtx (DImode
);
7735 convert_move (tmp
, elt
, 0);
7738 else if (!REG_P (elt
))
7739 elt
= force_reg (DImode
, elt
);
7744 emit_move_insn (target
, gen_lowpart (TImode
, vec
));
7748 emit_insn (gen_vsx_extract_v2df_var (target
, vec
, elt
));
7752 emit_insn (gen_vsx_extract_v2di_var (target
, vec
, elt
));
7756 emit_insn (gen_vsx_extract_v4sf_var (target
, vec
, elt
));
7760 emit_insn (gen_vsx_extract_v4si_var (target
, vec
, elt
));
7764 emit_insn (gen_vsx_extract_v8hi_var (target
, vec
, elt
));
7768 emit_insn (gen_vsx_extract_v16qi_var (target
, vec
, elt
));
7776 /* Allocate mode-sized buffer. */
7777 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
7779 emit_move_insn (mem
, vec
);
7780 if (CONST_INT_P (elt
))
7782 int modulo_elt
= INTVAL (elt
) % GET_MODE_NUNITS (mode
);
7784 /* Add offset to field within buffer matching vector element. */
7785 mem
= adjust_address_nv (mem
, inner_mode
,
7786 modulo_elt
* GET_MODE_SIZE (inner_mode
));
7787 emit_move_insn (target
, adjust_address_nv (mem
, inner_mode
, 0));
7791 unsigned int ele_size
= GET_MODE_SIZE (inner_mode
);
7792 rtx num_ele_m1
= GEN_INT (GET_MODE_NUNITS (mode
) - 1);
7793 rtx new_addr
= gen_reg_rtx (Pmode
);
7795 elt
= gen_rtx_AND (Pmode
, elt
, num_ele_m1
);
7797 elt
= gen_rtx_MULT (Pmode
, elt
, GEN_INT (ele_size
));
7798 new_addr
= gen_rtx_PLUS (Pmode
, XEXP (mem
, 0), elt
);
7799 new_addr
= change_address (mem
, inner_mode
, new_addr
);
7800 emit_move_insn (target
, new_addr
);
7804 /* Return the offset within a memory object (MEM) of a vector type to a given
7805 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
7806 the element is constant, we return a constant integer.
7808 Otherwise, we use a base register temporary to calculate the offset after
7809 masking it to fit within the bounds of the vector and scaling it. The
7810 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7811 built-in function. */
7814 get_vector_offset (rtx mem
, rtx element
, rtx base_tmp
, unsigned scalar_size
)
7816 if (CONST_INT_P (element
))
7817 return GEN_INT (INTVAL (element
) * scalar_size
);
7819 /* All insns should use the 'Q' constraint (address is a single register) if
7820 the element number is not a constant. */
7821 gcc_assert (satisfies_constraint_Q (mem
));
7823 /* Mask the element to make sure the element number is between 0 and the
7824 maximum number of elements - 1 so that we don't generate an address
7825 outside the vector. */
7826 rtx num_ele_m1
= GEN_INT (GET_MODE_NUNITS (GET_MODE (mem
)) - 1);
7827 rtx and_op
= gen_rtx_AND (Pmode
, element
, num_ele_m1
);
7828 emit_insn (gen_rtx_SET (base_tmp
, and_op
));
7830 /* Shift the element to get the byte offset from the element number. */
7831 int shift
= exact_log2 (scalar_size
);
7832 gcc_assert (shift
>= 0);
7836 rtx shift_op
= gen_rtx_ASHIFT (Pmode
, base_tmp
, GEN_INT (shift
));
7837 emit_insn (gen_rtx_SET (base_tmp
, shift_op
));
7843 /* Helper function update PC-relative addresses when we are adjusting a memory
7844 address (ADDR) to a vector to point to a scalar field within the vector with
7845 a constant offset (ELEMENT_OFFSET). If the address is not valid, we can
7846 use the base register temporary (BASE_TMP) to form the address. */
7849 adjust_vec_address_pcrel (rtx addr
, rtx element_offset
, rtx base_tmp
)
7851 rtx new_addr
= NULL
;
7853 gcc_assert (CONST_INT_P (element_offset
));
7855 if (GET_CODE (addr
) == CONST
)
7856 addr
= XEXP (addr
, 0);
7858 if (GET_CODE (addr
) == PLUS
)
7860 rtx op0
= XEXP (addr
, 0);
7861 rtx op1
= XEXP (addr
, 1);
7863 if (CONST_INT_P (op1
))
7865 HOST_WIDE_INT offset
7866 = INTVAL (XEXP (addr
, 1)) + INTVAL (element_offset
);
7873 rtx plus
= gen_rtx_PLUS (Pmode
, op0
, GEN_INT (offset
));
7874 new_addr
= gen_rtx_CONST (Pmode
, plus
);
7880 emit_move_insn (base_tmp
, addr
);
7881 new_addr
= gen_rtx_PLUS (Pmode
, base_tmp
, element_offset
);
7885 else if (SYMBOL_REF_P (addr
) || LABEL_REF_P (addr
))
7887 rtx plus
= gen_rtx_PLUS (Pmode
, addr
, element_offset
);
7888 new_addr
= gen_rtx_CONST (Pmode
, plus
);
7897 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7898 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7899 temporary (BASE_TMP) to fixup the address. Return the new memory address
7900 that is valid for reads or writes to a given register (SCALAR_REG).
7902 This function is expected to be called after reload is completed when we are
7903 splitting insns. The temporary BASE_TMP might be set multiple times with
7907 rs6000_adjust_vec_address (rtx scalar_reg
,
7911 machine_mode scalar_mode
)
7913 unsigned scalar_size
= GET_MODE_SIZE (scalar_mode
);
7914 rtx addr
= XEXP (mem
, 0);
7917 gcc_assert (!reg_mentioned_p (base_tmp
, addr
));
7918 gcc_assert (!reg_mentioned_p (base_tmp
, element
));
7920 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7921 gcc_assert (GET_RTX_CLASS (GET_CODE (addr
)) != RTX_AUTOINC
);
7923 /* Calculate what we need to add to the address to get the element
7925 rtx element_offset
= get_vector_offset (mem
, element
, base_tmp
, scalar_size
);
7927 /* Create the new address pointing to the element within the vector. If we
7928 are adding 0, we don't have to change the address. */
7929 if (element_offset
== const0_rtx
)
7932 /* A simple indirect address can be converted into a reg + offset
7934 else if (REG_P (addr
) || SUBREG_P (addr
))
7935 new_addr
= gen_rtx_PLUS (Pmode
, addr
, element_offset
);
7937 /* For references to local static variables, fold a constant offset into the
7939 else if (pcrel_local_address (addr
, Pmode
) && CONST_INT_P (element_offset
))
7940 new_addr
= adjust_vec_address_pcrel (addr
, element_offset
, base_tmp
);
7942 /* Optimize D-FORM addresses with constant offset with a constant element, to
7943 include the element offset in the address directly. */
7944 else if (GET_CODE (addr
) == PLUS
)
7946 rtx op0
= XEXP (addr
, 0);
7947 rtx op1
= XEXP (addr
, 1);
7949 gcc_assert (REG_P (op0
) || SUBREG_P (op0
));
7950 if (CONST_INT_P (op1
) && CONST_INT_P (element_offset
))
7952 /* op0 should never be r0, because r0+offset is not valid. But it
7953 doesn't hurt to make sure it is not r0. */
7954 gcc_assert (reg_or_subregno (op0
) != 0);
7956 /* D-FORM address with constant element number. */
7957 HOST_WIDE_INT offset
= INTVAL (op1
) + INTVAL (element_offset
);
7958 rtx offset_rtx
= GEN_INT (offset
);
7959 new_addr
= gen_rtx_PLUS (Pmode
, op0
, offset_rtx
);
7963 /* If we don't have a D-FORM address with a constant element number,
7964 add the two elements in the current address. Then add the offset.
7966 Previously, we tried to add the offset to OP1 and change the
7967 address to an X-FORM format adding OP0 and BASE_TMP, but it became
7968 complicated because we had to verify that op1 was not GPR0 and we
7969 had a constant element offset (due to the way ADDI is defined).
7970 By doing the add of OP0 and OP1 first, and then adding in the
7971 offset, it has the benefit that if D-FORM instructions are
7972 allowed, the offset is part of the memory access to the vector
7974 emit_insn (gen_rtx_SET (base_tmp
, gen_rtx_PLUS (Pmode
, op0
, op1
)));
7975 new_addr
= gen_rtx_PLUS (Pmode
, base_tmp
, element_offset
);
7981 emit_move_insn (base_tmp
, addr
);
7982 new_addr
= gen_rtx_PLUS (Pmode
, base_tmp
, element_offset
);
7985 /* If the address isn't valid, move the address into the temporary base
7986 register. Some reasons it could not be valid include:
7988 The address offset overflowed the 16 or 34 bit offset size;
7989 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
7990 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
7991 Only X_FORM loads can be done, and the address is D_FORM. */
7993 enum insn_form iform
7994 = address_to_insn_form (new_addr
, scalar_mode
,
7995 reg_to_non_prefixed (scalar_reg
, scalar_mode
));
7997 if (iform
== INSN_FORM_BAD
)
7999 emit_move_insn (base_tmp
, new_addr
);
8000 new_addr
= base_tmp
;
8003 return change_address (mem
, scalar_mode
, new_addr
);
8006 /* Split a variable vec_extract operation into the component instructions. */
8009 rs6000_split_vec_extract_var (rtx dest
, rtx src
, rtx element
, rtx tmp_gpr
,
8012 machine_mode mode
= GET_MODE (src
);
8013 machine_mode scalar_mode
= GET_MODE_INNER (GET_MODE (src
));
8014 unsigned scalar_size
= GET_MODE_SIZE (scalar_mode
);
8015 int byte_shift
= exact_log2 (scalar_size
);
8017 gcc_assert (byte_shift
>= 0);
8019 /* If we are given a memory address, optimize to load just the element. We
8020 don't have to adjust the vector element number on little endian
8024 emit_move_insn (dest
,
8025 rs6000_adjust_vec_address (dest
, src
, element
, tmp_gpr
,
8030 else if (REG_P (src
) || SUBREG_P (src
))
8032 int num_elements
= GET_MODE_NUNITS (mode
);
8033 int bits_in_element
= mode_to_bits (GET_MODE_INNER (mode
));
8034 int bit_shift
= 7 - exact_log2 (num_elements
);
8036 unsigned int dest_regno
= reg_or_subregno (dest
);
8037 unsigned int src_regno
= reg_or_subregno (src
);
8038 unsigned int element_regno
= reg_or_subregno (element
);
8040 gcc_assert (REG_P (tmp_gpr
));
8042 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
8043 a general purpose register. */
8044 if (TARGET_P9_VECTOR
8045 && (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
8046 && INT_REGNO_P (dest_regno
)
8047 && ALTIVEC_REGNO_P (src_regno
)
8048 && INT_REGNO_P (element_regno
))
8050 rtx dest_si
= gen_rtx_REG (SImode
, dest_regno
);
8051 rtx element_si
= gen_rtx_REG (SImode
, element_regno
);
8053 if (mode
== V16QImode
)
8054 emit_insn (BYTES_BIG_ENDIAN
8055 ? gen_vextublx (dest_si
, element_si
, src
)
8056 : gen_vextubrx (dest_si
, element_si
, src
));
8058 else if (mode
== V8HImode
)
8060 rtx tmp_gpr_si
= gen_rtx_REG (SImode
, REGNO (tmp_gpr
));
8061 emit_insn (gen_ashlsi3 (tmp_gpr_si
, element_si
, const1_rtx
));
8062 emit_insn (BYTES_BIG_ENDIAN
8063 ? gen_vextuhlx (dest_si
, tmp_gpr_si
, src
)
8064 : gen_vextuhrx (dest_si
, tmp_gpr_si
, src
));
8070 rtx tmp_gpr_si
= gen_rtx_REG (SImode
, REGNO (tmp_gpr
));
8071 emit_insn (gen_ashlsi3 (tmp_gpr_si
, element_si
, const2_rtx
));
8072 emit_insn (BYTES_BIG_ENDIAN
8073 ? gen_vextuwlx (dest_si
, tmp_gpr_si
, src
)
8074 : gen_vextuwrx (dest_si
, tmp_gpr_si
, src
));
8081 gcc_assert (REG_P (tmp_altivec
));
8083 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
8084 an XOR, otherwise we need to subtract. The shift amount is so VSLO
8085 will shift the element into the upper position (adding 3 to convert a
8086 byte shift into a bit shift). */
8087 if (scalar_size
== 8)
8089 if (!BYTES_BIG_ENDIAN
)
8091 emit_insn (gen_xordi3 (tmp_gpr
, element
, const1_rtx
));
8097 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
8099 emit_insn (gen_rtx_SET (tmp_gpr
,
8100 gen_rtx_AND (DImode
,
8101 gen_rtx_ASHIFT (DImode
,
8108 if (!BYTES_BIG_ENDIAN
)
8110 rtx num_ele_m1
= GEN_INT (num_elements
- 1);
8112 emit_insn (gen_anddi3 (tmp_gpr
, element
, num_ele_m1
));
8113 emit_insn (gen_subdi3 (tmp_gpr
, num_ele_m1
, tmp_gpr
));
8119 emit_insn (gen_ashldi3 (tmp_gpr
, element2
, GEN_INT (bit_shift
)));
8122 /* Get the value into the lower byte of the Altivec register where VSLO
8124 if (TARGET_P9_VECTOR
)
8125 emit_insn (gen_vsx_splat_v2di (tmp_altivec
, tmp_gpr
));
8126 else if (can_create_pseudo_p ())
8127 emit_insn (gen_vsx_concat_v2di (tmp_altivec
, tmp_gpr
, tmp_gpr
));
8130 rtx tmp_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
8131 emit_move_insn (tmp_di
, tmp_gpr
);
8132 emit_insn (gen_vsx_concat_v2di (tmp_altivec
, tmp_di
, tmp_di
));
8135 /* Do the VSLO to get the value into the final location. */
8139 emit_insn (gen_vsx_vslo_v2df (dest
, src
, tmp_altivec
));
8143 emit_insn (gen_vsx_vslo_v2di (dest
, src
, tmp_altivec
));
8148 rtx tmp_altivec_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
8149 rtx tmp_altivec_v4sf
= gen_rtx_REG (V4SFmode
, REGNO (tmp_altivec
));
8150 rtx src_v2di
= gen_rtx_REG (V2DImode
, REGNO (src
));
8151 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di
, src_v2di
,
8154 emit_insn (gen_vsx_xscvspdp_scalar2 (dest
, tmp_altivec_v4sf
));
8162 rtx tmp_altivec_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
8163 rtx src_v2di
= gen_rtx_REG (V2DImode
, REGNO (src
));
8164 rtx tmp_gpr_di
= gen_rtx_REG (DImode
, REGNO (dest
));
8165 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di
, src_v2di
,
8167 emit_move_insn (tmp_gpr_di
, tmp_altivec_di
);
8168 emit_insn (gen_lshrdi3 (tmp_gpr_di
, tmp_gpr_di
,
8169 GEN_INT (64 - bits_in_element
)));
8183 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
8184 selects whether the alignment is abi mandated, optional, or
8185 both abi and optional alignment. */
8188 rs6000_data_alignment (tree type
, unsigned int align
, enum data_align how
)
8190 if (how
!= align_opt
)
8192 if (TREE_CODE (type
) == VECTOR_TYPE
&& align
< 128)
8196 if (how
!= align_abi
)
8198 if (TREE_CODE (type
) == ARRAY_TYPE
8199 && TYPE_MODE (TREE_TYPE (type
)) == QImode
)
8201 if (align
< BITS_PER_WORD
)
8202 align
= BITS_PER_WORD
;
8209 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
8210 instructions simply ignore the low bits; VSX memory instructions
8211 are aligned to 4 or 8 bytes. */
8214 rs6000_slow_unaligned_access (machine_mode mode
, unsigned int align
)
8216 return (STRICT_ALIGNMENT
8217 || (!TARGET_EFFICIENT_UNALIGNED_VSX
8218 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode
) && align
< 32)
8219 || ((VECTOR_MODE_P (mode
) || VECTOR_ALIGNMENT_P (mode
))
8220 && (int) align
< VECTOR_ALIGN (mode
)))));
8223 /* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints. */
8226 rs6000_special_adjust_field_align (tree type
, unsigned int computed
)
8228 if (computed
<= 32 || TYPE_PACKED (type
))
8231 /* Strip initial arrays. */
8232 while (TREE_CODE (type
) == ARRAY_TYPE
)
8233 type
= TREE_TYPE (type
);
8235 /* If RECORD or UNION, recursively find the first field. */
8236 while (AGGREGATE_TYPE_P (type
))
8238 tree field
= TYPE_FIELDS (type
);
8240 /* Skip all non field decls */
8241 while (field
!= NULL
8242 && (TREE_CODE (field
) != FIELD_DECL
8243 || DECL_FIELD_ABI_IGNORED (field
)))
8244 field
= DECL_CHAIN (field
);
8249 /* A packed field does not contribute any extra alignment. */
8250 if (DECL_PACKED (field
))
8253 type
= TREE_TYPE (field
);
8256 while (TREE_CODE (type
) == ARRAY_TYPE
)
8257 type
= TREE_TYPE (type
);
8260 if (! AGGREGATE_TYPE_P (type
) && type
!= error_mark_node
8261 && (TYPE_MODE (type
) == DFmode
|| TYPE_MODE (type
) == DCmode
))
8262 computed
= MIN (computed
, 32);
8267 /* AIX increases natural record alignment to doubleword if the innermost first
8268 field is an FP double while the FP fields remain word aligned.
8269 Only called if TYPE initially is a RECORD or UNION. */
8272 rs6000_special_round_type_align (tree type
, unsigned int computed
,
8273 unsigned int specified
)
8275 unsigned int align
= MAX (computed
, specified
);
8277 if (TYPE_PACKED (type
) || align
>= 64)
8280 /* If RECORD or UNION, recursively find the first field. */
8283 tree field
= TYPE_FIELDS (type
);
8285 /* Skip all non field decls */
8286 while (field
!= NULL
8287 && (TREE_CODE (field
) != FIELD_DECL
8288 || DECL_FIELD_ABI_IGNORED (field
)))
8289 field
= DECL_CHAIN (field
);
8294 /* A packed field does not contribute any extra alignment. */
8295 if (DECL_PACKED (field
))
8298 type
= TREE_TYPE (field
);
8301 while (TREE_CODE (type
) == ARRAY_TYPE
)
8302 type
= TREE_TYPE (type
);
8303 } while (AGGREGATE_TYPE_P (type
));
8305 if (! AGGREGATE_TYPE_P (type
) && type
!= error_mark_node
8306 && (TYPE_MODE (type
) == DFmode
|| TYPE_MODE (type
) == DCmode
))
8307 align
= MAX (align
, 64);
8312 /* Darwin increases record alignment to the natural alignment of
8316 darwin_rs6000_special_round_type_align (tree type
, unsigned int computed
,
8317 unsigned int specified
)
8319 unsigned int align
= MAX (computed
, specified
);
8321 if (TYPE_PACKED (type
))
8324 /* Find the first field, looking down into aggregates. */
8326 tree field
= TYPE_FIELDS (type
);
8327 /* Skip all non field decls */
8328 while (field
!= NULL
8329 && (TREE_CODE (field
) != FIELD_DECL
8330 || DECL_FIELD_ABI_IGNORED (field
)))
8331 field
= DECL_CHAIN (field
);
8334 /* A packed field does not contribute any extra alignment. */
8335 if (DECL_PACKED (field
))
8337 type
= TREE_TYPE (field
);
8338 while (TREE_CODE (type
) == ARRAY_TYPE
)
8339 type
= TREE_TYPE (type
);
8340 } while (AGGREGATE_TYPE_P (type
));
8342 if (! AGGREGATE_TYPE_P (type
) && type
!= error_mark_node
)
8343 align
= MAX (align
, TYPE_ALIGN (type
));
8348 /* Return 1 for an operand in small memory on V.4/eabi. */
8351 small_data_operand (rtx op ATTRIBUTE_UNUSED
,
8352 machine_mode mode ATTRIBUTE_UNUSED
)
8357 if (rs6000_sdata
== SDATA_NONE
|| rs6000_sdata
== SDATA_DATA
)
8360 if (DEFAULT_ABI
!= ABI_V4
)
8363 if (SYMBOL_REF_P (op
))
8366 else if (GET_CODE (op
) != CONST
8367 || GET_CODE (XEXP (op
, 0)) != PLUS
8368 || !SYMBOL_REF_P (XEXP (XEXP (op
, 0), 0))
8369 || !CONST_INT_P (XEXP (XEXP (op
, 0), 1)))
8374 rtx sum
= XEXP (op
, 0);
8375 HOST_WIDE_INT summand
;
8377 /* We have to be careful here, because it is the referenced address
8378 that must be 32k from _SDA_BASE_, not just the symbol. */
8379 summand
= INTVAL (XEXP (sum
, 1));
8380 if (summand
< 0 || summand
> g_switch_value
)
8383 sym_ref
= XEXP (sum
, 0);
8386 return SYMBOL_REF_SMALL_P (sym_ref
);
8392 /* Return true if either operand is a general purpose register. */
8395 gpr_or_gpr_p (rtx op0
, rtx op1
)
8397 return ((REG_P (op0
) && INT_REGNO_P (REGNO (op0
)))
8398 || (REG_P (op1
) && INT_REGNO_P (REGNO (op1
))));
8401 /* Return true if this is a move direct operation between GPR registers and
8402 floating point/VSX registers. */
8405 direct_move_p (rtx op0
, rtx op1
)
8407 if (!REG_P (op0
) || !REG_P (op1
))
8410 if (!TARGET_DIRECT_MOVE
)
8413 int regno0
= REGNO (op0
);
8414 int regno1
= REGNO (op1
);
8415 if (!HARD_REGISTER_NUM_P (regno0
) || !HARD_REGISTER_NUM_P (regno1
))
8418 if (INT_REGNO_P (regno0
) && VSX_REGNO_P (regno1
))
8421 if (VSX_REGNO_P (regno0
) && INT_REGNO_P (regno1
))
8427 /* Return true if the ADDR is an acceptable address for a quad memory
8428 operation of mode MODE (either LQ/STQ for general purpose registers, or
8429 LXV/STXV for vector registers under ISA 3.0. GPR_P is true if this address
8430 is intended for LQ/STQ. If it is false, the address is intended for the ISA
8431 3.0 LXV/STXV instruction. */
8434 quad_address_p (rtx addr
, machine_mode mode
, bool strict
)
8438 if (GET_MODE_SIZE (mode
) < 16)
8441 if (legitimate_indirect_address_p (addr
, strict
))
8444 if (VECTOR_MODE_P (mode
) && !mode_supports_dq_form (mode
))
8447 /* Is this a valid prefixed address? If the bottom four bits of the offset
8448 are non-zero, we could use a prefixed instruction (which does not have the
8449 DQ-form constraint that the traditional instruction had) instead of
8450 forcing the unaligned offset to a GPR. */
8451 if (address_is_prefixed (addr
, mode
, NON_PREFIXED_DQ
))
8454 if (GET_CODE (addr
) != PLUS
)
8457 op0
= XEXP (addr
, 0);
8458 if (!REG_P (op0
) || !INT_REG_OK_FOR_BASE_P (op0
, strict
))
8461 op1
= XEXP (addr
, 1);
8462 if (!CONST_INT_P (op1
))
8465 return quad_address_offset_p (INTVAL (op1
));
8468 /* Return true if this is a load or store quad operation. This function does
8469 not handle the atomic quad memory instructions. */
8472 quad_load_store_p (rtx op0
, rtx op1
)
8476 if (!TARGET_QUAD_MEMORY
)
8479 else if (REG_P (op0
) && MEM_P (op1
))
8480 ret
= (quad_int_reg_operand (op0
, GET_MODE (op0
))
8481 && quad_memory_operand (op1
, GET_MODE (op1
))
8482 && !reg_overlap_mentioned_p (op0
, op1
));
8484 else if (MEM_P (op0
) && REG_P (op1
))
8485 ret
= (quad_memory_operand (op0
, GET_MODE (op0
))
8486 && quad_int_reg_operand (op1
, GET_MODE (op1
)));
8491 if (TARGET_DEBUG_ADDR
)
8493 fprintf (stderr
, "\n========== quad_load_store, return %s\n",
8494 ret
? "true" : "false");
8495 debug_rtx (gen_rtx_SET (op0
, op1
));
8501 /* Given an address, return a constant offset term if one exists. */
8504 address_offset (rtx op
)
8506 if (GET_CODE (op
) == PRE_INC
8507 || GET_CODE (op
) == PRE_DEC
)
8509 else if (GET_CODE (op
) == PRE_MODIFY
8510 || GET_CODE (op
) == LO_SUM
)
8513 if (GET_CODE (op
) == CONST
)
8516 if (GET_CODE (op
) == PLUS
)
8519 if (CONST_INT_P (op
))
8525 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
8526 the mode. If we can't find (or don't know) the alignment of the symbol
8527 we assume (optimistically) that it's sufficiently aligned [??? maybe we
8528 should be pessimistic]. Offsets are validated in the same way as for
8531 darwin_rs6000_legitimate_lo_sum_const_p (rtx x
, machine_mode mode
)
8533 /* We should not get here with this. */
8534 gcc_checking_assert (! mode_supports_dq_form (mode
));
8536 if (GET_CODE (x
) == CONST
)
8539 /* If we are building PIC code, then any symbol must be wrapped in an
8540 UNSPEC_MACHOPIC_OFFSET so that it will get the picbase subtracted. */
8541 bool machopic_offs_p
= false;
8542 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_MACHOPIC_OFFSET
)
8544 x
= XVECEXP (x
, 0, 0);
8545 machopic_offs_p
= true;
8549 unsigned HOST_WIDE_INT offset
= 0;
8551 if (GET_CODE (x
) == PLUS
)
8554 if (! SYMBOL_REF_P (sym
))
8556 if (!CONST_INT_P (XEXP (x
, 1)))
8558 offset
= INTVAL (XEXP (x
, 1));
8560 else if (SYMBOL_REF_P (x
))
8562 else if (CONST_INT_P (x
))
8563 offset
= INTVAL (x
);
8564 else if (GET_CODE (x
) == LABEL_REF
)
8565 offset
= 0; // We assume code labels are Pmode aligned
8567 return false; // not sure what we have here.
8569 /* If we don't know the alignment of the thing to which the symbol refers,
8570 we assume optimistically it is "enough".
8571 ??? maybe we should be pessimistic instead. */
8576 tree decl
= SYMBOL_REF_DECL (sym
);
8577 /* As noted above, PIC code cannot use a bare SYMBOL_REF. */
8578 if (TARGET_MACHO
&& flag_pic
&& !machopic_offs_p
)
8581 if (MACHO_SYMBOL_INDIRECTION_P (sym
))
8582 /* The decl in an indirection symbol is the original one, which might
8583 be less aligned than the indirection. Our indirections are always
8588 if (decl
&& DECL_ALIGN (decl
))
8589 align
= DECL_ALIGN_UNIT (decl
);
8592 unsigned int extra
= 0;
8598 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8600 if (VECTOR_MEM_VSX_P (mode
))
8603 if (!TARGET_POWERPC64
)
8605 else if ((offset
& 3) || (align
& 3))
8616 if (!TARGET_POWERPC64
)
8618 else if ((offset
& 3) || (align
& 3))
8626 /* We only care if the access(es) would cause a change to the high part. */
8627 offset
= sext_hwi (offset
, 16);
8628 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
8631 /* Return true if the MEM operand is a memory operand suitable for use
8632 with a (full width, possibly multiple) gpr load/store. On
8633 powerpc64 this means the offset must be divisible by 4.
8634 Implements 'Y' constraint.
8636 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8637 a constraint function we know the operand has satisfied a suitable
8640 Offsetting a lo_sum should not be allowed, except where we know by
8641 alignment that a 32k boundary is not crossed. Note that by
8642 "offsetting" here we mean a further offset to access parts of the
8643 MEM. It's fine to have a lo_sum where the inner address is offset
8644 from a sym, since the same sym+offset will appear in the high part
8645 of the address calculation. */
8648 mem_operand_gpr (rtx op
, machine_mode mode
)
8650 unsigned HOST_WIDE_INT offset
;
8652 rtx addr
= XEXP (op
, 0);
8654 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
8656 && (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
8657 && mode_supports_pre_incdec_p (mode
)
8658 && legitimate_indirect_address_p (XEXP (addr
, 0), false))
8661 /* Allow prefixed instructions if supported. If the bottom two bits of the
8662 offset are non-zero, we could use a prefixed instruction (which does not
8663 have the DS-form constraint that the traditional instruction had) instead
8664 of forcing the unaligned offset to a GPR. */
8665 if (address_is_prefixed (addr
, mode
, NON_PREFIXED_DS
))
8668 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
8669 really OK. Doing this early avoids teaching all the other machinery
8671 if (TARGET_MACHO
&& GET_CODE (addr
) == LO_SUM
)
8672 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr
, 1), mode
);
8674 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
8675 if (!rs6000_offsettable_memref_p (op
, mode
, false))
8678 op
= address_offset (addr
);
8682 offset
= INTVAL (op
);
8683 if (TARGET_POWERPC64
&& (offset
& 3) != 0)
8686 extra
= GET_MODE_SIZE (mode
) - UNITS_PER_WORD
;
8690 if (GET_CODE (addr
) == LO_SUM
)
8691 /* For lo_sum addresses, we must allow any offset except one that
8692 causes a wrap, so test only the low 16 bits. */
8693 offset
= sext_hwi (offset
, 16);
8695 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
8698 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8699 enforce an offset divisible by 4 even for 32-bit. */
8702 mem_operand_ds_form (rtx op
, machine_mode mode
)
8704 unsigned HOST_WIDE_INT offset
;
8706 rtx addr
= XEXP (op
, 0);
8708 /* Allow prefixed instructions if supported. If the bottom two bits of the
8709 offset are non-zero, we could use a prefixed instruction (which does not
8710 have the DS-form constraint that the traditional instruction had) instead
8711 of forcing the unaligned offset to a GPR. */
8712 if (address_is_prefixed (addr
, mode
, NON_PREFIXED_DS
))
8715 if (!offsettable_address_p (false, mode
, addr
))
8718 op
= address_offset (addr
);
8722 offset
= INTVAL (op
);
8723 if ((offset
& 3) != 0)
8726 extra
= GET_MODE_SIZE (mode
) - UNITS_PER_WORD
;
8730 if (GET_CODE (addr
) == LO_SUM
)
8731 /* For lo_sum addresses, we must allow any offset except one that
8732 causes a wrap, so test only the low 16 bits. */
8733 offset
= sext_hwi (offset
, 16);
8735 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
8738 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8741 reg_offset_addressing_ok_p (machine_mode mode
)
8755 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8756 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8757 a vector mode, if we want to use the VSX registers to move it around,
8758 we need to restrict ourselves to reg+reg addressing. Similarly for
8759 IEEE 128-bit floating point that is passed in a single vector
8761 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
))
8762 return mode_supports_dq_form (mode
);
8765 /* The vector pair/quad types and the dense math types support offset
8766 addressing if the underlying vectors support offset addressing. */
8772 return TARGET_DENSE_MATH
;
8775 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8776 addressing for the LFIWZX and STFIWX instructions. */
8777 if (TARGET_NO_SDMODE_STACK
)
8789 virtual_stack_registers_memory_p (rtx op
)
8794 regnum
= REGNO (op
);
8796 else if (GET_CODE (op
) == PLUS
8797 && REG_P (XEXP (op
, 0))
8798 && CONST_INT_P (XEXP (op
, 1)))
8799 regnum
= REGNO (XEXP (op
, 0));
8804 return (regnum
>= FIRST_VIRTUAL_REGISTER
8805 && regnum
<= LAST_VIRTUAL_POINTER_REGISTER
);
8808 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8809 is known to not straddle a 32k boundary. This function is used
8810 to determine whether -mcmodel=medium code can use TOC pointer
8811 relative addressing for OP. This means the alignment of the TOC
8812 pointer must also be taken into account, and unfortunately that is
8815 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8816 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8820 offsettable_ok_by_alignment (rtx op
, HOST_WIDE_INT offset
,
8824 unsigned HOST_WIDE_INT dsize
, dalign
, lsb
, mask
;
8826 if (!SYMBOL_REF_P (op
))
8829 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8831 if (mode_supports_dq_form (mode
))
8834 dsize
= GET_MODE_SIZE (mode
);
8835 decl
= SYMBOL_REF_DECL (op
);
8841 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8842 replacing memory addresses with an anchor plus offset. We
8843 could find the decl by rummaging around in the block->objects
8844 VEC for the given offset but that seems like too much work. */
8845 dalign
= BITS_PER_UNIT
;
8846 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op
)
8847 && SYMBOL_REF_ANCHOR_P (op
)
8848 && SYMBOL_REF_BLOCK (op
) != NULL
)
8850 struct object_block
*block
= SYMBOL_REF_BLOCK (op
);
8852 dalign
= block
->alignment
;
8853 offset
+= SYMBOL_REF_BLOCK_OFFSET (op
);
8855 else if (CONSTANT_POOL_ADDRESS_P (op
))
8857 /* It would be nice to have get_pool_align().. */
8858 machine_mode cmode
= get_pool_mode (op
);
8860 dalign
= GET_MODE_ALIGNMENT (cmode
);
8863 else if (DECL_P (decl
))
8865 dalign
= DECL_ALIGN (decl
);
8869 /* Allow BLKmode when the entire object is known to not
8870 cross a 32k boundary. */
8871 if (!DECL_SIZE_UNIT (decl
))
8874 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl
)))
8877 dsize
= tree_to_uhwi (DECL_SIZE_UNIT (decl
));
8881 dalign
/= BITS_PER_UNIT
;
8882 if (dalign
> POWERPC64_TOC_POINTER_ALIGNMENT
)
8883 dalign
= POWERPC64_TOC_POINTER_ALIGNMENT
;
8884 return dalign
>= dsize
;
8890 /* Find how many bits of the alignment we know for this access. */
8891 dalign
/= BITS_PER_UNIT
;
8892 if (dalign
> POWERPC64_TOC_POINTER_ALIGNMENT
)
8893 dalign
= POWERPC64_TOC_POINTER_ALIGNMENT
;
8895 lsb
= offset
& -offset
;
8899 return dalign
>= dsize
;
8903 constant_pool_expr_p (rtx op
)
8907 split_const (op
, &base
, &offset
);
8908 return (SYMBOL_REF_P (base
)
8909 && CONSTANT_POOL_ADDRESS_P (base
)
8910 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base
), Pmode
));
8913 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
8914 use that as the register to put the HIGH value into if register allocation
8918 create_TOC_reference (rtx symbol
, rtx largetoc_reg
)
8920 rtx tocrel
, tocreg
, hi
;
8922 gcc_assert (TARGET_TOC
);
8924 if (TARGET_DEBUG_ADDR
)
8926 if (SYMBOL_REF_P (symbol
))
8927 fprintf (stderr
, "\ncreate_TOC_reference, (symbol_ref %s)\n",
8931 fprintf (stderr
, "\ncreate_TOC_reference, code %s:\n",
8932 GET_RTX_NAME (GET_CODE (symbol
)));
8937 if (!can_create_pseudo_p ())
8938 df_set_regs_ever_live (TOC_REGISTER
, true);
8940 tocreg
= gen_rtx_REG (Pmode
, TOC_REGISTER
);
8941 tocrel
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, symbol
, tocreg
), UNSPEC_TOCREL
);
8942 if (TARGET_CMODEL
== CMODEL_SMALL
|| can_create_pseudo_p ())
8945 hi
= gen_rtx_HIGH (Pmode
, copy_rtx (tocrel
));
8946 if (largetoc_reg
!= NULL
)
8948 emit_move_insn (largetoc_reg
, hi
);
8951 return gen_rtx_LO_SUM (Pmode
, hi
, tocrel
);
8954 /* These are only used to pass through from print_operand/print_operand_address
8955 to rs6000_output_addr_const_extra over the intervening function
8956 output_addr_const which is not target code. */
8957 static const_rtx tocrel_base_oac
, tocrel_offset_oac
;
8959 /* Return true if OP is a toc pointer relative address (the output
8960 of create_TOC_reference). If STRICT, do not match non-split
8961 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8962 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8963 TOCREL_OFFSET_RET respectively. */
8966 toc_relative_expr_p (const_rtx op
, bool strict
, const_rtx
*tocrel_base_ret
,
8967 const_rtx
*tocrel_offset_ret
)
8972 if (TARGET_CMODEL
!= CMODEL_SMALL
)
8974 /* When strict ensure we have everything tidy. */
8976 && !(GET_CODE (op
) == LO_SUM
8977 && REG_P (XEXP (op
, 0))
8978 && INT_REG_OK_FOR_BASE_P (XEXP (op
, 0), strict
)))
8981 /* When not strict, allow non-split TOC addresses and also allow
8982 (lo_sum (high ..)) TOC addresses created during reload. */
8983 if (GET_CODE (op
) == LO_SUM
)
8987 const_rtx tocrel_base
= op
;
8988 const_rtx tocrel_offset
= const0_rtx
;
8990 if (GET_CODE (op
) == PLUS
&& add_cint_operand (XEXP (op
, 1), GET_MODE (op
)))
8992 tocrel_base
= XEXP (op
, 0);
8993 tocrel_offset
= XEXP (op
, 1);
8996 if (tocrel_base_ret
)
8997 *tocrel_base_ret
= tocrel_base
;
8998 if (tocrel_offset_ret
)
8999 *tocrel_offset_ret
= tocrel_offset
;
9001 return (GET_CODE (tocrel_base
) == UNSPEC
9002 && XINT (tocrel_base
, 1) == UNSPEC_TOCREL
9003 && REG_P (XVECEXP (tocrel_base
, 0, 1))
9004 && REGNO (XVECEXP (tocrel_base
, 0, 1)) == TOC_REGISTER
);
9007 /* Return true if X is a constant pool address, and also for cmodel=medium
9008 if X is a toc-relative address known to be offsettable within MODE. */
9011 legitimate_constant_pool_address_p (const_rtx x
, machine_mode mode
,
9014 const_rtx tocrel_base
, tocrel_offset
;
9015 return (toc_relative_expr_p (x
, strict
, &tocrel_base
, &tocrel_offset
)
9016 && (TARGET_CMODEL
!= CMODEL_MEDIUM
9017 || constant_pool_expr_p (XVECEXP (tocrel_base
, 0, 0))
9019 || offsettable_ok_by_alignment (XVECEXP (tocrel_base
, 0, 0),
9020 INTVAL (tocrel_offset
), mode
)));
9024 legitimate_small_data_p (machine_mode mode
, rtx x
)
9026 return (DEFAULT_ABI
== ABI_V4
9027 && !flag_pic
&& !TARGET_TOC
9028 && (SYMBOL_REF_P (x
) || GET_CODE (x
) == CONST
)
9029 && small_data_operand (x
, mode
));
9033 rs6000_legitimate_offset_address_p (machine_mode mode
, rtx x
,
9034 bool strict
, bool worst_case
)
9036 unsigned HOST_WIDE_INT offset
;
9039 if (GET_CODE (x
) != PLUS
)
9041 if (!REG_P (XEXP (x
, 0)))
9043 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), strict
))
9045 if (mode_supports_dq_form (mode
))
9046 return quad_address_p (x
, mode
, strict
);
9047 if (!reg_offset_addressing_ok_p (mode
))
9048 return virtual_stack_registers_memory_p (x
);
9049 if (legitimate_constant_pool_address_p (x
, mode
, strict
|| lra_in_progress
))
9051 if (!CONST_INT_P (XEXP (x
, 1)))
9054 offset
= INTVAL (XEXP (x
, 1));
9061 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
9063 if (VECTOR_MEM_VSX_P (mode
))
9068 if (!TARGET_POWERPC64
)
9070 else if (offset
& 3)
9083 if (!TARGET_POWERPC64
)
9085 else if (offset
& 3)
9093 if (TARGET_PREFIXED
)
9094 return SIGNED_34BIT_OFFSET_EXTRA_P (offset
, extra
);
9096 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
9100 legitimate_indexed_address_p (rtx x
, int strict
)
9104 if (GET_CODE (x
) != PLUS
)
9110 return (REG_P (op0
) && REG_P (op1
)
9111 && ((INT_REG_OK_FOR_BASE_P (op0
, strict
)
9112 && INT_REG_OK_FOR_INDEX_P (op1
, strict
))
9113 || (INT_REG_OK_FOR_BASE_P (op1
, strict
)
9114 && INT_REG_OK_FOR_INDEX_P (op0
, strict
))));
9118 avoiding_indexed_address_p (machine_mode mode
)
9120 unsigned int msize
= GET_MODE_SIZE (mode
);
9122 /* Avoid indexed addressing for modes that have non-indexed load/store
9123 instruction forms. On power10, vector pairs have an indexed
9124 form, but vector quads don't. */
9128 return (TARGET_AVOID_XFORM
&& VECTOR_MEM_NONE_P (mode
));
9132 legitimate_indirect_address_p (rtx x
, int strict
)
9134 return REG_P (x
) && INT_REG_OK_FOR_BASE_P (x
, strict
);
9138 macho_lo_sum_memory_operand (rtx x
, machine_mode mode
)
9140 if (!TARGET_MACHO
|| !flag_pic
9141 || mode
!= SImode
|| !MEM_P (x
))
9145 if (GET_CODE (x
) != LO_SUM
)
9147 if (!REG_P (XEXP (x
, 0)))
9149 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), 0))
9153 return CONSTANT_P (x
);
9157 legitimate_lo_sum_address_p (machine_mode mode
, rtx x
, int strict
)
9159 if (GET_CODE (x
) != LO_SUM
)
9161 if (!REG_P (XEXP (x
, 0)))
9163 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), strict
))
9165 /* quad word addresses are restricted, and we can't use LO_SUM. */
9166 if (mode_supports_dq_form (mode
))
9174 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
)
9176 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
9177 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
9178 recognizes some LO_SUM addresses as valid although this
9179 function says opposite. In most cases, LRA through different
9180 transformations can generate correct code for address reloads.
9181 It cannot manage only some LO_SUM cases. So we need to add
9182 code here saying that some addresses are still valid. */
9183 large_toc_ok
= (lra_in_progress
&& TARGET_CMODEL
!= CMODEL_SMALL
9184 && small_toc_ref (x
, VOIDmode
));
9185 if (TARGET_TOC
&& ! large_toc_ok
)
9187 if (GET_MODE_NUNITS (mode
) != 1)
9189 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
9190 && !(/* ??? Assume floating point reg based on mode? */
9191 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
)))
9194 return CONSTANT_P (x
) || large_toc_ok
;
9196 else if (TARGET_MACHO
)
9198 if (GET_MODE_NUNITS (mode
) != 1)
9200 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
9201 && !(/* see above */
9202 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
)))
9205 if (MACHO_DYNAMIC_NO_PIC_P
|| !flag_pic
)
9206 return CONSTANT_P (x
);
9208 /* Macho-O PIC code from here. */
9209 if (GET_CODE (x
) == CONST
)
9212 /* SYMBOL_REFs need to be wrapped in an UNSPEC_MACHOPIC_OFFSET. */
9213 if (SYMBOL_REF_P (x
))
9216 /* So this is OK if the wrapped object is const. */
9217 if (GET_CODE (x
) == UNSPEC
9218 && XINT (x
, 1) == UNSPEC_MACHOPIC_OFFSET
)
9219 return CONSTANT_P (XVECEXP (x
, 0, 0));
9220 return CONSTANT_P (x
);
9226 /* Try machine-dependent ways of modifying an illegitimate address
9227 to be legitimate. If we find one, return the new, valid address.
9228 This is used from only one place: `memory_address' in explow.cc.
9230 OLDX is the address as it was before break_out_memory_refs was
9231 called. In some cases it is useful to look at this to decide what
9234 It is always safe for this function to do nothing. It exists to
9235 recognize opportunities to optimize the output.
9237 On RS/6000, first check for the sum of a register with a constant
9238 integer that is out of range. If so, generate code to add the
9239 constant with the low-order 16 bits masked to the register and force
9240 this result into another register (this can be done with `cau').
9241 Then generate an address of REG+(CONST&0xffff), allowing for the
9242 possibility of bit 16 being a one.
9244 Then check for the sum of a register and something not constant, try to
9245 load the other things into a register and return the sum. */
9248 rs6000_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
9253 if (!reg_offset_addressing_ok_p (mode
)
9254 || mode_supports_dq_form (mode
))
9256 if (virtual_stack_registers_memory_p (x
))
9259 /* In theory we should not be seeing addresses of the form reg+0,
9260 but just in case it is generated, optimize it away. */
9261 if (GET_CODE (x
) == PLUS
&& XEXP (x
, 1) == const0_rtx
)
9262 return force_reg (Pmode
, XEXP (x
, 0));
9264 /* For TImode with load/store quad, restrict addresses to just a single
9265 pointer, so it works with both GPRs and VSX registers. */
9266 /* Make sure both operands are registers. */
9267 else if (GET_CODE (x
) == PLUS
9268 && (mode
!= TImode
|| !TARGET_VSX
))
9269 return gen_rtx_PLUS (Pmode
,
9270 force_reg (Pmode
, XEXP (x
, 0)),
9271 force_reg (Pmode
, XEXP (x
, 1)));
9273 return force_reg (Pmode
, x
);
9275 if (SYMBOL_REF_P (x
) && !TARGET_MACHO
)
9277 enum tls_model model
= SYMBOL_REF_TLS_MODEL (x
);
9279 return rs6000_legitimize_tls_address (x
, model
);
9291 /* As in legitimate_offset_address_p we do not assume
9292 worst-case. The mode here is just a hint as to the registers
9293 used. A TImode is usually in gprs, but may actually be in
9294 fprs. Leave worst-case scenario for reload to handle via
9295 insn constraints. PTImode is only GPRs. */
9302 if (GET_CODE (x
) == PLUS
9303 && REG_P (XEXP (x
, 0))
9304 && CONST_INT_P (XEXP (x
, 1))
9305 && ((unsigned HOST_WIDE_INT
) (INTVAL (XEXP (x
, 1)) + 0x8000)
9306 >= 0x10000 - extra
))
9308 HOST_WIDE_INT high_int
, low_int
;
9310 low_int
= sext_hwi (INTVAL (XEXP (x
, 1)), 16);
9311 if (low_int
>= 0x8000 - extra
)
9313 high_int
= INTVAL (XEXP (x
, 1)) - low_int
;
9314 sum
= force_operand (gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
9315 gen_int_mode (high_int
, Pmode
)), 0);
9316 return plus_constant (Pmode
, sum
, low_int
);
9318 else if (GET_CODE (x
) == PLUS
9319 && REG_P (XEXP (x
, 0))
9320 && !CONST_INT_P (XEXP (x
, 1))
9321 && GET_MODE_NUNITS (mode
) == 1
9322 && (GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
9323 || (/* ??? Assume floating point reg based on mode? */
9324 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
)))
9325 && !avoiding_indexed_address_p (mode
))
9327 return gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
9328 force_reg (Pmode
, force_operand (XEXP (x
, 1), 0)));
9330 else if ((TARGET_ELF
9332 || !MACHO_DYNAMIC_NO_PIC_P
9336 && TARGET_NO_TOC_OR_PCREL
9339 && !CONST_WIDE_INT_P (x
)
9340 && !CONST_DOUBLE_P (x
)
9342 && GET_MODE_NUNITS (mode
) == 1
9343 && (GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
9344 || (/* ??? Assume floating point reg based on mode? */
9345 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
))))
9347 rtx reg
= gen_reg_rtx (Pmode
);
9349 emit_insn (gen_elf_high (reg
, x
));
9351 emit_insn (gen_macho_high (Pmode
, reg
, x
));
9352 return gen_rtx_LO_SUM (Pmode
, reg
, x
);
9356 && constant_pool_expr_p (x
)
9357 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x
), Pmode
))
9358 return create_TOC_reference (x
, NULL_RTX
);
9363 /* Debug version of rs6000_legitimize_address. */
9365 rs6000_debug_legitimize_address (rtx x
, rtx oldx
, machine_mode mode
)
9371 ret
= rs6000_legitimize_address (x
, oldx
, mode
);
9372 insns
= get_insns ();
9378 "\nrs6000_legitimize_address: mode %s, old code %s, "
9379 "new code %s, modified\n",
9380 GET_MODE_NAME (mode
), GET_RTX_NAME (GET_CODE (x
)),
9381 GET_RTX_NAME (GET_CODE (ret
)));
9383 fprintf (stderr
, "Original address:\n");
9386 fprintf (stderr
, "oldx:\n");
9389 fprintf (stderr
, "New address:\n");
9394 fprintf (stderr
, "Insns added:\n");
9395 debug_rtx_list (insns
, 20);
9401 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9402 GET_MODE_NAME (mode
), GET_RTX_NAME (GET_CODE (x
)));
9413 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9414 We need to emit DTP-relative relocations. */
9416 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
9418 rs6000_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
9423 fputs ("\t.long\t", file
);
9426 fputs (DOUBLE_INT_ASM_OP
, file
);
9431 output_addr_const (file
, x
);
9433 fputs ("@dtprel+0x8000", file
);
9436 /* Return true if X is a symbol that refers to real (rather than emulated)
9440 rs6000_real_tls_symbol_ref_p (rtx x
)
9442 return (SYMBOL_REF_P (x
)
9443 && SYMBOL_REF_TLS_MODEL (x
) >= TLS_MODEL_REAL
);
9446 /* In the name of slightly smaller debug output, and to cater to
9447 general assembler lossage, recognize various UNSPEC sequences
9448 and turn them back into a direct symbol reference. */
9451 rs6000_delegitimize_address (rtx orig_x
)
9455 /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion. It
9456 encodes loading up the high part of the address of a TOC reference along
9457 with a load of a GPR using the same base register used for the load. We
9458 return the original SYMBOL_REF.
9460 (set (reg:INT1 <reg>
9461 (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR)))
9463 UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass. These
9464 UNSPECs include the external SYMBOL_REF along with the value being loaded.
9465 We return the original SYMBOL_REF.
9467 (parallel [(set (reg:DI <base-reg>)
9468 (unspec:DI [(symbol_ref <symbol>)
9469 (const_int <marker>)]
9470 UNSPEC_PCREL_OPT_LD_ADDR))
9471 (set (reg:DI <load-reg>)
9472 (unspec:DI [(const_int 0)]
9473 UNSPEC_PCREL_OPT_LD_DATA))])
9475 UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the
9476 GPR being loaded is the same as the GPR used to hold the external address.
9478 (set (reg:DI <base-reg>)
9479 (unspec:DI [(symbol_ref <symbol>)
9480 (const_int <marker>)]
9481 UNSPEC_PCREL_OPT_LD_SAME_REG))
9483 UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This
9484 UNSPEC include the external SYMBOL_REF along with the value being loaded.
9485 We return the original SYMBOL_REF.
9487 (parallel [(set (reg:DI <base-reg>)
9488 (unspec:DI [(symbol_ref <symbol>)
9489 (const_int <marker>)]
9490 UNSPEC_PCREL_OPT_ST_ADDR))
9491 (use (reg <store-reg>))]) */
9493 if (GET_CODE (orig_x
) == UNSPEC
)
9494 switch (XINT (orig_x
, 1))
9496 case UNSPEC_FUSION_GPR
:
9497 case UNSPEC_PCREL_OPT_LD_ADDR
:
9498 case UNSPEC_PCREL_OPT_LD_SAME_REG
:
9499 case UNSPEC_PCREL_OPT_ST_ADDR
:
9500 orig_x
= XVECEXP (orig_x
, 0, 0);
9507 orig_x
= delegitimize_mem_from_attrs (orig_x
);
9514 if (TARGET_CMODEL
!= CMODEL_SMALL
&& GET_CODE (y
) == LO_SUM
)
9518 if (GET_CODE (y
) == PLUS
9519 && GET_MODE (y
) == Pmode
9520 && CONST_INT_P (XEXP (y
, 1)))
9522 offset
= XEXP (y
, 1);
9526 if (GET_CODE (y
) == UNSPEC
&& XINT (y
, 1) == UNSPEC_TOCREL
)
9528 y
= XVECEXP (y
, 0, 0);
9531 /* Do not associate thread-local symbols with the original
9532 constant pool symbol. */
9535 && CONSTANT_POOL_ADDRESS_P (y
)
9536 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y
)))
9540 if (offset
!= NULL_RTX
)
9541 y
= gen_rtx_PLUS (Pmode
, y
, offset
);
9542 if (!MEM_P (orig_x
))
9545 return replace_equiv_address_nv (orig_x
, y
);
9549 && GET_CODE (orig_x
) == LO_SUM
9550 && GET_CODE (XEXP (orig_x
, 1)) == CONST
)
9552 y
= XEXP (XEXP (orig_x
, 1), 0);
9553 if (GET_CODE (y
) == UNSPEC
&& XINT (y
, 1) == UNSPEC_MACHOPIC_OFFSET
)
9554 return XVECEXP (y
, 0, 0);
9560 /* Return true if X shouldn't be emitted into the debug info.
9561 The linker doesn't like .toc section references from
9562 .debug_* sections, so reject .toc section symbols. */
9565 rs6000_const_not_ok_for_debug_p (rtx x
)
9567 if (GET_CODE (x
) == UNSPEC
)
9569 if (SYMBOL_REF_P (x
)
9570 && CONSTANT_POOL_ADDRESS_P (x
))
9572 rtx c
= get_pool_constant (x
);
9573 machine_mode cmode
= get_pool_mode (x
);
9574 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c
, cmode
))
9581 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9584 rs6000_legitimate_combined_insn (rtx_insn
*insn
)
9586 int icode
= INSN_CODE (insn
);
9588 /* Reject creating doloop insns. Combine should not be allowed
9589 to create these for a number of reasons:
9590 1) In a nested loop, if combine creates one of these in an
9591 outer loop and the register allocator happens to allocate ctr
9592 to the outer loop insn, then the inner loop can't use ctr.
9593 Inner loops ought to be more highly optimized.
9594 2) Combine often wants to create one of these from what was
9595 originally a three insn sequence, first combining the three
9596 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9597 allocated ctr, the splitter takes use back to the three insn
9598 sequence. It's better to stop combine at the two insn
9600 3) Faced with not being able to allocate ctr for ctrsi/crtdi
9601 insns, the register allocator sometimes uses floating point
9602 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9603 jump insn and output reloads are not implemented for jumps,
9604 the ctrsi/ctrdi splitters need to handle all possible cases.
9605 That's a pain, and it gets to be seriously difficult when a
9606 splitter that runs after reload needs memory to transfer from
9607 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9608 for the difficult case. It's better to not create problems
9609 in the first place. */
9610 if (icode
!= CODE_FOR_nothing
9611 && (icode
== CODE_FOR_bdz_si
9612 || icode
== CODE_FOR_bdz_di
9613 || icode
== CODE_FOR_bdnz_si
9614 || icode
== CODE_FOR_bdnz_di
9615 || icode
== CODE_FOR_bdztf_si
9616 || icode
== CODE_FOR_bdztf_di
9617 || icode
== CODE_FOR_bdnztf_si
9618 || icode
== CODE_FOR_bdnztf_di
))
9624 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9626 static GTY(()) rtx rs6000_tls_symbol
;
9628 rs6000_tls_get_addr (void)
9630 if (!rs6000_tls_symbol
)
9631 rs6000_tls_symbol
= init_one_libfunc ("__tls_get_addr");
9633 return rs6000_tls_symbol
;
9636 /* Construct the SYMBOL_REF for TLS GOT references. */
9638 static GTY(()) rtx rs6000_got_symbol
;
9640 rs6000_got_sym (void)
9642 if (!rs6000_got_symbol
)
9644 rs6000_got_symbol
= gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
9645 SYMBOL_REF_FLAGS (rs6000_got_symbol
) |= SYMBOL_FLAG_LOCAL
;
9646 SYMBOL_REF_FLAGS (rs6000_got_symbol
) |= SYMBOL_FLAG_EXTERNAL
;
9649 return rs6000_got_symbol
;
9652 /* AIX Thread-Local Address support. */
9655 rs6000_legitimize_tls_address_aix (rtx addr
, enum tls_model model
)
9657 rtx sym
, mem
, tocref
, tlsreg
, tmpreg
, dest
;
9661 /* Place addr into TOC constant pool. */
9662 sym
= force_const_mem (GET_MODE (addr
), addr
);
9664 /* Output the TOC entry and create the MEM referencing the value. */
9665 if (constant_pool_expr_p (XEXP (sym
, 0))
9666 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym
, 0)), Pmode
))
9668 tocref
= create_TOC_reference (XEXP (sym
, 0), NULL_RTX
);
9669 mem
= gen_const_mem (Pmode
, tocref
);
9670 set_mem_alias_set (mem
, get_TOC_alias_set ());
9675 /* Use global-dynamic for local-dynamic. */
9676 if (model
== TLS_MODEL_GLOBAL_DYNAMIC
9677 || model
== TLS_MODEL_LOCAL_DYNAMIC
)
9679 /* Create new TOC reference for @m symbol. */
9680 name
= XSTR (XVECEXP (XEXP (mem
, 0), 0, 0), 0);
9681 tlsname
= XALLOCAVEC (char, strlen (name
) + 1);
9682 strcpy (tlsname
, "*LCM");
9683 strcat (tlsname
, name
+ 3);
9684 rtx modaddr
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (tlsname
));
9685 SYMBOL_REF_FLAGS (modaddr
) |= SYMBOL_FLAG_LOCAL
;
9686 tocref
= create_TOC_reference (modaddr
, NULL_RTX
);
9687 rtx modmem
= gen_const_mem (Pmode
, tocref
);
9688 set_mem_alias_set (modmem
, get_TOC_alias_set ());
9690 rtx modreg
= gen_reg_rtx (Pmode
);
9691 emit_insn (gen_rtx_SET (modreg
, modmem
));
9693 tmpreg
= gen_reg_rtx (Pmode
);
9694 emit_insn (gen_rtx_SET (tmpreg
, mem
));
9696 dest
= gen_reg_rtx (Pmode
);
9698 emit_insn (gen_tls_get_addrsi (dest
, modreg
, tmpreg
));
9700 emit_insn (gen_tls_get_addrdi (dest
, modreg
, tmpreg
));
9703 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9704 else if (TARGET_32BIT
)
9706 tlsreg
= gen_reg_rtx (SImode
);
9707 emit_insn (gen_tls_get_tpointer (tlsreg
));
9711 tlsreg
= gen_rtx_REG (DImode
, 13);
9712 xcoff_tls_exec_model_detected
= true;
9715 /* Load the TOC value into temporary register. */
9716 tmpreg
= gen_reg_rtx (Pmode
);
9717 emit_insn (gen_rtx_SET (tmpreg
, mem
));
9718 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
9719 gen_rtx_MINUS (Pmode
, addr
, tlsreg
));
9721 /* Add TOC symbol value to TLS pointer. */
9722 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tmpreg
, tlsreg
));
9727 /* Passes the tls arg value for global dynamic and local dynamic
9728 emit_library_call_value in rs6000_legitimize_tls_address to
9729 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
9730 marker relocs put on __tls_get_addr calls. */
9731 static rtx global_tlsarg
;
9733 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9734 this (thread-local) address. */
9737 rs6000_legitimize_tls_address (rtx addr
, enum tls_model model
)
9742 return rs6000_legitimize_tls_address_aix (addr
, model
);
9744 dest
= gen_reg_rtx (Pmode
);
9745 if (model
== TLS_MODEL_LOCAL_EXEC
9746 && (rs6000_tls_size
== 16 || rs6000_pcrel_p ()))
9752 tlsreg
= gen_rtx_REG (Pmode
, 13);
9753 insn
= gen_tls_tprel_64 (dest
, tlsreg
, addr
);
9757 tlsreg
= gen_rtx_REG (Pmode
, 2);
9758 insn
= gen_tls_tprel_32 (dest
, tlsreg
, addr
);
9762 else if (model
== TLS_MODEL_LOCAL_EXEC
&& rs6000_tls_size
== 32)
9766 tmp
= gen_reg_rtx (Pmode
);
9769 tlsreg
= gen_rtx_REG (Pmode
, 13);
9770 insn
= gen_tls_tprel_ha_64 (tmp
, tlsreg
, addr
);
9774 tlsreg
= gen_rtx_REG (Pmode
, 2);
9775 insn
= gen_tls_tprel_ha_32 (tmp
, tlsreg
, addr
);
9779 insn
= gen_tls_tprel_lo_64 (dest
, tmp
, addr
);
9781 insn
= gen_tls_tprel_lo_32 (dest
, tmp
, addr
);
9786 rtx got
, tga
, tmp1
, tmp2
;
9788 /* We currently use relocations like @got@tlsgd for tls, which
9789 means the linker will handle allocation of tls entries, placing
9790 them in the .got section. So use a pointer to the .got section,
9791 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9792 or to secondary GOT sections used by 32-bit -fPIC. */
9793 if (rs6000_pcrel_p ())
9795 else if (TARGET_64BIT
)
9796 got
= gen_rtx_REG (Pmode
, 2);
9800 got
= gen_rtx_REG (Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
9803 rtx gsym
= rs6000_got_sym ();
9804 got
= gen_reg_rtx (Pmode
);
9806 rs6000_emit_move (got
, gsym
, Pmode
);
9811 tmp1
= gen_reg_rtx (Pmode
);
9812 tmp2
= gen_reg_rtx (Pmode
);
9813 mem
= gen_const_mem (Pmode
, tmp1
);
9814 lab
= gen_label_rtx ();
9815 emit_insn (gen_load_toc_v4_PIC_1b (gsym
, lab
));
9816 emit_move_insn (tmp1
, gen_rtx_REG (Pmode
, LR_REGNO
));
9817 if (TARGET_LINK_STACK
)
9818 emit_insn (gen_addsi3 (tmp1
, tmp1
, GEN_INT (4)));
9819 emit_move_insn (tmp2
, mem
);
9820 rtx_insn
*last
= emit_insn (gen_addsi3 (got
, tmp1
, tmp2
));
9821 set_unique_reg_note (last
, REG_EQUAL
, gsym
);
9826 if (model
== TLS_MODEL_GLOBAL_DYNAMIC
)
9828 rtx arg
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, addr
, got
),
9830 tga
= rs6000_tls_get_addr ();
9831 rtx argreg
= gen_rtx_REG (Pmode
, 3);
9832 emit_insn (gen_rtx_SET (argreg
, arg
));
9833 global_tlsarg
= arg
;
9834 emit_library_call_value (tga
, dest
, LCT_CONST
, Pmode
, argreg
, Pmode
);
9835 global_tlsarg
= NULL_RTX
;
9837 /* Make a note so that the result of this call can be CSEd. */
9838 rtvec vec
= gen_rtvec (1, copy_rtx (arg
));
9839 rtx uns
= gen_rtx_UNSPEC (Pmode
, vec
, UNSPEC_TLS_GET_ADDR
);
9840 set_unique_reg_note (get_last_insn (), REG_EQUAL
, uns
);
9842 else if (model
== TLS_MODEL_LOCAL_DYNAMIC
)
9844 rtx arg
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, got
), UNSPEC_TLSLD
);
9845 tga
= rs6000_tls_get_addr ();
9846 tmp1
= gen_reg_rtx (Pmode
);
9847 rtx argreg
= gen_rtx_REG (Pmode
, 3);
9848 emit_insn (gen_rtx_SET (argreg
, arg
));
9849 global_tlsarg
= arg
;
9850 emit_library_call_value (tga
, tmp1
, LCT_CONST
, Pmode
, argreg
, Pmode
);
9851 global_tlsarg
= NULL_RTX
;
9853 /* Make a note so that the result of this call can be CSEd. */
9854 rtvec vec
= gen_rtvec (1, copy_rtx (arg
));
9855 rtx uns
= gen_rtx_UNSPEC (Pmode
, vec
, UNSPEC_TLS_GET_ADDR
);
9856 set_unique_reg_note (get_last_insn (), REG_EQUAL
, uns
);
9858 if (rs6000_tls_size
== 16 || rs6000_pcrel_p ())
9861 insn
= gen_tls_dtprel_64 (dest
, tmp1
, addr
);
9863 insn
= gen_tls_dtprel_32 (dest
, tmp1
, addr
);
9865 else if (rs6000_tls_size
== 32)
9867 tmp2
= gen_reg_rtx (Pmode
);
9869 insn
= gen_tls_dtprel_ha_64 (tmp2
, tmp1
, addr
);
9871 insn
= gen_tls_dtprel_ha_32 (tmp2
, tmp1
, addr
);
9874 insn
= gen_tls_dtprel_lo_64 (dest
, tmp2
, addr
);
9876 insn
= gen_tls_dtprel_lo_32 (dest
, tmp2
, addr
);
9880 tmp2
= gen_reg_rtx (Pmode
);
9882 insn
= gen_tls_got_dtprel_64 (tmp2
, got
, addr
);
9884 insn
= gen_tls_got_dtprel_32 (tmp2
, got
, addr
);
9886 insn
= gen_rtx_SET (dest
, gen_rtx_PLUS (Pmode
, tmp2
, tmp1
));
9892 /* IE, or 64-bit offset LE. */
9893 tmp2
= gen_reg_rtx (Pmode
);
9895 insn
= gen_tls_got_tprel_64 (tmp2
, got
, addr
);
9897 insn
= gen_tls_got_tprel_32 (tmp2
, got
, addr
);
9899 if (rs6000_pcrel_p ())
9902 insn
= gen_tls_tls_pcrel_64 (dest
, tmp2
, addr
);
9904 insn
= gen_tls_tls_pcrel_32 (dest
, tmp2
, addr
);
9906 else if (TARGET_64BIT
)
9907 insn
= gen_tls_tls_64 (dest
, tmp2
, addr
);
9909 insn
= gen_tls_tls_32 (dest
, tmp2
, addr
);
9917 /* Only create the global variable for the stack protect guard if we are using
9918 the global flavor of that guard. */
9920 rs6000_init_stack_protect_guard (void)
9922 if (rs6000_stack_protector_guard
== SSP_GLOBAL
)
9923 return default_stack_protect_guard ();
9928 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9931 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
9933 /* If GET_CODE (x) is HIGH, the 'X' represets the high part of a symbol_ref.
9934 It can not be put into a constant pool. e.g.
9935 (high:DI (unspec:DI [(symbol_ref/u:DI ("*.LC0")..)
9936 (high:DI (symbol_ref:DI ("var")..)). */
9937 if (GET_CODE (x
) == HIGH
)
9940 /* A TLS symbol in the TOC cannot contain a sum. */
9941 if (GET_CODE (x
) == CONST
9942 && GET_CODE (XEXP (x
, 0)) == PLUS
9943 && SYMBOL_REF_P (XEXP (XEXP (x
, 0), 0))
9944 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0)) != 0)
9947 /* Allow AIX TOC TLS symbols in the constant pool,
9948 but not ELF TLS symbols. */
9949 return TARGET_ELF
&& tls_referenced_p (x
);
9952 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9953 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9954 can be addressed relative to the toc pointer. */
9957 use_toc_relative_ref (rtx sym
, machine_mode mode
)
9959 return ((constant_pool_expr_p (sym
)
9960 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym
),
9961 get_pool_mode (sym
)))
9962 || (TARGET_CMODEL
== CMODEL_MEDIUM
9963 && SYMBOL_REF_LOCAL_P (sym
)
9964 && GET_MODE_SIZE (mode
) <= POWERPC64_TOC_POINTER_ALIGNMENT
));
9967 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9968 that is a valid memory address for an instruction.
9969 The MODE argument is the machine mode for the MEM expression
9970 that wants to use this address.
9972 On the RS/6000, there are four valid address: a SYMBOL_REF that
9973 refers to a constant pool entry of an address (or the sum of it
9974 plus a constant), a short (16-bit signed) constant plus a register,
9975 the sum of two registers, or a register indirect, possibly with an
9976 auto-increment. For DFmode, DDmode and DImode with a constant plus
9977 register, we must ensure that both words are addressable or PowerPC64
9978 with offset word aligned.
9980 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9981 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9982 because adjacent memory cells are accessed by adding word-sized offsets
9983 during assembly output. */
9985 rs6000_legitimate_address_p (machine_mode mode
, rtx x
, bool reg_ok_strict
)
9987 bool reg_offset_p
= reg_offset_addressing_ok_p (mode
);
9988 bool quad_offset_p
= mode_supports_dq_form (mode
);
9990 if (TARGET_ELF
&& RS6000_SYMBOL_REF_TLS_P (x
))
9993 /* Handle unaligned altivec lvx/stvx type addresses. */
9994 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
)
9995 && GET_CODE (x
) == AND
9996 && CONST_INT_P (XEXP (x
, 1))
9997 && INTVAL (XEXP (x
, 1)) == -16)
10000 return (legitimate_indirect_address_p (x
, reg_ok_strict
)
10001 || legitimate_indexed_address_p (x
, reg_ok_strict
)
10002 || virtual_stack_registers_memory_p (x
));
10005 if (legitimate_indirect_address_p (x
, reg_ok_strict
))
10008 && (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == PRE_DEC
)
10009 && mode_supports_pre_incdec_p (mode
)
10010 && legitimate_indirect_address_p (XEXP (x
, 0), reg_ok_strict
))
10013 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
10014 if (address_is_prefixed (x
, mode
, NON_PREFIXED_DEFAULT
))
10017 /* Handle restricted vector d-form offsets in ISA 3.0. */
10020 if (quad_address_p (x
, mode
, reg_ok_strict
))
10023 else if (virtual_stack_registers_memory_p (x
))
10026 else if (reg_offset_p
)
10028 if (legitimate_small_data_p (mode
, x
))
10030 if (legitimate_constant_pool_address_p (x
, mode
,
10031 reg_ok_strict
|| lra_in_progress
))
10035 /* For TImode, if we have TImode in VSX registers, only allow register
10036 indirect addresses. This will allow the values to go in either GPRs
10037 or VSX registers without reloading. The vector types would tend to
10038 go into VSX registers, so we allow REG+REG, while TImode seems
10039 somewhat split, in that some uses are GPR based, and some VSX based. */
10040 /* FIXME: We could loosen this by changing the following to
10041 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
10042 but currently we cannot allow REG+REG addressing for TImode. See
10043 PR72827 for complete details on how this ends up hoodwinking DSE. */
10044 if (mode
== TImode
&& TARGET_VSX
)
10046 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
10047 if (! reg_ok_strict
10049 && GET_CODE (x
) == PLUS
10050 && REG_P (XEXP (x
, 0))
10051 && (XEXP (x
, 0) == virtual_stack_vars_rtx
10052 || XEXP (x
, 0) == arg_pointer_rtx
)
10053 && CONST_INT_P (XEXP (x
, 1)))
10055 if (rs6000_legitimate_offset_address_p (mode
, x
, reg_ok_strict
, false))
10057 if (!FLOAT128_2REG_P (mode
)
10058 && (TARGET_HARD_FLOAT
10059 || TARGET_POWERPC64
10060 || (mode
!= DFmode
&& mode
!= DDmode
))
10061 && (TARGET_POWERPC64
|| mode
!= DImode
)
10062 && (mode
!= TImode
|| VECTOR_MEM_VSX_P (TImode
))
10064 && !avoiding_indexed_address_p (mode
)
10065 && legitimate_indexed_address_p (x
, reg_ok_strict
))
10067 if (TARGET_UPDATE
&& GET_CODE (x
) == PRE_MODIFY
10068 && mode_supports_pre_modify_p (mode
)
10069 && legitimate_indirect_address_p (XEXP (x
, 0), reg_ok_strict
)
10070 && (rs6000_legitimate_offset_address_p (mode
, XEXP (x
, 1),
10071 reg_ok_strict
, false)
10072 || (!avoiding_indexed_address_p (mode
)
10073 && legitimate_indexed_address_p (XEXP (x
, 1), reg_ok_strict
)))
10074 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
10076 /* There is no prefixed version of the load/store with update. */
10077 rtx addr
= XEXP (x
, 1);
10078 return !address_is_prefixed (addr
, mode
, NON_PREFIXED_DEFAULT
);
10080 if (reg_offset_p
&& !quad_offset_p
10081 && legitimate_lo_sum_address_p (mode
, x
, reg_ok_strict
))
10086 /* Debug version of rs6000_legitimate_address_p. */
10088 rs6000_debug_legitimate_address_p (machine_mode mode
, rtx x
,
10089 bool reg_ok_strict
)
10091 bool ret
= rs6000_legitimate_address_p (mode
, x
, reg_ok_strict
);
10093 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
10094 "strict = %d, reload = %s, code = %s\n",
10095 ret
? "true" : "false",
10096 GET_MODE_NAME (mode
),
10098 (reload_completed
? "after" : "before"),
10099 GET_RTX_NAME (GET_CODE (x
)));
10105 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
10108 rs6000_mode_dependent_address_p (const_rtx addr
,
10109 addr_space_t as ATTRIBUTE_UNUSED
)
10111 return rs6000_mode_dependent_address_ptr (addr
);
10114 /* Go to LABEL if ADDR (a legitimate address expression)
10115 has an effect that depends on the machine mode it is used for.
10117 On the RS/6000 this is true of all integral offsets (since AltiVec
10118 and VSX modes don't allow them) or is a pre-increment or decrement.
10120 ??? Except that due to conceptual problems in offsettable_address_p
10121 we can't really report the problems of integral offsets. So leave
10122 this assuming that the adjustable offset must be valid for the
10123 sub-words of a TFmode operand, which is what we had before. */
10126 rs6000_mode_dependent_address (const_rtx addr
)
10128 switch (GET_CODE (addr
))
10131 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
10132 is considered a legitimate address before reload, so there
10133 are no offset restrictions in that case. Note that this
10134 condition is safe in strict mode because any address involving
10135 virtual_stack_vars_rtx or arg_pointer_rtx would already have
10136 been rejected as illegitimate. */
10137 if (XEXP (addr
, 0) != virtual_stack_vars_rtx
10138 && XEXP (addr
, 0) != arg_pointer_rtx
10139 && CONST_INT_P (XEXP (addr
, 1)))
10141 HOST_WIDE_INT val
= INTVAL (XEXP (addr
, 1));
10142 HOST_WIDE_INT extra
= TARGET_POWERPC64
? 8 : 12;
10143 if (TARGET_PREFIXED
)
10144 return !SIGNED_34BIT_OFFSET_EXTRA_P (val
, extra
);
10146 return !SIGNED_16BIT_OFFSET_EXTRA_P (val
, extra
);
10151 /* Anything in the constant pool is sufficiently aligned that
10152 all bytes have the same high part address. */
10153 return !legitimate_constant_pool_address_p (addr
, QImode
, false);
10155 /* Auto-increment cases are now treated generically in recog.cc. */
10157 return TARGET_UPDATE
;
10159 /* AND is only allowed in Altivec loads. */
10170 /* Debug version of rs6000_mode_dependent_address. */
10172 rs6000_debug_mode_dependent_address (const_rtx addr
)
10174 bool ret
= rs6000_mode_dependent_address (addr
);
10176 fprintf (stderr
, "\nrs6000_mode_dependent_address: ret = %s\n",
10177 ret
? "true" : "false");
10183 /* Implement FIND_BASE_TERM. */
10186 rs6000_find_base_term (rtx op
)
10191 if (GET_CODE (base
) == CONST
)
10192 base
= XEXP (base
, 0);
10193 if (GET_CODE (base
) == PLUS
)
10194 base
= XEXP (base
, 0);
10195 if (GET_CODE (base
) == UNSPEC
)
10196 switch (XINT (base
, 1))
10198 case UNSPEC_TOCREL
:
10199 case UNSPEC_MACHOPIC_OFFSET
:
10200 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
10201 for aliasing purposes. */
10202 return XVECEXP (base
, 0, 0);
10208 /* More elaborate version of recog's offsettable_memref_p predicate
10209 that works around the ??? note of rs6000_mode_dependent_address.
10210 In particular it accepts
10212 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
10214 in 32-bit mode, that the recog predicate rejects. */
10217 rs6000_offsettable_memref_p (rtx op
, machine_mode reg_mode
, bool strict
)
10224 /* First mimic offsettable_memref_p. */
10225 if (offsettable_address_p (strict
, GET_MODE (op
), XEXP (op
, 0)))
10228 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10229 the latter predicate knows nothing about the mode of the memory
10230 reference and, therefore, assumes that it is the largest supported
10231 mode (TFmode). As a consequence, legitimate offsettable memory
10232 references are rejected. rs6000_legitimate_offset_address_p contains
10233 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10234 at least with a little bit of help here given that we know the
10235 actual registers used. */
10236 worst_case
= ((TARGET_POWERPC64
&& GET_MODE_CLASS (reg_mode
) == MODE_INT
)
10237 || GET_MODE_SIZE (reg_mode
) == 4);
10238 return rs6000_legitimate_offset_address_p (GET_MODE (op
), XEXP (op
, 0),
10239 strict
, worst_case
);
10242 /* Determine the reassociation width to be used in reassociate_bb.
10243 This takes into account how many parallel operations we
10244 can actually do of a given type, and also the latency.
10246 int add/sub 6/cycle
10248 vect add/sub/mul 2/cycle
10249 fp add/sub/mul 2/cycle
10254 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
10257 switch (rs6000_tune
)
10259 case PROCESSOR_POWER8
:
10260 case PROCESSOR_POWER9
:
10261 case PROCESSOR_POWER10
:
10262 case PROCESSOR_FUTURE
:
10263 if (DECIMAL_FLOAT_MODE_P (mode
))
10265 if (VECTOR_MODE_P (mode
))
10267 if (INTEGRAL_MODE_P (mode
))
10269 if (FLOAT_MODE_P (mode
))
10278 /* Change register usage conditional on target flags. */
10280 rs6000_conditional_register_usage (void)
10284 if (TARGET_DEBUG_TARGET
)
10285 fprintf (stderr
, "rs6000_conditional_register_usage called\n");
10287 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10289 fixed_regs
[13] = call_used_regs
[13] = 1;
10291 /* Conditionally disable FPRs. */
10292 if (TARGET_SOFT_FLOAT
)
10293 for (i
= 32; i
< 64; i
++)
10294 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10296 /* The TOC register is not killed across calls in a way that is
10297 visible to the compiler. */
10298 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
10299 call_used_regs
[2] = 0;
10301 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
== 2)
10302 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10304 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
== 1)
10305 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10306 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10308 if (DEFAULT_ABI
== ABI_DARWIN
&& flag_pic
)
10309 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10310 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10312 if (TARGET_TOC
&& TARGET_MINIMAL_TOC
)
10313 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10315 if (!TARGET_ALTIVEC
&& !TARGET_VSX
)
10317 for (i
= FIRST_ALTIVEC_REGNO
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
10318 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10319 call_used_regs
[VRSAVE_REGNO
] = 1;
10322 if (TARGET_ALTIVEC
|| TARGET_VSX
)
10323 global_regs
[VSCR_REGNO
] = 1;
10325 if (TARGET_ALTIVEC_ABI
)
10327 for (i
= FIRST_ALTIVEC_REGNO
; i
< FIRST_ALTIVEC_REGNO
+ 20; ++i
)
10328 call_used_regs
[i
] = 1;
10330 /* AIX reserves VR20:31 in non-extended ABI mode. */
10331 if (TARGET_XCOFF
&& !rs6000_aix_extabi
)
10332 for (i
= FIRST_ALTIVEC_REGNO
+ 20; i
< FIRST_ALTIVEC_REGNO
+ 32; ++i
)
10333 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10338 /* Output insns to set DEST equal to the constant SOURCE as a series of
10339 lis, ori and shl instructions and return TRUE. */
10342 rs6000_emit_set_const (rtx dest
, rtx source
)
10344 machine_mode mode
= GET_MODE (dest
);
10349 gcc_checking_assert (CONST_INT_P (source
));
10350 c
= INTVAL (source
);
10355 emit_insn (gen_rtx_SET (dest
, source
));
10359 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (SImode
);
10361 emit_insn (gen_rtx_SET (temp
, GEN_INT (c
& ~(HOST_WIDE_INT
) 0xffff)));
10362 emit_insn (gen_rtx_SET (dest
,
10363 gen_rtx_IOR (SImode
, temp
,
10364 GEN_INT (c
& 0xffff))));
10368 if (!TARGET_POWERPC64
)
10372 hi
= operand_subword_force (dest
, WORDS_BIG_ENDIAN
== 0, DImode
);
10373 lo
= operand_subword_force (dest
, WORDS_BIG_ENDIAN
!= 0, DImode
);
10374 emit_move_insn (hi
, GEN_INT (c
>> 32));
10375 c
= sext_hwi (c
, 32);
10376 emit_move_insn (lo
, GEN_INT (c
));
10379 rs6000_emit_set_long_const (dest
, c
);
10383 gcc_unreachable ();
10386 insn
= get_last_insn ();
10387 set
= single_set (insn
);
10388 if (! CONSTANT_P (SET_SRC (set
)))
10389 set_unique_reg_note (insn
, REG_EQUAL
, GEN_INT (c
));
10394 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10395 Output insns to set DEST equal to the constant C as a series of
10396 lis, ori and shl instructions. */
10399 rs6000_emit_set_long_const (rtx dest
, HOST_WIDE_INT c
)
10402 HOST_WIDE_INT ud1
, ud2
, ud3
, ud4
;
10412 if ((ud4
== 0xffff && ud3
== 0xffff && ud2
== 0xffff && (ud1
& 0x8000))
10413 || (ud4
== 0 && ud3
== 0 && ud2
== 0 && ! (ud1
& 0x8000)))
10414 emit_move_insn (dest
, GEN_INT (sext_hwi (ud1
, 16)));
10416 else if ((ud4
== 0xffff && ud3
== 0xffff && (ud2
& 0x8000))
10417 || (ud4
== 0 && ud3
== 0 && ! (ud2
& 0x8000)))
10419 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10421 emit_move_insn (ud1
!= 0 ? temp
: dest
,
10422 GEN_INT (sext_hwi (ud2
<< 16, 32)));
10424 emit_move_insn (dest
, gen_rtx_IOR (DImode
, temp
, GEN_INT (ud1
)));
10426 else if (ud4
== 0xffff && ud3
== 0xffff && (ud1
& 0x8000))
10429 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10430 emit_move_insn (temp
, GEN_INT (sext_hwi (ud1
, 16)));
10431 emit_move_insn (dest
, gen_rtx_XOR (DImode
, temp
,
10432 GEN_INT ((ud2
^ 0xffff) << 16)));
10434 else if (ud3
== 0 && ud4
== 0)
10436 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10438 gcc_assert (ud2
& 0x8000);
10443 emit_move_insn (temp
, GEN_INT (sext_hwi (ud2
<< 16, 32)));
10444 emit_move_insn (dest
,
10445 gen_rtx_AND (DImode
, temp
, GEN_INT (0xffffffff)));
10447 else if (!(ud1
& 0x8000))
10450 emit_move_insn (temp
, GEN_INT (ud1
));
10451 emit_move_insn (dest
,
10452 gen_rtx_IOR (DImode
, temp
, GEN_INT (ud2
<< 16)));
10456 /* lis; ori; rldicl */
10457 emit_move_insn (temp
, GEN_INT (sext_hwi (ud2
<< 16, 32)));
10458 emit_move_insn (temp
, gen_rtx_IOR (DImode
, temp
, GEN_INT (ud1
)));
10459 emit_move_insn (dest
,
10460 gen_rtx_AND (DImode
, temp
, GEN_INT (0xffffffff)));
10463 else if (ud1
== ud3
&& ud2
== ud4
)
10465 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10466 HOST_WIDE_INT num
= (ud2
<< 16) | ud1
;
10467 rs6000_emit_set_long_const (temp
, sext_hwi (num
, 32));
10468 rtx one
= gen_rtx_AND (DImode
, temp
, GEN_INT (0xffffffff));
10469 rtx two
= gen_rtx_ASHIFT (DImode
, temp
, GEN_INT (32));
10470 emit_move_insn (dest
, gen_rtx_IOR (DImode
, one
, two
));
10472 else if ((ud4
== 0xffff && (ud3
& 0x8000))
10473 || (ud4
== 0 && ! (ud3
& 0x8000)))
10475 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10477 emit_move_insn (temp
, GEN_INT (sext_hwi (ud3
<< 16, 32)));
10479 emit_move_insn (temp
, gen_rtx_IOR (DImode
, temp
, GEN_INT (ud2
)));
10480 emit_move_insn (ud1
!= 0 ? temp
: dest
,
10481 gen_rtx_ASHIFT (DImode
, temp
, GEN_INT (16)));
10483 emit_move_insn (dest
, gen_rtx_IOR (DImode
, temp
, GEN_INT (ud1
)));
10485 else if (TARGET_PREFIXED
)
10487 if (can_create_pseudo_p ())
10489 /* pli A,L + pli B,H + rldimi A,B,32,0. */
10490 temp
= gen_reg_rtx (DImode
);
10491 rtx temp1
= gen_reg_rtx (DImode
);
10492 emit_move_insn (temp
, GEN_INT ((ud4
<< 16) | ud3
));
10493 emit_move_insn (temp1
, GEN_INT ((ud2
<< 16) | ud1
));
10495 emit_insn (gen_rotldi3_insert_3 (dest
, temp
, GEN_INT (32), temp1
,
10496 GEN_INT (0xffffffff)));
10500 /* pli A,H + sldi A,32 + paddi A,A,L. */
10501 emit_move_insn (dest
, GEN_INT ((ud4
<< 16) | ud3
));
10503 emit_move_insn (dest
, gen_rtx_ASHIFT (DImode
, dest
, GEN_INT (32)));
10505 bool can_use_paddi
= REGNO (dest
) != FIRST_GPR_REGNO
;
10507 /* Use paddi for the low 32 bits. */
10508 if (ud2
!= 0 && ud1
!= 0 && can_use_paddi
)
10509 emit_move_insn (dest
, gen_rtx_PLUS (DImode
, dest
,
10510 GEN_INT ((ud2
<< 16) | ud1
)));
10512 /* Use oris, ori for low 32 bits. */
10513 if (ud2
!= 0 && (ud1
== 0 || !can_use_paddi
))
10514 emit_move_insn (dest
,
10515 gen_rtx_IOR (DImode
, dest
, GEN_INT (ud2
<< 16)));
10516 if (ud1
!= 0 && (ud2
== 0 || !can_use_paddi
))
10517 emit_move_insn (dest
, gen_rtx_IOR (DImode
, dest
, GEN_INT (ud1
)));
10522 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10524 emit_move_insn (temp
, GEN_INT (sext_hwi (ud4
<< 16, 32)));
10526 emit_move_insn (temp
, gen_rtx_IOR (DImode
, temp
, GEN_INT (ud3
)));
10528 emit_move_insn (ud2
!= 0 || ud1
!= 0 ? temp
: dest
,
10529 gen_rtx_ASHIFT (DImode
, temp
, GEN_INT (32)));
10531 emit_move_insn (ud1
!= 0 ? temp
: dest
,
10532 gen_rtx_IOR (DImode
, temp
, GEN_INT (ud2
<< 16)));
10534 emit_move_insn (dest
, gen_rtx_IOR (DImode
, temp
, GEN_INT (ud1
)));
10538 /* Helper for the following. Get rid of [r+r] memory refs
10539 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10542 rs6000_eliminate_indexed_memrefs (rtx operands
[2])
10544 if (MEM_P (operands
[0])
10545 && !REG_P (XEXP (operands
[0], 0))
10546 && ! legitimate_constant_pool_address_p (XEXP (operands
[0], 0),
10547 GET_MODE (operands
[0]), false))
10549 = replace_equiv_address (operands
[0],
10550 copy_addr_to_reg (XEXP (operands
[0], 0)));
10552 if (MEM_P (operands
[1])
10553 && !REG_P (XEXP (operands
[1], 0))
10554 && ! legitimate_constant_pool_address_p (XEXP (operands
[1], 0),
10555 GET_MODE (operands
[1]), false))
10557 = replace_equiv_address (operands
[1],
10558 copy_addr_to_reg (XEXP (operands
[1], 0)));
10561 /* Generate a vector of constants to permute MODE for a little-endian
10562 storage operation by swapping the two halves of a vector. */
10564 rs6000_const_vec (machine_mode mode
)
10592 v
= rtvec_alloc (subparts
);
10594 for (i
= 0; i
< subparts
/ 2; ++i
)
10595 RTVEC_ELT (v
, i
) = gen_rtx_CONST_INT (DImode
, i
+ subparts
/ 2);
10596 for (i
= subparts
/ 2; i
< subparts
; ++i
)
10597 RTVEC_ELT (v
, i
) = gen_rtx_CONST_INT (DImode
, i
- subparts
/ 2);
10602 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10603 store operation. */
10605 rs6000_emit_le_vsx_permute (rtx dest
, rtx source
, machine_mode mode
)
10607 gcc_assert (!altivec_indexed_or_indirect_operand (dest
, mode
));
10608 gcc_assert (!altivec_indexed_or_indirect_operand (source
, mode
));
10610 /* Scalar permutations are easier to express in integer modes rather than
10611 floating-point modes, so cast them here. We use V1TImode instead
10612 of TImode to ensure that the values don't go through GPRs. */
10613 if (FLOAT128_VECTOR_P (mode
))
10615 dest
= gen_lowpart (V1TImode
, dest
);
10616 source
= gen_lowpart (V1TImode
, source
);
10620 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10622 if (mode
== TImode
|| mode
== V1TImode
)
10623 emit_insn (gen_rtx_SET (dest
, gen_rtx_ROTATE (mode
, source
,
10627 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rs6000_const_vec (mode
));
10628 emit_insn (gen_rtx_SET (dest
, gen_rtx_VEC_SELECT (mode
, source
, par
)));
10632 /* Emit a little-endian load from vector memory location SOURCE to VSX
10633 register DEST in mode MODE. The load is done with two permuting
10634 insn's that represent an lxvd2x and xxpermdi. */
10636 rs6000_emit_le_vsx_load (rtx dest
, rtx source
, machine_mode mode
)
10638 /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
10640 if (mode
== TImode
|| mode
== V1TImode
)
10643 dest
= gen_lowpart (V2DImode
, dest
);
10644 source
= adjust_address (source
, V2DImode
, 0);
10647 rtx tmp
= can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest
) : dest
;
10648 rs6000_emit_le_vsx_permute (tmp
, source
, mode
);
10649 rs6000_emit_le_vsx_permute (dest
, tmp
, mode
);
10652 /* Emit a little-endian store to vector memory location DEST from VSX
10653 register SOURCE in mode MODE. The store is done with two permuting
10654 insn's that represent an xxpermdi and an stxvd2x. */
10656 rs6000_emit_le_vsx_store (rtx dest
, rtx source
, machine_mode mode
)
10658 /* This should never be called after LRA. */
10659 gcc_assert (can_create_pseudo_p ());
10661 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10663 if (mode
== TImode
|| mode
== V1TImode
)
10666 dest
= adjust_address (dest
, V2DImode
, 0);
10667 source
= gen_lowpart (V2DImode
, source
);
10670 rtx tmp
= gen_reg_rtx_and_attrs (source
);
10671 rs6000_emit_le_vsx_permute (tmp
, source
, mode
);
10672 rs6000_emit_le_vsx_permute (dest
, tmp
, mode
);
10675 /* Emit a sequence representing a little-endian VSX load or store,
10676 moving data from SOURCE to DEST in mode MODE. This is done
10677 separately from rs6000_emit_move to ensure it is called only
10678 during expand. LE VSX loads and stores introduced later are
10679 handled with a split. The expand-time RTL generation allows
10680 us to optimize away redundant pairs of register-permutes. */
10682 rs6000_emit_le_vsx_move (rtx dest
, rtx source
, machine_mode mode
)
10684 gcc_assert (!BYTES_BIG_ENDIAN
10685 && VECTOR_MEM_VSX_P (mode
)
10686 && !TARGET_P9_VECTOR
10687 && !gpr_or_gpr_p (dest
, source
)
10688 && (MEM_P (source
) ^ MEM_P (dest
)));
10690 if (MEM_P (source
))
10692 gcc_assert (REG_P (dest
) || SUBREG_P (dest
));
10693 rs6000_emit_le_vsx_load (dest
, source
, mode
);
10697 if (!REG_P (source
))
10698 source
= force_reg (mode
, source
);
10699 rs6000_emit_le_vsx_store (dest
, source
, mode
);
10703 /* Return whether a SFmode or SImode move can be done without converting one
10704 mode to another. This arrises when we have:
10706 (SUBREG:SF (REG:SI ...))
10707 (SUBREG:SI (REG:SF ...))
10709 and one of the values is in a floating point/vector register, where SFmode
10710 scalars are stored in DFmode format. */
10713 valid_sf_si_move (rtx dest
, rtx src
, machine_mode mode
)
10715 if (TARGET_ALLOW_SF_SUBREG
)
10718 if (mode
!= SFmode
&& GET_MODE_CLASS (mode
) != MODE_INT
)
10721 if (!SUBREG_P (src
) || !sf_subreg_operand (src
, mode
))
10724 /*. Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10725 if (SUBREG_P (dest
))
10727 rtx dest_subreg
= SUBREG_REG (dest
);
10728 rtx src_subreg
= SUBREG_REG (src
);
10729 return GET_MODE (dest_subreg
) == GET_MODE (src_subreg
);
10736 /* Helper function to change moves with:
10738 (SUBREG:SF (REG:SI)) and
10739 (SUBREG:SI (REG:SF))
10741 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10742 values are stored as DFmode values in the VSX registers. We need to convert
10743 the bits before we can use a direct move or operate on the bits in the
10744 vector register as an integer type.
10746 Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)). */
10749 rs6000_emit_move_si_sf_subreg (rtx dest
, rtx source
, machine_mode mode
)
10751 if (TARGET_DIRECT_MOVE_64BIT
&& !reload_completed
10752 && (!SUBREG_P (dest
) || !sf_subreg_operand (dest
, mode
))
10753 && SUBREG_P (source
) && sf_subreg_operand (source
, mode
))
10755 rtx inner_source
= SUBREG_REG (source
);
10756 machine_mode inner_mode
= GET_MODE (inner_source
);
10758 if (mode
== SImode
&& inner_mode
== SFmode
)
10760 emit_insn (gen_movsi_from_sf (dest
, inner_source
));
10764 if (mode
== SFmode
&& inner_mode
== SImode
)
10766 emit_insn (gen_movsf_from_si (dest
, inner_source
));
10774 /* Emit a move from SOURCE to DEST in mode MODE. */
10776 rs6000_emit_move (rtx dest
, rtx source
, machine_mode mode
)
10779 operands
[0] = dest
;
10780 operands
[1] = source
;
10782 if (TARGET_DEBUG_ADDR
)
10785 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10786 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10787 GET_MODE_NAME (mode
),
10790 can_create_pseudo_p ());
10792 fprintf (stderr
, "source:\n");
10793 debug_rtx (source
);
10796 /* Check that we get CONST_WIDE_INT only when we should. */
10797 if (CONST_WIDE_INT_P (operands
[1])
10798 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
10799 gcc_unreachable ();
10801 #ifdef HAVE_AS_GNU_ATTRIBUTE
10802 /* If we use a long double type, set the flags in .gnu_attribute that say
10803 what the long double type is. This is to allow the linker's warning
10804 message for the wrong long double to be useful, even if the function does
10805 not do a call (for example, doing a 128-bit add on power9 if the long
10806 double type is IEEE 128-bit. Do not set this if __ibm128 or __floa128 are
10807 used if they aren't the default long dobule type. */
10808 if (rs6000_gnu_attr
&& (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
|| TARGET_64BIT
))
10810 if (TARGET_LONG_DOUBLE_128
&& (mode
== TFmode
|| mode
== TCmode
))
10811 rs6000_passes_float
= rs6000_passes_long_double
= true;
10813 else if (!TARGET_LONG_DOUBLE_128
&& (mode
== DFmode
|| mode
== DCmode
))
10814 rs6000_passes_float
= rs6000_passes_long_double
= true;
10818 /* See if we need to special case SImode/SFmode SUBREG moves. */
10819 if ((mode
== SImode
|| mode
== SFmode
) && SUBREG_P (source
)
10820 && rs6000_emit_move_si_sf_subreg (dest
, source
, mode
))
10823 /* Check if GCC is setting up a block move that will end up using FP
10824 registers as temporaries. We must make sure this is acceptable. */
10825 if (MEM_P (operands
[0])
10826 && MEM_P (operands
[1])
10828 && (rs6000_slow_unaligned_access (DImode
, MEM_ALIGN (operands
[0]))
10829 || rs6000_slow_unaligned_access (DImode
, MEM_ALIGN (operands
[1])))
10830 && ! (rs6000_slow_unaligned_access (SImode
,
10831 (MEM_ALIGN (operands
[0]) > 32
10832 ? 32 : MEM_ALIGN (operands
[0])))
10833 || rs6000_slow_unaligned_access (SImode
,
10834 (MEM_ALIGN (operands
[1]) > 32
10835 ? 32 : MEM_ALIGN (operands
[1]))))
10836 && ! MEM_VOLATILE_P (operands
[0])
10837 && ! MEM_VOLATILE_P (operands
[1]))
10839 emit_move_insn (adjust_address (operands
[0], SImode
, 0),
10840 adjust_address (operands
[1], SImode
, 0));
10841 emit_move_insn (adjust_address (copy_rtx (operands
[0]), SImode
, 4),
10842 adjust_address (copy_rtx (operands
[1]), SImode
, 4));
10846 if (can_create_pseudo_p () && MEM_P (operands
[0])
10847 && !gpc_reg_operand (operands
[1], mode
))
10848 operands
[1] = force_reg (mode
, operands
[1]);
10850 /* Recognize the case where operand[1] is a reference to thread-local
10851 data and load its address to a register. */
10852 if (tls_referenced_p (operands
[1]))
10854 enum tls_model model
;
10855 rtx tmp
= operands
[1];
10858 if (GET_CODE (tmp
) == CONST
&& GET_CODE (XEXP (tmp
, 0)) == PLUS
)
10860 addend
= XEXP (XEXP (tmp
, 0), 1);
10861 tmp
= XEXP (XEXP (tmp
, 0), 0);
10864 gcc_assert (SYMBOL_REF_P (tmp
));
10865 model
= SYMBOL_REF_TLS_MODEL (tmp
);
10866 gcc_assert (model
!= 0);
10868 tmp
= rs6000_legitimize_tls_address (tmp
, model
);
10871 tmp
= gen_rtx_PLUS (mode
, tmp
, addend
);
10872 tmp
= force_operand (tmp
, operands
[0]);
10877 /* 128-bit constant floating-point values on Darwin should really be loaded
10878 as two parts. However, this premature splitting is a problem when DFmode
10879 values can go into Altivec registers. */
10880 if (TARGET_MACHO
&& CONST_DOUBLE_P (operands
[1]) && FLOAT128_IBM_P (mode
)
10881 && !reg_addr
[DFmode
].scalar_in_vmx_p
)
10883 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
, 0),
10884 simplify_gen_subreg (DFmode
, operands
[1], mode
, 0),
10886 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
,
10887 GET_MODE_SIZE (DFmode
)),
10888 simplify_gen_subreg (DFmode
, operands
[1], mode
,
10889 GET_MODE_SIZE (DFmode
)),
10894 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10895 p1:SD) if p1 is not of floating point class and p0 is spilled as
10896 we can have no analogous movsd_store for this. */
10897 if (lra_in_progress
&& mode
== DDmode
10898 && REG_P (operands
[0]) && !HARD_REGISTER_P (operands
[0])
10899 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
10900 && SUBREG_P (operands
[1]) && REG_P (SUBREG_REG (operands
[1]))
10901 && GET_MODE (SUBREG_REG (operands
[1])) == SDmode
)
10904 int regno
= REGNO (SUBREG_REG (operands
[1]));
10906 if (!HARD_REGISTER_NUM_P (regno
))
10908 cl
= reg_preferred_class (regno
);
10909 regno
= reg_renumber
[regno
];
10911 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][1];
10913 if (regno
>= 0 && ! FP_REGNO_P (regno
))
10916 operands
[0] = gen_lowpart_SUBREG (SDmode
, operands
[0]);
10917 operands
[1] = SUBREG_REG (operands
[1]);
10920 if (lra_in_progress
10922 && REG_P (operands
[0]) && !HARD_REGISTER_P (operands
[0])
10923 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
10924 && (REG_P (operands
[1])
10925 || (SUBREG_P (operands
[1]) && REG_P (SUBREG_REG (operands
[1])))))
10927 int regno
= reg_or_subregno (operands
[1]);
10930 if (!HARD_REGISTER_NUM_P (regno
))
10932 cl
= reg_preferred_class (regno
);
10933 gcc_assert (cl
!= NO_REGS
);
10934 regno
= reg_renumber
[regno
];
10936 regno
= ira_class_hard_regs
[cl
][0];
10938 if (FP_REGNO_P (regno
))
10940 if (GET_MODE (operands
[0]) != DDmode
)
10941 operands
[0] = gen_rtx_SUBREG (DDmode
, operands
[0], 0);
10942 emit_insn (gen_movsd_store (operands
[0], operands
[1]));
10944 else if (INT_REGNO_P (regno
))
10945 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
10950 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10951 p:DD)) if p0 is not of floating point class and p1 is spilled as
10952 we can have no analogous movsd_load for this. */
10953 if (lra_in_progress
&& mode
== DDmode
10954 && SUBREG_P (operands
[0]) && REG_P (SUBREG_REG (operands
[0]))
10955 && GET_MODE (SUBREG_REG (operands
[0])) == SDmode
10956 && REG_P (operands
[1]) && !HARD_REGISTER_P (operands
[1])
10957 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
10960 int regno
= REGNO (SUBREG_REG (operands
[0]));
10962 if (!HARD_REGISTER_NUM_P (regno
))
10964 cl
= reg_preferred_class (regno
);
10965 regno
= reg_renumber
[regno
];
10967 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][0];
10969 if (regno
>= 0 && ! FP_REGNO_P (regno
))
10972 operands
[0] = SUBREG_REG (operands
[0]);
10973 operands
[1] = gen_lowpart_SUBREG (SDmode
, operands
[1]);
10976 if (lra_in_progress
10978 && (REG_P (operands
[0])
10979 || (SUBREG_P (operands
[0]) && REG_P (SUBREG_REG (operands
[0]))))
10980 && REG_P (operands
[1]) && !HARD_REGISTER_P (operands
[1])
10981 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
10983 int regno
= reg_or_subregno (operands
[0]);
10986 if (!HARD_REGISTER_NUM_P (regno
))
10988 cl
= reg_preferred_class (regno
);
10989 gcc_assert (cl
!= NO_REGS
);
10990 regno
= reg_renumber
[regno
];
10992 regno
= ira_class_hard_regs
[cl
][0];
10994 if (FP_REGNO_P (regno
))
10996 if (GET_MODE (operands
[1]) != DDmode
)
10997 operands
[1] = gen_rtx_SUBREG (DDmode
, operands
[1], 0);
10998 emit_insn (gen_movsd_load (operands
[0], operands
[1]));
11000 else if (INT_REGNO_P (regno
))
11001 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
11007 /* FIXME: In the long term, this switch statement should go away
11008 and be replaced by a sequence of tests based on things like
11014 if (CONSTANT_P (operands
[1])
11015 && !CONST_INT_P (operands
[1]))
11016 operands
[1] = force_const_mem (mode
, operands
[1]);
11023 if (FLOAT128_2REG_P (mode
))
11024 rs6000_eliminate_indexed_memrefs (operands
);
11031 if (CONSTANT_P (operands
[1])
11032 && ! easy_fp_constant (operands
[1], mode
))
11033 operands
[1] = force_const_mem (mode
, operands
[1]);
11043 if (CONSTANT_P (operands
[1])
11044 && !easy_vector_constant (operands
[1], mode
))
11045 operands
[1] = force_const_mem (mode
, operands
[1]);
11050 if (CONST_INT_P (operands
[1]) && INTVAL (operands
[1]) != 0)
11051 error ("%qs is an opaque type, and you cannot set it to other values",
11052 (mode
== OOmode
) ? "__vector_pair" : "__vector_quad");
11056 if (CONST_INT_P (operands
[1]))
11057 error ("%qs is an opaque type, and you cannot set it to constants",
11063 /* Use default pattern for address of ELF small data */
11066 && DEFAULT_ABI
== ABI_V4
11067 && (SYMBOL_REF_P (operands
[1])
11068 || GET_CODE (operands
[1]) == CONST
)
11069 && small_data_operand (operands
[1], mode
))
11071 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11075 /* Use the default pattern for loading up PC-relative addresses. */
11076 if (TARGET_PCREL
&& mode
== Pmode
11077 && pcrel_local_or_external_address (operands
[1], Pmode
))
11079 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11083 if (DEFAULT_ABI
== ABI_V4
11084 && mode
== Pmode
&& mode
== SImode
11085 && flag_pic
== 1 && got_operand (operands
[1], mode
))
11087 emit_insn (gen_movsi_got (operands
[0], operands
[1]));
11091 if ((TARGET_ELF
|| DEFAULT_ABI
== ABI_DARWIN
)
11092 && TARGET_NO_TOC_OR_PCREL
11095 && CONSTANT_P (operands
[1])
11096 && GET_CODE (operands
[1]) != HIGH
11097 && !CONST_INT_P (operands
[1]))
11099 rtx target
= (!can_create_pseudo_p ()
11101 : gen_reg_rtx (mode
));
11103 /* If this is a function address on -mcall-aixdesc,
11104 convert it to the address of the descriptor. */
11105 if (DEFAULT_ABI
== ABI_AIX
11106 && SYMBOL_REF_P (operands
[1])
11107 && XSTR (operands
[1], 0)[0] == '.')
11109 const char *name
= XSTR (operands
[1], 0);
11111 while (*name
== '.')
11113 new_ref
= gen_rtx_SYMBOL_REF (Pmode
, name
);
11114 CONSTANT_POOL_ADDRESS_P (new_ref
)
11115 = CONSTANT_POOL_ADDRESS_P (operands
[1]);
11116 SYMBOL_REF_FLAGS (new_ref
) = SYMBOL_REF_FLAGS (operands
[1]);
11117 SYMBOL_REF_USED (new_ref
) = SYMBOL_REF_USED (operands
[1]);
11118 SYMBOL_REF_DATA (new_ref
) = SYMBOL_REF_DATA (operands
[1]);
11119 operands
[1] = new_ref
;
11122 if (DEFAULT_ABI
== ABI_DARWIN
)
11125 /* This is not PIC code, but could require the subset of
11126 indirections used by mdynamic-no-pic. */
11127 if (MACHO_DYNAMIC_NO_PIC_P
)
11129 /* Take care of any required data indirection. */
11130 operands
[1] = rs6000_machopic_legitimize_pic_address (
11131 operands
[1], mode
, operands
[0]);
11132 if (operands
[0] != operands
[1])
11133 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11137 emit_insn (gen_macho_high (Pmode
, target
, operands
[1]));
11138 emit_insn (gen_macho_low (Pmode
, operands
[0],
11139 target
, operands
[1]));
11143 emit_insn (gen_elf_high (target
, operands
[1]));
11144 emit_insn (gen_elf_low (operands
[0], target
, operands
[1]));
11148 /* If this is a SYMBOL_REF that refers to a constant pool entry,
11149 and we have put it in the TOC, we just need to make a TOC-relative
11150 reference to it. */
11152 && SYMBOL_REF_P (operands
[1])
11153 && use_toc_relative_ref (operands
[1], mode
))
11154 operands
[1] = create_TOC_reference (operands
[1], operands
[0]);
11155 else if (mode
== Pmode
11156 && CONSTANT_P (operands
[1])
11157 && GET_CODE (operands
[1]) != HIGH
11158 && ((REG_P (operands
[0])
11159 && FP_REGNO_P (REGNO (operands
[0])))
11160 || !CONST_INT_P (operands
[1])
11161 || (num_insns_constant (operands
[1], mode
)
11162 > (TARGET_CMODEL
!= CMODEL_SMALL
? 3 : 2)))
11163 && !toc_relative_expr_p (operands
[1], false, NULL
, NULL
)
11164 && (TARGET_CMODEL
== CMODEL_SMALL
11165 || can_create_pseudo_p ()
11166 || (REG_P (operands
[0])
11167 && INT_REG_OK_FOR_BASE_P (operands
[0], true))))
11171 /* Darwin uses a special PIC legitimizer. */
11172 if (DEFAULT_ABI
== ABI_DARWIN
&& MACHOPIC_INDIRECT
)
11175 rs6000_machopic_legitimize_pic_address (operands
[1], mode
,
11177 if (operands
[0] != operands
[1])
11178 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11183 /* If we are to limit the number of things we put in the TOC and
11184 this is a symbol plus a constant we can add in one insn,
11185 just put the symbol in the TOC and add the constant. */
11186 if (GET_CODE (operands
[1]) == CONST
11187 && TARGET_NO_SUM_IN_TOC
11188 && GET_CODE (XEXP (operands
[1], 0)) == PLUS
11189 && add_operand (XEXP (XEXP (operands
[1], 0), 1), mode
)
11190 && (GET_CODE (XEXP (XEXP (operands
[1], 0), 0)) == LABEL_REF
11191 || SYMBOL_REF_P (XEXP (XEXP (operands
[1], 0), 0)))
11192 && ! side_effects_p (operands
[0]))
11195 force_const_mem (mode
, XEXP (XEXP (operands
[1], 0), 0));
11196 rtx other
= XEXP (XEXP (operands
[1], 0), 1);
11198 sym
= force_reg (mode
, sym
);
11199 emit_insn (gen_add3_insn (operands
[0], sym
, other
));
11203 operands
[1] = force_const_mem (mode
, operands
[1]);
11206 && SYMBOL_REF_P (XEXP (operands
[1], 0))
11207 && use_toc_relative_ref (XEXP (operands
[1], 0), mode
))
11209 rtx tocref
= create_TOC_reference (XEXP (operands
[1], 0),
11211 operands
[1] = gen_const_mem (mode
, tocref
);
11212 set_mem_alias_set (operands
[1], get_TOC_alias_set ());
11218 if (!VECTOR_MEM_VSX_P (TImode
))
11219 rs6000_eliminate_indexed_memrefs (operands
);
11223 rs6000_eliminate_indexed_memrefs (operands
);
11227 fatal_insn ("bad move", gen_rtx_SET (dest
, source
));
11230 /* Above, we may have called force_const_mem which may have returned
11231 an invalid address. If we can, fix this up; otherwise, reload will
11232 have to deal with it. */
11233 if (MEM_P (operands
[1]))
11234 operands
[1] = validize_mem (operands
[1]);
11236 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11240 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
11242 init_float128_ibm (machine_mode mode
)
11244 if (!TARGET_XL_COMPAT
)
11246 set_optab_libfunc (add_optab
, mode
, "__gcc_qadd");
11247 set_optab_libfunc (sub_optab
, mode
, "__gcc_qsub");
11248 set_optab_libfunc (smul_optab
, mode
, "__gcc_qmul");
11249 set_optab_libfunc (sdiv_optab
, mode
, "__gcc_qdiv");
11251 if (!TARGET_HARD_FLOAT
)
11253 set_optab_libfunc (neg_optab
, mode
, "__gcc_qneg");
11254 set_optab_libfunc (eq_optab
, mode
, "__gcc_qeq");
11255 set_optab_libfunc (ne_optab
, mode
, "__gcc_qne");
11256 set_optab_libfunc (gt_optab
, mode
, "__gcc_qgt");
11257 set_optab_libfunc (ge_optab
, mode
, "__gcc_qge");
11258 set_optab_libfunc (lt_optab
, mode
, "__gcc_qlt");
11259 set_optab_libfunc (le_optab
, mode
, "__gcc_qle");
11260 set_optab_libfunc (unord_optab
, mode
, "__gcc_qunord");
11262 set_conv_libfunc (sext_optab
, mode
, SFmode
, "__gcc_stoq");
11263 set_conv_libfunc (sext_optab
, mode
, DFmode
, "__gcc_dtoq");
11264 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "__gcc_qtos");
11265 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "__gcc_qtod");
11266 set_conv_libfunc (sfix_optab
, SImode
, mode
, "__gcc_qtoi");
11267 set_conv_libfunc (ufix_optab
, SImode
, mode
, "__gcc_qtou");
11268 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "__gcc_itoq");
11269 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "__gcc_utoq");
11274 set_optab_libfunc (add_optab
, mode
, "_xlqadd");
11275 set_optab_libfunc (sub_optab
, mode
, "_xlqsub");
11276 set_optab_libfunc (smul_optab
, mode
, "_xlqmul");
11277 set_optab_libfunc (sdiv_optab
, mode
, "_xlqdiv");
11280 /* Add various conversions for IFmode to use the traditional TFmode
11282 if (mode
== IFmode
)
11284 set_conv_libfunc (sext_optab
, mode
, SDmode
, "__dpd_extendsdtf");
11285 set_conv_libfunc (sext_optab
, mode
, DDmode
, "__dpd_extendddtf");
11286 set_conv_libfunc (trunc_optab
, mode
, TDmode
, "__dpd_trunctdtf");
11287 set_conv_libfunc (trunc_optab
, SDmode
, mode
, "__dpd_trunctfsd");
11288 set_conv_libfunc (trunc_optab
, DDmode
, mode
, "__dpd_trunctfdd");
11289 set_conv_libfunc (sext_optab
, TDmode
, mode
, "__dpd_extendtftd");
11291 set_conv_libfunc (sfix_optab
, DImode
, mode
, "__fixtfdi");
11292 set_conv_libfunc (ufix_optab
, DImode
, mode
, "__fixunstfdi");
11294 set_conv_libfunc (sfloat_optab
, mode
, DImode
, "__floatditf");
11295 set_conv_libfunc (ufloat_optab
, mode
, DImode
, "__floatunditf");
11297 if (TARGET_POWERPC64
)
11299 set_conv_libfunc (sfix_optab
, TImode
, mode
, "__fixtfti");
11300 set_conv_libfunc (ufix_optab
, TImode
, mode
, "__fixunstfti");
11301 set_conv_libfunc (sfloat_optab
, mode
, TImode
, "__floattitf");
11302 set_conv_libfunc (ufloat_optab
, mode
, TImode
, "__floatuntitf");
11307 /* Set up IEEE 128-bit floating point routines. Use different names if the
11308 arguments can be passed in a vector register. The historical PowerPC
11309 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
11310 continue to use that if we aren't using vector registers to pass IEEE
11311 128-bit floating point. */
11314 init_float128_ieee (machine_mode mode
)
11316 if (FLOAT128_VECTOR_P (mode
))
11318 set_optab_libfunc (add_optab
, mode
, "__addkf3");
11319 set_optab_libfunc (sub_optab
, mode
, "__subkf3");
11320 set_optab_libfunc (neg_optab
, mode
, "__negkf2");
11321 set_optab_libfunc (smul_optab
, mode
, "__mulkf3");
11322 set_optab_libfunc (sdiv_optab
, mode
, "__divkf3");
11323 set_optab_libfunc (sqrt_optab
, mode
, "__sqrtkf2");
11324 set_optab_libfunc (abs_optab
, mode
, "__abskf2");
11325 set_optab_libfunc (powi_optab
, mode
, "__powikf2");
11327 set_optab_libfunc (eq_optab
, mode
, "__eqkf2");
11328 set_optab_libfunc (ne_optab
, mode
, "__nekf2");
11329 set_optab_libfunc (gt_optab
, mode
, "__gtkf2");
11330 set_optab_libfunc (ge_optab
, mode
, "__gekf2");
11331 set_optab_libfunc (lt_optab
, mode
, "__ltkf2");
11332 set_optab_libfunc (le_optab
, mode
, "__lekf2");
11333 set_optab_libfunc (unord_optab
, mode
, "__unordkf2");
11335 set_conv_libfunc (sext_optab
, mode
, SFmode
, "__extendsfkf2");
11336 set_conv_libfunc (sext_optab
, mode
, DFmode
, "__extenddfkf2");
11337 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "__trunckfsf2");
11338 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "__trunckfdf2");
11340 set_conv_libfunc (sext_optab
, mode
, IFmode
, "__trunctfkf2");
11341 if (mode
!= TFmode
&& FLOAT128_IBM_P (TFmode
))
11342 set_conv_libfunc (sext_optab
, mode
, TFmode
, "__trunctfkf2");
11344 set_conv_libfunc (trunc_optab
, IFmode
, mode
, "__extendkftf2");
11345 if (mode
!= TFmode
&& FLOAT128_IBM_P (TFmode
))
11346 set_conv_libfunc (trunc_optab
, TFmode
, mode
, "__extendkftf2");
11348 set_conv_libfunc (sext_optab
, mode
, SDmode
, "__dpd_extendsdkf");
11349 set_conv_libfunc (sext_optab
, mode
, DDmode
, "__dpd_extendddkf");
11350 set_conv_libfunc (trunc_optab
, mode
, TDmode
, "__dpd_trunctdkf");
11351 set_conv_libfunc (trunc_optab
, SDmode
, mode
, "__dpd_trunckfsd");
11352 set_conv_libfunc (trunc_optab
, DDmode
, mode
, "__dpd_trunckfdd");
11353 set_conv_libfunc (sext_optab
, TDmode
, mode
, "__dpd_extendkftd");
11355 set_conv_libfunc (sfix_optab
, SImode
, mode
, "__fixkfsi");
11356 set_conv_libfunc (ufix_optab
, SImode
, mode
, "__fixunskfsi");
11357 set_conv_libfunc (sfix_optab
, DImode
, mode
, "__fixkfdi");
11358 set_conv_libfunc (ufix_optab
, DImode
, mode
, "__fixunskfdi");
11360 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "__floatsikf");
11361 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "__floatunsikf");
11362 set_conv_libfunc (sfloat_optab
, mode
, DImode
, "__floatdikf");
11363 set_conv_libfunc (ufloat_optab
, mode
, DImode
, "__floatundikf");
11365 if (TARGET_POWERPC64
)
11367 set_conv_libfunc (sfix_optab
, TImode
, mode
, "__fixkfti_sw");
11368 set_conv_libfunc (ufix_optab
, TImode
, mode
, "__fixunskfti_sw");
11369 set_conv_libfunc (sfloat_optab
, mode
, TImode
, "__floattikf_sw");
11370 set_conv_libfunc (ufloat_optab
, mode
, TImode
, "__floatuntikf_sw");
11376 set_optab_libfunc (add_optab
, mode
, "_q_add");
11377 set_optab_libfunc (sub_optab
, mode
, "_q_sub");
11378 set_optab_libfunc (neg_optab
, mode
, "_q_neg");
11379 set_optab_libfunc (smul_optab
, mode
, "_q_mul");
11380 set_optab_libfunc (sdiv_optab
, mode
, "_q_div");
11381 if (TARGET_PPC_GPOPT
)
11382 set_optab_libfunc (sqrt_optab
, mode
, "_q_sqrt");
11384 set_optab_libfunc (eq_optab
, mode
, "_q_feq");
11385 set_optab_libfunc (ne_optab
, mode
, "_q_fne");
11386 set_optab_libfunc (gt_optab
, mode
, "_q_fgt");
11387 set_optab_libfunc (ge_optab
, mode
, "_q_fge");
11388 set_optab_libfunc (lt_optab
, mode
, "_q_flt");
11389 set_optab_libfunc (le_optab
, mode
, "_q_fle");
11391 set_conv_libfunc (sext_optab
, mode
, SFmode
, "_q_stoq");
11392 set_conv_libfunc (sext_optab
, mode
, DFmode
, "_q_dtoq");
11393 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "_q_qtos");
11394 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "_q_qtod");
11395 set_conv_libfunc (sfix_optab
, SImode
, mode
, "_q_qtoi");
11396 set_conv_libfunc (ufix_optab
, SImode
, mode
, "_q_qtou");
11397 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "_q_itoq");
11398 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "_q_utoq");
11403 rs6000_init_libfuncs (void)
11405 /* __float128 support. */
11406 if (TARGET_FLOAT128_TYPE
)
11408 init_float128_ibm (IFmode
);
11409 init_float128_ieee (KFmode
);
11412 /* AIX/Darwin/64-bit Linux quad floating point routines. */
11413 if (TARGET_LONG_DOUBLE_128
)
11415 if (!TARGET_IEEEQUAD
)
11416 init_float128_ibm (TFmode
);
11418 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
11420 init_float128_ieee (TFmode
);
11424 /* Emit a potentially record-form instruction, setting DST from SRC.
11425 If DOT is 0, that is all; otherwise, set CCREG to the result of the
11426 signed comparison of DST with zero. If DOT is 1, the generated RTL
11427 doesn't care about the DST result; if DOT is 2, it does. If CCREG
11428 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
11429 a separate COMPARE. */
11432 rs6000_emit_dot_insn (rtx dst
, rtx src
, int dot
, rtx ccreg
)
11436 emit_move_insn (dst
, src
);
11440 if (cc_reg_not_cr0_operand (ccreg
, CCmode
))
11442 emit_move_insn (dst
, src
);
11443 emit_move_insn (ccreg
, gen_rtx_COMPARE (CCmode
, dst
, const0_rtx
));
11447 rtx ccset
= gen_rtx_SET (ccreg
, gen_rtx_COMPARE (CCmode
, src
, const0_rtx
));
11450 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, dst
);
11451 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, ccset
, clobber
)));
11455 rtx set
= gen_rtx_SET (dst
, src
);
11456 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, ccset
, set
)));
11461 /* A validation routine: say whether CODE, a condition code, and MODE
11462 match. The other alternatives either don't make sense or should
11463 never be generated. */
11466 validate_condition_mode (enum rtx_code code
, machine_mode mode
)
11468 gcc_assert ((GET_RTX_CLASS (code
) == RTX_COMPARE
11469 || GET_RTX_CLASS (code
) == RTX_COMM_COMPARE
)
11470 && GET_MODE_CLASS (mode
) == MODE_CC
);
11472 /* These don't make sense. */
11473 gcc_assert ((code
!= GT
&& code
!= LT
&& code
!= GE
&& code
!= LE
)
11474 || mode
!= CCUNSmode
);
11476 gcc_assert ((code
!= GTU
&& code
!= LTU
&& code
!= GEU
&& code
!= LEU
)
11477 || mode
== CCUNSmode
);
11479 gcc_assert (mode
== CCFPmode
11480 || (code
!= ORDERED
&& code
!= UNORDERED
11481 && code
!= UNEQ
&& code
!= LTGT
11482 && code
!= UNGT
&& code
!= UNLT
11483 && code
!= UNGE
&& code
!= UNLE
));
11485 /* These are invalid; the information is not there. */
11486 gcc_assert (mode
!= CCEQmode
|| code
== EQ
|| code
== NE
);
11490 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
11491 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
11492 not zero, store there the bit offset (counted from the right) where
11493 the single stretch of 1 bits begins; and similarly for B, the bit
11494 offset where it ends. */
11497 rs6000_is_valid_mask (rtx mask
, int *b
, int *e
, machine_mode mode
)
11499 unsigned HOST_WIDE_INT val
= INTVAL (mask
);
11500 unsigned HOST_WIDE_INT bit
;
11502 int n
= GET_MODE_PRECISION (mode
);
11504 if (mode
!= DImode
&& mode
!= SImode
)
11507 if (INTVAL (mask
) >= 0)
11510 ne
= exact_log2 (bit
);
11511 nb
= exact_log2 (val
+ bit
);
11513 else if (val
+ 1 == 0)
11522 nb
= exact_log2 (bit
);
11523 ne
= exact_log2 (val
+ bit
);
11528 ne
= exact_log2 (bit
);
11529 if (val
+ bit
== 0)
11537 if (nb
< 0 || ne
< 0 || nb
>= n
|| ne
>= n
)
11549 rs6000_is_valid_rotate_dot_mask (rtx mask
, machine_mode mode
)
11552 return rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
) && nb
>= ne
&& ne
> 0;
11555 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
11556 or rldicr instruction, to implement an AND with it in mode MODE. */
11559 rs6000_is_valid_and_mask (rtx mask
, machine_mode mode
)
11563 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
11566 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
11568 if (mode
== DImode
)
11569 return (ne
== 0 || nb
== 63 || (nb
< 32 && ne
<= nb
));
11571 /* For SImode, rlwinm can do everything. */
11572 if (mode
== SImode
)
11573 return (nb
< 32 && ne
< 32);
11578 /* Return the instruction template for an AND with mask in mode MODE, with
11579 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11582 rs6000_insn_for_and_mask (machine_mode mode
, rtx
*operands
, bool dot
)
11586 if (!rs6000_is_valid_mask (operands
[2], &nb
, &ne
, mode
))
11587 gcc_unreachable ();
11589 if (mode
== DImode
&& ne
== 0)
11591 operands
[3] = GEN_INT (63 - nb
);
11593 return "rldicl. %0,%1,0,%3";
11594 return "rldicl %0,%1,0,%3";
11597 if (mode
== DImode
&& nb
== 63)
11599 operands
[3] = GEN_INT (63 - ne
);
11601 return "rldicr. %0,%1,0,%3";
11602 return "rldicr %0,%1,0,%3";
11605 if (nb
< 32 && ne
< 32)
11607 operands
[3] = GEN_INT (31 - nb
);
11608 operands
[4] = GEN_INT (31 - ne
);
11610 return "rlwinm. %0,%1,0,%3,%4";
11611 return "rlwinm %0,%1,0,%3,%4";
11614 gcc_unreachable ();
11617 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
11618 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
11619 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
11622 rs6000_is_valid_shift_mask (rtx mask
, rtx shift
, machine_mode mode
)
11626 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
11629 int n
= GET_MODE_PRECISION (mode
);
11632 if (CONST_INT_P (XEXP (shift
, 1)))
11634 sh
= INTVAL (XEXP (shift
, 1));
11635 if (sh
< 0 || sh
>= n
)
11639 rtx_code code
= GET_CODE (shift
);
11641 /* Convert any shift by 0 to a rotate, to simplify below code. */
11645 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11646 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
11648 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
11654 /* DImode rotates need rld*. */
11655 if (mode
== DImode
&& code
== ROTATE
)
11656 return (nb
== 63 || ne
== 0 || ne
== sh
);
11658 /* SImode rotates need rlw*. */
11659 if (mode
== SImode
&& code
== ROTATE
)
11660 return (nb
< 32 && ne
< 32 && sh
< 32);
11662 /* Wrap-around masks are only okay for rotates. */
11666 /* Variable shifts are only okay for rotates. */
11670 /* Don't allow ASHIFT if the mask is wrong for that. */
11671 if (code
== ASHIFT
&& ne
< sh
)
11674 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
11675 if the mask is wrong for that. */
11676 if (nb
< 32 && ne
< 32 && sh
< 32
11677 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
11680 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
11681 if the mask is wrong for that. */
11682 if (code
== LSHIFTRT
)
11684 if (nb
== 63 || ne
== 0 || ne
== sh
)
11685 return !(code
== LSHIFTRT
&& nb
>= sh
);
11690 /* Return the instruction template for a shift with mask in mode MODE, with
11691 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11694 rs6000_insn_for_shift_mask (machine_mode mode
, rtx
*operands
, bool dot
)
11698 if (!rs6000_is_valid_mask (operands
[3], &nb
, &ne
, mode
))
11699 gcc_unreachable ();
11701 if (mode
== DImode
&& ne
== 0)
11703 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
11704 operands
[2] = GEN_INT (64 - INTVAL (operands
[2]));
11705 operands
[3] = GEN_INT (63 - nb
);
11707 return "rld%I2cl. %0,%1,%2,%3";
11708 return "rld%I2cl %0,%1,%2,%3";
11711 if (mode
== DImode
&& nb
== 63)
11713 operands
[3] = GEN_INT (63 - ne
);
11715 return "rld%I2cr. %0,%1,%2,%3";
11716 return "rld%I2cr %0,%1,%2,%3";
11720 && GET_CODE (operands
[4]) != LSHIFTRT
11721 && CONST_INT_P (operands
[2])
11722 && ne
== INTVAL (operands
[2]))
11724 operands
[3] = GEN_INT (63 - nb
);
11726 return "rld%I2c. %0,%1,%2,%3";
11727 return "rld%I2c %0,%1,%2,%3";
11730 if (nb
< 32 && ne
< 32)
11732 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
11733 operands
[2] = GEN_INT (32 - INTVAL (operands
[2]));
11734 operands
[3] = GEN_INT (31 - nb
);
11735 operands
[4] = GEN_INT (31 - ne
);
11736 /* This insn can also be a 64-bit rotate with mask that really makes
11737 it just a shift right (with mask); the %h below are to adjust for
11738 that situation (shift count is >= 32 in that case). */
11740 return "rlw%I2nm. %0,%1,%h2,%3,%4";
11741 return "rlw%I2nm %0,%1,%h2,%3,%4";
11744 gcc_unreachable ();
11747 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
11748 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
11749 ASHIFT, or LSHIFTRT) in mode MODE. */
11752 rs6000_is_valid_insert_mask (rtx mask
, rtx shift
, machine_mode mode
)
11756 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
11759 int n
= GET_MODE_PRECISION (mode
);
11761 int sh
= INTVAL (XEXP (shift
, 1));
11762 if (sh
< 0 || sh
>= n
)
11765 rtx_code code
= GET_CODE (shift
);
11767 /* Convert any shift by 0 to a rotate, to simplify below code. */
11771 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11772 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
11774 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
11780 /* DImode rotates need rldimi. */
11781 if (mode
== DImode
&& code
== ROTATE
)
11784 /* SImode rotates need rlwimi. */
11785 if (mode
== SImode
&& code
== ROTATE
)
11786 return (nb
< 32 && ne
< 32 && sh
< 32);
11788 /* Wrap-around masks are only okay for rotates. */
11792 /* Don't allow ASHIFT if the mask is wrong for that. */
11793 if (code
== ASHIFT
&& ne
< sh
)
11796 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
11797 if the mask is wrong for that. */
11798 if (nb
< 32 && ne
< 32 && sh
< 32
11799 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
11802 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
11803 if the mask is wrong for that. */
11804 if (code
== LSHIFTRT
)
11807 return !(code
== LSHIFTRT
&& nb
>= sh
);
11812 /* Return the instruction template for an insert with mask in mode MODE, with
11813 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11816 rs6000_insn_for_insert_mask (machine_mode mode
, rtx
*operands
, bool dot
)
11820 if (!rs6000_is_valid_mask (operands
[3], &nb
, &ne
, mode
))
11821 gcc_unreachable ();
11823 /* Prefer rldimi because rlwimi is cracked. */
11824 if (TARGET_POWERPC64
11825 && (!dot
|| mode
== DImode
)
11826 && GET_CODE (operands
[4]) != LSHIFTRT
11827 && ne
== INTVAL (operands
[2]))
11829 operands
[3] = GEN_INT (63 - nb
);
11831 return "rldimi. %0,%1,%2,%3";
11832 return "rldimi %0,%1,%2,%3";
11835 if (nb
< 32 && ne
< 32)
11837 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
11838 operands
[2] = GEN_INT (32 - INTVAL (operands
[2]));
11839 operands
[3] = GEN_INT (31 - nb
);
11840 operands
[4] = GEN_INT (31 - ne
);
11842 return "rlwimi. %0,%1,%2,%3,%4";
11843 return "rlwimi %0,%1,%2,%3,%4";
11846 gcc_unreachable ();
11849 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
11850 using two machine instructions. */
11853 rs6000_is_valid_2insn_and (rtx c
, machine_mode mode
)
11855 /* There are two kinds of AND we can handle with two insns:
11856 1) those we can do with two rl* insn;
11859 We do not handle that last case yet. */
11861 /* If there is just one stretch of ones, we can do it. */
11862 if (rs6000_is_valid_mask (c
, NULL
, NULL
, mode
))
11865 /* Otherwise, fill in the lowest "hole"; if we can do the result with
11866 one insn, we can do the whole thing with two. */
11867 unsigned HOST_WIDE_INT val
= INTVAL (c
);
11868 unsigned HOST_WIDE_INT bit1
= val
& -val
;
11869 unsigned HOST_WIDE_INT bit2
= (val
+ bit1
) & ~val
;
11870 unsigned HOST_WIDE_INT val1
= (val
+ bit1
) & val
;
11871 unsigned HOST_WIDE_INT bit3
= val1
& -val1
;
11872 return rs6000_is_valid_and_mask (GEN_INT (val
+ bit3
- bit2
), mode
);
11875 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
11876 If EXPAND is true, split rotate-and-mask instructions we generate to
11877 their constituent parts as well (this is used during expand); if DOT
11878 is 1, make the last insn a record-form instruction clobbering the
11879 destination GPR and setting the CC reg (from operands[3]); if 2, set
11880 that GPR as well as the CC reg. */
11883 rs6000_emit_2insn_and (machine_mode mode
, rtx
*operands
, bool expand
, int dot
)
11885 gcc_assert (!(expand
&& dot
));
11887 unsigned HOST_WIDE_INT val
= INTVAL (operands
[2]);
11889 /* If it is one stretch of ones, it is DImode; shift left, mask, then
11890 shift right. This generates better code than doing the masks without
11891 shifts, or shifting first right and then left. */
11893 if (rs6000_is_valid_mask (operands
[2], &nb
, &ne
, mode
) && nb
>= ne
)
11895 gcc_assert (mode
== DImode
);
11897 int shift
= 63 - nb
;
11900 rtx tmp1
= gen_reg_rtx (DImode
);
11901 rtx tmp2
= gen_reg_rtx (DImode
);
11902 emit_insn (gen_ashldi3 (tmp1
, operands
[1], GEN_INT (shift
)));
11903 emit_insn (gen_anddi3 (tmp2
, tmp1
, GEN_INT (val
<< shift
)));
11904 emit_insn (gen_lshrdi3 (operands
[0], tmp2
, GEN_INT (shift
)));
11908 rtx tmp
= gen_rtx_ASHIFT (mode
, operands
[1], GEN_INT (shift
));
11909 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (val
<< shift
));
11910 emit_move_insn (operands
[0], tmp
);
11911 tmp
= gen_rtx_LSHIFTRT (mode
, operands
[0], GEN_INT (shift
));
11912 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
11917 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
11918 that does the rest. */
11919 unsigned HOST_WIDE_INT bit1
= val
& -val
;
11920 unsigned HOST_WIDE_INT bit2
= (val
+ bit1
) & ~val
;
11921 unsigned HOST_WIDE_INT val1
= (val
+ bit1
) & val
;
11922 unsigned HOST_WIDE_INT bit3
= val1
& -val1
;
11924 unsigned HOST_WIDE_INT mask1
= -bit3
+ bit2
- 1;
11925 unsigned HOST_WIDE_INT mask2
= val
+ bit3
- bit2
;
11927 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2
), mode
));
11929 /* Two "no-rotate"-and-mask instructions, for SImode. */
11930 if (rs6000_is_valid_and_mask (GEN_INT (mask1
), mode
))
11932 gcc_assert (mode
== SImode
);
11934 rtx reg
= expand
? gen_reg_rtx (mode
) : operands
[0];
11935 rtx tmp
= gen_rtx_AND (mode
, operands
[1], GEN_INT (mask1
));
11936 emit_move_insn (reg
, tmp
);
11937 tmp
= gen_rtx_AND (mode
, reg
, GEN_INT (mask2
));
11938 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
11942 gcc_assert (mode
== DImode
);
11944 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
11945 insns; we have to do the first in SImode, because it wraps. */
11946 if (mask2
<= 0xffffffff
11947 && rs6000_is_valid_and_mask (GEN_INT (mask1
), SImode
))
11949 rtx reg
= expand
? gen_reg_rtx (mode
) : operands
[0];
11950 rtx tmp
= gen_rtx_AND (SImode
, gen_lowpart (SImode
, operands
[1]),
11952 rtx reg_low
= gen_lowpart (SImode
, reg
);
11953 emit_move_insn (reg_low
, tmp
);
11954 tmp
= gen_rtx_AND (mode
, reg
, GEN_INT (mask2
));
11955 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
11959 /* Two rld* insns: rotate, clear the hole in the middle (which now is
11960 at the top end), rotate back and clear the other hole. */
11961 int right
= exact_log2 (bit3
);
11962 int left
= 64 - right
;
11964 /* Rotate the mask too. */
11965 mask1
= (mask1
>> right
) | ((bit2
- 1) << left
);
11969 rtx tmp1
= gen_reg_rtx (DImode
);
11970 rtx tmp2
= gen_reg_rtx (DImode
);
11971 rtx tmp3
= gen_reg_rtx (DImode
);
11972 emit_insn (gen_rotldi3 (tmp1
, operands
[1], GEN_INT (left
)));
11973 emit_insn (gen_anddi3 (tmp2
, tmp1
, GEN_INT (mask1
)));
11974 emit_insn (gen_rotldi3 (tmp3
, tmp2
, GEN_INT (right
)));
11975 emit_insn (gen_anddi3 (operands
[0], tmp3
, GEN_INT (mask2
)));
11979 rtx tmp
= gen_rtx_ROTATE (mode
, operands
[1], GEN_INT (left
));
11980 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (mask1
));
11981 emit_move_insn (operands
[0], tmp
);
11982 tmp
= gen_rtx_ROTATE (mode
, operands
[0], GEN_INT (right
));
11983 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (mask2
));
11984 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
11988 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
11989 for lfq and stfq insns iff the registers are hard registers. */
11992 registers_ok_for_quad_peep (rtx reg1
, rtx reg2
)
11994 /* We might have been passed a SUBREG. */
11995 if (!REG_P (reg1
) || !REG_P (reg2
))
11998 /* We might have been passed non floating point registers. */
11999 if (!FP_REGNO_P (REGNO (reg1
))
12000 || !FP_REGNO_P (REGNO (reg2
)))
12003 return (REGNO (reg1
) == REGNO (reg2
) - 1);
12006 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
12007 addr1 and addr2 must be in consecutive memory locations
12008 (addr2 == addr1 + 8). */
12011 mems_ok_for_quad_peep (rtx mem1
, rtx mem2
)
12014 unsigned int reg1
, reg2
;
12015 int offset1
, offset2
;
12017 /* The mems cannot be volatile. */
12018 if (MEM_VOLATILE_P (mem1
) || MEM_VOLATILE_P (mem2
))
12021 addr1
= XEXP (mem1
, 0);
12022 addr2
= XEXP (mem2
, 0);
12024 /* Extract an offset (if used) from the first addr. */
12025 if (GET_CODE (addr1
) == PLUS
)
12027 /* If not a REG, return zero. */
12028 if (!REG_P (XEXP (addr1
, 0)))
12032 reg1
= REGNO (XEXP (addr1
, 0));
12033 /* The offset must be constant! */
12034 if (!CONST_INT_P (XEXP (addr1
, 1)))
12036 offset1
= INTVAL (XEXP (addr1
, 1));
12039 else if (!REG_P (addr1
))
12043 reg1
= REGNO (addr1
);
12044 /* This was a simple (mem (reg)) expression. Offset is 0. */
12048 /* And now for the second addr. */
12049 if (GET_CODE (addr2
) == PLUS
)
12051 /* If not a REG, return zero. */
12052 if (!REG_P (XEXP (addr2
, 0)))
12056 reg2
= REGNO (XEXP (addr2
, 0));
12057 /* The offset must be constant. */
12058 if (!CONST_INT_P (XEXP (addr2
, 1)))
12060 offset2
= INTVAL (XEXP (addr2
, 1));
12063 else if (!REG_P (addr2
))
12067 reg2
= REGNO (addr2
);
12068 /* This was a simple (mem (reg)) expression. Offset is 0. */
12072 /* Both of these must have the same base register. */
12076 /* The offset for the second addr must be 8 more than the first addr. */
12077 if (offset2
!= offset1
+ 8)
12080 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
12085 /* Implement TARGET_SECONDARY_RELOAD_NEEDED_MODE. For SDmode values we
12086 need to use DDmode, in all other cases we can use the same mode. */
12087 static machine_mode
12088 rs6000_secondary_memory_needed_mode (machine_mode mode
)
12090 if (lra_in_progress
&& mode
== SDmode
)
12095 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
12096 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
12097 only work on the traditional altivec registers, note if an altivec register
12100 static enum rs6000_reg_type
12101 register_to_reg_type (rtx reg
, bool *is_altivec
)
12103 HOST_WIDE_INT regno
;
12104 enum reg_class rclass
;
12106 if (SUBREG_P (reg
))
12107 reg
= SUBREG_REG (reg
);
12110 return NO_REG_TYPE
;
12112 regno
= REGNO (reg
);
12113 if (!HARD_REGISTER_NUM_P (regno
))
12115 if (!lra_in_progress
&& !reload_completed
)
12116 return PSEUDO_REG_TYPE
;
12118 regno
= true_regnum (reg
);
12119 if (regno
< 0 || !HARD_REGISTER_NUM_P (regno
))
12120 return PSEUDO_REG_TYPE
;
12123 gcc_assert (regno
>= 0);
12125 if (is_altivec
&& ALTIVEC_REGNO_P (regno
))
12126 *is_altivec
= true;
12128 rclass
= rs6000_regno_regclass
[regno
];
12129 return reg_class_to_reg_type
[(int)rclass
];
12132 /* Helper function to return the cost of adding a TOC entry address. */
12135 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask
)
12139 if (TARGET_CMODEL
!= CMODEL_SMALL
)
12140 ret
= ((addr_mask
& RELOAD_REG_OFFSET
) == 0) ? 1 : 2;
12143 ret
= (TARGET_MINIMAL_TOC
) ? 6 : 3;
12148 /* Helper function for rs6000_secondary_reload to determine whether the memory
12149 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
12150 needs reloading. Return negative if the memory is not handled by the memory
12151 helper functions and to try a different reload method, 0 if no additional
12152 instructions are need, and positive to give the extra cost for the
12156 rs6000_secondary_reload_memory (rtx addr
,
12157 enum reg_class rclass
,
12160 int extra_cost
= 0;
12161 rtx reg
, and_arg
, plus_arg0
, plus_arg1
;
12162 addr_mask_type addr_mask
;
12163 const char *type
= NULL
;
12164 const char *fail_msg
= NULL
;
12166 if (GPR_REG_CLASS_P (rclass
))
12167 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_GPR
];
12169 else if (rclass
== FLOAT_REGS
)
12170 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
];
12172 else if (rclass
== ALTIVEC_REGS
)
12173 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
];
12175 /* For the combined VSX_REGS, turn off Altivec AND -16. */
12176 else if (rclass
== VSX_REGS
)
12177 addr_mask
= (reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
]
12178 & ~RELOAD_REG_AND_M16
);
12180 /* DMR registers use VSX registers, and need to generate some extra
12182 else if (rclass
== DM_REGS
)
12185 /* If the register allocator hasn't made up its mind yet on the register
12186 class to use, settle on defaults to use. */
12187 else if (rclass
== NO_REGS
)
12189 addr_mask
= (reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
]
12190 & ~RELOAD_REG_AND_M16
);
12192 if ((addr_mask
& RELOAD_REG_MULTIPLE
) != 0)
12193 addr_mask
&= ~(RELOAD_REG_INDEXED
12194 | RELOAD_REG_PRE_INCDEC
12195 | RELOAD_REG_PRE_MODIFY
);
12201 /* If the register isn't valid in this register class, just return now. */
12202 if ((addr_mask
& RELOAD_REG_VALID
) == 0)
12204 if (TARGET_DEBUG_ADDR
)
12207 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12208 "not valid in class\n",
12209 GET_MODE_NAME (mode
), reg_class_names
[rclass
]);
12216 switch (GET_CODE (addr
))
12218 /* Does the register class supports auto update forms for this mode? We
12219 don't need a scratch register, since the powerpc only supports
12220 PRE_INC, PRE_DEC, and PRE_MODIFY. */
12223 reg
= XEXP (addr
, 0);
12224 if (!base_reg_operand (addr
, GET_MODE (reg
)))
12226 fail_msg
= "no base register #1";
12230 else if ((addr_mask
& RELOAD_REG_PRE_INCDEC
) == 0)
12238 reg
= XEXP (addr
, 0);
12239 plus_arg1
= XEXP (addr
, 1);
12240 if (!base_reg_operand (reg
, GET_MODE (reg
))
12241 || GET_CODE (plus_arg1
) != PLUS
12242 || !rtx_equal_p (reg
, XEXP (plus_arg1
, 0)))
12244 fail_msg
= "bad PRE_MODIFY";
12248 else if ((addr_mask
& RELOAD_REG_PRE_MODIFY
) == 0)
12255 /* Do we need to simulate AND -16 to clear the bottom address bits used
12256 in VMX load/stores? Only allow the AND for vector sizes. */
12258 and_arg
= XEXP (addr
, 0);
12259 if (GET_MODE_SIZE (mode
) != 16
12260 || !CONST_INT_P (XEXP (addr
, 1))
12261 || INTVAL (XEXP (addr
, 1)) != -16)
12263 fail_msg
= "bad Altivec AND #1";
12267 if (rclass
!= ALTIVEC_REGS
)
12269 if (legitimate_indirect_address_p (and_arg
, false))
12272 else if (legitimate_indexed_address_p (and_arg
, false))
12277 fail_msg
= "bad Altivec AND #2";
12285 /* If this is an indirect address, make sure it is a base register. */
12288 if (!legitimate_indirect_address_p (addr
, false))
12295 /* If this is an indexed address, make sure the register class can handle
12296 indexed addresses for this mode. */
12298 plus_arg0
= XEXP (addr
, 0);
12299 plus_arg1
= XEXP (addr
, 1);
12301 /* (plus (plus (reg) (constant)) (constant)) is generated during
12302 push_reload processing, so handle it now. */
12303 if (GET_CODE (plus_arg0
) == PLUS
&& CONST_INT_P (plus_arg1
))
12305 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12312 /* (plus (plus (reg) (constant)) (reg)) is also generated during
12313 push_reload processing, so handle it now. */
12314 else if (GET_CODE (plus_arg0
) == PLUS
&& REG_P (plus_arg1
))
12316 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
12319 type
= "indexed #2";
12323 else if (!base_reg_operand (plus_arg0
, GET_MODE (plus_arg0
)))
12325 fail_msg
= "no base register #2";
12329 else if (int_reg_operand (plus_arg1
, GET_MODE (plus_arg1
)))
12331 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0
12332 || !legitimate_indexed_address_p (addr
, false))
12339 else if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0
12340 && CONST_INT_P (plus_arg1
))
12342 if (!quad_address_offset_p (INTVAL (plus_arg1
)))
12345 type
= "vector d-form offset";
12349 /* Make sure the register class can handle offset addresses. */
12350 else if (rs6000_legitimate_offset_address_p (mode
, addr
, false, true))
12352 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12355 type
= "offset #2";
12361 fail_msg
= "bad PLUS";
12368 /* Quad offsets are restricted and can't handle normal addresses. */
12369 if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
12372 type
= "vector d-form lo_sum";
12375 else if (!legitimate_lo_sum_address_p (mode
, addr
, false))
12377 fail_msg
= "bad LO_SUM";
12381 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12388 /* Static addresses need to create a TOC entry. */
12392 if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
12395 type
= "vector d-form lo_sum #2";
12401 extra_cost
= rs6000_secondary_reload_toc_costs (addr_mask
);
12405 /* TOC references look like offsetable memory. */
12407 if (TARGET_CMODEL
== CMODEL_SMALL
|| XINT (addr
, 1) != UNSPEC_TOCREL
)
12409 fail_msg
= "bad UNSPEC";
12413 else if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
12416 type
= "vector d-form lo_sum #3";
12419 else if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12422 type
= "toc reference";
12428 fail_msg
= "bad address";
12433 if (TARGET_DEBUG_ADDR
/* && extra_cost != 0 */)
12435 if (extra_cost
< 0)
12437 "rs6000_secondary_reload_memory error: mode = %s, "
12438 "class = %s, addr_mask = '%s', %s\n",
12439 GET_MODE_NAME (mode
),
12440 reg_class_names
[rclass
],
12441 rs6000_debug_addr_mask (addr_mask
, false),
12442 (fail_msg
!= NULL
) ? fail_msg
: "<bad address>");
12446 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12447 "addr_mask = '%s', extra cost = %d, %s\n",
12448 GET_MODE_NAME (mode
),
12449 reg_class_names
[rclass
],
12450 rs6000_debug_addr_mask (addr_mask
, false),
12452 (type
) ? type
: "<none>");
12460 /* Helper function for rs6000_secondary_reload to return true if a move to a
12461 different register classe is really a simple move. */
12464 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type
,
12465 enum rs6000_reg_type from_type
,
12468 int size
= GET_MODE_SIZE (mode
);
12470 /* Add support for various direct moves available. In this function, we only
12471 look at cases where we don't need any extra registers, and one or more
12472 simple move insns are issued. Originally small integers are not allowed
12473 in FPR/VSX registers. Single precision binary floating is not a simple
12474 move because we need to convert to the single precision memory layout.
12475 The 4-byte SDmode can be moved. TDmode values are disallowed since they
12476 need special direct move handling, which we do not support yet. */
12477 if (TARGET_DIRECT_MOVE
12478 && ((to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
12479 || (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
12481 if (TARGET_POWERPC64
)
12483 /* ISA 2.07: MTVSRD or MVFVSRD. */
12487 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
12488 if (size
== 16 && TARGET_P9_VECTOR
&& mode
!= TDmode
)
12492 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12493 if (TARGET_P8_VECTOR
)
12495 if (mode
== SImode
)
12498 if (TARGET_P9_VECTOR
&& (mode
== HImode
|| mode
== QImode
))
12502 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12503 if (mode
== SDmode
)
12507 /* Move to/from SPR. */
12508 else if ((size
== 4 || (TARGET_POWERPC64
&& size
== 8))
12509 && ((to_type
== GPR_REG_TYPE
&& from_type
== SPR_REG_TYPE
)
12510 || (to_type
== SPR_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
12513 /* We can transfer between VSX registers and DMR registers without needing
12514 extra registers. */
12515 if (TARGET_DENSE_MATH
&& (mode
== XOmode
|| mode
== TDOmode
)
12516 && ((to_type
== DMR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
12517 || (to_type
== VSX_REG_TYPE
&& from_type
== DMR_REG_TYPE
)))
12523 /* Direct move helper function for rs6000_secondary_reload, handle all of the
12524 special direct moves that involve allocating an extra register, return the
12525 insn code of the helper function if there is such a function or
12526 CODE_FOR_nothing if not. */
12529 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type
,
12530 enum rs6000_reg_type from_type
,
12532 secondary_reload_info
*sri
,
12536 enum insn_code icode
= CODE_FOR_nothing
;
12538 int size
= GET_MODE_SIZE (mode
);
12540 if (TARGET_POWERPC64
&& size
== 16)
12542 /* Handle moving 128-bit values from GPRs to VSX point registers on
12543 ISA 2.07 (power8, power9) when running in 64-bit mode using
12544 XXPERMDI to glue the two 64-bit values back together. */
12545 if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)
12547 cost
= 3; /* 2 mtvsrd's, 1 xxpermdi. */
12548 icode
= reg_addr
[mode
].reload_vsx_gpr
;
12551 /* Handle moving 128-bit values from VSX point registers to GPRs on
12552 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
12553 bottom 64-bit value. */
12554 else if (to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
12556 cost
= 3; /* 2 mfvsrd's, 1 xxpermdi. */
12557 icode
= reg_addr
[mode
].reload_gpr_vsx
;
12561 else if (TARGET_POWERPC64
&& mode
== SFmode
)
12563 if (to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
12565 cost
= 3; /* xscvdpspn, mfvsrd, and. */
12566 icode
= reg_addr
[mode
].reload_gpr_vsx
;
12569 else if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)
12571 cost
= 2; /* mtvsrz, xscvspdpn. */
12572 icode
= reg_addr
[mode
].reload_vsx_gpr
;
12576 else if (!TARGET_POWERPC64
&& size
== 8)
12578 /* Handle moving 64-bit values from GPRs to floating point registers on
12579 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
12580 32-bit values back together. Altivec register classes must be handled
12581 specially since a different instruction is used, and the secondary
12582 reload support requires a single instruction class in the scratch
12583 register constraint. However, right now TFmode is not allowed in
12584 Altivec registers, so the pattern will never match. */
12585 if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
&& !altivec_p
)
12587 cost
= 3; /* 2 mtvsrwz's, 1 fmrgow. */
12588 icode
= reg_addr
[mode
].reload_fpr_gpr
;
12592 if (icode
!= CODE_FOR_nothing
)
12597 sri
->icode
= icode
;
12598 sri
->extra_cost
= cost
;
12605 /* Return whether a move between two register classes can be done either
12606 directly (simple move) or via a pattern that uses a single extra temporary
12607 (using ISA 2.07's direct move in this case. */
12610 rs6000_secondary_reload_move (enum rs6000_reg_type to_type
,
12611 enum rs6000_reg_type from_type
,
12613 secondary_reload_info
*sri
,
12616 /* Fall back to load/store reloads if either type is not a register. */
12617 if (to_type
== NO_REG_TYPE
|| from_type
== NO_REG_TYPE
)
12620 /* If we haven't allocated registers yet, assume the move can be done for the
12621 standard register types. */
12622 if ((to_type
== PSEUDO_REG_TYPE
&& from_type
== PSEUDO_REG_TYPE
)
12623 || (to_type
== PSEUDO_REG_TYPE
&& IS_STD_REG_TYPE (from_type
))
12624 || (from_type
== PSEUDO_REG_TYPE
&& IS_STD_REG_TYPE (to_type
)))
12627 /* Moves to the same set of registers is a simple move for non-specialized
12629 if (to_type
== from_type
&& IS_STD_REG_TYPE (to_type
))
12632 /* Check whether a simple move can be done directly. */
12633 if (rs6000_secondary_reload_simple_move (to_type
, from_type
, mode
))
12637 sri
->icode
= CODE_FOR_nothing
;
12638 sri
->extra_cost
= 0;
12643 /* Now check if we can do it in a few steps. */
12644 return rs6000_secondary_reload_direct_move (to_type
, from_type
, mode
, sri
,
12648 /* Inform reload about cases where moving X with a mode MODE to a register in
12649 RCLASS requires an extra scratch or immediate register. Return the class
12650 needed for the immediate register.
12652 For VSX and Altivec, we may need a register to convert sp+offset into
12655 For misaligned 64-bit gpr loads and stores we need a register to
12656 convert an offset address to indirect. */
12659 rs6000_secondary_reload (bool in_p
,
12661 reg_class_t rclass_i
,
12663 secondary_reload_info
*sri
)
12665 enum reg_class rclass
= (enum reg_class
) rclass_i
;
12666 reg_class_t ret
= ALL_REGS
;
12667 enum insn_code icode
;
12668 bool default_p
= false;
12669 bool done_p
= false;
12671 /* Allow subreg of memory before/during reload. */
12672 bool memory_p
= (MEM_P (x
)
12673 || (!reload_completed
&& SUBREG_P (x
)
12674 && MEM_P (SUBREG_REG (x
))));
12676 sri
->icode
= CODE_FOR_nothing
;
12677 sri
->t_icode
= CODE_FOR_nothing
;
12678 sri
->extra_cost
= 0;
12680 ? reg_addr
[mode
].reload_load
12681 : reg_addr
[mode
].reload_store
);
12683 if (REG_P (x
) || register_operand (x
, mode
))
12685 enum rs6000_reg_type to_type
= reg_class_to_reg_type
[(int)rclass
];
12686 bool altivec_p
= (rclass
== ALTIVEC_REGS
);
12687 enum rs6000_reg_type from_type
= register_to_reg_type (x
, &altivec_p
);
12690 std::swap (to_type
, from_type
);
12692 /* Can we do a direct move of some sort? */
12693 if (rs6000_secondary_reload_move (to_type
, from_type
, mode
, sri
,
12696 icode
= (enum insn_code
)sri
->icode
;
12703 /* Make sure 0.0 is not reloaded or forced into memory. */
12704 if (x
== CONST0_RTX (mode
) && VSX_REG_CLASS_P (rclass
))
12711 /* If this is a scalar floating point value and we want to load it into the
12712 traditional Altivec registers, do it via a move via a traditional floating
12713 point register, unless we have D-form addressing. Also make sure that
12714 non-zero constants use a FPR. */
12715 if (!done_p
&& reg_addr
[mode
].scalar_in_vmx_p
12716 && !mode_supports_vmx_dform (mode
)
12717 && (rclass
== VSX_REGS
|| rclass
== ALTIVEC_REGS
)
12718 && (memory_p
|| CONST_DOUBLE_P (x
)))
12725 /* Handle reload of load/stores if we have reload helper functions. */
12726 if (!done_p
&& icode
!= CODE_FOR_nothing
&& memory_p
)
12728 int extra_cost
= rs6000_secondary_reload_memory (XEXP (x
, 0), rclass
,
12731 if (extra_cost
>= 0)
12735 if (extra_cost
> 0)
12737 sri
->extra_cost
= extra_cost
;
12738 sri
->icode
= icode
;
12743 /* Handle unaligned loads and stores of integer registers. */
12744 if (!done_p
&& TARGET_POWERPC64
12745 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
12747 && GET_MODE_SIZE (GET_MODE (x
)) >= UNITS_PER_WORD
)
12749 rtx addr
= XEXP (x
, 0);
12750 rtx off
= address_offset (addr
);
12752 if (off
!= NULL_RTX
)
12754 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
12755 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
12757 /* We need a secondary reload when our legitimate_address_p
12758 says the address is good (as otherwise the entire address
12759 will be reloaded), and the offset is not a multiple of
12760 four or we have an address wrap. Address wrap will only
12761 occur for LO_SUMs since legitimate_offset_address_p
12762 rejects addresses for 16-byte mems that will wrap. */
12763 if (GET_CODE (addr
) == LO_SUM
12764 ? (1 /* legitimate_address_p allows any offset for lo_sum */
12765 && ((offset
& 3) != 0
12766 || ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
))
12767 : (offset
+ 0x8000 < 0x10000 - extra
/* legitimate_address_p */
12768 && (offset
& 3) != 0))
12770 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
12772 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_load
12773 : CODE_FOR_reload_di_load
);
12775 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_store
12776 : CODE_FOR_reload_di_store
);
12777 sri
->extra_cost
= 2;
12788 if (!done_p
&& !TARGET_POWERPC64
12789 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
12791 && GET_MODE_SIZE (GET_MODE (x
)) > UNITS_PER_WORD
)
12793 rtx addr
= XEXP (x
, 0);
12794 rtx off
= address_offset (addr
);
12796 if (off
!= NULL_RTX
)
12798 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
12799 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
12801 /* We need a secondary reload when our legitimate_address_p
12802 says the address is good (as otherwise the entire address
12803 will be reloaded), and we have a wrap.
12805 legitimate_lo_sum_address_p allows LO_SUM addresses to
12806 have any offset so test for wrap in the low 16 bits.
12808 legitimate_offset_address_p checks for the range
12809 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
12810 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
12811 [0x7ff4,0x7fff] respectively, so test for the
12812 intersection of these ranges, [0x7ffc,0x7fff] and
12813 [0x7ff4,0x7ff7] respectively.
12815 Note that the address we see here may have been
12816 manipulated by legitimize_reload_address. */
12817 if (GET_CODE (addr
) == LO_SUM
12818 ? ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
12819 : offset
- (0x8000 - extra
) < UNITS_PER_WORD
)
12822 sri
->icode
= CODE_FOR_reload_si_load
;
12824 sri
->icode
= CODE_FOR_reload_si_store
;
12825 sri
->extra_cost
= 2;
12840 ret
= default_secondary_reload (in_p
, x
, rclass
, mode
, sri
);
12842 gcc_assert (ret
!= ALL_REGS
);
12844 if (TARGET_DEBUG_ADDR
)
12847 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
12849 reg_class_names
[ret
],
12850 in_p
? "true" : "false",
12851 reg_class_names
[rclass
],
12852 GET_MODE_NAME (mode
));
12854 if (reload_completed
)
12855 fputs (", after reload", stderr
);
12858 fputs (", done_p not set", stderr
);
12861 fputs (", default secondary reload", stderr
);
12863 if (sri
->icode
!= CODE_FOR_nothing
)
12864 fprintf (stderr
, ", reload func = %s, extra cost = %d",
12865 insn_data
[sri
->icode
].name
, sri
->extra_cost
);
12867 else if (sri
->extra_cost
> 0)
12868 fprintf (stderr
, ", extra cost = %d", sri
->extra_cost
);
12870 fputs ("\n", stderr
);
12877 /* Better tracing for rs6000_secondary_reload_inner. */
12880 rs6000_secondary_reload_trace (int line
, rtx reg
, rtx mem
, rtx scratch
,
12885 gcc_assert (reg
!= NULL_RTX
&& mem
!= NULL_RTX
&& scratch
!= NULL_RTX
);
12887 fprintf (stderr
, "rs6000_secondary_reload_inner:%d, type = %s\n", line
,
12888 store_p
? "store" : "load");
12891 set
= gen_rtx_SET (mem
, reg
);
12893 set
= gen_rtx_SET (reg
, mem
);
12895 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
12896 debug_rtx (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
12899 static void rs6000_secondary_reload_fail (int, rtx
, rtx
, rtx
, bool)
12900 ATTRIBUTE_NORETURN
;
12903 rs6000_secondary_reload_fail (int line
, rtx reg
, rtx mem
, rtx scratch
,
12906 rs6000_secondary_reload_trace (line
, reg
, mem
, scratch
, store_p
);
12907 gcc_unreachable ();
12910 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
12911 reload helper functions. These were identified in
12912 rs6000_secondary_reload_memory, and if reload decided to use the secondary
12913 reload, it calls the insns:
12914 reload_<RELOAD:mode>_<P:mptrsize>_store
12915 reload_<RELOAD:mode>_<P:mptrsize>_load
12917 which in turn calls this function, to do whatever is necessary to create
12918 valid addresses. */
12921 rs6000_secondary_reload_inner (rtx reg
, rtx mem
, rtx scratch
, bool store_p
)
12923 int regno
= true_regnum (reg
);
12924 machine_mode mode
= GET_MODE (reg
);
12925 addr_mask_type addr_mask
;
12928 rtx op_reg
, op0
, op1
;
12933 if (regno
< 0 || !HARD_REGISTER_NUM_P (regno
) || !MEM_P (mem
)
12934 || !base_reg_operand (scratch
, GET_MODE (scratch
)))
12935 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12937 if (IN_RANGE (regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
))
12938 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_GPR
];
12940 else if (IN_RANGE (regno
, FIRST_FPR_REGNO
, LAST_FPR_REGNO
))
12941 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
];
12943 else if (IN_RANGE (regno
, FIRST_ALTIVEC_REGNO
, LAST_ALTIVEC_REGNO
))
12944 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
];
12947 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12949 /* Make sure the mode is valid in this register class. */
12950 if ((addr_mask
& RELOAD_REG_VALID
) == 0)
12951 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12953 if (TARGET_DEBUG_ADDR
)
12954 rs6000_secondary_reload_trace (__LINE__
, reg
, mem
, scratch
, store_p
);
12956 new_addr
= addr
= XEXP (mem
, 0);
12957 switch (GET_CODE (addr
))
12959 /* Does the register class support auto update forms for this mode? If
12960 not, do the update now. We don't need a scratch register, since the
12961 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
12964 op_reg
= XEXP (addr
, 0);
12965 if (!base_reg_operand (op_reg
, Pmode
))
12966 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12968 if ((addr_mask
& RELOAD_REG_PRE_INCDEC
) == 0)
12970 int delta
= GET_MODE_SIZE (mode
);
12971 if (GET_CODE (addr
) == PRE_DEC
)
12973 emit_insn (gen_add2_insn (op_reg
, GEN_INT (delta
)));
12979 op0
= XEXP (addr
, 0);
12980 op1
= XEXP (addr
, 1);
12981 if (!base_reg_operand (op0
, Pmode
)
12982 || GET_CODE (op1
) != PLUS
12983 || !rtx_equal_p (op0
, XEXP (op1
, 0)))
12984 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12986 if ((addr_mask
& RELOAD_REG_PRE_MODIFY
) == 0)
12988 emit_insn (gen_rtx_SET (op0
, op1
));
12993 /* Do we need to simulate AND -16 to clear the bottom address bits used
12994 in VMX load/stores? */
12996 op0
= XEXP (addr
, 0);
12997 op1
= XEXP (addr
, 1);
12998 if ((addr_mask
& RELOAD_REG_AND_M16
) == 0)
13000 if (REG_P (op0
) || SUBREG_P (op0
))
13003 else if (GET_CODE (op1
) == PLUS
)
13005 emit_insn (gen_rtx_SET (scratch
, op1
));
13010 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13012 and_op
= gen_rtx_AND (GET_MODE (scratch
), op_reg
, op1
);
13013 cc_clobber
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (CCmode
));
13014 rv
= gen_rtvec (2, gen_rtx_SET (scratch
, and_op
), cc_clobber
);
13015 emit_insn (gen_rtx_PARALLEL (VOIDmode
, rv
));
13016 new_addr
= scratch
;
13020 /* If this is an indirect address, make sure it is a base register. */
13023 if (!base_reg_operand (addr
, GET_MODE (addr
)))
13025 emit_insn (gen_rtx_SET (scratch
, addr
));
13026 new_addr
= scratch
;
13030 /* If this is an indexed address, make sure the register class can handle
13031 indexed addresses for this mode. */
13033 op0
= XEXP (addr
, 0);
13034 op1
= XEXP (addr
, 1);
13035 if (!base_reg_operand (op0
, Pmode
))
13036 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13038 else if (int_reg_operand (op1
, Pmode
))
13040 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
13042 emit_insn (gen_rtx_SET (scratch
, addr
));
13043 new_addr
= scratch
;
13047 else if (mode_supports_dq_form (mode
) && CONST_INT_P (op1
))
13049 if (((addr_mask
& RELOAD_REG_QUAD_OFFSET
) == 0)
13050 || !quad_address_p (addr
, mode
, false))
13052 emit_insn (gen_rtx_SET (scratch
, addr
));
13053 new_addr
= scratch
;
13057 /* Make sure the register class can handle offset addresses. */
13058 else if (rs6000_legitimate_offset_address_p (mode
, addr
, false, true))
13060 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
13062 emit_insn (gen_rtx_SET (scratch
, addr
));
13063 new_addr
= scratch
;
13068 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13073 op0
= XEXP (addr
, 0);
13074 op1
= XEXP (addr
, 1);
13075 if (!base_reg_operand (op0
, Pmode
))
13076 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13078 else if (int_reg_operand (op1
, Pmode
))
13080 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
13082 emit_insn (gen_rtx_SET (scratch
, addr
));
13083 new_addr
= scratch
;
13087 /* Quad offsets are restricted and can't handle normal addresses. */
13088 else if (mode_supports_dq_form (mode
))
13090 emit_insn (gen_rtx_SET (scratch
, addr
));
13091 new_addr
= scratch
;
13094 /* Make sure the register class can handle offset addresses. */
13095 else if (legitimate_lo_sum_address_p (mode
, addr
, false))
13097 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
13099 emit_insn (gen_rtx_SET (scratch
, addr
));
13100 new_addr
= scratch
;
13105 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13112 rs6000_emit_move (scratch
, addr
, Pmode
);
13113 new_addr
= scratch
;
13117 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13120 /* Adjust the address if it changed. */
13121 if (addr
!= new_addr
)
13123 mem
= replace_equiv_address_nv (mem
, new_addr
);
13124 if (TARGET_DEBUG_ADDR
)
13125 fprintf (stderr
, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
13128 /* Now create the move. */
13130 emit_insn (gen_rtx_SET (mem
, reg
));
13132 emit_insn (gen_rtx_SET (reg
, mem
));
13137 /* Convert reloads involving 64-bit gprs and misaligned offset
13138 addressing, or multiple 32-bit gprs and offsets that are too large,
13139 to use indirect addressing. */
13142 rs6000_secondary_reload_gpr (rtx reg
, rtx mem
, rtx scratch
, bool store_p
)
13144 int regno
= true_regnum (reg
);
13145 enum reg_class rclass
;
13147 rtx scratch_or_premodify
= scratch
;
13149 if (TARGET_DEBUG_ADDR
)
13151 fprintf (stderr
, "\nrs6000_secondary_reload_gpr, type = %s\n",
13152 store_p
? "store" : "load");
13153 fprintf (stderr
, "reg:\n");
13155 fprintf (stderr
, "mem:\n");
13157 fprintf (stderr
, "scratch:\n");
13158 debug_rtx (scratch
);
13161 gcc_assert (regno
>= 0 && HARD_REGISTER_NUM_P (regno
));
13162 gcc_assert (MEM_P (mem
));
13163 rclass
= REGNO_REG_CLASS (regno
);
13164 gcc_assert (rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
);
13165 addr
= XEXP (mem
, 0);
13167 if (GET_CODE (addr
) == PRE_MODIFY
)
13169 gcc_assert (REG_P (XEXP (addr
, 0))
13170 && GET_CODE (XEXP (addr
, 1)) == PLUS
13171 && XEXP (XEXP (addr
, 1), 0) == XEXP (addr
, 0));
13172 scratch_or_premodify
= XEXP (addr
, 0);
13173 addr
= XEXP (addr
, 1);
13175 gcc_assert (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
);
13177 rs6000_emit_move (scratch_or_premodify
, addr
, Pmode
);
13179 mem
= replace_equiv_address_nv (mem
, scratch_or_premodify
);
13181 /* Now create the move. */
13183 emit_insn (gen_rtx_SET (mem
, reg
));
13185 emit_insn (gen_rtx_SET (reg
, mem
));
13190 /* Given an rtx X being reloaded into a reg required to be
13191 in class CLASS, return the class of reg to actually use.
13192 In general this is just CLASS; but on some machines
13193 in some cases it is preferable to use a more restrictive class.
13195 On the RS/6000, we have to return NO_REGS when we want to reload a
13196 floating-point CONST_DOUBLE to force it to be copied to memory.
13198 We also don't want to reload integer values into floating-point
13199 registers if we can at all help it. In fact, this can
13200 cause reload to die, if it tries to generate a reload of CTR
13201 into a FP register and discovers it doesn't have the memory location
13204 ??? Would it be a good idea to have reload do the converse, that is
13205 try to reload floating modes into FP registers if possible?
13208 static enum reg_class
13209 rs6000_preferred_reload_class (rtx x
, enum reg_class rclass
)
13211 machine_mode mode
= GET_MODE (x
);
13212 bool is_constant
= CONSTANT_P (x
);
13214 /* DMR registers can't be loaded or stored. */
13215 if (rclass
== DM_REGS
)
13218 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
13219 reload class for it. */
13220 if ((rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
13221 && (reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_VALID
) == 0)
13224 if ((rclass
== FLOAT_REGS
|| rclass
== VSX_REGS
)
13225 && (reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
] & RELOAD_REG_VALID
) == 0)
13228 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
13229 the reloading of address expressions using PLUS into floating point
13231 if (TARGET_VSX
&& VSX_REG_CLASS_P (rclass
) && GET_CODE (x
) != PLUS
)
13235 /* Zero is always allowed in all VSX registers. */
13236 if (x
== CONST0_RTX (mode
))
13239 /* If this is a vector constant that can be formed with a few Altivec
13240 instructions, we want altivec registers. */
13241 if (GET_CODE (x
) == CONST_VECTOR
&& easy_vector_constant (x
, mode
))
13242 return ALTIVEC_REGS
;
13244 /* If this is an integer constant that can easily be loaded into
13245 vector registers, allow it. */
13246 if (CONST_INT_P (x
))
13248 HOST_WIDE_INT value
= INTVAL (x
);
13250 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
13251 2.06 can generate it in the Altivec registers with
13255 if (TARGET_P8_VECTOR
)
13257 else if (rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
13258 return ALTIVEC_REGS
;
13263 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
13264 a sign extend in the Altivec registers. */
13265 if (IN_RANGE (value
, -128, 127) && TARGET_P9_VECTOR
13266 && (rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
))
13267 return ALTIVEC_REGS
;
13270 /* Force constant to memory. */
13274 /* D-form addressing can easily reload the value. */
13275 if (mode_supports_vmx_dform (mode
)
13276 || mode_supports_dq_form (mode
))
13279 /* If this is a scalar floating point value and we don't have D-form
13280 addressing, prefer the traditional floating point registers so that we
13281 can use D-form (register+offset) addressing. */
13282 if (rclass
== VSX_REGS
13283 && (mode
== SFmode
|| GET_MODE_SIZE (mode
) == 8))
13286 /* Prefer the Altivec registers if Altivec is handling the vector
13287 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
13289 if (VECTOR_UNIT_ALTIVEC_P (mode
) || VECTOR_MEM_ALTIVEC_P (mode
)
13290 || mode
== V1TImode
)
13291 return ALTIVEC_REGS
;
13296 if (is_constant
|| GET_CODE (x
) == PLUS
)
13298 if (reg_class_subset_p (GENERAL_REGS
, rclass
))
13299 return GENERAL_REGS
;
13300 if (reg_class_subset_p (BASE_REGS
, rclass
))
13305 /* For the vector pair and vector quad modes, prefer their natural register
13306 (VSX or FPR) rather than GPR registers. For other integer types, prefer
13307 the GPR registers. */
13308 if (rclass
== GEN_OR_FLOAT_REGS
)
13310 if (mode
== OOmode
)
13313 if (mode
== XOmode
)
13314 return TARGET_DENSE_MATH
? VSX_REGS
: FLOAT_REGS
;
13316 if (mode
== TDOmode
)
13319 if (GET_MODE_CLASS (mode
) == MODE_INT
)
13320 return GENERAL_REGS
;
13326 /* Debug version of rs6000_preferred_reload_class. */
13327 static enum reg_class
13328 rs6000_debug_preferred_reload_class (rtx x
, enum reg_class rclass
)
13330 enum reg_class ret
= rs6000_preferred_reload_class (x
, rclass
);
13333 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
13335 reg_class_names
[ret
], reg_class_names
[rclass
],
13336 GET_MODE_NAME (GET_MODE (x
)));
13342 /* If we are copying between FP or AltiVec registers and anything else, we need
13343 a memory location. The exception is when we are targeting ppc64 and the
13344 move to/from fpr to gpr instructions are available. Also, under VSX, you
13345 can copy vector registers from the FP register set to the Altivec register
13346 set and vice versa. */
13349 rs6000_secondary_memory_needed (machine_mode mode
,
13350 reg_class_t from_class
,
13351 reg_class_t to_class
)
13353 enum rs6000_reg_type from_type
, to_type
;
13354 bool altivec_p
= ((from_class
== ALTIVEC_REGS
)
13355 || (to_class
== ALTIVEC_REGS
));
13357 /* If a simple/direct move is available, we don't need secondary memory */
13358 from_type
= reg_class_to_reg_type
[(int)from_class
];
13359 to_type
= reg_class_to_reg_type
[(int)to_class
];
13361 if (rs6000_secondary_reload_move (to_type
, from_type
, mode
,
13362 (secondary_reload_info
*)0, altivec_p
))
13365 /* If we have a floating point or vector register class, we need to use
13366 memory to transfer the data. */
13367 if (IS_FP_VECT_REG_TYPE (from_type
) || IS_FP_VECT_REG_TYPE (to_type
))
13373 /* Debug version of rs6000_secondary_memory_needed. */
13375 rs6000_debug_secondary_memory_needed (machine_mode mode
,
13376 reg_class_t from_class
,
13377 reg_class_t to_class
)
13379 bool ret
= rs6000_secondary_memory_needed (mode
, from_class
, to_class
);
13382 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
13383 "to_class = %s, mode = %s\n",
13384 ret
? "true" : "false",
13385 reg_class_names
[from_class
],
13386 reg_class_names
[to_class
],
13387 GET_MODE_NAME (mode
));
13392 /* Return the register class of a scratch register needed to copy IN into
13393 or out of a register in RCLASS in MODE. If it can be done directly,
13394 NO_REGS is returned. */
13396 static enum reg_class
13397 rs6000_secondary_reload_class (enum reg_class rclass
, machine_mode mode
,
13402 if (TARGET_ELF
|| (DEFAULT_ABI
== ABI_DARWIN
13404 && MACHOPIC_INDIRECT
13408 /* We cannot copy a symbolic operand directly into anything
13409 other than BASE_REGS for TARGET_ELF. So indicate that a
13410 register from BASE_REGS is needed as an intermediate
13413 On Darwin, pic addresses require a load from memory, which
13414 needs a base register. */
13415 if (rclass
!= BASE_REGS
13416 && (SYMBOL_REF_P (in
)
13417 || GET_CODE (in
) == HIGH
13418 || GET_CODE (in
) == LABEL_REF
13419 || GET_CODE (in
) == CONST
))
13425 regno
= REGNO (in
);
13426 if (!HARD_REGISTER_NUM_P (regno
))
13428 regno
= true_regnum (in
);
13429 if (!HARD_REGISTER_NUM_P (regno
))
13433 else if (SUBREG_P (in
))
13435 regno
= true_regnum (in
);
13436 if (!HARD_REGISTER_NUM_P (regno
))
13442 /* Dense math registers don't have loads or stores. We have to go through
13443 the VSX registers to load XOmode (vector quad) and TDOmode (dmr 1024
13445 if (TARGET_DENSE_MATH
&& rclass
== DM_REGS
)
13448 /* If we have VSX register moves, prefer moving scalar values between
13449 Altivec registers and GPR by going via an FPR (and then via memory)
13450 instead of reloading the secondary memory address for Altivec moves. */
13452 && GET_MODE_SIZE (mode
) < 16
13453 && !mode_supports_vmx_dform (mode
)
13454 && (((rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
)
13455 && (regno
>= 0 && ALTIVEC_REGNO_P (regno
)))
13456 || ((rclass
== VSX_REGS
|| rclass
== ALTIVEC_REGS
)
13457 && (regno
>= 0 && INT_REGNO_P (regno
)))))
13460 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
13462 if (rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
13463 || (regno
>= 0 && INT_REGNO_P (regno
)))
13466 /* Constants, memory, and VSX registers can go into VSX registers (both the
13467 traditional floating point and the altivec registers). */
13468 if (rclass
== VSX_REGS
13469 && (regno
== -1 || VSX_REGNO_P (regno
)))
13472 /* Constants, memory, and FP registers can go into FP registers. */
13473 if ((regno
== -1 || FP_REGNO_P (regno
))
13474 && (rclass
== FLOAT_REGS
|| rclass
== GEN_OR_FLOAT_REGS
))
13475 return (mode
!= SDmode
|| lra_in_progress
) ? NO_REGS
: GENERAL_REGS
;
13477 /* Memory, and AltiVec registers can go into AltiVec registers. */
13478 if ((regno
== -1 || ALTIVEC_REGNO_P (regno
))
13479 && rclass
== ALTIVEC_REGS
)
13482 /* We can copy among the CR registers. */
13483 if ((rclass
== CR_REGS
|| rclass
== CR0_REGS
)
13484 && regno
>= 0 && CR_REGNO_P (regno
))
13487 /* Otherwise, we need GENERAL_REGS. */
13488 return GENERAL_REGS
;
13491 /* Debug version of rs6000_secondary_reload_class. */
13492 static enum reg_class
13493 rs6000_debug_secondary_reload_class (enum reg_class rclass
,
13494 machine_mode mode
, rtx in
)
13496 enum reg_class ret
= rs6000_secondary_reload_class (rclass
, mode
, in
);
13498 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
13499 "mode = %s, input rtx:\n",
13500 reg_class_names
[ret
], reg_class_names
[rclass
],
13501 GET_MODE_NAME (mode
));
13507 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
13510 rs6000_can_change_mode_class (machine_mode from
,
13512 reg_class_t rclass
)
13514 unsigned from_size
= GET_MODE_SIZE (from
);
13515 unsigned to_size
= GET_MODE_SIZE (to
);
13517 if (from_size
!= to_size
)
13519 enum reg_class xclass
= (TARGET_VSX
) ? VSX_REGS
: FLOAT_REGS
;
13521 if (reg_classes_intersect_p (xclass
, rclass
))
13523 unsigned to_nregs
= hard_regno_nregs (FIRST_FPR_REGNO
, to
);
13524 unsigned from_nregs
= hard_regno_nregs (FIRST_FPR_REGNO
, from
);
13525 bool to_float128_vector_p
= FLOAT128_VECTOR_P (to
);
13526 bool from_float128_vector_p
= FLOAT128_VECTOR_P (from
);
13528 /* Don't allow 64-bit types to overlap with 128-bit types that take a
13529 single register under VSX because the scalar part of the register
13530 is in the upper 64-bits, and not the lower 64-bits. Types like
13531 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
13532 IEEE floating point can't overlap, and neither can small
13535 if (to_float128_vector_p
&& from_float128_vector_p
)
13538 else if (to_float128_vector_p
|| from_float128_vector_p
)
13541 /* TDmode in floating-mode registers must always go into a register
13542 pair with the most significant word in the even-numbered register
13543 to match ISA requirements. In little-endian mode, this does not
13544 match subreg numbering, so we cannot allow subregs. */
13545 if (!BYTES_BIG_ENDIAN
&& (to
== TDmode
|| from
== TDmode
))
13548 /* Allow SD<->DD changes, since SDmode values are stored in
13549 the low half of the DDmode, just like target-independent
13550 code expects. We need to allow at least SD->DD since
13551 rs6000_secondary_memory_needed_mode asks for that change
13552 to be made for SD reloads. */
13553 if ((to
== DDmode
&& from
== SDmode
)
13554 || (to
== SDmode
&& from
== DDmode
))
13557 if (from_size
< 8 || to_size
< 8)
13560 if (from_size
== 8 && (8 * to_nregs
) != to_size
)
13563 if (to_size
== 8 && (8 * from_nregs
) != from_size
)
13572 /* Since the VSX register set includes traditional floating point registers
13573 and altivec registers, just check for the size being different instead of
13574 trying to check whether the modes are vector modes. Otherwise it won't
13575 allow say DF and DI to change classes. For types like TFmode and TDmode
13576 that take 2 64-bit registers, rather than a single 128-bit register, don't
13577 allow subregs of those types to other 128 bit types. */
13578 if (TARGET_VSX
&& VSX_REG_CLASS_P (rclass
))
13580 unsigned num_regs
= (from_size
+ 15) / 16;
13581 if (hard_regno_nregs (FIRST_FPR_REGNO
, to
) > num_regs
13582 || hard_regno_nregs (FIRST_FPR_REGNO
, from
) > num_regs
)
13585 return (from_size
== 8 || from_size
== 16);
13588 if (TARGET_ALTIVEC
&& rclass
== ALTIVEC_REGS
13589 && (ALTIVEC_VECTOR_MODE (from
) + ALTIVEC_VECTOR_MODE (to
)) == 1)
13595 /* Debug version of rs6000_can_change_mode_class. */
13597 rs6000_debug_can_change_mode_class (machine_mode from
,
13599 reg_class_t rclass
)
13601 bool ret
= rs6000_can_change_mode_class (from
, to
, rclass
);
13604 "rs6000_can_change_mode_class, return %s, from = %s, "
13605 "to = %s, rclass = %s\n",
13606 ret
? "true" : "false",
13607 GET_MODE_NAME (from
), GET_MODE_NAME (to
),
13608 reg_class_names
[rclass
]);
13613 /* Return a string to do a move operation of 128 bits of data. */
13616 rs6000_output_move_128bit (rtx operands
[])
13618 rtx dest
= operands
[0];
13619 rtx src
= operands
[1];
13620 machine_mode mode
= GET_MODE (dest
);
13623 bool dest_gpr_p
, dest_fp_p
, dest_vmx_p
, dest_vsx_p
;
13624 bool src_gpr_p
, src_fp_p
, src_vmx_p
, src_vsx_p
;
13628 dest_regno
= REGNO (dest
);
13629 dest_gpr_p
= INT_REGNO_P (dest_regno
);
13630 dest_fp_p
= FP_REGNO_P (dest_regno
);
13631 dest_vmx_p
= ALTIVEC_REGNO_P (dest_regno
);
13632 dest_vsx_p
= dest_fp_p
| dest_vmx_p
;
13637 dest_gpr_p
= dest_fp_p
= dest_vmx_p
= dest_vsx_p
= false;
13642 src_regno
= REGNO (src
);
13643 src_gpr_p
= INT_REGNO_P (src_regno
);
13644 src_fp_p
= FP_REGNO_P (src_regno
);
13645 src_vmx_p
= ALTIVEC_REGNO_P (src_regno
);
13646 src_vsx_p
= src_fp_p
| src_vmx_p
;
13651 src_gpr_p
= src_fp_p
= src_vmx_p
= src_vsx_p
= false;
13654 /* Register moves. */
13655 if (dest_regno
>= 0 && src_regno
>= 0)
13662 if (TARGET_DIRECT_MOVE_128
&& src_vsx_p
)
13663 return (WORDS_BIG_ENDIAN
13664 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
13665 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
13667 else if (TARGET_VSX
&& TARGET_DIRECT_MOVE
&& src_vsx_p
)
13671 else if (TARGET_VSX
&& dest_vsx_p
)
13674 return "xxlor %x0,%x1,%x1";
13676 else if (TARGET_DIRECT_MOVE_128
&& src_gpr_p
)
13677 return (WORDS_BIG_ENDIAN
13678 ? "mtvsrdd %x0,%1,%L1"
13679 : "mtvsrdd %x0,%L1,%1");
13681 else if (TARGET_DIRECT_MOVE
&& src_gpr_p
)
13685 else if (TARGET_ALTIVEC
&& dest_vmx_p
&& src_vmx_p
)
13686 return "vor %0,%1,%1";
13688 else if (dest_fp_p
&& src_fp_p
)
13693 else if (dest_regno
>= 0 && MEM_P (src
))
13697 if (TARGET_QUAD_MEMORY
&& quad_load_store_p (dest
, src
))
13703 else if (TARGET_ALTIVEC
&& dest_vmx_p
13704 && altivec_indexed_or_indirect_operand (src
, mode
))
13705 return "lvx %0,%y1";
13707 else if (TARGET_VSX
&& dest_vsx_p
)
13709 if (mode_supports_dq_form (mode
)
13710 && quad_address_p (XEXP (src
, 0), mode
, true))
13711 return "lxv %x0,%1";
13713 else if (TARGET_P9_VECTOR
)
13714 return "lxvx %x0,%y1";
13716 else if (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
13717 return "lxvw4x %x0,%y1";
13720 return "lxvd2x %x0,%y1";
13723 else if (TARGET_ALTIVEC
&& dest_vmx_p
)
13724 return "lvx %0,%y1";
13726 else if (dest_fp_p
)
13731 else if (src_regno
>= 0 && MEM_P (dest
))
13735 if (TARGET_QUAD_MEMORY
&& quad_load_store_p (dest
, src
))
13736 return "stq %1,%0";
13741 else if (TARGET_ALTIVEC
&& src_vmx_p
13742 && altivec_indexed_or_indirect_operand (dest
, mode
))
13743 return "stvx %1,%y0";
13745 else if (TARGET_VSX
&& src_vsx_p
)
13747 if (mode_supports_dq_form (mode
)
13748 && quad_address_p (XEXP (dest
, 0), mode
, true))
13749 return "stxv %x1,%0";
13751 else if (TARGET_P9_VECTOR
)
13752 return "stxvx %x1,%y0";
13754 else if (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
13755 return "stxvw4x %x1,%y0";
13758 return "stxvd2x %x1,%y0";
13761 else if (TARGET_ALTIVEC
&& src_vmx_p
)
13762 return "stvx %1,%y0";
13769 else if (dest_regno
>= 0
13770 && (CONST_INT_P (src
)
13771 || CONST_WIDE_INT_P (src
)
13772 || CONST_DOUBLE_P (src
)
13773 || GET_CODE (src
) == CONST_VECTOR
))
13778 else if ((dest_vmx_p
&& TARGET_ALTIVEC
)
13779 || (dest_vsx_p
&& TARGET_VSX
))
13780 return output_vec_const_move (operands
);
13783 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest
, src
));
13786 /* Validate a 128-bit move. */
13788 rs6000_move_128bit_ok_p (rtx operands
[])
13790 machine_mode mode
= GET_MODE (operands
[0]);
13791 return (gpc_reg_operand (operands
[0], mode
)
13792 || gpc_reg_operand (operands
[1], mode
));
13795 /* Return true if a 128-bit move needs to be split. */
13797 rs6000_split_128bit_ok_p (rtx operands
[])
13799 if (!reload_completed
)
13802 if (!gpr_or_gpr_p (operands
[0], operands
[1]))
13805 if (quad_load_store_p (operands
[0], operands
[1]))
13812 /* Given a comparison operation, return the bit number in CCR to test. We
13813 know this is a valid comparison.
13815 SCC_P is 1 if this is for an scc. That means that %D will have been
13816 used instead of %C, so the bits will be in different places.
13818 Return -1 if OP isn't a valid comparison for some reason. */
13821 ccr_bit (rtx op
, int scc_p
)
13823 enum rtx_code code
= GET_CODE (op
);
13824 machine_mode cc_mode
;
13829 if (!COMPARISON_P (op
))
13832 reg
= XEXP (op
, 0);
13834 if (!REG_P (reg
) || !CR_REGNO_P (REGNO (reg
)))
13837 cc_mode
= GET_MODE (reg
);
13838 cc_regnum
= REGNO (reg
);
13839 base_bit
= 4 * (cc_regnum
- CR0_REGNO
);
13841 validate_condition_mode (code
, cc_mode
);
13843 /* When generating a sCOND operation, only positive conditions are
13862 return scc_p
? base_bit
+ 3 : base_bit
+ 2;
13864 return base_bit
+ 2;
13865 case GT
: case GTU
: case UNLE
:
13866 return base_bit
+ 1;
13867 case LT
: case LTU
: case UNGE
:
13869 case ORDERED
: case UNORDERED
:
13870 return base_bit
+ 3;
13873 /* If scc, we will have done a cror to put the bit in the
13874 unordered position. So test that bit. For integer, this is ! LT
13875 unless this is an scc insn. */
13876 return scc_p
? base_bit
+ 3 : base_bit
;
13879 return scc_p
? base_bit
+ 3 : base_bit
+ 1;
13886 /* Return the GOT register. */
13889 rs6000_got_register (rtx value ATTRIBUTE_UNUSED
)
13891 /* The second flow pass currently (June 1999) can't update
13892 regs_ever_live without disturbing other parts of the compiler, so
13893 update it here to make the prolog/epilogue code happy. */
13894 if (!can_create_pseudo_p ()
13895 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM
))
13896 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM
, true);
13898 crtl
->uses_pic_offset_table
= 1;
13900 return pic_offset_table_rtx
;
13903 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
13905 /* Write out a function code label. */
13908 rs6000_output_function_entry (FILE *file
, const char *fname
)
13910 if (fname
[0] != '.')
13912 switch (DEFAULT_ABI
)
13915 gcc_unreachable ();
13921 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "L.");
13931 RS6000_OUTPUT_BASENAME (file
, fname
);
13934 /* Print an operand. Recognize special options, documented below. */
13937 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
13938 only introduced by the linker, when applying the sda21
13940 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
13941 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
13943 #define SMALL_DATA_RELOC "sda21"
13944 #define SMALL_DATA_REG 0
13948 print_operand (FILE *file
, rtx x
, int code
)
13951 unsigned HOST_WIDE_INT uval
;
13955 /* %a is output_address. */
13957 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
13961 /* Write the MMA accumulator number associated with VSX register X. On
13962 dense math systems, only allow DMR accumulators, not accumulators
13963 overlapping with the FPR registers. */
13965 output_operand_lossage ("invalid %%A value");
13966 else if (TARGET_DENSE_MATH
)
13968 if (DMR_REGNO_P (REGNO (x
)))
13969 fprintf (file
, "%d", REGNO (x
) - FIRST_DMR_REGNO
);
13971 output_operand_lossage ("%%A operand is not a DMR");
13973 else if (!FP_REGNO_P (REGNO (x
)) || (REGNO (x
) % 4) != 0)
13974 output_operand_lossage ("invalid %%A value");
13976 fprintf (file
, "%d", (REGNO (x
) - FIRST_FPR_REGNO
) / 4);
13980 /* Like 'J' but get to the GT bit only. */
13981 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13983 output_operand_lossage ("invalid %%D value");
13987 /* Bit 1 is GT bit. */
13988 i
= 4 * (REGNO (x
) - CR0_REGNO
) + 1;
13990 /* Add one for shift count in rlinm for scc. */
13991 fprintf (file
, "%d", i
+ 1);
13995 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
13998 output_operand_lossage ("invalid %%e value");
14003 if ((uval
& 0xffff) == 0 && uval
!= 0)
14008 /* X is a CR register. Print the number of the EQ bit of the CR */
14009 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
14010 output_operand_lossage ("invalid %%E value");
14012 fprintf (file
, "%d", 4 * (REGNO (x
) - CR0_REGNO
) + 2);
14016 /* X is a CR register. Print the shift count needed to move it
14017 to the high-order four bits. */
14018 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
14019 output_operand_lossage ("invalid %%f value");
14021 fprintf (file
, "%d", 4 * (REGNO (x
) - CR0_REGNO
));
14025 /* Similar, but print the count for the rotate in the opposite
14027 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
14028 output_operand_lossage ("invalid %%F value");
14030 fprintf (file
, "%d", 32 - 4 * (REGNO (x
) - CR0_REGNO
));
14034 /* X is a constant integer. If it is negative, print "m",
14035 otherwise print "z". This is to make an aze or ame insn. */
14036 if (!CONST_INT_P (x
))
14037 output_operand_lossage ("invalid %%G value");
14038 else if (INTVAL (x
) >= 0)
14045 /* If constant, output low-order five bits. Otherwise, write
14048 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 31);
14050 print_operand (file
, x
, 0);
14054 /* If constant, output low-order six bits. Otherwise, write
14057 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 63);
14059 print_operand (file
, x
, 0);
14063 /* Print `i' if this is a constant, else nothing. */
14069 /* Write the bit number in CCR for jump. */
14070 i
= ccr_bit (x
, 0);
14072 output_operand_lossage ("invalid %%j code");
14074 fprintf (file
, "%d", i
);
14078 /* Similar, but add one for shift count in rlinm for scc and pass
14079 scc flag to `ccr_bit'. */
14080 i
= ccr_bit (x
, 1);
14082 output_operand_lossage ("invalid %%J code");
14084 /* If we want bit 31, write a shift count of zero, not 32. */
14085 fprintf (file
, "%d", i
== 31 ? 0 : i
+ 1);
14089 /* X must be a constant. Write the 1's complement of the
14092 output_operand_lossage ("invalid %%k value");
14094 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ~ INTVAL (x
));
14098 /* X must be a symbolic constant on ELF. Write an
14099 expression suitable for an 'addi' that adds in the low 16
14100 bits of the MEM. */
14101 if (GET_CODE (x
) == CONST
)
14103 if (GET_CODE (XEXP (x
, 0)) != PLUS
14104 || (!SYMBOL_REF_P (XEXP (XEXP (x
, 0), 0))
14105 && GET_CODE (XEXP (XEXP (x
, 0), 0)) != LABEL_REF
)
14106 || !CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
14107 output_operand_lossage ("invalid %%K value");
14109 print_operand_address (file
, x
);
14110 fputs ("@l", file
);
14113 /* %l is output_asm_label. */
14116 /* Write second word of DImode or DFmode reference. Works on register
14117 or non-indexed memory only. */
14119 fputs (reg_names
[REGNO (x
) + 1], file
);
14120 else if (MEM_P (x
))
14122 machine_mode mode
= GET_MODE (x
);
14123 /* Handle possible auto-increment. Since it is pre-increment and
14124 we have already done it, we can just use an offset of word. */
14125 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
14126 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14127 output_address (mode
, plus_constant (Pmode
, XEXP (XEXP (x
, 0), 0),
14129 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14130 output_address (mode
, plus_constant (Pmode
, XEXP (XEXP (x
, 0), 0),
14133 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
,
14137 if (small_data_operand (x
, GET_MODE (x
)))
14138 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
14139 reg_names
[SMALL_DATA_REG
]);
14143 case 'N': /* Unused */
14144 /* Write the number of elements in the vector times 4. */
14145 if (GET_CODE (x
) != PARALLEL
)
14146 output_operand_lossage ("invalid %%N value");
14148 fprintf (file
, "%d", XVECLEN (x
, 0) * 4);
14151 case 'O': /* Unused */
14152 /* Similar, but subtract 1 first. */
14153 if (GET_CODE (x
) != PARALLEL
)
14154 output_operand_lossage ("invalid %%O value");
14156 fprintf (file
, "%d", (XVECLEN (x
, 0) - 1) * 4);
14160 /* X is a CONST_INT that is a power of two. Output the logarithm. */
14163 || (i
= exact_log2 (INTVAL (x
))) < 0)
14164 output_operand_lossage ("invalid %%p value");
14166 fprintf (file
, "%d", i
);
14170 /* The operand must be an indirect memory reference. The result
14171 is the register name. */
14172 if (!MEM_P (x
) || !REG_P (XEXP (x
, 0))
14173 || REGNO (XEXP (x
, 0)) >= 32)
14174 output_operand_lossage ("invalid %%P value");
14176 fputs (reg_names
[REGNO (XEXP (x
, 0))], file
);
14180 /* This outputs the logical code corresponding to a boolean
14181 expression. The expression may have one or both operands
14182 negated (if one, only the first one). For condition register
14183 logical operations, it will also treat the negated
14184 CR codes as NOTs, but not handle NOTs of them. */
14186 const char *const *t
= 0;
14188 enum rtx_code code
= GET_CODE (x
);
14189 static const char * const tbl
[3][3] = {
14190 { "and", "andc", "nor" },
14191 { "or", "orc", "nand" },
14192 { "xor", "eqv", "xor" } };
14196 else if (code
== IOR
)
14198 else if (code
== XOR
)
14201 output_operand_lossage ("invalid %%q value");
14203 if (GET_CODE (XEXP (x
, 0)) != NOT
)
14207 if (GET_CODE (XEXP (x
, 1)) == NOT
)
14218 if (! TARGET_MFCRF
)
14224 /* X is a CR register. Print the mask for `mtcrf'. */
14225 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
14226 output_operand_lossage ("invalid %%R value");
14228 fprintf (file
, "%d", 128 >> (REGNO (x
) - CR0_REGNO
));
14232 /* Low 5 bits of 32 - value */
14234 output_operand_lossage ("invalid %%s value");
14236 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, (32 - INTVAL (x
)) & 31);
14240 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
14241 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
14243 output_operand_lossage ("invalid %%t value");
14247 /* Bit 3 is OV bit. */
14248 i
= 4 * (REGNO (x
) - CR0_REGNO
) + 3;
14250 /* If we want bit 31, write a shift count of zero, not 32. */
14251 fprintf (file
, "%d", i
== 31 ? 0 : i
+ 1);
14255 /* Print the symbolic name of a branch target register. */
14256 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
14257 x
= XVECEXP (x
, 0, 0);
14258 if (!REG_P (x
) || (REGNO (x
) != LR_REGNO
14259 && REGNO (x
) != CTR_REGNO
))
14260 output_operand_lossage ("invalid %%T value");
14261 else if (REGNO (x
) == LR_REGNO
)
14262 fputs ("lr", file
);
14264 fputs ("ctr", file
);
14268 /* High-order or low-order 16 bits of constant, whichever is non-zero,
14269 for use in unsigned operand. */
14272 output_operand_lossage ("invalid %%u value");
14277 if ((uval
& 0xffff) == 0)
14280 fprintf (file
, HOST_WIDE_INT_PRINT_HEX
, uval
& 0xffff);
14284 /* High-order 16 bits of constant for use in signed operand. */
14286 output_operand_lossage ("invalid %%v value");
14288 fprintf (file
, HOST_WIDE_INT_PRINT_HEX
,
14289 (INTVAL (x
) >> 16) & 0xffff);
14293 /* Print `u' if this has an auto-increment or auto-decrement. */
14295 && (GET_CODE (XEXP (x
, 0)) == PRE_INC
14296 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
14297 || GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
))
14302 /* Print the trap code for this operand. */
14303 switch (GET_CODE (x
))
14306 fputs ("eq", file
); /* 4 */
14309 fputs ("ne", file
); /* 24 */
14312 fputs ("lt", file
); /* 16 */
14315 fputs ("le", file
); /* 20 */
14318 fputs ("gt", file
); /* 8 */
14321 fputs ("ge", file
); /* 12 */
14324 fputs ("llt", file
); /* 2 */
14327 fputs ("lle", file
); /* 6 */
14330 fputs ("lgt", file
); /* 1 */
14333 fputs ("lge", file
); /* 5 */
14336 output_operand_lossage ("invalid %%V value");
14341 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
14344 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, sext_hwi (INTVAL (x
), 16));
14346 print_operand (file
, x
, 0);
14350 /* X is a FPR or Altivec register used in a VSX context. */
14351 if (!REG_P (x
) || !VSX_REGNO_P (REGNO (x
)))
14352 output_operand_lossage ("invalid %%x value");
14355 int reg
= REGNO (x
);
14356 int vsx_reg
= (FP_REGNO_P (reg
)
14358 : reg
- FIRST_ALTIVEC_REGNO
+ 32);
14360 #ifdef TARGET_REGNAMES
14361 if (TARGET_REGNAMES
)
14362 fprintf (file
, "%%vs%d", vsx_reg
);
14365 fprintf (file
, "%d", vsx_reg
);
14371 && (legitimate_indexed_address_p (XEXP (x
, 0), 0)
14372 || (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
14373 && legitimate_indexed_address_p (XEXP (XEXP (x
, 0), 1), 0))))
14378 /* Like 'L', for third word of TImode/PTImode */
14380 fputs (reg_names
[REGNO (x
) + 2], file
);
14381 else if (MEM_P (x
))
14383 machine_mode mode
= GET_MODE (x
);
14384 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
14385 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14386 output_address (mode
, plus_constant (Pmode
,
14387 XEXP (XEXP (x
, 0), 0), 8));
14388 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14389 output_address (mode
, plus_constant (Pmode
,
14390 XEXP (XEXP (x
, 0), 0), 8));
14392 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
, 8), 0));
14393 if (small_data_operand (x
, GET_MODE (x
)))
14394 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
14395 reg_names
[SMALL_DATA_REG
]);
14400 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
14401 x
= XVECEXP (x
, 0, 1);
14402 /* X is a SYMBOL_REF. Write out the name preceded by a
14403 period and without any trailing data in brackets. Used for function
14404 names. If we are configured for System V (or the embedded ABI) on
14405 the PowerPC, do not emit the period, since those systems do not use
14406 TOCs and the like. */
14407 if (!SYMBOL_REF_P (x
))
14409 output_operand_lossage ("invalid %%z value");
14413 /* For macho, check to see if we need a stub. */
14416 const char *name
= XSTR (x
, 0);
14418 if (darwin_symbol_stubs
14419 && MACHOPIC_INDIRECT
14420 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
14421 name
= machopic_indirection_name (x
, /*stub_p=*/true);
14423 assemble_name (file
, name
);
14425 else if (!DOT_SYMBOLS
)
14426 assemble_name (file
, XSTR (x
, 0));
14428 rs6000_output_function_entry (file
, XSTR (x
, 0));
14432 /* Like 'L', for last word of TImode/PTImode. */
14434 fputs (reg_names
[REGNO (x
) + 3], file
);
14435 else if (MEM_P (x
))
14437 machine_mode mode
= GET_MODE (x
);
14438 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
14439 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14440 output_address (mode
, plus_constant (Pmode
,
14441 XEXP (XEXP (x
, 0), 0), 12));
14442 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14443 output_address (mode
, plus_constant (Pmode
,
14444 XEXP (XEXP (x
, 0), 0), 12));
14446 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
, 12), 0));
14447 if (small_data_operand (x
, GET_MODE (x
)))
14448 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
14449 reg_names
[SMALL_DATA_REG
]);
14453 /* Print AltiVec memory operand. */
14458 gcc_assert (MEM_P (x
));
14462 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x
))
14463 && GET_CODE (tmp
) == AND
14464 && CONST_INT_P (XEXP (tmp
, 1))
14465 && INTVAL (XEXP (tmp
, 1)) == -16)
14466 tmp
= XEXP (tmp
, 0);
14467 else if (VECTOR_MEM_VSX_P (GET_MODE (x
))
14468 && GET_CODE (tmp
) == PRE_MODIFY
)
14469 tmp
= XEXP (tmp
, 1);
14471 fprintf (file
, "0,%s", reg_names
[REGNO (tmp
)]);
14474 if (GET_CODE (tmp
) != PLUS
14475 || !REG_P (XEXP (tmp
, 0))
14476 || !REG_P (XEXP (tmp
, 1)))
14478 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
14482 if (REGNO (XEXP (tmp
, 0)) == 0)
14483 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (tmp
, 1)) ],
14484 reg_names
[ REGNO (XEXP (tmp
, 0)) ]);
14486 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (tmp
, 0)) ],
14487 reg_names
[ REGNO (XEXP (tmp
, 1)) ]);
14494 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
14495 else if (MEM_P (x
))
14497 /* We need to handle PRE_INC and PRE_DEC here, since we need to
14498 know the width from the mode. */
14499 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
)
14500 fprintf (file
, "%d(%s)", GET_MODE_SIZE (GET_MODE (x
)),
14501 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
14502 else if (GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14503 fprintf (file
, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x
)),
14504 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
14505 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14506 output_address (GET_MODE (x
), XEXP (XEXP (x
, 0), 1));
14508 output_address (GET_MODE (x
), XEXP (x
, 0));
14510 else if (toc_relative_expr_p (x
, false,
14511 &tocrel_base_oac
, &tocrel_offset_oac
))
14512 /* This hack along with a corresponding hack in
14513 rs6000_output_addr_const_extra arranges to output addends
14514 where the assembler expects to find them. eg.
14515 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
14516 without this hack would be output as "x@toc+4". We
14518 output_addr_const (file
, CONST_CAST_RTX (tocrel_base_oac
));
14519 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLSGD
)
14520 output_addr_const (file
, XVECEXP (x
, 0, 0));
14521 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
14522 output_addr_const (file
, XVECEXP (x
, 0, 1));
14524 output_addr_const (file
, x
);
14528 if (const char *name
= get_some_local_dynamic_name ())
14529 assemble_name (file
, name
);
14531 output_operand_lossage ("'%%&' used without any "
14532 "local dynamic TLS references");
14536 output_operand_lossage ("invalid %%xn code");
14540 /* Print the address of an operand. */
14543 print_operand_address (FILE *file
, rtx x
)
14546 fprintf (file
, "0(%s)", reg_names
[ REGNO (x
) ]);
14548 /* Is it a PC-relative address? */
14549 else if (TARGET_PCREL
&& pcrel_local_or_external_address (x
, VOIDmode
))
14551 HOST_WIDE_INT offset
;
14553 if (GET_CODE (x
) == CONST
)
14556 if (GET_CODE (x
) == PLUS
)
14558 offset
= INTVAL (XEXP (x
, 1));
14564 output_addr_const (file
, x
);
14567 fprintf (file
, "%+" PRId64
, offset
);
14569 if (SYMBOL_REF_P (x
) && !SYMBOL_REF_LOCAL_P (x
))
14570 fprintf (file
, "@got");
14572 fprintf (file
, "@pcrel");
14574 else if (SYMBOL_REF_P (x
) || GET_CODE (x
) == CONST
14575 || GET_CODE (x
) == LABEL_REF
)
14577 output_addr_const (file
, x
);
14578 if (small_data_operand (x
, GET_MODE (x
)))
14579 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
14580 reg_names
[SMALL_DATA_REG
]);
14582 gcc_assert (!TARGET_TOC
);
14584 else if (GET_CODE (x
) == PLUS
&& REG_P (XEXP (x
, 0))
14585 && REG_P (XEXP (x
, 1)))
14587 if (REGNO (XEXP (x
, 0)) == 0)
14588 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (x
, 1)) ],
14589 reg_names
[ REGNO (XEXP (x
, 0)) ]);
14591 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (x
, 0)) ],
14592 reg_names
[ REGNO (XEXP (x
, 1)) ]);
14594 else if (GET_CODE (x
) == PLUS
&& REG_P (XEXP (x
, 0))
14595 && CONST_INT_P (XEXP (x
, 1)))
14596 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
"(%s)",
14597 INTVAL (XEXP (x
, 1)), reg_names
[ REGNO (XEXP (x
, 0)) ]);
14599 else if (GET_CODE (x
) == LO_SUM
&& REG_P (XEXP (x
, 0))
14600 && CONSTANT_P (XEXP (x
, 1)))
14602 fprintf (file
, "lo16(");
14603 output_addr_const (file
, XEXP (x
, 1));
14604 fprintf (file
, ")(%s)", reg_names
[ REGNO (XEXP (x
, 0)) ]);
14608 else if (GET_CODE (x
) == LO_SUM
&& REG_P (XEXP (x
, 0))
14609 && CONSTANT_P (XEXP (x
, 1)))
14611 output_addr_const (file
, XEXP (x
, 1));
14612 fprintf (file
, "@l(%s)", reg_names
[ REGNO (XEXP (x
, 0)) ]);
14615 else if (toc_relative_expr_p (x
, false, &tocrel_base_oac
, &tocrel_offset_oac
))
14617 /* This hack along with a corresponding hack in
14618 rs6000_output_addr_const_extra arranges to output addends
14619 where the assembler expects to find them. eg.
14621 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
14622 without this hack would be output as "x@toc+8@l(9)". We
14623 want "x+8@toc@l(9)". */
14624 output_addr_const (file
, CONST_CAST_RTX (tocrel_base_oac
));
14625 if (GET_CODE (x
) == LO_SUM
)
14626 fprintf (file
, "@l(%s)", reg_names
[REGNO (XEXP (x
, 0))]);
14628 fprintf (file
, "(%s)", reg_names
[REGNO (XVECEXP (tocrel_base_oac
, 0, 1))]);
14631 output_addr_const (file
, x
);
14634 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14637 rs6000_output_addr_const_extra (FILE *file
, rtx x
)
14639 if (GET_CODE (x
) == UNSPEC
)
14640 switch (XINT (x
, 1))
14642 case UNSPEC_TOCREL
:
14643 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x
, 0, 0))
14644 && REG_P (XVECEXP (x
, 0, 1))
14645 && REGNO (XVECEXP (x
, 0, 1)) == TOC_REGISTER
);
14646 output_addr_const (file
, XVECEXP (x
, 0, 0));
14647 if (x
== tocrel_base_oac
&& tocrel_offset_oac
!= const0_rtx
)
14649 if (INTVAL (tocrel_offset_oac
) >= 0)
14650 fprintf (file
, "+");
14651 output_addr_const (file
, CONST_CAST_RTX (tocrel_offset_oac
));
14653 if (!TARGET_AIX
|| (TARGET_ELF
&& TARGET_MINIMAL_TOC
))
14656 assemble_name (file
, toc_label_name
);
14659 else if (TARGET_ELF
)
14660 fputs ("@toc", file
);
14664 case UNSPEC_MACHOPIC_OFFSET
:
14665 output_addr_const (file
, XVECEXP (x
, 0, 0));
14667 machopic_output_function_base_name (file
);
14674 /* Target hook for assembling integer objects. The PowerPC version has
14675 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
14676 is defined. It also needs to handle DI-mode objects on 64-bit
14680 rs6000_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
14682 #ifdef RELOCATABLE_NEEDS_FIXUP
14683 /* Special handling for SI values. */
14684 if (RELOCATABLE_NEEDS_FIXUP
&& size
== 4 && aligned_p
)
14686 static int recurse
= 0;
14688 /* For -mrelocatable, we mark all addresses that need to be fixed up in
14689 the .fixup section. Since the TOC section is already relocated, we
14690 don't need to mark it here. We used to skip the text section, but it
14691 should never be valid for relocated addresses to be placed in the text
14693 if (DEFAULT_ABI
== ABI_V4
14694 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
14695 && in_section
!= toc_section
14697 && !CONST_SCALAR_INT_P (x
)
14703 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCP", fixuplabelno
);
14705 ASM_OUTPUT_LABEL (asm_out_file
, buf
);
14706 fprintf (asm_out_file
, "\t.long\t(");
14707 output_addr_const (asm_out_file
, x
);
14708 fprintf (asm_out_file
, ")@fixup\n");
14709 fprintf (asm_out_file
, "\t.section\t\".fixup\",\"aw\"\n");
14710 ASM_OUTPUT_ALIGN (asm_out_file
, 2);
14711 fprintf (asm_out_file
, "\t.long\t");
14712 assemble_name (asm_out_file
, buf
);
14713 fprintf (asm_out_file
, "\n\t.previous\n");
14717 /* Remove initial .'s to turn a -mcall-aixdesc function
14718 address into the address of the descriptor, not the function
14720 else if (SYMBOL_REF_P (x
)
14721 && XSTR (x
, 0)[0] == '.'
14722 && DEFAULT_ABI
== ABI_AIX
)
14724 const char *name
= XSTR (x
, 0);
14725 while (*name
== '.')
14728 fprintf (asm_out_file
, "\t.long\t%s\n", name
);
14732 #endif /* RELOCATABLE_NEEDS_FIXUP */
14733 return default_assemble_integer (x
, size
, aligned_p
);
14736 /* Return a template string for assembly to emit when making an
14737 external call. FUNOP is the call mem argument operand number. */
14739 static const char *
14740 rs6000_call_template_1 (rtx
*operands
, unsigned int funop
, bool sibcall
)
14742 /* -Wformat-overflow workaround, without which gcc thinks that %u
14743 might produce 10 digits. */
14744 gcc_assert (funop
<= MAX_RECOG_OPERANDS
);
14748 if (GET_CODE (operands
[funop
+ 1]) == UNSPEC
)
14750 if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSGD
)
14751 sprintf (arg
, "(%%%u@tlsgd)", funop
+ 1);
14752 else if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSLD
)
14753 sprintf (arg
, "(%%&@tlsld)");
14756 /* The magic 32768 offset here corresponds to the offset of
14757 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
14759 sprintf (z
, "%%z%u%s", funop
,
14760 (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
&& flag_pic
== 2
14763 static char str
[32]; /* 1 spare */
14764 if (rs6000_pcrel_p ())
14765 sprintf (str
, "b%s %s@notoc%s", sibcall
? "" : "l", z
, arg
);
14766 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
14767 sprintf (str
, "b%s %s%s%s", sibcall
? "" : "l", z
, arg
,
14768 sibcall
? "" : "\n\tnop");
14769 else if (DEFAULT_ABI
== ABI_V4
)
14770 sprintf (str
, "b%s %s%s%s", sibcall
? "" : "l", z
, arg
,
14771 flag_pic
? "@plt" : "");
14773 /* If/when we remove the mlongcall opt, we can share the AIX/ELGv2 case. */
14774 else if (DEFAULT_ABI
== ABI_DARWIN
)
14776 /* The cookie is in operand func+2. */
14777 gcc_checking_assert (GET_CODE (operands
[funop
+ 2]) == CONST_INT
);
14778 int cookie
= INTVAL (operands
[funop
+ 2]);
14779 if (cookie
& CALL_LONG
)
14781 tree funname
= get_identifier (XSTR (operands
[funop
], 0));
14782 tree labelname
= get_prev_label (funname
);
14783 gcc_checking_assert (labelname
&& !sibcall
);
14785 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
14786 instruction will reach 'foo', otherwise link as 'bl L42'".
14787 "L42" should be a 'branch island', that will do a far jump to
14788 'foo'. Branch islands are generated in
14789 macho_branch_islands(). */
14790 sprintf (str
, "jbsr %%z%u,%.10s", funop
,
14791 IDENTIFIER_POINTER (labelname
));
14794 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
14796 sprintf (str
, "b%s %s%s", sibcall
? "" : "l", z
, arg
);
14800 gcc_unreachable ();
14805 rs6000_call_template (rtx
*operands
, unsigned int funop
)
14807 return rs6000_call_template_1 (operands
, funop
, false);
14811 rs6000_sibcall_template (rtx
*operands
, unsigned int funop
)
14813 return rs6000_call_template_1 (operands
, funop
, true);
14816 /* As above, for indirect calls. */
14818 static const char *
14819 rs6000_indirect_call_template_1 (rtx
*operands
, unsigned int funop
,
14822 /* -Wformat-overflow workaround, without which gcc thinks that %u
14823 might produce 10 digits. Note that -Wformat-overflow will not
14824 currently warn here for str[], so do not rely on a warning to
14825 ensure str[] is correctly sized. */
14826 gcc_assert (funop
<= MAX_RECOG_OPERANDS
);
14828 /* Currently, funop is either 0 or 1. The maximum string is always
14829 a !speculate 64-bit __tls_get_addr call.
14832 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14833 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14835 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14836 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14843 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14844 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14846 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14847 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14854 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14855 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14857 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14858 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14865 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14866 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
14868 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14869 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
14873 static char str
[160]; /* 8 spare */
14875 const char *ptrload
= TARGET_64BIT
? "d" : "wz";
14877 if (DEFAULT_ABI
== ABI_AIX
)
14880 ptrload
, funop
+ 3);
14882 /* We don't need the extra code to stop indirect call speculation if
14884 bool speculate
= (TARGET_MACHO
14885 || rs6000_speculate_indirect_jumps
14886 || (REG_P (operands
[funop
])
14887 && REGNO (operands
[funop
]) == LR_REGNO
));
14889 if (TARGET_PLTSEQ
&& GET_CODE (operands
[funop
]) == UNSPEC
)
14891 const char *rel64
= TARGET_64BIT
? "64" : "";
14894 if (GET_CODE (operands
[funop
+ 1]) == UNSPEC
)
14896 if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSGD
)
14897 sprintf (tls
, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
14899 else if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSLD
)
14900 sprintf (tls
, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
14904 const char *notoc
= rs6000_pcrel_p () ? "_NOTOC" : "";
14905 const char *addend
= (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
14906 && flag_pic
== 2 ? "+32768" : "");
14910 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
14911 tls
, rel64
, notoc
, funop
, addend
);
14912 s
+= sprintf (s
, "crset 2\n\t");
14915 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
14916 tls
, rel64
, notoc
, funop
, addend
);
14918 else if (!speculate
)
14919 s
+= sprintf (s
, "crset 2\n\t");
14921 if (rs6000_pcrel_p ())
14924 sprintf (s
, "b%%T%ul", funop
);
14926 sprintf (s
, "beq%%T%ul-", funop
);
14928 else if (DEFAULT_ABI
== ABI_AIX
)
14934 funop
, ptrload
, funop
+ 4);
14939 funop
, ptrload
, funop
+ 4);
14941 else if (DEFAULT_ABI
== ABI_ELFv2
)
14947 funop
, ptrload
, funop
+ 3);
14952 funop
, ptrload
, funop
+ 3);
14959 funop
, sibcall
? "" : "l");
14963 funop
, sibcall
? "" : "l", sibcall
? "\n\tb $" : "");
14969 rs6000_indirect_call_template (rtx
*operands
, unsigned int funop
)
14971 return rs6000_indirect_call_template_1 (operands
, funop
, false);
14975 rs6000_indirect_sibcall_template (rtx
*operands
, unsigned int funop
)
14977 return rs6000_indirect_call_template_1 (operands
, funop
, true);
14981 /* Output indirect call insns. WHICH identifies the type of sequence. */
14983 rs6000_pltseq_template (rtx
*operands
, int which
)
14985 const char *rel64
= TARGET_64BIT
? "64" : "";
14988 if (GET_CODE (operands
[3]) == UNSPEC
)
14990 char off
= which
== RS6000_PLTSEQ_PLT_PCREL34
? '8' : '4';
14991 if (XINT (operands
[3], 1) == UNSPEC_TLSGD
)
14992 sprintf (tls
, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
14994 else if (XINT (operands
[3], 1) == UNSPEC_TLSLD
)
14995 sprintf (tls
, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
14999 gcc_assert (DEFAULT_ABI
== ABI_ELFv2
|| DEFAULT_ABI
== ABI_V4
);
15000 static char str
[96]; /* 10 spare */
15001 char off
= WORDS_BIG_ENDIAN
? '2' : '4';
15002 const char *addend
= (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
15003 && flag_pic
== 2 ? "+32768" : "");
15006 case RS6000_PLTSEQ_TOCSAVE
:
15009 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
15010 TARGET_64BIT
? "d 2,24(1)" : "w 2,12(1)",
15013 case RS6000_PLTSEQ_PLT16_HA
:
15014 if (DEFAULT_ABI
== ABI_V4
&& !flag_pic
)
15017 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
15021 "addis %%0,%%1,0\n\t"
15022 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
15023 tls
, off
, rel64
, addend
);
15025 case RS6000_PLTSEQ_PLT16_LO
:
15027 "l%s %%0,0(%%1)\n\t"
15028 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
15029 TARGET_64BIT
? "d" : "wz",
15030 tls
, off
, rel64
, TARGET_64BIT
? "_DS" : "", addend
);
15032 case RS6000_PLTSEQ_MTCTR
:
15035 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
15036 tls
, rel64
, addend
);
15038 case RS6000_PLTSEQ_PLT_PCREL34
:
15040 "pl%s %%0,0(0),1\n\t"
15041 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
15042 TARGET_64BIT
? "d" : "wz",
15046 gcc_unreachable ();
15052 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
15053 /* Emit an assembler directive to set symbol visibility for DECL to
15054 VISIBILITY_TYPE. */
15057 rs6000_assemble_visibility (tree decl
, int vis
)
15062 /* Functions need to have their entry point symbol visibility set as
15063 well as their descriptor symbol visibility. */
15064 if (DEFAULT_ABI
== ABI_AIX
15066 && TREE_CODE (decl
) == FUNCTION_DECL
)
15068 static const char * const visibility_types
[] = {
15069 NULL
, "protected", "hidden", "internal"
15072 const char *name
, *type
;
15074 name
= ((* targetm
.strip_name_encoding
)
15075 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
))));
15076 type
= visibility_types
[vis
];
15078 fprintf (asm_out_file
, "\t.%s\t%s\n", type
, name
);
15079 fprintf (asm_out_file
, "\t.%s\t.%s\n", type
, name
);
15082 default_assemble_visibility (decl
, vis
);
15086 /* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
15087 entry. If RECORD_P is true and the target supports named sections,
15088 the location of the NOPs will be recorded in a special object section
15089 called "__patchable_function_entries". This routine may be called
15090 twice per function to put NOPs before and after the function
15094 rs6000_print_patchable_function_entry (FILE *file
,
15095 unsigned HOST_WIDE_INT patch_area_size
,
15098 bool global_entry_needed_p
= rs6000_global_entry_point_prologue_needed_p ();
15099 /* For a function which needs global entry point, we will emit the
15100 patchable area before and after local entry point under the control of
15101 cfun->machine->global_entry_emitted, see the handling in function
15102 rs6000_output_function_prologue. */
15103 if (!global_entry_needed_p
|| cfun
->machine
->global_entry_emitted
)
15104 default_print_patchable_function_entry (file
, patch_area_size
, record_p
);
15108 rs6000_reverse_condition (machine_mode mode
, enum rtx_code code
)
15110 /* Reversal of FP compares takes care -- an ordered compare
15111 becomes an unordered compare and vice versa. */
15112 if (mode
== CCFPmode
15113 && (!flag_finite_math_only
15114 || code
== UNLT
|| code
== UNLE
|| code
== UNGT
|| code
== UNGE
15115 || code
== UNEQ
|| code
== LTGT
))
15116 return reverse_condition_maybe_unordered (code
);
15118 return reverse_condition (code
);
15121 /* Check if C (as 64bit integer) can be rotated to a constant which constains
15122 nonzero bits at the LOWBITS low bits only.
15124 Return true if C can be rotated to such constant. If so, *ROT is written
15125 to the number by which C is rotated.
15126 Return false otherwise. */
15129 can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c
, int lowbits
, int *rot
)
15131 int clz
= HOST_BITS_PER_WIDE_INT
- lowbits
;
15133 /* case a. 0..0xxx: already at least clz zeros. */
15134 int lz
= clz_hwi (c
);
15141 /* case b. 0..0xxx0..0: at least clz zeros. */
15142 int tz
= ctz_hwi (c
);
15143 if (lz
+ tz
>= clz
)
15145 *rot
= HOST_BITS_PER_WIDE_INT
- tz
;
15149 /* case c. xx10.....0xx: rotate 'clz - 1' bits first, then check case b.
15150 ^bit -> Vbit, , then zeros are at head or tail.
15151 00...00xxx100, 'clz - 1' >= 'bits of xxxx'. */
15152 const int rot_bits
= lowbits
+ 1;
15153 unsigned HOST_WIDE_INT rc
= (c
>> rot_bits
) | (c
<< (clz
- 1));
15155 if (clz_hwi (rc
) + tz
>= clz
)
15157 *rot
= HOST_BITS_PER_WIDE_INT
- (tz
+ rot_bits
);
15164 /* Check if C (as 64bit integer) can be rotated to a positive 16bits constant
15165 which contains 48bits leading zeros and 16bits of any value. */
15168 can_be_rotated_to_positive_16bits (HOST_WIDE_INT c
)
15171 bool res
= can_be_rotated_to_lowbits (c
, 16, &rot
);
15172 return res
&& rot
> 0;
15175 /* Check if C (as 64bit integer) can be rotated to a negative 15bits constant
15176 which contains 49bits leading ones and 15bits of any value. */
15179 can_be_rotated_to_negative_15bits (HOST_WIDE_INT c
)
15182 bool res
= can_be_rotated_to_lowbits (~c
, 15, &rot
);
15183 return res
&& rot
> 0;
15186 /* Generate a compare for CODE. Return a brand-new rtx that
15187 represents the result of the compare. */
15190 rs6000_generate_compare (rtx cmp
, machine_mode mode
)
15192 machine_mode comp_mode
;
15193 rtx compare_result
;
15194 enum rtx_code code
= GET_CODE (cmp
);
15195 rtx op0
= XEXP (cmp
, 0);
15196 rtx op1
= XEXP (cmp
, 1);
15198 if (!TARGET_FLOAT128_HW
&& FLOAT128_VECTOR_P (mode
))
15199 comp_mode
= CCmode
;
15200 else if (FLOAT_MODE_P (mode
))
15201 comp_mode
= CCFPmode
;
15202 else if (code
== GTU
|| code
== LTU
15203 || code
== GEU
|| code
== LEU
)
15204 comp_mode
= CCUNSmode
;
15205 else if ((code
== EQ
|| code
== NE
)
15206 && unsigned_reg_p (op0
)
15207 && (unsigned_reg_p (op1
)
15208 || (CONST_INT_P (op1
) && INTVAL (op1
) != 0)))
15209 /* These are unsigned values, perhaps there will be a later
15210 ordering compare that can be shared with this one. */
15211 comp_mode
= CCUNSmode
;
15213 comp_mode
= CCmode
;
15215 /* If we have an unsigned compare, make sure we don't have a signed value as
15217 if (comp_mode
== CCUNSmode
&& CONST_INT_P (op1
)
15218 && INTVAL (op1
) < 0)
15220 op0
= copy_rtx_if_shared (op0
);
15221 op1
= force_reg (GET_MODE (op0
), op1
);
15222 cmp
= gen_rtx_fmt_ee (code
, GET_MODE (cmp
), op0
, op1
);
15225 /* First, the compare. */
15226 compare_result
= gen_reg_rtx (comp_mode
);
15228 /* IEEE 128-bit support in VSX registers when we do not have hardware
15230 if (!TARGET_FLOAT128_HW
&& FLOAT128_VECTOR_P (mode
))
15232 rtx libfunc
= NULL_RTX
;
15233 bool check_nan
= false;
15240 libfunc
= optab_libfunc (eq_optab
, mode
);
15245 libfunc
= optab_libfunc (ge_optab
, mode
);
15250 libfunc
= optab_libfunc (le_optab
, mode
);
15255 libfunc
= optab_libfunc (unord_optab
, mode
);
15256 code
= (code
== UNORDERED
) ? NE
: EQ
;
15262 libfunc
= optab_libfunc (ge_optab
, mode
);
15263 code
= (code
== UNGE
) ? GE
: GT
;
15269 libfunc
= optab_libfunc (le_optab
, mode
);
15270 code
= (code
== UNLE
) ? LE
: LT
;
15276 libfunc
= optab_libfunc (eq_optab
, mode
);
15277 code
= (code
= UNEQ
) ? EQ
: NE
;
15281 gcc_unreachable ();
15284 gcc_assert (libfunc
);
15287 dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
15288 SImode
, op0
, mode
, op1
, mode
);
15290 /* The library signals an exception for signalling NaNs, so we need to
15291 handle isgreater, etc. by first checking isordered. */
15294 rtx ne_rtx
, normal_dest
, unord_dest
;
15295 rtx unord_func
= optab_libfunc (unord_optab
, mode
);
15296 rtx join_label
= gen_label_rtx ();
15297 rtx join_ref
= gen_rtx_LABEL_REF (VOIDmode
, join_label
);
15298 rtx unord_cmp
= gen_reg_rtx (comp_mode
);
15301 /* Test for either value being a NaN. */
15302 gcc_assert (unord_func
);
15303 unord_dest
= emit_library_call_value (unord_func
, NULL_RTX
, LCT_CONST
,
15304 SImode
, op0
, mode
, op1
, mode
);
15306 /* Set value (0) if either value is a NaN, and jump to the join
15308 dest
= gen_reg_rtx (SImode
);
15309 emit_move_insn (dest
, const1_rtx
);
15310 emit_insn (gen_rtx_SET (unord_cmp
,
15311 gen_rtx_COMPARE (comp_mode
, unord_dest
,
15314 ne_rtx
= gen_rtx_NE (comp_mode
, unord_cmp
, const0_rtx
);
15315 emit_jump_insn (gen_rtx_SET (pc_rtx
,
15316 gen_rtx_IF_THEN_ELSE (VOIDmode
, ne_rtx
,
15320 /* Do the normal comparison, knowing that the values are not
15322 normal_dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
15323 SImode
, op0
, mode
, op1
, mode
);
15325 emit_insn (gen_cstoresi4 (dest
,
15326 gen_rtx_fmt_ee (code
, SImode
, normal_dest
,
15328 normal_dest
, const0_rtx
));
15330 /* Join NaN and non-Nan paths. Compare dest against 0. */
15331 emit_label (join_label
);
15335 emit_insn (gen_rtx_SET (compare_result
,
15336 gen_rtx_COMPARE (comp_mode
, dest
, const0_rtx
)));
15341 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
15342 CLOBBERs to match cmptf_internal2 pattern. */
15343 if (comp_mode
== CCFPmode
&& TARGET_XL_COMPAT
15344 && FLOAT128_IBM_P (GET_MODE (op0
))
15345 && TARGET_HARD_FLOAT
)
15346 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
15348 gen_rtx_SET (compare_result
,
15349 gen_rtx_COMPARE (comp_mode
, op0
, op1
)),
15350 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15351 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15352 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15353 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15354 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15355 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15356 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15357 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15358 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (Pmode
)))));
15359 else if (GET_CODE (op1
) == UNSPEC
15360 && XINT (op1
, 1) == UNSPEC_SP_TEST
)
15362 rtx op1b
= XVECEXP (op1
, 0, 0);
15363 comp_mode
= CCEQmode
;
15364 compare_result
= gen_reg_rtx (CCEQmode
);
15366 emit_insn (gen_stack_protect_testdi (compare_result
, op0
, op1b
));
15368 emit_insn (gen_stack_protect_testsi (compare_result
, op0
, op1b
));
15371 emit_insn (gen_rtx_SET (compare_result
,
15372 gen_rtx_COMPARE (comp_mode
, op0
, op1
)));
15375 validate_condition_mode (code
, GET_MODE (compare_result
));
15377 return gen_rtx_fmt_ee (code
, VOIDmode
, compare_result
, const0_rtx
);
15381 /* Return the diagnostic message string if the binary operation OP is
15382 not permitted on TYPE1 and TYPE2, NULL otherwise. */
15385 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED
,
15389 machine_mode mode1
= TYPE_MODE (type1
);
15390 machine_mode mode2
= TYPE_MODE (type2
);
15392 /* For complex modes, use the inner type. */
15393 if (COMPLEX_MODE_P (mode1
))
15394 mode1
= GET_MODE_INNER (mode1
);
15396 if (COMPLEX_MODE_P (mode2
))
15397 mode2
= GET_MODE_INNER (mode2
);
15399 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
15400 double to intermix unless -mfloat128-convert. */
15401 if (mode1
== mode2
)
15404 if (!TARGET_FLOAT128_CVT
)
15406 if ((FLOAT128_IEEE_P (mode1
) && FLOAT128_IBM_P (mode2
))
15407 || (FLOAT128_IBM_P (mode1
) && FLOAT128_IEEE_P (mode2
)))
15408 return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
15416 /* Expand floating point conversion to/from __float128 and __ibm128. */
15419 rs6000_expand_float128_convert (rtx dest
, rtx src
, bool unsigned_p
)
15421 machine_mode dest_mode
= GET_MODE (dest
);
15422 machine_mode src_mode
= GET_MODE (src
);
15423 convert_optab cvt
= unknown_optab
;
15424 bool do_move
= false;
15425 rtx libfunc
= NULL_RTX
;
15427 typedef rtx (*rtx_2func_t
) (rtx
, rtx
);
15428 rtx_2func_t hw_convert
= (rtx_2func_t
)0;
15432 rtx_2func_t from_df
;
15433 rtx_2func_t from_sf
;
15434 rtx_2func_t from_si_sign
;
15435 rtx_2func_t from_si_uns
;
15436 rtx_2func_t from_di_sign
;
15437 rtx_2func_t from_di_uns
;
15440 rtx_2func_t to_si_sign
;
15441 rtx_2func_t to_si_uns
;
15442 rtx_2func_t to_di_sign
;
15443 rtx_2func_t to_di_uns
;
15444 } hw_conversions
[2] = {
15445 /* convertions to/from KFmode */
15447 gen_extenddfkf2_hw
, /* KFmode <- DFmode. */
15448 gen_extendsfkf2_hw
, /* KFmode <- SFmode. */
15449 gen_float_kfsi2_hw
, /* KFmode <- SImode (signed). */
15450 gen_floatuns_kfsi2_hw
, /* KFmode <- SImode (unsigned). */
15451 gen_float_kfdi2_hw
, /* KFmode <- DImode (signed). */
15452 gen_floatuns_kfdi2_hw
, /* KFmode <- DImode (unsigned). */
15453 gen_trunckfdf2_hw
, /* DFmode <- KFmode. */
15454 gen_trunckfsf2_hw
, /* SFmode <- KFmode. */
15455 gen_fix_kfsi2_hw
, /* SImode <- KFmode (signed). */
15456 gen_fixuns_kfsi2_hw
, /* SImode <- KFmode (unsigned). */
15457 gen_fix_kfdi2_hw
, /* DImode <- KFmode (signed). */
15458 gen_fixuns_kfdi2_hw
, /* DImode <- KFmode (unsigned). */
15461 /* convertions to/from TFmode */
15463 gen_extenddftf2_hw
, /* TFmode <- DFmode. */
15464 gen_extendsftf2_hw
, /* TFmode <- SFmode. */
15465 gen_float_tfsi2_hw
, /* TFmode <- SImode (signed). */
15466 gen_floatuns_tfsi2_hw
, /* TFmode <- SImode (unsigned). */
15467 gen_float_tfdi2_hw
, /* TFmode <- DImode (signed). */
15468 gen_floatuns_tfdi2_hw
, /* TFmode <- DImode (unsigned). */
15469 gen_trunctfdf2_hw
, /* DFmode <- TFmode. */
15470 gen_trunctfsf2_hw
, /* SFmode <- TFmode. */
15471 gen_fix_tfsi2_hw
, /* SImode <- TFmode (signed). */
15472 gen_fixuns_tfsi2_hw
, /* SImode <- TFmode (unsigned). */
15473 gen_fix_tfdi2_hw
, /* DImode <- TFmode (signed). */
15474 gen_fixuns_tfdi2_hw
, /* DImode <- TFmode (unsigned). */
15478 if (dest_mode
== src_mode
)
15479 gcc_unreachable ();
15481 /* Eliminate memory operations. */
15483 src
= force_reg (src_mode
, src
);
15487 rtx tmp
= gen_reg_rtx (dest_mode
);
15488 rs6000_expand_float128_convert (tmp
, src
, unsigned_p
);
15489 rs6000_emit_move (dest
, tmp
, dest_mode
);
15493 /* Convert to IEEE 128-bit floating point. */
15494 if (FLOAT128_IEEE_P (dest_mode
))
15496 if (dest_mode
== KFmode
)
15498 else if (dest_mode
== TFmode
)
15501 gcc_unreachable ();
15507 hw_convert
= hw_conversions
[kf_or_tf
].from_df
;
15512 hw_convert
= hw_conversions
[kf_or_tf
].from_sf
;
15518 if (FLOAT128_IBM_P (src_mode
))
15527 cvt
= ufloat_optab
;
15528 hw_convert
= hw_conversions
[kf_or_tf
].from_si_uns
;
15532 cvt
= sfloat_optab
;
15533 hw_convert
= hw_conversions
[kf_or_tf
].from_si_sign
;
15540 cvt
= ufloat_optab
;
15541 hw_convert
= hw_conversions
[kf_or_tf
].from_di_uns
;
15545 cvt
= sfloat_optab
;
15546 hw_convert
= hw_conversions
[kf_or_tf
].from_di_sign
;
15551 gcc_unreachable ();
15555 /* Convert from IEEE 128-bit floating point. */
15556 else if (FLOAT128_IEEE_P (src_mode
))
15558 if (src_mode
== KFmode
)
15560 else if (src_mode
== TFmode
)
15563 gcc_unreachable ();
15569 hw_convert
= hw_conversions
[kf_or_tf
].to_df
;
15574 hw_convert
= hw_conversions
[kf_or_tf
].to_sf
;
15580 if (FLOAT128_IBM_P (dest_mode
))
15590 hw_convert
= hw_conversions
[kf_or_tf
].to_si_uns
;
15595 hw_convert
= hw_conversions
[kf_or_tf
].to_si_sign
;
15603 hw_convert
= hw_conversions
[kf_or_tf
].to_di_uns
;
15608 hw_convert
= hw_conversions
[kf_or_tf
].to_di_sign
;
15613 gcc_unreachable ();
15617 /* Both IBM format. */
15618 else if (FLOAT128_IBM_P (dest_mode
) && FLOAT128_IBM_P (src_mode
))
15622 gcc_unreachable ();
15624 /* Handle conversion between TFmode/KFmode/IFmode. */
15626 emit_insn (gen_rtx_SET (dest
, gen_rtx_FLOAT_EXTEND (dest_mode
, src
)));
15628 /* Handle conversion if we have hardware support. */
15629 else if (TARGET_FLOAT128_HW
&& hw_convert
)
15630 emit_insn ((hw_convert
) (dest
, src
));
15632 /* Call an external function to do the conversion. */
15633 else if (cvt
!= unknown_optab
)
15635 libfunc
= convert_optab_libfunc (cvt
, dest_mode
, src_mode
);
15636 gcc_assert (libfunc
!= NULL_RTX
);
15638 dest2
= emit_library_call_value (libfunc
, dest
, LCT_CONST
, dest_mode
,
15641 gcc_assert (dest2
!= NULL_RTX
);
15642 if (!rtx_equal_p (dest
, dest2
))
15643 emit_move_insn (dest
, dest2
);
15647 gcc_unreachable ();
15653 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
15654 can be used as that dest register. Return the dest register. */
15657 rs6000_emit_eqne (machine_mode mode
, rtx op1
, rtx op2
, rtx scratch
)
15659 if (op2
== const0_rtx
)
15662 if (GET_CODE (scratch
) == SCRATCH
)
15663 scratch
= gen_reg_rtx (mode
);
15665 if (logical_operand (op2
, mode
))
15666 emit_insn (gen_rtx_SET (scratch
, gen_rtx_XOR (mode
, op1
, op2
)));
15668 emit_insn (gen_rtx_SET (scratch
,
15669 gen_rtx_PLUS (mode
, op1
, negate_rtx (mode
, op2
))));
15674 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
15675 requires this. The result is mode MODE. */
15677 rs6000_emit_fp_cror (rtx_code code
, machine_mode mode
, rtx x
)
15681 if (code
== LTGT
|| code
== LE
|| code
== UNLT
)
15682 cond
[n
++] = gen_rtx_fmt_ee (LT
, mode
, x
, const0_rtx
);
15683 if (code
== LTGT
|| code
== GE
|| code
== UNGT
)
15684 cond
[n
++] = gen_rtx_fmt_ee (GT
, mode
, x
, const0_rtx
);
15685 if (code
== LE
|| code
== GE
|| code
== UNEQ
)
15686 cond
[n
++] = gen_rtx_fmt_ee (EQ
, mode
, x
, const0_rtx
);
15687 if (code
== UNLT
|| code
== UNGT
|| code
== UNEQ
)
15688 cond
[n
++] = gen_rtx_fmt_ee (UNORDERED
, mode
, x
, const0_rtx
);
15690 gcc_assert (n
== 2);
15692 rtx cc
= gen_reg_rtx (CCEQmode
);
15693 rtx logical
= gen_rtx_IOR (mode
, cond
[0], cond
[1]);
15694 emit_insn (gen_cceq_ior_compare (mode
, cc
, logical
, cond
[0], x
, cond
[1], x
));
15700 rs6000_emit_sCOND (machine_mode mode
, rtx operands
[])
15702 rtx condition_rtx
= rs6000_generate_compare (operands
[1], mode
);
15703 rtx_code cond_code
= GET_CODE (condition_rtx
);
15705 if (FLOAT_MODE_P (mode
) && HONOR_NANS (mode
)
15706 && !(FLOAT128_VECTOR_P (mode
) && !TARGET_FLOAT128_HW
))
15708 else if (cond_code
== NE
15709 || cond_code
== GE
|| cond_code
== LE
15710 || cond_code
== GEU
|| cond_code
== LEU
15711 || cond_code
== ORDERED
|| cond_code
== UNGE
|| cond_code
== UNLE
)
15713 rtx not_result
= gen_reg_rtx (CCEQmode
);
15714 rtx not_op
, rev_cond_rtx
;
15715 machine_mode cc_mode
;
15717 cc_mode
= GET_MODE (XEXP (condition_rtx
, 0));
15719 rev_cond_rtx
= gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode
, cond_code
),
15720 SImode
, XEXP (condition_rtx
, 0), const0_rtx
);
15721 not_op
= gen_rtx_COMPARE (CCEQmode
, rev_cond_rtx
, const0_rtx
);
15722 emit_insn (gen_rtx_SET (not_result
, not_op
));
15723 condition_rtx
= gen_rtx_EQ (VOIDmode
, not_result
, const0_rtx
);
15726 machine_mode op_mode
= GET_MODE (XEXP (operands
[1], 0));
15727 if (op_mode
== VOIDmode
)
15728 op_mode
= GET_MODE (XEXP (operands
[1], 1));
15730 if (TARGET_POWERPC64
&& (op_mode
== DImode
|| FLOAT_MODE_P (mode
)))
15732 PUT_MODE (condition_rtx
, DImode
);
15733 convert_move (operands
[0], condition_rtx
, 0);
15737 PUT_MODE (condition_rtx
, SImode
);
15738 emit_insn (gen_rtx_SET (operands
[0], condition_rtx
));
15742 /* Emit a branch of kind CODE to location LOC. */
15745 rs6000_emit_cbranch (machine_mode mode
, rtx operands
[])
15747 rtx condition_rtx
= rs6000_generate_compare (operands
[0], mode
);
15748 rtx loc_ref
= gen_rtx_LABEL_REF (VOIDmode
, operands
[3]);
15749 rtx ite
= gen_rtx_IF_THEN_ELSE (VOIDmode
, condition_rtx
, loc_ref
, pc_rtx
);
15750 emit_jump_insn (gen_rtx_SET (pc_rtx
, ite
));
15753 /* Return the string to output a conditional branch to LABEL, which is
15754 the operand template of the label, or NULL if the branch is really a
15755 conditional return.
15757 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
15758 condition code register and its mode specifies what kind of
15759 comparison we made.
15761 REVERSED is nonzero if we should reverse the sense of the comparison.
15763 INSN is the insn. */
15766 output_cbranch (rtx op
, const char *label
, int reversed
, rtx_insn
*insn
)
15768 static char string
[64];
15769 enum rtx_code code
= GET_CODE (op
);
15770 rtx cc_reg
= XEXP (op
, 0);
15771 machine_mode mode
= GET_MODE (cc_reg
);
15772 int cc_regno
= REGNO (cc_reg
) - CR0_REGNO
;
15773 int need_longbranch
= label
!= NULL
&& get_attr_length (insn
) == 8;
15774 int really_reversed
= reversed
^ need_longbranch
;
15780 validate_condition_mode (code
, mode
);
15782 /* Work out which way this really branches. We could use
15783 reverse_condition_maybe_unordered here always but this
15784 makes the resulting assembler clearer. */
15785 if (really_reversed
)
15787 /* Reversal of FP compares takes care -- an ordered compare
15788 becomes an unordered compare and vice versa. */
15789 if (mode
== CCFPmode
)
15790 code
= reverse_condition_maybe_unordered (code
);
15792 code
= reverse_condition (code
);
15797 /* Not all of these are actually distinct opcodes, but
15798 we distinguish them for clarity of the resulting assembler. */
15799 case NE
: case LTGT
:
15800 ccode
= "ne"; break;
15801 case EQ
: case UNEQ
:
15802 ccode
= "eq"; break;
15804 ccode
= "ge"; break;
15805 case GT
: case GTU
: case UNGT
:
15806 ccode
= "gt"; break;
15808 ccode
= "le"; break;
15809 case LT
: case LTU
: case UNLT
:
15810 ccode
= "lt"; break;
15811 case UNORDERED
: ccode
= "un"; break;
15812 case ORDERED
: ccode
= "nu"; break;
15813 case UNGE
: ccode
= "nl"; break;
15814 case UNLE
: ccode
= "ng"; break;
15816 gcc_unreachable ();
15819 /* Maybe we have a guess as to how likely the branch is. */
15821 note
= find_reg_note (insn
, REG_BR_PROB
, NULL_RTX
);
15822 if (note
!= NULL_RTX
)
15824 /* PROB is the difference from 50%. */
15825 int prob
= profile_probability::from_reg_br_prob_note (XINT (note
, 0))
15826 .to_reg_br_prob_base () - REG_BR_PROB_BASE
/ 2;
15828 /* Only hint for highly probable/improbable branches on newer cpus when
15829 we have real profile data, as static prediction overrides processor
15830 dynamic prediction. For older cpus we may as well always hint, but
15831 assume not taken for branches that are very close to 50% as a
15832 mispredicted taken branch is more expensive than a
15833 mispredicted not-taken branch. */
15834 if (rs6000_always_hint
15835 || (abs (prob
) > REG_BR_PROB_BASE
/ 100 * 48
15836 && (profile_status_for_fn (cfun
) != PROFILE_GUESSED
)
15837 && br_prob_note_reliable_p (note
)))
15839 if (abs (prob
) > REG_BR_PROB_BASE
/ 20
15840 && ((prob
> 0) ^ need_longbranch
))
15848 s
+= sprintf (s
, "b%slr%s ", ccode
, pred
);
15850 s
+= sprintf (s
, "b%s%s ", ccode
, pred
);
15852 /* We need to escape any '%' characters in the reg_names string.
15853 Assume they'd only be the first character.... */
15854 if (reg_names
[cc_regno
+ CR0_REGNO
][0] == '%')
15856 s
+= sprintf (s
, "%s", reg_names
[cc_regno
+ CR0_REGNO
]);
15860 /* If the branch distance was too far, we may have to use an
15861 unconditional branch to go the distance. */
15862 if (need_longbranch
)
15863 s
+= sprintf (s
, ",$+8\n\tb %s", label
);
15865 s
+= sprintf (s
, ",%s", label
);
15871 /* Return insn for VSX or Altivec comparisons. */
15874 rs6000_emit_vector_compare_inner (enum rtx_code code
, rtx op0
, rtx op1
)
15877 machine_mode mode
= GET_MODE (op0
);
15885 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
15896 mask
= gen_reg_rtx (mode
);
15897 emit_insn (gen_rtx_SET (mask
, gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
15904 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
15905 DMODE is expected destination mode. This is a recursive function. */
15908 rs6000_emit_vector_compare (enum rtx_code rcode
,
15910 machine_mode dmode
)
15913 bool swap_operands
= false;
15914 bool try_again
= false;
15916 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode
));
15917 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
15919 /* See if the comparison works as is. */
15920 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
);
15928 swap_operands
= true;
15933 swap_operands
= true;
15941 /* Invert condition and try again.
15942 e.g., A != B becomes ~(A==B). */
15944 enum rtx_code rev_code
;
15945 enum insn_code nor_code
;
15948 rev_code
= reverse_condition_maybe_unordered (rcode
);
15949 if (rev_code
== UNKNOWN
)
15952 nor_code
= optab_handler (one_cmpl_optab
, dmode
);
15953 if (nor_code
== CODE_FOR_nothing
)
15956 mask2
= rs6000_emit_vector_compare (rev_code
, op0
, op1
, dmode
);
15960 mask
= gen_reg_rtx (dmode
);
15961 emit_insn (GEN_FCN (nor_code
) (mask
, mask2
));
15969 /* Try GT/GTU/LT/LTU OR EQ */
15972 enum insn_code ior_code
;
15973 enum rtx_code new_code
;
15994 gcc_unreachable ();
15997 ior_code
= optab_handler (ior_optab
, dmode
);
15998 if (ior_code
== CODE_FOR_nothing
)
16001 c_rtx
= rs6000_emit_vector_compare (new_code
, op0
, op1
, dmode
);
16005 eq_rtx
= rs6000_emit_vector_compare (EQ
, op0
, op1
, dmode
);
16009 mask
= gen_reg_rtx (dmode
);
16010 emit_insn (GEN_FCN (ior_code
) (mask
, c_rtx
, eq_rtx
));
16021 std::swap (op0
, op1
);
16023 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
);
16028 /* You only get two chances. */
16032 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
16033 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
16034 operands for the relation operation COND. */
16037 rs6000_emit_vector_cond_expr (rtx dest
, rtx op_true
, rtx op_false
,
16038 rtx cond
, rtx cc_op0
, rtx cc_op1
)
16040 machine_mode dest_mode
= GET_MODE (dest
);
16041 machine_mode mask_mode
= GET_MODE (cc_op0
);
16042 enum rtx_code rcode
= GET_CODE (cond
);
16044 bool invert_move
= false;
16046 if (VECTOR_UNIT_NONE_P (dest_mode
))
16049 gcc_assert (GET_MODE_SIZE (dest_mode
) == GET_MODE_SIZE (mask_mode
)
16050 && GET_MODE_NUNITS (dest_mode
) == GET_MODE_NUNITS (mask_mode
));
16054 /* Swap operands if we can, and fall back to doing the operation as
16055 specified, and doing a NOR to invert the test. */
16061 /* Invert condition and try again.
16062 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
16063 invert_move
= true;
16064 rcode
= reverse_condition_maybe_unordered (rcode
);
16065 if (rcode
== UNKNOWN
)
16071 if (GET_MODE_CLASS (mask_mode
) == MODE_VECTOR_INT
)
16073 /* Invert condition to avoid compound test. */
16074 invert_move
= true;
16075 rcode
= reverse_condition (rcode
);
16084 /* Invert condition to avoid compound test if necessary. */
16085 if (rcode
== GEU
|| rcode
== LEU
)
16087 invert_move
= true;
16088 rcode
= reverse_condition (rcode
);
16096 /* Get the vector mask for the given relational operations. */
16097 mask
= rs6000_emit_vector_compare (rcode
, cc_op0
, cc_op1
, mask_mode
);
16102 if (mask_mode
!= dest_mode
)
16103 mask
= simplify_gen_subreg (dest_mode
, mask
, mask_mode
, 0);
16106 std::swap (op_true
, op_false
);
16108 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
16109 if (GET_MODE_CLASS (dest_mode
) == MODE_VECTOR_INT
16110 && (GET_CODE (op_true
) == CONST_VECTOR
16111 || GET_CODE (op_false
) == CONST_VECTOR
))
16113 rtx constant_0
= CONST0_RTX (dest_mode
);
16114 rtx constant_m1
= CONSTM1_RTX (dest_mode
);
16116 if (op_true
== constant_m1
&& op_false
== constant_0
)
16118 emit_move_insn (dest
, mask
);
16122 else if (op_true
== constant_0
&& op_false
== constant_m1
)
16124 emit_insn (gen_rtx_SET (dest
, gen_rtx_NOT (dest_mode
, mask
)));
16128 /* If we can't use the vector comparison directly, perhaps we can use
16129 the mask for the true or false fields, instead of loading up a
16131 if (op_true
== constant_m1
)
16134 if (op_false
== constant_0
)
16138 if (!REG_P (op_true
) && !SUBREG_P (op_true
))
16139 op_true
= force_reg (dest_mode
, op_true
);
16141 if (!REG_P (op_false
) && !SUBREG_P (op_false
))
16142 op_false
= force_reg (dest_mode
, op_false
);
16144 rtx tmp
= gen_rtx_IOR (dest_mode
,
16145 gen_rtx_AND (dest_mode
, gen_rtx_NOT (dest_mode
, mask
),
16147 gen_rtx_AND (dest_mode
, mask
, op_true
));
16148 emit_insn (gen_rtx_SET (dest
, tmp
));
16152 /* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to emit a
16153 maximum or minimum with "C" semantics.
16155 Unless you use -ffast-math, you can't use these instructions to replace
16156 conditions that implicitly reverse the condition because the comparison
16157 might generate a NaN or signed zer0.
16159 I.e. the following can be replaced all of the time
16160 ret = (op1 > op2) ? op1 : op2 ; generate xsmaxcdp
16161 ret = (op1 >= op2) ? op1 : op2 ; generate xsmaxcdp
16162 ret = (op1 < op2) ? op1 : op2; ; generate xsmincdp
16163 ret = (op1 <= op2) ? op1 : op2; ; generate xsmincdp
16165 The following can be replaced only if -ffast-math is used:
16166 ret = (op1 < op2) ? op2 : op1 ; generate xsmaxcdp
16167 ret = (op1 <= op2) ? op2 : op1 ; generate xsmaxcdp
16168 ret = (op1 > op2) ? op2 : op1; ; generate xsmincdp
16169 ret = (op1 >= op2) ? op2 : op1; ; generate xsmincdp
16171 Move TRUE_COND to DEST if OP of the operands of the last comparison is
16172 nonzero/true, FALSE_COND if it is zero/false.
16174 Return false if we can't generate the appropriate minimum or maximum, and
16175 true if we can did the minimum or maximum. */
16178 rs6000_maybe_emit_maxc_minc (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
16180 enum rtx_code code
= GET_CODE (op
);
16181 rtx op0
= XEXP (op
, 0);
16182 rtx op1
= XEXP (op
, 1);
16183 machine_mode compare_mode
= GET_MODE (op0
);
16184 machine_mode result_mode
= GET_MODE (dest
);
16186 if (result_mode
!= compare_mode
)
16189 /* See the comments of this function, it simply expects GE/GT/LE/LT in
16190 the checks, but for the reversible equivalent UNLT/UNLE/UNGT/UNGE,
16191 we need to do the reversions first to make the following checks
16192 support fewer cases, like:
16194 (a UNLT b) ? op1 : op2 => (a >= b) ? op2 : op1;
16195 (a UNLE b) ? op1 : op2 => (a > b) ? op2 : op1;
16196 (a UNGT b) ? op1 : op2 => (a <= b) ? op2 : op1;
16197 (a UNGE b) ? op1 : op2 => (a < b) ? op2 : op1;
16199 By the way, if we see these UNLT/UNLE/UNGT/UNGE it's guaranteed
16200 that we have 4-way condition codes (LT/GT/EQ/UN), so we do not
16201 have to check for fast-math or the like. */
16202 if (code
== UNGE
|| code
== UNGT
|| code
== UNLE
|| code
== UNLT
)
16204 code
= reverse_condition_maybe_unordered (code
);
16205 std::swap (true_cond
, false_cond
);
16209 if (code
== GE
|| code
== GT
)
16211 else if (code
== LE
|| code
== LT
)
16216 if (rtx_equal_p (op0
, true_cond
) && rtx_equal_p (op1
, false_cond
))
16219 /* Only when NaNs and signed-zeros are not in effect, smax could be
16220 used for `op0 < op1 ? op1 : op0`, and smin could be used for
16221 `op0 > op1 ? op1 : op0`. */
16222 else if (rtx_equal_p (op1
, true_cond
) && rtx_equal_p (op0
, false_cond
)
16223 && !HONOR_NANS (compare_mode
) && !HONOR_SIGNED_ZEROS (compare_mode
))
16229 rs6000_emit_minmax (dest
, max_p
? SMAX
: SMIN
, op0
, op1
);
16233 /* Possibly emit a floating point conditional move by generating a compare that
16234 sets a mask instruction and a XXSEL select instruction.
16236 Move TRUE_COND to DEST if OP of the operands of the last comparison is
16237 nonzero/true, FALSE_COND if it is zero/false.
16239 Return false if the operation cannot be generated, and true if we could
16240 generate the instruction. */
16243 rs6000_maybe_emit_fp_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
16245 enum rtx_code code
= GET_CODE (op
);
16246 rtx op0
= XEXP (op
, 0);
16247 rtx op1
= XEXP (op
, 1);
16248 machine_mode compare_mode
= GET_MODE (op0
);
16249 machine_mode result_mode
= GET_MODE (dest
);
16254 if (!can_create_pseudo_p ())
16257 /* We allow the comparison to be either SFmode/DFmode and the true/false
16258 condition to be either SFmode/DFmode. I.e. we allow:
16263 r = (a == b) ? c : d;
16270 r = (a == b) ? c : d;
16272 but we don't allow intermixing the IEEE 128-bit floating point types with
16273 the 32/64-bit scalar types. */
16275 if (!(compare_mode
== result_mode
16276 || (compare_mode
== SFmode
&& result_mode
== DFmode
)
16277 || (compare_mode
== DFmode
&& result_mode
== SFmode
)))
16290 code
= swap_condition (code
);
16291 std::swap (op0
, op1
);
16298 /* Generate: [(parallel [(set (dest)
16299 (if_then_else (op (cmp1) (cmp2))
16302 (clobber (scratch))])]. */
16304 compare_rtx
= gen_rtx_fmt_ee (code
, CCFPmode
, op0
, op1
);
16305 cmove_rtx
= gen_rtx_SET (dest
,
16306 gen_rtx_IF_THEN_ELSE (result_mode
,
16311 clobber_rtx
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (V2DImode
));
16312 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
16313 gen_rtvec (2, cmove_rtx
, clobber_rtx
)));
16318 /* Helper function to return true if the target has instructions to do a
16319 compare and set mask instruction that can be used with XXSEL to implement a
16320 conditional move. It is also assumed that such a target also supports the
16321 "C" minimum and maximum instructions. */
16324 have_compare_and_set_mask (machine_mode mode
)
16330 return TARGET_P9_MINMAX
;
16334 return TARGET_POWER10
&& TARGET_FLOAT128_HW
&& FLOAT128_IEEE_P (mode
);
16343 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
16344 operands of the last comparison is nonzero/true, FALSE_COND if it
16345 is zero/false. Return 0 if the hardware has no such operation. */
16348 rs6000_emit_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
16350 enum rtx_code code
= GET_CODE (op
);
16351 rtx op0
= XEXP (op
, 0);
16352 rtx op1
= XEXP (op
, 1);
16353 machine_mode compare_mode
= GET_MODE (op0
);
16354 machine_mode result_mode
= GET_MODE (dest
);
16356 bool is_against_zero
;
16358 /* These modes should always match. */
16359 if (GET_MODE (op1
) != compare_mode
16360 /* In the isel case however, we can use a compare immediate, so
16361 op1 may be a small constant. */
16362 && (!TARGET_ISEL
|| !short_cint_operand (op1
, VOIDmode
)))
16364 if (GET_MODE (true_cond
) != result_mode
)
16366 if (GET_MODE (false_cond
) != result_mode
)
16369 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
16371 if (have_compare_and_set_mask (compare_mode
)
16372 && have_compare_and_set_mask (result_mode
))
16374 if (rs6000_maybe_emit_maxc_minc (dest
, op
, true_cond
, false_cond
))
16377 if (rs6000_maybe_emit_fp_cmove (dest
, op
, true_cond
, false_cond
))
16381 /* Don't allow using floating point comparisons for integer results for
16383 if (FLOAT_MODE_P (compare_mode
) && !FLOAT_MODE_P (result_mode
))
16386 /* First, work out if the hardware can do this at all, or
16387 if it's too slow.... */
16388 if (!FLOAT_MODE_P (compare_mode
))
16391 return rs6000_emit_int_cmove (dest
, op
, true_cond
, false_cond
);
16395 is_against_zero
= op1
== CONST0_RTX (compare_mode
);
16397 /* A floating-point subtract might overflow, underflow, or produce
16398 an inexact result, thus changing the floating-point flags, so it
16399 can't be generated if we care about that. It's safe if one side
16400 of the construct is zero, since then no subtract will be
16402 if (SCALAR_FLOAT_MODE_P (compare_mode
)
16403 && flag_trapping_math
&& ! is_against_zero
)
16406 /* Eliminate half of the comparisons by switching operands, this
16407 makes the remaining code simpler. */
16408 if (code
== UNLT
|| code
== UNGT
|| code
== UNORDERED
|| code
== NE
16409 || code
== LTGT
|| code
== LT
|| code
== UNLE
)
16411 code
= reverse_condition_maybe_unordered (code
);
16413 true_cond
= false_cond
;
16417 /* UNEQ and LTGT take four instructions for a comparison with zero,
16418 it'll probably be faster to use a branch here too. */
16419 if (code
== UNEQ
&& HONOR_NANS (compare_mode
))
16422 /* We're going to try to implement comparisons by performing
16423 a subtract, then comparing against zero. Unfortunately,
16424 Inf - Inf is NaN which is not zero, and so if we don't
16425 know that the operand is finite and the comparison
16426 would treat EQ different to UNORDERED, we can't do it. */
16427 if (HONOR_INFINITIES (compare_mode
)
16428 && code
!= GT
&& code
!= UNGE
16429 && (!CONST_DOUBLE_P (op1
)
16430 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1
)))
16431 /* Constructs of the form (a OP b ? a : b) are safe. */
16432 && ((! rtx_equal_p (op0
, false_cond
) && ! rtx_equal_p (op1
, false_cond
))
16433 || (! rtx_equal_p (op0
, true_cond
)
16434 && ! rtx_equal_p (op1
, true_cond
))))
16437 /* At this point we know we can use fsel. */
16439 /* Don't allow compare_mode other than SFmode or DFmode, for others there
16440 is no fsel instruction. */
16441 if (compare_mode
!= SFmode
&& compare_mode
!= DFmode
)
16444 /* Reduce the comparison to a comparison against zero. */
16445 if (! is_against_zero
)
16447 temp
= gen_reg_rtx (compare_mode
);
16448 emit_insn (gen_rtx_SET (temp
, gen_rtx_MINUS (compare_mode
, op0
, op1
)));
16450 op1
= CONST0_RTX (compare_mode
);
16453 /* If we don't care about NaNs we can reduce some of the comparisons
16454 down to faster ones. */
16455 if (! HONOR_NANS (compare_mode
))
16461 true_cond
= false_cond
;
16474 /* Now, reduce everything down to a GE. */
16481 temp
= gen_reg_rtx (compare_mode
);
16482 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
16487 temp
= gen_reg_rtx (compare_mode
);
16488 emit_insn (gen_rtx_SET (temp
, gen_rtx_ABS (compare_mode
, op0
)));
16493 temp
= gen_reg_rtx (compare_mode
);
16494 emit_insn (gen_rtx_SET (temp
,
16495 gen_rtx_NEG (compare_mode
,
16496 gen_rtx_ABS (compare_mode
, op0
))));
16501 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
16502 temp
= gen_reg_rtx (result_mode
);
16503 emit_insn (gen_rtx_SET (temp
,
16504 gen_rtx_IF_THEN_ELSE (result_mode
,
16505 gen_rtx_GE (VOIDmode
,
16507 true_cond
, false_cond
)));
16508 false_cond
= true_cond
;
16511 temp
= gen_reg_rtx (compare_mode
);
16512 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
16517 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
16518 temp
= gen_reg_rtx (result_mode
);
16519 emit_insn (gen_rtx_SET (temp
,
16520 gen_rtx_IF_THEN_ELSE (result_mode
,
16521 gen_rtx_GE (VOIDmode
,
16523 true_cond
, false_cond
)));
16524 true_cond
= false_cond
;
16527 temp
= gen_reg_rtx (compare_mode
);
16528 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
16533 gcc_unreachable ();
16536 emit_insn (gen_rtx_SET (dest
,
16537 gen_rtx_IF_THEN_ELSE (result_mode
,
16538 gen_rtx_GE (VOIDmode
,
16540 true_cond
, false_cond
)));
16544 /* Same as above, but for ints (isel). */
16547 rs6000_emit_int_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
16549 rtx condition_rtx
, cr
;
16550 machine_mode mode
= GET_MODE (dest
);
16551 enum rtx_code cond_code
;
16552 rtx (*isel_func
) (rtx
, rtx
, rtx
, rtx
, rtx
);
16555 if (mode
!= SImode
&& (!TARGET_POWERPC64
|| mode
!= DImode
))
16558 /* PR104335: We now need to expect CC-mode "comparisons"
16559 coming from ifcvt. The following code expects proper
16560 comparisons so better abort here. */
16561 if (GET_MODE_CLASS (GET_MODE (XEXP (op
, 0))) == MODE_CC
)
16564 /* We still have to do the compare, because isel doesn't do a
16565 compare, it just looks at the CRx bits set by a previous compare
16567 condition_rtx
= rs6000_generate_compare (op
, mode
);
16568 cond_code
= GET_CODE (condition_rtx
);
16569 cr
= XEXP (condition_rtx
, 0);
16570 signedp
= GET_MODE (cr
) == CCmode
;
16572 isel_func
= (mode
== SImode
16573 ? (signedp
? gen_isel_cc_si
: gen_isel_ccuns_si
)
16574 : (signedp
? gen_isel_cc_di
: gen_isel_ccuns_di
));
16578 case LT
: case GT
: case LTU
: case GTU
: case EQ
:
16579 /* isel handles these directly. */
16583 /* We need to swap the sense of the comparison. */
16585 std::swap (false_cond
, true_cond
);
16586 PUT_CODE (condition_rtx
, reverse_condition (cond_code
));
16591 false_cond
= force_reg (mode
, false_cond
);
16592 if (true_cond
!= const0_rtx
)
16593 true_cond
= force_reg (mode
, true_cond
);
16595 emit_insn (isel_func (dest
, condition_rtx
, true_cond
, false_cond
, cr
));
16601 rs6000_emit_minmax (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
16603 machine_mode mode
= GET_MODE (op0
);
16607 /* VSX/altivec have direct min/max insns. */
16608 if ((code
== SMAX
|| code
== SMIN
)
16609 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode
)
16610 || (mode
== SFmode
&& VECTOR_UNIT_VSX_P (DFmode
))
16611 || (TARGET_POWER10
&& TARGET_FLOAT128_HW
&& FLOAT128_IEEE_P (mode
))))
16613 emit_insn (gen_rtx_SET (dest
, gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
16617 if (code
== SMAX
|| code
== SMIN
)
16622 if (code
== SMAX
|| code
== UMAX
)
16623 target
= emit_conditional_move (dest
, { c
, op0
, op1
, mode
},
16624 op0
, op1
, mode
, 0);
16626 target
= emit_conditional_move (dest
, { c
, op0
, op1
, mode
},
16627 op1
, op0
, mode
, 0);
16628 gcc_assert (target
);
16629 if (target
!= dest
)
16630 emit_move_insn (dest
, target
);
16633 /* A subroutine of the atomic operation splitters. Jump to LABEL if
16634 COND is true. Mark the jump as unlikely to be taken. */
16637 emit_unlikely_jump (rtx cond
, rtx label
)
16639 rtx x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, cond
, label
, pc_rtx
);
16640 rtx_insn
*insn
= emit_jump_insn (gen_rtx_SET (pc_rtx
, x
));
16641 add_reg_br_prob_note (insn
, profile_probability::very_unlikely ());
16644 /* A subroutine of the atomic operation splitters. Emit a load-locked
16645 instruction in MODE. For QI/HImode, possibly use a pattern than includes
16646 the zero_extend operation. */
16649 emit_load_locked (machine_mode mode
, rtx reg
, rtx mem
)
16651 rtx (*fn
) (rtx
, rtx
) = NULL
;
16656 fn
= gen_load_lockedqi
;
16659 fn
= gen_load_lockedhi
;
16662 if (GET_MODE (mem
) == QImode
)
16663 fn
= gen_load_lockedqi_si
;
16664 else if (GET_MODE (mem
) == HImode
)
16665 fn
= gen_load_lockedhi_si
;
16667 fn
= gen_load_lockedsi
;
16670 fn
= gen_load_lockeddi
;
16673 fn
= gen_load_lockedti
;
16676 gcc_unreachable ();
16678 emit_insn (fn (reg
, mem
));
16681 /* A subroutine of the atomic operation splitters. Emit a store-conditional
16682 instruction in MODE. */
16685 emit_store_conditional (machine_mode mode
, rtx res
, rtx mem
, rtx val
)
16687 rtx (*fn
) (rtx
, rtx
, rtx
) = NULL
;
16692 fn
= gen_store_conditionalqi
;
16695 fn
= gen_store_conditionalhi
;
16698 fn
= gen_store_conditionalsi
;
16701 fn
= gen_store_conditionaldi
;
16704 fn
= gen_store_conditionalti
;
16707 gcc_unreachable ();
16710 /* Emit sync before stwcx. to address PPC405 Erratum. */
16711 if (PPC405_ERRATUM77
)
16712 emit_insn (gen_hwsync ());
16714 emit_insn (fn (res
, mem
, val
));
16717 /* Expand barriers before and after a load_locked/store_cond sequence. */
16720 rs6000_pre_atomic_barrier (rtx mem
, enum memmodel model
)
16722 rtx addr
= XEXP (mem
, 0);
16724 if (!legitimate_indirect_address_p (addr
, reload_completed
)
16725 && !legitimate_indexed_address_p (addr
, reload_completed
))
16727 addr
= force_reg (Pmode
, addr
);
16728 mem
= replace_equiv_address_nv (mem
, addr
);
16733 case MEMMODEL_RELAXED
:
16734 case MEMMODEL_CONSUME
:
16735 case MEMMODEL_ACQUIRE
:
16737 case MEMMODEL_RELEASE
:
16738 case MEMMODEL_ACQ_REL
:
16739 emit_insn (gen_lwsync ());
16741 case MEMMODEL_SEQ_CST
:
16742 emit_insn (gen_hwsync ());
16745 gcc_unreachable ();
16751 rs6000_post_atomic_barrier (enum memmodel model
)
16755 case MEMMODEL_RELAXED
:
16756 case MEMMODEL_CONSUME
:
16757 case MEMMODEL_RELEASE
:
16759 case MEMMODEL_ACQUIRE
:
16760 case MEMMODEL_ACQ_REL
:
16761 case MEMMODEL_SEQ_CST
:
16762 emit_insn (gen_isync ());
16765 gcc_unreachable ();
16769 /* A subroutine of the various atomic expanders. For sub-word operations,
16770 we must adjust things to operate on SImode. Given the original MEM,
16771 return a new aligned memory. Also build and return the quantities by
16772 which to shift and mask. */
16775 rs6000_adjust_atomic_subword (rtx orig_mem
, rtx
*pshift
, rtx
*pmask
)
16777 rtx addr
, align
, shift
, mask
, mem
;
16778 HOST_WIDE_INT shift_mask
;
16779 machine_mode mode
= GET_MODE (orig_mem
);
16781 /* For smaller modes, we have to implement this via SImode. */
16782 shift_mask
= (mode
== QImode
? 0x18 : 0x10);
16784 addr
= XEXP (orig_mem
, 0);
16785 addr
= force_reg (GET_MODE (addr
), addr
);
16787 /* Aligned memory containing subword. Generate a new memory. We
16788 do not want any of the existing MEM_ATTR data, as we're now
16789 accessing memory outside the original object. */
16790 align
= expand_simple_binop (Pmode
, AND
, addr
, GEN_INT (-4),
16791 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16792 mem
= gen_rtx_MEM (SImode
, align
);
16793 MEM_VOLATILE_P (mem
) = MEM_VOLATILE_P (orig_mem
);
16794 if (MEM_ALIAS_SET (orig_mem
) == ALIAS_SET_MEMORY_BARRIER
)
16795 set_mem_alias_set (mem
, ALIAS_SET_MEMORY_BARRIER
);
16797 /* Shift amount for subword relative to aligned word. */
16798 shift
= gen_reg_rtx (SImode
);
16799 addr
= gen_lowpart (SImode
, addr
);
16800 rtx tmp
= gen_reg_rtx (SImode
);
16801 emit_insn (gen_ashlsi3 (tmp
, addr
, GEN_INT (3)));
16802 emit_insn (gen_andsi3 (shift
, tmp
, GEN_INT (shift_mask
)));
16803 if (BYTES_BIG_ENDIAN
)
16804 shift
= expand_simple_binop (SImode
, XOR
, shift
, GEN_INT (shift_mask
),
16805 shift
, 1, OPTAB_LIB_WIDEN
);
16808 /* Mask for insertion. */
16809 mask
= expand_simple_binop (SImode
, ASHIFT
, GEN_INT (GET_MODE_MASK (mode
)),
16810 shift
, NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16816 /* A subroutine of the various atomic expanders. For sub-word operands,
16817 combine OLDVAL and NEWVAL via MASK. Returns a new pseduo. */
16820 rs6000_mask_atomic_subword (rtx oldval
, rtx newval
, rtx mask
)
16824 x
= gen_reg_rtx (SImode
);
16825 emit_insn (gen_rtx_SET (x
, gen_rtx_AND (SImode
,
16826 gen_rtx_NOT (SImode
, mask
),
16829 x
= expand_simple_binop (SImode
, IOR
, newval
, x
, x
, 1, OPTAB_LIB_WIDEN
);
16834 /* A subroutine of the various atomic expanders. For sub-word operands,
16835 extract WIDE to NARROW via SHIFT. */
16838 rs6000_finish_atomic_subword (rtx narrow
, rtx wide
, rtx shift
)
16840 wide
= expand_simple_binop (SImode
, LSHIFTRT
, wide
, shift
,
16841 wide
, 1, OPTAB_LIB_WIDEN
);
16842 emit_move_insn (narrow
, gen_lowpart (GET_MODE (narrow
), wide
));
16845 /* Expand an atomic compare and swap operation. */
16848 rs6000_expand_atomic_compare_and_swap (rtx operands
[])
16850 rtx boolval
, retval
, mem
, oldval
, newval
, cond
;
16851 rtx label1
, label2
, x
, mask
, shift
;
16852 machine_mode mode
, orig_mode
;
16853 enum memmodel mod_s
, mod_f
;
16856 boolval
= operands
[0];
16857 retval
= operands
[1];
16859 oldval
= operands
[3];
16860 newval
= operands
[4];
16861 is_weak
= (INTVAL (operands
[5]) != 0);
16862 mod_s
= memmodel_base (INTVAL (operands
[6]));
16863 mod_f
= memmodel_base (INTVAL (operands
[7]));
16864 orig_mode
= mode
= GET_MODE (mem
);
16866 mask
= shift
= NULL_RTX
;
16867 if (mode
== QImode
|| mode
== HImode
)
16869 /* Before power8, we didn't have access to lbarx/lharx, so generate a
16870 lwarx and shift/mask operations. With power8, we need to do the
16871 comparison in SImode, but the store is still done in QI/HImode. */
16872 oldval
= convert_modes (SImode
, mode
, oldval
, 1);
16874 if (!TARGET_SYNC_HI_QI
)
16876 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
16878 /* Shift and mask OLDVAL into position with the word. */
16879 oldval
= expand_simple_binop (SImode
, ASHIFT
, oldval
, shift
,
16880 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16882 /* Shift and mask NEWVAL into position within the word. */
16883 newval
= convert_modes (SImode
, mode
, newval
, 1);
16884 newval
= expand_simple_binop (SImode
, ASHIFT
, newval
, shift
,
16885 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16888 /* Prepare to adjust the return value. */
16889 retval
= gen_reg_rtx (SImode
);
16892 else if (reg_overlap_mentioned_p (retval
, oldval
))
16893 oldval
= copy_to_reg (oldval
);
16895 if (mode
!= TImode
&& !reg_or_short_operand (oldval
, mode
))
16896 oldval
= copy_to_mode_reg (mode
, oldval
);
16898 if (reg_overlap_mentioned_p (retval
, newval
))
16899 newval
= copy_to_reg (newval
);
16901 mem
= rs6000_pre_atomic_barrier (mem
, mod_s
);
16906 label1
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
16907 emit_label (XEXP (label1
, 0));
16909 label2
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
16911 emit_load_locked (mode
, retval
, mem
);
16915 x
= expand_simple_binop (SImode
, AND
, retval
, mask
,
16916 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16918 cond
= gen_reg_rtx (CCmode
);
16919 /* If we have TImode, synthesize a comparison. */
16920 if (mode
!= TImode
)
16921 x
= gen_rtx_COMPARE (CCmode
, x
, oldval
);
16924 rtx xor1_result
= gen_reg_rtx (DImode
);
16925 rtx xor2_result
= gen_reg_rtx (DImode
);
16926 rtx or_result
= gen_reg_rtx (DImode
);
16927 rtx new_word0
= simplify_gen_subreg (DImode
, x
, TImode
, 0);
16928 rtx new_word1
= simplify_gen_subreg (DImode
, x
, TImode
, 8);
16929 rtx old_word0
= simplify_gen_subreg (DImode
, oldval
, TImode
, 0);
16930 rtx old_word1
= simplify_gen_subreg (DImode
, oldval
, TImode
, 8);
16932 emit_insn (gen_xordi3 (xor1_result
, new_word0
, old_word0
));
16933 emit_insn (gen_xordi3 (xor2_result
, new_word1
, old_word1
));
16934 emit_insn (gen_iordi3 (or_result
, xor1_result
, xor2_result
));
16935 x
= gen_rtx_COMPARE (CCmode
, or_result
, const0_rtx
);
16938 emit_insn (gen_rtx_SET (cond
, x
));
16940 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
16941 emit_unlikely_jump (x
, label2
);
16945 x
= rs6000_mask_atomic_subword (retval
, newval
, mask
);
16947 emit_store_conditional (orig_mode
, cond
, mem
, x
);
16951 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
16952 emit_unlikely_jump (x
, label1
);
16955 if (!is_mm_relaxed (mod_f
))
16956 emit_label (XEXP (label2
, 0));
16958 rs6000_post_atomic_barrier (mod_s
);
16960 if (is_mm_relaxed (mod_f
))
16961 emit_label (XEXP (label2
, 0));
16964 rs6000_finish_atomic_subword (operands
[1], retval
, shift
);
16965 else if (mode
!= GET_MODE (operands
[1]))
16966 convert_move (operands
[1], retval
, 1);
16968 /* In all cases, CR0 contains EQ on success, and NE on failure. */
16969 x
= gen_rtx_EQ (SImode
, cond
, const0_rtx
);
16970 emit_insn (gen_rtx_SET (boolval
, x
));
16973 /* Expand an atomic exchange operation. */
16976 rs6000_expand_atomic_exchange (rtx operands
[])
16978 rtx retval
, mem
, val
, cond
;
16980 enum memmodel model
;
16981 rtx label
, x
, mask
, shift
;
16983 retval
= operands
[0];
16986 model
= memmodel_base (INTVAL (operands
[3]));
16987 mode
= GET_MODE (mem
);
16989 mask
= shift
= NULL_RTX
;
16990 if (!TARGET_SYNC_HI_QI
&& (mode
== QImode
|| mode
== HImode
))
16992 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
16994 /* Shift and mask VAL into position with the word. */
16995 val
= convert_modes (SImode
, mode
, val
, 1);
16996 val
= expand_simple_binop (SImode
, ASHIFT
, val
, shift
,
16997 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16999 /* Prepare to adjust the return value. */
17000 retval
= gen_reg_rtx (SImode
);
17004 mem
= rs6000_pre_atomic_barrier (mem
, model
);
17006 label
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
17007 emit_label (XEXP (label
, 0));
17009 emit_load_locked (mode
, retval
, mem
);
17013 x
= rs6000_mask_atomic_subword (retval
, val
, mask
);
17015 cond
= gen_reg_rtx (CCmode
);
17016 emit_store_conditional (mode
, cond
, mem
, x
);
17018 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
17019 emit_unlikely_jump (x
, label
);
17021 rs6000_post_atomic_barrier (model
);
17024 rs6000_finish_atomic_subword (operands
[0], retval
, shift
);
17027 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
17028 to perform. MEM is the memory on which to operate. VAL is the second
17029 operand of the binary operator. BEFORE and AFTER are optional locations to
17030 return the value of MEM either before of after the operation. MODEL_RTX
17031 is a CONST_INT containing the memory model to use. */
17034 rs6000_expand_atomic_op (enum rtx_code code
, rtx mem
, rtx val
,
17035 rtx orig_before
, rtx orig_after
, rtx model_rtx
)
17037 enum memmodel model
= memmodel_base (INTVAL (model_rtx
));
17038 machine_mode mode
= GET_MODE (mem
);
17039 machine_mode store_mode
= mode
;
17040 rtx label
, x
, cond
, mask
, shift
;
17041 rtx before
= orig_before
, after
= orig_after
;
17043 mask
= shift
= NULL_RTX
;
17044 /* On power8, we want to use SImode for the operation. On previous systems,
17045 use the operation in a subword and shift/mask to get the proper byte or
17047 if (mode
== QImode
|| mode
== HImode
)
17049 if (TARGET_SYNC_HI_QI
)
17051 val
= convert_modes (SImode
, mode
, val
, 1);
17053 /* Prepare to adjust the return value. */
17054 before
= gen_reg_rtx (SImode
);
17056 after
= gen_reg_rtx (SImode
);
17061 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
17063 /* Shift and mask VAL into position with the word. */
17064 val
= convert_modes (SImode
, mode
, val
, 1);
17065 val
= expand_simple_binop (SImode
, ASHIFT
, val
, shift
,
17066 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
17072 /* We've already zero-extended VAL. That is sufficient to
17073 make certain that it does not affect other bits. */
17078 /* If we make certain that all of the other bits in VAL are
17079 set, that will be sufficient to not affect other bits. */
17080 x
= gen_rtx_NOT (SImode
, mask
);
17081 x
= gen_rtx_IOR (SImode
, x
, val
);
17082 emit_insn (gen_rtx_SET (val
, x
));
17089 /* These will all affect bits outside the field and need
17090 adjustment via MASK within the loop. */
17094 gcc_unreachable ();
17097 /* Prepare to adjust the return value. */
17098 before
= gen_reg_rtx (SImode
);
17100 after
= gen_reg_rtx (SImode
);
17101 store_mode
= mode
= SImode
;
17105 mem
= rs6000_pre_atomic_barrier (mem
, model
);
17107 label
= gen_label_rtx ();
17108 emit_label (label
);
17109 label
= gen_rtx_LABEL_REF (VOIDmode
, label
);
17111 if (before
== NULL_RTX
)
17112 before
= gen_reg_rtx (mode
);
17114 emit_load_locked (mode
, before
, mem
);
17118 x
= expand_simple_binop (mode
, AND
, before
, val
,
17119 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
17120 after
= expand_simple_unop (mode
, NOT
, x
, after
, 1);
17124 after
= expand_simple_binop (mode
, code
, before
, val
,
17125 after
, 1, OPTAB_LIB_WIDEN
);
17131 x
= expand_simple_binop (SImode
, AND
, after
, mask
,
17132 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
17133 x
= rs6000_mask_atomic_subword (before
, x
, mask
);
17135 else if (store_mode
!= mode
)
17136 x
= convert_modes (store_mode
, mode
, x
, 1);
17138 cond
= gen_reg_rtx (CCmode
);
17139 emit_store_conditional (store_mode
, cond
, mem
, x
);
17141 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
17142 emit_unlikely_jump (x
, label
);
17144 rs6000_post_atomic_barrier (model
);
17148 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
17149 then do the calcuations in a SImode register. */
17151 rs6000_finish_atomic_subword (orig_before
, before
, shift
);
17153 rs6000_finish_atomic_subword (orig_after
, after
, shift
);
17155 else if (store_mode
!= mode
)
17157 /* QImode/HImode on machines with lbarx/lharx where we do the native
17158 operation and then do the calcuations in a SImode register. */
17160 convert_move (orig_before
, before
, 1);
17162 convert_move (orig_after
, after
, 1);
17164 else if (orig_after
&& after
!= orig_after
)
17165 emit_move_insn (orig_after
, after
);
17168 static GTY(()) alias_set_type TOC_alias_set
= -1;
17171 get_TOC_alias_set (void)
17173 if (TOC_alias_set
== -1)
17174 TOC_alias_set
= new_alias_set ();
17175 return TOC_alias_set
;
17178 /* The mode the ABI uses for a word. This is not the same as word_mode
17179 for -m32 -mpowerpc64. This is used to implement various target hooks. */
17181 static scalar_int_mode
17182 rs6000_abi_word_mode (void)
17184 return TARGET_32BIT
? SImode
: DImode
;
17187 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
17189 rs6000_offload_options (void)
17192 return xstrdup ("-foffload-abi=lp64");
17194 return xstrdup ("-foffload-abi=ilp32");
17198 /* A quick summary of the various types of 'constant-pool tables'
17201 Target Flags Name One table per
17202 AIX (none) AIX TOC object file
17203 AIX -mfull-toc AIX TOC object file
17204 AIX -mminimal-toc AIX minimal TOC translation unit
17205 SVR4/EABI (none) SVR4 SDATA object file
17206 SVR4/EABI -fpic SVR4 pic object file
17207 SVR4/EABI -fPIC SVR4 PIC translation unit
17208 SVR4/EABI -mrelocatable EABI TOC function
17209 SVR4/EABI -maix AIX TOC object file
17210 SVR4/EABI -maix -mminimal-toc
17211 AIX minimal TOC translation unit
17213 Name Reg. Set by entries contains:
17214 made by addrs? fp? sum?
17216 AIX TOC 2 crt0 as Y option option
17217 AIX minimal TOC 30 prolog gcc Y Y option
17218 SVR4 SDATA 13 crt0 gcc N Y N
17219 SVR4 pic 30 prolog ld Y not yet N
17220 SVR4 PIC 30 prolog gcc Y option option
17221 EABI TOC 30 prolog gcc Y option option
17225 /* Hash functions for the hash table. */
17228 rs6000_hash_constant (rtx k
)
17230 enum rtx_code code
= GET_CODE (k
);
17231 machine_mode mode
= GET_MODE (k
);
17232 unsigned result
= (code
<< 3) ^ mode
;
17233 const char *format
;
17236 format
= GET_RTX_FORMAT (code
);
17237 flen
= strlen (format
);
17243 return result
* 1231 + (unsigned) INSN_UID (XEXP (k
, 0));
17245 case CONST_WIDE_INT
:
17248 flen
= CONST_WIDE_INT_NUNITS (k
);
17249 for (i
= 0; i
< flen
; i
++)
17250 result
= result
* 613 + CONST_WIDE_INT_ELT (k
, i
);
17255 return real_hash (CONST_DOUBLE_REAL_VALUE (k
)) * result
;
17265 for (; fidx
< flen
; fidx
++)
17266 switch (format
[fidx
])
17271 const char *str
= XSTR (k
, fidx
);
17272 len
= strlen (str
);
17273 result
= result
* 613 + len
;
17274 for (i
= 0; i
< len
; i
++)
17275 result
= result
* 613 + (unsigned) str
[i
];
17280 result
= result
* 1231 + rs6000_hash_constant (XEXP (k
, fidx
));
17284 result
= result
* 613 + (unsigned) XINT (k
, fidx
);
17287 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT
))
17288 result
= result
* 613 + (unsigned) XWINT (k
, fidx
);
17292 for (i
= 0; i
< sizeof (HOST_WIDE_INT
) / sizeof (unsigned); i
++)
17293 result
= result
* 613 + (unsigned) (XWINT (k
, fidx
)
17300 gcc_unreachable ();
17307 toc_hasher::hash (toc_hash_struct
*thc
)
17309 return rs6000_hash_constant (thc
->key
) ^ thc
->key_mode
;
17312 /* Compare H1 and H2 for equivalence. */
17315 toc_hasher::equal (toc_hash_struct
*h1
, toc_hash_struct
*h2
)
17320 if (h1
->key_mode
!= h2
->key_mode
)
17323 return rtx_equal_p (r1
, r2
);
17326 /* These are the names given by the C++ front-end to vtables, and
17327 vtable-like objects. Ideally, this logic should not be here;
17328 instead, there should be some programmatic way of inquiring as
17329 to whether or not an object is a vtable. */
17331 #define VTABLE_NAME_P(NAME) \
17332 (startswith (name, "_vt.") \
17333 || startswith (name, "_ZTV") \
17334 || startswith (name, "_ZTT") \
17335 || startswith (name, "_ZTI") \
17336 || startswith (name, "_ZTC"))
#ifdef NO_DOLLAR_IN_LABEL
/* Return a GGC-allocated character string translating dollar signs in
   input NAME to underscores.  Used by XCOFF ASM_OUTPUT_LABELREF.  */

const char *
rs6000_xcoff_strip_dollar (const char *name)
{
  char *strip, *p;
  const char *q;
  size_t len;

  q = (const char *) strchr (name, '$');

  /* Nothing to strip (or a leading '$' we must keep): return unchanged.  */
  if (q == 0 || q == name)
    return name;

  len = strlen (name);
  strip = XALLOCAVEC (char, len + 1);
  strcpy (strip, name);
  p = strip + (q - name);
  while (p)
    {
      *p = '_';
      p = strchr (p + 1, '$');
    }

  return ggc_alloc_string (strip, len);
}
#endif
17369 rs6000_output_symbol_ref (FILE *file
, rtx x
)
17371 const char *name
= XSTR (x
, 0);
17373 /* Currently C++ toc references to vtables can be emitted before it
17374 is decided whether the vtable is public or private. If this is
17375 the case, then the linker will eventually complain that there is
17376 a reference to an unknown section. Thus, for vtables only,
17377 we emit the TOC reference to reference the identifier and not the
17379 if (VTABLE_NAME_P (name
))
17381 RS6000_OUTPUT_BASENAME (file
, name
);
17384 assemble_name (file
, name
);
17387 /* Output a TOC entry. We derive the entry name from what is being
17391 output_toc (FILE *file
, rtx x
, int labelno
, machine_mode mode
)
17394 const char *name
= buf
;
17396 HOST_WIDE_INT offset
= 0;
17398 gcc_assert (!TARGET_NO_TOC_OR_PCREL
);
17400 /* When the linker won't eliminate them, don't output duplicate
17401 TOC entries (this happens on AIX if there is any kind of TOC,
17402 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
17404 if (TARGET_TOC
&& GET_CODE (x
) != LABEL_REF
)
17406 struct toc_hash_struct
*h
;
17408 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
17409 time because GGC is not initialized at that point. */
17410 if (toc_hash_table
== NULL
)
17411 toc_hash_table
= hash_table
<toc_hasher
>::create_ggc (1021);
17413 h
= ggc_alloc
<toc_hash_struct
> ();
17415 h
->key_mode
= mode
;
17416 h
->labelno
= labelno
;
17418 toc_hash_struct
**found
= toc_hash_table
->find_slot (h
, INSERT
);
17419 if (*found
== NULL
)
17421 else /* This is indeed a duplicate.
17422 Set this label equal to that label. */
17424 fputs ("\t.set ", file
);
17425 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LC");
17426 fprintf (file
, "%d,", labelno
);
17427 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LC");
17428 fprintf (file
, "%d\n", ((*found
)->labelno
));
17431 if (TARGET_XCOFF
&& SYMBOL_REF_P (x
)
17432 && (SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_GLOBAL_DYNAMIC
17433 || SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
))
17435 fputs ("\t.set ", file
);
17436 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LCM");
17437 fprintf (file
, "%d,", labelno
);
17438 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LCM");
17439 fprintf (file
, "%d\n", ((*found
)->labelno
));
17446 /* If we're going to put a double constant in the TOC, make sure it's
17447 aligned properly when strict alignment is on. */
17448 if ((CONST_DOUBLE_P (x
) || CONST_WIDE_INT_P (x
))
17449 && STRICT_ALIGNMENT
17450 && GET_MODE_BITSIZE (mode
) >= 64
17451 && ! (TARGET_NO_FP_IN_TOC
&& ! TARGET_MINIMAL_TOC
)) {
17452 ASM_OUTPUT_ALIGN (file
, 3);
17455 (*targetm
.asm_out
.internal_label
) (file
, "LC", labelno
);
17457 /* Handle FP constants specially. Note that if we have a minimal
17458 TOC, things we put here aren't actually in the TOC, so we can allow
17460 if (CONST_DOUBLE_P (x
)
17461 && (GET_MODE (x
) == TFmode
|| GET_MODE (x
) == TDmode
17462 || GET_MODE (x
) == IFmode
|| GET_MODE (x
) == KFmode
))
17466 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
17467 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17469 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17473 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17474 fputs (DOUBLE_INT_ASM_OP
, file
);
17476 fprintf (file
, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17477 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
17478 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
17479 fprintf (file
, "0x%lx%08lx,0x%lx%08lx\n",
17480 k
[WORDS_BIG_ENDIAN
? 0 : 1] & 0xffffffff,
17481 k
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffff,
17482 k
[WORDS_BIG_ENDIAN
? 2 : 3] & 0xffffffff,
17483 k
[WORDS_BIG_ENDIAN
? 3 : 2] & 0xffffffff);
17488 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17489 fputs ("\t.long ", file
);
17491 fprintf (file
, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17492 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
17493 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
17494 fprintf (file
, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17495 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
17496 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
17500 else if (CONST_DOUBLE_P (x
)
17501 && (GET_MODE (x
) == DFmode
|| GET_MODE (x
) == DDmode
))
17505 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
17506 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17508 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17512 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17513 fputs (DOUBLE_INT_ASM_OP
, file
);
17515 fprintf (file
, "\t.tc FD_%lx_%lx[TC],",
17516 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17517 fprintf (file
, "0x%lx%08lx\n",
17518 k
[WORDS_BIG_ENDIAN
? 0 : 1] & 0xffffffff,
17519 k
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffff);
17524 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17525 fputs ("\t.long ", file
);
17527 fprintf (file
, "\t.tc FD_%lx_%lx[TC],",
17528 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17529 fprintf (file
, "0x%lx,0x%lx\n",
17530 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17534 else if (CONST_DOUBLE_P (x
)
17535 && (GET_MODE (x
) == SFmode
|| GET_MODE (x
) == SDmode
))
17539 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
17540 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x
), l
);
17542 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
17546 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17547 fputs (DOUBLE_INT_ASM_OP
, file
);
17549 fprintf (file
, "\t.tc FS_%lx[TC],", l
& 0xffffffff);
17550 if (WORDS_BIG_ENDIAN
)
17551 fprintf (file
, "0x%lx00000000\n", l
& 0xffffffff);
17553 fprintf (file
, "0x%lx\n", l
& 0xffffffff);
17558 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17559 fputs ("\t.long ", file
);
17561 fprintf (file
, "\t.tc FS_%lx[TC],", l
& 0xffffffff);
17562 fprintf (file
, "0x%lx\n", l
& 0xffffffff);
17566 else if (GET_MODE (x
) == VOIDmode
&& CONST_INT_P (x
))
17568 unsigned HOST_WIDE_INT low
;
17569 HOST_WIDE_INT high
;
17571 low
= INTVAL (x
) & 0xffffffff;
17572 high
= (HOST_WIDE_INT
) INTVAL (x
) >> 32;
17574 /* TOC entries are always Pmode-sized, so when big-endian
17575 smaller integer constants in the TOC need to be padded.
17576 (This is still a win over putting the constants in
17577 a separate constant pool, because then we'd have
17578 to have both a TOC entry _and_ the actual constant.)
17580 For a 32-bit target, CONST_INT values are loaded and shifted
17581 entirely within `low' and can be stored in one TOC entry. */
17583 /* It would be easy to make this work, but it doesn't now. */
17584 gcc_assert (!TARGET_64BIT
|| POINTER_SIZE
>= GET_MODE_BITSIZE (mode
));
17586 if (WORDS_BIG_ENDIAN
&& POINTER_SIZE
> GET_MODE_BITSIZE (mode
))
17589 low
<<= POINTER_SIZE
- GET_MODE_BITSIZE (mode
);
17590 high
= (HOST_WIDE_INT
) low
>> 32;
17596 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17597 fputs (DOUBLE_INT_ASM_OP
, file
);
17599 fprintf (file
, "\t.tc ID_%lx_%lx[TC],",
17600 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17601 fprintf (file
, "0x%lx%08lx\n",
17602 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17607 if (POINTER_SIZE
< GET_MODE_BITSIZE (mode
))
17609 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17610 fputs ("\t.long ", file
);
17612 fprintf (file
, "\t.tc ID_%lx_%lx[TC],",
17613 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17614 fprintf (file
, "0x%lx,0x%lx\n",
17615 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17619 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17620 fputs ("\t.long ", file
);
17622 fprintf (file
, "\t.tc IS_%lx[TC],", (long) low
& 0xffffffff);
17623 fprintf (file
, "0x%lx\n", (long) low
& 0xffffffff);
17629 if (GET_CODE (x
) == CONST
)
17631 gcc_assert (GET_CODE (XEXP (x
, 0)) == PLUS
17632 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)));
17634 base
= XEXP (XEXP (x
, 0), 0);
17635 offset
= INTVAL (XEXP (XEXP (x
, 0), 1));
17638 switch (GET_CODE (base
))
17641 name
= XSTR (base
, 0);
17645 ASM_GENERATE_INTERNAL_LABEL (buf
, "L",
17646 CODE_LABEL_NUMBER (XEXP (base
, 0)));
17650 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (base
));
17654 gcc_unreachable ();
17657 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17658 fputs (TARGET_32BIT
? "\t.long " : DOUBLE_INT_ASM_OP
, file
);
17661 fputs ("\t.tc ", file
);
17662 RS6000_OUTPUT_BASENAME (file
, name
);
17665 fprintf (file
, ".N" HOST_WIDE_INT_PRINT_UNSIGNED
, - offset
);
17667 fprintf (file
, ".P" HOST_WIDE_INT_PRINT_UNSIGNED
, offset
);
17669 /* Mark large TOC symbols on AIX with [TE] so they are mapped
17670 after other TOC symbols, reducing overflow of small TOC access
17671 to [TC] symbols. */
17672 fputs (TARGET_XCOFF
&& TARGET_CMODEL
!= CMODEL_SMALL
17673 ? "[TE]," : "[TC],", file
);
17676 /* Currently C++ toc references to vtables can be emitted before it
17677 is decided whether the vtable is public or private. If this is
17678 the case, then the linker will eventually complain that there is
17679 a TOC reference to an unknown section. Thus, for vtables only,
17680 we emit the TOC reference to reference the symbol and not the
17682 if (VTABLE_NAME_P (name
))
17684 RS6000_OUTPUT_BASENAME (file
, name
);
17686 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, offset
);
17687 else if (offset
> 0)
17688 fprintf (file
, "+" HOST_WIDE_INT_PRINT_DEC
, offset
);
17691 output_addr_const (file
, x
);
17694 if (TARGET_XCOFF
&& SYMBOL_REF_P (base
))
17696 switch (SYMBOL_REF_TLS_MODEL (base
))
17700 case TLS_MODEL_LOCAL_EXEC
:
17701 fputs ("@le", file
);
17703 case TLS_MODEL_INITIAL_EXEC
:
17704 fputs ("@ie", file
);
17706 /* Use global-dynamic for local-dynamic. */
17707 case TLS_MODEL_GLOBAL_DYNAMIC
:
17708 case TLS_MODEL_LOCAL_DYNAMIC
:
17710 (*targetm
.asm_out
.internal_label
) (file
, "LCM", labelno
);
17711 fputs ("\t.tc .", file
);
17712 RS6000_OUTPUT_BASENAME (file
, name
);
17713 fputs ("[TC],", file
);
17714 output_addr_const (file
, x
);
17715 fputs ("@m", file
);
17718 gcc_unreachable ();
17726 /* Output an assembler pseudo-op to write an ASCII string of N characters
17727 starting at P to FILE.
17729 On the RS/6000, we have to do this using the .byte operation and
17730 write out special characters outside the quoted string.
17731 Also, the assembler is broken; very long strings are truncated,
17732 so we must artificially break them up early. */
/* Output an assembler pseudo-op to write an ASCII string of N characters
   starting at P to FILE.

   On the RS/6000, we have to do this using the .byte operation and
   write out special characters outside the quoted string.
   Also, the assembler is broken; very long strings are truncated,
   so we must artificially break them up early.  */

void
output_ascii (FILE *file, const char *p, int n)
{
  char c;
  int i, count_string;
  const char *for_string = "\t.byte \"";
  const char *for_decimal = "\t.byte ";
  const char *to_close = NULL;

  count_string = 0;
  for (i = 0; i < n; i++)
    {
      c = *p++;
      if (c >= ' ' && c < 0177)
	{
	  if (for_string)
	    fputs (for_string, file);
	  putc (c, file);

	  /* Write two quotes to get one.  */
	  if (c == '"')
	    {
	      putc (c, file);
	      ++count_string;
	    }

	  for_string = NULL;
	  for_decimal = "\"\n\t.byte ";
	  to_close = "\"\n";
	  ++count_string;

	  /* Break long quoted strings so the (broken) assembler does not
	     truncate them.  */
	  if (count_string >= 512)
	    {
	      fputs (to_close, file);

	      for_string = "\t.byte \"";
	      for_decimal = "\t.byte ";
	      to_close = NULL;
	      count_string = 0;
	    }
	}
      else
	{
	  /* Non-printable character: emit it as a decimal .byte value.  */
	  if (for_decimal)
	    fputs (for_decimal, file);
	  fprintf (file, "%d", c);

	  for_string = "\n\t.byte \"";
	  for_decimal = ", ";

	  to_close = "\n";
	  count_string = 0;
	}
    }

  /* Now close the string if we have written one.  Then end the line.  */
  if (to_close)
    fputs (to_close, file);
}
17793 /* Generate a unique section name for FILENAME for a section type
17794 represented by SECTION_DESC. Output goes into BUF.
17796 SECTION_DESC can be any string, as long as it is different for each
17797 possible section type.
17799 We name the section in the same manner as xlc. The name begins with an
17800 underscore followed by the filename (after stripping any leading directory
17801 names) with the last period replaced by the string SECTION_DESC. If
17802 FILENAME does not contain a period, SECTION_DESC is appended to the end of
17806 rs6000_gen_section_name (char **buf
, const char *filename
,
17807 const char *section_desc
)
17809 const char *q
, *after_last_slash
, *last_period
= 0;
17813 after_last_slash
= filename
;
17814 for (q
= filename
; *q
; q
++)
17817 after_last_slash
= q
+ 1;
17818 else if (*q
== '.')
17822 len
= strlen (after_last_slash
) + strlen (section_desc
) + 2;
17823 *buf
= (char *) xmalloc (len
);
17828 for (q
= after_last_slash
; *q
; q
++)
17830 if (q
== last_period
)
17832 strcpy (p
, section_desc
);
17833 p
+= strlen (section_desc
);
17837 else if (ISALNUM (*q
))
17841 if (last_period
== 0)
17842 strcpy (p
, section_desc
);
17847 /* Emit profile function. */
17850 output_profile_hook (int labelno ATTRIBUTE_UNUSED
)
17852 /* Non-standard profiling for kernels, which just saves LR then calls
17853 _mcount without worrying about arg saves. The idea is to change
17854 the function prologue as little as possible as it isn't easy to
17855 account for arg save/restore code added just for _mcount. */
17856 if (TARGET_PROFILE_KERNEL
)
17859 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
17861 #ifndef NO_PROFILE_COUNTERS
17862 # define NO_PROFILE_COUNTERS 0
17864 if (NO_PROFILE_COUNTERS
)
17865 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
17866 LCT_NORMAL
, VOIDmode
);
17870 const char *label_name
;
17873 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
17874 label_name
= ggc_strdup ((*targetm
.strip_name_encoding
) (buf
));
17875 fun
= gen_rtx_SYMBOL_REF (Pmode
, label_name
);
17877 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
17878 LCT_NORMAL
, VOIDmode
, fun
, Pmode
);
17881 else if (DEFAULT_ABI
== ABI_DARWIN
)
17883 const char *mcount_name
= RS6000_MCOUNT
;
17884 int caller_addr_regno
= LR_REGNO
;
17886 /* Be conservative and always set this, at least for now. */
17887 crtl
->uses_pic_offset_table
= 1;
17890 /* For PIC code, set up a stub and collect the caller's address
17891 from r0, which is where the prologue puts it. */
17892 if (MACHOPIC_INDIRECT
17893 && crtl
->uses_pic_offset_table
)
17894 caller_addr_regno
= 0;
17896 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, mcount_name
),
17897 LCT_NORMAL
, VOIDmode
,
17898 gen_rtx_REG (Pmode
, caller_addr_regno
), Pmode
);
17902 /* Write function profiler code. */
17905 output_function_profiler (FILE *file
, int labelno
)
17909 switch (DEFAULT_ABI
)
17912 gcc_unreachable ();
17917 warning (0, "no profiling of 64-bit code for this ABI");
17920 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
17921 fprintf (file
, "\tmflr %s\n", reg_names
[0]);
17922 if (NO_PROFILE_COUNTERS
)
17924 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17925 reg_names
[0], reg_names
[1]);
17927 else if (TARGET_SECURE_PLT
&& flag_pic
)
17929 if (TARGET_LINK_STACK
)
17932 get_ppc476_thunk_name (name
);
17933 asm_fprintf (file
, "\tbl %s\n", name
);
17936 asm_fprintf (file
, "\tbcl 20,31,1f\n1:\n");
17937 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17938 reg_names
[0], reg_names
[1]);
17939 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
17940 asm_fprintf (file
, "\taddis %s,%s,",
17941 reg_names
[12], reg_names
[12]);
17942 assemble_name (file
, buf
);
17943 asm_fprintf (file
, "-1b@ha\n\tla %s,", reg_names
[0]);
17944 assemble_name (file
, buf
);
17945 asm_fprintf (file
, "-1b@l(%s)\n", reg_names
[12]);
17947 else if (flag_pic
== 1)
17949 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file
);
17950 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17951 reg_names
[0], reg_names
[1]);
17952 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
17953 asm_fprintf (file
, "\tlwz %s,", reg_names
[0]);
17954 assemble_name (file
, buf
);
17955 asm_fprintf (file
, "@got(%s)\n", reg_names
[12]);
17957 else if (flag_pic
> 1)
17959 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17960 reg_names
[0], reg_names
[1]);
17961 /* Now, we need to get the address of the label. */
17962 if (TARGET_LINK_STACK
)
17965 get_ppc476_thunk_name (name
);
17966 asm_fprintf (file
, "\tbl %s\n\tb 1f\n\t.long ", name
);
17967 assemble_name (file
, buf
);
17968 fputs ("-.\n1:", file
);
17969 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
17970 asm_fprintf (file
, "\taddi %s,%s,4\n",
17971 reg_names
[11], reg_names
[11]);
17975 fputs ("\tbcl 20,31,1f\n\t.long ", file
);
17976 assemble_name (file
, buf
);
17977 fputs ("-.\n1:", file
);
17978 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
17980 asm_fprintf (file
, "\tlwz %s,0(%s)\n",
17981 reg_names
[0], reg_names
[11]);
17982 asm_fprintf (file
, "\tadd %s,%s,%s\n",
17983 reg_names
[0], reg_names
[0], reg_names
[11]);
17987 asm_fprintf (file
, "\tlis %s,", reg_names
[12]);
17988 assemble_name (file
, buf
);
17989 fputs ("@ha\n", file
);
17990 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17991 reg_names
[0], reg_names
[1]);
17992 asm_fprintf (file
, "\tla %s,", reg_names
[0]);
17993 assemble_name (file
, buf
);
17994 asm_fprintf (file
, "@l(%s)\n", reg_names
[12]);
17997 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
17998 fprintf (file
, "\tbl %s%s\n",
17999 RS6000_MCOUNT
, flag_pic
? "@plt" : "");
18005 /* Don't do anything, done in output_profile_hook (). */
18012 /* The following variable value is the last issued insn. */
18014 static rtx_insn
*last_scheduled_insn
;
18016 /* The following variable helps to balance issuing of load and
18017 store instructions */
18019 static int load_store_pendulum
;
18021 /* The following variable helps pair divide insns during scheduling. */
18022 static int divide_cnt
;
18023 /* The following variable helps pair and alternate vector and vector load
18024 insns during scheduling. */
18025 static int vec_pairing
;
18028 /* Power4 load update and store update instructions are cracked into a
18029 load or store and an integer insn which are executed in the same cycle.
18030 Branches have their own dispatch slot which does not count against the
18031 GCC issue rate, but it changes the program flow so there are no other
18032 instructions to issue in this cycle. */
18035 rs6000_variable_issue_1 (rtx_insn
*insn
, int more
)
18037 last_scheduled_insn
= insn
;
18038 if (GET_CODE (PATTERN (insn
)) == USE
18039 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18041 cached_can_issue_more
= more
;
18042 return cached_can_issue_more
;
18045 if (insn_terminates_group_p (insn
, current_group
))
18047 cached_can_issue_more
= 0;
18048 return cached_can_issue_more
;
18051 /* If no reservation, but reach here */
18052 if (recog_memoized (insn
) < 0)
18055 if (rs6000_sched_groups
)
18057 if (is_microcoded_insn (insn
))
18058 cached_can_issue_more
= 0;
18059 else if (is_cracked_insn (insn
))
18060 cached_can_issue_more
= more
> 2 ? more
- 2 : 0;
18062 cached_can_issue_more
= more
- 1;
18064 return cached_can_issue_more
;
18067 if (rs6000_tune
== PROCESSOR_CELL
&& is_nonpipeline_insn (insn
))
18070 cached_can_issue_more
= more
- 1;
18071 return cached_can_issue_more
;
18075 rs6000_variable_issue (FILE *stream
, int verbose
, rtx_insn
*insn
, int more
)
18077 int r
= rs6000_variable_issue_1 (insn
, more
);
18079 fprintf (stream
, "// rs6000_variable_issue (more = %d) = %d\n", more
, r
);
18083 /* Adjust the cost of a scheduling dependency. Return the new cost of
18084 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
18087 rs6000_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
, int cost
,
18090 enum attr_type attr_type
;
18092 if (recog_memoized (insn
) < 0 || recog_memoized (dep_insn
) < 0)
18099 /* Data dependency; DEP_INSN writes a register that INSN reads
18100 some cycles later. */
18102 /* Separate a load from a narrower, dependent store. */
18103 if ((rs6000_sched_groups
|| rs6000_tune
== PROCESSOR_POWER9
18104 || rs6000_tune
== PROCESSOR_POWER10
18105 || rs6000_tune
== PROCESSOR_FUTURE
)
18106 && GET_CODE (PATTERN (insn
)) == SET
18107 && GET_CODE (PATTERN (dep_insn
)) == SET
18108 && MEM_P (XEXP (PATTERN (insn
), 1))
18109 && MEM_P (XEXP (PATTERN (dep_insn
), 0))
18110 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn
), 1)))
18111 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn
), 0)))))
18114 attr_type
= get_attr_type (insn
);
18119 /* Tell the first scheduling pass about the latency between
18120 a mtctr and bctr (and mtlr and br/blr). The first
18121 scheduling pass will not know about this latency since
18122 the mtctr instruction, which has the latency associated
18123 to it, will be generated by reload. */
18126 /* Leave some extra cycles between a compare and its
18127 dependent branch, to inhibit expensive mispredicts. */
18128 if ((rs6000_tune
== PROCESSOR_PPC603
18129 || rs6000_tune
== PROCESSOR_PPC604
18130 || rs6000_tune
== PROCESSOR_PPC604e
18131 || rs6000_tune
== PROCESSOR_PPC620
18132 || rs6000_tune
== PROCESSOR_PPC630
18133 || rs6000_tune
== PROCESSOR_PPC750
18134 || rs6000_tune
== PROCESSOR_PPC7400
18135 || rs6000_tune
== PROCESSOR_PPC7450
18136 || rs6000_tune
== PROCESSOR_PPCE5500
18137 || rs6000_tune
== PROCESSOR_PPCE6500
18138 || rs6000_tune
== PROCESSOR_POWER4
18139 || rs6000_tune
== PROCESSOR_POWER5
18140 || rs6000_tune
== PROCESSOR_POWER7
18141 || rs6000_tune
== PROCESSOR_POWER8
18142 || rs6000_tune
== PROCESSOR_POWER9
18143 || rs6000_tune
== PROCESSOR_POWER10
18144 || rs6000_tune
== PROCESSOR_FUTURE
18145 || rs6000_tune
== PROCESSOR_CELL
)
18146 && recog_memoized (dep_insn
)
18147 && (INSN_CODE (dep_insn
) >= 0))
18149 switch (get_attr_type (dep_insn
))
18152 case TYPE_FPCOMPARE
:
18153 case TYPE_CR_LOGICAL
:
18157 if (get_attr_dot (dep_insn
) == DOT_YES
)
18162 if (get_attr_dot (dep_insn
) == DOT_YES
18163 && get_attr_var_shift (dep_insn
) == VAR_SHIFT_NO
)
18174 if ((rs6000_tune
== PROCESSOR_POWER6
)
18175 && recog_memoized (dep_insn
)
18176 && (INSN_CODE (dep_insn
) >= 0))
18179 if (GET_CODE (PATTERN (insn
)) != SET
)
18180 /* If this happens, we have to extend this to schedule
18181 optimally. Return default for now. */
18184 /* Adjust the cost for the case where the value written
18185 by a fixed point operation is used as the address
18186 gen value on a store. */
18187 switch (get_attr_type (dep_insn
))
18192 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18193 return get_attr_sign_extend (dep_insn
)
18194 == SIGN_EXTEND_YES
? 6 : 4;
18199 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18200 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
18210 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18218 if (get_attr_update (dep_insn
) == UPDATE_YES
18219 && ! rs6000_store_data_bypass_p (dep_insn
, insn
))
18225 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18231 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18232 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
18242 if ((rs6000_tune
== PROCESSOR_POWER6
)
18243 && recog_memoized (dep_insn
)
18244 && (INSN_CODE (dep_insn
) >= 0))
18247 /* Adjust the cost for the case where the value written
18248 by a fixed point instruction is used within the address
18249 gen portion of a subsequent load(u)(x) */
18250 switch (get_attr_type (dep_insn
))
18255 if (set_to_load_agen (dep_insn
, insn
))
18256 return get_attr_sign_extend (dep_insn
)
18257 == SIGN_EXTEND_YES
? 6 : 4;
18262 if (set_to_load_agen (dep_insn
, insn
))
18263 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
18273 if (set_to_load_agen (dep_insn
, insn
))
18281 if (get_attr_update (dep_insn
) == UPDATE_YES
18282 && set_to_load_agen (dep_insn
, insn
))
18288 if (set_to_load_agen (dep_insn
, insn
))
18294 if (set_to_load_agen (dep_insn
, insn
))
18295 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
18308 /* Fall out to return default cost. */
18312 case REG_DEP_OUTPUT
:
18313 /* Output dependency; DEP_INSN writes a register that INSN writes some
18315 if ((rs6000_tune
== PROCESSOR_POWER6
)
18316 && recog_memoized (dep_insn
)
18317 && (INSN_CODE (dep_insn
) >= 0))
18319 attr_type
= get_attr_type (insn
);
18324 case TYPE_FPSIMPLE
:
18325 if (get_attr_type (dep_insn
) == TYPE_FP
18326 || get_attr_type (dep_insn
) == TYPE_FPSIMPLE
)
18333 /* Fall through, no cost for output dependency. */
18337 /* Anti dependency; DEP_INSN reads a register that INSN writes some
18342 gcc_unreachable ();
18348 /* Debug version of rs6000_adjust_cost. */
18351 rs6000_debug_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
,
18352 int cost
, unsigned int dw
)
18354 int ret
= rs6000_adjust_cost (insn
, dep_type
, dep_insn
, cost
, dw
);
18362 default: dep
= "unknown depencency"; break;
18363 case REG_DEP_TRUE
: dep
= "data dependency"; break;
18364 case REG_DEP_OUTPUT
: dep
= "output dependency"; break;
18365 case REG_DEP_ANTI
: dep
= "anti depencency"; break;
18369 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
18370 "%s, insn:\n", ret
, cost
, dep
);
18378 /* The function returns a true if INSN is microcoded.
18379 Return false otherwise. */
18382 is_microcoded_insn (rtx_insn
*insn
)
18384 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18385 || GET_CODE (PATTERN (insn
)) == USE
18386 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18389 if (rs6000_tune
== PROCESSOR_CELL
)
18390 return get_attr_cell_micro (insn
) == CELL_MICRO_ALWAYS
;
18392 if (rs6000_sched_groups
18393 && (rs6000_tune
== PROCESSOR_POWER4
|| rs6000_tune
== PROCESSOR_POWER5
))
18395 enum attr_type type
= get_attr_type (insn
);
18396 if ((type
== TYPE_LOAD
18397 && get_attr_update (insn
) == UPDATE_YES
18398 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
)
18399 || ((type
== TYPE_LOAD
|| type
== TYPE_STORE
)
18400 && get_attr_update (insn
) == UPDATE_YES
18401 && get_attr_indexed (insn
) == INDEXED_YES
)
18402 || type
== TYPE_MFCR
)
18409 /* The function returns true if INSN is cracked into 2 instructions
18410 by the processor (and therefore occupies 2 issue slots). */
18413 is_cracked_insn (rtx_insn
*insn
)
18415 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18416 || GET_CODE (PATTERN (insn
)) == USE
18417 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18420 if (rs6000_sched_groups
18421 && (rs6000_tune
== PROCESSOR_POWER4
|| rs6000_tune
== PROCESSOR_POWER5
))
18423 enum attr_type type
= get_attr_type (insn
);
18424 if ((type
== TYPE_LOAD
18425 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
18426 && get_attr_update (insn
) == UPDATE_NO
)
18427 || (type
== TYPE_LOAD
18428 && get_attr_sign_extend (insn
) == SIGN_EXTEND_NO
18429 && get_attr_update (insn
) == UPDATE_YES
18430 && get_attr_indexed (insn
) == INDEXED_NO
)
18431 || (type
== TYPE_STORE
18432 && get_attr_update (insn
) == UPDATE_YES
18433 && get_attr_indexed (insn
) == INDEXED_NO
)
18434 || ((type
== TYPE_FPLOAD
|| type
== TYPE_FPSTORE
)
18435 && get_attr_update (insn
) == UPDATE_YES
)
18436 || (type
== TYPE_CR_LOGICAL
18437 && get_attr_cr_logical_3op (insn
) == CR_LOGICAL_3OP_YES
)
18438 || (type
== TYPE_EXTS
18439 && get_attr_dot (insn
) == DOT_YES
)
18440 || (type
== TYPE_SHIFT
18441 && get_attr_dot (insn
) == DOT_YES
18442 && get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
18443 || (type
== TYPE_MUL
18444 && get_attr_dot (insn
) == DOT_YES
)
18445 || type
== TYPE_DIV
18446 || (type
== TYPE_INSERT
18447 && get_attr_size (insn
) == SIZE_32
))
18454 /* The function returns true if INSN can be issued only from
18455 the branch slot. */
18458 is_branch_slot_insn (rtx_insn
*insn
)
18460 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18461 || GET_CODE (PATTERN (insn
)) == USE
18462 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18465 if (rs6000_sched_groups
)
18467 enum attr_type type
= get_attr_type (insn
);
18468 if (type
== TYPE_BRANCH
|| type
== TYPE_JMPREG
)
18476 /* The function returns true if out_inst sets a value that is
18477 used in the address generation computation of in_insn */
18479 set_to_load_agen (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
18481 rtx out_set
, in_set
;
18483 /* For performance reasons, only handle the simple case where
18484 both loads are a single_set. */
18485 out_set
= single_set (out_insn
);
18488 in_set
= single_set (in_insn
);
18490 return reg_mentioned_p (SET_DEST (out_set
), SET_SRC (in_set
));
18496 /* Try to determine base/offset/size parts of the given MEM.
18497 Return true if successful, false if all the values couldn't
18500 This function only looks for REG or REG+CONST address forms.
18501 REG+REG address form will return false. */
18504 get_memref_parts (rtx mem
, rtx
*base
, HOST_WIDE_INT
*offset
,
18505 HOST_WIDE_INT
*size
)
18508 if (MEM_SIZE_KNOWN_P (mem
))
18509 *size
= MEM_SIZE (mem
);
18513 addr_rtx
= (XEXP (mem
, 0));
18514 if (GET_CODE (addr_rtx
) == PRE_MODIFY
)
18515 addr_rtx
= XEXP (addr_rtx
, 1);
18518 while (GET_CODE (addr_rtx
) == PLUS
18519 && CONST_INT_P (XEXP (addr_rtx
, 1)))
18521 *offset
+= INTVAL (XEXP (addr_rtx
, 1));
18522 addr_rtx
= XEXP (addr_rtx
, 0);
18524 if (!REG_P (addr_rtx
))
18531 /* If the target storage locations of arguments MEM1 and MEM2 are
18532 adjacent, then return the argument that has the lower address.
18533 Otherwise, return NULL_RTX. */
18536 adjacent_mem_locations (rtx mem1
, rtx mem2
)
18539 HOST_WIDE_INT off1
, size1
, off2
, size2
;
18543 && get_memref_parts (mem1
, ®1
, &off1
, &size1
)
18544 && get_memref_parts (mem2
, ®2
, &off2
, &size2
)
18545 && REGNO (reg1
) == REGNO (reg2
))
18547 if (off1
+ size1
== off2
)
18549 else if (off2
+ size2
== off1
)
18556 /* This function returns true if it can be determined that the two MEM
18557 locations overlap by at least 1 byte based on base reg/offset/size. */
18560 mem_locations_overlap (rtx mem1
, rtx mem2
)
18563 HOST_WIDE_INT off1
, size1
, off2
, size2
;
18565 if (get_memref_parts (mem1
, ®1
, &off1
, &size1
)
18566 && get_memref_parts (mem2
, ®2
, &off2
, &size2
))
18567 return ((REGNO (reg1
) == REGNO (reg2
))
18568 && (((off1
<= off2
) && (off1
+ size1
> off2
))
18569 || ((off2
<= off1
) && (off2
+ size2
> off1
))));
18574 /* A C statement (sans semicolon) to update the integer scheduling
18575 priority INSN_PRIORITY (INSN). Increase the priority to execute the
18576 INSN earlier, reduce the priority to execute INSN later. Do not
18577 define this macro if you do not need to adjust the scheduling
18578 priorities of insns. */
18581 rs6000_adjust_priority (rtx_insn
*insn ATTRIBUTE_UNUSED
, int priority
)
18583 rtx load_mem
, str_mem
;
18584 /* On machines (like the 750) which have asymmetric integer units,
18585 where one integer unit can do multiply and divides and the other
18586 can't, reduce the priority of multiply/divide so it is scheduled
18587 before other integer operations. */
18590 if (! INSN_P (insn
))
18593 if (GET_CODE (PATTERN (insn
)) == USE
)
18596 switch (rs6000_tune
) {
18597 case PROCESSOR_PPC750
:
18598 switch (get_attr_type (insn
))
18605 fprintf (stderr
, "priority was %#x (%d) before adjustment\n",
18606 priority
, priority
);
18607 if (priority
>= 0 && priority
< 0x01000000)
18614 if (insn_must_be_first_in_group (insn
)
18615 && reload_completed
18616 && current_sched_info
->sched_max_insns_priority
18617 && rs6000_sched_restricted_insns_priority
)
18620 /* Prioritize insns that can be dispatched only in the first
18622 if (rs6000_sched_restricted_insns_priority
== 1)
18623 /* Attach highest priority to insn. This means that in
18624 haifa-sched.cc:ready_sort(), dispatch-slot restriction considerations
18625 precede 'priority' (critical path) considerations. */
18626 return current_sched_info
->sched_max_insns_priority
;
18627 else if (rs6000_sched_restricted_insns_priority
== 2)
18628 /* Increase priority of insn by a minimal amount. This means that in
18629 haifa-sched.cc:ready_sort(), only 'priority' (critical path)
18630 considerations precede dispatch-slot restriction considerations. */
18631 return (priority
+ 1);
18634 if (rs6000_tune
== PROCESSOR_POWER6
18635 && ((load_store_pendulum
== -2 && is_load_insn (insn
, &load_mem
))
18636 || (load_store_pendulum
== 2 && is_store_insn (insn
, &str_mem
))))
18637 /* Attach highest priority to insn if the scheduler has just issued two
18638 stores and this instruction is a load, or two loads and this instruction
18639 is a store. Power6 wants loads and stores scheduled alternately
18641 return current_sched_info
->sched_max_insns_priority
;
18646 /* Return true if the instruction is nonpipelined on the Cell. */
18648 is_nonpipeline_insn (rtx_insn
*insn
)
18650 enum attr_type type
;
18651 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18652 || GET_CODE (PATTERN (insn
)) == USE
18653 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18656 type
= get_attr_type (insn
);
18657 if (type
== TYPE_MUL
18658 || type
== TYPE_DIV
18659 || type
== TYPE_SDIV
18660 || type
== TYPE_DDIV
18661 || type
== TYPE_SSQRT
18662 || type
== TYPE_DSQRT
18663 || type
== TYPE_MFCR
18664 || type
== TYPE_MFCRF
18665 || type
== TYPE_MFJMPR
)
18673 /* Return how many instructions the machine can issue per cycle. */
18676 rs6000_issue_rate (void)
18678 /* Unless scheduling for register pressure, use issue rate of 1 for
18679 first scheduling pass to decrease degradation. */
18680 if (!reload_completed
&& !flag_sched_pressure
)
18683 switch (rs6000_tune
) {
18684 case PROCESSOR_RS64A
:
18685 case PROCESSOR_PPC601
: /* ? */
18686 case PROCESSOR_PPC7450
:
18688 case PROCESSOR_PPC440
:
18689 case PROCESSOR_PPC603
:
18690 case PROCESSOR_PPC750
:
18691 case PROCESSOR_PPC7400
:
18692 case PROCESSOR_PPC8540
:
18693 case PROCESSOR_PPC8548
:
18694 case PROCESSOR_CELL
:
18695 case PROCESSOR_PPCE300C2
:
18696 case PROCESSOR_PPCE300C3
:
18697 case PROCESSOR_PPCE500MC
:
18698 case PROCESSOR_PPCE500MC64
:
18699 case PROCESSOR_PPCE5500
:
18700 case PROCESSOR_PPCE6500
:
18701 case PROCESSOR_TITAN
:
18703 case PROCESSOR_PPC476
:
18704 case PROCESSOR_PPC604
:
18705 case PROCESSOR_PPC604e
:
18706 case PROCESSOR_PPC620
:
18707 case PROCESSOR_PPC630
:
18709 case PROCESSOR_POWER4
:
18710 case PROCESSOR_POWER5
:
18711 case PROCESSOR_POWER6
:
18712 case PROCESSOR_POWER7
:
18714 case PROCESSOR_POWER8
:
18716 case PROCESSOR_POWER9
:
18718 case PROCESSOR_POWER10
:
18719 case PROCESSOR_FUTURE
:
18726 /* Return how many instructions to look ahead for better insn
18730 rs6000_use_sched_lookahead (void)
18732 switch (rs6000_tune
)
18734 case PROCESSOR_PPC8540
:
18735 case PROCESSOR_PPC8548
:
18738 case PROCESSOR_CELL
:
18739 return (reload_completed
? 8 : 0);
18746 /* We are choosing insn from the ready queue. Return zero if INSN can be
18749 rs6000_use_sched_lookahead_guard (rtx_insn
*insn
, int ready_index
)
18751 if (ready_index
== 0)
18754 if (rs6000_tune
!= PROCESSOR_CELL
)
18757 gcc_assert (insn
!= NULL_RTX
&& INSN_P (insn
));
18759 if (!reload_completed
18760 || is_nonpipeline_insn (insn
)
18761 || is_microcoded_insn (insn
))
18767 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18768 and return true. */
18771 find_mem_ref (rtx pat
, rtx
*mem_ref
)
18776 /* stack_tie does not produce any real memory traffic. */
18777 if (tie_operand (pat
, VOIDmode
))
18786 /* Recursively process the pattern. */
18787 fmt
= GET_RTX_FORMAT (GET_CODE (pat
));
18789 for (i
= GET_RTX_LENGTH (GET_CODE (pat
)) - 1; i
>= 0; i
--)
18793 if (find_mem_ref (XEXP (pat
, i
), mem_ref
))
18796 else if (fmt
[i
] == 'E')
18797 for (j
= XVECLEN (pat
, i
) - 1; j
>= 0; j
--)
18799 if (find_mem_ref (XVECEXP (pat
, i
, j
), mem_ref
))
18807 /* Determine if PAT is a PATTERN of a load insn. */
18810 is_load_insn1 (rtx pat
, rtx
*load_mem
)
18812 if (!pat
|| pat
== NULL_RTX
)
18815 if (GET_CODE (pat
) == SET
)
18817 if (REG_P (SET_DEST (pat
)))
18818 return find_mem_ref (SET_SRC (pat
), load_mem
);
18823 if (GET_CODE (pat
) == PARALLEL
)
18827 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
18828 if (is_load_insn1 (XVECEXP (pat
, 0, i
), load_mem
))
18835 /* Determine if INSN loads from memory. */
18838 is_load_insn (rtx insn
, rtx
*load_mem
)
18840 if (!insn
|| !INSN_P (insn
))
18846 return is_load_insn1 (PATTERN (insn
), load_mem
);
18849 /* Determine if PAT is a PATTERN of a store insn. */
18852 is_store_insn1 (rtx pat
, rtx
*str_mem
)
18854 if (!pat
|| pat
== NULL_RTX
)
18857 if (GET_CODE (pat
) == SET
)
18859 if (REG_P (SET_SRC (pat
)) || SUBREG_P (SET_SRC (pat
)))
18860 return find_mem_ref (SET_DEST (pat
), str_mem
);
18865 if (GET_CODE (pat
) == PARALLEL
)
18869 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
18870 if (is_store_insn1 (XVECEXP (pat
, 0, i
), str_mem
))
18877 /* Determine if INSN stores to memory. */
18880 is_store_insn (rtx insn
, rtx
*str_mem
)
18882 if (!insn
|| !INSN_P (insn
))
18885 return is_store_insn1 (PATTERN (insn
), str_mem
);
18888 /* Return whether TYPE is a Power9 pairable vector instruction type. */
18891 is_power9_pairable_vec_type (enum attr_type type
)
18895 case TYPE_VECSIMPLE
:
18896 case TYPE_VECCOMPLEX
:
18900 case TYPE_VECFLOAT
:
18902 case TYPE_VECDOUBLE
:
18910 /* Returns whether the dependence between INSN and NEXT is considered
18911 costly by the given target. */
18914 rs6000_is_costly_dependence (dep_t dep
, int cost
, int distance
)
18918 rtx load_mem
, str_mem
;
18920 /* If the flag is not enabled - no dependence is considered costly;
18921 allow all dependent insns in the same group.
18922 This is the most aggressive option. */
18923 if (rs6000_sched_costly_dep
== no_dep_costly
)
18926 /* If the flag is set to 1 - a dependence is always considered costly;
18927 do not allow dependent instructions in the same group.
18928 This is the most conservative option. */
18929 if (rs6000_sched_costly_dep
== all_deps_costly
)
18932 insn
= DEP_PRO (dep
);
18933 next
= DEP_CON (dep
);
18935 if (rs6000_sched_costly_dep
== store_to_load_dep_costly
18936 && is_load_insn (next
, &load_mem
)
18937 && is_store_insn (insn
, &str_mem
))
18938 /* Prevent load after store in the same group. */
18941 if (rs6000_sched_costly_dep
== true_store_to_load_dep_costly
18942 && is_load_insn (next
, &load_mem
)
18943 && is_store_insn (insn
, &str_mem
)
18944 && DEP_TYPE (dep
) == REG_DEP_TRUE
18945 && mem_locations_overlap(str_mem
, load_mem
))
18946 /* Prevent load after store in the same group if it is a true
18950 /* The flag is set to X; dependences with latency >= X are considered costly,
18951 and will not be scheduled in the same group. */
18952 if (rs6000_sched_costly_dep
<= max_dep_latency
18953 && ((cost
- distance
) >= (int)rs6000_sched_costly_dep
))
18959 /* Return the next insn after INSN that is found before TAIL is reached,
18960 skipping any "non-active" insns - insns that will not actually occupy
18961 an issue slot. Return NULL_RTX if such an insn is not found. */
18964 get_next_active_insn (rtx_insn
*insn
, rtx_insn
*tail
)
18966 if (insn
== NULL_RTX
|| insn
== tail
)
18971 insn
= NEXT_INSN (insn
);
18972 if (insn
== NULL_RTX
|| insn
== tail
)
18976 || JUMP_P (insn
) || JUMP_TABLE_DATA_P (insn
)
18977 || (NONJUMP_INSN_P (insn
)
18978 && GET_CODE (PATTERN (insn
)) != USE
18979 && GET_CODE (PATTERN (insn
)) != CLOBBER
18980 && INSN_CODE (insn
) != CODE_FOR_stack_tie
))
18986 /* Move instruction at POS to the end of the READY list. */
18989 move_to_end_of_ready (rtx_insn
**ready
, int pos
, int lastpos
)
18995 for (i
= pos
; i
< lastpos
; i
++)
18996 ready
[i
] = ready
[i
+ 1];
18997 ready
[lastpos
] = tmp
;
19000 /* Do Power6 specific sched_reorder2 reordering of ready list. */
19003 power6_sched_reorder2 (rtx_insn
**ready
, int lastpos
)
19005 /* For Power6, we need to handle some special cases to try and keep the
19006 store queue from overflowing and triggering expensive flushes.
19008 This code monitors how load and store instructions are being issued
19009 and skews the ready list one way or the other to increase the likelihood
19010 that a desired instruction is issued at the proper time.
19012 A couple of things are done. First, we maintain a "load_store_pendulum"
19013 to track the current state of load/store issue.
19015 - If the pendulum is at zero, then no loads or stores have been
19016 issued in the current cycle so we do nothing.
19018 - If the pendulum is 1, then a single load has been issued in this
19019 cycle and we attempt to locate another load in the ready list to
19022 - If the pendulum is -2, then two stores have already been
19023 issued in this cycle, so we increase the priority of the first load
19024 in the ready list to increase it's likelihood of being chosen first
19027 - If the pendulum is -1, then a single store has been issued in this
19028 cycle and we attempt to locate another store in the ready list to
19029 issue with it, preferring a store to an adjacent memory location to
19030 facilitate store pairing in the store queue.
19032 - If the pendulum is 2, then two loads have already been
19033 issued in this cycle, so we increase the priority of the first store
19034 in the ready list to increase it's likelihood of being chosen first
19037 - If the pendulum < -2 or > 2, then do nothing.
19039 Note: This code covers the most common scenarios. There exist non
19040 load/store instructions which make use of the LSU and which
19041 would need to be accounted for to strictly model the behavior
19042 of the machine. Those instructions are currently unaccounted
19043 for to help minimize compile time overhead of this code.
19046 rtx load_mem
, str_mem
;
19048 if (is_store_insn (last_scheduled_insn
, &str_mem
))
19049 /* Issuing a store, swing the load_store_pendulum to the left */
19050 load_store_pendulum
--;
19051 else if (is_load_insn (last_scheduled_insn
, &load_mem
))
19052 /* Issuing a load, swing the load_store_pendulum to the right */
19053 load_store_pendulum
++;
19055 return cached_can_issue_more
;
19057 /* If the pendulum is balanced, or there is only one instruction on
19058 the ready list, then all is well, so return. */
19059 if ((load_store_pendulum
== 0) || (lastpos
<= 0))
19060 return cached_can_issue_more
;
19062 if (load_store_pendulum
== 1)
19064 /* A load has been issued in this cycle. Scan the ready list
19065 for another load to issue with it */
19070 if (is_load_insn (ready
[pos
], &load_mem
))
19072 /* Found a load. Move it to the head of the ready list,
19073 and adjust it's priority so that it is more likely to
19075 move_to_end_of_ready (ready
, pos
, lastpos
);
19077 if (!sel_sched_p ()
19078 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
19079 INSN_PRIORITY (ready
[lastpos
])++;
19085 else if (load_store_pendulum
== -2)
19087 /* Two stores have been issued in this cycle. Increase the
19088 priority of the first load in the ready list to favor it for
19089 issuing in the next cycle. */
19094 if (is_load_insn (ready
[pos
], &load_mem
)
19096 && INSN_PRIORITY_KNOWN (ready
[pos
]))
19098 INSN_PRIORITY (ready
[pos
])++;
19100 /* Adjust the pendulum to account for the fact that a load
19101 was found and increased in priority. This is to prevent
19102 increasing the priority of multiple loads */
19103 load_store_pendulum
--;
19110 else if (load_store_pendulum
== -1)
19112 /* A store has been issued in this cycle. Scan the ready list for
19113 another store to issue with it, preferring a store to an adjacent
19115 int first_store_pos
= -1;
19121 if (is_store_insn (ready
[pos
], &str_mem
))
19124 /* Maintain the index of the first store found on the
19126 if (first_store_pos
== -1)
19127 first_store_pos
= pos
;
19129 if (is_store_insn (last_scheduled_insn
, &str_mem2
)
19130 && adjacent_mem_locations (str_mem
, str_mem2
))
19132 /* Found an adjacent store. Move it to the head of the
19133 ready list, and adjust it's priority so that it is
19134 more likely to stay there */
19135 move_to_end_of_ready (ready
, pos
, lastpos
);
19137 if (!sel_sched_p ()
19138 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
19139 INSN_PRIORITY (ready
[lastpos
])++;
19141 first_store_pos
= -1;
19149 if (first_store_pos
>= 0)
19151 /* An adjacent store wasn't found, but a non-adjacent store was,
19152 so move the non-adjacent store to the front of the ready
19153 list, and adjust its priority so that it is more likely to
19155 move_to_end_of_ready (ready
, first_store_pos
, lastpos
);
19156 if (!sel_sched_p ()
19157 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
19158 INSN_PRIORITY (ready
[lastpos
])++;
19161 else if (load_store_pendulum
== 2)
19163 /* Two loads have been issued in this cycle. Increase the priority
19164 of the first store in the ready list to favor it for issuing in
19170 if (is_store_insn (ready
[pos
], &str_mem
)
19172 && INSN_PRIORITY_KNOWN (ready
[pos
]))
19174 INSN_PRIORITY (ready
[pos
])++;
19176 /* Adjust the pendulum to account for the fact that a store
19177 was found and increased in priority. This is to prevent
19178 increasing the priority of multiple stores */
19179 load_store_pendulum
++;
19187 return cached_can_issue_more
;
19190 /* Do Power9 specific sched_reorder2 reordering of ready list. */
19193 power9_sched_reorder2 (rtx_insn
**ready
, int lastpos
)
19196 enum attr_type type
, type2
;
19198 type
= get_attr_type (last_scheduled_insn
);
19200 /* Try to issue fixed point divides back-to-back in pairs so they will be
19201 routed to separate execution units and execute in parallel. */
19202 if (type
== TYPE_DIV
&& divide_cnt
== 0)
19204 /* First divide has been scheduled. */
19207 /* Scan the ready list looking for another divide, if found move it
19208 to the end of the list so it is chosen next. */
19212 if (recog_memoized (ready
[pos
]) >= 0
19213 && get_attr_type (ready
[pos
]) == TYPE_DIV
)
19215 move_to_end_of_ready (ready
, pos
, lastpos
);
19223 /* Last insn was the 2nd divide or not a divide, reset the counter. */
19226 /* The best dispatch throughput for vector and vector load insns can be
19227 achieved by interleaving a vector and vector load such that they'll
19228 dispatch to the same superslice. If this pairing cannot be achieved
19229 then it is best to pair vector insns together and vector load insns
19232 To aid in this pairing, vec_pairing maintains the current state with
19233 the following values:
19235 0 : Initial state, no vecload/vector pairing has been started.
19237 1 : A vecload or vector insn has been issued and a candidate for
19238 pairing has been found and moved to the end of the ready
19240 if (type
== TYPE_VECLOAD
)
19242 /* Issued a vecload. */
19243 if (vec_pairing
== 0)
19245 int vecload_pos
= -1;
19246 /* We issued a single vecload, look for a vector insn to pair it
19247 with. If one isn't found, try to pair another vecload. */
19251 if (recog_memoized (ready
[pos
]) >= 0)
19253 type2
= get_attr_type (ready
[pos
]);
19254 if (is_power9_pairable_vec_type (type2
))
19256 /* Found a vector insn to pair with, move it to the
19257 end of the ready list so it is scheduled next. */
19258 move_to_end_of_ready (ready
, pos
, lastpos
);
19260 return cached_can_issue_more
;
19262 else if (type2
== TYPE_VECLOAD
&& vecload_pos
== -1)
19263 /* Remember position of first vecload seen. */
19268 if (vecload_pos
>= 0)
19270 /* Didn't find a vector to pair with but did find a vecload,
19271 move it to the end of the ready list. */
19272 move_to_end_of_ready (ready
, vecload_pos
, lastpos
);
19274 return cached_can_issue_more
;
19278 else if (is_power9_pairable_vec_type (type
))
19280 /* Issued a vector operation. */
19281 if (vec_pairing
== 0)
19284 /* We issued a single vector insn, look for a vecload to pair it
19285 with. If one isn't found, try to pair another vector. */
19289 if (recog_memoized (ready
[pos
]) >= 0)
19291 type2
= get_attr_type (ready
[pos
]);
19292 if (type2
== TYPE_VECLOAD
)
19294 /* Found a vecload insn to pair with, move it to the
19295 end of the ready list so it is scheduled next. */
19296 move_to_end_of_ready (ready
, pos
, lastpos
);
19298 return cached_can_issue_more
;
19300 else if (is_power9_pairable_vec_type (type2
)
19302 /* Remember position of first vector insn seen. */
19309 /* Didn't find a vecload to pair with but did find a vector
19310 insn, move it to the end of the ready list. */
19311 move_to_end_of_ready (ready
, vec_pos
, lastpos
);
19313 return cached_can_issue_more
;
19318 /* We've either finished a vec/vecload pair, couldn't find an insn to
19319 continue the current pair, or the last insn had nothing to do with
19320 with pairing. In any case, reset the state. */
19324 return cached_can_issue_more
;
19327 /* Determine if INSN is a store to memory that can be fused with a similar
19331 is_fusable_store (rtx_insn
*insn
, rtx
*str_mem
)
19333 /* Insn must be a non-prefixed base+disp form store. */
19334 if (is_store_insn (insn
, str_mem
)
19335 && get_attr_prefixed (insn
) == PREFIXED_NO
19336 && get_attr_update (insn
) == UPDATE_NO
19337 && get_attr_indexed (insn
) == INDEXED_NO
)
19339 /* Further restrictions by mode and size. */
19340 if (!MEM_SIZE_KNOWN_P (*str_mem
))
19343 machine_mode mode
= GET_MODE (*str_mem
);
19344 HOST_WIDE_INT size
= MEM_SIZE (*str_mem
);
19346 if (INTEGRAL_MODE_P (mode
))
19347 /* Must be word or dword size. */
19348 return (size
== 4 || size
== 8);
19349 else if (FLOAT_MODE_P (mode
))
19350 /* Must be dword size. */
19351 return (size
== 8);
19357 /* Do Power10 specific reordering of the ready list. */
19360 power10_sched_reorder (rtx_insn
**ready
, int lastpos
)
19364 /* Do store fusion during sched2 only. */
19365 if (!reload_completed
)
19366 return cached_can_issue_more
;
19368 /* If the prior insn finished off a store fusion pair then simply
19369 reset the counter and return, nothing more to do. */
19370 if (load_store_pendulum
!= 0)
19372 load_store_pendulum
= 0;
19373 return cached_can_issue_more
;
19376 /* Try to pair certain store insns to adjacent memory locations
19377 so that the hardware will fuse them to a single operation. */
19378 if (TARGET_P10_FUSION
&& is_fusable_store (last_scheduled_insn
, &mem1
))
19381 /* A fusable store was just scheduled. Scan the ready list for another
19382 store that it can fuse with. */
19387 /* GPR stores can be ascending or descending offsets, FPR/VSR stores
19388 must be ascending only. */
19389 if (is_fusable_store (ready
[pos
], &mem2
)
19390 && ((INTEGRAL_MODE_P (GET_MODE (mem1
))
19391 && adjacent_mem_locations (mem1
, mem2
))
19392 || (FLOAT_MODE_P (GET_MODE (mem1
))
19393 && (adjacent_mem_locations (mem1
, mem2
) == mem1
))))
19395 /* Found a fusable store. Move it to the end of the ready list
19396 so it is scheduled next. */
19397 move_to_end_of_ready (ready
, pos
, lastpos
);
19399 load_store_pendulum
= -1;
19406 return cached_can_issue_more
;
19409 /* We are about to begin issuing insns for this clock cycle. */
19412 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED
, int sched_verbose
,
19413 rtx_insn
**ready ATTRIBUTE_UNUSED
,
19414 int *pn_ready ATTRIBUTE_UNUSED
,
19415 int clock_var ATTRIBUTE_UNUSED
)
19417 int n_ready
= *pn_ready
;
19420 fprintf (dump
, "// rs6000_sched_reorder :\n");
19422 /* Reorder the ready list, if the second to last ready insn
19423 is a nonepipeline insn. */
19424 if (rs6000_tune
== PROCESSOR_CELL
&& n_ready
> 1)
19426 if (is_nonpipeline_insn (ready
[n_ready
- 1])
19427 && (recog_memoized (ready
[n_ready
- 2]) > 0))
19428 /* Simply swap first two insns. */
19429 std::swap (ready
[n_ready
- 1], ready
[n_ready
- 2]);
19432 if (rs6000_tune
== PROCESSOR_POWER6
)
19433 load_store_pendulum
= 0;
19435 /* Do Power10 dependent reordering. For now, assume "future" has the same
19436 dependent reordering as power10. */
19437 if ((rs6000_tune
== PROCESSOR_POWER10
19438 || rs6000_tune
== PROCESSOR_FUTURE
) && last_scheduled_insn
)
19439 power10_sched_reorder (ready
, n_ready
- 1);
19441 return rs6000_issue_rate ();
19444 /* Like rs6000_sched_reorder, but called after issuing each insn. */
19447 rs6000_sched_reorder2 (FILE *dump
, int sched_verbose
, rtx_insn
**ready
,
19448 int *pn_ready
, int clock_var ATTRIBUTE_UNUSED
)
19451 fprintf (dump
, "// rs6000_sched_reorder2 :\n");
19453 /* Do Power6 dependent reordering if necessary. */
19454 if (rs6000_tune
== PROCESSOR_POWER6
&& last_scheduled_insn
)
19455 return power6_sched_reorder2 (ready
, *pn_ready
- 1);
19457 /* Do Power9 dependent reordering if necessary. */
19458 if (rs6000_tune
== PROCESSOR_POWER9
&& last_scheduled_insn
19459 && recog_memoized (last_scheduled_insn
) >= 0)
19460 return power9_sched_reorder2 (ready
, *pn_ready
- 1);
19462 /* Do Power10 dependent reordering. For now, assume "future" has the same
19463 dependent reordering as power10. */
19464 if ((rs6000_tune
== PROCESSOR_POWER10
19465 || rs6000_tune
== PROCESSOR_FUTURE
) && last_scheduled_insn
)
19466 return power10_sched_reorder (ready
, *pn_ready
- 1);
19468 return cached_can_issue_more
;
19471 /* Return whether the presence of INSN causes a dispatch group termination
19472 of group WHICH_GROUP.
19474 If WHICH_GROUP == current_group, this function will return true if INSN
19475 causes the termination of the current group (i.e, the dispatch group to
19476 which INSN belongs). This means that INSN will be the last insn in the
19477 group it belongs to.
19479 If WHICH_GROUP == previous_group, this function will return true if INSN
19480 causes the termination of the previous group (i.e, the dispatch group that
19481 precedes the group to which INSN belongs). This means that INSN will be
19482 the first insn in the group it belongs to). */
19485 insn_terminates_group_p (rtx_insn
*insn
, enum group_termination which_group
)
19492 first
= insn_must_be_first_in_group (insn
);
19493 last
= insn_must_be_last_in_group (insn
);
19498 if (which_group
== current_group
)
19500 else if (which_group
== previous_group
)
/* Return true if INSN must start a new dispatch group for the current
   tuning target.  NOTE(review): this region was damaged in extraction --
   many case labels, returns and braces between the surviving lines are
   missing from this copy; only comments have been added here.  */
19508 insn_must_be_first_in_group (rtx_insn
*insn
)
/* Scheduling attribute of INSN, consulted by the per-CPU checks below.  */
19510 enum attr_type type
;
/* Debug insns and bare USE/CLOBBER patterns never constrain grouping.  */
19514 || DEBUG_INSN_P (insn
)
19515 || GET_CODE (PATTERN (insn
)) == USE
19516 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
/* The first-in-group rules are processor specific.  */
19519 switch (rs6000_tune
)
/* POWER5: cracked insns must lead a group.  */
19521 case PROCESSOR_POWER5
:
19522 if (is_cracked_insn (insn
))
/* POWER4 (fallthrough shared with POWER5): microcoded insns must
   lead a group.  */
19525 case PROCESSOR_POWER4
:
19526 if (is_microcoded_insn (insn
))
/* No further restrictions unless dispatch-group scheduling is on.  */
19529 if (!rs6000_sched_groups
)
19532 type
= get_attr_type (insn
);
/* Certain insn types (e.g. CR-logical) must begin a group; the full
   case list did not survive extraction.  */
19539 case TYPE_CR_LOGICAL
:
/* POWER6-specific first-in-group insn types and attribute checks.  */
19552 case PROCESSOR_POWER6
:
19553 type
= get_attr_type (insn
);
19562 case TYPE_FPCOMPARE
:
/* Non-record-form or fixed-shift variants are the constrained ones
   here -- see the surviving DOT_NO / VAR_SHIFT_NO tests.  */
19573 if (get_attr_dot (insn
) == DOT_NO
19574 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
19579 if (get_attr_size (insn
) == SIZE_32
)
19587 if (get_attr_update (insn
) == UPDATE_YES
)
/* POWER7-specific first-in-group insn types and attribute checks.  */
19595 case PROCESSOR_POWER7
:
19596 type
= get_attr_type (insn
);
19600 case TYPE_CR_LOGICAL
:
19614 if (get_attr_dot (insn
) == DOT_YES
)
19619 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19620 || get_attr_update (insn
) == UPDATE_YES
)
19627 if (get_attr_update (insn
) == UPDATE_YES
)
/* POWER8-specific first-in-group insn types and attribute checks.  */
19635 case PROCESSOR_POWER8
:
19636 type
= get_attr_type (insn
);
19640 case TYPE_CR_LOGICAL
:
19648 case TYPE_VECSTORE
:
19655 if (get_attr_dot (insn
) == DOT_YES
)
19660 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19661 || get_attr_update (insn
) == UPDATE_YES
)
/* Update-form indexed accesses are also first-in-group on POWER8.  */
19666 if (get_attr_update (insn
) == UPDATE_YES
19667 && get_attr_indexed (insn
) == INDEXED_YES
)
/* Return true if INSN must end its dispatch group for the current tuning
   target.  NOTE(review): this region was damaged in extraction -- many
   case labels, returns and braces between the surviving lines are
   missing from this copy; only comments have been added here.  */
19683 insn_must_be_last_in_group (rtx_insn
*insn
)
/* Scheduling attribute of INSN, consulted by the per-CPU checks below.  */
19685 enum attr_type type
;
/* Debug insns and bare USE/CLOBBER patterns never constrain grouping.  */
19689 || DEBUG_INSN_P (insn
)
19690 || GET_CODE (PATTERN (insn
)) == USE
19691 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
/* The last-in-group rules are processor specific.  */
19694 switch (rs6000_tune
) {
/* POWER4/POWER5: microcoded and branch-slot insns must end a group.  */
19695 case PROCESSOR_POWER4
:
19696 case PROCESSOR_POWER5
:
19697 if (is_microcoded_insn (insn
))
19700 if (is_branch_slot_insn (insn
))
/* POWER6-specific last-in-group insn types and attribute checks.  */
19704 case PROCESSOR_POWER6
:
19705 type
= get_attr_type (insn
);
19713 case TYPE_FPCOMPARE
:
19724 if (get_attr_dot (insn
) == DOT_NO
19725 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
19730 if (get_attr_size (insn
) == SIZE_32
)
/* POWER7-specific last-in-group insn types and attribute checks.  */
19738 case PROCESSOR_POWER7
:
19739 type
= get_attr_type (insn
);
19749 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19750 && get_attr_update (insn
) == UPDATE_YES
)
19755 if (get_attr_update (insn
) == UPDATE_YES
19756 && get_attr_indexed (insn
) == INDEXED_YES
)
/* POWER8-specific last-in-group insn types and attribute checks.  */
19764 case PROCESSOR_POWER8
:
19765 type
= get_attr_type (insn
);
19777 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19778 && get_attr_update (insn
) == UPDATE_YES
)
19783 if (get_attr_update (insn
) == UPDATE_YES
19784 && get_attr_indexed (insn
) == INDEXED_YES
)
19799 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
19800 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
19803 is_costly_group (rtx
*group_insns
, rtx next_insn
)
19806 int issue_rate
= rs6000_issue_rate ();
19808 for (i
= 0; i
< issue_rate
; i
++)
19810 sd_iterator_def sd_it
;
19812 rtx insn
= group_insns
[i
];
19817 FOR_EACH_DEP (insn
, SD_LIST_RES_FORW
, sd_it
, dep
)
19819 rtx next
= DEP_CON (dep
);
19821 if (next
== next_insn
19822 && rs6000_is_costly_dependence (dep
, dep_cost (dep
), 0))
19830 /* Utility of the function redefine_groups.
19831 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
19832 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
19833 to keep it "far" (in a separate group) from GROUP_INSNS, following
19834 one of the following schemes, depending on the value of the flag
19835 -minsert_sched_nops = X:
19836 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
19837 in order to force NEXT_INSN into a separate group.
19838 (2) X < sched_finish_regroup_exact: insert exactly X nops.
19839 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
19840 insertion (has a group just ended, how many vacant issue slots remain in the
19841 last group, and how many dispatch groups were encountered so far). */
19844 force_new_group (int sched_verbose
, FILE *dump
, rtx
*group_insns
,
19845 rtx_insn
*next_insn
, bool *group_end
, int can_issue_more
,
19850 int issue_rate
= rs6000_issue_rate ();
19851 bool end
= *group_end
;
19854 if (next_insn
== NULL_RTX
|| DEBUG_INSN_P (next_insn
))
19855 return can_issue_more
;
19857 if (rs6000_sched_insert_nops
> sched_finish_regroup_exact
)
19858 return can_issue_more
;
19860 force
= is_costly_group (group_insns
, next_insn
);
19862 return can_issue_more
;
19864 if (sched_verbose
> 6)
19865 fprintf (dump
,"force: group count = %d, can_issue_more = %d\n",
19866 *group_count
,can_issue_more
);
19868 if (rs6000_sched_insert_nops
== sched_finish_regroup_exact
)
19871 can_issue_more
= 0;
19873 /* Since only a branch can be issued in the last issue_slot, it is
19874 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
19875 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
19876 in this case the last nop will start a new group and the branch
19877 will be forced to the new group. */
19878 if (can_issue_more
&& !is_branch_slot_insn (next_insn
))
19881 /* Do we have a special group ending nop? */
19882 if (rs6000_tune
== PROCESSOR_POWER6
|| rs6000_tune
== PROCESSOR_POWER7
19883 || rs6000_tune
== PROCESSOR_POWER8
)
19885 nop
= gen_group_ending_nop ();
19886 emit_insn_before (nop
, next_insn
);
19887 can_issue_more
= 0;
19890 while (can_issue_more
> 0)
19893 emit_insn_before (nop
, next_insn
);
19901 if (rs6000_sched_insert_nops
< sched_finish_regroup_exact
)
19903 int n_nops
= rs6000_sched_insert_nops
;
19905 /* Nops can't be issued from the branch slot, so the effective
19906 issue_rate for nops is 'issue_rate - 1'. */
19907 if (can_issue_more
== 0)
19908 can_issue_more
= issue_rate
;
19910 if (can_issue_more
== 0)
19912 can_issue_more
= issue_rate
- 1;
19915 for (i
= 0; i
< issue_rate
; i
++)
19917 group_insns
[i
] = 0;
19924 emit_insn_before (nop
, next_insn
);
19925 if (can_issue_more
== issue_rate
- 1) /* new group begins */
19928 if (can_issue_more
== 0)
19930 can_issue_more
= issue_rate
- 1;
19933 for (i
= 0; i
< issue_rate
; i
++)
19935 group_insns
[i
] = 0;
19941 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
19944 /* Is next_insn going to start a new group? */
19947 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
19948 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
19949 || (can_issue_more
< issue_rate
&&
19950 insn_terminates_group_p (next_insn
, previous_group
)));
19951 if (*group_end
&& end
)
19954 if (sched_verbose
> 6)
19955 fprintf (dump
, "done force: group count = %d, can_issue_more = %d\n",
19956 *group_count
, can_issue_more
);
19957 return can_issue_more
;
19960 return can_issue_more
;
19963 /* This function tries to synch the dispatch groups that the compiler "sees"
19964 with the dispatch groups that the processor dispatcher is expected to
19965 form in practice. It tries to achieve this synchronization by forcing the
19966 estimated processor grouping on the compiler (as opposed to the function
19967 'pad_goups' which tries to force the scheduler's grouping on the processor).
19969 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
19970 examines the (estimated) dispatch groups that will be formed by the processor
19971 dispatcher. It marks these group boundaries to reflect the estimated
19972 processor grouping, overriding the grouping that the scheduler had marked.
19973 Depending on the value of the flag '-minsert-sched-nops' this function can
19974 force certain insns into separate groups or force a certain distance between
19975 them by inserting nops, for example, if there exists a "costly dependence"
19978 The function estimates the group boundaries that the processor will form as
19979 follows: It keeps track of how many vacant issue slots are available after
19980 each insn. A subsequent insn will start a new group if one of the following
19982 - no more vacant issue slots remain in the current dispatch group.
19983 - only the last issue slot, which is the branch slot, is vacant, but the next
19984 insn is not a branch.
19985 - only the last 2 or less issue slots, including the branch slot, are vacant,
19986 which means that a cracked insn (which occupies two issue slots) can't be
19987 issued in this group.
19988 - less than 'issue_rate' slots are vacant, and the next insn always needs to
19989 start a new group. */
19992 redefine_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
19995 rtx_insn
*insn
, *next_insn
;
19997 int can_issue_more
;
20000 int group_count
= 0;
20004 issue_rate
= rs6000_issue_rate ();
20005 group_insns
= XALLOCAVEC (rtx
, issue_rate
);
20006 for (i
= 0; i
< issue_rate
; i
++)
20008 group_insns
[i
] = 0;
20010 can_issue_more
= issue_rate
;
20012 insn
= get_next_active_insn (prev_head_insn
, tail
);
20015 while (insn
!= NULL_RTX
)
20017 slot
= (issue_rate
- can_issue_more
);
20018 group_insns
[slot
] = insn
;
20020 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
20021 if (insn_terminates_group_p (insn
, current_group
))
20022 can_issue_more
= 0;
20024 next_insn
= get_next_active_insn (insn
, tail
);
20025 if (next_insn
== NULL_RTX
)
20026 return group_count
+ 1;
20028 /* Is next_insn going to start a new group? */
20030 = (can_issue_more
== 0
20031 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
20032 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
20033 || (can_issue_more
< issue_rate
&&
20034 insn_terminates_group_p (next_insn
, previous_group
)));
20036 can_issue_more
= force_new_group (sched_verbose
, dump
, group_insns
,
20037 next_insn
, &group_end
, can_issue_more
,
20043 can_issue_more
= 0;
20044 for (i
= 0; i
< issue_rate
; i
++)
20046 group_insns
[i
] = 0;
20050 if (GET_MODE (next_insn
) == TImode
&& can_issue_more
)
20051 PUT_MODE (next_insn
, VOIDmode
);
20052 else if (!can_issue_more
&& GET_MODE (next_insn
) != TImode
)
20053 PUT_MODE (next_insn
, TImode
);
20056 if (can_issue_more
== 0)
20057 can_issue_more
= issue_rate
;
20060 return group_count
;
20063 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
20064 dispatch group boundaries that the scheduler had marked. Pad with nops
20065 any dispatch groups which have vacant issue slots, in order to force the
20066 scheduler's grouping on the processor dispatcher. The function
20067 returns the number of dispatch groups found. */
20070 pad_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
20073 rtx_insn
*insn
, *next_insn
;
20076 int can_issue_more
;
20078 int group_count
= 0;
20080 /* Initialize issue_rate. */
20081 issue_rate
= rs6000_issue_rate ();
20082 can_issue_more
= issue_rate
;
20084 insn
= get_next_active_insn (prev_head_insn
, tail
);
20085 next_insn
= get_next_active_insn (insn
, tail
);
20087 while (insn
!= NULL_RTX
)
20090 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
20092 group_end
= (next_insn
== NULL_RTX
|| GET_MODE (next_insn
) == TImode
);
20094 if (next_insn
== NULL_RTX
)
20099 /* If the scheduler had marked group termination at this location
20100 (between insn and next_insn), and neither insn nor next_insn will
20101 force group termination, pad the group with nops to force group
20104 && (rs6000_sched_insert_nops
== sched_finish_pad_groups
)
20105 && !insn_terminates_group_p (insn
, current_group
)
20106 && !insn_terminates_group_p (next_insn
, previous_group
))
20108 if (!is_branch_slot_insn (next_insn
))
20111 while (can_issue_more
)
20114 emit_insn_before (nop
, next_insn
);
20119 can_issue_more
= issue_rate
;
20124 next_insn
= get_next_active_insn (insn
, tail
);
20127 return group_count
;
20130 /* We're beginning a new block. Initialize data structures as necessary. */
20133 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED
,
20134 int sched_verbose ATTRIBUTE_UNUSED
,
20135 int max_ready ATTRIBUTE_UNUSED
)
20137 last_scheduled_insn
= NULL
;
20138 load_store_pendulum
= 0;
20143 /* The following function is called at the end of scheduling BB.
20144 After reload, it inserts nops at insn group bundling. */
20147 rs6000_sched_finish (FILE *dump
, int sched_verbose
)
20152 fprintf (dump
, "=== Finishing schedule.\n");
20154 if (reload_completed
&& rs6000_sched_groups
)
20156 /* Do not run sched_finish hook when selective scheduling enabled. */
20157 if (sel_sched_p ())
20160 if (rs6000_sched_insert_nops
== sched_finish_none
)
20163 if (rs6000_sched_insert_nops
== sched_finish_pad_groups
)
20164 n_groups
= pad_groups (dump
, sched_verbose
,
20165 current_sched_info
->prev_head
,
20166 current_sched_info
->next_tail
);
20168 n_groups
= redefine_groups (dump
, sched_verbose
,
20169 current_sched_info
->prev_head
,
20170 current_sched_info
->next_tail
);
20172 if (sched_verbose
>= 6)
20174 fprintf (dump
, "ngroups = %d\n", n_groups
);
20175 print_rtl (dump
, current_sched_info
->prev_head
);
20176 fprintf (dump
, "Done finish_sched\n");
20181 struct rs6000_sched_context
20183 short cached_can_issue_more
;
20184 rtx_insn
*last_scheduled_insn
;
20185 int load_store_pendulum
;
20190 typedef struct rs6000_sched_context rs6000_sched_context_def
;
20191 typedef rs6000_sched_context_def
*rs6000_sched_context_t
;
20193 /* Allocate store for new scheduling context. */
20195 rs6000_alloc_sched_context (void)
20197 return xmalloc (sizeof (rs6000_sched_context_def
));
20200 /* If CLEAN_P is true then initializes _SC with clean data,
20201 and from the global context otherwise. */
20203 rs6000_init_sched_context (void *_sc
, bool clean_p
)
20205 rs6000_sched_context_t sc
= (rs6000_sched_context_t
) _sc
;
20209 sc
->cached_can_issue_more
= 0;
20210 sc
->last_scheduled_insn
= NULL
;
20211 sc
->load_store_pendulum
= 0;
20212 sc
->divide_cnt
= 0;
20213 sc
->vec_pairing
= 0;
20217 sc
->cached_can_issue_more
= cached_can_issue_more
;
20218 sc
->last_scheduled_insn
= last_scheduled_insn
;
20219 sc
->load_store_pendulum
= load_store_pendulum
;
20220 sc
->divide_cnt
= divide_cnt
;
20221 sc
->vec_pairing
= vec_pairing
;
20225 /* Sets the global scheduling context to the one pointed to by _SC. */
20227 rs6000_set_sched_context (void *_sc
)
20229 rs6000_sched_context_t sc
= (rs6000_sched_context_t
) _sc
;
20231 gcc_assert (sc
!= NULL
);
20233 cached_can_issue_more
= sc
->cached_can_issue_more
;
20234 last_scheduled_insn
= sc
->last_scheduled_insn
;
20235 load_store_pendulum
= sc
->load_store_pendulum
;
20236 divide_cnt
= sc
->divide_cnt
;
20237 vec_pairing
= sc
->vec_pairing
;
20242 rs6000_free_sched_context (void *_sc
)
20244 gcc_assert (_sc
!= NULL
);
/* Hook: return whether INSN may be speculatively scheduled, dispatching
   on its scheduling attribute type.  NOTE(review): the case labels and
   return statements of this switch were lost in extraction; only comments
   have been added here.  */
20250 rs6000_sched_can_speculate_insn (rtx_insn
*insn
)
20252 switch (get_attr_type (insn
))
20267 /* Length in units of the trampoline for entering a nested function. */
20270 rs6000_trampoline_size (void)
20274 switch (DEFAULT_ABI
)
20277 gcc_unreachable ();
20280 ret
= (TARGET_32BIT
) ? 12 : 24;
20284 gcc_assert (!TARGET_32BIT
);
20290 ret
= (TARGET_32BIT
) ? 40 : 48;
20297 /* Emit RTL insns to initialize the variable parts of a trampoline.
20298 FNADDR is an RTX for the address of the function's pure code.
20299 CXT is an RTX for the static chain value for the function. */
20302 rs6000_trampoline_init (rtx m_tramp
, tree fndecl
, rtx cxt
)
20304 int regsize
= (TARGET_32BIT
) ? 4 : 8;
20305 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
20306 rtx ctx_reg
= force_reg (Pmode
, cxt
);
20307 rtx addr
= force_reg (Pmode
, XEXP (m_tramp
, 0));
20309 switch (DEFAULT_ABI
)
20312 gcc_unreachable ();
20314 /* Under AIX, just build the 3 word function descriptor */
20317 rtx fnmem
, fn_reg
, toc_reg
;
20319 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS
)
20320 error ("you cannot take the address of a nested function if you use "
20321 "the %qs option", "-mno-pointers-to-nested-functions");
20323 fnmem
= gen_const_mem (Pmode
, force_reg (Pmode
, fnaddr
));
20324 fn_reg
= gen_reg_rtx (Pmode
);
20325 toc_reg
= gen_reg_rtx (Pmode
);
20327 /* Macro to shorten the code expansions below. */
20328 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
20330 m_tramp
= replace_equiv_address (m_tramp
, addr
);
20332 emit_move_insn (fn_reg
, MEM_PLUS (fnmem
, 0));
20333 emit_move_insn (toc_reg
, MEM_PLUS (fnmem
, regsize
));
20334 emit_move_insn (MEM_PLUS (m_tramp
, 0), fn_reg
);
20335 emit_move_insn (MEM_PLUS (m_tramp
, regsize
), toc_reg
);
20336 emit_move_insn (MEM_PLUS (m_tramp
, 2*regsize
), ctx_reg
);
20342 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
20346 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__trampoline_setup"),
20347 LCT_NORMAL
, VOIDmode
,
20349 GEN_INT (rs6000_trampoline_size ()), SImode
,
20357 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
20358 identifier as an argument, so the front end shouldn't look it up. */
20361 rs6000_attribute_takes_identifier_p (const_tree attr_id
)
20363 return is_attribute_p ("altivec", attr_id
);
20366 /* Handle the "altivec" attribute. The attribute may have
20367 arguments as follows:
20369 __attribute__((altivec(vector__)))
20370 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
20371 __attribute__((altivec(bool__))) (always followed by 'unsigned')
20373 and may appear more than once (e.g., 'vector bool char') in a
20374 given declaration. */
20377 rs6000_handle_altivec_attribute (tree
*node
,
20378 tree name ATTRIBUTE_UNUSED
,
20380 int flags ATTRIBUTE_UNUSED
,
20381 bool *no_add_attrs
)
20383 tree type
= *node
, result
= NULL_TREE
;
20387 = ((args
&& TREE_CODE (args
) == TREE_LIST
&& TREE_VALUE (args
)
20388 && TREE_CODE (TREE_VALUE (args
)) == IDENTIFIER_NODE
)
20389 ? *IDENTIFIER_POINTER (TREE_VALUE (args
))
20392 while (POINTER_TYPE_P (type
)
20393 || TREE_CODE (type
) == FUNCTION_TYPE
20394 || TREE_CODE (type
) == METHOD_TYPE
20395 || TREE_CODE (type
) == ARRAY_TYPE
)
20396 type
= TREE_TYPE (type
);
20398 mode
= TYPE_MODE (type
);
20400 /* Check for invalid AltiVec type qualifiers. */
20401 if (type
== long_double_type_node
)
20402 error ("use of %<long double%> in AltiVec types is invalid");
20403 else if (type
== boolean_type_node
)
20404 error ("use of boolean types in AltiVec types is invalid");
20405 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
20406 error ("use of %<complex%> in AltiVec types is invalid");
20407 else if (DECIMAL_FLOAT_MODE_P (mode
))
20408 error ("use of decimal floating-point types in AltiVec types is invalid");
20409 else if (!TARGET_VSX
)
20411 if (type
== long_unsigned_type_node
|| type
== long_integer_type_node
)
20414 error ("use of %<long%> in AltiVec types is invalid for "
20415 "64-bit code without %qs", "-mvsx");
20416 else if (rs6000_warn_altivec_long
)
20417 warning (0, "use of %<long%> in AltiVec types is deprecated; "
20420 else if (type
== long_long_unsigned_type_node
20421 || type
== long_long_integer_type_node
)
20422 error ("use of %<long long%> in AltiVec types is invalid without %qs",
20424 else if (type
== double_type_node
)
20425 error ("use of %<double%> in AltiVec types is invalid without %qs",
20429 switch (altivec_type
)
20432 unsigned_p
= TYPE_UNSIGNED (type
);
20436 result
= (unsigned_p
? unsigned_V1TI_type_node
: V1TI_type_node
);
20439 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
20442 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
20445 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
20448 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
20450 case E_SFmode
: result
= V4SF_type_node
; break;
20451 case E_DFmode
: result
= V2DF_type_node
; break;
20452 /* If the user says 'vector int bool', we may be handed the 'bool'
20453 attribute _before_ the 'vector' attribute, and so select the
20454 proper type in the 'b' case below. */
20455 case E_V4SImode
: case E_V8HImode
: case E_V16QImode
: case E_V4SFmode
:
20456 case E_V2DImode
: case E_V2DFmode
:
20464 case E_TImode
: case E_V1TImode
: result
= bool_V1TI_type_node
; break;
20465 case E_DImode
: case E_V2DImode
: result
= bool_V2DI_type_node
; break;
20466 case E_SImode
: case E_V4SImode
: result
= bool_V4SI_type_node
; break;
20467 case E_HImode
: case E_V8HImode
: result
= bool_V8HI_type_node
; break;
20468 case E_QImode
: case E_V16QImode
: result
= bool_V16QI_type_node
;
20475 case E_V8HImode
: result
= pixel_V8HI_type_node
;
20481 /* Propagate qualifiers attached to the element type
20482 onto the vector type. */
20483 if (result
&& result
!= type
&& TYPE_QUALS (type
))
20484 result
= build_qualified_type (result
, TYPE_QUALS (type
));
20486 *no_add_attrs
= true; /* No need to hang on to the attribute. */
20489 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
20494 /* AltiVec defines five built-in scalar types that serve as vector
20495 elements; we must teach the compiler how to mangle them. The 128-bit
20496 floating point mangling is target-specific as well. MMA defines
20497 two built-in types to be used as opaque vector types. */
20499 static const char *
20500 rs6000_mangle_type (const_tree type
)
20502 type
= TYPE_MAIN_VARIANT (type
);
20504 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
20505 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
20506 && TREE_CODE (type
) != OPAQUE_TYPE
)
20509 if (type
== bool_char_type_node
) return "U6__boolc";
20510 if (type
== bool_short_type_node
) return "U6__bools";
20511 if (type
== pixel_type_node
) return "u7__pixel";
20512 if (type
== bool_int_type_node
) return "U6__booli";
20513 if (type
== bool_long_long_type_node
) return "U6__boolx";
20515 if (type
== float128_type_node
|| type
== float64x_type_node
)
20518 if (SCALAR_FLOAT_TYPE_P (type
) && FLOAT128_IBM_P (TYPE_MODE (type
)))
20520 if (SCALAR_FLOAT_TYPE_P (type
) && FLOAT128_IEEE_P (TYPE_MODE (type
)))
20521 return "u9__ieee128";
20523 if (type
== vector_pair_type_node
)
20524 return "u13__vector_pair";
20525 if (type
== vector_quad_type_node
)
20526 return "u13__vector_quad";
20527 if (type
== dmr_type_node
)
20530 /* For all other types, use the default mangling. */
20534 /* Handle a "longcall" or "shortcall" attribute; arguments as in
20535 struct attribute_spec.handler. */
20538 rs6000_handle_longcall_attribute (tree
*node
, tree name
,
20539 tree args ATTRIBUTE_UNUSED
,
20540 int flags ATTRIBUTE_UNUSED
,
20541 bool *no_add_attrs
)
20543 if (TREE_CODE (*node
) != FUNCTION_TYPE
20544 && TREE_CODE (*node
) != FIELD_DECL
20545 && TREE_CODE (*node
) != TYPE_DECL
)
20547 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
20549 *no_add_attrs
= true;
20555 /* Set longcall attributes on all functions declared when
20556 rs6000_default_long_calls is true. */
20558 rs6000_set_default_type_attributes (tree type
)
20560 if (rs6000_default_long_calls
20561 && (TREE_CODE (type
) == FUNCTION_TYPE
20562 || TREE_CODE (type
) == METHOD_TYPE
))
20563 TYPE_ATTRIBUTES (type
) = tree_cons (get_identifier ("longcall"),
20565 TYPE_ATTRIBUTES (type
));
20568 darwin_set_default_type_attributes (type
);
20572 /* Return a reference suitable for calling a function with the
20573 longcall attribute. */
20576 rs6000_longcall_ref (rtx call_ref
, rtx arg
)
20578 /* System V adds '.' to the internal name, so skip them. */
20579 const char *call_name
= XSTR (call_ref
, 0);
20580 if (*call_name
== '.')
20582 while (*call_name
== '.')
20585 tree node
= get_identifier (call_name
);
20586 call_ref
= gen_rtx_SYMBOL_REF (VOIDmode
, IDENTIFIER_POINTER (node
));
20591 rtx base
= const0_rtx
;
20593 if (rs6000_pcrel_p ())
20595 rtx reg
= gen_rtx_REG (Pmode
, regno
);
20596 rtx u
= gen_rtx_UNSPEC_VOLATILE (Pmode
,
20597 gen_rtvec (3, base
, call_ref
, arg
),
20598 UNSPECV_PLT_PCREL
);
20599 emit_insn (gen_rtx_SET (reg
, u
));
20603 if (DEFAULT_ABI
== ABI_ELFv2
)
20604 base
= gen_rtx_REG (Pmode
, TOC_REGISTER
);
20608 base
= gen_rtx_REG (Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
20611 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
20612 may be used by a function global entry point. For SysV4, r11
20613 is used by __glink_PLTresolve lazy resolver entry. */
20614 rtx reg
= gen_rtx_REG (Pmode
, regno
);
20615 rtx hi
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (3, base
, call_ref
, arg
),
20617 rtx lo
= gen_rtx_UNSPEC_VOLATILE (Pmode
,
20618 gen_rtvec (3, reg
, call_ref
, arg
),
20620 emit_insn (gen_rtx_SET (reg
, hi
));
20621 emit_insn (gen_rtx_SET (reg
, lo
));
20625 return force_reg (Pmode
, call_ref
);
20628 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20629 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
20632 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20633 struct attribute_spec.handler. */
20635 rs6000_handle_struct_attribute (tree
*node
, tree name
,
20636 tree args ATTRIBUTE_UNUSED
,
20637 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
20640 if (DECL_P (*node
))
20642 if (TREE_CODE (*node
) == TYPE_DECL
)
20643 type
= &TREE_TYPE (*node
);
20648 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
20649 || TREE_CODE (*type
) == UNION_TYPE
)))
20651 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
20652 *no_add_attrs
= true;
20655 else if ((is_attribute_p ("ms_struct", name
)
20656 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
20657 || ((is_attribute_p ("gcc_struct", name
)
20658 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
20660 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
20662 *no_add_attrs
= true;
20669 rs6000_ms_bitfield_layout_p (const_tree record_type
)
20671 return (TARGET_USE_MS_BITFIELD_LAYOUT
&&
20672 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
20673 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
20676 #ifdef USING_ELFOS_H
20678 /* A get_unnamed_section callback, used for switching to toc_section. */
20681 rs6000_elf_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED
)
20683 if ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
20684 && TARGET_MINIMAL_TOC
)
20686 if (!toc_initialized
)
20688 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
20689 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20690 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "LCTOC", 0);
20691 fprintf (asm_out_file
, "\t.tc ");
20692 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1[TC],");
20693 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20694 fprintf (asm_out_file
, "\n");
20696 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20697 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20698 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20699 fprintf (asm_out_file
, " = .+32768\n");
20700 toc_initialized
= 1;
20703 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20705 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
20707 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
20708 if (!toc_initialized
)
20710 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20711 toc_initialized
= 1;
20716 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20717 if (!toc_initialized
)
20719 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20720 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20721 fprintf (asm_out_file
, " = .+32768\n");
20722 toc_initialized
= 1;
20727 /* Implement TARGET_ASM_INIT_SECTIONS. */
20730 rs6000_elf_asm_init_sections (void)
20733 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op
, NULL
);
20736 = get_unnamed_section (SECTION_WRITE
, output_section_asm_op
,
20737 SDATA2_SECTION_ASM_OP
);
20740 /* Implement TARGET_SELECT_RTX_SECTION. */
20743 rs6000_elf_select_rtx_section (machine_mode mode
, rtx x
,
20744 unsigned HOST_WIDE_INT align
)
20746 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
))
20747 return toc_section
;
20749 return default_elf_select_rtx_section (mode
, x
, align
);
20752 /* For a SYMBOL_REF, set generic flags and then perform some
20753 target-specific processing.
20755 When the AIX ABI is requested on a non-AIX system, replace the
20756 function name with the real name (with a leading .) rather than the
20757 function descriptor name. This saves a lot of overriding code to
20758 read the prefixes. */
20760 static void rs6000_elf_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
20762 rs6000_elf_encode_section_info (tree decl
, rtx rtl
, int first
)
20764 default_encode_section_info (decl
, rtl
, first
);
20767 && TREE_CODE (decl
) == FUNCTION_DECL
20769 && DEFAULT_ABI
== ABI_AIX
)
20771 rtx sym_ref
= XEXP (rtl
, 0);
20772 size_t len
= strlen (XSTR (sym_ref
, 0));
20773 char *str
= XALLOCAVEC (char, len
+ 2);
20775 memcpy (str
+ 1, XSTR (sym_ref
, 0), len
+ 1);
20776 XSTR (sym_ref
, 0) = ggc_alloc_string (str
, len
+ 1);
/* Return true if SECTION names the section TEMPL or a subsection of it
   (i.e. SECTION is TEMPL followed by end-of-string or '.').  */

static bool
compare_section_name (const char *section, const char *templ)
{
  /* strlen returns size_t; use it rather than int to avoid a
     sign/width mismatch with strncmp's size parameter.  */
  size_t len = strlen (templ);
  return (strncmp (section, templ, len) == 0
	  && (section[len] == 0 || section[len] == '.'));
}
20791 rs6000_elf_in_small_data_p (const_tree decl
)
20793 if (rs6000_sdata
== SDATA_NONE
)
20796 /* We want to merge strings, so we never consider them small data. */
20797 if (TREE_CODE (decl
) == STRING_CST
)
20800 /* Functions are never in the small data area. */
20801 if (TREE_CODE (decl
) == FUNCTION_DECL
)
20804 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_SECTION_NAME (decl
))
20806 const char *section
= DECL_SECTION_NAME (decl
);
20807 if (compare_section_name (section
, ".sdata")
20808 || compare_section_name (section
, ".sdata2")
20809 || compare_section_name (section
, ".gnu.linkonce.s")
20810 || compare_section_name (section
, ".sbss")
20811 || compare_section_name (section
, ".sbss2")
20812 || compare_section_name (section
, ".gnu.linkonce.sb")
20813 || strcmp (section
, ".PPC.EMB.sdata0") == 0
20814 || strcmp (section
, ".PPC.EMB.sbss0") == 0)
20819 /* If we are told not to put readonly data in sdata, then don't. */
20820 if (TREE_READONLY (decl
) && rs6000_sdata
!= SDATA_EABI
20821 && !rs6000_readonly_in_sdata
)
20824 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (decl
));
20827 && size
<= g_switch_value
20828 /* If it's not public, and we're not going to reference it there,
20829 there's no need to put it in the small data section. */
20830 && (rs6000_sdata
!= SDATA_DATA
|| TREE_PUBLIC (decl
)))
20837 #endif /* USING_ELFOS_H */
20839 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
20842 rs6000_use_blocks_for_constant_p (machine_mode mode
, const_rtx x
)
20844 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
);
20847 /* Do not place thread-local symbols refs in the object blocks. */
20850 rs6000_use_blocks_for_decl_p (const_tree decl
)
20852 return !DECL_THREAD_LOCAL_P (decl
);
20855 /* Return a REG that occurs in ADDR with coefficient 1.
20856 ADDR can be effectively incremented by incrementing REG.
20858 r0 is special and we must not select it as an address
20859 register by this routine since our caller will try to
20860 increment the returned register via an "la" instruction. */
20863 find_addr_reg (rtx addr
)
20865 while (GET_CODE (addr
) == PLUS
)
20867 if (REG_P (XEXP (addr
, 0))
20868 && REGNO (XEXP (addr
, 0)) != 0)
20869 addr
= XEXP (addr
, 0);
20870 else if (REG_P (XEXP (addr
, 1))
20871 && REGNO (XEXP (addr
, 1)) != 0)
20872 addr
= XEXP (addr
, 1);
20873 else if (CONSTANT_P (XEXP (addr
, 0)))
20874 addr
= XEXP (addr
, 1);
20875 else if (CONSTANT_P (XEXP (addr
, 1)))
20876 addr
= XEXP (addr
, 0);
20878 gcc_unreachable ();
20880 gcc_assert (REG_P (addr
) && REGNO (addr
) != 0);
20885 rs6000_fatal_bad_address (rtx op
)
20887 fatal_insn ("bad address", op
);
20892 vec
<branch_island
, va_gc
> *branch_islands
;
20894 /* Remember to generate a branch island for far calls to the given
20898 add_compiler_branch_island (tree label_name
, tree function_name
,
20901 branch_island bi
= {function_name
, label_name
, line_number
};
20902 vec_safe_push (branch_islands
, bi
);
20905 /* NO_PREVIOUS_DEF checks in the link list whether the function name is
20906 already there or not. */
20909 no_previous_def (tree function_name
)
20914 FOR_EACH_VEC_SAFE_ELT (branch_islands
, ix
, bi
)
20915 if (function_name
== bi
->function_name
)
20920 /* GET_PREV_LABEL gets the label name from the previous definition of
20924 get_prev_label (tree function_name
)
20929 FOR_EACH_VEC_SAFE_ELT (branch_islands
, ix
, bi
)
20930 if (function_name
== bi
->function_name
)
20931 return bi
->label_name
;
20935 /* Generate external symbol indirection stubs (PIC and non-PIC). */
20938 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
20940 unsigned int length
;
20941 char *symbol_name
, *lazy_ptr_name
;
20942 char *local_label_0
;
20943 static unsigned label
= 0;
20945 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20946 symb
= (*targetm
.strip_name_encoding
) (symb
);
20948 length
= strlen (symb
);
20949 symbol_name
= XALLOCAVEC (char, length
+ 32);
20950 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
20952 lazy_ptr_name
= XALLOCAVEC (char, length
+ 32);
20953 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name
, symb
, length
);
20957 switch_to_section (darwin_sections
[machopic_picsymbol_stub1_section
]);
20958 fprintf (file
, "\t.align 5\n");
20960 fprintf (file
, "%s:\n", stub
);
20961 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
20964 local_label_0
= XALLOCAVEC (char, 16);
20965 sprintf (local_label_0
, "L%u$spb", label
);
20967 fprintf (file
, "\tmflr r0\n");
20968 fprintf (file
, "\tbcl 20,31,%s\n", local_label_0
);
20969 fprintf (file
, "%s:\n\tmflr r11\n", local_label_0
);
20970 fprintf (file
, "\taddis r11,r11,ha16(%s-%s)\n",
20971 lazy_ptr_name
, local_label_0
);
20972 fprintf (file
, "\tmtlr r0\n");
20973 fprintf (file
, "\t%s r12,lo16(%s-%s)(r11)\n",
20974 (TARGET_64BIT
? "ldu" : "lwzu"),
20975 lazy_ptr_name
, local_label_0
);
20976 fprintf (file
, "\tmtctr r12\n");
20977 fprintf (file
, "\tbctr\n");
20979 else /* mdynamic-no-pic or mkernel. */
20981 switch_to_section (darwin_sections
[machopic_symbol_stub1_section
]);
20982 fprintf (file
, "\t.align 4\n");
20984 fprintf (file
, "%s:\n", stub
);
20985 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
20987 fprintf (file
, "\tlis r11,ha16(%s)\n", lazy_ptr_name
);
20988 fprintf (file
, "\t%s r12,lo16(%s)(r11)\n",
20989 (TARGET_64BIT
? "ldu" : "lwzu"),
20991 fprintf (file
, "\tmtctr r12\n");
20992 fprintf (file
, "\tbctr\n");
20995 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
20996 fprintf (file
, "%s:\n", lazy_ptr_name
);
20997 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
20998 fprintf (file
, "%sdyld_stub_binding_helper\n",
20999 (TARGET_64BIT
? DOUBLE_INT_ASM_OP
: "\t.long\t"));
21002 /* Legitimize PIC addresses. If the address is already
21003 position-independent, we return ORIG. Newly generated
21004 position-independent addresses go into a reg. This is REG if non
21005 zero, otherwise we allocate register(s) as necessary. */
21007 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
21010 rs6000_machopic_legitimize_pic_address (rtx orig
, machine_mode mode
,
21015 if (reg
== NULL
&& !reload_completed
)
21016 reg
= gen_reg_rtx (Pmode
);
21018 if (GET_CODE (orig
) == CONST
)
21022 if (GET_CODE (XEXP (orig
, 0)) == PLUS
21023 && XEXP (XEXP (orig
, 0), 0) == pic_offset_table_rtx
)
21026 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
21028 /* Use a different reg for the intermediate value, as
21029 it will be marked UNCHANGING. */
21030 reg_temp
= !can_create_pseudo_p () ? reg
: gen_reg_rtx (Pmode
);
21031 base
= rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig
, 0), 0),
21034 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig
, 0), 1),
21037 if (CONST_INT_P (offset
))
21039 if (SMALL_INT (offset
))
21040 return plus_constant (Pmode
, base
, INTVAL (offset
));
21041 else if (!reload_completed
)
21042 offset
= force_reg (Pmode
, offset
);
21045 rtx mem
= force_const_mem (Pmode
, orig
);
21046 return machopic_legitimize_pic_address (mem
, Pmode
, reg
);
21049 return gen_rtx_PLUS (Pmode
, base
, offset
);
21052 /* Fall back on generic machopic code. */
21053 return machopic_legitimize_pic_address (orig
, mode
, reg
);
21056 /* Output a .machine directive for the Darwin assembler, and call
21057 the generic start_file routine. */
21060 rs6000_darwin_file_start (void)
21062 static const struct
21066 HOST_WIDE_INT if_set
;
21068 { "ppc64", "ppc64", MASK_64BIT
},
21069 { "970", "ppc970", OPTION_MASK_PPC_GPOPT
| OPTION_MASK_MFCRF \
21070 | MASK_POWERPC64
},
21071 { "power4", "ppc970", 0 },
21072 { "G5", "ppc970", 0 },
21073 { "7450", "ppc7450", 0 },
21074 { "7400", "ppc7400", OPTION_MASK_ALTIVEC
},
21075 { "G4", "ppc7400", 0 },
21076 { "750", "ppc750", 0 },
21077 { "740", "ppc750", 0 },
21078 { "G3", "ppc750", 0 },
21079 { "604e", "ppc604e", 0 },
21080 { "604", "ppc604", 0 },
21081 { "603e", "ppc603", 0 },
21082 { "603", "ppc603", 0 },
21083 { "601", "ppc601", 0 },
21084 { NULL
, "ppc", 0 } };
21085 const char *cpu_id
= "";
21088 rs6000_file_start ();
21089 darwin_file_start ();
21091 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
21093 if (rs6000_default_cpu
!= 0 && rs6000_default_cpu
[0] != '\0')
21094 cpu_id
= rs6000_default_cpu
;
21096 if (OPTION_SET_P (rs6000_cpu_index
))
21097 cpu_id
= processor_target_table
[rs6000_cpu_index
].name
;
21099 /* Look through the mapping array. Pick the first name that either
21100 matches the argument, has a bit set in IF_SET that is also set
21101 in the target flags, or has a NULL name. */
21104 while (mapping
[i
].arg
!= NULL
21105 && strcmp (mapping
[i
].arg
, cpu_id
) != 0
21106 && (mapping
[i
].if_set
& rs6000_isa_flags
) == 0)
21109 fprintf (asm_out_file
, "\t.machine %s\n", mapping
[i
].name
);
21112 #endif /* TARGET_MACHO */
21116 rs6000_elf_reloc_rw_mask (void)
21120 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
21126 /* Record an element in the table of global constructors. SYMBOL is
21127 a SYMBOL_REF of the function to be called; PRIORITY is a number
21128 between 0 and MAX_INIT_PRIORITY.
21130 This differs from default_named_section_asm_out_constructor in
21131 that we have special handling for -mrelocatable. */
21133 static void rs6000_elf_asm_out_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
21135 rs6000_elf_asm_out_constructor (rtx symbol
, int priority
)
21137 const char *section
= ".ctors";
21140 if (priority
!= DEFAULT_INIT_PRIORITY
)
21142 sprintf (buf
, ".ctors.%.5u",
21143 /* Invert the numbering so the linker puts us in the proper
21144 order; constructors are run from right to left, and the
21145 linker sorts in increasing order. */
21146 MAX_INIT_PRIORITY
- priority
);
21150 switch_to_section (get_section (section
, SECTION_WRITE
, NULL
));
21151 assemble_align (POINTER_SIZE
);
21153 if (DEFAULT_ABI
== ABI_V4
21154 && (TARGET_RELOCATABLE
|| flag_pic
> 1))
21156 fputs ("\t.long (", asm_out_file
);
21157 output_addr_const (asm_out_file
, symbol
);
21158 fputs (")@fixup\n", asm_out_file
);
21161 assemble_integer (symbol
, POINTER_SIZE
/ BITS_PER_UNIT
, POINTER_SIZE
, 1);
21164 static void rs6000_elf_asm_out_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
21166 rs6000_elf_asm_out_destructor (rtx symbol
, int priority
)
21168 const char *section
= ".dtors";
21171 if (priority
!= DEFAULT_INIT_PRIORITY
)
21173 sprintf (buf
, ".dtors.%.5u",
21174 /* Invert the numbering so the linker puts us in the proper
21175 order; constructors are run from right to left, and the
21176 linker sorts in increasing order. */
21177 MAX_INIT_PRIORITY
- priority
);
21181 switch_to_section (get_section (section
, SECTION_WRITE
, NULL
));
21182 assemble_align (POINTER_SIZE
);
21184 if (DEFAULT_ABI
== ABI_V4
21185 && (TARGET_RELOCATABLE
|| flag_pic
> 1))
21187 fputs ("\t.long (", asm_out_file
);
21188 output_addr_const (asm_out_file
, symbol
);
21189 fputs (")@fixup\n", asm_out_file
);
21192 assemble_integer (symbol
, POINTER_SIZE
/ BITS_PER_UNIT
, POINTER_SIZE
, 1);
21196 rs6000_elf_declare_function_name (FILE *file
, const char *name
, tree decl
)
21198 if (TARGET_64BIT
&& DEFAULT_ABI
!= ABI_ELFv2
)
21200 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file
);
21201 ASM_OUTPUT_LABEL (file
, name
);
21202 fputs (DOUBLE_INT_ASM_OP
, file
);
21203 rs6000_output_function_entry (file
, name
);
21204 fputs (",.TOC.@tocbase,0\n\t.previous\n", file
);
21207 fputs ("\t.size\t", file
);
21208 assemble_name (file
, name
);
21209 fputs (",24\n\t.type\t.", file
);
21210 assemble_name (file
, name
);
21211 fputs (",@function\n", file
);
21212 if (TREE_PUBLIC (decl
) && ! DECL_WEAK (decl
))
21214 fputs ("\t.globl\t.", file
);
21215 assemble_name (file
, name
);
21220 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
21221 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
21222 rs6000_output_function_entry (file
, name
);
21223 fputs (":\n", file
);
21228 if (DEFAULT_ABI
== ABI_V4
21229 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
21230 && !TARGET_SECURE_PLT
21231 && (!constant_pool_empty_p () || crtl
->profile
)
21232 && (uses_toc
= uses_TOC ()))
21237 switch_to_other_text_partition ();
21238 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
21240 fprintf (file
, "\t.long ");
21241 assemble_name (file
, toc_label_name
);
21244 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
21245 assemble_name (file
, buf
);
21248 switch_to_other_text_partition ();
21251 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
21252 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
21254 if (TARGET_CMODEL
== CMODEL_LARGE
21255 && rs6000_global_entry_point_prologue_needed_p ())
21259 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
21261 fprintf (file
, "\t.quad .TOC.-");
21262 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
21263 assemble_name (file
, buf
);
21267 if (DEFAULT_ABI
== ABI_AIX
)
21269 const char *desc_name
, *orig_name
;
21271 orig_name
= (*targetm
.strip_name_encoding
) (name
);
21272 desc_name
= orig_name
;
21273 while (*desc_name
== '.')
21276 if (TREE_PUBLIC (decl
))
21277 fprintf (file
, "\t.globl %s\n", desc_name
);
21279 fprintf (file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
21280 fprintf (file
, "%s:\n", desc_name
);
21281 fprintf (file
, "\t.long %s\n", orig_name
);
21282 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file
);
21283 fputs ("\t.long 0\n", file
);
21284 fprintf (file
, "\t.previous\n");
21286 ASM_OUTPUT_LABEL (file
, name
);
21289 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED
;
21291 rs6000_elf_file_end (void)
21293 #ifdef HAVE_AS_GNU_ATTRIBUTE
21294 /* ??? The value emitted depends on options active at file end.
21295 Assume anyone using #pragma or attributes that might change
21296 options knows what they are doing. */
21297 if ((TARGET_64BIT
|| DEFAULT_ABI
== ABI_V4
)
21298 && rs6000_passes_float
)
21302 if (TARGET_HARD_FLOAT
)
21306 if (rs6000_passes_long_double
)
21308 if (!TARGET_LONG_DOUBLE_128
)
21310 else if (TARGET_IEEEQUAD
)
21315 fprintf (asm_out_file
, "\t.gnu_attribute 4, %d\n", fp
);
21317 if (TARGET_32BIT
&& DEFAULT_ABI
== ABI_V4
)
21319 if (rs6000_passes_vector
)
21320 fprintf (asm_out_file
, "\t.gnu_attribute 8, %d\n",
21321 (TARGET_ALTIVEC_ABI
? 2 : 1));
21322 if (rs6000_returns_struct
)
21323 fprintf (asm_out_file
, "\t.gnu_attribute 12, %d\n",
21324 aix_struct_return
? 2 : 1);
21327 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
21328 if (TARGET_32BIT
|| DEFAULT_ABI
== ABI_ELFv2
)
21329 file_end_indicate_exec_stack ();
21332 if (flag_split_stack
)
21333 file_end_indicate_split_stack ();
21337 /* We have expanded a CPU builtin, so we need to emit a reference to
21338 the special symbol that LIBC uses to declare it supports the
21339 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
21340 switch_to_section (data_section
);
21341 fprintf (asm_out_file
, "\t.align %u\n", TARGET_32BIT
? 2 : 3);
21342 fprintf (asm_out_file
, "\t%s %s\n",
21343 TARGET_32BIT
? ".long" : ".quad", tcb_verification_symbol
);
#ifndef HAVE_XCOFF_DWARF_EXTRAS
#define HAVE_XCOFF_DWARF_EXTRAS 0
#endif

/* Names of bss and data sections.  These should be unique names for each
   compilation unit.  */

char *xcoff_bss_section_name;
char *xcoff_private_data_section_name;
char *xcoff_private_rodata_section_name;
char *xcoff_tls_data_section_name;
char *xcoff_read_only_section_name;
21364 static enum unwind_info_type
21365 rs6000_xcoff_debug_unwind_info (void)
21371 rs6000_xcoff_asm_output_anchor (rtx symbol
)
21375 sprintf (buffer
, "$ + " HOST_WIDE_INT_PRINT_DEC
,
21376 SYMBOL_REF_BLOCK_OFFSET (symbol
));
21377 fprintf (asm_out_file
, "%s", SET_ASM_OP
);
21378 RS6000_OUTPUT_BASENAME (asm_out_file
, XSTR (symbol
, 0));
21379 fprintf (asm_out_file
, ",");
21380 RS6000_OUTPUT_BASENAME (asm_out_file
, buffer
);
21381 fprintf (asm_out_file
, "\n");
21385 rs6000_xcoff_asm_globalize_label (FILE *stream
, const char *name
)
21387 fputs (GLOBAL_ASM_OP
, stream
);
21388 RS6000_OUTPUT_BASENAME (stream
, name
);
21389 putc ('\n', stream
);
21392 /* A get_unnamed_decl callback, used for read-only sections. PTR
21393 points to the section string variable. */
21396 rs6000_xcoff_output_readonly_section_asm_op (const char *directive
)
21398 fprintf (asm_out_file
, "\t.csect %s[RO],%s\n",
21400 ? xcoff_private_rodata_section_name
21401 : xcoff_read_only_section_name
,
21402 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
21405 /* Likewise for read-write sections. */
21408 rs6000_xcoff_output_readwrite_section_asm_op (const char *)
21410 fprintf (asm_out_file
, "\t.csect %s[RW],%s\n",
21411 xcoff_private_data_section_name
,
21412 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
21416 rs6000_xcoff_output_tls_section_asm_op (const char *directive
)
21418 fprintf (asm_out_file
, "\t.csect %s[TL],%s\n",
21420 ? xcoff_private_data_section_name
21421 : xcoff_tls_data_section_name
,
21422 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
21425 /* A get_unnamed_section callback, used for switching to toc_section. */
21428 rs6000_xcoff_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED
)
21430 if (TARGET_MINIMAL_TOC
)
21432 /* toc_section is always selected at least once from
21433 rs6000_xcoff_file_start, so this is guaranteed to
21434 always be defined once and only once in each file. */
21435 if (!toc_initialized
)
21437 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file
);
21438 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file
);
21439 toc_initialized
= 1;
21441 fprintf (asm_out_file
, "\t.csect toc_table[RW]%s\n",
21442 (TARGET_32BIT
? "" : ",3"));
21445 fputs ("\t.toc\n", asm_out_file
);
21448 /* Implement TARGET_ASM_INIT_SECTIONS. */
21451 rs6000_xcoff_asm_init_sections (void)
21453 read_only_data_section
21454 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
21457 private_data_section
21458 = get_unnamed_section (SECTION_WRITE
,
21459 rs6000_xcoff_output_readwrite_section_asm_op
,
21462 read_only_private_data_section
21463 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
21467 = get_unnamed_section (SECTION_TLS
,
21468 rs6000_xcoff_output_tls_section_asm_op
,
21471 tls_private_data_section
21472 = get_unnamed_section (SECTION_TLS
,
21473 rs6000_xcoff_output_tls_section_asm_op
,
21477 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op
, NULL
);
21479 readonly_data_section
= read_only_data_section
;
21483 rs6000_xcoff_reloc_rw_mask (void)
21489 rs6000_xcoff_asm_named_section (const char *name
, unsigned int flags
,
21490 tree decl ATTRIBUTE_UNUSED
)
21493 static const char * const suffix
[7]
21494 = { "PR", "RO", "RW", "BS", "TL", "UL", "XO" };
21496 if (flags
& SECTION_EXCLUDE
)
21498 else if (flags
& SECTION_DEBUG
)
21500 fprintf (asm_out_file
, "\t.dwsect %s\n", name
);
21503 else if (flags
& SECTION_CODE
)
21505 else if (flags
& SECTION_TLS
)
21507 if (flags
& SECTION_BSS
)
21512 else if (flags
& SECTION_WRITE
)
21514 if (flags
& SECTION_BSS
)
21522 fprintf (asm_out_file
, "\t.csect %s%s[%s],%u\n",
21523 (flags
& SECTION_CODE
) ? "." : "",
21524 name
, suffix
[smclass
], flags
& SECTION_ENTSIZE
);
21527 #define IN_NAMED_SECTION(DECL) \
21528 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
21529 && DECL_SECTION_NAME (DECL) != NULL)
21532 rs6000_xcoff_select_section (tree decl
, int reloc
,
21533 unsigned HOST_WIDE_INT align
)
21535 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
21537 if (align
> BIGGEST_ALIGNMENT
&& VAR_OR_FUNCTION_DECL_P (decl
))
21539 resolve_unique_section (decl
, reloc
, true);
21540 if (IN_NAMED_SECTION (decl
))
21541 return get_named_section (decl
, NULL
, reloc
);
21544 if (decl_readonly_section (decl
, reloc
))
21546 if (TREE_PUBLIC (decl
))
21547 return read_only_data_section
;
21549 return read_only_private_data_section
;
21554 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL_P (decl
))
21556 if (bss_initializer_p (decl
))
21557 return tls_comm_section
;
21558 else if (TREE_PUBLIC (decl
))
21559 return tls_data_section
;
21561 return tls_private_data_section
;
21565 if (TREE_PUBLIC (decl
))
21566 return data_section
;
21568 return private_data_section
;
21573 rs6000_xcoff_unique_section (tree decl
, int reloc ATTRIBUTE_UNUSED
)
21577 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
21578 name
= (*targetm
.strip_name_encoding
) (name
);
21579 set_decl_section_name (decl
, name
);
21582 /* Select section for constant in constant pool.
21584 On RS/6000, all constants are in the private read-only data area.
21585 However, if this is being placed in the TOC it must be output as a
21589 rs6000_xcoff_select_rtx_section (machine_mode mode
, rtx x
,
21590 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
21592 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
))
21593 return toc_section
;
21595 return read_only_private_data_section
;
21598 /* Remove any trailing [DS] or the like from the symbol name. */
21600 static const char *
21601 rs6000_xcoff_strip_name_encoding (const char *name
)
21606 len
= strlen (name
);
21607 if (name
[len
- 1] == ']')
21608 return ggc_alloc_string (name
, len
- 4);
21613 /* Section attributes. AIX is always PIC. */
21615 static unsigned int
21616 rs6000_xcoff_section_type_flags (tree decl
, const char *name
, int reloc
)
21618 unsigned int align
;
21619 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
21621 if (decl
&& DECL_P (decl
) && VAR_P (decl
) && bss_initializer_p (decl
))
21622 flags
|= SECTION_BSS
;
21624 /* Align to at least UNIT size. */
21625 if (!decl
|| !DECL_P (decl
))
21626 align
= MIN_UNITS_PER_WORD
;
21627 /* Align code CSECT to at least 32 bytes. */
21628 else if ((flags
& SECTION_CODE
) != 0)
21629 align
= MAX ((DECL_ALIGN (decl
) / BITS_PER_UNIT
), 32);
21631 /* Increase alignment of large objects if not already stricter. */
21632 align
= MAX ((DECL_ALIGN (decl
) / BITS_PER_UNIT
),
21633 int_size_in_bytes (TREE_TYPE (decl
)) > MIN_UNITS_PER_WORD
21634 ? UNITS_PER_FP_WORD
: MIN_UNITS_PER_WORD
);
21636 return flags
| (exact_log2 (align
) & SECTION_ENTSIZE
);
21639 /* Output at beginning of assembler file.
21641 Initialize the section names for the RS/6000 at this point.
21643 Specify filename, including full path, to assembler.
21645 We want to go into the TOC section so at least one .toc will be emitted.
21646 Also, in order to output proper .bs/.es pairs, we need at least one static
21647 [RW] section emitted.
21649 Finally, declare mcount when profiling to make the assembler happy. */
21652 rs6000_xcoff_file_start (void)
21654 rs6000_gen_section_name (&xcoff_bss_section_name
,
21655 main_input_filename
, ".bss_");
21656 rs6000_gen_section_name (&xcoff_private_data_section_name
,
21657 main_input_filename
, ".rw_");
21658 rs6000_gen_section_name (&xcoff_private_rodata_section_name
,
21659 main_input_filename
, ".rop_");
21660 rs6000_gen_section_name (&xcoff_read_only_section_name
,
21661 main_input_filename
, ".ro_");
21662 rs6000_gen_section_name (&xcoff_tls_data_section_name
,
21663 main_input_filename
, ".tls_");
21665 fputs ("\t.file\t", asm_out_file
);
21666 output_quoted_string (asm_out_file
, main_input_filename
);
21667 fputc ('\n', asm_out_file
);
21668 if (write_symbols
!= NO_DEBUG
)
21669 switch_to_section (private_data_section
);
21670 switch_to_section (toc_section
);
21671 switch_to_section (text_section
);
21673 fprintf (asm_out_file
, "\t.extern %s\n", RS6000_MCOUNT
);
21674 rs6000_file_start ();
21677 /* Output at end of assembler file.
21678 On the RS/6000, referencing data should automatically pull in text. */
21681 rs6000_xcoff_file_end (void)
21683 switch_to_section (text_section
);
21684 if (xcoff_tls_exec_model_detected
)
21686 /* Add a .ref to __tls_get_addr to force libpthread dependency. */
21687 fputs ("\t.extern __tls_get_addr\n\t.ref __tls_get_addr\n", asm_out_file
);
21689 fputs ("_section_.text:\n", asm_out_file
);
21690 switch_to_section (data_section
);
21691 fputs (TARGET_32BIT
21692 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
21697 struct declare_alias_data
21700 bool function_descriptor
;
21703 /* Declare alias N. A helper function for for_node_and_aliases. */
21706 rs6000_declare_alias (struct symtab_node
*n
, void *d
)
21708 struct declare_alias_data
*data
= (struct declare_alias_data
*)d
;
21709 /* Main symbol is output specially, because varasm machinery does part of
21710 the job for us - we do not need to declare .globl/lglobs and such. */
21711 if (!n
->alias
|| n
->weakref
)
21714 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n
->decl
)))
21717 /* Prevent assemble_alias from trying to use .set pseudo operation
21718 that does not behave as expected by the middle-end. */
21719 TREE_ASM_WRITTEN (n
->decl
) = true;
21721 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n
->decl
));
21722 char *buffer
= (char *) alloca (strlen (name
) + 2);
21724 int dollar_inside
= 0;
21726 strcpy (buffer
, name
);
21727 p
= strchr (buffer
, '$');
21731 p
= strchr (p
+ 1, '$');
21733 if (TREE_PUBLIC (n
->decl
))
21735 if (!RS6000_WEAK
|| !DECL_WEAK (n
->decl
))
21737 if (dollar_inside
) {
21738 if (data
->function_descriptor
)
21739 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21740 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21742 if (data
->function_descriptor
)
21744 fputs ("\t.globl .", data
->file
);
21745 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
21746 putc ('\n', data
->file
);
21748 fputs ("\t.globl ", data
->file
);
21749 assemble_name (data
->file
, buffer
);
21750 putc ('\n', data
->file
);
21752 #ifdef ASM_WEAKEN_DECL
21753 else if (DECL_WEAK (n
->decl
) && !data
->function_descriptor
)
21754 ASM_WEAKEN_DECL (data
->file
, n
->decl
, name
, NULL
);
21761 if (data
->function_descriptor
)
21762 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21763 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21765 if (data
->function_descriptor
)
21767 fputs ("\t.lglobl .", data
->file
);
21768 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
21769 putc ('\n', data
->file
);
21771 fputs ("\t.lglobl ", data
->file
);
21772 assemble_name (data
->file
, buffer
);
21773 putc ('\n', data
->file
);
21775 if (data
->function_descriptor
)
21776 putc ('.', data
->file
);
21777 ASM_OUTPUT_LABEL (data
->file
, buffer
);
21782 #ifdef HAVE_GAS_HIDDEN
21783 /* Helper function to calculate visibility of a DECL
21784 and return the value as a const string. */
21786 static const char *
21787 rs6000_xcoff_visibility (tree decl
)
21789 static const char * const visibility_types
[] = {
21790 "", ",protected", ",hidden", ",internal"
21793 enum symbol_visibility vis
= DECL_VISIBILITY (decl
);
21794 return visibility_types
[vis
];
21799 /* This macro produces the initial definition of a function name.
21800 On the RS/6000, we need to place an extra '.' in the function name and
21801 output the function descriptor.
21802 Dollar signs are converted to underscores.
21804 The csect for the function will have already been created when
21805 text_section was selected. We do have to go back to that csect, however.
21807 The third and fourth parameters to the .function pseudo-op (16 and 044)
21808 are placeholders which no longer have any use.
21810 Because AIX assembler's .set command has unexpected semantics, we output
21811 all aliases as alternative labels in front of the definition. */
21814 rs6000_xcoff_declare_function_name (FILE *file
, const char *name
, tree decl
)
21816 char *buffer
= (char *) alloca (strlen (name
) + 1);
21818 int dollar_inside
= 0;
21819 struct declare_alias_data data
= {file
, false};
21821 strcpy (buffer
, name
);
21822 p
= strchr (buffer
, '$');
21826 p
= strchr (p
+ 1, '$');
21828 if (TREE_PUBLIC (decl
))
21830 if (!RS6000_WEAK
|| !DECL_WEAK (decl
))
21832 if (dollar_inside
) {
21833 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21834 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21836 fputs ("\t.globl .", file
);
21837 RS6000_OUTPUT_BASENAME (file
, buffer
);
21838 #ifdef HAVE_GAS_HIDDEN
21839 fputs (rs6000_xcoff_visibility (decl
), file
);
21846 if (dollar_inside
) {
21847 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21848 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21850 fputs ("\t.lglobl .", file
);
21851 RS6000_OUTPUT_BASENAME (file
, buffer
);
21855 fputs ("\t.csect ", file
);
21856 assemble_name (file
, buffer
);
21857 fputs (TARGET_32BIT
? "\n" : ",3\n", file
);
21859 ASM_OUTPUT_LABEL (file
, buffer
);
21861 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
21863 fputs (TARGET_32BIT
? "\t.long ." : "\t.llong .", file
);
21864 RS6000_OUTPUT_BASENAME (file
, buffer
);
21865 fputs (", TOC[tc0], 0\n", file
);
21868 switch_to_section (function_section (decl
));
21870 ASM_OUTPUT_LABEL (file
, buffer
);
21872 data
.function_descriptor
= true;
21873 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
21875 if (!DECL_IGNORED_P (decl
))
21877 if (dwarf_debuginfo_p ())
21879 name
= (*targetm
.strip_name_encoding
) (name
);
21880 fprintf (file
, "\t.function .%s,.%s,2,0\n", name
, name
);
21887 /* Output assembly language to globalize a symbol from a DECL,
21888 possibly with visibility. */
21891 rs6000_xcoff_asm_globalize_decl_name (FILE *stream
, tree decl
)
21893 const char *name
= XSTR (XEXP (DECL_RTL (decl
), 0), 0);
21894 fputs (GLOBAL_ASM_OP
, stream
);
21895 assemble_name (stream
, name
);
21896 #ifdef HAVE_GAS_HIDDEN
21897 fputs (rs6000_xcoff_visibility (decl
), stream
);
21899 putc ('\n', stream
);
21902 /* Output assembly language to define a symbol as COMMON from a DECL,
21903 possibly with visibility. */
21906 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream
,
21907 tree decl ATTRIBUTE_UNUSED
,
21909 unsigned HOST_WIDE_INT size
,
21910 unsigned int align
)
21912 unsigned int align2
= 2;
21915 align
= DATA_ABI_ALIGNMENT (TREE_TYPE (decl
), DECL_ALIGN (decl
));
21918 align2
= floor_log2 (align
/ BITS_PER_UNIT
);
21922 if (! DECL_COMMON (decl
))
21924 /* Forget section. */
21927 /* Globalize TLS BSS. */
21928 if (TREE_PUBLIC (decl
) && DECL_THREAD_LOCAL_P (decl
))
21930 fputs (GLOBAL_ASM_OP
, stream
);
21931 assemble_name (stream
, name
);
21932 fputc ('\n', stream
);
21935 /* Switch to section and skip space. */
21936 fputs ("\t.csect ", stream
);
21937 assemble_name (stream
, name
);
21938 fprintf (stream
, ",%u\n", align2
);
21939 ASM_DECLARE_OBJECT_NAME (stream
, name
, decl
);
21940 ASM_OUTPUT_SKIP (stream
, size
? size
: 1);
21944 if (TREE_PUBLIC (decl
))
21947 "\t.comm %s," HOST_WIDE_INT_PRINT_UNSIGNED
",%u" ,
21948 name
, size
, align2
);
21950 #ifdef HAVE_GAS_HIDDEN
21952 fputs (rs6000_xcoff_visibility (decl
), stream
);
21954 putc ('\n', stream
);
21958 "\t.lcomm %s," HOST_WIDE_INT_PRINT_UNSIGNED
",%s,%u\n",
21959 (*targetm
.strip_name_encoding
) (name
), size
, name
, align2
);
21962 /* This macro produces the initial definition of a object (variable) name.
21963 Because AIX assembler's .set command has unexpected semantics, we output
21964 all aliases as alternative labels in front of the definition. */
21967 rs6000_xcoff_declare_object_name (FILE *file
, const char *name
, tree decl
)
21969 struct declare_alias_data data
= {file
, false};
21970 ASM_OUTPUT_LABEL (file
, name
);
21971 symtab_node::get_create (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
21975 /* Overide the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
21978 rs6000_asm_output_dwarf_pcrel (FILE *file
, int size
, const char *label
)
21980 fputs (integer_asm_op (size
, FALSE
), file
);
21981 assemble_name (file
, label
);
21982 fputs ("-$", file
);
21985 /* Output a symbol offset relative to the dbase for the current object.
21986 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
21989 __gcc_unwind_dbase is embedded in all executables/libraries through
21990 libgcc/config/rs6000/crtdbase.S. */
21993 rs6000_asm_output_dwarf_datarel (FILE *file
, int size
, const char *label
)
21995 fputs (integer_asm_op (size
, FALSE
), file
);
21996 assemble_name (file
, label
);
21997 fputs("-__gcc_unwind_dbase", file
);
22002 rs6000_xcoff_encode_section_info (tree decl
, rtx rtl
, int first
)
22006 const char *symname
;
22008 default_encode_section_info (decl
, rtl
, first
);
22010 /* Careful not to prod global register variables. */
22013 symbol
= XEXP (rtl
, 0);
22014 if (!SYMBOL_REF_P (symbol
))
22017 flags
= SYMBOL_REF_FLAGS (symbol
);
22019 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL_P (decl
))
22020 flags
&= ~SYMBOL_FLAG_HAS_BLOCK_INFO
;
22022 SYMBOL_REF_FLAGS (symbol
) = flags
;
22024 symname
= XSTR (symbol
, 0);
22026 /* Append CSECT mapping class, unless the symbol already is qualified.
22027 Aliases are implemented as labels, so the symbol name should not add
22028 a mapping class. */
22031 && VAR_OR_FUNCTION_DECL_P (decl
)
22032 && (symtab_node::get (decl
) == NULL
22033 || symtab_node::get (decl
)->alias
== 0)
22034 && symname
[strlen (symname
) - 1] != ']')
22036 const char *smclass
= NULL
;
22038 if (TREE_CODE (decl
) == FUNCTION_DECL
)
22040 else if (DECL_THREAD_LOCAL_P (decl
))
22042 if (bss_initializer_p (decl
))
22044 else if (flag_data_sections
)
22047 else if (DECL_EXTERNAL (decl
))
22049 else if (bss_initializer_p (decl
))
22051 else if (flag_data_sections
)
22053 /* This must exactly match the logic of select section. */
22054 if (decl_readonly_section (decl
, compute_reloc_for_var (decl
)))
22060 if (smclass
!= NULL
)
22062 char *newname
= XALLOCAVEC (char, strlen (symname
) + 5);
22064 strcpy (newname
, symname
);
22065 strcat (newname
, smclass
);
22066 XSTR (symbol
, 0) = ggc_strdup (newname
);
22070 #endif /* HAVE_AS_TLS */
22071 #endif /* TARGET_XCOFF */
22074 rs6000_asm_weaken_decl (FILE *stream
, tree decl
,
22075 const char *name
, const char *val
)
22077 fputs ("\t.weak\t", stream
);
22078 assemble_name (stream
, name
);
22079 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
22080 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
22082 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
22084 fputs (rs6000_xcoff_visibility (decl
), stream
);
22086 fputs ("\n\t.weak\t.", stream
);
22087 RS6000_OUTPUT_BASENAME (stream
, name
);
22089 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
22091 fputs (rs6000_xcoff_visibility (decl
), stream
);
22093 fputc ('\n', stream
);
22097 #ifdef ASM_OUTPUT_DEF
22098 ASM_OUTPUT_DEF (stream
, name
, val
);
22100 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
22101 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
22103 fputs ("\t.set\t.", stream
);
22104 RS6000_OUTPUT_BASENAME (stream
, name
);
22105 fputs (",.", stream
);
22106 RS6000_OUTPUT_BASENAME (stream
, val
);
22107 fputc ('\n', stream
);
22113 /* Return true if INSN should not be copied. */
22116 rs6000_cannot_copy_insn_p (rtx_insn
*insn
)
22118 return recog_memoized (insn
) >= 0
22119 && get_attr_cannot_copy (insn
);
22122 /* Compute a (partial) cost for rtx X. Return true if the complete
22123 cost has been computed, and false if subexpressions should be
22124 scanned. In either case, *TOTAL contains the cost result. */
22127 rs6000_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
22128 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
22130 int code
= GET_CODE (x
);
22134 /* On the RS/6000, if it is valid in the insn, it is free. */
22136 if (((outer_code
== SET
22137 || outer_code
== PLUS
22138 || outer_code
== MINUS
)
22139 && (satisfies_constraint_I (x
)
22140 || satisfies_constraint_L (x
)))
22141 || (outer_code
== AND
22142 && (satisfies_constraint_K (x
)
22144 ? satisfies_constraint_L (x
)
22145 : satisfies_constraint_J (x
))))
22146 || ((outer_code
== IOR
|| outer_code
== XOR
)
22147 && (satisfies_constraint_K (x
)
22149 ? satisfies_constraint_L (x
)
22150 : satisfies_constraint_J (x
))))
22151 || outer_code
== ASHIFT
22152 || outer_code
== ASHIFTRT
22153 || outer_code
== LSHIFTRT
22154 || outer_code
== ROTATE
22155 || outer_code
== ROTATERT
22156 || outer_code
== ZERO_EXTRACT
22157 || (outer_code
== MULT
22158 && satisfies_constraint_I (x
))
22159 || ((outer_code
== DIV
|| outer_code
== UDIV
22160 || outer_code
== MOD
|| outer_code
== UMOD
)
22161 && exact_log2 (INTVAL (x
)) >= 0)
22162 || (outer_code
== COMPARE
22163 && (satisfies_constraint_I (x
)
22164 || satisfies_constraint_K (x
)))
22165 || ((outer_code
== EQ
|| outer_code
== NE
)
22166 && (satisfies_constraint_I (x
)
22167 || satisfies_constraint_K (x
)
22169 ? satisfies_constraint_L (x
)
22170 : satisfies_constraint_J (x
))))
22171 || (outer_code
== GTU
22172 && satisfies_constraint_I (x
))
22173 || (outer_code
== LTU
22174 && satisfies_constraint_P (x
)))
22179 else if ((outer_code
== PLUS
22180 && reg_or_add_cint_operand (x
, mode
))
22181 || (outer_code
== MINUS
22182 && reg_or_sub_cint_operand (x
, mode
))
22183 || ((outer_code
== SET
22184 || outer_code
== IOR
22185 || outer_code
== XOR
)
22187 & ~ (unsigned HOST_WIDE_INT
) 0xffffffff) == 0))
22189 *total
= COSTS_N_INSNS (1);
22195 case CONST_WIDE_INT
:
22199 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22203 /* When optimizing for size, MEM should be slightly more expensive
22204 than generating address, e.g., (plus (reg) (const)).
22205 L1 cache latency is about two instructions. */
22206 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22207 if (rs6000_slow_unaligned_access (mode
, MEM_ALIGN (x
)))
22208 *total
+= COSTS_N_INSNS (100);
22217 if (FLOAT_MODE_P (mode
))
22218 *total
= rs6000_cost
->fp
;
22220 *total
= COSTS_N_INSNS (1);
22224 if (CONST_INT_P (XEXP (x
, 1))
22225 && satisfies_constraint_I (XEXP (x
, 1)))
22227 if (INTVAL (XEXP (x
, 1)) >= -256
22228 && INTVAL (XEXP (x
, 1)) <= 255)
22229 *total
= rs6000_cost
->mulsi_const9
;
22231 *total
= rs6000_cost
->mulsi_const
;
22233 else if (mode
== SFmode
)
22234 *total
= rs6000_cost
->fp
;
22235 else if (FLOAT_MODE_P (mode
))
22236 *total
= rs6000_cost
->dmul
;
22237 else if (mode
== DImode
)
22238 *total
= rs6000_cost
->muldi
;
22240 *total
= rs6000_cost
->mulsi
;
22244 if (mode
== SFmode
)
22245 *total
= rs6000_cost
->fp
;
22247 *total
= rs6000_cost
->dmul
;
22252 if (FLOAT_MODE_P (mode
))
22254 *total
= mode
== DFmode
? rs6000_cost
->ddiv
22255 : rs6000_cost
->sdiv
;
22262 if (CONST_INT_P (XEXP (x
, 1))
22263 && exact_log2 (INTVAL (XEXP (x
, 1))) >= 0)
22265 if (code
== DIV
|| code
== MOD
)
22267 *total
= COSTS_N_INSNS (2);
22270 *total
= COSTS_N_INSNS (1);
22274 if (GET_MODE (XEXP (x
, 1)) == DImode
)
22275 *total
= rs6000_cost
->divdi
;
22277 *total
= rs6000_cost
->divsi
;
22279 /* Add in shift and subtract for MOD unless we have a mod instruction. */
22280 if (!TARGET_MODULO
&& (code
== MOD
|| code
== UMOD
))
22281 *total
+= COSTS_N_INSNS (2);
22285 *total
= COSTS_N_INSNS (TARGET_CTZ
? 1 : 4);
22289 *total
= COSTS_N_INSNS (4);
22293 *total
= COSTS_N_INSNS (TARGET_POPCNTD
? 1 : 6);
22297 *total
= COSTS_N_INSNS (TARGET_CMPB
? 2 : 6);
22301 if (outer_code
== AND
|| outer_code
== IOR
|| outer_code
== XOR
)
22304 *total
= COSTS_N_INSNS (1);
22308 if (CONST_INT_P (XEXP (x
, 1)))
22310 rtx left
= XEXP (x
, 0);
22311 rtx_code left_code
= GET_CODE (left
);
22313 /* rotate-and-mask: 1 insn. */
22314 if ((left_code
== ROTATE
22315 || left_code
== ASHIFT
22316 || left_code
== LSHIFTRT
)
22317 && rs6000_is_valid_shift_mask (XEXP (x
, 1), left
, mode
))
22319 *total
= rtx_cost (XEXP (left
, 0), mode
, left_code
, 0, speed
);
22320 if (!CONST_INT_P (XEXP (left
, 1)))
22321 *total
+= rtx_cost (XEXP (left
, 1), SImode
, left_code
, 1, speed
);
22322 *total
+= COSTS_N_INSNS (1);
22326 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
22327 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
22328 if (rs6000_is_valid_and_mask (XEXP (x
, 1), mode
)
22329 || (val
& 0xffff) == val
22330 || (val
& 0xffff0000) == val
22331 || ((val
& 0xffff) == 0 && mode
== SImode
))
22333 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
22334 *total
+= COSTS_N_INSNS (1);
22339 if (rs6000_is_valid_2insn_and (XEXP (x
, 1), mode
))
22341 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
22342 *total
+= COSTS_N_INSNS (2);
22347 *total
= COSTS_N_INSNS (1);
22352 *total
= COSTS_N_INSNS (1);
22358 *total
= COSTS_N_INSNS (1);
22362 /* The EXTSWSLI instruction is a combined instruction. Don't count both
22363 the sign extend and shift separately within the insn. */
22364 if (TARGET_EXTSWSLI
&& mode
== DImode
22365 && GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
22366 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
)
22377 /* Handle mul_highpart. */
22378 if (outer_code
== TRUNCATE
22379 && GET_CODE (XEXP (x
, 0)) == MULT
)
22381 if (mode
== DImode
)
22382 *total
= rs6000_cost
->muldi
;
22384 *total
= rs6000_cost
->mulsi
;
22387 else if (outer_code
== AND
)
22390 *total
= COSTS_N_INSNS (1);
22395 if (MEM_P (XEXP (x
, 0)))
22398 *total
= COSTS_N_INSNS (1);
22404 if (!FLOAT_MODE_P (mode
))
22406 *total
= COSTS_N_INSNS (1);
22412 case UNSIGNED_FLOAT
:
22415 case FLOAT_TRUNCATE
:
22416 *total
= rs6000_cost
->fp
;
22420 if (mode
== DFmode
)
22421 *total
= rs6000_cost
->sfdf_convert
;
22423 *total
= rs6000_cost
->fp
;
22430 *total
= COSTS_N_INSNS (1);
22433 else if (FLOAT_MODE_P (mode
) && TARGET_PPC_GFXOPT
&& TARGET_HARD_FLOAT
)
22435 *total
= rs6000_cost
->fp
;
22444 /* Carry bit requires mode == Pmode.
22445 NEG or PLUS already counted so only add one. */
22447 && (outer_code
== NEG
|| outer_code
== PLUS
))
22449 *total
= COSTS_N_INSNS (1);
22457 if (outer_code
== SET
)
22459 if (XEXP (x
, 1) == const0_rtx
)
22461 *total
= COSTS_N_INSNS (2);
22466 *total
= COSTS_N_INSNS (3);
22471 if (outer_code
== COMPARE
)
22479 if (XINT (x
, 1) == UNSPECV_MMA_XXSETACCZ
)
22493 /* Debug form of r6000_rtx_costs that is selected if -mdebug=cost. */
22496 rs6000_debug_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
22497 int opno
, int *total
, bool speed
)
22499 bool ret
= rs6000_rtx_costs (x
, mode
, outer_code
, opno
, total
, speed
);
22502 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
22503 "opno = %d, total = %d, speed = %s, x:\n",
22504 ret
? "complete" : "scan inner",
22505 GET_MODE_NAME (mode
),
22506 GET_RTX_NAME (outer_code
),
22509 speed
? "true" : "false");
22517 rs6000_insn_cost (rtx_insn
*insn
, bool speed
)
22519 if (recog_memoized (insn
) < 0)
22522 /* If we are optimizing for size, just use the length. */
22524 return get_attr_length (insn
);
22526 /* Use the cost if provided. */
22527 int cost
= get_attr_cost (insn
);
22531 /* If the insn tells us how many insns there are, use that. Otherwise use
22532 the length/4. Adjust the insn length to remove the extra size that
22533 prefixed instructions take. */
22534 int n
= get_attr_num_insns (insn
);
22537 int length
= get_attr_length (insn
);
22538 if (get_attr_prefixed (insn
) == PREFIXED_YES
)
22541 ADJUST_INSN_LENGTH (insn
, adjust
);
22548 enum attr_type type
= get_attr_type (insn
);
22555 cost
= COSTS_N_INSNS (n
+ 1);
22559 switch (get_attr_size (insn
))
22562 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi_const9
;
22565 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi_const
;
22568 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi
;
22571 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->muldi
;
22574 gcc_unreachable ();
22578 switch (get_attr_size (insn
))
22581 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->divsi
;
22584 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->divdi
;
22587 gcc_unreachable ();
22592 cost
= n
* rs6000_cost
->fp
;
22595 cost
= n
* rs6000_cost
->dmul
;
22598 cost
= n
* rs6000_cost
->sdiv
;
22601 cost
= n
* rs6000_cost
->ddiv
;
22608 cost
= COSTS_N_INSNS (n
+ 2);
22612 cost
= COSTS_N_INSNS (n
);
22618 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
22621 rs6000_debug_address_cost (rtx x
, machine_mode mode
,
22622 addr_space_t as
, bool speed
)
22624 int ret
= TARGET_ADDRESS_COST (x
, mode
, as
, speed
);
22626 fprintf (stderr
, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
22627 ret
, speed
? "true" : "false");
22634 /* Subroutine to determine the move cost of dense math registers. If we are
22635 moving to/from VSX_REGISTER registers, the cost is either 1 move (for
22636 512-bit accumulators) or 2 moves (for 1,024 dmr registers). If we are
22637 moving to anything else like GPR registers, make the cost very high. */
22640 rs6000_dmr_register_move_cost (machine_mode mode
, reg_class_t rclass
)
22642 const int reg_move_base
= 2;
22643 HARD_REG_SET vsx_set
= (reg_class_contents
[rclass
]
22644 & reg_class_contents
[VSX_REGS
]);
22646 if (TARGET_DENSE_MATH
&& !hard_reg_set_empty_p (vsx_set
))
22648 /* __vector_quad (i.e. XOmode) is tranfered in 1 instruction. */
22649 if (mode
== XOmode
)
22650 return reg_move_base
;
22652 /* __dmr (i.e. TDOmode) is transferred in 2 instructions. */
22653 else if (mode
== TDOmode
)
22654 return reg_move_base
* 2;
22657 return reg_move_base
* 2 * hard_regno_nregs (FIRST_DMR_REGNO
, mode
);
22660 return 1000 * 2 * hard_regno_nregs (FIRST_DMR_REGNO
, mode
);
22663 /* A C expression returning the cost of moving data from a register of class
22664 CLASS1 to one of CLASS2. */
22667 rs6000_register_move_cost (machine_mode mode
,
22668 reg_class_t from
, reg_class_t to
)
22671 reg_class_t rclass
;
22673 if (TARGET_DEBUG_COST
)
22676 HARD_REG_SET to_vsx
, from_vsx
;
22677 to_vsx
= reg_class_contents
[to
] & reg_class_contents
[VSX_REGS
];
22678 from_vsx
= reg_class_contents
[from
] & reg_class_contents
[VSX_REGS
];
22680 /* Special case DMR registers, that can only move to/from VSX registers. */
22681 if (from
== DM_REGS
&& to
== DM_REGS
)
22682 ret
= 2 * hard_regno_nregs (FIRST_DMR_REGNO
, mode
);
22684 else if (from
== DM_REGS
)
22685 ret
= rs6000_dmr_register_move_cost (mode
, to
);
22687 else if (to
== DM_REGS
)
22688 ret
= rs6000_dmr_register_move_cost (mode
, from
);
22690 /* If we have VSX, we can easily move between FPR or Altivec registers,
22691 otherwise we can only easily move within classes.
22692 Do this first so we give best-case answers for union classes
22693 containing both gprs and vsx regs. */
22694 else if (!hard_reg_set_empty_p (to_vsx
)
22695 && !hard_reg_set_empty_p (from_vsx
)
22697 || hard_reg_set_intersect_p (to_vsx
, from_vsx
)))
22699 int reg
= FIRST_FPR_REGNO
;
22701 || (TEST_HARD_REG_BIT (to_vsx
, FIRST_ALTIVEC_REGNO
)
22702 && TEST_HARD_REG_BIT (from_vsx
, FIRST_ALTIVEC_REGNO
)))
22703 reg
= FIRST_ALTIVEC_REGNO
;
22704 ret
= 2 * hard_regno_nregs (reg
, mode
);
22707 /* Moves from/to GENERAL_REGS. */
22708 else if ((rclass
= from
, reg_classes_intersect_p (to
, GENERAL_REGS
))
22709 || (rclass
= to
, reg_classes_intersect_p (from
, GENERAL_REGS
)))
22711 if (rclass
== FLOAT_REGS
|| rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
22713 if (TARGET_DIRECT_MOVE
)
22715 /* Keep the cost for direct moves above that for within
22716 a register class even if the actual processor cost is
22717 comparable. We do this because a direct move insn
22718 can't be a nop, whereas with ideal register
22719 allocation a move within the same class might turn
22720 out to be a nop. */
22721 if (rs6000_tune
== PROCESSOR_POWER9
22722 || rs6000_tune
== PROCESSOR_POWER10
22723 || rs6000_tune
== PROCESSOR_FUTURE
)
22724 ret
= 3 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22726 ret
= 4 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22727 /* SFmode requires a conversion when moving between gprs
22729 if (mode
== SFmode
)
22733 ret
= (rs6000_memory_move_cost (mode
, rclass
, false)
22734 + rs6000_memory_move_cost (mode
, GENERAL_REGS
, false));
22737 /* It's more expensive to move CR_REGS than CR0_REGS because of the
22739 else if (rclass
== CR_REGS
)
22742 /* For those processors that have slow LR/CTR moves, make them more
22743 expensive than memory in order to bias spills to memory .*/
22744 else if ((rs6000_tune
== PROCESSOR_POWER6
22745 || rs6000_tune
== PROCESSOR_POWER7
22746 || rs6000_tune
== PROCESSOR_POWER8
22747 || rs6000_tune
== PROCESSOR_POWER9
)
22748 && reg_class_subset_p (rclass
, SPECIAL_REGS
))
22749 ret
= 6 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22752 /* A move will cost one instruction per GPR moved. */
22753 ret
= 2 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22756 /* Everything else has to go through GENERAL_REGS. */
22758 ret
= (rs6000_register_move_cost (mode
, GENERAL_REGS
, to
)
22759 + rs6000_register_move_cost (mode
, from
, GENERAL_REGS
));
22761 if (TARGET_DEBUG_COST
)
22763 if (dbg_cost_ctrl
== 1)
22765 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22766 ret
, GET_MODE_NAME (mode
), reg_class_names
[from
],
22767 reg_class_names
[to
]);
22774 /* A C expressions returning the cost of moving data of MODE from a register to
22778 rs6000_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
22779 bool in ATTRIBUTE_UNUSED
)
22783 if (TARGET_DEBUG_COST
)
22786 if (reg_classes_intersect_p (rclass
, GENERAL_REGS
))
22787 ret
= 4 * hard_regno_nregs (0, mode
);
22788 else if ((reg_classes_intersect_p (rclass
, FLOAT_REGS
)
22789 || reg_classes_intersect_p (rclass
, VSX_REGS
)))
22790 ret
= 4 * hard_regno_nregs (32, mode
);
22791 else if (reg_classes_intersect_p (rclass
, ALTIVEC_REGS
))
22792 ret
= 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO
, mode
);
22793 else if (reg_classes_intersect_p (rclass
, DM_REGS
))
22794 ret
= (rs6000_dmr_register_move_cost (mode
, VSX_REGS
)
22795 + rs6000_memory_move_cost (mode
, VSX_REGS
, false));
22797 ret
= 4 + rs6000_register_move_cost (mode
, rclass
, GENERAL_REGS
);
22799 if (TARGET_DEBUG_COST
)
22801 if (dbg_cost_ctrl
== 1)
22803 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22804 ret
, GET_MODE_NAME (mode
), reg_class_names
[rclass
], in
);
22811 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22813 The register allocator chooses GEN_OR_VSX_REGS for the allocno
22814 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22815 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
22816 move cost between GENERAL_REGS and VSX_REGS low.
22818 It might seem reasonable to use a union class. After all, if usage
22819 of vsr is low and gpr high, it might make sense to spill gpr to vsr
22820 rather than memory. However, in cases where register pressure of
22821 both is high, like the cactus_adm spec test, allowing
22822 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22823 the first scheduling pass. This is partly due to an allocno of
22824 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22825 class, which gives too high a pressure for GENERAL_REGS and too low
22826 for VSX_REGS. So, force a choice of the subclass here.
22828 The best class is also the union if GENERAL_REGS and VSX_REGS have
22829 the same cost. In that case we do use GEN_OR_VSX_REGS as the
22830 allocno class, since trying to narrow down the class by regno mode
22831 is prone to error. For example, SImode is allowed in VSX regs and
22832 in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
22833 it would be wrong to choose an allocno of GENERAL_REGS based on
22837 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED
,
22838 reg_class_t allocno_class
,
22839 reg_class_t best_class
)
22841 switch (allocno_class
)
22843 case GEN_OR_VSX_REGS
:
22844 /* best_class must be a subset of allocno_class. */
22845 gcc_checking_assert (best_class
== GEN_OR_VSX_REGS
22846 || best_class
== GEN_OR_FLOAT_REGS
22847 || best_class
== VSX_REGS
22848 || best_class
== ALTIVEC_REGS
22849 || best_class
== FLOAT_REGS
22850 || best_class
== GENERAL_REGS
22851 || best_class
== BASE_REGS
);
22852 /* Use best_class but choose wider classes when copying from the
22853 wider class to best_class is cheap. This mimics IRA choice
22854 of allocno class. */
22855 if (best_class
== BASE_REGS
)
22856 return GENERAL_REGS
;
22857 if (TARGET_VSX
&& best_class
== FLOAT_REGS
)
22862 if (best_class
== ALTIVEC_REGS
)
22863 return ALTIVEC_REGS
;
22869 return allocno_class
;
22872 /* Load up a constant. If the mode is a vector mode, splat the value across
22873 all of the vector elements. */
22876 rs6000_load_constant_and_splat (machine_mode mode
, REAL_VALUE_TYPE dconst
)
22880 if (mode
== SFmode
|| mode
== DFmode
)
22882 rtx d
= const_double_from_real_value (dconst
, mode
);
22883 reg
= force_reg (mode
, d
);
22885 else if (mode
== V4SFmode
)
22887 rtx d
= const_double_from_real_value (dconst
, SFmode
);
22888 rtvec v
= gen_rtvec (4, d
, d
, d
, d
);
22889 reg
= gen_reg_rtx (mode
);
22890 rs6000_expand_vector_init (reg
, gen_rtx_PARALLEL (mode
, v
));
22892 else if (mode
== V2DFmode
)
22894 rtx d
= const_double_from_real_value (dconst
, DFmode
);
22895 rtvec v
= gen_rtvec (2, d
, d
);
22896 reg
= gen_reg_rtx (mode
);
22897 rs6000_expand_vector_init (reg
, gen_rtx_PARALLEL (mode
, v
));
22900 gcc_unreachable ();
22905 /* Generate an FMA instruction. */
22908 rs6000_emit_madd (rtx target
, rtx m1
, rtx m2
, rtx a
)
22910 machine_mode mode
= GET_MODE (target
);
22913 dst
= expand_ternary_op (mode
, fma_optab
, m1
, m2
, a
, target
, 0);
22914 gcc_assert (dst
!= NULL
);
22917 emit_move_insn (target
, dst
);
22920 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
22923 rs6000_emit_nmsub (rtx dst
, rtx m1
, rtx m2
, rtx a
)
22925 machine_mode mode
= GET_MODE (dst
);
22928 /* This is a tad more complicated, since the fnma_optab is for
22929 a different expression: fma(-m1, m2, a), which is the same
22930 thing except in the case of signed zeros.
22932 Fortunately we know that if FMA is supported that FNMSUB is
22933 also supported in the ISA. Just expand it directly. */
22935 gcc_assert (optab_handler (fma_optab
, mode
) != CODE_FOR_nothing
);
22937 r
= gen_rtx_NEG (mode
, a
);
22938 r
= gen_rtx_FMA (mode
, m1
, m2
, r
);
22939 r
= gen_rtx_NEG (mode
, r
);
22940 emit_insn (gen_rtx_SET (dst
, r
));
22943 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
22944 add a reg_note saying that this was a division. Support both scalar and
22945 vector divide. Assumes no trapping math and finite arguments. */
22948 rs6000_emit_swdiv (rtx dst
, rtx n
, rtx d
, bool note_p
)
22950 machine_mode mode
= GET_MODE (dst
);
22951 rtx one
, x0
, e0
, x1
, xprev
, eprev
, xnext
, enext
, u
, v
;
22954 /* Low precision estimates guarantee 5 bits of accuracy. High
22955 precision estimates guarantee 14 bits of accuracy. SFmode
22956 requires 23 bits of accuracy. DFmode requires 52 bits of
22957 accuracy. Each pass at least doubles the accuracy, leading
22958 to the following. */
22959 int passes
= (TARGET_RECIP_PRECISION
) ? 1 : 3;
22960 if (mode
== DFmode
|| mode
== V2DFmode
)
22963 enum insn_code code
= optab_handler (smul_optab
, mode
);
22964 insn_gen_fn gen_mul
= GEN_FCN (code
);
22966 gcc_assert (code
!= CODE_FOR_nothing
);
22968 one
= rs6000_load_constant_and_splat (mode
, dconst1
);
22970 /* x0 = 1./d estimate */
22971 x0
= gen_reg_rtx (mode
);
22972 emit_insn (gen_rtx_SET (x0
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, d
),
22975 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
22978 /* e0 = 1. - d * x0 */
22979 e0
= gen_reg_rtx (mode
);
22980 rs6000_emit_nmsub (e0
, d
, x0
, one
);
22982 /* x1 = x0 + e0 * x0 */
22983 x1
= gen_reg_rtx (mode
);
22984 rs6000_emit_madd (x1
, e0
, x0
, x0
);
22986 for (i
= 0, xprev
= x1
, eprev
= e0
; i
< passes
- 2;
22987 ++i
, xprev
= xnext
, eprev
= enext
) {
22989 /* enext = eprev * eprev */
22990 enext
= gen_reg_rtx (mode
);
22991 emit_insn (gen_mul (enext
, eprev
, eprev
));
22993 /* xnext = xprev + enext * xprev */
22994 xnext
= gen_reg_rtx (mode
);
22995 rs6000_emit_madd (xnext
, enext
, xprev
, xprev
);
23001 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
23003 /* u = n * xprev */
23004 u
= gen_reg_rtx (mode
);
23005 emit_insn (gen_mul (u
, n
, xprev
));
23007 /* v = n - (d * u) */
23008 v
= gen_reg_rtx (mode
);
23009 rs6000_emit_nmsub (v
, d
, u
, n
);
23011 /* dst = (v * xprev) + u */
23012 rs6000_emit_madd (dst
, v
, xprev
, u
);
23015 add_reg_note (get_last_insn (), REG_EQUAL
, gen_rtx_DIV (mode
, n
, d
));
23018 /* Goldschmidt's Algorithm for single/double-precision floating point
23019 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
23022 rs6000_emit_swsqrt (rtx dst
, rtx src
, bool recip
)
23024 machine_mode mode
= GET_MODE (src
);
23025 rtx e
= gen_reg_rtx (mode
);
23026 rtx g
= gen_reg_rtx (mode
);
23027 rtx h
= gen_reg_rtx (mode
);
23029 /* Low precision estimates guarantee 5 bits of accuracy. High
23030 precision estimates guarantee 14 bits of accuracy. SFmode
23031 requires 23 bits of accuracy. DFmode requires 52 bits of
23032 accuracy. Each pass at least doubles the accuracy, leading
23033 to the following. */
23034 int passes
= (TARGET_RECIP_PRECISION
) ? 1 : 3;
23035 if (mode
== DFmode
|| mode
== V2DFmode
)
23040 enum insn_code code
= optab_handler (smul_optab
, mode
);
23041 insn_gen_fn gen_mul
= GEN_FCN (code
);
23043 gcc_assert (code
!= CODE_FOR_nothing
);
23045 mhalf
= rs6000_load_constant_and_splat (mode
, dconsthalf
);
23047 /* e = rsqrt estimate */
23048 emit_insn (gen_rtx_SET (e
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, src
),
23051 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
23054 rtx zero
= force_reg (mode
, CONST0_RTX (mode
));
23056 if (mode
== SFmode
)
23058 rtx target
= emit_conditional_move (e
, { GT
, src
, zero
, mode
},
23061 emit_move_insn (e
, target
);
23065 rtx cond
= gen_rtx_GT (VOIDmode
, e
, zero
);
23066 rs6000_emit_vector_cond_expr (e
, e
, zero
, cond
, src
, zero
);
23070 /* g = sqrt estimate. */
23071 emit_insn (gen_mul (g
, e
, src
));
23072 /* h = 1/(2*sqrt) estimate. */
23073 emit_insn (gen_mul (h
, e
, mhalf
));
23079 rtx t
= gen_reg_rtx (mode
);
23080 rs6000_emit_nmsub (t
, g
, h
, mhalf
);
23081 /* Apply correction directly to 1/rsqrt estimate. */
23082 rs6000_emit_madd (dst
, e
, t
, e
);
23086 for (i
= 0; i
< passes
; i
++)
23088 rtx t1
= gen_reg_rtx (mode
);
23089 rtx g1
= gen_reg_rtx (mode
);
23090 rtx h1
= gen_reg_rtx (mode
);
23092 rs6000_emit_nmsub (t1
, g
, h
, mhalf
);
23093 rs6000_emit_madd (g1
, g
, t1
, g
);
23094 rs6000_emit_madd (h1
, h
, t1
, h
);
23099 /* Multiply by 2 for 1/rsqrt. */
23100 emit_insn (gen_add3_insn (dst
, h
, h
));
23105 rtx t
= gen_reg_rtx (mode
);
23106 rs6000_emit_nmsub (t
, g
, h
, mhalf
);
23107 rs6000_emit_madd (dst
, g
, t
, g
);
23113 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
23114 (Power7) targets. DST is the target, and SRC is the argument operand. */
23117 rs6000_emit_popcount (rtx dst
, rtx src
)
23119 machine_mode mode
= GET_MODE (dst
);
23122 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
23123 if (TARGET_POPCNTD
)
23125 if (mode
== SImode
)
23126 emit_insn (gen_popcntdsi2 (dst
, src
));
23128 emit_insn (gen_popcntddi2 (dst
, src
));
23132 tmp1
= gen_reg_rtx (mode
);
23134 if (mode
== SImode
)
23136 emit_insn (gen_popcntbsi2 (tmp1
, src
));
23137 tmp2
= expand_mult (SImode
, tmp1
, GEN_INT (0x01010101),
23139 tmp2
= force_reg (SImode
, tmp2
);
23140 emit_insn (gen_lshrsi3 (dst
, tmp2
, GEN_INT (24)));
23144 emit_insn (gen_popcntbdi2 (tmp1
, src
));
23145 tmp2
= expand_mult (DImode
, tmp1
,
23146 GEN_INT ((HOST_WIDE_INT
)
23147 0x01010101 << 32 | 0x01010101),
23149 tmp2
= force_reg (DImode
, tmp2
);
23150 emit_insn (gen_lshrdi3 (dst
, tmp2
, GEN_INT (56)));
23155 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
23156 target, and SRC is the argument operand. */
23159 rs6000_emit_parity (rtx dst
, rtx src
)
23161 machine_mode mode
= GET_MODE (dst
);
23164 tmp
= gen_reg_rtx (mode
);
23166 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
23169 if (mode
== SImode
)
23171 emit_insn (gen_popcntbsi2 (tmp
, src
));
23172 emit_insn (gen_paritysi2_cmpb (dst
, tmp
));
23176 emit_insn (gen_popcntbdi2 (tmp
, src
));
23177 emit_insn (gen_paritydi2_cmpb (dst
, tmp
));
23182 if (mode
== SImode
)
23184 /* Is mult+shift >= shift+xor+shift+xor? */
23185 if (rs6000_cost
->mulsi_const
>= COSTS_N_INSNS (3))
23187 rtx tmp1
, tmp2
, tmp3
, tmp4
;
23189 tmp1
= gen_reg_rtx (SImode
);
23190 emit_insn (gen_popcntbsi2 (tmp1
, src
));
23192 tmp2
= gen_reg_rtx (SImode
);
23193 emit_insn (gen_lshrsi3 (tmp2
, tmp1
, GEN_INT (16)));
23194 tmp3
= gen_reg_rtx (SImode
);
23195 emit_insn (gen_xorsi3 (tmp3
, tmp1
, tmp2
));
23197 tmp4
= gen_reg_rtx (SImode
);
23198 emit_insn (gen_lshrsi3 (tmp4
, tmp3
, GEN_INT (8)));
23199 emit_insn (gen_xorsi3 (tmp
, tmp3
, tmp4
));
23202 rs6000_emit_popcount (tmp
, src
);
23203 emit_insn (gen_andsi3 (dst
, tmp
, const1_rtx
));
23207 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
23208 if (rs6000_cost
->muldi
>= COSTS_N_INSNS (5))
23210 rtx tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
;
23212 tmp1
= gen_reg_rtx (DImode
);
23213 emit_insn (gen_popcntbdi2 (tmp1
, src
));
23215 tmp2
= gen_reg_rtx (DImode
);
23216 emit_insn (gen_lshrdi3 (tmp2
, tmp1
, GEN_INT (32)));
23217 tmp3
= gen_reg_rtx (DImode
);
23218 emit_insn (gen_xordi3 (tmp3
, tmp1
, tmp2
));
23220 tmp4
= gen_reg_rtx (DImode
);
23221 emit_insn (gen_lshrdi3 (tmp4
, tmp3
, GEN_INT (16)));
23222 tmp5
= gen_reg_rtx (DImode
);
23223 emit_insn (gen_xordi3 (tmp5
, tmp3
, tmp4
));
23225 tmp6
= gen_reg_rtx (DImode
);
23226 emit_insn (gen_lshrdi3 (tmp6
, tmp5
, GEN_INT (8)));
23227 emit_insn (gen_xordi3 (tmp
, tmp5
, tmp6
));
23230 rs6000_emit_popcount (tmp
, src
);
23231 emit_insn (gen_anddi3 (dst
, tmp
, const1_rtx
));
23235 /* Expand an Altivec constant permutation for little endian mode.
23236 OP0 and OP1 are the input vectors and TARGET is the output vector.
23237 SEL specifies the constant permutation vector.
23239 There are two issues: First, the two input operands must be
23240 swapped so that together they form a double-wide array in LE
23241 order. Second, the vperm instruction has surprising behavior
23242 in LE mode: it interprets the elements of the source vectors
23243 in BE mode ("left to right") and interprets the elements of
23244 the destination vector in LE mode ("right to left"). To
23245 correct for this, we must subtract each element of the permute
23246 control vector from 31.
23248 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
23249 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
23250 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
23251 serve as the permute control vector. Then, in BE mode,
23255 places the desired result in vr9. However, in LE mode the
23256 vector contents will be
23258 vr10 = 00000003 00000002 00000001 00000000
23259 vr11 = 00000007 00000006 00000005 00000004
23261 The result of the vperm using the same permute control vector is
23263 vr9 = 05000000 07000000 01000000 03000000
23265 That is, the leftmost 4 bytes of vr10 are interpreted as the
23266 source for the rightmost 4 bytes of vr9, and so on.
23268 If we change the permute control vector to
23270 vr12 = {31,20,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
23278 vr9 = 00000006 00000004 00000002 00000000. */
23281 altivec_expand_vec_perm_const_le (rtx target
, rtx op0
, rtx op1
,
23282 const vec_perm_indices
&sel
)
23286 rtx constv
, unspec
;
23288 /* Unpack and adjust the constant selector. */
23289 for (i
= 0; i
< 16; ++i
)
23291 unsigned int elt
= 31 - (sel
[i
] & 31);
23292 perm
[i
] = GEN_INT (elt
);
23295 /* Expand to a permute, swapping the inputs and using the
23296 adjusted selector. */
23298 op0
= force_reg (V16QImode
, op0
);
23300 op1
= force_reg (V16QImode
, op1
);
23302 constv
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, perm
));
23303 constv
= force_reg (V16QImode
, constv
);
23304 unspec
= gen_rtx_UNSPEC (V16QImode
, gen_rtvec (3, op1
, op0
, constv
),
23306 if (!REG_P (target
))
23308 rtx tmp
= gen_reg_rtx (V16QImode
);
23309 emit_move_insn (tmp
, unspec
);
23313 emit_move_insn (target
, unspec
);
23316 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
23317 permute control vector. But here it's not a constant, so we must
23318 generate a vector NAND or NOR to do the adjustment. */
23321 altivec_expand_vec_perm_le (rtx operands
[4])
23323 rtx notx
, iorx
, unspec
;
23324 rtx target
= operands
[0];
23325 rtx op0
= operands
[1];
23326 rtx op1
= operands
[2];
23327 rtx sel
= operands
[3];
23329 rtx norreg
= gen_reg_rtx (V16QImode
);
23330 machine_mode mode
= GET_MODE (target
);
23332 /* Get everything in regs so the pattern matches. */
23334 op0
= force_reg (mode
, op0
);
23336 op1
= force_reg (mode
, op1
);
23338 sel
= force_reg (V16QImode
, sel
);
23339 if (!REG_P (target
))
23340 tmp
= gen_reg_rtx (mode
);
23342 if (TARGET_P9_VECTOR
)
23344 unspec
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op0
, sel
),
23349 /* Invert the selector with a VNAND if available, else a VNOR.
23350 The VNAND is preferred for future fusion opportunities. */
23351 notx
= gen_rtx_NOT (V16QImode
, sel
);
23352 iorx
= (TARGET_P8_VECTOR
23353 ? gen_rtx_IOR (V16QImode
, notx
, notx
)
23354 : gen_rtx_AND (V16QImode
, notx
, notx
));
23355 emit_insn (gen_rtx_SET (norreg
, iorx
));
23357 /* Permute with operands reversed and adjusted selector. */
23358 unspec
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op0
, norreg
),
23362 /* Copy into target, possibly by way of a register. */
23363 if (!REG_P (target
))
23365 emit_move_insn (tmp
, unspec
);
23369 emit_move_insn (target
, unspec
);
23372 /* Expand an Altivec constant permutation. Return true if we match
23373 an efficient implementation; false to fall back to VPERM.
23375 OP0 and OP1 are the input vectors and TARGET is the output vector.
23376 SEL specifies the constant permutation vector. */
23379 altivec_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
,
23380 const vec_perm_indices
&sel
)
23382 struct altivec_perm_insn
{
23383 HOST_WIDE_INT mask
;
23384 enum insn_code impl
;
23385 unsigned char perm
[16];
23387 static const struct altivec_perm_insn patterns
[] = {
23388 {OPTION_MASK_ALTIVEC
,
23389 CODE_FOR_altivec_vpkuhum_direct
,
23390 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}},
23391 {OPTION_MASK_ALTIVEC
,
23392 CODE_FOR_altivec_vpkuwum_direct
,
23393 {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
23394 {OPTION_MASK_ALTIVEC
,
23395 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghb_direct
23396 : CODE_FOR_altivec_vmrglb_direct
,
23397 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
23398 {OPTION_MASK_ALTIVEC
,
23399 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghh_direct
23400 : CODE_FOR_altivec_vmrglh_direct
,
23401 {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
23402 {OPTION_MASK_ALTIVEC
,
23403 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghw_direct_v4si
23404 : CODE_FOR_altivec_vmrglw_direct_v4si
,
23405 {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
23406 {OPTION_MASK_ALTIVEC
,
23407 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglb_direct
23408 : CODE_FOR_altivec_vmrghb_direct
,
23409 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
23410 {OPTION_MASK_ALTIVEC
,
23411 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglh_direct
23412 : CODE_FOR_altivec_vmrghh_direct
,
23413 {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
23414 {OPTION_MASK_ALTIVEC
,
23415 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglw_direct_v4si
23416 : CODE_FOR_altivec_vmrghw_direct_v4si
,
23417 {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}},
23418 {OPTION_MASK_P8_VECTOR
,
23419 BYTES_BIG_ENDIAN
? CODE_FOR_p8_vmrgew_v4sf_direct
23420 : CODE_FOR_p8_vmrgow_v4sf_direct
,
23421 {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}},
23422 {OPTION_MASK_P8_VECTOR
,
23423 BYTES_BIG_ENDIAN
? CODE_FOR_p8_vmrgow_v4sf_direct
23424 : CODE_FOR_p8_vmrgew_v4sf_direct
,
23425 {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}},
23426 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
23427 {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}},
23428 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
23429 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
23430 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
23431 {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}},
23432 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
23433 {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}}};
23435 unsigned int i
, j
, elt
, which
;
23436 unsigned char perm
[16];
23440 /* Unpack the constant selector. */
23441 for (i
= which
= 0; i
< 16; ++i
)
23444 which
|= (elt
< 16 ? 1 : 2);
23448 /* Simplify the constant selector based on operands. */
23452 gcc_unreachable ();
23456 if (!rtx_equal_p (op0
, op1
))
23461 for (i
= 0; i
< 16; ++i
)
23473 /* Look for splat patterns. */
23478 for (i
= 0; i
< 16; ++i
)
23479 if (perm
[i
] != elt
)
23483 if (!BYTES_BIG_ENDIAN
)
23485 emit_insn (gen_altivec_vspltb_direct (target
, op0
, GEN_INT (elt
)));
23491 for (i
= 0; i
< 16; i
+= 2)
23492 if (perm
[i
] != elt
|| perm
[i
+ 1] != elt
+ 1)
23496 int field
= BYTES_BIG_ENDIAN
? elt
/ 2 : 7 - elt
/ 2;
23497 x
= gen_reg_rtx (V8HImode
);
23498 emit_insn (gen_altivec_vsplth_direct (x
, gen_lowpart (V8HImode
, op0
),
23500 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
23507 for (i
= 0; i
< 16; i
+= 4)
23509 || perm
[i
+ 1] != elt
+ 1
23510 || perm
[i
+ 2] != elt
+ 2
23511 || perm
[i
+ 3] != elt
+ 3)
23515 int field
= BYTES_BIG_ENDIAN
? elt
/ 4 : 3 - elt
/ 4;
23516 x
= gen_reg_rtx (V4SImode
);
23517 emit_insn (gen_altivec_vspltw_direct (x
, gen_lowpart (V4SImode
, op0
),
23519 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
23525 /* Look for merge and pack patterns. */
23526 for (j
= 0; j
< ARRAY_SIZE (patterns
); ++j
)
23530 if ((patterns
[j
].mask
& rs6000_isa_flags
) == 0)
23533 elt
= patterns
[j
].perm
[0];
23534 if (perm
[0] == elt
)
23536 else if (perm
[0] == elt
+ 16)
23540 for (i
= 1; i
< 16; ++i
)
23542 elt
= patterns
[j
].perm
[i
];
23544 elt
= (elt
>= 16 ? elt
- 16 : elt
+ 16);
23545 else if (one_vec
&& elt
>= 16)
23547 if (perm
[i
] != elt
)
23552 enum insn_code icode
= patterns
[j
].impl
;
23553 machine_mode omode
= insn_data
[icode
].operand
[0].mode
;
23554 machine_mode imode
= insn_data
[icode
].operand
[1].mode
;
23556 rtx perm_idx
= GEN_INT (0);
23557 if (icode
== CODE_FOR_vsx_xxpermdi_v16qi
)
23574 perm_idx
= GEN_INT (perm_val
);
23577 /* For little-endian, don't use vpkuwum and vpkuhum if the
23578 underlying vector type is not V4SI and V8HI, respectively.
23579 For example, using vpkuwum with a V8HI picks up the even
23580 halfwords (BE numbering) when the even halfwords (LE
23581 numbering) are what we need. */
23582 if (!BYTES_BIG_ENDIAN
23583 && icode
== CODE_FOR_altivec_vpkuwum_direct
23585 && GET_MODE (op0
) != V4SImode
)
23587 && GET_MODE (XEXP (op0
, 0)) != V4SImode
)))
23589 if (!BYTES_BIG_ENDIAN
23590 && icode
== CODE_FOR_altivec_vpkuhum_direct
23592 && GET_MODE (op0
) != V8HImode
)
23594 && GET_MODE (XEXP (op0
, 0)) != V8HImode
)))
23597 /* For little-endian, the two input operands must be swapped
23598 (or swapped back) to ensure proper right-to-left numbering
23600 if (swapped
^ !BYTES_BIG_ENDIAN
23601 && icode
!= CODE_FOR_vsx_xxpermdi_v16qi
)
23602 std::swap (op0
, op1
);
23603 if (imode
!= V16QImode
)
23605 op0
= gen_lowpart (imode
, op0
);
23606 op1
= gen_lowpart (imode
, op1
);
23608 if (omode
== V16QImode
)
23611 x
= gen_reg_rtx (omode
);
23612 if (icode
== CODE_FOR_vsx_xxpermdi_v16qi
)
23613 emit_insn (GEN_FCN (icode
) (x
, op0
, op1
, perm_idx
));
23615 emit_insn (GEN_FCN (icode
) (x
, op0
, op1
));
23616 if (omode
!= V16QImode
)
23617 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
23622 if (!BYTES_BIG_ENDIAN
)
23624 altivec_expand_vec_perm_const_le (target
, op0
, op1
, sel
);
23631 /* Expand a VSX Permute Doubleword constant permutation.
23632 Return true if we match an efficient implementation. */
23635 rs6000_expand_vec_perm_const_1 (rtx target
, rtx op0
, rtx op1
,
23636 unsigned char perm0
, unsigned char perm1
)
23640 /* If both selectors come from the same operand, fold to single op. */
23641 if ((perm0
& 2) == (perm1
& 2))
23648 /* If both operands are equal, fold to simpler permutation. */
23649 if (rtx_equal_p (op0
, op1
))
23652 perm1
= (perm1
& 1) + 2;
23654 /* If the first selector comes from the second operand, swap. */
23655 else if (perm0
& 2)
23661 std::swap (op0
, op1
);
23663 /* If the second selector does not come from the second operand, fail. */
23664 else if ((perm1
& 2) == 0)
23668 if (target
!= NULL
)
23670 machine_mode vmode
, dmode
;
23673 vmode
= GET_MODE (target
);
23674 gcc_assert (GET_MODE_NUNITS (vmode
) == 2);
23675 dmode
= mode_for_vector (GET_MODE_INNER (vmode
), 4).require ();
23676 x
= gen_rtx_VEC_CONCAT (dmode
, op0
, op1
);
23677 v
= gen_rtvec (2, GEN_INT (perm0
), GEN_INT (perm1
));
23678 x
= gen_rtx_VEC_SELECT (vmode
, x
, gen_rtx_PARALLEL (VOIDmode
, v
));
23679 emit_insn (gen_rtx_SET (target
, x
));
23684 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
23687 rs6000_vectorize_vec_perm_const (machine_mode vmode
, machine_mode op_mode
,
23688 rtx target
, rtx op0
, rtx op1
,
23689 const vec_perm_indices
&sel
)
23691 if (vmode
!= op_mode
)
23694 bool testing_p
= !target
;
23696 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
23697 if (TARGET_ALTIVEC
&& testing_p
)
23702 rtx nop0
= force_reg (vmode
, op0
);
23708 op1
= force_reg (vmode
, op1
);
23710 /* Check for ps_merge* or xxpermdi insns. */
23711 if ((vmode
== V2DFmode
|| vmode
== V2DImode
) && VECTOR_MEM_VSX_P (vmode
))
23715 op0
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 1);
23716 op1
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 2);
23718 if (rs6000_expand_vec_perm_const_1 (target
, op0
, op1
, sel
[0], sel
[1]))
23722 if (TARGET_ALTIVEC
)
23724 /* Force the target-independent code to lower to V16QImode. */
23725 if (vmode
!= V16QImode
)
23727 if (altivec_expand_vec_perm_const (target
, op0
, op1
, sel
))
23734 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
23735 OP0 and OP1 are the input vectors and TARGET is the output vector.
23736 PERM specifies the constant permutation vector. */
23739 rs6000_do_expand_vec_perm (rtx target
, rtx op0
, rtx op1
,
23740 machine_mode vmode
, const vec_perm_builder
&perm
)
23742 rtx x
= expand_vec_perm_const (vmode
, op0
, op1
, perm
, BLKmode
, target
);
23744 emit_move_insn (target
, x
);
23747 /* Expand an extract even operation. */
23750 rs6000_expand_extract_even (rtx target
, rtx op0
, rtx op1
)
23752 machine_mode vmode
= GET_MODE (target
);
23753 unsigned i
, nelt
= GET_MODE_NUNITS (vmode
);
23754 vec_perm_builder
perm (nelt
, nelt
, 1);
23756 for (i
= 0; i
< nelt
; i
++)
23757 perm
.quick_push (i
* 2);
23759 rs6000_do_expand_vec_perm (target
, op0
, op1
, vmode
, perm
);
23762 /* Expand a vector interleave operation. */
23765 rs6000_expand_interleave (rtx target
, rtx op0
, rtx op1
, bool highp
)
23767 machine_mode vmode
= GET_MODE (target
);
23768 unsigned i
, high
, nelt
= GET_MODE_NUNITS (vmode
);
23769 vec_perm_builder
perm (nelt
, nelt
, 1);
23771 high
= (highp
? 0 : nelt
/ 2);
23772 for (i
= 0; i
< nelt
/ 2; i
++)
23774 perm
.quick_push (i
+ high
);
23775 perm
.quick_push (i
+ nelt
+ high
);
23778 rs6000_do_expand_vec_perm (target
, op0
, op1
, vmode
, perm
);
23781 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
23783 rs6000_scale_v2df (rtx tgt
, rtx src
, int scale
)
23785 HOST_WIDE_INT
hwi_scale (scale
);
23786 REAL_VALUE_TYPE r_pow
;
23787 rtvec v
= rtvec_alloc (2);
23789 rtx scale_vec
= gen_reg_rtx (V2DFmode
);
23790 (void)real_powi (&r_pow
, DFmode
, &dconst2
, hwi_scale
);
23791 elt
= const_double_from_real_value (r_pow
, DFmode
);
23792 RTVEC_ELT (v
, 0) = elt
;
23793 RTVEC_ELT (v
, 1) = elt
;
23794 rs6000_expand_vector_init (scale_vec
, gen_rtx_PARALLEL (V2DFmode
, v
));
23795 emit_insn (gen_mulv2df3 (tgt
, src
, scale_vec
));
23798 /* Return an RTX representing where to find the function value of a
23799 function returning MODE. */
23801 rs6000_complex_function_value (machine_mode mode
)
23803 unsigned int regno
;
23805 machine_mode inner
= GET_MODE_INNER (mode
);
23806 unsigned int inner_bytes
= GET_MODE_UNIT_SIZE (mode
);
23808 if (TARGET_FLOAT128_TYPE
23810 || (mode
== TCmode
&& TARGET_IEEEQUAD
)))
23811 regno
= ALTIVEC_ARG_RETURN
;
23813 else if (FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
23814 regno
= FP_ARG_RETURN
;
23818 regno
= GP_ARG_RETURN
;
23820 /* 32-bit is OK since it'll go in r3/r4. */
23821 if (TARGET_32BIT
&& inner_bytes
>= 4)
23822 return gen_rtx_REG (mode
, regno
);
23825 if (inner_bytes
>= 8)
23826 return gen_rtx_REG (mode
, regno
);
23828 r1
= gen_rtx_EXPR_LIST (inner
, gen_rtx_REG (inner
, regno
),
23830 r2
= gen_rtx_EXPR_LIST (inner
, gen_rtx_REG (inner
, regno
+ 1),
23831 GEN_INT (inner_bytes
));
23832 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, r1
, r2
));
23835 /* Return an rtx describing a return value of MODE as a PARALLEL
23836 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
23837 stride REG_STRIDE. */
23840 rs6000_parallel_return (machine_mode mode
,
23841 int n_elts
, machine_mode elt_mode
,
23842 unsigned int regno
, unsigned int reg_stride
)
23844 rtx par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (n_elts
));
23847 for (i
= 0; i
< n_elts
; i
++)
23849 rtx r
= gen_rtx_REG (elt_mode
, regno
);
23850 rtx off
= GEN_INT (i
* GET_MODE_SIZE (elt_mode
));
23851 XVECEXP (par
, 0, i
) = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
23852 regno
+= reg_stride
;
23858 /* Target hook for TARGET_FUNCTION_VALUE.
23860 An integer value is in r3 and a floating-point value is in fp1,
23861 unless -msoft-float. */
23864 rs6000_function_value (const_tree valtype
,
23865 const_tree fn_decl_or_type ATTRIBUTE_UNUSED
,
23866 bool outgoing ATTRIBUTE_UNUSED
)
23869 unsigned int regno
;
23870 machine_mode elt_mode
;
23873 /* Special handling for structs in darwin64. */
23875 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype
), valtype
))
23877 CUMULATIVE_ARGS valcum
;
23881 valcum
.fregno
= FP_ARG_MIN_REG
;
23882 valcum
.vregno
= ALTIVEC_ARG_MIN_REG
;
23883 /* Do a trial code generation as if this were going to be passed as
23884 an argument; if any part goes in memory, we return NULL. */
23885 valret
= rs6000_darwin64_record_arg (&valcum
, valtype
, true, /* retval= */ true);
23888 /* Otherwise fall through to standard ABI rules. */
23891 mode
= TYPE_MODE (valtype
);
23893 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
23894 if (rs6000_discover_homogeneous_aggregate (mode
, valtype
, &elt_mode
, &n_elts
))
23896 int first_reg
, n_regs
;
23898 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode
))
23900 /* _Decimal128 must use even/odd register pairs. */
23901 first_reg
= (elt_mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
23902 n_regs
= (GET_MODE_SIZE (elt_mode
) + 7) >> 3;
23906 first_reg
= ALTIVEC_ARG_RETURN
;
23910 return rs6000_parallel_return (mode
, n_elts
, elt_mode
, first_reg
, n_regs
);
23913 /* Some return value types need be split in -mpowerpc64, 32bit ABI. */
23914 if (TARGET_32BIT
&& TARGET_POWERPC64
)
23923 int count
= GET_MODE_SIZE (mode
) / 4;
23924 return rs6000_parallel_return (mode
, count
, SImode
, GP_ARG_RETURN
, 1);
23927 if ((INTEGRAL_TYPE_P (valtype
)
23928 && GET_MODE_BITSIZE (mode
) < (TARGET_32BIT
? 32 : 64))
23929 || POINTER_TYPE_P (valtype
))
23930 mode
= TARGET_32BIT
? SImode
: DImode
;
23932 if (DECIMAL_FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
23933 /* _Decimal128 must use an even/odd register pair. */
23934 regno
= (mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
23935 else if (SCALAR_FLOAT_TYPE_P (valtype
) && TARGET_HARD_FLOAT
23936 && !FLOAT128_VECTOR_P (mode
))
23937 regno
= FP_ARG_RETURN
;
23938 else if (TREE_CODE (valtype
) == COMPLEX_TYPE
23939 && targetm
.calls
.split_complex_arg
)
23940 return rs6000_complex_function_value (mode
);
23941 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23942 return register is used in both cases, and we won't see V2DImode/V2DFmode
23943 for pure altivec, combine the two cases. */
23944 else if ((TREE_CODE (valtype
) == VECTOR_TYPE
|| VECTOR_ALIGNMENT_P (mode
))
23945 && TARGET_ALTIVEC
&& TARGET_ALTIVEC_ABI
23946 && ALTIVEC_OR_VSX_VECTOR_MODE (mode
))
23947 regno
= ALTIVEC_ARG_RETURN
;
23949 regno
= GP_ARG_RETURN
;
23951 return gen_rtx_REG (mode
, regno
);
23954 /* Define how to find the value returned by a library function
23955 assuming the value has mode MODE. */
23957 rs6000_libcall_value (machine_mode mode
)
23959 unsigned int regno
;
23961 /* Long long return value need be split in -mpowerpc64, 32bit ABI. */
23962 if (TARGET_32BIT
&& TARGET_POWERPC64
&& mode
== DImode
)
23963 return rs6000_parallel_return (mode
, 2, SImode
, GP_ARG_RETURN
, 1);
23965 if (DECIMAL_FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
23966 /* _Decimal128 must use an even/odd register pair. */
23967 regno
= (mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
23968 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode
) && TARGET_HARD_FLOAT
)
23969 regno
= FP_ARG_RETURN
;
23970 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23971 return register is used in both cases, and we won't see V2DImode/V2DFmode
23972 for pure altivec, combine the two cases. */
23973 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode
)
23974 && TARGET_ALTIVEC
&& TARGET_ALTIVEC_ABI
)
23975 regno
= ALTIVEC_ARG_RETURN
;
23976 else if (COMPLEX_MODE_P (mode
) && targetm
.calls
.split_complex_arg
)
23977 return rs6000_complex_function_value (mode
);
23979 regno
= GP_ARG_RETURN
;
23981 return gen_rtx_REG (mode
, regno
);
23984 /* Compute register pressure classes. We implement the target hook to avoid
23985 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
23986 lead to incorrect estimates of number of available registers and therefor
23987 increased register pressure/spill. */
23989 rs6000_compute_pressure_classes (enum reg_class
*pressure_classes
)
23994 pressure_classes
[n
++] = GENERAL_REGS
;
23995 if (TARGET_ALTIVEC
)
23996 pressure_classes
[n
++] = ALTIVEC_REGS
;
23998 pressure_classes
[n
++] = VSX_REGS
;
24001 if (TARGET_HARD_FLOAT
)
24002 pressure_classes
[n
++] = FLOAT_REGS
;
24004 if (TARGET_DENSE_MATH
)
24005 pressure_classes
[n
++] = DM_REGS
;
24006 pressure_classes
[n
++] = CR_REGS
;
24007 pressure_classes
[n
++] = SPECIAL_REGS
;
24012 /* Given FROM and TO register numbers, say whether this elimination is allowed.
24013 Frame pointer elimination is automatically handled.
24015 For the RS/6000, if frame pointer elimination is being done, we would like
24016 to convert ap into fp, not sp.
24018 We need r30 if -mminimal-toc was specified, and there are constant pool
24022 rs6000_can_eliminate (const int from
, const int to
)
24024 return (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
24025 ? ! frame_pointer_needed
24026 : from
== RS6000_PIC_OFFSET_TABLE_REGNUM
24027 ? ! TARGET_MINIMAL_TOC
|| TARGET_NO_TOC_OR_PCREL
24028 || constant_pool_empty_p ()
24032 /* Define the offset between two registers, FROM to be eliminated and its
24033 replacement TO, at the start of a routine. */
24035 rs6000_initial_elimination_offset (int from
, int to
)
24037 rs6000_stack_t
*info
= rs6000_stack_info ();
24038 HOST_WIDE_INT offset
;
24040 if (from
== HARD_FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
24041 offset
= info
->push_p
? 0 : -info
->total_size
;
24042 else if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
24044 offset
= info
->push_p
? 0 : -info
->total_size
;
24045 if (FRAME_GROWS_DOWNWARD
)
24046 offset
+= info
->fixed_size
+ info
->vars_size
+ info
->parm_size
;
24048 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
24049 offset
= FRAME_GROWS_DOWNWARD
24050 ? info
->fixed_size
+ info
->vars_size
+ info
->parm_size
24052 else if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
24053 offset
= info
->total_size
;
24054 else if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
24055 offset
= info
->push_p
? info
->total_size
: 0;
24056 else if (from
== RS6000_PIC_OFFSET_TABLE_REGNUM
)
24059 gcc_unreachable ();
24064 /* Fill in sizes of registers used by unwinder. */
24067 rs6000_init_dwarf_reg_sizes_extra (tree address
)
24069 if (TARGET_MACHO
&& ! TARGET_ALTIVEC
)
24072 machine_mode mode
= TYPE_MODE (char_type_node
);
24073 rtx addr
= expand_expr (address
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
24074 rtx mem
= gen_rtx_MEM (BLKmode
, addr
);
24075 rtx value
= gen_int_mode (16, mode
);
24077 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
24078 The unwinder still needs to know the size of Altivec registers. */
24080 for (i
= FIRST_ALTIVEC_REGNO
; i
< LAST_ALTIVEC_REGNO
+1; i
++)
24082 int column
= DWARF_REG_TO_UNWIND_COLUMN
24083 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i
), true));
24084 HOST_WIDE_INT offset
= column
* GET_MODE_SIZE (mode
);
24086 emit_move_insn (adjust_address (mem
, mode
, offset
), value
);
24091 /* Map internal gcc register numbers to debug format register numbers.
24092 FORMAT specifies the type of debug register number to use:
24093 0 -- debug information, except for frame-related sections
24094 1 -- DWARF .debug_frame section
24095 2 -- DWARF .eh_frame section */
24098 rs6000_debugger_regno (unsigned int regno
, unsigned int format
)
24100 /* On some platforms, we use the standard DWARF register
24101 numbering for .debug_info and .debug_frame. */
24102 if ((format
== 0 && dwarf_debuginfo_p ()) || format
== 1)
24104 #ifdef RS6000_USE_DWARF_NUMBERING
24107 if (FP_REGNO_P (regno
))
24108 return regno
- FIRST_FPR_REGNO
+ 32;
24109 if (ALTIVEC_REGNO_P (regno
))
24110 return regno
- FIRST_ALTIVEC_REGNO
+ 1124;
24111 if (regno
== LR_REGNO
)
24113 if (regno
== CTR_REGNO
)
24115 if (regno
== CA_REGNO
)
24116 return 101; /* XER */
24117 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
24118 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
24119 The actual code emitted saves the whole of CR, so we map CR2_REGNO
24120 to the DWARF reg for CR. */
24121 if (format
== 1 && regno
== CR2_REGNO
)
24123 if (CR_REGNO_P (regno
))
24124 return regno
- CR0_REGNO
+ 86;
24125 if (regno
== VRSAVE_REGNO
)
24127 if (regno
== VSCR_REGNO
)
24130 /* These do not make much sense. */
24131 if (regno
== FRAME_POINTER_REGNUM
)
24133 if (regno
== ARG_POINTER_REGNUM
)
24138 gcc_unreachable ();
24142 /* We use the GCC 7 (and before) internal number for non-DWARF debug
24143 information, and also for .eh_frame. */
24144 /* Translate the regnos to their numbers in GCC 7 (and before). */
24147 if (FP_REGNO_P (regno
))
24148 return regno
- FIRST_FPR_REGNO
+ 32;
24149 if (ALTIVEC_REGNO_P (regno
))
24150 return regno
- FIRST_ALTIVEC_REGNO
+ 77;
24151 if (regno
== LR_REGNO
)
24153 if (regno
== CTR_REGNO
)
24155 if (regno
== CA_REGNO
)
24156 return 76; /* XER */
24157 if (CR_REGNO_P (regno
))
24158 return regno
- CR0_REGNO
+ 68;
24159 if (regno
== VRSAVE_REGNO
)
24161 if (regno
== VSCR_REGNO
)
24164 if (regno
== FRAME_POINTER_REGNUM
)
24166 if (regno
== ARG_POINTER_REGNUM
)
24170 /* XXX: This is a guess. The GCC register number for FIRST_DMR_REGNO is 111,
24171 but the frame pointer regnum uses that. */
24172 if (DMR_REGNO_P (regno
))
24173 return regno
- FIRST_DMR_REGNO
+ 112;
24175 gcc_unreachable ();
24178 /* target hook eh_return_filter_mode */
24179 static scalar_int_mode
24180 rs6000_eh_return_filter_mode (void)
24182 return TARGET_32BIT
? SImode
: word_mode
;
24185 /* Target hook for translate_mode_attribute. */
24186 static machine_mode
24187 rs6000_translate_mode_attribute (machine_mode mode
)
24189 if ((FLOAT128_IEEE_P (mode
)
24190 && ieee128_float_type_node
== long_double_type_node
)
24191 || (FLOAT128_IBM_P (mode
)
24192 && ibm128_float_type_node
== long_double_type_node
))
24193 return COMPLEX_MODE_P (mode
) ? E_TCmode
: E_TFmode
;
24197 /* Target hook for scalar_mode_supported_p. */
24199 rs6000_scalar_mode_supported_p (scalar_mode mode
)
24201 /* -m32 does not support TImode. This is the default, from
24202 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
24203 same ABI as for -m32. But default_scalar_mode_supported_p allows
24204 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
24205 for -mpowerpc64. */
24206 if (TARGET_32BIT
&& mode
== TImode
)
24209 if (DECIMAL_FLOAT_MODE_P (mode
))
24210 return default_decimal_float_supported_p ();
24211 else if (TARGET_FLOAT128_TYPE
&& (mode
== KFmode
|| mode
== IFmode
))
24214 return default_scalar_mode_supported_p (mode
);
24217 /* Target hook for libgcc_floating_mode_supported_p. */
24220 rs6000_libgcc_floating_mode_supported_p (scalar_float_mode mode
)
24229 /* We only return true for KFmode if IEEE 128-bit types are supported, and
24230 if long double does not use the IEEE 128-bit format. If long double
24231 uses the IEEE 128-bit format, it will use TFmode and not KFmode.
24232 Because the code will not use KFmode in that case, there will be aborts
24233 because it can't find KFmode in the Floatn types. */
24235 return TARGET_FLOAT128_TYPE
&& !TARGET_IEEEQUAD
;
24242 /* Target hook for vector_mode_supported_p. */
24244 rs6000_vector_mode_supported_p (machine_mode mode
)
24246 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
24247 128-bit, the compiler might try to widen IEEE 128-bit to IBM
24249 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
) && !FLOAT128_IEEE_P (mode
))
24256 /* Target hook for floatn_mode. */
24257 static opt_scalar_float_mode
24258 rs6000_floatn_mode (int n
, bool extended
)
24268 if (TARGET_FLOAT128_TYPE
)
24269 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
24271 return opt_scalar_float_mode ();
24274 return opt_scalar_float_mode ();
24277 /* Those are the only valid _FloatNx types. */
24278 gcc_unreachable ();
24292 if (TARGET_FLOAT128_TYPE
)
24293 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
24295 return opt_scalar_float_mode ();
24298 return opt_scalar_float_mode ();
24304 /* Target hook for c_mode_for_suffix. */
24305 static machine_mode
24306 rs6000_c_mode_for_suffix (char suffix
)
24308 if (TARGET_FLOAT128_TYPE
)
24310 if (suffix
== 'q' || suffix
== 'Q')
24311 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
24313 /* At the moment, we are not defining a suffix for IBM extended double.
24314 If/when the default for -mabi=ieeelongdouble is changed, and we want
24315 to support __ibm128 constants in legacy library code, we may need to
24316 re-evalaute this decision. Currently, c-lex.cc only supports 'w' and
24317 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
24318 __float80 constants. */
24324 /* Target hook for invalid_arg_for_unprototyped_fn. */
24325 static const char *
24326 invalid_arg_for_unprototyped_fn (const_tree typelist
, const_tree funcdecl
, const_tree val
)
24328 return (!rs6000_darwin64_abi
24330 && TREE_CODE (TREE_TYPE (val
)) == VECTOR_TYPE
24331 && (funcdecl
== NULL_TREE
24332 || (TREE_CODE (funcdecl
) == FUNCTION_DECL
24333 && DECL_BUILT_IN_CLASS (funcdecl
) != BUILT_IN_MD
)))
24334 ? N_("AltiVec argument passed to unprototyped function")
24338 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
24339 setup by using __stack_chk_fail_local hidden function instead of
24340 calling __stack_chk_fail directly. Otherwise it is better to call
24341 __stack_chk_fail directly. */
24343 static tree ATTRIBUTE_UNUSED
24344 rs6000_stack_protect_fail (void)
24346 return (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
&& flag_pic
)
24347 ? default_hidden_stack_protect_fail ()
24348 : default_external_stack_protect_fail ();
24351 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
24354 static unsigned HOST_WIDE_INT
24355 rs6000_asan_shadow_offset (void)
24357 return (unsigned HOST_WIDE_INT
) 1 << (TARGET_64BIT
? 41 : 29);
24361 /* Mask options that we want to support inside of attribute((target)) and
24362 #pragma GCC target operations. Note, we do not include things like
24363 64/32-bit, endianness, hard/soft floating point, etc. that would have
24364 different calling sequences. */
24366 struct rs6000_opt_mask
{
24367 const char *name
; /* option name */
24368 HOST_WIDE_INT mask
; /* mask to set */
24369 bool invert
; /* invert sense of mask */
24370 bool valid_target
; /* option is a target option */
24373 static struct rs6000_opt_mask
const rs6000_opt_masks
[] =
24375 { "altivec", OPTION_MASK_ALTIVEC
, false, true },
24376 { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
,
24378 { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR
,
24380 { "cmpb", OPTION_MASK_CMPB
, false, true },
24381 { "crypto", OPTION_MASK_CRYPTO
, false, true },
24382 { "direct-move", OPTION_MASK_DIRECT_MOVE
, false, true },
24383 { "dlmzb", OPTION_MASK_DLMZB
, false, true },
24384 { "dense-math", OPTION_MASK_DENSE_MATH
, false, true },
24385 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX
,
24387 { "float128", OPTION_MASK_FLOAT128_KEYWORD
, false, true },
24388 { "float128-hardware", OPTION_MASK_FLOAT128_HW
, false, true },
24389 { "fprnd", OPTION_MASK_FPRND
, false, true },
24390 { "power10", OPTION_MASK_POWER10
, false, true },
24391 { "future", OPTION_MASK_FUTURE
, false, true },
24392 { "hard-dfp", OPTION_MASK_DFP
, false, true },
24393 { "htm", OPTION_MASK_HTM
, false, true },
24394 { "isel", OPTION_MASK_ISEL
, false, true },
24395 { "mfcrf", OPTION_MASK_MFCRF
, false, true },
24396 { "mfpgpr", 0, false, true },
24397 { "mma", OPTION_MASK_MMA
, false, true },
24398 { "modulo", OPTION_MASK_MODULO
, false, true },
24399 { "mulhw", OPTION_MASK_MULHW
, false, true },
24400 { "multiple", OPTION_MASK_MULTIPLE
, false, true },
24401 { "pcrel", OPTION_MASK_PCREL
, false, true },
24402 { "pcrel-opt", OPTION_MASK_PCREL_OPT
, false, true },
24403 { "popcntb", OPTION_MASK_POPCNTB
, false, true },
24404 { "popcntd", OPTION_MASK_POPCNTD
, false, true },
24405 { "power8-fusion", OPTION_MASK_P8_FUSION
, false, true },
24406 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN
, false, true },
24407 { "power8-vector", OPTION_MASK_P8_VECTOR
, false, true },
24408 { "power9-minmax", OPTION_MASK_P9_MINMAX
, false, true },
24409 { "power9-misc", OPTION_MASK_P9_MISC
, false, true },
24410 { "power9-vector", OPTION_MASK_P9_VECTOR
, false, true },
24411 { "power10-fusion", OPTION_MASK_P10_FUSION
, false, true },
24412 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT
, false, true },
24413 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT
, false, true },
24414 { "prefixed", OPTION_MASK_PREFIXED
, false, true },
24415 { "quad-memory", OPTION_MASK_QUAD_MEMORY
, false, true },
24416 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC
, false, true },
24417 { "recip-precision", OPTION_MASK_RECIP_PRECISION
, false, true },
24418 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT
, false, true },
24419 { "string", 0, false, true },
24420 { "update", OPTION_MASK_NO_UPDATE
, true , true },
24421 { "vsx", OPTION_MASK_VSX
, false, true },
24422 #ifdef OPTION_MASK_64BIT
24424 { "aix64", OPTION_MASK_64BIT
, false, false },
24425 { "aix32", OPTION_MASK_64BIT
, true, false },
24427 { "64", OPTION_MASK_64BIT
, false, false },
24428 { "32", OPTION_MASK_64BIT
, true, false },
24431 #ifdef OPTION_MASK_EABI
24432 { "eabi", OPTION_MASK_EABI
, false, false },
24434 #ifdef OPTION_MASK_LITTLE_ENDIAN
24435 { "little", OPTION_MASK_LITTLE_ENDIAN
, false, false },
24436 { "big", OPTION_MASK_LITTLE_ENDIAN
, true, false },
24438 #ifdef OPTION_MASK_RELOCATABLE
24439 { "relocatable", OPTION_MASK_RELOCATABLE
, false, false },
24441 #ifdef OPTION_MASK_STRICT_ALIGN
24442 { "strict-align", OPTION_MASK_STRICT_ALIGN
, false, false },
24444 { "soft-float", OPTION_MASK_SOFT_FLOAT
, false, false },
24445 { "string", 0, false, false },
/* Option variables that we want to support inside attribute((target)) and
   #pragma GCC target operations.  */

struct rs6000_opt_var {
  const char *name;		/* option name */
  size_t global_offset;		/* offset of the option in global_options.  */
  size_t target_offset;		/* offset of the option in target options.  */
};
/* Table of boolean option variables settable via attribute((target)) /
   #pragma GCC target, with their offsets in global_options and in the
   saved target-option structure.
   NOTE(review): reconstructed from a corrupted extraction; the elided
   entry-name lines were restored from upstream GCC — verify.  */
static struct rs6000_opt_var const rs6000_opt_vars[] =
{
  { "friz",
    offsetof (struct gcc_options, x_TARGET_FRIZ),
    offsetof (struct cl_target_option, x_TARGET_FRIZ), },
  { "avoid-indexed-addresses",
    offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
    offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
  { "longcall",
    offsetof (struct gcc_options, x_rs6000_default_long_calls),
    offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
  { "optimize-swaps",
    offsetof (struct gcc_options, x_rs6000_optimize_swaps),
    offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
  { "allow-movmisalign",
    offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
    offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
  { "sched-groups",
    offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
    offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
  { "always-hint",
    offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
    offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
  { "align-branch-targets",
    offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
    offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
  { "sched-prolog",
    offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
    offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
  { "sched-epilog",
    offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
    offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
  { "speculate-indirect-jumps",
    offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
    offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
};
/* Inner function to handle attribute((target("..."))) and #pragma GCC target
   parsing.  Return true if there were no errors.

   NOTE(review): reconstructed from a corrupted extraction; the elided
   control-flow lines were restored from upstream GCC — verify against
   the project's history.  */

static bool
rs6000_inner_target_options (tree args, bool attr_p)
{
  bool ret = true;

  if (args == NULL_TREE)
    ;

  else if (TREE_CODE (args) == STRING_CST)
    {
      char *p = ASTRDUP (TREE_STRING_POINTER (args));
      char *q;

      while ((q = strtok (p, ",")) != NULL)
	{
	  bool error_p = false;
	  bool not_valid_p = false;
	  const char *cpu_opt = NULL;

	  p = NULL;
	  if (startswith (q, "cpu="))
	    {
	      int cpu_index = rs6000_cpu_name_lookup (q+4);
	      if (cpu_index >= 0)
		rs6000_cpu_index = cpu_index;
	      else
		{
		  error_p = true;
		  cpu_opt = q+4;
		}
	    }
	  else if (startswith (q, "tune="))
	    {
	      int tune_index = rs6000_cpu_name_lookup (q+5);
	      if (tune_index >= 0)
		rs6000_tune_index = tune_index;
	      else
		{
		  error_p = true;
		  cpu_opt = q+5;
		}
	    }
	  else
	    {
	      size_t i;
	      bool invert = false;
	      char *r = q;

	      error_p = true;
	      if (startswith (r, "no-"))
		{
		  invert = true;
		  r += 3;
		}

	      for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
		if (strcmp (r, rs6000_opt_masks[i].name) == 0)
		  {
		    HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;

		    if (!rs6000_opt_masks[i].valid_target)
		      not_valid_p = true;
		    else
		      {
			error_p = false;
			rs6000_isa_flags_explicit |= mask;

			/* VSX needs altivec, so -mvsx automagically sets
			   altivec and disables -mavoid-indexed-addresses.  */
			if (!invert)
			  {
			    if (mask == OPTION_MASK_VSX)
			      {
				mask |= OPTION_MASK_ALTIVEC;
				TARGET_AVOID_XFORM = 0;
			      }
			  }

			if (rs6000_opt_masks[i].invert)
			  invert = !invert;

			if (invert)
			  rs6000_isa_flags &= ~mask;
			else
			  rs6000_isa_flags |= mask;
		      }
		    break;
		  }

	      if (error_p && !not_valid_p)
		{
		  for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
		    if (strcmp (r, rs6000_opt_vars[i].name) == 0)
		      {
			size_t j = rs6000_opt_vars[i].global_offset;
			*((int *) ((char *)&global_options + j)) = !invert;
			error_p = false;
			not_valid_p = false;
			break;
		      }
		}
	    }

	  if (error_p)
	    {
	      const char *eprefix, *esuffix;

	      ret = false;
	      if (attr_p)
		{
		  eprefix = "__attribute__((__target__(";
		  esuffix = ")))";
		}
	      else
		{
		  eprefix = "#pragma GCC target ";
		  esuffix = "";
		}

	      if (cpu_opt)
		error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
		       q, esuffix);
	      else if (not_valid_p)
		error ("%s%qs%s is not allowed", eprefix, q, esuffix);
	      else
		error ("%s%qs%s is invalid", eprefix, q, esuffix);
	    }
	}
    }

  else if (TREE_CODE (args) == TREE_LIST)
    {
      do
	{
	  tree value = TREE_VALUE (args);
	  if (value)
	    {
	      bool ret2 = rs6000_inner_target_options (value, attr_p);
	      if (!ret2)
		ret = false;
	    }
	  args = TREE_CHAIN (args);
	}
      while (args != NULL_TREE);
    }

  else
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  return ret;
}
/* Print out the target options as a list for -mdebug=target.  */
static void
rs6000_debug_target_options (tree args, const char *prefix)
{
  if (args == NULL_TREE)
    fprintf (stderr, "%s<NULL>", prefix);

  else if (TREE_CODE (args) == STRING_CST)
    {
      char *p = ASTRDUP (TREE_STRING_POINTER (args));
      char *q;

      while ((q = strtok (p, ",")) != NULL)
	{
	  /* NULL on subsequent calls continues tokenizing the same string.  */
	  p = NULL;
	  fprintf (stderr, "%s\"%s\"", prefix, q);
	  prefix = ", ";
	}
    }

  else if (TREE_CODE (args) == TREE_LIST)
    {
      do
	{
	  tree value = TREE_VALUE (args);
	  if (value)
	    {
	      rs6000_debug_target_options (value, prefix);
	      prefix = ", ";
	    }
	  args = TREE_CHAIN (args);
	}
      while (args != NULL_TREE);
    }

  else
    gcc_unreachable ();
}
/* Hook to validate attribute((target("..."))).

   NOTE(review): reconstructed from a corrupted extraction; the elided
   declarations and branch lines were restored from upstream GCC — verify
   against the project's history.  */

static bool
rs6000_valid_attribute_p (tree fndecl,
			  tree ARG_UNUSED (name),
			  tree args,
			  int flags)
{
  struct cl_target_option cur_target;
  bool ret;
  tree old_optimize;
  tree new_target, new_optimize;
  tree func_optimize;

  gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));

  if (TARGET_DEBUG_TARGET)
    {
      tree tname = DECL_NAME (fndecl);
      fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
      if (tname)
	fprintf (stderr, "function: %.*s\n",
		 (int) IDENTIFIER_LENGTH (tname),
		 IDENTIFIER_POINTER (tname));
      else
	fprintf (stderr, "function: unknown\n");

      fprintf (stderr, "args:");
      rs6000_debug_target_options (args, " ");
      fprintf (stderr, "\n");

      if (flags)
	fprintf (stderr, "flags: 0x%x\n", flags);

      fprintf (stderr, "--------------------\n");
    }

  /* attribute((target("default"))) does nothing, beyond
     affecting multi-versioning.  */
  if (TREE_VALUE (args)
      && TREE_CODE (TREE_VALUE (args)) == STRING_CST
      && TREE_CHAIN (args) == NULL_TREE
      && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
    return true;

  old_optimize = build_optimization_node (&global_options,
					  &global_options_set);
  func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (func_optimize && func_optimize != old_optimize)
    cl_optimization_restore (&global_options, &global_options_set,
			     TREE_OPTIMIZATION (func_optimize));

  /* The target attributes may also change some optimization flags, so update
     the optimization options if necessary.  */
  cl_target_option_save (&cur_target, &global_options, &global_options_set);
  rs6000_cpu_index = rs6000_tune_index = -1;
  ret = rs6000_inner_target_options (args, true);

  /* Set up any additional state.  */
  if (ret)
    {
      ret = rs6000_option_override_internal (false);
      new_target = build_target_option_node (&global_options,
					     &global_options_set);
    }
  else
    new_target = NULL;

  new_optimize = build_optimization_node (&global_options,
					  &global_options_set);

  if (!new_target)
    ret = false;

  else if (fndecl)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  cl_target_option_restore (&global_options, &global_options_set, &cur_target);

  if (old_optimize != new_optimize)
    cl_optimization_restore (&global_options, &global_options_set,
			     TREE_OPTIMIZATION (old_optimize));

  return ret;
}
/* Hook to validate the current #pragma GCC target and set the state, and
   update the macros based on what was changed.  If ARGS is NULL, then
   POP_TARGET is used to reset the options.

   NOTE(review): reconstructed from a corrupted extraction; the elided
   branch lines were restored from upstream GCC — verify.  */

static bool
rs6000_pragma_target_parse (tree args, tree pop_target)
{
  tree prev_tree = build_target_option_node (&global_options,
					     &global_options_set);
  tree cur_tree;
  struct cl_target_option *prev_opt, *cur_opt;
  HOST_WIDE_INT prev_flags, cur_flags, diff_flags;

  if (TARGET_DEBUG_TARGET)
    {
      fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
      fprintf (stderr, "args:");
      rs6000_debug_target_options (args, " ");
      fprintf (stderr, "\n");

      if (pop_target)
	{
	  fprintf (stderr, "pop_target:\n");
	  debug_tree (pop_target);
	}
      else
	fprintf (stderr, "pop_target: <NULL>\n");

      fprintf (stderr, "--------------------\n");
    }

  if (! args)
    {
      cur_tree = ((pop_target)
		  ? pop_target
		  : target_option_default_node);
      cl_target_option_restore (&global_options, &global_options_set,
				TREE_TARGET_OPTION (cur_tree));
    }
  else
    {
      rs6000_cpu_index = rs6000_tune_index = -1;
      if (!rs6000_inner_target_options (args, false)
	  || !rs6000_option_override_internal (false)
	  || (cur_tree = build_target_option_node (&global_options,
						   &global_options_set))
	     == NULL_TREE)
	{
	  if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
	    fprintf (stderr, "invalid pragma\n");

	  return false;
	}
    }

  target_option_current_node = cur_tree;
  rs6000_activate_target_options (target_option_current_node);

  /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
     change the macros that are defined.  */
  if (rs6000_target_modify_macros_ptr)
    {
      prev_opt    = TREE_TARGET_OPTION (prev_tree);
      prev_flags  = prev_opt->x_rs6000_isa_flags;

      cur_opt     = TREE_TARGET_OPTION (cur_tree);
      cur_flags   = cur_opt->x_rs6000_isa_flags;

      diff_flags  = (prev_flags ^ cur_flags);

      if (diff_flags != 0)
	{
	  /* Delete old macros.  */
	  rs6000_target_modify_macros_ptr (false,
					   prev_flags & diff_flags);

	  /* Define new macros.  */
	  rs6000_target_modify_macros_ptr (true,
					   cur_flags & diff_flags);
	}
    }

  return true;
}
24876 /* Remember the last target of rs6000_set_current_function. */
24877 static GTY(()) tree rs6000_previous_fndecl
;
24879 /* Restore target's globals from NEW_TREE and invalidate the
24880 rs6000_previous_fndecl cache. */
24883 rs6000_activate_target_options (tree new_tree
)
24885 cl_target_option_restore (&global_options
, &global_options_set
,
24886 TREE_TARGET_OPTION (new_tree
));
24887 if (TREE_TARGET_GLOBALS (new_tree
))
24888 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
24889 else if (new_tree
== target_option_default_node
)
24890 restore_target_globals (&default_target_globals
);
24892 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
24893 rs6000_previous_fndecl
= NULL_TREE
;
/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.

   NOTE(review): reconstructed from a corrupted extraction; elided
   declaration/branch lines restored from upstream GCC — verify.  */
static void
rs6000_set_current_function (tree fndecl)
{
  if (TARGET_DEBUG_TARGET)
    {
      fprintf (stderr, "\n==================== rs6000_set_current_function");

      if (fndecl)
	fprintf (stderr, ", fndecl %s (%p)",
		 (DECL_NAME (fndecl)
		  ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
		  : "<unknown>"), (void *)fndecl);

      if (rs6000_previous_fndecl)
	fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);

      fprintf (stderr, "\n");
    }

  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl == rs6000_previous_fndecl)
    return;

  tree old_tree;
  if (rs6000_previous_fndecl == NULL_TREE)
    old_tree = target_option_current_node;
  else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
    old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
  else
    old_tree = target_option_default_node;

  tree new_tree;
  if (fndecl == NULL_TREE)
    {
      if (old_tree != target_option_current_node)
	new_tree = target_option_current_node;
      else
	new_tree = NULL_TREE;
    }
  else
    {
      new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
      if (new_tree == NULL_TREE)
	new_tree = target_option_default_node;
    }

  if (TARGET_DEBUG_TARGET)
    {
      if (new_tree)
	{
	  fprintf (stderr, "\nnew fndecl target specific options:\n");
	  debug_tree (new_tree);
	}

      if (old_tree)
	{
	  fprintf (stderr, "\nold fndecl target specific options:\n");
	  debug_tree (old_tree);
	}

      if (old_tree != NULL_TREE || new_tree != NULL_TREE)
	fprintf (stderr, "--------------------\n");
    }

  if (new_tree && old_tree != new_tree)
    rs6000_activate_target_options (new_tree);

  if (fndecl)
    rs6000_previous_fndecl = fndecl;
}
24973 /* Save the current options */
24976 rs6000_function_specific_save (struct cl_target_option
*ptr
,
24977 struct gcc_options
*opts
,
24978 struct gcc_options */
* opts_set */
)
24980 ptr
->x_rs6000_isa_flags
= opts
->x_rs6000_isa_flags
;
24981 ptr
->x_rs6000_isa_flags_explicit
= opts
->x_rs6000_isa_flags_explicit
;
24984 /* Restore the current options */
24987 rs6000_function_specific_restore (struct gcc_options
*opts
,
24988 struct gcc_options */
* opts_set */
,
24989 struct cl_target_option
*ptr
)
24992 opts
->x_rs6000_isa_flags
= ptr
->x_rs6000_isa_flags
;
24993 opts
->x_rs6000_isa_flags_explicit
= ptr
->x_rs6000_isa_flags_explicit
;
24994 (void) rs6000_option_override_internal (false);
/* Print the current options */

static void
rs6000_function_specific_print (FILE *file, int indent,
				struct cl_target_option *ptr)
{
  rs6000_print_isa_options (file, indent, "Isa options set",
			    ptr->x_rs6000_isa_flags);

  rs6000_print_isa_options (file, indent, "Isa options explicit",
			    ptr->x_rs6000_isa_flags_explicit);
}
/* Helper function to print the current isa or misc options on a line.

   NOTE(review): reconstructed from a corrupted extraction; elided lines
   restored from upstream GCC — verify.  Note also that the visible
   fragments mix FILE and stderr output (the header/continuations go to
   stderr while option names go to FILE); that upstream inconsistency is
   preserved here rather than silently changed.  */

static void
rs6000_print_options_internal (FILE *file,
			       int indent,
			       const char *string,
			       HOST_WIDE_INT flags,
			       const char *prefix,
			       const struct rs6000_opt_mask *opts,
			       size_t num_elements)
{
  size_t i;
  size_t start_column = 0;
  size_t cur_column;
  size_t max_column = 120;
  size_t prefix_len = strlen (prefix);
  size_t comma_len = 0;
  const char *comma = "";

  if (indent)
    start_column += fprintf (file, "%*s", indent, "");

  if (!flags)
    {
      fprintf (stderr, DEBUG_FMT_S, string, "<none>");
      return;
    }

  start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);

  /* Print the various mask options.  */
  cur_column = start_column;
  for (i = 0; i < num_elements; i++)
    {
      bool invert = opts[i].invert;
      const char *name = opts[i].name;
      const char *no_str = "";
      HOST_WIDE_INT mask = opts[i].mask;
      size_t len = comma_len + prefix_len + strlen (name);

      if (!invert)
	{
	  if ((flags & mask) == 0)
	    {
	      no_str = "no-";
	      len += strlen ("no-");
	    }

	  flags &= ~mask;
	}

      else
	{
	  if ((flags & mask) != 0)
	    {
	      no_str = "no-";
	      len += strlen ("no-");
	    }

	  flags |= mask;
	}

      cur_column += len;
      if (cur_column > max_column)
	{
	  fprintf (stderr, ", \\\n%*s", (int)start_column, "");
	  cur_column = start_column + len;
	  comma = "";
	}

      fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
      comma = ", ";
      comma_len = strlen (", ");
    }

  fputs ("\n", file);
}
/* Helper function to print the current isa options on a line.  */

static void
rs6000_print_isa_options (FILE *file, int indent, const char *string,
			  HOST_WIDE_INT flags)
{
  rs6000_print_options_internal (file, indent, string, flags, "-m",
				 &rs6000_opt_masks[0],
				 ARRAY_SIZE (rs6000_opt_masks));
}
25099 /* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06,
25100 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
25101 -mupper-regs-df, etc.).
25103 If the user used -mno-power8-vector, we need to turn off all of the implicit
25104 ISA 2.07 and 3.0 options that relate to the vector unit.
25106 If the user used -mno-power9-vector, we need to turn off all of the implicit
25107 ISA 3.0 options that relate to the vector unit.
25109 This function does not handle explicit options such as the user specifying
25110 -mdirect-move. These are handled in rs6000_option_override_internal, and
25111 the appropriate error is given if needed.
25113 We return a mask of all of the implicit options that should not be enabled
25116 static HOST_WIDE_INT
25117 rs6000_disable_incompatible_switches (void)
25119 HOST_WIDE_INT ignore_masks
= rs6000_isa_flags_explicit
;
25122 static const struct {
25123 const HOST_WIDE_INT no_flag
; /* flag explicitly turned off. */
25124 const HOST_WIDE_INT dep_flags
; /* flags that depend on this option. */
25125 const char *const name
; /* name of the switch. */
25127 { OPTION_MASK_P9_VECTOR
, OTHER_P9_VECTOR_MASKS
, "power9-vector" },
25128 { OPTION_MASK_P8_VECTOR
, OTHER_P8_VECTOR_MASKS
, "power8-vector" },
25129 { OPTION_MASK_VSX
, OTHER_VSX_VECTOR_MASKS
, "vsx" },
25130 { OPTION_MASK_ALTIVEC
, OTHER_ALTIVEC_MASKS
, "altivec" },
25133 for (i
= 0; i
< ARRAY_SIZE (flags
); i
++)
25135 HOST_WIDE_INT no_flag
= flags
[i
].no_flag
;
25137 if ((rs6000_isa_flags
& no_flag
) == 0
25138 && (rs6000_isa_flags_explicit
& no_flag
) != 0)
25140 HOST_WIDE_INT dep_flags
= flags
[i
].dep_flags
;
25141 HOST_WIDE_INT set_flags
= (rs6000_isa_flags_explicit
25147 for (j
= 0; j
< ARRAY_SIZE (rs6000_opt_masks
); j
++)
25148 if ((set_flags
& rs6000_opt_masks
[j
].mask
) != 0)
25150 set_flags
&= ~rs6000_opt_masks
[j
].mask
;
25151 error ("%<-mno-%s%> turns off %<-m%s%>",
25153 rs6000_opt_masks
[j
].name
);
25156 gcc_assert (!set_flags
);
25159 rs6000_isa_flags
&= ~dep_flags
;
25160 ignore_masks
|= no_flag
| dep_flags
;
25164 return ignore_masks
;
25168 /* Helper function for printing the function name when debugging. */
25170 static const char *
25171 get_decl_name (tree fn
)
25178 name
= DECL_NAME (fn
);
25180 return "<no-name>";
25182 return IDENTIFIER_POINTER (name
);
/* Return the clone id of the target we are compiling code for in a target
   clone.  The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
   the priority list for the target clones (ordered from lowest to
   highest).

   NOTE(review): reconstructed from a corrupted extraction; elided lines
   restored from upstream GCC — verify.  */

static int
rs6000_clone_priority (tree fndecl)
{
  tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
  HOST_WIDE_INT isa_masks;
  int ret = CLONE_DEFAULT;
  tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
  const char *attrs_str = NULL;

  attrs = TREE_VALUE (TREE_VALUE (attrs));
  attrs_str = TREE_STRING_POINTER (attrs);

  /* Return priority zero for default function.  Return the ISA needed for the
     function if it is not the default.  */
  if (strcmp (attrs_str, "default") != 0)
    {
      if (fn_opts == NULL_TREE)
	fn_opts = target_option_default_node;

      if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
	isa_masks = rs6000_isa_flags;
      else
	isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;

      /* Scan from the highest-priority clone down to find the first whose
	 ISA requirement is satisfied.  */
      for (ret = CLONE_MAX - 1; ret != 0; ret--)
	if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
	  break;
    }

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
	     get_decl_name (fndecl), ret);

  return ret;
}
/* This compares the priority of target features in function DECL1 and DECL2.
   It returns positive value if DECL1 is higher priority, negative value if
   DECL2 is higher priority and 0 if they are the same.  Note, priorities are
   ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0).  */

static int
rs6000_compare_version_priority (tree decl1, tree decl2)
{
  int priority1 = rs6000_clone_priority (decl1);
  int priority2 = rs6000_clone_priority (decl2);
  int ret = priority1 - priority2;

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
	     get_decl_name (decl1), get_decl_name (decl2), ret);

  return ret;
}
25245 /* Make a dispatcher declaration for the multi-versioned function DECL.
25246 Calls to DECL function will be replaced with calls to the dispatcher
25247 by the front-end. Returns the decl of the dispatcher function. */
25250 rs6000_get_function_versions_dispatcher (void *decl
)
25252 tree fn
= (tree
) decl
;
25253 struct cgraph_node
*node
= NULL
;
25254 struct cgraph_node
*default_node
= NULL
;
25255 struct cgraph_function_version_info
*node_v
= NULL
;
25256 struct cgraph_function_version_info
*first_v
= NULL
;
25258 tree dispatch_decl
= NULL
;
25260 struct cgraph_function_version_info
*default_version_info
= NULL
;
25261 gcc_assert (fn
!= NULL
&& DECL_FUNCTION_VERSIONED (fn
));
25263 if (TARGET_DEBUG_TARGET
)
25264 fprintf (stderr
, "rs6000_get_function_versions_dispatcher (%s)\n",
25265 get_decl_name (fn
));
25267 node
= cgraph_node::get (fn
);
25268 gcc_assert (node
!= NULL
);
25270 node_v
= node
->function_version ();
25271 gcc_assert (node_v
!= NULL
);
25273 if (node_v
->dispatcher_resolver
!= NULL
)
25274 return node_v
->dispatcher_resolver
;
25276 /* Find the default version and make it the first node. */
25278 /* Go to the beginning of the chain. */
25279 while (first_v
->prev
!= NULL
)
25280 first_v
= first_v
->prev
;
25282 default_version_info
= first_v
;
25283 while (default_version_info
!= NULL
)
25285 const tree decl2
= default_version_info
->this_node
->decl
;
25286 if (is_function_default_version (decl2
))
25288 default_version_info
= default_version_info
->next
;
25291 /* If there is no default node, just return NULL. */
25292 if (default_version_info
== NULL
)
25295 /* Make default info the first node. */
25296 if (first_v
!= default_version_info
)
25298 default_version_info
->prev
->next
= default_version_info
->next
;
25299 if (default_version_info
->next
)
25300 default_version_info
->next
->prev
= default_version_info
->prev
;
25301 first_v
->prev
= default_version_info
;
25302 default_version_info
->next
= first_v
;
25303 default_version_info
->prev
= NULL
;
25306 default_node
= default_version_info
->this_node
;
25308 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
25309 error_at (DECL_SOURCE_LOCATION (default_node
->decl
),
25310 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
25311 "exports hardware capability bits");
25314 if (targetm
.has_ifunc_p ())
25316 struct cgraph_function_version_info
*it_v
= NULL
;
25317 struct cgraph_node
*dispatcher_node
= NULL
;
25318 struct cgraph_function_version_info
*dispatcher_version_info
= NULL
;
25320 /* Right now, the dispatching is done via ifunc. */
25321 dispatch_decl
= make_dispatcher_decl (default_node
->decl
);
25322 TREE_NOTHROW (dispatch_decl
) = TREE_NOTHROW (fn
);
25324 dispatcher_node
= cgraph_node::get_create (dispatch_decl
);
25325 gcc_assert (dispatcher_node
!= NULL
);
25326 dispatcher_node
->dispatcher_function
= 1;
25327 dispatcher_version_info
25328 = dispatcher_node
->insert_new_function_version ();
25329 dispatcher_version_info
->next
= default_version_info
;
25330 dispatcher_node
->definition
= 1;
25332 /* Set the dispatcher for all the versions. */
25333 it_v
= default_version_info
;
25334 while (it_v
!= NULL
)
25336 it_v
->dispatcher_resolver
= dispatch_decl
;
25342 error_at (DECL_SOURCE_LOCATION (default_node
->decl
),
25343 "multiversioning needs %<ifunc%> which is not supported "
25348 return dispatch_decl
;
25351 /* Make the resolver function decl to dispatch the versions of a multi-
25352 versioned function, DEFAULT_DECL. Create an empty basic block in the
25353 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
25357 make_resolver_func (const tree default_decl
,
25358 const tree dispatch_decl
,
25359 basic_block
*empty_bb
)
25361 /* Make the resolver function static. The resolver function returns
25363 tree decl_name
= clone_function_name (default_decl
, "resolver");
25364 const char *resolver_name
= IDENTIFIER_POINTER (decl_name
);
25365 tree type
= build_function_type_list (ptr_type_node
, NULL_TREE
);
25366 tree decl
= build_fn_decl (resolver_name
, type
);
25367 SET_DECL_ASSEMBLER_NAME (decl
, decl_name
);
25369 DECL_NAME (decl
) = decl_name
;
25370 TREE_USED (decl
) = 1;
25371 DECL_ARTIFICIAL (decl
) = 1;
25372 DECL_IGNORED_P (decl
) = 0;
25373 TREE_PUBLIC (decl
) = 0;
25374 DECL_UNINLINABLE (decl
) = 1;
25376 /* Resolver is not external, body is generated. */
25377 DECL_EXTERNAL (decl
) = 0;
25378 DECL_EXTERNAL (dispatch_decl
) = 0;
25380 DECL_CONTEXT (decl
) = NULL_TREE
;
25381 DECL_INITIAL (decl
) = make_node (BLOCK
);
25382 DECL_STATIC_CONSTRUCTOR (decl
) = 0;
25384 if (DECL_COMDAT_GROUP (default_decl
)
25385 || TREE_PUBLIC (default_decl
))
25387 /* In this case, each translation unit with a call to this
25388 versioned function will put out a resolver. Ensure it
25389 is comdat to keep just one copy. */
25390 DECL_COMDAT (decl
) = 1;
25391 make_decl_one_only (decl
, DECL_ASSEMBLER_NAME (decl
));
25394 TREE_PUBLIC (dispatch_decl
) = 0;
25396 /* Build result decl and add to function_decl. */
25397 tree t
= build_decl (UNKNOWN_LOCATION
, RESULT_DECL
, NULL_TREE
, ptr_type_node
);
25398 DECL_CONTEXT (t
) = decl
;
25399 DECL_ARTIFICIAL (t
) = 1;
25400 DECL_IGNORED_P (t
) = 1;
25401 DECL_RESULT (decl
) = t
;
25403 gimplify_function_tree (decl
);
25404 push_cfun (DECL_STRUCT_FUNCTION (decl
));
25405 *empty_bb
= init_lowered_empty_function (decl
, false,
25406 profile_count::uninitialized ());
25408 cgraph_node::add_new_function (decl
, true);
25409 symtab
->call_cgraph_insertion_hooks (cgraph_node::get_create (decl
));
25413 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
25414 DECL_ATTRIBUTES (dispatch_decl
)
25415 = make_attribute ("ifunc", resolver_name
, DECL_ATTRIBUTES (dispatch_decl
));
25417 cgraph_node::create_same_body_alias (dispatch_decl
, decl
);
25422 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
25423 return a pointer to VERSION_DECL if we are running on a machine that
25424 supports the index CLONE_ISA hardware architecture bits. This function will
25425 be called during version dispatch to decide which function version to
25426 execute. It returns the basic block at the end, to which more conditions
25430 add_condition_to_bb (tree function_decl
, tree version_decl
,
25431 int clone_isa
, basic_block new_bb
)
25433 push_cfun (DECL_STRUCT_FUNCTION (function_decl
));
25435 gcc_assert (new_bb
!= NULL
);
25436 gimple_seq gseq
= bb_seq (new_bb
);
25439 tree convert_expr
= build1 (CONVERT_EXPR
, ptr_type_node
,
25440 build_fold_addr_expr (version_decl
));
25441 tree result_var
= create_tmp_var (ptr_type_node
);
25442 gimple
*convert_stmt
= gimple_build_assign (result_var
, convert_expr
);
25443 gimple
*return_stmt
= gimple_build_return (result_var
);
25445 if (clone_isa
== CLONE_DEFAULT
)
25447 gimple_seq_add_stmt (&gseq
, convert_stmt
);
25448 gimple_seq_add_stmt (&gseq
, return_stmt
);
25449 set_bb_seq (new_bb
, gseq
);
25450 gimple_set_bb (convert_stmt
, new_bb
);
25451 gimple_set_bb (return_stmt
, new_bb
);
25456 tree bool_zero
= build_int_cst (bool_int_type_node
, 0);
25457 tree cond_var
= create_tmp_var (bool_int_type_node
);
25458 tree predicate_decl
= rs6000_builtin_decls
[(int) RS6000_BIF_CPU_SUPPORTS
];
25459 const char *arg_str
= rs6000_clone_map
[clone_isa
].name
;
25460 tree predicate_arg
= build_string_literal (strlen (arg_str
) + 1, arg_str
);
25461 gimple
*call_cond_stmt
= gimple_build_call (predicate_decl
, 1, predicate_arg
);
25462 gimple_call_set_lhs (call_cond_stmt
, cond_var
);
25464 gimple_set_block (call_cond_stmt
, DECL_INITIAL (function_decl
));
25465 gimple_set_bb (call_cond_stmt
, new_bb
);
25466 gimple_seq_add_stmt (&gseq
, call_cond_stmt
);
25468 gimple
*if_else_stmt
= gimple_build_cond (NE_EXPR
, cond_var
, bool_zero
,
25469 NULL_TREE
, NULL_TREE
);
25470 gimple_set_block (if_else_stmt
, DECL_INITIAL (function_decl
));
25471 gimple_set_bb (if_else_stmt
, new_bb
);
25472 gimple_seq_add_stmt (&gseq
, if_else_stmt
);
25474 gimple_seq_add_stmt (&gseq
, convert_stmt
);
25475 gimple_seq_add_stmt (&gseq
, return_stmt
);
25476 set_bb_seq (new_bb
, gseq
);
25478 basic_block bb1
= new_bb
;
25479 edge e12
= split_block (bb1
, if_else_stmt
);
25480 basic_block bb2
= e12
->dest
;
25481 e12
->flags
&= ~EDGE_FALLTHRU
;
25482 e12
->flags
|= EDGE_TRUE_VALUE
;
25484 edge e23
= split_block (bb2
, return_stmt
);
25485 gimple_set_bb (convert_stmt
, bb2
);
25486 gimple_set_bb (return_stmt
, bb2
);
25488 basic_block bb3
= e23
->dest
;
25489 make_edge (bb1
, bb3
, EDGE_FALSE_VALUE
);
25492 make_edge (bb2
, EXIT_BLOCK_PTR_FOR_FN (cfun
), 0);
25498 /* This function generates the dispatch function for multi-versioned functions.
25499 DISPATCH_DECL is the function which will contain the dispatch logic.
25500 FNDECLS are the function choices for dispatch, and is a tree chain.
25501 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
25502 code is generated. */
25505 dispatch_function_versions (tree dispatch_decl
,
25507 basic_block
*empty_bb
)
25511 vec
<tree
> *fndecls
;
25512 tree clones
[CLONE_MAX
];
25514 if (TARGET_DEBUG_TARGET
)
25515 fputs ("dispatch_function_versions, top\n", stderr
);
25517 gcc_assert (dispatch_decl
!= NULL
25518 && fndecls_p
!= NULL
25519 && empty_bb
!= NULL
);
25521 /* fndecls_p is actually a vector. */
25522 fndecls
= static_cast<vec
<tree
> *> (fndecls_p
);
25524 /* At least one more version other than the default. */
25525 gcc_assert (fndecls
->length () >= 2);
25527 /* The first version in the vector is the default decl. */
25528 memset ((void *) clones
, '\0', sizeof (clones
));
25529 clones
[CLONE_DEFAULT
] = (*fndecls
)[0];
25531 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
25532 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
25533 __builtin_cpu_support ensures that the TOC fields are setup by requiring a
25534 recent glibc. If we ever need to call __builtin_cpu_init, we would need
25535 to insert the code here to do the call. */
25537 for (ix
= 1; fndecls
->iterate (ix
, &ele
); ++ix
)
25539 int priority
= rs6000_clone_priority (ele
);
25540 if (!clones
[priority
])
25541 clones
[priority
] = ele
;
25544 for (ix
= CLONE_MAX
- 1; ix
>= 0; ix
--)
25547 if (TARGET_DEBUG_TARGET
)
25548 fprintf (stderr
, "dispatch_function_versions, clone %d, %s\n",
25549 ix
, get_decl_name (clones
[ix
]));
25551 *empty_bb
= add_condition_to_bb (dispatch_decl
, clones
[ix
], ix
,
25558 /* Generate the dispatching code body to dispatch multi-versioned function
25559 DECL. The target hook is called to process the "target" attributes and
25560 provide the code to dispatch the right function at run-time. NODE points
25561 to the dispatcher decl whose body will be created. */
25564 rs6000_generate_version_dispatcher_body (void *node_p
)
25567 basic_block empty_bb
;
25568 struct cgraph_node
*node
= (cgraph_node
*) node_p
;
25569 struct cgraph_function_version_info
*ninfo
= node
->function_version ();
25571 if (ninfo
->dispatcher_resolver
)
25572 return ninfo
->dispatcher_resolver
;
25574 /* node is going to be an alias, so remove the finalized bit. */
25575 node
->definition
= false;
25577 /* The first version in the chain corresponds to the default version. */
25578 ninfo
->dispatcher_resolver
= resolver
25579 = make_resolver_func (ninfo
->next
->this_node
->decl
, node
->decl
, &empty_bb
);
25581 if (TARGET_DEBUG_TARGET
)
25582 fprintf (stderr
, "rs6000_get_function_versions_dispatcher, %s\n",
25583 get_decl_name (resolver
));
25585 push_cfun (DECL_STRUCT_FUNCTION (resolver
));
25586 auto_vec
<tree
, 2> fn_ver_vec
;
25588 for (struct cgraph_function_version_info
*vinfo
= ninfo
->next
;
25590 vinfo
= vinfo
->next
)
25592 struct cgraph_node
*version
= vinfo
->this_node
;
25593 /* Check for virtual functions here again, as by this time it should
25594 have been determined if this function needs a vtable index or
25595 not. This happens for methods in derived classes that override
25596 virtual methods in base classes but are not explicitly marked as
25598 if (DECL_VINDEX (version
->decl
))
25599 sorry ("Virtual function multiversioning not supported");
25601 fn_ver_vec
.safe_push (version
->decl
);
25604 dispatch_function_versions (resolver
, &fn_ver_vec
, &empty_bb
);
25605 cgraph_edge::rebuild_edges ();
25610 /* Hook to decide if we need to scan function gimple statements to
25611 collect target specific information for inlining, and update the
25612 corresponding RS6000_FN_TARGET_INFO_* bit in INFO if we are able
25613 to predict which ISA feature is used at this time. Return true
25614 if we need to scan, otherwise return false. */
25617 rs6000_need_ipa_fn_target_info (const_tree decl
,
25618 unsigned int &info ATTRIBUTE_UNUSED
)
25620 tree target
= DECL_FUNCTION_SPECIFIC_TARGET (decl
);
25622 target
= target_option_default_node
;
25623 struct cl_target_option
*opts
= TREE_TARGET_OPTION (target
);
25625 /* See PR102059, we only handle HTM for now, so will only do
25626 the consequent scannings when HTM feature enabled. */
25627 if (opts
->x_rs6000_isa_flags
& OPTION_MASK_HTM
)
25633 /* Hook to update target specific information INFO for inlining by
25634 checking the given STMT. Return false if we don't need to scan
25635 any more, otherwise return true. */
25638 rs6000_update_ipa_fn_target_info (unsigned int &info
, const gimple
*stmt
)
25640 /* Assume inline asm can use any instruction features. */
25641 if (gimple_code (stmt
) == GIMPLE_ASM
)
25643 /* Should set any bits we concerned, for now OPTION_MASK_HTM is
25644 the only bit we care about. */
25645 info
|= RS6000_FN_TARGET_INFO_HTM
;
25648 else if (gimple_code (stmt
) == GIMPLE_CALL
)
25650 tree fndecl
= gimple_call_fndecl (stmt
);
25651 if (fndecl
&& fndecl_built_in_p (fndecl
, BUILT_IN_MD
))
25653 enum rs6000_gen_builtins fcode
25654 = (enum rs6000_gen_builtins
) DECL_MD_FUNCTION_CODE (fndecl
);
25655 /* HTM bifs definitely exploit HTM insns. */
25656 if (bif_is_htm (rs6000_builtin_info
[fcode
]))
25658 info
|= RS6000_FN_TARGET_INFO_HTM
;
25667 /* Hook to determine if one function can safely inline another. */
25670 rs6000_can_inline_p (tree caller
, tree callee
)
25673 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
25674 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
25676 /* If the callee has no option attributes, then it is ok to inline. */
25682 HOST_WIDE_INT caller_isa
;
25683 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
25684 HOST_WIDE_INT callee_isa
= callee_opts
->x_rs6000_isa_flags
;
25685 HOST_WIDE_INT explicit_isa
= callee_opts
->x_rs6000_isa_flags_explicit
;
25687 /* If the caller has option attributes, then use them.
25688 Otherwise, use the command line options. */
25690 caller_isa
= TREE_TARGET_OPTION (caller_tree
)->x_rs6000_isa_flags
;
25692 caller_isa
= rs6000_isa_flags
;
25694 cgraph_node
*callee_node
= cgraph_node::get (callee
);
25695 if (ipa_fn_summaries
&& ipa_fn_summaries
->get (callee_node
) != NULL
)
25697 unsigned int info
= ipa_fn_summaries
->get (callee_node
)->target_info
;
25698 if ((info
& RS6000_FN_TARGET_INFO_HTM
) == 0)
25700 callee_isa
&= ~OPTION_MASK_HTM
;
25701 explicit_isa
&= ~OPTION_MASK_HTM
;
25705 /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
25707 callee_isa
&= ~(OPTION_MASK_P8_FUSION
| OPTION_MASK_P10_FUSION
);
25708 explicit_isa
&= ~(OPTION_MASK_P8_FUSION
| OPTION_MASK_P10_FUSION
);
25710 /* The callee's options must be a subset of the caller's options, i.e.
25711 a vsx function may inline an altivec function, but a no-vsx function
25712 must not inline a vsx function. However, for those options that the
25713 callee has explicitly enabled or disabled, then we must enforce that
25714 the callee's and caller's options match exactly; see PR70010. */
25715 if (((caller_isa
& callee_isa
) == callee_isa
)
25716 && (caller_isa
& explicit_isa
) == (callee_isa
& explicit_isa
))
25720 if (TARGET_DEBUG_TARGET
)
25721 fprintf (stderr
, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
25722 get_decl_name (caller
), get_decl_name (callee
),
25723 (ret
? "can" : "cannot"));
25728 /* Allocate a stack temp and fixup the address so it meets the particular
25729 memory requirements (either offetable or REG+REG addressing). */
25732 rs6000_allocate_stack_temp (machine_mode mode
,
25733 bool offsettable_p
,
25736 rtx stack
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
25737 rtx addr
= XEXP (stack
, 0);
25738 int strict_p
= reload_completed
;
25740 if (!legitimate_indirect_address_p (addr
, strict_p
))
25743 && !rs6000_legitimate_offset_address_p (mode
, addr
, strict_p
, true))
25744 stack
= replace_equiv_address (stack
, copy_addr_to_reg (addr
));
25746 else if (reg_reg_p
&& !legitimate_indexed_address_p (addr
, strict_p
))
25747 stack
= replace_equiv_address (stack
, copy_addr_to_reg (addr
));
25753 /* Given a memory reference, if it is not a reg or reg+reg addressing,
25754 convert to such a form to deal with memory reference instructions
25755 like STFIWX and LDBRX that only take reg+reg addressing. */
25758 rs6000_force_indexed_or_indirect_mem (rtx x
)
25760 machine_mode mode
= GET_MODE (x
);
25762 gcc_assert (MEM_P (x
));
25763 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x
, mode
))
25765 rtx addr
= XEXP (x
, 0);
25766 if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
25768 rtx reg
= XEXP (addr
, 0);
25769 HOST_WIDE_INT size
= GET_MODE_SIZE (GET_MODE (x
));
25770 rtx size_rtx
= GEN_INT ((GET_CODE (addr
) == PRE_DEC
) ? -size
: size
);
25771 gcc_assert (REG_P (reg
));
25772 emit_insn (gen_add3_insn (reg
, reg
, size_rtx
));
25775 else if (GET_CODE (addr
) == PRE_MODIFY
)
25777 rtx reg
= XEXP (addr
, 0);
25778 rtx expr
= XEXP (addr
, 1);
25779 gcc_assert (REG_P (reg
));
25780 gcc_assert (GET_CODE (expr
) == PLUS
);
25781 emit_insn (gen_add3_insn (reg
, XEXP (expr
, 0), XEXP (expr
, 1)));
25785 if (GET_CODE (addr
) == PLUS
)
25787 rtx op0
= XEXP (addr
, 0);
25788 rtx op1
= XEXP (addr
, 1);
25789 op0
= force_reg (Pmode
, op0
);
25790 op1
= force_reg (Pmode
, op1
);
25791 x
= replace_equiv_address (x
, gen_rtx_PLUS (Pmode
, op0
, op1
));
25794 x
= replace_equiv_address (x
, force_reg (Pmode
, addr
));
25800 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
25802 On the RS/6000, all integer constants are acceptable, most won't be valid
25803 for particular insns, though. Only easy FP constants are acceptable. */
25806 rs6000_legitimate_constant_p (machine_mode mode
, rtx x
)
25808 if (TARGET_ELF
&& tls_referenced_p (x
))
25811 if (CONST_DOUBLE_P (x
))
25812 return easy_fp_constant (x
, mode
);
25814 if (GET_CODE (x
) == CONST_VECTOR
)
25815 return easy_vector_constant (x
, mode
);
25821 /* Implement TARGET_PRECOMPUTE_TLS_P.
25823 On the AIX, TLS symbols are in the TOC, which is maintained in the
25824 constant pool. AIX TOC TLS symbols need to be pre-computed, but
25825 must be considered legitimate constants. */
25828 rs6000_aix_precompute_tls_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
25830 return tls_referenced_p (x
);
25835 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
25838 chain_already_loaded (rtx_insn
*last
)
25840 for (; last
!= NULL
; last
= PREV_INSN (last
))
25842 if (NONJUMP_INSN_P (last
))
25844 rtx patt
= PATTERN (last
);
25846 if (GET_CODE (patt
) == SET
)
25848 rtx lhs
= XEXP (patt
, 0);
25850 if (REG_P (lhs
) && REGNO (lhs
) == STATIC_CHAIN_REGNUM
)
25858 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
25861 rs6000_call_aix (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
25863 rtx func
= func_desc
;
25864 rtx toc_reg
= gen_rtx_REG (Pmode
, TOC_REGNUM
);
25865 rtx toc_load
= NULL_RTX
;
25866 rtx toc_restore
= NULL_RTX
;
25868 rtx abi_reg
= NULL_RTX
;
25872 bool is_pltseq_longcall
;
25875 tlsarg
= global_tlsarg
;
25877 /* Handle longcall attributes. */
25878 is_pltseq_longcall
= false;
25879 if ((INTVAL (cookie
) & CALL_LONG
) != 0
25880 && GET_CODE (func_desc
) == SYMBOL_REF
)
25882 func
= rs6000_longcall_ref (func_desc
, tlsarg
);
25884 is_pltseq_longcall
= true;
25887 /* Handle indirect calls. */
25888 if (!SYMBOL_REF_P (func
)
25889 || (DEFAULT_ABI
== ABI_AIX
&& !SYMBOL_REF_FUNCTION_P (func
)))
25891 if (!rs6000_pcrel_p ())
25893 /* Save the TOC into its reserved slot before the call,
25894 and prepare to restore it after the call. */
25895 rtx stack_toc_offset
= GEN_INT (RS6000_TOC_SAVE_SLOT
);
25896 rtx stack_toc_unspec
= gen_rtx_UNSPEC (Pmode
,
25897 gen_rtvec (1, stack_toc_offset
),
25899 toc_restore
= gen_rtx_SET (toc_reg
, stack_toc_unspec
);
25901 /* Can we optimize saving the TOC in the prologue or
25902 do we need to do it at every call? */
25903 if (TARGET_SAVE_TOC_INDIRECT
&& !cfun
->calls_alloca
)
25904 cfun
->machine
->save_toc_in_prologue
= true;
25907 rtx stack_ptr
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
25908 rtx stack_toc_mem
= gen_frame_mem (Pmode
,
25909 gen_rtx_PLUS (Pmode
, stack_ptr
,
25910 stack_toc_offset
));
25911 MEM_VOLATILE_P (stack_toc_mem
) = 1;
25912 if (is_pltseq_longcall
)
25914 rtvec v
= gen_rtvec (3, toc_reg
, func_desc
, tlsarg
);
25915 rtx mark_toc_reg
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25916 emit_insn (gen_rtx_SET (stack_toc_mem
, mark_toc_reg
));
25919 emit_move_insn (stack_toc_mem
, toc_reg
);
25923 if (DEFAULT_ABI
== ABI_ELFv2
)
25925 /* A function pointer in the ELFv2 ABI is just a plain address, but
25926 the ABI requires it to be loaded into r12 before the call. */
25927 func_addr
= gen_rtx_REG (Pmode
, 12);
25928 emit_move_insn (func_addr
, func
);
25929 abi_reg
= func_addr
;
25930 /* Indirect calls via CTR are strongly preferred over indirect
25931 calls via LR, so move the address there. Needed to mark
25932 this insn for linker plt sequence editing too. */
25933 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
25934 if (is_pltseq_longcall
)
25936 rtvec v
= gen_rtvec (3, abi_reg
, func_desc
, tlsarg
);
25937 rtx mark_func
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25938 emit_insn (gen_rtx_SET (func_addr
, mark_func
));
25939 v
= gen_rtvec (2, func_addr
, func_desc
);
25940 func_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25943 emit_move_insn (func_addr
, abi_reg
);
25947 /* A function pointer under AIX is a pointer to a data area whose
25948 first word contains the actual address of the function, whose
25949 second word contains a pointer to its TOC, and whose third word
25950 contains a value to place in the static chain register (r11).
25951 Note that if we load the static chain, our "trampoline" need
25952 not have any executable code. */
25954 /* Load up address of the actual function. */
25955 func
= force_reg (Pmode
, func
);
25956 func_addr
= gen_reg_rtx (Pmode
);
25957 emit_move_insn (func_addr
, gen_rtx_MEM (Pmode
, func
));
25959 /* Indirect calls via CTR are strongly preferred over indirect
25960 calls via LR, so move the address there. */
25961 rtx ctr_reg
= gen_rtx_REG (Pmode
, CTR_REGNO
);
25962 emit_move_insn (ctr_reg
, func_addr
);
25963 func_addr
= ctr_reg
;
25965 /* Prepare to load the TOC of the called function. Note that the
25966 TOC load must happen immediately before the actual call so
25967 that unwinding the TOC registers works correctly. See the
25968 comment in frob_update_context. */
25969 rtx func_toc_offset
= GEN_INT (GET_MODE_SIZE (Pmode
));
25970 rtx func_toc_mem
= gen_rtx_MEM (Pmode
,
25971 gen_rtx_PLUS (Pmode
, func
,
25973 toc_load
= gen_rtx_USE (VOIDmode
, func_toc_mem
);
25975 /* If we have a static chain, load it up. But, if the call was
25976 originally direct, the 3rd word has not been written since no
25977 trampoline has been built, so we ought not to load it, lest we
25978 override a static chain value. */
25979 if (!(GET_CODE (func_desc
) == SYMBOL_REF
25980 && SYMBOL_REF_FUNCTION_P (func_desc
))
25981 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
25982 && !chain_already_loaded (get_current_sequence ()->next
->last
))
25984 rtx sc_reg
= gen_rtx_REG (Pmode
, STATIC_CHAIN_REGNUM
);
25985 rtx func_sc_offset
= GEN_INT (2 * GET_MODE_SIZE (Pmode
));
25986 rtx func_sc_mem
= gen_rtx_MEM (Pmode
,
25987 gen_rtx_PLUS (Pmode
, func
,
25989 emit_move_insn (sc_reg
, func_sc_mem
);
25996 /* No TOC register needed for calls from PC-relative callers. */
25997 if (!rs6000_pcrel_p ())
25998 /* Direct calls use the TOC: for local calls, the callee will
25999 assume the TOC register is set; for non-local calls, the
26000 PLT stub needs the TOC register. */
26005 /* Create the call. */
26006 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
26007 if (value
!= NULL_RTX
)
26008 call
[0] = gen_rtx_SET (value
, call
[0]);
26009 call
[1] = gen_rtx_USE (VOIDmode
, cookie
);
26013 call
[n_call
++] = toc_load
;
26015 call
[n_call
++] = toc_restore
;
26017 call
[n_call
++] = gen_hard_reg_clobber (Pmode
, LR_REGNO
);
26019 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (n_call
, call
));
26020 insn
= emit_call_insn (insn
);
26022 /* Mention all registers defined by the ABI to hold information
26023 as uses in CALL_INSN_FUNCTION_USAGE. */
26025 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), abi_reg
);
26028 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
26031 rs6000_sibcall_aix (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
26035 rtx r12
= NULL_RTX
;
26036 rtx func_addr
= func_desc
;
26039 tlsarg
= global_tlsarg
;
26041 /* Handle longcall attributes. */
26042 if (INTVAL (cookie
) & CALL_LONG
&& SYMBOL_REF_P (func_desc
))
26044 /* PCREL can do a sibling call to a longcall function
26045 because we don't need to restore the TOC register. */
26046 gcc_assert (rs6000_pcrel_p ());
26047 func_desc
= rs6000_longcall_ref (func_desc
, tlsarg
);
26050 gcc_assert (INTVAL (cookie
) == 0);
26052 /* For ELFv2, r12 and CTR need to hold the function address
26053 for an indirect call. */
26054 if (GET_CODE (func_desc
) != SYMBOL_REF
&& DEFAULT_ABI
== ABI_ELFv2
)
26056 r12
= gen_rtx_REG (Pmode
, 12);
26057 emit_move_insn (r12
, func_desc
);
26058 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
26059 emit_move_insn (func_addr
, r12
);
26062 /* Create the call. */
26063 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
26064 if (value
!= NULL_RTX
)
26065 call
[0] = gen_rtx_SET (value
, call
[0]);
26067 call
[1] = simple_return_rtx
;
26069 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (2, call
));
26070 insn
= emit_call_insn (insn
);
26072 /* Note use of the TOC register. */
26073 if (!rs6000_pcrel_p ())
26074 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
),
26075 gen_rtx_REG (Pmode
, TOC_REGNUM
));
26077 /* Note use of r12. */
26079 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), r12
);
26082 /* Expand code to perform a call under the SYSV4 ABI. */
26085 rs6000_call_sysv (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
26087 rtx func
= func_desc
;
26091 rtx abi_reg
= NULL_RTX
;
26095 tlsarg
= global_tlsarg
;
26097 /* Handle longcall attributes. */
26098 if ((INTVAL (cookie
) & CALL_LONG
) != 0
26099 && GET_CODE (func_desc
) == SYMBOL_REF
)
26101 func
= rs6000_longcall_ref (func_desc
, tlsarg
);
26102 /* If the longcall was implemented as an inline PLT call using
26103 PLT unspecs then func will be REG:r11. If not, func will be
26104 a pseudo reg. The inline PLT call sequence supports lazy
26105 linking (and longcalls to functions in dlopen'd libraries).
26106 The other style of longcalls don't. The lazy linking entry
26107 to the dynamic symbol resolver requires r11 be the function
26108 address (as it is for linker generated PLT stubs). Ensure
26109 r11 stays valid to the bctrl by marking r11 used by the call. */
26114 /* Handle indirect calls. */
26115 if (GET_CODE (func
) != SYMBOL_REF
)
26117 func
= force_reg (Pmode
, func
);
26119 /* Indirect calls via CTR are strongly preferred over indirect
26120 calls via LR, so move the address there. That can't be left
26121 to reload because we want to mark every instruction in an
26122 inline PLT call sequence with a reloc, enabling the linker to
26123 edit the sequence back to a direct call when that makes sense. */
26124 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
26127 rtvec v
= gen_rtvec (3, func
, func_desc
, tlsarg
);
26128 rtx mark_func
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
26129 emit_insn (gen_rtx_SET (func_addr
, mark_func
));
26130 v
= gen_rtvec (2, func_addr
, func_desc
);
26131 func_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
26134 emit_move_insn (func_addr
, func
);
26139 /* Create the call. */
26140 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
26141 if (value
!= NULL_RTX
)
26142 call
[0] = gen_rtx_SET (value
, call
[0]);
26144 call
[1] = gen_rtx_USE (VOIDmode
, cookie
);
26146 if (TARGET_SECURE_PLT
26148 && GET_CODE (func_addr
) == SYMBOL_REF
26149 && !SYMBOL_REF_LOCAL_P (func_addr
))
26150 call
[n
++] = gen_rtx_USE (VOIDmode
, pic_offset_table_rtx
);
26152 call
[n
++] = gen_hard_reg_clobber (Pmode
, LR_REGNO
);
26154 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (n
, call
));
26155 insn
= emit_call_insn (insn
);
26157 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), abi_reg
);
26160 /* Expand code to perform a sibling call under the SysV4 ABI. */
26163 rs6000_sibcall_sysv (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
26165 rtx func
= func_desc
;
26169 rtx abi_reg
= NULL_RTX
;
26172 tlsarg
= global_tlsarg
;
26174 /* Handle longcall attributes. */
26175 if ((INTVAL (cookie
) & CALL_LONG
) != 0
26176 && GET_CODE (func_desc
) == SYMBOL_REF
)
26178 func
= rs6000_longcall_ref (func_desc
, tlsarg
);
26179 /* If the longcall was implemented as an inline PLT call using
26180 PLT unspecs then func will be REG:r11. If not, func will be
26181 a pseudo reg. The inline PLT call sequence supports lazy
26182 linking (and longcalls to functions in dlopen'd libraries).
26183 The other style of longcalls don't. The lazy linking entry
26184 to the dynamic symbol resolver requires r11 be the function
26185 address (as it is for linker generated PLT stubs). Ensure
26186 r11 stays valid to the bctr by marking r11 used by the call. */
26191 /* Handle indirect calls. */
26192 if (GET_CODE (func
) != SYMBOL_REF
)
26194 func
= force_reg (Pmode
, func
);
26196 /* Indirect sibcalls must go via CTR. That can't be left to
26197 reload because we want to mark every instruction in an inline
26198 PLT call sequence with a reloc, enabling the linker to edit
26199 the sequence back to a direct call when that makes sense. */
26200 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
26203 rtvec v
= gen_rtvec (3, func
, func_desc
, tlsarg
);
26204 rtx mark_func
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
26205 emit_insn (gen_rtx_SET (func_addr
, mark_func
));
26206 v
= gen_rtvec (2, func_addr
, func_desc
);
26207 func_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
26210 emit_move_insn (func_addr
, func
);
26215 /* Create the call. */
26216 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
26217 if (value
!= NULL_RTX
)
26218 call
[0] = gen_rtx_SET (value
, call
[0]);
26220 call
[1] = gen_rtx_USE (VOIDmode
, cookie
);
26221 call
[2] = simple_return_rtx
;
26223 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (3, call
));
26224 insn
= emit_call_insn (insn
);
26226 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), abi_reg
);
26231 /* Expand code to perform a call under the Darwin ABI.
26232 Modulo handling of mlongcall, this is much the same as sysv.
26233 if/when the longcall optimisation is removed, we could drop this
26234 code and use the sysv case (taking care to avoid the tls stuff).
26236 We can use this for sibcalls too, if needed. */
26239 rs6000_call_darwin_1 (rtx value
, rtx func_desc
, rtx tlsarg
,
26240 rtx cookie
, bool sibcall
)
26242 rtx func
= func_desc
;
26246 int cookie_val
= INTVAL (cookie
);
26247 bool make_island
= false;
26249 /* Handle longcall attributes, there are two cases for Darwin:
26250 1) Newer linkers are capable of synthesising any branch islands needed.
26251 2) We need a helper branch island synthesised by the compiler.
26252 The second case has mostly been retired and we don't use it for m64.
26253 In fact, it's is an optimisation, we could just indirect as sysv does..
26254 ... however, backwards compatibility for now.
26255 If we're going to use this, then we need to keep the CALL_LONG bit set,
26256 so that we can pick up the special insn form later. */
26257 if ((cookie_val
& CALL_LONG
) != 0
26258 && GET_CODE (func_desc
) == SYMBOL_REF
)
26260 /* FIXME: the longcall opt should not hang off this flag, it is most
26261 likely incorrect for kernel-mode code-generation. */
26262 if (darwin_symbol_stubs
&& TARGET_32BIT
)
26263 make_island
= true; /* Do nothing yet, retain the CALL_LONG flag. */
26266 /* The linker is capable of doing this, but the user explicitly
26267 asked for -mlongcall, so we'll do the 'normal' version. */
26268 func
= rs6000_longcall_ref (func_desc
, NULL_RTX
);
26269 cookie_val
&= ~CALL_LONG
; /* Handled, zap it. */
26273 /* Handle indirect calls. */
26274 if (GET_CODE (func
) != SYMBOL_REF
)
26276 func
= force_reg (Pmode
, func
);
26278 /* Indirect calls via CTR are strongly preferred over indirect
26279 calls via LR, and are required for indirect sibcalls, so move
26280 the address there. */
26281 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
26282 emit_move_insn (func_addr
, func
);
26287 /* Create the call. */
26288 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
26289 if (value
!= NULL_RTX
)
26290 call
[0] = gen_rtx_SET (value
, call
[0]);
26292 call
[1] = gen_rtx_USE (VOIDmode
, GEN_INT (cookie_val
));
26295 call
[2] = simple_return_rtx
;
26297 call
[2] = gen_hard_reg_clobber (Pmode
, LR_REGNO
);
26299 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (3, call
));
26300 insn
= emit_call_insn (insn
);
26301 /* Now we have the debug info in the insn, we can set up the branch island
26302 if we're using one. */
26305 tree funname
= get_identifier (XSTR (func_desc
, 0));
26307 if (no_previous_def (funname
))
26309 rtx label_rtx
= gen_label_rtx ();
26310 char *label_buf
, temp_buf
[256];
26311 ASM_GENERATE_INTERNAL_LABEL (temp_buf
, "L",
26312 CODE_LABEL_NUMBER (label_rtx
));
26313 label_buf
= temp_buf
[0] == '*' ? temp_buf
+ 1 : temp_buf
;
26314 tree labelname
= get_identifier (label_buf
);
26315 add_compiler_branch_island (labelname
, funname
,
26316 insn_line ((const rtx_insn
*)insn
));
26323 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED
, rtx func_desc ATTRIBUTE_UNUSED
,
26324 rtx tlsarg ATTRIBUTE_UNUSED
, rtx cookie ATTRIBUTE_UNUSED
)
26327 rs6000_call_darwin_1 (value
, func_desc
, tlsarg
, cookie
, false);
26335 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED
, rtx func_desc ATTRIBUTE_UNUSED
,
26336 rtx tlsarg ATTRIBUTE_UNUSED
, rtx cookie ATTRIBUTE_UNUSED
)
26339 rs6000_call_darwin_1 (value
, func_desc
, tlsarg
, cookie
, true);
26345 /* Return whether we should generate PC-relative code for FNDECL. */
26347 rs6000_fndecl_pcrel_p (const_tree fndecl
)
26349 if (DEFAULT_ABI
!= ABI_ELFv2
)
26352 struct cl_target_option
*opts
= target_opts_for_fn (fndecl
);
26354 return ((opts
->x_rs6000_isa_flags
& OPTION_MASK_PCREL
) != 0
26355 && TARGET_CMODEL
== CMODEL_MEDIUM
);
26358 /* Return whether we should generate PC-relative code for *FN. */
26360 rs6000_function_pcrel_p (struct function
*fn
)
26362 if (DEFAULT_ABI
!= ABI_ELFv2
)
26365 /* Optimize usual case. */
26367 return ((rs6000_isa_flags
& OPTION_MASK_PCREL
) != 0
26368 && TARGET_CMODEL
== CMODEL_MEDIUM
);
26370 return rs6000_fndecl_pcrel_p (fn
->decl
);
26373 /* Return whether we should generate PC-relative code for the current
26378 return (DEFAULT_ABI
== ABI_ELFv2
26379 && (rs6000_isa_flags
& OPTION_MASK_PCREL
) != 0
26380 && TARGET_CMODEL
== CMODEL_MEDIUM
);
26384 /* Given an address (ADDR), a mode (MODE), and what the format of the
26385 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
26386 for the address. */
26389 address_to_insn_form (rtx addr
,
26391 enum non_prefixed_form non_prefixed_format
)
26393 /* Single register is easy. */
26394 if (REG_P (addr
) || SUBREG_P (addr
))
26395 return INSN_FORM_BASE_REG
;
26397 /* If the non prefixed instruction format doesn't support offset addressing,
26398 make sure only indexed addressing is allowed.
26400 We special case SDmode so that the register allocator does not try to move
26401 SDmode through GPR registers, but instead uses the 32-bit integer load and
26402 store instructions for the floating point registers. */
26403 if (non_prefixed_format
== NON_PREFIXED_X
|| (mode
== SDmode
&& TARGET_DFP
))
26405 if (GET_CODE (addr
) != PLUS
)
26406 return INSN_FORM_BAD
;
26408 rtx op0
= XEXP (addr
, 0);
26409 rtx op1
= XEXP (addr
, 1);
26410 if (!REG_P (op0
) && !SUBREG_P (op0
))
26411 return INSN_FORM_BAD
;
26413 if (!REG_P (op1
) && !SUBREG_P (op1
))
26414 return INSN_FORM_BAD
;
26416 return INSN_FORM_X
;
26419 /* Deal with update forms. */
26420 if (GET_RTX_CLASS (GET_CODE (addr
)) == RTX_AUTOINC
)
26421 return INSN_FORM_UPDATE
;
26423 /* Handle PC-relative symbols and labels. Check for both local and
26424 external symbols. Assume labels are always local. TLS symbols
26425 are not PC-relative for rs6000. */
26428 if (LABEL_REF_P (addr
))
26429 return INSN_FORM_PCREL_LOCAL
;
26431 if (SYMBOL_REF_P (addr
) && !SYMBOL_REF_TLS_MODEL (addr
))
26433 if (!SYMBOL_REF_LOCAL_P (addr
))
26434 return INSN_FORM_PCREL_EXTERNAL
;
26436 return INSN_FORM_PCREL_LOCAL
;
26440 if (GET_CODE (addr
) == CONST
)
26441 addr
= XEXP (addr
, 0);
26443 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
26444 if (GET_CODE (addr
) == LO_SUM
)
26445 return INSN_FORM_LO_SUM
;
26447 /* Everything below must be an offset address of some form. */
26448 if (GET_CODE (addr
) != PLUS
)
26449 return INSN_FORM_BAD
;
26451 rtx op0
= XEXP (addr
, 0);
26452 rtx op1
= XEXP (addr
, 1);
26454 /* Check for indexed addresses. */
26455 if (REG_P (op1
) || SUBREG_P (op1
))
26457 if (REG_P (op0
) || SUBREG_P (op0
))
26458 return INSN_FORM_X
;
26460 return INSN_FORM_BAD
;
26463 if (!CONST_INT_P (op1
))
26464 return INSN_FORM_BAD
;
26466 HOST_WIDE_INT offset
= INTVAL (op1
);
26467 if (!SIGNED_INTEGER_34BIT_P (offset
))
26468 return INSN_FORM_BAD
;
26470 /* Check for local and external PC-relative addresses. Labels are always
26471 local. TLS symbols are not PC-relative for rs6000. */
26474 if (LABEL_REF_P (op0
))
26475 return INSN_FORM_PCREL_LOCAL
;
26477 if (SYMBOL_REF_P (op0
) && !SYMBOL_REF_TLS_MODEL (op0
))
26479 if (!SYMBOL_REF_LOCAL_P (op0
))
26480 return INSN_FORM_PCREL_EXTERNAL
;
26482 return INSN_FORM_PCREL_LOCAL
;
26486 /* If it isn't PC-relative, the address must use a base register. */
26487 if (!REG_P (op0
) && !SUBREG_P (op0
))
26488 return INSN_FORM_BAD
;
26490 /* Large offsets must be prefixed. */
26491 if (!SIGNED_INTEGER_16BIT_P (offset
))
26493 if (TARGET_PREFIXED
)
26494 return INSN_FORM_PREFIXED_NUMERIC
;
26496 return INSN_FORM_BAD
;
26499 /* We have a 16-bit offset, see what default instruction format to use. */
26500 if (non_prefixed_format
== NON_PREFIXED_DEFAULT
)
26502 unsigned size
= GET_MODE_SIZE (mode
);
26504 /* On 64-bit systems, assume 64-bit integers need to use DS form
26505 addresses (for LD/STD). VSX vectors need to use DQ form addresses
26506 (for LXV and STXV). TImode is problematical in that its normal usage
26507 is expected to be GPRs where it wants a DS instruction format, but if
26508 it goes into the vector registers, it wants a DQ instruction
26510 if (TARGET_POWERPC64
&& size
>= 8 && GET_MODE_CLASS (mode
) == MODE_INT
)
26511 non_prefixed_format
= NON_PREFIXED_DS
;
26513 else if (TARGET_VSX
&& size
>= 16
26514 && (VECTOR_MODE_P (mode
) || VECTOR_ALIGNMENT_P (mode
)))
26515 non_prefixed_format
= NON_PREFIXED_DQ
;
26518 non_prefixed_format
= NON_PREFIXED_D
;
26521 /* Classify the D/DS/DQ-form addresses. */
26522 switch (non_prefixed_format
)
26524 /* Instruction format D, all 16 bits are valid. */
26525 case NON_PREFIXED_D
:
26526 return INSN_FORM_D
;
26528 /* Instruction format DS, bottom 2 bits must be 0. */
26529 case NON_PREFIXED_DS
:
26530 if ((offset
& 3) == 0)
26531 return INSN_FORM_DS
;
26533 else if (TARGET_PREFIXED
)
26534 return INSN_FORM_PREFIXED_NUMERIC
;
26537 return INSN_FORM_BAD
;
26539 /* Instruction format DQ, bottom 4 bits must be 0. */
26540 case NON_PREFIXED_DQ
:
26541 if ((offset
& 15) == 0)
26542 return INSN_FORM_DQ
;
26544 else if (TARGET_PREFIXED
)
26545 return INSN_FORM_PREFIXED_NUMERIC
;
26548 return INSN_FORM_BAD
;
26554 return INSN_FORM_BAD
;
26557 /* Given address rtx ADDR for a load of MODE, is this legitimate for a
26558 non-prefixed D-form or X-form instruction? NON_PREFIXED_FORMAT is
26559 given NON_PREFIXED_D or NON_PREFIXED_DS to indicate whether we want
26560 a D-form or DS-form instruction. X-form and base_reg are always
26563 address_is_non_pfx_d_or_x (rtx addr
, machine_mode mode
,
26564 enum non_prefixed_form non_prefixed_format
)
26566 enum insn_form result_form
;
26568 result_form
= address_to_insn_form (addr
, mode
, non_prefixed_format
);
26570 switch (non_prefixed_format
)
26572 case NON_PREFIXED_D
:
26573 switch (result_form
)
26578 case INSN_FORM_BASE_REG
:
26584 case NON_PREFIXED_DS
:
26585 switch (result_form
)
26589 case INSN_FORM_BASE_REG
:
26601 /* Return true if an REG with a given MODE is loaded from or stored into a MEM
26602 location uses a non-prefixed D/DS/DQ-form address. This is used to validate
26603 the load or store with the PCREL_OPT optimization to make sure it is an
26604 instruction that can be optimized.
26606 We need to specify the MODE separately from the REG to allow for loads that
26607 include zero/sign/float extension. */
26610 pcrel_opt_valid_mem_p (rtx reg
, machine_mode mode
, rtx mem
)
26612 /* If the instruction is indexed only like LFIWAX/LXSIWAX we cannot do the
26613 PCREL_OPT optimization. */
26614 enum non_prefixed_form non_prefixed
= reg_to_non_prefixed (reg
, mode
);
26615 if (non_prefixed
== NON_PREFIXED_X
)
26618 /* Check if this is a non-prefixed D/DS/DQ-form instruction. */
26619 rtx addr
= XEXP (mem
, 0);
26620 enum insn_form iform
= address_to_insn_form (addr
, mode
, non_prefixed
);
26621 return (iform
== INSN_FORM_BASE_REG
26622 || iform
== INSN_FORM_D
26623 || iform
== INSN_FORM_DS
26624 || iform
== INSN_FORM_DQ
);
26627 /* Helper function to see if we're potentially looking at lfs/stfs.
26628 - PARALLEL containing a SET and a CLOBBER
26630 - SET is from UNSPEC_SI_FROM_SF to MEM:SI
26631 - CLOBBER is a V4SF
26633 - SET is from UNSPEC_SF_FROM_SI to REG:SF
26638 is_lfs_stfs_insn (rtx_insn
*insn
)
26640 rtx pattern
= PATTERN (insn
);
26641 if (GET_CODE (pattern
) != PARALLEL
)
26644 /* This should be a parallel with exactly one set and one clobber. */
26645 if (XVECLEN (pattern
, 0) != 2)
26648 rtx set
= XVECEXP (pattern
, 0, 0);
26649 if (GET_CODE (set
) != SET
)
26652 rtx clobber
= XVECEXP (pattern
, 0, 1);
26653 if (GET_CODE (clobber
) != CLOBBER
)
26656 /* All we care is that the destination of the SET is a mem:SI,
26657 the source should be an UNSPEC_SI_FROM_SF, and the clobber
26658 should be a scratch:V4SF. */
26660 rtx dest
= SET_DEST (set
);
26661 rtx src
= SET_SRC (set
);
26662 rtx scratch
= SET_DEST (clobber
);
26664 if (GET_CODE (src
) != UNSPEC
)
26668 if (XINT (src
, 1) == UNSPEC_SI_FROM_SF
26669 && GET_CODE (dest
) == MEM
&& GET_MODE (dest
) == SImode
26670 && GET_CODE (scratch
) == SCRATCH
&& GET_MODE (scratch
) == V4SFmode
)
26674 if (XINT (src
, 1) == UNSPEC_SF_FROM_SI
26675 && GET_CODE (dest
) == REG
&& GET_MODE (dest
) == SFmode
26676 && GET_CODE (scratch
) == SCRATCH
&& GET_MODE (scratch
) == DImode
)
26682 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
26683 instruction format (D/DS/DQ) used for offset memory. */
26685 enum non_prefixed_form
26686 reg_to_non_prefixed (rtx reg
, machine_mode mode
)
26688 /* If it isn't a register, use the defaults. */
26689 if (!REG_P (reg
) && !SUBREG_P (reg
))
26690 return NON_PREFIXED_DEFAULT
;
26692 unsigned int r
= reg_or_subregno (reg
);
26694 /* If we have a pseudo, use the default instruction format. */
26695 if (!HARD_REGISTER_NUM_P (r
))
26696 return NON_PREFIXED_DEFAULT
;
26698 unsigned size
= GET_MODE_SIZE (mode
);
26700 /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
26701 128-bit floating point, and 128-bit integers. Before power9, only indexed
26702 addressing was available for vectors. */
26703 if (FP_REGNO_P (r
))
26705 if (mode
== SFmode
|| size
== 8 || FLOAT128_2REG_P (mode
))
26706 return NON_PREFIXED_D
;
26709 return NON_PREFIXED_X
;
26711 else if (TARGET_VSX
&& size
>= 16
26712 && (VECTOR_MODE_P (mode
)
26713 || VECTOR_ALIGNMENT_P (mode
)
26714 || mode
== TImode
|| mode
== CTImode
))
26715 return (TARGET_P9_VECTOR
) ? NON_PREFIXED_DQ
: NON_PREFIXED_X
;
26718 return NON_PREFIXED_DEFAULT
;
26721 /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
26722 128-bit floating point, and 128-bit integers. Before power9, only indexed
26723 addressing was available. */
26724 else if (ALTIVEC_REGNO_P (r
))
26726 if (!TARGET_P9_VECTOR
)
26727 return NON_PREFIXED_X
;
26729 if (mode
== SFmode
|| size
== 8 || FLOAT128_2REG_P (mode
))
26730 return NON_PREFIXED_DS
;
26733 return NON_PREFIXED_X
;
26735 else if (TARGET_VSX
&& size
>= 16
26736 && (VECTOR_MODE_P (mode
)
26737 || VECTOR_ALIGNMENT_P (mode
)
26738 || mode
== TImode
|| mode
== CTImode
))
26739 return NON_PREFIXED_DQ
;
26742 return NON_PREFIXED_DEFAULT
;
26745 /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
26746 otherwise. Assume that any other register, such as LR, CRs, etc. will go
26747 through the GPR registers for memory operations. */
26748 else if (TARGET_POWERPC64
&& size
>= 8)
26749 return NON_PREFIXED_DS
;
26751 return NON_PREFIXED_D
;
26755 /* Whether a load instruction is a prefixed instruction. This is called from
26756 the prefixed attribute processing. */
26759 prefixed_load_p (rtx_insn
*insn
)
26761 /* Validate the insn to make sure it is a normal load insn. */
26762 extract_insn_cached (insn
);
26763 if (recog_data
.n_operands
< 2)
26766 rtx reg
= recog_data
.operand
[0];
26767 rtx mem
= recog_data
.operand
[1];
26769 if (!REG_P (reg
) && !SUBREG_P (reg
))
26775 /* Prefixed load instructions do not support update or indexed forms. */
26776 if (get_attr_indexed (insn
) == INDEXED_YES
26777 || get_attr_update (insn
) == UPDATE_YES
)
26780 /* LWA uses the DS format instead of the D format that LWZ uses. */
26781 enum non_prefixed_form non_prefixed
;
26782 machine_mode reg_mode
= GET_MODE (reg
);
26783 machine_mode mem_mode
= GET_MODE (mem
);
26785 if (mem_mode
== SImode
&& reg_mode
== DImode
26786 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
)
26787 non_prefixed
= NON_PREFIXED_DS
;
26790 non_prefixed
= reg_to_non_prefixed (reg
, mem_mode
);
26792 if (non_prefixed
== NON_PREFIXED_X
&& is_lfs_stfs_insn (insn
))
26793 return address_is_prefixed (XEXP (mem
, 0), mem_mode
, NON_PREFIXED_DEFAULT
);
26795 return address_is_prefixed (XEXP (mem
, 0), mem_mode
, non_prefixed
);
26798 /* Whether a store instruction is a prefixed instruction. This is called from
26799 the prefixed attribute processing. */
26802 prefixed_store_p (rtx_insn
*insn
)
26804 /* Validate the insn to make sure it is a normal store insn. */
26805 extract_insn_cached (insn
);
26806 if (recog_data
.n_operands
< 2)
26809 rtx mem
= recog_data
.operand
[0];
26810 rtx reg
= recog_data
.operand
[1];
26812 if (!REG_P (reg
) && !SUBREG_P (reg
))
26818 /* Prefixed store instructions do not support update or indexed forms. */
26819 if (get_attr_indexed (insn
) == INDEXED_YES
26820 || get_attr_update (insn
) == UPDATE_YES
)
26823 machine_mode mem_mode
= GET_MODE (mem
);
26824 rtx addr
= XEXP (mem
, 0);
26825 enum non_prefixed_form non_prefixed
= reg_to_non_prefixed (reg
, mem_mode
);
26827 /* Need to make sure we aren't looking at a stfs which doesn't look
26828 like the other things reg_to_non_prefixed/address_is_prefixed
26830 if (non_prefixed
== NON_PREFIXED_X
&& is_lfs_stfs_insn (insn
))
26831 return address_is_prefixed (addr
, mem_mode
, NON_PREFIXED_DEFAULT
);
26833 return address_is_prefixed (addr
, mem_mode
, non_prefixed
);
26836 /* Whether a load immediate or add instruction is a prefixed instruction. This
26837 is called from the prefixed attribute processing. */
26840 prefixed_paddi_p (rtx_insn
*insn
)
26842 rtx set
= single_set (insn
);
26846 rtx dest
= SET_DEST (set
);
26847 rtx src
= SET_SRC (set
);
26849 if (!REG_P (dest
) && !SUBREG_P (dest
))
26852 /* Is this a load immediate that can't be done with a simple ADDI or
26854 if (CONST_INT_P (src
))
26855 return (satisfies_constraint_eI (src
)
26856 && !satisfies_constraint_I (src
)
26857 && !satisfies_constraint_L (src
));
26859 /* Is this a PADDI instruction that can't be done with a simple ADDI or
26861 if (GET_CODE (src
) == PLUS
)
26863 rtx op1
= XEXP (src
, 1);
26865 return (CONST_INT_P (op1
)
26866 && satisfies_constraint_eI (op1
)
26867 && !satisfies_constraint_I (op1
)
26868 && !satisfies_constraint_L (op1
));
26871 /* If not, is it a load of a PC-relative address? */
26872 if (!TARGET_PCREL
|| GET_MODE (dest
) != Pmode
)
26875 if (!SYMBOL_REF_P (src
) && !LABEL_REF_P (src
) && GET_CODE (src
) != CONST
)
26878 enum insn_form iform
= address_to_insn_form (src
, Pmode
,
26879 NON_PREFIXED_DEFAULT
);
26881 return (iform
== INSN_FORM_PCREL_EXTERNAL
|| iform
== INSN_FORM_PCREL_LOCAL
);
26884 /* Whether the next instruction needs a 'p' prefix issued before the
26885 instruction is printed out. */
26886 static bool prepend_p_to_next_insn
;
26888 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
26889 outputting the assembler code. On the PowerPC, we remember if the current
26890 insn is a prefixed insn where we need to emit a 'p' before the insn.
26892 In addition, if the insn is part of a PC-relative reference to an external
26893 label optimization, this is recorded also. */
26895 rs6000_final_prescan_insn (rtx_insn
*insn
, rtx
[], int)
26897 prepend_p_to_next_insn
= (get_attr_maybe_prefixed (insn
)
26898 == MAYBE_PREFIXED_YES
26899 && get_attr_prefixed (insn
) == PREFIXED_YES
);
26903 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
26904 We use it to emit a 'p' for prefixed insns that is set in
26905 FINAL_PRESCAN_INSN. */
26907 rs6000_asm_output_opcode (FILE *stream
)
26909 if (prepend_p_to_next_insn
)
26911 fprintf (stream
, "p");
26913 /* Reset the flag in the case where there are separate insn lines in the
26914 sequence, so the 'p' is only emitted for the first line. This shows up
26915 when we are doing the PCREL_OPT optimization, in that the label created
26916 with %r<n> would have a leading 'p' printed. */
26917 prepend_p_to_next_insn
= false;
26923 /* Emit the relocation to tie the next instruction to a previous instruction
26924 that loads up an external address. This is used to do the PCREL_OPT
26925 optimization. Note, the label is generated after the PLD of the got
26926 pc-relative address to allow for the assembler to insert NOPs before the PLD
26927 instruction. The operand is a constant integer that is the label
26931 output_pcrel_opt_reloc (rtx label_num
)
26933 rtx operands
[1] = { label_num
};
26934 output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
26938 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
26939 should be adjusted to reflect any required changes. This macro is used when
26940 there is some systematic length adjustment required that would be difficult
26941 to express in the length attribute.
26943 In the PowerPC, we use this to adjust the length of an instruction if one or
26944 more prefixed instructions are generated, using the attribute
26945 num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
26946 hardware requires that a prefied instruciton does not cross a 64-byte
26947 boundary. This means the compiler has to assume the length of the first
26948 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
26949 already set for the non-prefixed instruction, we just need to udpate for the
26953 rs6000_adjust_insn_length (rtx_insn
*insn
, int length
)
26955 if (TARGET_PREFIXED
&& NONJUMP_INSN_P (insn
))
26957 rtx pattern
= PATTERN (insn
);
26958 if (GET_CODE (pattern
) != USE
&& GET_CODE (pattern
) != CLOBBER
26959 && get_attr_prefixed (insn
) == PREFIXED_YES
)
26961 int num_prefixed
= get_attr_max_prefixed_insns (insn
);
26962 length
+= 4 * (num_prefixed
+ 1);
26970 #ifdef HAVE_GAS_HIDDEN
26971 # define USE_HIDDEN_LINKONCE 1
26973 # define USE_HIDDEN_LINKONCE 0
26976 /* Fills in the label name that should be used for a 476 link stack thunk. */
26979 get_ppc476_thunk_name (char name
[32])
26981 gcc_assert (TARGET_LINK_STACK
);
26983 if (USE_HIDDEN_LINKONCE
)
26984 sprintf (name
, "__ppc476.get_thunk");
26986 ASM_GENERATE_INTERNAL_LABEL (name
, "LPPC476_", 0);
26989 /* This function emits the simple thunk routine that is used to preserve
26990 the link stack on the 476 cpu. */
26992 static void rs6000_code_end (void) ATTRIBUTE_UNUSED
;
26994 rs6000_code_end (void)
26999 if (!TARGET_LINK_STACK
)
27002 get_ppc476_thunk_name (name
);
27004 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
, get_identifier (name
),
27005 build_function_type_list (void_type_node
, NULL_TREE
));
27006 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
27007 NULL_TREE
, void_type_node
);
27008 TREE_PUBLIC (decl
) = 1;
27009 TREE_STATIC (decl
) = 1;
27012 if (USE_HIDDEN_LINKONCE
&& !TARGET_XCOFF
)
27014 cgraph_node::create (decl
)->set_comdat_group (DECL_ASSEMBLER_NAME (decl
));
27015 targetm
.asm_out
.unique_section (decl
, 0);
27016 switch_to_section (get_named_section (decl
, NULL
, 0));
27017 DECL_WEAK (decl
) = 1;
27018 ASM_WEAKEN_DECL (asm_out_file
, decl
, name
, 0);
27019 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
27020 targetm
.asm_out
.assemble_visibility (decl
, VISIBILITY_HIDDEN
);
27021 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
27026 switch_to_section (text_section
);
27027 ASM_OUTPUT_LABEL (asm_out_file
, name
);
27030 DECL_INITIAL (decl
) = make_node (BLOCK
);
27031 current_function_decl
= decl
;
27032 allocate_struct_function (decl
, false);
27033 init_function_start (decl
);
27034 first_function_block_is_cold
= false;
27035 /* Make sure unwind info is emitted for the thunk if needed. */
27036 final_start_function (emit_barrier (), asm_out_file
, 1);
27038 fputs ("\tblr\n", asm_out_file
);
27040 final_end_function ();
27041 init_insn_lengths ();
27042 free_after_compilation (cfun
);
27044 current_function_decl
= NULL
;
27047 /* Add r30 to hard reg set if the prologue sets it up and it is not
27048 pic_offset_table_rtx. */
27051 rs6000_set_up_by_prologue (struct hard_reg_set_container
*set
)
27053 if (!TARGET_SINGLE_PIC_BASE
27055 && TARGET_MINIMAL_TOC
27056 && !constant_pool_empty_p ())
27057 add_to_hard_reg_set (&set
->set
, Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
27058 if (cfun
->machine
->split_stack_argp_used
)
27059 add_to_hard_reg_set (&set
->set
, Pmode
, 12);
27061 /* Make sure the hard reg set doesn't include r2, which was possibly added
27062 via PIC_OFFSET_TABLE_REGNUM. */
27064 remove_from_hard_reg_set (&set
->set
, Pmode
, TOC_REGNUM
);
27068 /* Helper function for rs6000_split_logical to emit a logical instruction after
27069 spliting the operation to single GPR registers.
27071 DEST is the destination register.
27072 OP1 and OP2 are the input source registers.
27073 CODE is the base operation (AND, IOR, XOR, NOT).
27074 MODE is the machine mode.
27075 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27076 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27077 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
27080 rs6000_split_logical_inner (rtx dest
,
27083 enum rtx_code code
,
27085 bool complement_final_p
,
27086 bool complement_op1_p
,
27087 bool complement_op2_p
)
27091 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
27092 if (op2
&& CONST_INT_P (op2
)
27093 && (mode
== SImode
|| (mode
== DImode
&& TARGET_POWERPC64
))
27094 && !complement_final_p
&& !complement_op1_p
&& !complement_op2_p
)
27096 HOST_WIDE_INT mask
= GET_MODE_MASK (mode
);
27097 HOST_WIDE_INT value
= INTVAL (op2
) & mask
;
27099 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
27104 emit_insn (gen_rtx_SET (dest
, const0_rtx
));
27108 else if (value
== mask
)
27110 if (!rtx_equal_p (dest
, op1
))
27111 emit_insn (gen_rtx_SET (dest
, op1
));
27116 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
27117 into separate ORI/ORIS or XORI/XORIS instrucitons. */
27118 else if (code
== IOR
|| code
== XOR
)
27122 if (!rtx_equal_p (dest
, op1
))
27123 emit_insn (gen_rtx_SET (dest
, op1
));
27129 if (code
== AND
&& mode
== SImode
27130 && !complement_final_p
&& !complement_op1_p
&& !complement_op2_p
)
27132 emit_insn (gen_andsi3 (dest
, op1
, op2
));
27136 if (complement_op1_p
)
27137 op1
= gen_rtx_NOT (mode
, op1
);
27139 if (complement_op2_p
)
27140 op2
= gen_rtx_NOT (mode
, op2
);
27142 /* For canonical RTL, if only one arm is inverted it is the first. */
27143 if (!complement_op1_p
&& complement_op2_p
)
27144 std::swap (op1
, op2
);
27146 bool_rtx
= ((code
== NOT
)
27147 ? gen_rtx_NOT (mode
, op1
)
27148 : gen_rtx_fmt_ee (code
, mode
, op1
, op2
));
27150 if (complement_final_p
)
27151 bool_rtx
= gen_rtx_NOT (mode
, bool_rtx
);
27153 emit_insn (gen_rtx_SET (dest
, bool_rtx
));
27156 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
27157 operations are split immediately during RTL generation to allow for more
27158 optimizations of the AND/IOR/XOR.
27160 OPERANDS is an array containing the destination and two input operands.
27161 CODE is the base operation (AND, IOR, XOR, NOT).
27162 MODE is the machine mode.
27163 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27164 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27165 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
27166 CLOBBER_REG is either NULL or a scratch register of type CC to allow
27167 formation of the AND instructions. */
27170 rs6000_split_logical_di (rtx operands
[3],
27171 enum rtx_code code
,
27172 bool complement_final_p
,
27173 bool complement_op1_p
,
27174 bool complement_op2_p
)
27176 const HOST_WIDE_INT lower_32bits
= HOST_WIDE_INT_C(0xffffffff);
27177 const HOST_WIDE_INT upper_32bits
= ~ lower_32bits
;
27178 const HOST_WIDE_INT sign_bit
= HOST_WIDE_INT_C(0x80000000);
27179 enum hi_lo
{ hi
= 0, lo
= 1 };
27180 rtx op0_hi_lo
[2], op1_hi_lo
[2], op2_hi_lo
[2];
27183 op0_hi_lo
[hi
] = gen_highpart (SImode
, operands
[0]);
27184 op1_hi_lo
[hi
] = gen_highpart (SImode
, operands
[1]);
27185 op0_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[0]);
27186 op1_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[1]);
27189 op2_hi_lo
[hi
] = op2_hi_lo
[lo
] = NULL_RTX
;
27192 if (!CONST_INT_P (operands
[2]))
27194 op2_hi_lo
[hi
] = gen_highpart_mode (SImode
, DImode
, operands
[2]);
27195 op2_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[2]);
27199 HOST_WIDE_INT value
= INTVAL (operands
[2]);
27200 HOST_WIDE_INT value_hi_lo
[2];
27202 gcc_assert (!complement_final_p
);
27203 gcc_assert (!complement_op1_p
);
27204 gcc_assert (!complement_op2_p
);
27206 value_hi_lo
[hi
] = value
>> 32;
27207 value_hi_lo
[lo
] = value
& lower_32bits
;
27209 for (i
= 0; i
< 2; i
++)
27211 HOST_WIDE_INT sub_value
= value_hi_lo
[i
];
27213 if (sub_value
& sign_bit
)
27214 sub_value
|= upper_32bits
;
27216 op2_hi_lo
[i
] = GEN_INT (sub_value
);
27218 /* If this is an AND instruction, check to see if we need to load
27219 the value in a register. */
27220 if (code
== AND
&& sub_value
!= -1 && sub_value
!= 0
27221 && !and_operand (op2_hi_lo
[i
], SImode
))
27222 op2_hi_lo
[i
] = force_reg (SImode
, op2_hi_lo
[i
]);
27227 for (i
= 0; i
< 2; i
++)
27229 /* Split large IOR/XOR operations. */
27230 if ((code
== IOR
|| code
== XOR
)
27231 && CONST_INT_P (op2_hi_lo
[i
])
27232 && !complement_final_p
27233 && !complement_op1_p
27234 && !complement_op2_p
27235 && !logical_const_operand (op2_hi_lo
[i
], SImode
))
27237 HOST_WIDE_INT value
= INTVAL (op2_hi_lo
[i
]);
27238 HOST_WIDE_INT hi_16bits
= value
& HOST_WIDE_INT_C(0xffff0000);
27239 HOST_WIDE_INT lo_16bits
= value
& HOST_WIDE_INT_C(0x0000ffff);
27240 rtx tmp
= gen_reg_rtx (SImode
);
27242 /* Make sure the constant is sign extended. */
27243 if ((hi_16bits
& sign_bit
) != 0)
27244 hi_16bits
|= upper_32bits
;
27246 rs6000_split_logical_inner (tmp
, op1_hi_lo
[i
], GEN_INT (hi_16bits
),
27247 code
, SImode
, false, false, false);
27249 rs6000_split_logical_inner (op0_hi_lo
[i
], tmp
, GEN_INT (lo_16bits
),
27250 code
, SImode
, false, false, false);
27253 rs6000_split_logical_inner (op0_hi_lo
[i
], op1_hi_lo
[i
], op2_hi_lo
[i
],
27254 code
, SImode
, complement_final_p
,
27255 complement_op1_p
, complement_op2_p
);
27261 /* Split the insns that make up boolean operations operating on multiple GPR
27262 registers. The boolean MD patterns ensure that the inputs either are
27263 exactly the same as the output registers, or there is no overlap.
27265 OPERANDS is an array containing the destination and two input operands.
27266 CODE is the base operation (AND, IOR, XOR, NOT).
27267 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27268 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27269 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
27272 rs6000_split_logical (rtx operands
[3],
27273 enum rtx_code code
,
27274 bool complement_final_p
,
27275 bool complement_op1_p
,
27276 bool complement_op2_p
)
27278 machine_mode mode
= GET_MODE (operands
[0]);
27279 machine_mode sub_mode
;
27281 int sub_size
, regno0
, regno1
, nregs
, i
;
27283 /* If this is DImode, use the specialized version that can run before
27284 register allocation. */
27285 if (mode
== DImode
&& !TARGET_POWERPC64
)
27287 rs6000_split_logical_di (operands
, code
, complement_final_p
,
27288 complement_op1_p
, complement_op2_p
);
27294 op2
= (code
== NOT
) ? NULL_RTX
: operands
[2];
27295 sub_mode
= (TARGET_POWERPC64
) ? DImode
: SImode
;
27296 sub_size
= GET_MODE_SIZE (sub_mode
);
27297 regno0
= REGNO (op0
);
27298 regno1
= REGNO (op1
);
27300 gcc_assert (reload_completed
);
27301 gcc_assert (IN_RANGE (regno0
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
27302 gcc_assert (IN_RANGE (regno1
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
27304 nregs
= rs6000_hard_regno_nregs
[(int)mode
][regno0
];
27305 gcc_assert (nregs
> 1);
27307 if (op2
&& REG_P (op2
))
27308 gcc_assert (IN_RANGE (REGNO (op2
), FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
27310 for (i
= 0; i
< nregs
; i
++)
27312 int offset
= i
* sub_size
;
27313 rtx sub_op0
= simplify_subreg (sub_mode
, op0
, mode
, offset
);
27314 rtx sub_op1
= simplify_subreg (sub_mode
, op1
, mode
, offset
);
27315 rtx sub_op2
= ((code
== NOT
)
27317 : simplify_subreg (sub_mode
, op2
, mode
, offset
));
27319 rs6000_split_logical_inner (sub_op0
, sub_op1
, sub_op2
, code
, sub_mode
,
27320 complement_final_p
, complement_op1_p
,
27327 /* Emit instructions to move SRC to DST. Called by splitters for
27328 multi-register moves. It will emit at most one instruction for
27329 each register that is accessed; that is, it won't emit li/lis pairs
27330 (or equivalent for 64-bit code). One of SRC or DST must be a hard
27334 rs6000_split_multireg_move (rtx dst
, rtx src
)
27336 /* The register number of the first register being moved. */
27338 /* The mode that is to be moved. */
27340 /* The mode that the move is being done in, and its size. */
27341 machine_mode reg_mode
;
27343 /* The number of registers that will be moved. */
27346 reg
= REG_P (dst
) ? REGNO (dst
) : REGNO (src
);
27347 mode
= GET_MODE (dst
);
27348 nregs
= hard_regno_nregs (reg
, mode
);
27350 /* If we have a vector quad register for MMA or DMR register for dense math,
27351 and this is a load or store, see if we can use vector paired
27353 if ((mode
== XOmode
|| mode
== TDOmode
) && TARGET_MMA
27354 && (MEM_P (dst
) || MEM_P (src
)))
27359 /* If we have a vector pair/quad mode, split it into two/four separate
27361 else if (mode
== OOmode
|| mode
== XOmode
|| mode
== TDOmode
)
27362 reg_mode
= V1TImode
;
27363 else if (FP_REGNO_P (reg
))
27364 reg_mode
= DECIMAL_FLOAT_MODE_P (mode
) ? DDmode
:
27365 (TARGET_HARD_FLOAT
? DFmode
: SFmode
);
27366 else if (ALTIVEC_REGNO_P (reg
))
27367 reg_mode
= V16QImode
;
27369 reg_mode
= word_mode
;
27370 reg_mode_size
= GET_MODE_SIZE (reg_mode
);
27372 gcc_assert (reg_mode_size
* nregs
== GET_MODE_SIZE (mode
));
27374 /* TDmode residing in FP registers is special, since the ISA requires that
27375 the lower-numbered word of a register pair is always the most significant
27376 word, even in little-endian mode. This does not match the usual subreg
27377 semantics, so we cannnot use simplify_gen_subreg in those cases. Access
27378 the appropriate constituent registers "by hand" in little-endian mode.
27380 Note we do not need to check for destructive overlap here since TDmode
27381 can only reside in even/odd register pairs. */
27382 if (FP_REGNO_P (reg
) && DECIMAL_FLOAT_MODE_P (mode
) && !BYTES_BIG_ENDIAN
)
27387 for (i
= 0; i
< nregs
; i
++)
27389 if (REG_P (src
) && FP_REGNO_P (REGNO (src
)))
27390 p_src
= gen_rtx_REG (reg_mode
, REGNO (src
) + nregs
- 1 - i
);
27392 p_src
= simplify_gen_subreg (reg_mode
, src
, mode
,
27393 i
* reg_mode_size
);
27395 if (REG_P (dst
) && FP_REGNO_P (REGNO (dst
)))
27396 p_dst
= gen_rtx_REG (reg_mode
, REGNO (dst
) + nregs
- 1 - i
);
27398 p_dst
= simplify_gen_subreg (reg_mode
, dst
, mode
,
27399 i
* reg_mode_size
);
27401 emit_insn (gen_rtx_SET (p_dst
, p_src
));
27407 /* The __vector_pair, __vector_quad, and __dmr modes are multi-register
27408 modes, so if we have to load or store the registers, we have to be careful
27409 to properly swap them if we're in little endian mode below. This means
27410 the last register gets the first memory location. We also need to be
27411 careful of using the right register numbers if we are splitting XO to
27413 if (mode
== OOmode
|| mode
== XOmode
|| mode
== TDOmode
)
27415 nregs
= hard_regno_nregs (reg
, mode
);
27416 int reg_mode_nregs
= hard_regno_nregs (reg
, reg_mode
);
27419 unsigned offset
= 0;
27420 unsigned size
= GET_MODE_SIZE (reg_mode
);
27422 /* If we are reading an accumulator register, we have to
27423 deprime it before we can access it. */
27424 if (TARGET_MMA
&& !TARGET_DENSE_MATH
27425 && GET_MODE (src
) == XOmode
&& FP_REGNO_P (REGNO (src
)))
27426 emit_insn (gen_mma_xxmfacc (src
, src
));
27428 for (int i
= 0; i
< nregs
; i
+= reg_mode_nregs
)
27431 = WORDS_BIG_ENDIAN
? i
: (nregs
- reg_mode_nregs
- i
);
27432 rtx dst2
= adjust_address (dst
, reg_mode
, offset
);
27433 rtx src2
= gen_rtx_REG (reg_mode
, reg
+ subreg
);
27435 emit_insn (gen_rtx_SET (dst2
, src2
));
27443 unsigned offset
= 0;
27444 unsigned size
= GET_MODE_SIZE (reg_mode
);
27446 for (int i
= 0; i
< nregs
; i
+= reg_mode_nregs
)
27449 = WORDS_BIG_ENDIAN
? i
: (nregs
- reg_mode_nregs
- i
);
27450 rtx dst2
= gen_rtx_REG (reg_mode
, reg
+ subreg
);
27451 rtx src2
= adjust_address (src
, reg_mode
, offset
);
27453 emit_insn (gen_rtx_SET (dst2
, src2
));
27456 /* If we are writing an accumulator register that overlaps with the
27457 FPR registers, we have to prime it after we've written it. */
27458 if (TARGET_MMA
&& !TARGET_DENSE_MATH
27459 && GET_MODE (dst
) == XOmode
&& FP_REGNO_P (REGNO (dst
)))
27460 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27465 if (GET_CODE (src
) == UNSPEC
27466 || GET_CODE (src
) == UNSPEC_VOLATILE
)
27468 gcc_assert (XINT (src
, 1) == UNSPEC_VSX_ASSEMBLE
27469 || XINT (src
, 1) == UNSPECV_MMA_ASSEMBLE
);
27470 gcc_assert (REG_P (dst
));
27471 if (GET_MODE (src
) == XOmode
)
27472 gcc_assert ((TARGET_DENSE_MATH
27473 ? VSX_REGNO_P (REGNO (dst
))
27474 : FP_REGNO_P (REGNO (dst
))));
27475 if (GET_MODE (src
) == OOmode
)
27476 gcc_assert (VSX_REGNO_P (REGNO (dst
)));
27478 int nvecs
= XVECLEN (src
, 0);
27479 for (int i
= 0; i
< nvecs
; i
++)
27482 int regno
= reg
+ i
;
27484 if (WORDS_BIG_ENDIAN
)
27486 op
= XVECEXP (src
, 0, i
);
27488 /* If we are loading an even VSX register and the memory location
27489 is adjacent to the next register's memory location (if any),
27490 then we can load them both with one LXVP instruction. */
27491 if ((regno
& 1) == 0)
27493 rtx op2
= XVECEXP (src
, 0, i
+ 1);
27494 if (adjacent_mem_locations (op
, op2
) == op
)
27496 op
= adjust_address (op
, OOmode
, 0);
27497 /* Skip the next register, since we're going to
27498 load it together with this register. */
27505 op
= XVECEXP (src
, 0, nvecs
- i
- 1);
27507 /* If we are loading an even VSX register and the memory location
27508 is adjacent to the next register's memory location (if any),
27509 then we can load them both with one LXVP instruction. */
27510 if ((regno
& 1) == 0)
27512 rtx op2
= XVECEXP (src
, 0, nvecs
- i
- 2);
27513 if (adjacent_mem_locations (op2
, op
) == op2
)
27515 op
= adjust_address (op2
, OOmode
, 0);
27516 /* Skip the next register, since we're going to
27517 load it together with this register. */
27523 rtx dst_i
= gen_rtx_REG (GET_MODE (op
), regno
);
27524 emit_insn (gen_rtx_SET (dst_i
, op
));
27527 /* On systems without dense math where accumulators overlap with the
27528 vector registers, we have to prime it after we've written it. */
27529 if (GET_MODE (src
) == XOmode
&& !TARGET_DENSE_MATH
)
27530 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27535 /* Register -> register moves can use common code. */
27538 if (REG_P (src
) && REG_P (dst
) && (REGNO (src
) < REGNO (dst
)))
27540 /* If we are reading an accumulator register and we don't have dense
27541 math, we have to deprime it before we can access it. */
27542 if (TARGET_MMA
&& !TARGET_DENSE_MATH
27543 && GET_MODE (src
) == XOmode
&& FP_REGNO_P (REGNO (src
)))
27544 emit_insn (gen_mma_xxmfacc (src
, src
));
27546 /* Move register range backwards, if we might have destructive
27549 /* XO/OO are opaque so cannot use subregs. */
27550 if (mode
== OOmode
|| mode
== XOmode
|| mode
== TDOmode
)
27552 for (i
= nregs
- 1; i
>= 0; i
--)
27554 rtx dst_i
= gen_rtx_REG (reg_mode
, REGNO (dst
) + i
);
27555 rtx src_i
= gen_rtx_REG (reg_mode
, REGNO (src
) + i
);
27556 emit_insn (gen_rtx_SET (dst_i
, src_i
));
27561 for (i
= nregs
- 1; i
>= 0; i
--)
27562 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode
, dst
, mode
,
27563 i
* reg_mode_size
),
27564 simplify_gen_subreg (reg_mode
, src
, mode
,
27565 i
* reg_mode_size
)));
27568 /* If we are writing an accumulator register, we have to
27569 prime it after we've written it. */
27570 if (TARGET_MMA
&& !TARGET_DENSE_MATH
27571 && GET_MODE (dst
) == XOmode
&& FP_REGNO_P (REGNO (dst
)))
27572 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27578 bool used_update
= false;
27579 rtx restore_basereg
= NULL_RTX
;
27581 if (MEM_P (src
) && INT_REGNO_P (reg
))
27585 if (GET_CODE (XEXP (src
, 0)) == PRE_INC
27586 || GET_CODE (XEXP (src
, 0)) == PRE_DEC
)
27589 breg
= XEXP (XEXP (src
, 0), 0);
27590 delta_rtx
= (GET_CODE (XEXP (src
, 0)) == PRE_INC
27591 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src
)))
27592 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src
))));
27593 emit_insn (gen_add3_insn (breg
, breg
, delta_rtx
));
27594 src
= replace_equiv_address (src
, breg
);
27596 else if (! rs6000_offsettable_memref_p (src
, reg_mode
, true))
27598 if (GET_CODE (XEXP (src
, 0)) == PRE_MODIFY
)
27600 rtx basereg
= XEXP (XEXP (src
, 0), 0);
27603 rtx ndst
= simplify_gen_subreg (reg_mode
, dst
, mode
, 0);
27604 emit_insn (gen_rtx_SET (ndst
,
27605 gen_rtx_MEM (reg_mode
,
27607 used_update
= true;
27610 emit_insn (gen_rtx_SET (basereg
,
27611 XEXP (XEXP (src
, 0), 1)));
27612 src
= replace_equiv_address (src
, basereg
);
27616 rtx basereg
= gen_rtx_REG (Pmode
, reg
);
27617 emit_insn (gen_rtx_SET (basereg
, XEXP (src
, 0)));
27618 src
= replace_equiv_address (src
, basereg
);
27622 breg
= XEXP (src
, 0);
27623 if (GET_CODE (breg
) == PLUS
|| GET_CODE (breg
) == LO_SUM
)
27624 breg
= XEXP (breg
, 0);
27626 /* If the base register we are using to address memory is
27627 also a destination reg, then change that register last. */
27629 && REGNO (breg
) >= REGNO (dst
)
27630 && REGNO (breg
) < REGNO (dst
) + nregs
)
27631 j
= REGNO (breg
) - REGNO (dst
);
27633 else if (MEM_P (dst
) && INT_REGNO_P (reg
))
27637 if (GET_CODE (XEXP (dst
, 0)) == PRE_INC
27638 || GET_CODE (XEXP (dst
, 0)) == PRE_DEC
)
27641 breg
= XEXP (XEXP (dst
, 0), 0);
27642 delta_rtx
= (GET_CODE (XEXP (dst
, 0)) == PRE_INC
27643 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst
)))
27644 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst
))));
27646 /* We have to update the breg before doing the store.
27647 Use store with update, if available. */
27651 rtx nsrc
= simplify_gen_subreg (reg_mode
, src
, mode
, 0);
27652 emit_insn (TARGET_32BIT
27653 ? (TARGET_POWERPC64
27654 ? gen_movdi_si_update (breg
, breg
, delta_rtx
, nsrc
)
27655 : gen_movsi_si_update (breg
, breg
, delta_rtx
, nsrc
))
27656 : gen_movdi_di_update (breg
, breg
, delta_rtx
, nsrc
));
27657 used_update
= true;
27660 emit_insn (gen_add3_insn (breg
, breg
, delta_rtx
));
27661 dst
= replace_equiv_address (dst
, breg
);
27663 else if (!rs6000_offsettable_memref_p (dst
, reg_mode
, true)
27664 && GET_CODE (XEXP (dst
, 0)) != LO_SUM
)
27666 if (GET_CODE (XEXP (dst
, 0)) == PRE_MODIFY
)
27668 rtx basereg
= XEXP (XEXP (dst
, 0), 0);
27671 rtx nsrc
= simplify_gen_subreg (reg_mode
, src
, mode
, 0);
27672 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode
,
27675 used_update
= true;
27678 emit_insn (gen_rtx_SET (basereg
,
27679 XEXP (XEXP (dst
, 0), 1)));
27680 dst
= replace_equiv_address (dst
, basereg
);
27684 rtx basereg
= XEXP (XEXP (dst
, 0), 0);
27685 rtx offsetreg
= XEXP (XEXP (dst
, 0), 1);
27686 gcc_assert (GET_CODE (XEXP (dst
, 0)) == PLUS
27688 && REG_P (offsetreg
)
27689 && REGNO (basereg
) != REGNO (offsetreg
));
27690 if (REGNO (basereg
) == 0)
27692 rtx tmp
= offsetreg
;
27693 offsetreg
= basereg
;
27696 emit_insn (gen_add3_insn (basereg
, basereg
, offsetreg
));
27697 restore_basereg
= gen_sub3_insn (basereg
, basereg
, offsetreg
);
27698 dst
= replace_equiv_address (dst
, basereg
);
27701 else if (GET_CODE (XEXP (dst
, 0)) != LO_SUM
)
27702 gcc_assert (rs6000_offsettable_memref_p (dst
, reg_mode
, true));
27705 /* If we are reading an accumulator register, we have to
27706 deprime it before we can access it. */
27707 if (TARGET_MMA
&& !TARGET_DENSE_MATH
&& REG_P (src
)
27708 && GET_MODE (src
) == XOmode
&& FP_REGNO_P (REGNO (src
)))
27709 emit_insn (gen_mma_xxmfacc (src
, src
));
27711 for (i
= 0; i
< nregs
; i
++)
27713 /* Calculate index to next subword. */
27718 /* If compiler already emitted move of first word by
27719 store with update, no need to do anything. */
27720 if (j
== 0 && used_update
)
27723 /* XO/OO are opaque so cannot use subregs. */
27724 if (mode
== OOmode
|| mode
== XOmode
|| mode
== TDOmode
)
27726 rtx dst_i
= gen_rtx_REG (reg_mode
, REGNO (dst
) + j
);
27727 rtx src_i
= gen_rtx_REG (reg_mode
, REGNO (src
) + j
);
27728 emit_insn (gen_rtx_SET (dst_i
, src_i
));
27731 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode
, dst
, mode
,
27732 j
* reg_mode_size
),
27733 simplify_gen_subreg (reg_mode
, src
, mode
,
27734 j
* reg_mode_size
)));
27737 /* If we are writing an accumulator register, we have to
27738 prime it after we've written it. */
27739 if (TARGET_MMA
&& !TARGET_DENSE_MATH
&& REG_P (dst
)
27740 && GET_MODE (dst
) == XOmode
&& FP_REGNO_P (REGNO (dst
)))
27741 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27743 if (restore_basereg
!= NULL_RTX
)
27744 emit_insn (restore_basereg
);
27748 /* Return true if the peephole2 can combine a load involving a combination of
27749 an addis instruction and a load with an offset that can be fused together on
27753 fusion_gpr_load_p (rtx addis_reg
, /* register set via addis. */
27754 rtx addis_value
, /* addis value. */
27755 rtx target
, /* target register that is loaded. */
27756 rtx mem
) /* bottom part of the memory addr. */
27761 /* Validate arguments. */
27762 if (!base_reg_operand (addis_reg
, GET_MODE (addis_reg
)))
27765 if (!base_reg_operand (target
, GET_MODE (target
)))
27768 if (!fusion_gpr_addis (addis_value
, GET_MODE (addis_value
)))
27771 /* Allow sign/zero extension. */
27772 if (GET_CODE (mem
) == ZERO_EXTEND
27773 || (GET_CODE (mem
) == SIGN_EXTEND
&& TARGET_P8_FUSION_SIGN
))
27774 mem
= XEXP (mem
, 0);
27779 if (!fusion_gpr_mem_load (mem
, GET_MODE (mem
)))
27782 addr
= XEXP (mem
, 0); /* either PLUS or LO_SUM. */
27783 if (GET_CODE (addr
) != PLUS
&& GET_CODE (addr
) != LO_SUM
)
27786 /* Validate that the register used to load the high value is either the
27787 register being loaded, or we can safely replace its use.
27789 This function is only called from the peephole2 pass and we assume that
27790 there are 2 instructions in the peephole (addis and load), so we want to
27791 check if the target register was not used in the memory address and the
27792 register to hold the addis result is dead after the peephole. */
27793 if (REGNO (addis_reg
) != REGNO (target
))
27795 if (reg_mentioned_p (target
, mem
))
27798 if (!peep2_reg_dead_p (2, addis_reg
))
27801 /* If the target register being loaded is the stack pointer, we must
27802 avoid loading any other value into it, even temporarily. */
27803 if (REG_P (target
) && REGNO (target
) == STACK_POINTER_REGNUM
)
27807 base_reg
= XEXP (addr
, 0);
27808 return REGNO (addis_reg
) == REGNO (base_reg
);
27811 /* During the peephole2 pass, adjust and expand the insns for a load fusion
27812 sequence. We adjust the addis register to use the target register. If the
27813 load sign extends, we adjust the code to do the zero extending load, and an
27814 explicit sign extension later since the fusion only covers zero extending
27818 operands[0] register set with addis (to be replaced with target)
27819 operands[1] value set via addis
27820 operands[2] target register being loaded
27821 operands[3] D-form memory reference using operands[0]. */
27824 expand_fusion_gpr_load (rtx
*operands
)
27826 rtx addis_value
= operands
[1];
27827 rtx target
= operands
[2];
27828 rtx orig_mem
= operands
[3];
27829 rtx new_addr
, new_mem
, orig_addr
, offset
;
27830 enum rtx_code plus_or_lo_sum
;
27831 machine_mode target_mode
= GET_MODE (target
);
27832 machine_mode extend_mode
= target_mode
;
27833 machine_mode ptr_mode
= Pmode
;
27834 enum rtx_code extend
= UNKNOWN
;
27836 if (GET_CODE (orig_mem
) == ZERO_EXTEND
27837 || (TARGET_P8_FUSION_SIGN
&& GET_CODE (orig_mem
) == SIGN_EXTEND
))
27839 extend
= GET_CODE (orig_mem
);
27840 orig_mem
= XEXP (orig_mem
, 0);
27841 target_mode
= GET_MODE (orig_mem
);
27844 gcc_assert (MEM_P (orig_mem
));
27846 orig_addr
= XEXP (orig_mem
, 0);
27847 plus_or_lo_sum
= GET_CODE (orig_addr
);
27848 gcc_assert (plus_or_lo_sum
== PLUS
|| plus_or_lo_sum
== LO_SUM
);
27850 offset
= XEXP (orig_addr
, 1);
27851 new_addr
= gen_rtx_fmt_ee (plus_or_lo_sum
, ptr_mode
, addis_value
, offset
);
27852 new_mem
= replace_equiv_address_nv (orig_mem
, new_addr
, false);
27854 if (extend
!= UNKNOWN
)
27855 new_mem
= gen_rtx_fmt_e (ZERO_EXTEND
, extend_mode
, new_mem
);
27857 new_mem
= gen_rtx_UNSPEC (extend_mode
, gen_rtvec (1, new_mem
),
27858 UNSPEC_FUSION_GPR
);
27859 emit_insn (gen_rtx_SET (target
, new_mem
));
27861 if (extend
== SIGN_EXTEND
)
27863 int sub_off
= ((BYTES_BIG_ENDIAN
)
27864 ? GET_MODE_SIZE (extend_mode
) - GET_MODE_SIZE (target_mode
)
27867 = simplify_subreg (target_mode
, target
, extend_mode
, sub_off
);
27869 emit_insn (gen_rtx_SET (target
,
27870 gen_rtx_SIGN_EXTEND (extend_mode
, sign_reg
)));
27876 /* Emit the addis instruction that will be part of a fused instruction
27880 emit_fusion_addis (rtx target
, rtx addis_value
)
27883 const char *addis_str
= NULL
;
27885 /* Emit the addis instruction. */
27886 fuse_ops
[0] = target
;
27887 if (satisfies_constraint_L (addis_value
))
27889 fuse_ops
[1] = addis_value
;
27890 addis_str
= "lis %0,%v1";
27893 else if (GET_CODE (addis_value
) == PLUS
)
27895 rtx op0
= XEXP (addis_value
, 0);
27896 rtx op1
= XEXP (addis_value
, 1);
27898 if (REG_P (op0
) && CONST_INT_P (op1
)
27899 && satisfies_constraint_L (op1
))
27903 addis_str
= "addis %0,%1,%v2";
27907 else if (GET_CODE (addis_value
) == HIGH
)
27909 rtx value
= XEXP (addis_value
, 0);
27910 if (GET_CODE (value
) == UNSPEC
&& XINT (value
, 1) == UNSPEC_TOCREL
)
27912 fuse_ops
[1] = XVECEXP (value
, 0, 0); /* symbol ref. */
27913 fuse_ops
[2] = XVECEXP (value
, 0, 1); /* TOC register. */
27915 addis_str
= "addis %0,%2,%1@toc@ha";
27917 else if (TARGET_XCOFF
)
27918 addis_str
= "addis %0,%1@u(%2)";
27921 gcc_unreachable ();
27924 else if (GET_CODE (value
) == PLUS
)
27926 rtx op0
= XEXP (value
, 0);
27927 rtx op1
= XEXP (value
, 1);
27929 if (GET_CODE (op0
) == UNSPEC
27930 && XINT (op0
, 1) == UNSPEC_TOCREL
27931 && CONST_INT_P (op1
))
27933 fuse_ops
[1] = XVECEXP (op0
, 0, 0); /* symbol ref. */
27934 fuse_ops
[2] = XVECEXP (op0
, 0, 1); /* TOC register. */
27937 addis_str
= "addis %0,%2,%1+%3@toc@ha";
27939 else if (TARGET_XCOFF
)
27940 addis_str
= "addis %0,%1+%3@u(%2)";
27943 gcc_unreachable ();
27947 else if (satisfies_constraint_L (value
))
27949 fuse_ops
[1] = value
;
27950 addis_str
= "lis %0,%v1";
27953 else if (TARGET_ELF
&& !TARGET_POWERPC64
&& CONSTANT_P (value
))
27955 fuse_ops
[1] = value
;
27956 addis_str
= "lis %0,%1@ha";
27961 fatal_insn ("Could not generate addis value for fusion", addis_value
);
27963 output_asm_insn (addis_str
, fuse_ops
);
27966 /* Emit a D-form load or store instruction that is the second instruction
27967 of a fusion sequence. */
27970 emit_fusion_load (rtx load_reg
, rtx addis_reg
, rtx offset
, const char *insn_str
)
27973 char insn_template
[80];
27975 fuse_ops
[0] = load_reg
;
27976 fuse_ops
[1] = addis_reg
;
27978 if (CONST_INT_P (offset
) && satisfies_constraint_I (offset
))
27980 sprintf (insn_template
, "%s %%0,%%2(%%1)", insn_str
);
27981 fuse_ops
[2] = offset
;
27982 output_asm_insn (insn_template
, fuse_ops
);
27985 else if (GET_CODE (offset
) == UNSPEC
27986 && XINT (offset
, 1) == UNSPEC_TOCREL
)
27989 sprintf (insn_template
, "%s %%0,%%2@toc@l(%%1)", insn_str
);
27991 else if (TARGET_XCOFF
)
27992 sprintf (insn_template
, "%s %%0,%%2@l(%%1)", insn_str
);
27995 gcc_unreachable ();
27997 fuse_ops
[2] = XVECEXP (offset
, 0, 0);
27998 output_asm_insn (insn_template
, fuse_ops
);
28001 else if (GET_CODE (offset
) == PLUS
28002 && GET_CODE (XEXP (offset
, 0)) == UNSPEC
28003 && XINT (XEXP (offset
, 0), 1) == UNSPEC_TOCREL
28004 && CONST_INT_P (XEXP (offset
, 1)))
28006 rtx tocrel_unspec
= XEXP (offset
, 0);
28008 sprintf (insn_template
, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str
);
28010 else if (TARGET_XCOFF
)
28011 sprintf (insn_template
, "%s %%0,%%2+%%3@l(%%1)", insn_str
);
28014 gcc_unreachable ();
28016 fuse_ops
[2] = XVECEXP (tocrel_unspec
, 0, 0);
28017 fuse_ops
[3] = XEXP (offset
, 1);
28018 output_asm_insn (insn_template
, fuse_ops
);
28021 else if (TARGET_ELF
&& !TARGET_POWERPC64
&& CONSTANT_P (offset
))
28023 sprintf (insn_template
, "%s %%0,%%2@l(%%1)", insn_str
);
28025 fuse_ops
[2] = offset
;
28026 output_asm_insn (insn_template
, fuse_ops
);
28030 fatal_insn ("Unable to generate load/store offset for fusion", offset
);
28035 /* Given an address, convert it into the addis and load offset parts. Addresses
28036 created during the peephole2 process look like:
28037 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
28038 (unspec [(...)] UNSPEC_TOCREL)) */
28041 fusion_split_address (rtx addr
, rtx
*p_hi
, rtx
*p_lo
)
28045 if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
)
28047 hi
= XEXP (addr
, 0);
28048 lo
= XEXP (addr
, 1);
28051 gcc_unreachable ();
28057 /* Return a string to fuse an addis instruction with a gpr load to the same
28058 register that we loaded up the addis instruction. The address that is used
28059 is the logical address that was formed during peephole2:
28060 (lo_sum (high) (low-part))
28062 The code is complicated, so we call output_asm_insn directly, and just
28066 emit_fusion_gpr_load (rtx target
, rtx mem
)
28071 const char *load_str
= NULL
;
28074 if (GET_CODE (mem
) == ZERO_EXTEND
)
28075 mem
= XEXP (mem
, 0);
28077 gcc_assert (REG_P (target
) && MEM_P (mem
));
28079 addr
= XEXP (mem
, 0);
28080 fusion_split_address (addr
, &addis_value
, &load_offset
);
28082 /* Now emit the load instruction to the same register. */
28083 mode
= GET_MODE (mem
);
28101 gcc_assert (TARGET_POWERPC64
);
28106 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target
, mem
));
28109 /* Emit the addis instruction. */
28110 emit_fusion_addis (target
, addis_value
);
28112 /* Emit the D-form load instruction. */
28113 emit_fusion_load (target
, target
, load_offset
, load_str
);
28118 /* This is not inside an #ifdef RS6000_GLIBC_ATOMIC_FENV because gengtype
28119 ignores it then. */
28120 static GTY(()) tree atomic_hold_decl
;
28121 static GTY(()) tree atomic_clear_decl
;
28122 static GTY(()) tree atomic_update_decl
;
28124 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
28126 rs6000_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
28128 if (!TARGET_HARD_FLOAT
)
28130 #ifdef RS6000_GLIBC_ATOMIC_FENV
28131 if (atomic_hold_decl
== NULL_TREE
)
28134 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
28135 get_identifier ("__atomic_feholdexcept"),
28136 build_function_type_list (void_type_node
,
28137 double_ptr_type_node
,
28139 TREE_PUBLIC (atomic_hold_decl
) = 1;
28140 DECL_EXTERNAL (atomic_hold_decl
) = 1;
28143 if (atomic_clear_decl
== NULL_TREE
)
28146 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
28147 get_identifier ("__atomic_feclearexcept"),
28148 build_function_type_list (void_type_node
,
28150 TREE_PUBLIC (atomic_clear_decl
) = 1;
28151 DECL_EXTERNAL (atomic_clear_decl
) = 1;
28154 tree const_double
= build_qualified_type (double_type_node
,
28156 tree const_double_ptr
= build_pointer_type (const_double
);
28157 if (atomic_update_decl
== NULL_TREE
)
28160 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
28161 get_identifier ("__atomic_feupdateenv"),
28162 build_function_type_list (void_type_node
,
28165 TREE_PUBLIC (atomic_update_decl
) = 1;
28166 DECL_EXTERNAL (atomic_update_decl
) = 1;
28169 tree fenv_var
= create_tmp_var_raw (double_type_node
);
28170 TREE_ADDRESSABLE (fenv_var
) = 1;
28171 tree fenv_addr
= build1 (ADDR_EXPR
, double_ptr_type_node
,
28172 build4 (TARGET_EXPR
, double_type_node
, fenv_var
,
28173 void_node
, NULL_TREE
, NULL_TREE
));
28175 *hold
= build_call_expr (atomic_hold_decl
, 1, fenv_addr
);
28176 *clear
= build_call_expr (atomic_clear_decl
, 0);
28177 *update
= build_call_expr (atomic_update_decl
, 1,
28178 fold_convert (const_double_ptr
, fenv_addr
));
28183 tree mffs
= rs6000_builtin_decls
[RS6000_BIF_MFFS
];
28184 tree mtfsf
= rs6000_builtin_decls
[RS6000_BIF_MTFSF
];
28185 tree call_mffs
= build_call_expr (mffs
, 0);
28187 /* Generates the equivalent of feholdexcept (&fenv_var)
28189 *fenv_var = __builtin_mffs ();
28191 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
28192 __builtin_mtfsf (0xff, fenv_hold); */
28194 /* Mask to clear everything except for the rounding modes and non-IEEE
28195 arithmetic flag. */
28196 const unsigned HOST_WIDE_INT hold_exception_mask
28197 = HOST_WIDE_INT_C (0xffffffff00000007);
28199 tree fenv_var
= create_tmp_var_raw (double_type_node
);
28201 tree hold_mffs
= build4 (TARGET_EXPR
, double_type_node
, fenv_var
, call_mffs
,
28202 NULL_TREE
, NULL_TREE
);
28204 tree fenv_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, fenv_var
);
28205 tree fenv_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, fenv_llu
,
28206 build_int_cst (uint64_type_node
,
28207 hold_exception_mask
));
28209 tree fenv_hold_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
28212 tree hold_mtfsf
= build_call_expr (mtfsf
, 2,
28213 build_int_cst (unsigned_type_node
, 0xff),
28216 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, hold_mffs
, hold_mtfsf
);
28218 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
28220 double fenv_clear = __builtin_mffs ();
28221 *(uint64_t)&fenv_clear &= 0xffffffff00000000LL;
28222 __builtin_mtfsf (0xff, fenv_clear); */
28224 /* Mask to clear everything except for the rounding modes and non-IEEE
28225 arithmetic flag. */
28226 const unsigned HOST_WIDE_INT clear_exception_mask
28227 = HOST_WIDE_INT_C (0xffffffff00000000);
28229 tree fenv_clear
= create_tmp_var_raw (double_type_node
);
28231 tree clear_mffs
= build4 (TARGET_EXPR
, double_type_node
, fenv_clear
,
28232 call_mffs
, NULL_TREE
, NULL_TREE
);
28234 tree fenv_clean_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, fenv_clear
);
28235 tree fenv_clear_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
,
28237 build_int_cst (uint64_type_node
,
28238 clear_exception_mask
));
28240 tree fenv_clear_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
28241 fenv_clear_llu_and
);
28243 tree clear_mtfsf
= build_call_expr (mtfsf
, 2,
28244 build_int_cst (unsigned_type_node
, 0xff),
28247 *clear
= build2 (COMPOUND_EXPR
, void_type_node
, clear_mffs
, clear_mtfsf
);
28249 /* Generates the equivalent of feupdateenv (&fenv_var)
28251 double old_fenv = __builtin_mffs ();
28252 double fenv_update;
28253 *(uint64_t*)&fenv_update = (*(uint64_t*)&old & 0xffffffff1fffff00LL) |
28254 (*(uint64_t*)fenv_var 0x1ff80fff);
28255 __builtin_mtfsf (0xff, fenv_update); */
28257 const unsigned HOST_WIDE_INT update_exception_mask
28258 = HOST_WIDE_INT_C (0xffffffff1fffff00);
28259 const unsigned HOST_WIDE_INT new_exception_mask
28260 = HOST_WIDE_INT_C (0x1ff80fff);
28262 tree old_fenv
= create_tmp_var_raw (double_type_node
);
28263 tree update_mffs
= build4 (TARGET_EXPR
, double_type_node
, old_fenv
,
28264 call_mffs
, NULL_TREE
, NULL_TREE
);
28266 tree old_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, old_fenv
);
28267 tree old_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, old_llu
,
28268 build_int_cst (uint64_type_node
,
28269 update_exception_mask
));
28271 tree new_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, fenv_llu
,
28272 build_int_cst (uint64_type_node
,
28273 new_exception_mask
));
28275 tree new_llu_mask
= build2 (BIT_IOR_EXPR
, uint64_type_node
,
28276 old_llu_and
, new_llu_and
);
28278 tree fenv_update_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
28281 tree update_mtfsf
= build_call_expr (mtfsf
, 2,
28282 build_int_cst (unsigned_type_node
, 0xff),
28283 fenv_update_mtfsf
);
28285 *update
= build2 (COMPOUND_EXPR
, void_type_node
, update_mffs
, update_mtfsf
);
28289 rs6000_generate_float2_double_code (rtx dst
, rtx src1
, rtx src2
)
28291 rtx rtx_tmp0
, rtx_tmp1
, rtx_tmp2
, rtx_tmp3
;
28293 rtx_tmp0
= gen_reg_rtx (V2DFmode
);
28294 rtx_tmp1
= gen_reg_rtx (V2DFmode
);
28296 /* The destination of the vmrgew instruction layout is:
28297 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
28298 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28299 vmrgew instruction will be correct. */
28300 if (BYTES_BIG_ENDIAN
)
28302 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0
, src1
, src2
,
28304 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1
, src1
, src2
,
28309 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0
, src1
, src2
, GEN_INT (3)));
28310 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1
, src1
, src2
, GEN_INT (0)));
28313 rtx_tmp2
= gen_reg_rtx (V4SFmode
);
28314 rtx_tmp3
= gen_reg_rtx (V4SFmode
);
28316 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2
, rtx_tmp0
));
28317 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3
, rtx_tmp1
));
28319 if (BYTES_BIG_ENDIAN
)
28320 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp2
, rtx_tmp3
));
28322 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp3
, rtx_tmp2
));
28326 rs6000_generate_float2_code (bool signed_convert
, rtx dst
, rtx src1
, rtx src2
)
28328 rtx rtx_tmp0
, rtx_tmp1
, rtx_tmp2
, rtx_tmp3
;
28330 rtx_tmp0
= gen_reg_rtx (V2DImode
);
28331 rtx_tmp1
= gen_reg_rtx (V2DImode
);
28333 /* The destination of the vmrgew instruction layout is:
28334 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
28335 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28336 vmrgew instruction will be correct. */
28337 if (BYTES_BIG_ENDIAN
)
28339 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0
, src1
, src2
, GEN_INT (0)));
28340 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1
, src1
, src2
, GEN_INT (3)));
28344 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0
, src1
, src2
, GEN_INT (3)));
28345 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1
, src1
, src2
, GEN_INT (0)));
28348 rtx_tmp2
= gen_reg_rtx (V4SFmode
);
28349 rtx_tmp3
= gen_reg_rtx (V4SFmode
);
28351 if (signed_convert
)
28353 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2
, rtx_tmp0
));
28354 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3
, rtx_tmp1
));
28358 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2
, rtx_tmp0
));
28359 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3
, rtx_tmp1
));
28362 if (BYTES_BIG_ENDIAN
)
28363 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp2
, rtx_tmp3
));
28365 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp3
, rtx_tmp2
));
28369 rs6000_generate_vsigned2_code (bool signed_convert
, rtx dst
, rtx src1
,
28372 rtx rtx_tmp0
, rtx_tmp1
, rtx_tmp2
, rtx_tmp3
;
28374 rtx_tmp0
= gen_reg_rtx (V2DFmode
);
28375 rtx_tmp1
= gen_reg_rtx (V2DFmode
);
28377 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0
, src1
, src2
, GEN_INT (0)));
28378 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1
, src1
, src2
, GEN_INT (3)));
28380 rtx_tmp2
= gen_reg_rtx (V4SImode
);
28381 rtx_tmp3
= gen_reg_rtx (V4SImode
);
28383 if (signed_convert
)
28385 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2
, rtx_tmp0
));
28386 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3
, rtx_tmp1
));
28390 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2
, rtx_tmp0
));
28391 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3
, rtx_tmp1
));
28394 emit_insn (gen_p8_vmrgew_v4si (dst
, rtx_tmp2
, rtx_tmp3
));
28397 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
28400 rs6000_optab_supported_p (int op
, machine_mode mode1
, machine_mode
,
28401 optimization_type opt_type
)
28406 return (opt_type
== OPTIMIZE_FOR_SPEED
28407 && RS6000_RECIP_AUTO_RSQRTE_P (mode1
));
28414 /* Implement TARGET_CONSTANT_ALIGNMENT. */
28416 static HOST_WIDE_INT
28417 rs6000_constant_alignment (const_tree exp
, HOST_WIDE_INT align
)
28419 if (TREE_CODE (exp
) == STRING_CST
28420 && (STRICT_ALIGNMENT
|| !optimize_size
))
28421 return MAX (align
, BITS_PER_WORD
);
28425 /* Implement TARGET_STARTING_FRAME_OFFSET. */
28427 static HOST_WIDE_INT
28428 rs6000_starting_frame_offset (void)
28430 if (FRAME_GROWS_DOWNWARD
)
28432 return RS6000_STARTING_FRAME_OFFSET
;
28435 /* Internal function to return the built-in function id for the complex
28436 multiply operation for a given mode. */
28438 static inline built_in_function
28439 complex_multiply_builtin_code (machine_mode mode
)
28441 return (built_in_function
) (BUILT_IN_COMPLEX_MUL_MIN
+ mode
28442 - MIN_MODE_COMPLEX_FLOAT
);
28445 /* Internal function to return the built-in function id for the complex divide
28446 operation for a given mode. */
28448 static inline built_in_function
28449 complex_divide_builtin_code (machine_mode mode
)
28451 return (built_in_function
) (BUILT_IN_COMPLEX_DIV_MIN
+ mode
28452 - MIN_MODE_COMPLEX_FLOAT
);
28455 /* On 64-bit Linux and Freebsd systems, possibly switch the long double library
28456 function names from <foo>l to <foo>f128 if the default long double type is
28457 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
28458 include file switches the names on systems that support long double as IEEE
28459 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
28460 In the future, glibc will export names like __ieee128_sinf128 and we can
28461 switch to using those instead of using sinf128, which pollutes the user's
28464 This will switch the names for Fortran math functions as well (which doesn't
28465 use math.h). However, Fortran needs other changes to the compiler and
28466 library before you can switch the real*16 type at compile time.
28468 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
28469 only do this transformation if the __float128 type is enabled. This
28470 prevents us from doing the transformation on older 32-bit ports that might
28471 have enabled using IEEE 128-bit floating point as the default long double
28474 We also use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change the
28475 function names used for complex multiply and divide to the appropriate
28479 rs6000_mangle_decl_assembler_name (tree decl
, tree id
)
28481 /* Handle complex multiply/divide. For IEEE 128-bit, use __mulkc3 or
28482 __divkc3 and for IBM 128-bit use __multc3 and __divtc3. */
28483 if (TARGET_FLOAT128_TYPE
28484 && TREE_CODE (decl
) == FUNCTION_DECL
28485 && DECL_IS_UNDECLARED_BUILTIN (decl
)
28486 && DECL_BUILT_IN_CLASS (decl
) == BUILT_IN_NORMAL
)
28488 built_in_function id
= DECL_FUNCTION_CODE (decl
);
28489 const char *newname
= NULL
;
28491 if (id
== complex_multiply_builtin_code (KCmode
))
28492 newname
= "__mulkc3";
28494 else if (id
== complex_multiply_builtin_code (ICmode
))
28495 newname
= "__multc3";
28497 else if (id
== complex_multiply_builtin_code (TCmode
))
28498 newname
= (TARGET_IEEEQUAD
) ? "__mulkc3" : "__multc3";
28500 else if (id
== complex_divide_builtin_code (KCmode
))
28501 newname
= "__divkc3";
28503 else if (id
== complex_divide_builtin_code (ICmode
))
28504 newname
= "__divtc3";
28506 else if (id
== complex_divide_builtin_code (TCmode
))
28507 newname
= (TARGET_IEEEQUAD
) ? "__divkc3" : "__divtc3";
28511 if (TARGET_DEBUG_BUILTIN
)
28512 fprintf (stderr
, "Map complex mul/div => %s\n", newname
);
28514 return get_identifier (newname
);
28518 /* Map long double built-in functions if long double is IEEE 128-bit. */
28519 if (TARGET_FLOAT128_TYPE
&& TARGET_IEEEQUAD
&& TARGET_LONG_DOUBLE_128
28520 && TREE_CODE (decl
) == FUNCTION_DECL
28521 && DECL_IS_UNDECLARED_BUILTIN (decl
)
28522 && DECL_BUILT_IN_CLASS (decl
) == BUILT_IN_NORMAL
)
28524 size_t len
= IDENTIFIER_LENGTH (id
);
28525 const char *name
= IDENTIFIER_POINTER (id
);
28526 char *newname
= NULL
;
28528 /* See if it is one of the built-in functions with an unusual name. */
28529 switch (DECL_FUNCTION_CODE (decl
))
28531 case BUILT_IN_DREML
:
28532 newname
= xstrdup ("__remainderieee128");
28535 case BUILT_IN_GAMMAL
:
28536 newname
= xstrdup ("__lgammaieee128");
28539 case BUILT_IN_GAMMAL_R
:
28540 case BUILT_IN_LGAMMAL_R
:
28541 newname
= xstrdup ("__lgammaieee128_r");
28544 case BUILT_IN_NEXTTOWARD
:
28545 newname
= xstrdup ("__nexttoward_to_ieee128");
28548 case BUILT_IN_NEXTTOWARDF
:
28549 newname
= xstrdup ("__nexttowardf_to_ieee128");
28552 case BUILT_IN_NEXTTOWARDL
:
28553 newname
= xstrdup ("__nexttowardieee128");
28556 case BUILT_IN_POW10L
:
28557 newname
= xstrdup ("__exp10ieee128");
28560 case BUILT_IN_SCALBL
:
28561 newname
= xstrdup ("__scalbieee128");
28564 case BUILT_IN_SIGNIFICANDL
:
28565 newname
= xstrdup ("__significandieee128");
28568 case BUILT_IN_SINCOSL
:
28569 newname
= xstrdup ("__sincosieee128");
28576 /* Update the __builtin_*printf and __builtin_*scanf functions. */
28579 size_t printf_len
= strlen ("printf");
28580 size_t scanf_len
= strlen ("scanf");
28581 size_t printf_chk_len
= strlen ("printf_chk");
28583 if (len
>= printf_len
28584 && strcmp (name
+ len
- printf_len
, "printf") == 0)
28585 newname
= xasprintf ("__%sieee128", name
);
28587 else if (len
>= scanf_len
28588 && strcmp (name
+ len
- scanf_len
, "scanf") == 0)
28589 newname
= xasprintf ("__isoc99_%sieee128", name
);
28591 else if (len
>= printf_chk_len
28592 && strcmp (name
+ len
- printf_chk_len
, "printf_chk") == 0)
28593 newname
= xasprintf ("%sieee128", name
);
28595 else if (name
[len
- 1] == 'l')
28597 bool uses_ieee128_p
= false;
28598 tree type
= TREE_TYPE (decl
);
28599 machine_mode ret_mode
= TYPE_MODE (type
);
28601 /* See if the function returns a IEEE 128-bit floating point type or
28603 if (ret_mode
== TFmode
|| ret_mode
== TCmode
)
28604 uses_ieee128_p
= true;
28607 function_args_iterator args_iter
;
28610 /* See if the function passes a IEEE 128-bit floating point type
28611 or complex type. */
28612 FOREACH_FUNCTION_ARGS (type
, arg
, args_iter
)
28614 machine_mode arg_mode
= TYPE_MODE (arg
);
28615 if (arg_mode
== TFmode
|| arg_mode
== TCmode
)
28617 uses_ieee128_p
= true;
28623 /* If we passed or returned an IEEE 128-bit floating point type,
28624 change the name. Use __<name>ieee128, instead of <name>l. */
28625 if (uses_ieee128_p
)
28626 newname
= xasprintf ("__%.*sieee128", (int)(len
- 1), name
);
28632 if (TARGET_DEBUG_BUILTIN
)
28633 fprintf (stderr
, "Map %s => %s\n", name
, newname
);
28635 id
= get_identifier (newname
);
28643 /* Predict whether the given loop in gimple will be transformed in the RTL
28644 doloop_optimize pass. */
28647 rs6000_predict_doloop_p (struct loop
*loop
)
28651 /* On rs6000, targetm.can_use_doloop_p is actually
28652 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
28653 if (loop
->inner
!= NULL
)
28655 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
28656 fprintf (dump_file
, "Predict doloop failure due to"
28657 " loop nesting.\n");
28664 /* Implement TARGET_PREFERRED_DOLOOP_MODE. */
28666 static machine_mode
28667 rs6000_preferred_doloop_mode (machine_mode
)
28672 /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
28675 rs6000_cannot_substitute_mem_equiv_p (rtx mem
)
28677 gcc_assert (MEM_P (mem
));
28679 /* curr_insn_transform()'s handling of subregs cannot handle altivec AND:
28680 type addresses, so don't allow MEMs with those address types to be
28681 substituted as an equivalent expression. See PR93974 for details. */
28682 if (GET_CODE (XEXP (mem
, 0)) == AND
)
28688 /* Implement TARGET_INVALID_CONVERSION. */
28690 static const char *
28691 rs6000_invalid_conversion (const_tree fromtype
, const_tree totype
)
28693 /* Make sure we're working with the canonical types. */
28694 if (TYPE_CANONICAL (fromtype
) != NULL_TREE
)
28695 fromtype
= TYPE_CANONICAL (fromtype
);
28696 if (TYPE_CANONICAL (totype
) != NULL_TREE
)
28697 totype
= TYPE_CANONICAL (totype
);
28699 machine_mode frommode
= TYPE_MODE (fromtype
);
28700 machine_mode tomode
= TYPE_MODE (totype
);
28702 if (frommode
!= tomode
)
28704 /* Do not allow conversions to/from XOmode, OOmode, and TDOmode
28706 if (frommode
== XOmode
)
28707 return N_("invalid conversion from type %<__vector_quad%>");
28708 if (tomode
== XOmode
)
28709 return N_("invalid conversion to type %<__vector_quad%>");
28710 if (frommode
== OOmode
)
28711 return N_("invalid conversion from type %<__vector_pair%>");
28712 if (tomode
== OOmode
)
28713 return N_("invalid conversion to type %<__vector_pair%>");
28714 if (frommode
== TDOmode
)
28715 return N_("invalid conversion from type %<__dmr%>");
28716 if (tomode
== TDOmode
)
28717 return N_("invalid conversion to type %<__dmr%>");
28720 /* Conversion allowed. */
28724 /* Convert a SFmode constant to the integer bit pattern. */
28727 rs6000_const_f32_to_i32 (rtx operand
)
28730 const struct real_value
*rv
= CONST_DOUBLE_REAL_VALUE (operand
);
28732 gcc_assert (GET_MODE (operand
) == SFmode
);
28733 REAL_VALUE_TO_TARGET_SINGLE (*rv
, value
);
28738 rs6000_emit_xxspltidp_v2df (rtx dst
, long value
)
28740 if (((value
& 0x7F800000) == 0) && ((value
& 0x7FFFFF) != 0))
28741 inform (input_location
,
28742 "the result for the xxspltidp instruction "
28743 "is undefined for subnormal input values");
28744 emit_insn( gen_xxspltidp_v2df_inst (dst
, GEN_INT (value
)));
28747 /* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC. */
28750 rs6000_gen_pic_addr_diff_vec (void)
28752 return rs6000_relative_jumptables
;
28756 rs6000_output_addr_vec_elt (FILE *file
, int value
)
28758 const char *directive
= TARGET_64BIT
? DOUBLE_INT_ASM_OP
: "\t.long\t";
28761 fprintf (file
, "%s", directive
);
28762 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", value
);
28763 assemble_name (file
, buf
);
28764 fprintf (file
, "\n");
28768 /* Copy an integer constant to the vector constant structure. */
28771 constant_int_to_128bit_vector (rtx op
,
28774 vec_const_128bit_type
*info
)
28776 unsigned HOST_WIDE_INT uvalue
= UINTVAL (op
);
28777 unsigned bitsize
= GET_MODE_BITSIZE (mode
);
28779 for (int shift
= bitsize
- 8; shift
>= 0; shift
-= 8)
28780 info
->bytes
[byte_num
++] = (uvalue
>> shift
) & 0xff;
28783 /* Copy a floating point constant to the vector constant structure. */
28786 constant_fp_to_128bit_vector (rtx op
,
28789 vec_const_128bit_type
*info
)
28791 unsigned bitsize
= GET_MODE_BITSIZE (mode
);
28792 unsigned num_words
= bitsize
/ 32;
28793 const REAL_VALUE_TYPE
*rtype
= CONST_DOUBLE_REAL_VALUE (op
);
28794 long real_words
[VECTOR_128BIT_WORDS
];
28796 /* Make sure we don't overflow the real_words array and that it is
28797 filled completely. */
28798 gcc_assert (num_words
<= VECTOR_128BIT_WORDS
&& (bitsize
% 32) == 0);
28800 real_to_target (real_words
, rtype
, mode
);
28802 /* Iterate over each 32-bit word in the floating point constant. The
28803 real_to_target function puts out words in target endian fashion. We need
28804 to arrange the order so that the bytes are written in big endian order. */
28805 for (unsigned num
= 0; num
< num_words
; num
++)
28807 unsigned endian_num
= (BYTES_BIG_ENDIAN
28809 : num_words
- 1 - num
);
28811 unsigned uvalue
= real_words
[endian_num
];
28812 for (int shift
= 32 - 8; shift
>= 0; shift
-= 8)
28813 info
->bytes
[byte_num
++] = (uvalue
>> shift
) & 0xff;
28816 /* Mark that this constant involves floating point. */
28817 info
->fp_constant_p
= true;
28820 /* Convert a vector constant OP with mode MODE to a vector 128-bit constant
28823 Break out the constant out to bytes, half words, words, and double words.
28824 Return true if we have successfully converted the constant.
28826 We handle CONST_INT, CONST_DOUBLE, CONST_VECTOR, and VEC_DUPLICATE of
28827 constants. Integer and floating point scalar constants are splatted to fill
28831 vec_const_128bit_to_bytes (rtx op
,
28833 vec_const_128bit_type
*info
)
28835 /* Initialize the constant structure. */
28836 memset ((void *)info
, 0, sizeof (vec_const_128bit_type
));
28838 /* Assume CONST_INTs are DImode. */
28839 if (mode
== VOIDmode
)
28840 mode
= CONST_INT_P (op
) ? DImode
: GET_MODE (op
);
28842 if (mode
== VOIDmode
)
28845 unsigned size
= GET_MODE_SIZE (mode
);
28846 bool splat_p
= false;
28848 if (size
> VECTOR_128BIT_BYTES
)
28851 /* Set up the bits. */
28852 switch (GET_CODE (op
))
28854 /* Integer constants, default to double word. */
28857 constant_int_to_128bit_vector (op
, mode
, 0, info
);
28862 /* Floating point constants. */
28865 /* Fail if the floating point constant is the wrong mode. */
28866 if (GET_MODE (op
) != mode
)
28869 /* SFmode stored as scalars are stored in DFmode format. */
28870 if (mode
== SFmode
)
28873 size
= GET_MODE_SIZE (DFmode
);
28876 constant_fp_to_128bit_vector (op
, mode
, 0, info
);
28881 /* Vector constants, iterate over each element. On little endian
28882 systems, we have to reverse the element numbers. */
28885 /* Fail if the vector constant is the wrong mode or size. */
28886 if (GET_MODE (op
) != mode
28887 || GET_MODE_SIZE (mode
) != VECTOR_128BIT_BYTES
)
28890 machine_mode ele_mode
= GET_MODE_INNER (mode
);
28891 size_t ele_size
= GET_MODE_SIZE (ele_mode
);
28892 size_t nunits
= GET_MODE_NUNITS (mode
);
28894 for (size_t num
= 0; num
< nunits
; num
++)
28896 rtx ele
= CONST_VECTOR_ELT (op
, num
);
28897 size_t byte_num
= (BYTES_BIG_ENDIAN
28899 : nunits
- 1 - num
) * ele_size
;
28901 if (CONST_INT_P (ele
))
28902 constant_int_to_128bit_vector (ele
, ele_mode
, byte_num
, info
);
28903 else if (CONST_DOUBLE_P (ele
))
28904 constant_fp_to_128bit_vector (ele
, ele_mode
, byte_num
, info
);
28912 /* Treat VEC_DUPLICATE of a constant just like a vector constant.
28913 Since we are duplicating the element, we don't have to worry about
28915 case VEC_DUPLICATE
:
28917 /* Fail if the vector duplicate is the wrong mode or size. */
28918 if (GET_MODE (op
) != mode
28919 || GET_MODE_SIZE (mode
) != VECTOR_128BIT_BYTES
)
28922 machine_mode ele_mode
= GET_MODE_INNER (mode
);
28923 size_t ele_size
= GET_MODE_SIZE (ele_mode
);
28924 rtx ele
= XEXP (op
, 0);
28925 size_t nunits
= GET_MODE_NUNITS (mode
);
28927 if (!CONST_INT_P (ele
) && !CONST_DOUBLE_P (ele
))
28930 for (size_t num
= 0; num
< nunits
; num
++)
28932 size_t byte_num
= num
* ele_size
;
28934 if (CONST_INT_P (ele
))
28935 constant_int_to_128bit_vector (ele
, ele_mode
, byte_num
, info
);
28937 constant_fp_to_128bit_vector (ele
, ele_mode
, byte_num
, info
);
28943 /* Any thing else, just return failure. */
28948 /* Splat the constant to fill 128 bits if desired. */
28949 if (splat_p
&& size
< VECTOR_128BIT_BYTES
)
28951 if ((VECTOR_128BIT_BYTES
% size
) != 0)
28954 for (size_t offset
= size
;
28955 offset
< VECTOR_128BIT_BYTES
;
28957 memcpy ((void *) &info
->bytes
[offset
],
28958 (void *) &info
->bytes
[0],
28962 /* Remember original size. */
28963 info
->original_size
= size
;
28965 /* Determine if the bytes are all the same. */
28966 unsigned char first_byte
= info
->bytes
[0];
28967 info
->all_bytes_same
= true;
28968 for (size_t i
= 1; i
< VECTOR_128BIT_BYTES
; i
++)
28969 if (first_byte
!= info
->bytes
[i
])
28971 info
->all_bytes_same
= false;
28975 /* Pack half words together & determine if all of the half words are the
28977 for (size_t i
= 0; i
< VECTOR_128BIT_HALF_WORDS
; i
++)
28978 info
->half_words
[i
] = ((info
->bytes
[i
* 2] << 8)
28979 | info
->bytes
[(i
* 2) + 1]);
28981 unsigned short first_hword
= info
->half_words
[0];
28982 info
->all_half_words_same
= true;
28983 for (size_t i
= 1; i
< VECTOR_128BIT_HALF_WORDS
; i
++)
28984 if (first_hword
!= info
->half_words
[i
])
28986 info
->all_half_words_same
= false;
28990 /* Pack words together & determine if all of the words are the same. */
28991 for (size_t i
= 0; i
< VECTOR_128BIT_WORDS
; i
++)
28992 info
->words
[i
] = ((info
->bytes
[i
* 4] << 24)
28993 | (info
->bytes
[(i
* 4) + 1] << 16)
28994 | (info
->bytes
[(i
* 4) + 2] << 8)
28995 | info
->bytes
[(i
* 4) + 3]);
28997 info
->all_words_same
28998 = (info
->words
[0] == info
->words
[1]
28999 && info
->words
[0] == info
->words
[1]
29000 && info
->words
[0] == info
->words
[2]
29001 && info
->words
[0] == info
->words
[3]);
29003 /* Pack double words together & determine if all of the double words are the
29005 for (size_t i
= 0; i
< VECTOR_128BIT_DOUBLE_WORDS
; i
++)
29007 unsigned HOST_WIDE_INT d_word
= 0;
29008 for (size_t j
= 0; j
< 8; j
++)
29009 d_word
= (d_word
<< 8) | info
->bytes
[(i
* 8) + j
];
29011 info
->double_words
[i
] = d_word
;
29014 info
->all_double_words_same
29015 = (info
->double_words
[0] == info
->double_words
[1]);
29020 /* Determine if an IEEE 128-bit constant can be loaded with LXVKQ. Return zero
29021 if the LXVKQ instruction cannot be used. Otherwise return the immediate
29022 value to be used with the LXVKQ instruction. */
29025 constant_generates_lxvkq (vec_const_128bit_type
*vsx_const
)
29027 /* Is the instruction supported with power10 code generation, IEEE 128-bit
29028 floating point hardware and VSX registers are available. */
29029 if (!TARGET_IEEE128_CONSTANT
|| !TARGET_FLOAT128_HW
|| !TARGET_POWER10
29033 /* All of the constants that are generated by LXVKQ have the bottom 3 words
29035 if (vsx_const
->words
[1] != 0
29036 || vsx_const
->words
[2] != 0
29037 || vsx_const
->words
[3] != 0)
29040 /* See if we have a match for the first word. */
29041 switch (vsx_const
->words
[0])
29043 case 0x3FFF0000U
: return 1; /* IEEE 128-bit +1.0. */
29044 case 0x40000000U
: return 2; /* IEEE 128-bit +2.0. */
29045 case 0x40008000U
: return 3; /* IEEE 128-bit +3.0. */
29046 case 0x40010000U
: return 4; /* IEEE 128-bit +4.0. */
29047 case 0x40014000U
: return 5; /* IEEE 128-bit +5.0. */
29048 case 0x40018000U
: return 6; /* IEEE 128-bit +6.0. */
29049 case 0x4001C000U
: return 7; /* IEEE 128-bit +7.0. */
29050 case 0x7FFF0000U
: return 8; /* IEEE 128-bit +Infinity. */
29051 case 0x7FFF8000U
: return 9; /* IEEE 128-bit quiet NaN. */
29052 case 0x80000000U
: return 16; /* IEEE 128-bit -0.0. */
29053 case 0xBFFF0000U
: return 17; /* IEEE 128-bit -1.0. */
29054 case 0xC0000000U
: return 18; /* IEEE 128-bit -2.0. */
29055 case 0xC0008000U
: return 19; /* IEEE 128-bit -3.0. */
29056 case 0xC0010000U
: return 20; /* IEEE 128-bit -4.0. */
29057 case 0xC0014000U
: return 21; /* IEEE 128-bit -5.0. */
29058 case 0xC0018000U
: return 22; /* IEEE 128-bit -6.0. */
29059 case 0xC001C000U
: return 23; /* IEEE 128-bit -7.0. */
29060 case 0xFFFF0000U
: return 24; /* IEEE 128-bit -Infinity. */
29062 /* anything else cannot be loaded. */
29070 /* Determine if a vector constant can be loaded with XXSPLTIW. Return zero if
29071 the XXSPLTIW instruction cannot be used. Otherwise return the immediate
29072 value to be used with the XXSPLTIW instruction. */
29075 constant_generates_xxspltiw (vec_const_128bit_type
*vsx_const
)
29077 if (!TARGET_SPLAT_WORD_CONSTANT
|| !TARGET_PREFIXED
|| !TARGET_VSX
)
29080 if (!vsx_const
->all_words_same
)
29083 /* If we can use XXSPLTIB, don't generate XXSPLTIW. */
29084 if (vsx_const
->all_bytes_same
)
29087 /* See if we can use VSPLTISH or VSPLTISW. */
29088 if (vsx_const
->all_half_words_same
)
29090 short sign_h_word
= vsx_const
->half_words
[0];
29091 if (EASY_VECTOR_15 (sign_h_word
))
29095 int sign_word
= vsx_const
->words
[0];
29096 if (EASY_VECTOR_15 (sign_word
))
29099 return vsx_const
->words
[0];
29102 /* Determine if a vector constant can be loaded with XXSPLTIDP. Return zero if
29103 the XXSPLTIDP instruction cannot be used. Otherwise return the immediate
29104 value to be used with the XXSPLTIDP instruction. */
29107 constant_generates_xxspltidp (vec_const_128bit_type
*vsx_const
)
29109 if (!TARGET_SPLAT_FLOAT_CONSTANT
|| !TARGET_PREFIXED
|| !TARGET_VSX
)
29112 /* Reject if the two 64-bit segments are not the same. */
29113 if (!vsx_const
->all_double_words_same
)
29116 /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP.
29117 Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW). */
29118 if (vsx_const
->all_bytes_same
29119 || vsx_const
->all_half_words_same
29120 || vsx_const
->all_words_same
)
29123 unsigned HOST_WIDE_INT value
= vsx_const
->double_words
[0];
29125 /* Avoid values that look like DFmode NaN's, except for the normal NaN bit
29126 pattern and the signalling NaN bit pattern. Recognize infinity and
29127 negative infinity. */
29129 /* Bit representation of DFmode normal quiet NaN. */
29130 #define RS6000_CONST_DF_NAN HOST_WIDE_INT_UC (0x7ff8000000000000)
29132 /* Bit representation of DFmode normal signaling NaN. */
29133 #define RS6000_CONST_DF_NANS HOST_WIDE_INT_UC (0x7ff4000000000000)
29135 /* Bit representation of DFmode positive infinity. */
29136 #define RS6000_CONST_DF_INF HOST_WIDE_INT_UC (0x7ff0000000000000)
29138 /* Bit representation of DFmode negative infinity. */
29139 #define RS6000_CONST_DF_NEG_INF HOST_WIDE_INT_UC (0xfff0000000000000)
29141 if (value
!= RS6000_CONST_DF_NAN
29142 && value
!= RS6000_CONST_DF_NANS
29143 && value
!= RS6000_CONST_DF_INF
29144 && value
!= RS6000_CONST_DF_NEG_INF
)
29146 /* The IEEE 754 64-bit floating format has 1 bit for sign, 11 bits for
29147 the exponent, and 52 bits for the mantissa (not counting the hidden
29148 bit used for normal numbers). NaN values have the exponent set to all
29149 1 bits, and the mantissa non-zero (mantissa == 0 is infinity). */
29151 int df_exponent
= (value
>> 52) & 0x7ff;
29152 unsigned HOST_WIDE_INT
29153 df_mantissa
= value
& ((HOST_WIDE_INT_1U
<< 52) - HOST_WIDE_INT_1U
);
29155 if (df_exponent
== 0x7ff && df_mantissa
!= 0) /* other NaNs. */
29158 /* Avoid values that are DFmode subnormal values. Subnormal numbers have
29159 the exponent all 0 bits, and the mantissa non-zero. If the value is
29160 subnormal, then the hidden bit in the mantissa is not set. */
29161 if (df_exponent
== 0 && df_mantissa
!= 0) /* subnormal. */
29165 /* Change the representation to DFmode constant. */
29166 long df_words
[2] = { vsx_const
->words
[0], vsx_const
->words
[1] };
29168 /* real_from_target takes the target words in target order. */
29169 if (!BYTES_BIG_ENDIAN
)
29170 std::swap (df_words
[0], df_words
[1]);
29172 REAL_VALUE_TYPE rv_type
;
29173 real_from_target (&rv_type
, df_words
, DFmode
);
29175 const REAL_VALUE_TYPE
*rv
= &rv_type
;
29177 /* Validate that the number can be stored as a SFmode value. */
29178 if (!exact_real_truncate (SFmode
, rv
))
29181 /* Validate that the number is not a SFmode subnormal value (exponent is 0,
29182 mantissa field is non-zero) which is undefined for the XXSPLTIDP
29185 real_to_target (&sf_value
, rv
, SFmode
);
29187 /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent,
29188 and 23 bits for the mantissa. Subnormal numbers have the exponent all
29189 0 bits, and the mantissa non-zero. */
29190 long sf_exponent
= (sf_value
>> 23) & 0xFF;
29191 long sf_mantissa
= sf_value
& 0x7FFFFF;
29193 if (sf_exponent
== 0 && sf_mantissa
!= 0)
29196 /* Return the immediate to be used. */
29200 /* Now we have only two opaque types, they are __vector_quad and
29201 __vector_pair built-in types. They are target specific and
29202 only available when MMA is supported. With MMA supported, it
29203 simply returns true, otherwise it checks if the given gimple
29204 STMT is an assignment, asm or call stmt and uses either of
29205 these two opaque types unexpectedly, if yes, it would raise
29206 an error message and returns true, otherwise it returns false. */
29209 rs6000_opaque_type_invalid_use_p (gimple
*stmt
)
29214 /* If the given TYPE is one MMA opaque type, emit the corresponding
29215 error messages and return true, otherwise return false. */
29216 auto check_and_error_invalid_use
= [](tree type
)
29218 tree mv
= TYPE_MAIN_VARIANT (type
);
29219 if (mv
== vector_quad_type_node
)
29221 error ("type %<__vector_quad%> requires the %qs option", "-mmma");
29224 else if (mv
== vector_pair_type_node
)
29226 error ("type %<__vector_pair%> requires the %qs option", "-mmma");
29234 /* The usage of MMA opaque types is very limited for now,
29235 to check with gassign, gasm and gcall is enough so far. */
29236 if (gassign
*ga
= dyn_cast
<gassign
*> (stmt
))
29238 tree lhs
= gimple_assign_lhs (ga
);
29239 tree type
= TREE_TYPE (lhs
);
29240 if (check_and_error_invalid_use (type
))
29243 else if (gasm
*gs
= dyn_cast
<gasm
*> (stmt
))
29245 unsigned ninputs
= gimple_asm_ninputs (gs
);
29246 for (unsigned i
= 0; i
< ninputs
; i
++)
29248 tree op
= gimple_asm_input_op (gs
, i
);
29249 tree val
= TREE_VALUE (op
);
29250 tree type
= TREE_TYPE (val
);
29251 if (check_and_error_invalid_use (type
))
29254 unsigned noutputs
= gimple_asm_noutputs (gs
);
29255 for (unsigned i
= 0; i
< noutputs
; i
++)
29257 tree op
= gimple_asm_output_op (gs
, i
);
29258 tree val
= TREE_VALUE (op
);
29259 tree type
= TREE_TYPE (val
);
29260 if (check_and_error_invalid_use (type
))
29264 else if (gcall
*gc
= dyn_cast
<gcall
*> (stmt
))
29266 unsigned nargs
= gimple_call_num_args (gc
);
29267 for (unsigned i
= 0; i
< nargs
; i
++)
29269 tree arg
= gimple_call_arg (gc
, i
);
29270 tree type
= TREE_TYPE (arg
);
29271 if (check_and_error_invalid_use (type
))
29280 struct gcc_target targetm
= TARGET_INITIALIZER
;
29282 #include "gt-rs6000.h"