// SPDX-License-Identifier: GPL-3.0-or-later
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2022 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "print-tree.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "sched-int.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
#include "tm-constrs.h"
#include "target-globals.h"
#include "tree-vector-builder.h"
#include "tree-pass.h"
#include "symbol-summary.h"
#include "ipa-fnsummary.h"
#include "xcoffout.h"		/* get declarations of xcoff_*_section_name */
#include "case-cfn-macros.h"
#include "rs6000-internal.h"

/* This file should be included last.  */
#include "target-def.h"
extern tree rs6000_builtin_mask_for_load (void);
extern tree rs6000_builtin_md_vectorized_function (tree, tree, tree);
extern tree rs6000_builtin_reciprocal (tree);
/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif
/* Don't enable PC-relative addressing if the target does not support it.  */
#ifndef PCREL_SUPPORTED_BY_OS
#define PCREL_SUPPORTED_BY_OS 0
#endif
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

/* Track use of r13 in 64bit AIX TLS.  */
static bool xcoff_tls_exec_model_detected = false;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;
#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif
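
/* Illustration (assumed use; the emission code is not part of this excerpt):
   these flags feed the .gnu_attribute directives written at the end of the
   assembly output, e.g. rs6000_passes_float is what drives the
   Tag_GNU_Power_ABI_FP value mentioned above, so the linker can diagnose
   objects built with mismatched floating-point or long-double ABIs.  */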
/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Flag to say the TOC is initialized */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;
static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

section *toc_section = 0;

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combination of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	 RECIP_ALL },
  { "none",	 RECIP_NONE },
  { "div",	 (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		  | RECIP_V2DF_DIV) },
  { "divf",	 (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	 (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	 (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		  | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	 (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	 (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
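
/* Illustration (assumed behaviour; the option parsing itself is not in this
   excerpt): the -mrecip=<opt> handler looks each comma-separated name up in
   recip_options[] and ORs the matching mask into the reciprocal-estimate
   flags, so e.g. -mrecip=divd,rsqrtd would turn on RECIP_DF_DIV
   | RECIP_V2DF_DIV | RECIP_DF_RSQRT | RECIP_V2DF_RSQRT.  */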
/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT		= 0,		/* default clone.  */
  CLONE_ISA_2_05,			/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,			/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,			/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,			/* ISA 3.0 (power9).  */
  CLONE_ISA_3_1,			/* ISA 3.1 (power10).  */
  CLONE_MAX
};
/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0, "" },					/* Default options.  */
  { OPTION_MASK_CMPB, "arch_2_05" },		/* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD, "arch_2_06" },		/* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR, "arch_2_07" },	/* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR, "arch_3_00" },	/* ISA 3.0 (power9).  */
  { OPTION_MASK_POWER10, "arch_3_1" },		/* ISA 3.1 (power10).  */
};
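
/* Illustration (assumed usage; the dispatcher code is not in this excerpt):
   for the "target_clones" attribute each clone index above selects the
   rs6000_clone_map[] entry whose name is handed to __builtin_cpu_supports,
   so a CLONE_ISA_3_00 clone is only chosen at run time when
   __builtin_cpu_supports ("arch_3_00") is true.  */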
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p = false;

/* Pointer to function (in rs6000-c.cc) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.cc, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or in forming a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */
enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,			/* General purpose registers.  */
  RELOAD_REG_FPR,			/* Traditional floating point regs.  */
  RELOAD_REG_VMX,			/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,			/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};
/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   3 classes.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX
/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;		/* Register class name.  */
  int reg;			/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */
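
/* Illustration: addr_mask_type is a plain bitmask, so a mode/register-type
   pair that supports reg+reg and reg+offset addressing but nothing fancier
   would carry (RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET),
   i.e. 0x0d.  The masks are queried with simple AND tests, as in the
   mode_supports_* helpers below.  */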
/* Per-mode table of the valid addressing-mode masks (indexed by register
   type), plus the reload insns used to fix up addresses and moves.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
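
/* Usage sketch (illustrative only): predicates like these are intended to be
   cheap enough to use in predicates and insn conditions, e.g.

     if (mode_supports_dq_form (TImode))
       ... allow a DQ-form (16-byte aligned offset) address ...

   which reduces to a single AND against reg_addr[TImode].addr_mask.  */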
/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */
int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;

		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }

  return store_data_bypass_p (out_insn, in_insn);
}
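
/* Note (assumption; the pipeline descriptions are not part of this excerpt):
   this predicate is written so it can be used from define_bypass conditions
   in the power*.md scheduling descriptions, where OUT_INSN produces a value
   and IN_INSN is a candidate store.  The PARALLEL screening above only
   exists so that the generic store_data_bypass_p is never called on
   patterns that would trip its assertions.  */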
/* Processor costs (relative to an add) */

const struct processor_costs *rs6000_cost;

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),	    /* mulsi_const */
  COSTS_N_INSNS (6/2),	    /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),	    /* divsi */
  COSTS_N_INSNS (70/2),	    /* divdi */
  COSTS_N_INSNS (10/2),	    /* fp */
  COSTS_N_INSNS (10/2),	    /* dmul */
  COSTS_N_INSNS (74/2),	    /* sdiv */
  COSTS_N_INSNS (74/2),	    /* ddiv */
  128,			    /* cache line size */
  0,			    /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER10 processors.  */
static const
struct processor_costs power10_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (2),	/* fp */
  COSTS_N_INSNS (2),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (13),	/* ddiv */
  128,			/* cache line size */
  16,			/* prefetch streams */
  COSTS_N_INSNS (2),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};
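
/* Illustration (assumed wiring; the option-override code is not in this
   excerpt): rs6000_cost is pointed at one of the tables above when target
   options are processed, e.g. optimizing for size is expected to select
   size32_cost or size64_cost while -mcpu=power9 would select power9_cost,
   and the rtx-cost/insn-cost hooks then read fields such as
   rs6000_cost->mulsi.  */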
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
bool easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static tree get_prev_label (tree);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							    machine_mode, rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							    enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t, reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",   1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall", 0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, false, NULL, NULL }
};
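
/* For illustration, these entries are what user code such as

     void far_away (void) __attribute__ ((longcall));

   resolves against; the handler functions named above validate the
   attribute arguments and mark the type or decl accordingly.  */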
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif
1287 #undef TARGET_ATTRIBUTE_TABLE
1288 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1289 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1290 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1291 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1292 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1294 #undef TARGET_ASM_ALIGNED_DI_OP
1295 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif
/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer
#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif
#undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
#define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
  rs6000_print_patchable_function_entry

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
#define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_CREATE_COSTS
#define TARGET_VECTORIZE_CREATE_COSTS rs6000_vectorize_create_costs

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_TRANSLATE_MODE_ATTRIBUTE
#define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  rs6000_libgcc_floating_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE rs6000_floatn_mode

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
  rs6000_builtin_md_vectorized_function

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
/* Use a 32-bit anchor range.  This leads to sequences like:

	addis	tmp,anchor,high
	add	dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_PREDICT_DOLOOP_P
#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p

#undef TARGET_HAVE_COUNT_REG_DECR_P
#define TARGET_HAVE_COUNT_REG_DECR_P true

/* 1000000000 is infinite cost in IVOPTs.  */
#undef TARGET_DOLOOP_COST_FOR_GENERIC
#define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000

#undef TARGET_DOLOOP_COST_FOR_ADDRESS
#define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000

#undef TARGET_PREFERRED_DOLOOP_MODE
#define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS rs6000_offload_options

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  rs6000_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  rs6000_get_function_versions_dispatcher

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  rs6000_hard_regno_call_part_clobbered

#undef TARGET_SLOW_UNALIGNED_ACCESS
#define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment

#undef TARGET_STARTING_FRAME_OFFSET
#define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset

#undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
#define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true

#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name

#undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
#define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
  rs6000_cannot_substitute_mem_equiv_p

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION rs6000_invalid_conversion

#undef TARGET_NEED_IPA_FN_TARGET_INFO
#define TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info

#undef TARGET_UPDATE_IPA_FN_TARGET_INFO
#define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info
/* Processor table.  */
struct rs6000_ptt
{
  const char *const name;		/* Canonical processor name.  */
  const enum processor_type processor;	/* Processor type enum value.  */
  const HOST_WIDE_INT target_enable;	/* Target flags to enable.  */
};

static struct rs6000_ptt const processor_target_table[] =
{
#define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
#include "rs6000-cpus.def"
#undef RS6000_CPU
};

/* Look up a processor name for -mcpu=xxx and -mtune=xxx.  Return -1 if the
   name is invalid.  */

static int
rs6000_cpu_name_lookup (const char *name)
{
  size_t i;

  if (name != NULL)
    {
      for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
	if (! strcmp (name, processor_target_table[i].name))
	  return (int)i;
    }

  return -1;
}
/* Return number of consecutive hard regs needed starting at reg REGNO
   to hold something of mode MODE.
   This is ordinarily the length in words of a value of mode MODE
   but can be less for certain modes in special long registers.

   POWER and PowerPC GPRs hold 32 bits worth;
   PowerPC64 GPRs and FPRs hold 64 bits worth.  */

static int
rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
{
  unsigned HOST_WIDE_INT reg_size;

  /* 128-bit floating point usually takes 2 registers, unless it is IEEE
     128-bit floating point that can go in vector registers, which has VSX
     memory addressing.  */
  if (FP_REGNO_P (regno))
    reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
		? UNITS_PER_VSX_WORD
		: UNITS_PER_FP_WORD);

  else if (ALTIVEC_REGNO_P (regno))
    reg_size = UNITS_PER_ALTIVEC_WORD;

  else
    reg_size = UNITS_PER_WORD;

  return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
}
/* Value is 1 if hard register REGNO can hold a value of machine-mode
   MODE.  */

static int
rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
{
  int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;

  if (COMPLEX_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  /* Vector pair modes need even/odd VSX register pairs.  Only allow vector
     registers.  */
  if (mode == OOmode)
    return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);

  /* MMA accumulator modes need FPR registers divisible by 4.  */
  if (mode == XOmode)
    return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);

  /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
     register combinations, and use PTImode where we need to deal with quad
     word memory operations.  Don't allow quad words in the argument or frame
     pointer registers, just registers 0..31.  */
  if (mode == PTImode)
    return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
	    && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
	    && ((regno & 1) == 0));

  /* VSX registers that overlap the FPR registers are larger than for non-VSX
     implementations.  Don't allow an item to be split between a FP register
     and an Altivec register.  Allow TImode in all VSX registers if the user
     allowed it.  */
  if (TARGET_VSX && VSX_REGNO_P (regno)
      && (VECTOR_MEM_VSX_P (mode)
	  || VECTOR_ALIGNMENT_P (mode)
	  || reg_addr[mode].scalar_in_vmx_p
	  || mode == TImode
	  || (TARGET_VADDUQM && mode == V1TImode)))
    {
      if (FP_REGNO_P (regno))
	return FP_REGNO_P (last_regno);

      if (ALTIVEC_REGNO_P (regno))
	{
	  if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
	    return 0;

	  return ALTIVEC_REGNO_P (last_regno);
	}
    }

  /* The GPRs can hold any mode, but values bigger than one register
     cannot go past R31.  */
  if (INT_REGNO_P (regno))
    return INT_REGNO_P (last_regno);

  /* The float registers (except for VSX vector modes) can only hold floating
     modes and DImode.  */
  if (FP_REGNO_P (regno))
    {
      if (VECTOR_ALIGNMENT_P (mode))
	return 0;

      if (SCALAR_FLOAT_MODE_P (mode)
	  && (mode != TDmode || (regno % 2) == 0)
	  && FP_REGNO_P (last_regno))
	return 1;

      if (GET_MODE_CLASS (mode) == MODE_INT)
	{
	  if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
	    return 1;

	  if (TARGET_P8_VECTOR && (mode == SImode))
	    return 1;

	  if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
	    return 1;
	}

      return 0;
    }

  /* The CR register can only hold CC modes.  */
  if (CR_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;

  if (CA_REGNO_P (regno))
    return mode == Pmode || mode == SImode;

  /* AltiVec only in AltiVec registers.  */
  if (ALTIVEC_REGNO_P (regno))
    return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
	    || mode == V1TImode);

  /* We cannot put non-VSX TImode or PTImode anywhere except general register
     and it must be able to fit within the register set.  */

  return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
}
/* Implement TARGET_HARD_REGNO_NREGS.  */

static unsigned int
rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
{
  return rs6000_hard_regno_nregs[mode][regno];
}

/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  return rs6000_hard_regno_mode_ok_p[mode][regno];
}
/* Implement TARGET_MODES_TIEABLE_P.

   PTImode cannot tie with other modes because PTImode is restricted to even
   GPR registers, and TImode can go in any GPR as well as VSX registers (PR

   Similarly, don't allow OOmode (vector pair, restricted to even VSX
   registers) or XOmode (vector quad, restricted to FPR registers divisible
   by 4) to tie with other modes.

   Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
   128-bit floating point on VSX systems ties with other vectors.  */

static bool
rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
      || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
    return mode1 == mode2;

  if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
    return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
  if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
    return false;

  if (SCALAR_FLOAT_MODE_P (mode1))
    return SCALAR_FLOAT_MODE_P (mode2);
  if (SCALAR_FLOAT_MODE_P (mode2))
    return false;

  if (GET_MODE_CLASS (mode1) == MODE_CC)
    return GET_MODE_CLASS (mode2) == MODE_CC;
  if (GET_MODE_CLASS (mode2) == MODE_CC)
    return false;

  return true;
}
1989 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1992 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno
,
1997 && GET_MODE_SIZE (mode
) > 4
1998 && INT_REGNO_P (regno
))
2002 && FP_REGNO_P (regno
)
2003 && GET_MODE_SIZE (mode
) > 8
2004 && !FLOAT128_2REG_P (mode
))
2010 /* Print interesting facts about registers. */
2012 rs6000_debug_reg_print (int first_regno
, int last_regno
, const char *reg_name
)
2016 for (r
= first_regno
; r
<= last_regno
; ++r
)
2018 const char *comma
= "";
2021 if (first_regno
== last_regno
)
2022 fprintf (stderr
, "%s:\t", reg_name
);
2024 fprintf (stderr
, "%s%d:\t", reg_name
, r
- first_regno
);
2027 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2028 if (rs6000_hard_regno_mode_ok_p
[m
][r
] && rs6000_hard_regno_nregs
[m
][r
])
2032 fprintf (stderr
, ",\n\t");
2037 if (rs6000_hard_regno_nregs
[m
][r
] > 1)
2038 len
+= fprintf (stderr
, "%s%s/%d", comma
, GET_MODE_NAME (m
),
2039 rs6000_hard_regno_nregs
[m
][r
]);
2041 len
+= fprintf (stderr
, "%s%s", comma
, GET_MODE_NAME (m
));
2046 if (call_used_or_fixed_reg_p (r
))
2050 fprintf (stderr
, ",\n\t");
2055 len
+= fprintf (stderr
, "%s%s", comma
, "call-used");
2063 fprintf (stderr
, ",\n\t");
2068 len
+= fprintf (stderr
, "%s%s", comma
, "fixed");
2074 fprintf (stderr
, ",\n\t");
2078 len
+= fprintf (stderr
, "%sreg-class = %s", comma
,
2079 reg_class_names
[(int)rs6000_regno_regclass
[r
]]);
2084 fprintf (stderr
, ",\n\t");
2088 fprintf (stderr
, "%sregno = %d\n", comma
, r
);
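
/* Return a human readable name for the vector unit enumeration V, used by
   the -mdebug=reg output below.  */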
2093 rs6000_debug_vector_unit (enum rs6000_vector v
)
2099 case VECTOR_NONE
: ret
= "none"; break;
2100 case VECTOR_ALTIVEC
: ret
= "altivec"; break;
2101 case VECTOR_VSX
: ret
= "vsx"; break;
2102 case VECTOR_P8_VECTOR
: ret
= "p8_vector"; break;
2103 default: ret
= "unknown"; break;
/* Inner function printing just the address mask for a particular reload
   register class.  */
2111 DEBUG_FUNCTION
char *
2112 rs6000_debug_addr_mask (addr_mask_type mask
, bool keep_spaces
)
2117 if ((mask
& RELOAD_REG_VALID
) != 0)
2119 else if (keep_spaces
)
2122 if ((mask
& RELOAD_REG_MULTIPLE
) != 0)
2124 else if (keep_spaces
)
2127 if ((mask
& RELOAD_REG_INDEXED
) != 0)
2129 else if (keep_spaces
)
2132 if ((mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
2134 else if ((mask
& RELOAD_REG_OFFSET
) != 0)
2136 else if (keep_spaces
)
2139 if ((mask
& RELOAD_REG_PRE_INCDEC
) != 0)
2141 else if (keep_spaces
)
2144 if ((mask
& RELOAD_REG_PRE_MODIFY
) != 0)
2146 else if (keep_spaces
)
2149 if ((mask
& RELOAD_REG_AND_M16
) != 0)
2151 else if (keep_spaces
)
/* Print the address masks in a human readable fashion.  */
2161 rs6000_debug_print_mode (ssize_t m
)
2166 fprintf (stderr
, "Mode: %-5s", GET_MODE_NAME (m
));
2167 for (rc
= 0; rc
< N_RELOAD_REG
; rc
++)
2168 fprintf (stderr
, " %s: %s", reload_reg_map
[rc
].name
,
2169 rs6000_debug_addr_mask (reg_addr
[m
].addr_mask
[rc
], true));
2171 if ((reg_addr
[m
].reload_store
!= CODE_FOR_nothing
)
2172 || (reg_addr
[m
].reload_load
!= CODE_FOR_nothing
))
2174 fprintf (stderr
, "%*s Reload=%c%c", spaces
, "",
2175 (reg_addr
[m
].reload_store
!= CODE_FOR_nothing
) ? 's' : '*',
2176 (reg_addr
[m
].reload_load
!= CODE_FOR_nothing
) ? 'l' : '*');
2180 spaces
+= strlen (" Reload=sl");
2182 if (reg_addr
[m
].scalar_in_vmx_p
)
2184 fprintf (stderr
, "%*s Upper=y", spaces
, "");
2188 spaces
+= strlen (" Upper=y");
2190 if (rs6000_vector_unit
[m
] != VECTOR_NONE
2191 || rs6000_vector_mem
[m
] != VECTOR_NONE
)
2193 fprintf (stderr
, "%*s vector: arith=%-10s mem=%s",
2195 rs6000_debug_vector_unit (rs6000_vector_unit
[m
]),
2196 rs6000_debug_vector_unit (rs6000_vector_mem
[m
]));
2199 fputs ("\n", stderr
);
2202 #define DEBUG_FMT_ID "%-32s= "
2203 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2204 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2205 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2207 /* Print various interesting information with -mdebug=reg. */
2209 rs6000_debug_reg_global (void)
2211 static const char *const tf
[2] = { "false", "true" };
2212 const char *nl
= (const char *)0;
2215 char costly_num
[20];
2217 char flags_buffer
[40];
2218 const char *costly_str
;
2219 const char *nop_str
;
2220 const char *trace_str
;
2221 const char *abi_str
;
2222 const char *cmodel_str
;
2223 struct cl_target_option cl_opts
;
2225 /* Modes we want tieable information on. */
2226 static const machine_mode print_tieable_modes
[] = {
2265 /* Virtual regs we are interested in. */
2266 const static struct {
2267 int regno
; /* register number. */
2268 const char *name
; /* register name. */
2269 } virtual_regs
[] = {
2270 { STACK_POINTER_REGNUM
, "stack pointer:" },
2271 { TOC_REGNUM
, "toc: " },
2272 { STATIC_CHAIN_REGNUM
, "static chain: " },
2273 { RS6000_PIC_OFFSET_TABLE_REGNUM
, "pic offset: " },
2274 { HARD_FRAME_POINTER_REGNUM
, "hard frame: " },
2275 { ARG_POINTER_REGNUM
, "arg pointer: " },
2276 { FRAME_POINTER_REGNUM
, "frame pointer:" },
2277 { FIRST_PSEUDO_REGISTER
, "first pseudo: " },
2278 { FIRST_VIRTUAL_REGISTER
, "first virtual:" },
2279 { VIRTUAL_INCOMING_ARGS_REGNUM
, "incoming_args:" },
2280 { VIRTUAL_STACK_VARS_REGNUM
, "stack_vars: " },
2281 { VIRTUAL_STACK_DYNAMIC_REGNUM
, "stack_dynamic:" },
2282 { VIRTUAL_OUTGOING_ARGS_REGNUM
, "outgoing_args:" },
2283 { VIRTUAL_CFA_REGNUM
, "cfa (frame): " },
    { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM,	"stack boundary:" },
2285 { LAST_VIRTUAL_REGISTER
, "last virtual: " },
2288 fputs ("\nHard register information:\n", stderr
);
2289 rs6000_debug_reg_print (FIRST_GPR_REGNO
, LAST_GPR_REGNO
, "gr");
2290 rs6000_debug_reg_print (FIRST_FPR_REGNO
, LAST_FPR_REGNO
, "fp");
2291 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO
,
2294 rs6000_debug_reg_print (LR_REGNO
, LR_REGNO
, "lr");
2295 rs6000_debug_reg_print (CTR_REGNO
, CTR_REGNO
, "ctr");
2296 rs6000_debug_reg_print (CR0_REGNO
, CR7_REGNO
, "cr");
2297 rs6000_debug_reg_print (CA_REGNO
, CA_REGNO
, "ca");
2298 rs6000_debug_reg_print (VRSAVE_REGNO
, VRSAVE_REGNO
, "vrsave");
2299 rs6000_debug_reg_print (VSCR_REGNO
, VSCR_REGNO
, "vscr");
2301 fputs ("\nVirtual/stack/frame registers:\n", stderr
);
2302 for (v
= 0; v
< ARRAY_SIZE (virtual_regs
); v
++)
2303 fprintf (stderr
, "%s regno = %3d\n", virtual_regs
[v
].name
, virtual_regs
[v
].regno
);
2307 "d reg_class = %s\n"
2308 "f reg_class = %s\n"
2309 "v reg_class = %s\n"
2310 "wa reg_class = %s\n"
2311 "we reg_class = %s\n"
2312 "wr reg_class = %s\n"
2313 "wx reg_class = %s\n"
2314 "wA reg_class = %s\n"
2316 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_d
]],
2317 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_f
]],
2318 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_v
]],
2319 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wa
]],
2320 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_we
]],
2321 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wr
]],
2322 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wx
]],
2323 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wA
]]);
2326 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2327 rs6000_debug_print_mode (m
);
2329 fputs ("\n", stderr
);
2331 for (m1
= 0; m1
< ARRAY_SIZE (print_tieable_modes
); m1
++)
2333 machine_mode mode1
= print_tieable_modes
[m1
];
2334 bool first_time
= true;
2336 nl
= (const char *)0;
2337 for (m2
= 0; m2
< ARRAY_SIZE (print_tieable_modes
); m2
++)
2339 machine_mode mode2
= print_tieable_modes
[m2
];
2340 if (mode1
!= mode2
&& rs6000_modes_tieable_p (mode1
, mode2
))
2344 fprintf (stderr
, "Tieable modes %s:", GET_MODE_NAME (mode1
));
2349 fprintf (stderr
, " %s", GET_MODE_NAME (mode2
));
2354 fputs ("\n", stderr
);
2360 if (rs6000_recip_control
)
2362 fprintf (stderr
, "\nReciprocal mask = 0x%x\n", rs6000_recip_control
);
2364 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2365 if (rs6000_recip_bits
[m
])
2368 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2370 (RS6000_RECIP_AUTO_RE_P (m
)
2372 : (RS6000_RECIP_HAVE_RE_P (m
) ? "have" : "none")),
2373 (RS6000_RECIP_AUTO_RSQRTE_P (m
)
2375 : (RS6000_RECIP_HAVE_RSQRTE_P (m
) ? "have" : "none")));
2378 fputs ("\n", stderr
);
2381 if (rs6000_cpu_index
>= 0)
2383 const char *name
= processor_target_table
[rs6000_cpu_index
].name
;
2385 = processor_target_table
[rs6000_cpu_index
].target_enable
;
2387 sprintf (flags_buffer
, "-mcpu=%s flags", name
);
2388 rs6000_print_isa_options (stderr
, 0, flags_buffer
, flags
);
2391 fprintf (stderr
, DEBUG_FMT_S
, "cpu", "<none>");
2393 if (rs6000_tune_index
>= 0)
2395 const char *name
= processor_target_table
[rs6000_tune_index
].name
;
2397 = processor_target_table
[rs6000_tune_index
].target_enable
;
2399 sprintf (flags_buffer
, "-mtune=%s flags", name
);
2400 rs6000_print_isa_options (stderr
, 0, flags_buffer
, flags
);
2403 fprintf (stderr
, DEBUG_FMT_S
, "tune", "<none>");
2405 cl_target_option_save (&cl_opts
, &global_options
, &global_options_set
);
2406 rs6000_print_isa_options (stderr
, 0, "rs6000_isa_flags",
2409 rs6000_print_isa_options (stderr
, 0, "rs6000_isa_flags_explicit",
2410 rs6000_isa_flags_explicit
);
2412 rs6000_print_builtin_options (stderr
, 0, "rs6000_builtin_mask",
2413 rs6000_builtin_mask
);
2415 rs6000_print_isa_options (stderr
, 0, "TARGET_DEFAULT", TARGET_DEFAULT
);
2417 fprintf (stderr
, DEBUG_FMT_S
, "--with-cpu default",
2418 OPTION_TARGET_CPU_DEFAULT
? OPTION_TARGET_CPU_DEFAULT
: "<none>");
2420 switch (rs6000_sched_costly_dep
)
2422 case max_dep_latency
:
2423 costly_str
= "max_dep_latency";
2427 costly_str
= "no_dep_costly";
2430 case all_deps_costly
:
2431 costly_str
= "all_deps_costly";
2434 case true_store_to_load_dep_costly
:
2435 costly_str
= "true_store_to_load_dep_costly";
2438 case store_to_load_dep_costly
:
2439 costly_str
= "store_to_load_dep_costly";
2443 costly_str
= costly_num
;
2444 sprintf (costly_num
, "%d", (int)rs6000_sched_costly_dep
);
2448 fprintf (stderr
, DEBUG_FMT_S
, "sched_costly_dep", costly_str
);
2450 switch (rs6000_sched_insert_nops
)
2452 case sched_finish_regroup_exact
:
2453 nop_str
= "sched_finish_regroup_exact";
2456 case sched_finish_pad_groups
:
2457 nop_str
= "sched_finish_pad_groups";
2460 case sched_finish_none
:
2461 nop_str
= "sched_finish_none";
2466 sprintf (nop_num
, "%d", (int)rs6000_sched_insert_nops
);
2470 fprintf (stderr
, DEBUG_FMT_S
, "sched_insert_nops", nop_str
);
2472 switch (rs6000_sdata
)
2479 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "data");
2483 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "sysv");
2487 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "eabi");
2492 switch (rs6000_traceback
)
2494 case traceback_default
: trace_str
= "default"; break;
2495 case traceback_none
: trace_str
= "none"; break;
2496 case traceback_part
: trace_str
= "part"; break;
2497 case traceback_full
: trace_str
= "full"; break;
2498 default: trace_str
= "unknown"; break;
2501 fprintf (stderr
, DEBUG_FMT_S
, "traceback", trace_str
);
2503 switch (rs6000_current_cmodel
)
2505 case CMODEL_SMALL
: cmodel_str
= "small"; break;
2506 case CMODEL_MEDIUM
: cmodel_str
= "medium"; break;
2507 case CMODEL_LARGE
: cmodel_str
= "large"; break;
2508 default: cmodel_str
= "unknown"; break;
2511 fprintf (stderr
, DEBUG_FMT_S
, "cmodel", cmodel_str
);
2513 switch (rs6000_current_abi
)
2515 case ABI_NONE
: abi_str
= "none"; break;
2516 case ABI_AIX
: abi_str
= "aix"; break;
2517 case ABI_ELFv2
: abi_str
= "ELFv2"; break;
2518 case ABI_V4
: abi_str
= "V4"; break;
2519 case ABI_DARWIN
: abi_str
= "darwin"; break;
2520 default: abi_str
= "unknown"; break;
2523 fprintf (stderr
, DEBUG_FMT_S
, "abi", abi_str
);
2525 if (rs6000_altivec_abi
)
2526 fprintf (stderr
, DEBUG_FMT_S
, "altivec_abi", "true");
2528 if (rs6000_aix_extabi
)
2529 fprintf (stderr
, DEBUG_FMT_S
, "AIX vec-extabi", "true");
2531 if (rs6000_darwin64_abi
)
2532 fprintf (stderr
, DEBUG_FMT_S
, "darwin64_abi", "true");
2534 fprintf (stderr
, DEBUG_FMT_S
, "soft_float",
2535 (TARGET_SOFT_FLOAT
? "true" : "false"));
2537 if (TARGET_LINK_STACK
)
2538 fprintf (stderr
, DEBUG_FMT_S
, "link_stack", "true");
2540 if (TARGET_P8_FUSION
)
2544 strcpy (options
, "power8");
2545 if (TARGET_P8_FUSION_SIGN
)
2546 strcat (options
, ", sign");
2548 fprintf (stderr
, DEBUG_FMT_S
, "fusion", options
);
2551 fprintf (stderr
, DEBUG_FMT_S
, "plt-format",
2552 TARGET_SECURE_PLT
? "secure" : "bss");
2553 fprintf (stderr
, DEBUG_FMT_S
, "struct-return",
2554 aix_struct_return
? "aix" : "sysv");
2555 fprintf (stderr
, DEBUG_FMT_S
, "always_hint", tf
[!!rs6000_always_hint
]);
2556 fprintf (stderr
, DEBUG_FMT_S
, "sched_groups", tf
[!!rs6000_sched_groups
]);
2557 fprintf (stderr
, DEBUG_FMT_S
, "align_branch",
2558 tf
[!!rs6000_align_branch_targets
]);
2559 fprintf (stderr
, DEBUG_FMT_D
, "tls_size", rs6000_tls_size
);
2560 fprintf (stderr
, DEBUG_FMT_D
, "long_double_size",
2561 rs6000_long_double_type_size
);
2562 if (rs6000_long_double_type_size
> 64)
2564 fprintf (stderr
, DEBUG_FMT_S
, "long double type",
2565 TARGET_IEEEQUAD
? "IEEE" : "IBM");
2566 fprintf (stderr
, DEBUG_FMT_S
, "default long double type",
2567 TARGET_IEEEQUAD_DEFAULT
? "IEEE" : "IBM");
2569 fprintf (stderr
, DEBUG_FMT_D
, "sched_restricted_insns_priority",
2570 (int)rs6000_sched_restricted_insns_priority
);
2571 fprintf (stderr
, DEBUG_FMT_D
, "Number of standard builtins",
2574 fprintf (stderr
, DEBUG_FMT_D
, "Enable float128 on VSX",
2575 (int)TARGET_FLOAT128_ENABLE_TYPE
);
2578 fprintf (stderr
, DEBUG_FMT_D
, "VSX easy 64-bit scalar element",
2579 (int)VECTOR_ELEMENT_SCALAR_64BIT
);
2581 if (TARGET_DIRECT_MOVE_128
)
2582 fprintf (stderr
, DEBUG_FMT_D
, "VSX easy 64-bit mfvsrld element",
2583 (int)VECTOR_ELEMENT_MFVSRLD_64BIT
);
/* Update the addr mask bits in reg_addr to help secondary reload and the
   legitimate address support figure out the appropriate addressing to
   use.  */

static void
rs6000_setup_reg_addr_masks (void)
2594 ssize_t rc
, reg
, m
, nregs
;
2595 addr_mask_type any_addr_mask
, addr_mask
;
2597 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2599 machine_mode m2
= (machine_mode
) m
;
2600 bool complex_p
= false;
2601 bool small_int_p
= (m2
== QImode
|| m2
== HImode
|| m2
== SImode
);
2604 if (COMPLEX_MODE_P (m2
))
2607 m2
= GET_MODE_INNER (m2
);
2610 msize
= GET_MODE_SIZE (m2
);
2612 /* SDmode is special in that we want to access it only via REG+REG
2613 addressing on power7 and above, since we want to use the LFIWZX and
2614 STFIWZX instructions to load it. */
2615 bool indexed_only_p
= (m
== SDmode
&& TARGET_NO_SDMODE_STACK
);
2618 for (rc
= FIRST_RELOAD_REG_CLASS
; rc
<= LAST_RELOAD_REG_CLASS
; rc
++)
2621 reg
= reload_reg_map
[rc
].reg
;
2623 /* Can mode values go in the GPR/FPR/Altivec registers? */
2624 if (reg
>= 0 && rs6000_hard_regno_mode_ok_p
[m
][reg
])
2626 bool small_int_vsx_p
= (small_int_p
2627 && (rc
== RELOAD_REG_FPR
2628 || rc
== RELOAD_REG_VMX
));
2630 nregs
= rs6000_hard_regno_nregs
[m
][reg
];
2631 addr_mask
|= RELOAD_REG_VALID
;
2633 /* Indicate if the mode takes more than 1 physical register. If
2634 it takes a single register, indicate it can do REG+REG
2635 addressing. Small integers in VSX registers can only do
2636 REG+REG addressing. */
2637 if (small_int_vsx_p
)
2638 addr_mask
|= RELOAD_REG_INDEXED
;
2639 else if (nregs
> 1 || m
== BLKmode
|| complex_p
)
2640 addr_mask
|= RELOAD_REG_MULTIPLE
;
2642 addr_mask
|= RELOAD_REG_INDEXED
;
2644 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2645 addressing. If we allow scalars into Altivec registers,
2646 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2648 For VSX systems, we don't allow update addressing for
2649 DFmode/SFmode if those registers can go in both the
2650 traditional floating point registers and Altivec registers.
2651 The load/store instructions for the Altivec registers do not
2652 have update forms. If we allowed update addressing, it seems
2653 to break IV-OPT code using floating point if the index type is
2654 int instead of long (PR target/81550 and target/84042). */
2657 && (rc
== RELOAD_REG_GPR
|| rc
== RELOAD_REG_FPR
)
2659 && !VECTOR_MODE_P (m2
)
2660 && !VECTOR_ALIGNMENT_P (m2
)
2662 && (m
!= E_DFmode
|| !TARGET_VSX
)
2663 && (m
!= E_SFmode
|| !TARGET_P8_VECTOR
)
2664 && !small_int_vsx_p
)
2666 addr_mask
|= RELOAD_REG_PRE_INCDEC
;
2668 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2669 we don't allow PRE_MODIFY for some multi-register
2674 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2678 if (TARGET_POWERPC64
)
2679 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2684 if (TARGET_HARD_FLOAT
)
2685 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2691 /* GPR and FPR registers can do REG+OFFSET addressing, except
2692 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2693 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2694 if ((addr_mask
!= 0) && !indexed_only_p
2696 && (rc
== RELOAD_REG_GPR
2697 || ((msize
== 8 || m2
== SFmode
)
2698 && (rc
== RELOAD_REG_FPR
2699 || (rc
== RELOAD_REG_VMX
&& TARGET_P9_VECTOR
)))))
2700 addr_mask
|= RELOAD_REG_OFFSET
;
	  /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2703 instructions are enabled. The offset for 128-bit VSX registers is
2704 only 12-bits. While GPRs can handle the full offset range, VSX
2705 registers can only handle the restricted range. */
2706 else if ((addr_mask
!= 0) && !indexed_only_p
2707 && msize
== 16 && TARGET_P9_VECTOR
2708 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2
)
2709 || (m2
== TImode
&& TARGET_VSX
)))
2711 addr_mask
|= RELOAD_REG_OFFSET
;
2712 if (rc
== RELOAD_REG_FPR
|| rc
== RELOAD_REG_VMX
)
2713 addr_mask
|= RELOAD_REG_QUAD_OFFSET
;
2716 /* Vector pairs can do both indexed and offset loads if the
2717 instructions are enabled, otherwise they can only do offset loads
2718 since it will be broken into two vector moves. Vector quads can
2719 only do offset loads. */
2720 else if ((addr_mask
!= 0) && TARGET_MMA
2721 && (m2
== OOmode
|| m2
== XOmode
))
2723 addr_mask
|= RELOAD_REG_OFFSET
;
2724 if (rc
== RELOAD_REG_FPR
|| rc
== RELOAD_REG_VMX
)
2726 addr_mask
|= RELOAD_REG_QUAD_OFFSET
;
2728 addr_mask
|= RELOAD_REG_INDEXED
;
2732 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2733 addressing on 128-bit types. */
2734 if (rc
== RELOAD_REG_VMX
&& msize
== 16
2735 && (addr_mask
& RELOAD_REG_VALID
) != 0)
2736 addr_mask
|= RELOAD_REG_AND_M16
;
2738 reg_addr
[m
].addr_mask
[rc
] = addr_mask
;
2739 any_addr_mask
|= addr_mask
;
2742 reg_addr
[m
].addr_mask
[RELOAD_REG_ANY
] = any_addr_mask
;
2747 /* Initialize the various global tables that are based on register size. */
2749 rs6000_init_hard_regno_mode_ok (bool global_init_p
)
2755 /* Precalculate REGNO_REG_CLASS. */
2756 rs6000_regno_regclass
[0] = GENERAL_REGS
;
2757 for (r
= 1; r
< 32; ++r
)
2758 rs6000_regno_regclass
[r
] = BASE_REGS
;
2760 for (r
= 32; r
< 64; ++r
)
2761 rs6000_regno_regclass
[r
] = FLOAT_REGS
;
2763 for (r
= 64; HARD_REGISTER_NUM_P (r
); ++r
)
2764 rs6000_regno_regclass
[r
] = NO_REGS
;
2766 for (r
= FIRST_ALTIVEC_REGNO
; r
<= LAST_ALTIVEC_REGNO
; ++r
)
2767 rs6000_regno_regclass
[r
] = ALTIVEC_REGS
;
2769 rs6000_regno_regclass
[CR0_REGNO
] = CR0_REGS
;
2770 for (r
= CR1_REGNO
; r
<= CR7_REGNO
; ++r
)
2771 rs6000_regno_regclass
[r
] = CR_REGS
;
2773 rs6000_regno_regclass
[LR_REGNO
] = LINK_REGS
;
2774 rs6000_regno_regclass
[CTR_REGNO
] = CTR_REGS
;
2775 rs6000_regno_regclass
[CA_REGNO
] = NO_REGS
;
2776 rs6000_regno_regclass
[VRSAVE_REGNO
] = VRSAVE_REGS
;
2777 rs6000_regno_regclass
[VSCR_REGNO
] = VRSAVE_REGS
;
2778 rs6000_regno_regclass
[ARG_POINTER_REGNUM
] = BASE_REGS
;
2779 rs6000_regno_regclass
[FRAME_POINTER_REGNUM
] = BASE_REGS
;
2781 /* Precalculate register class to simpler reload register class. We don't
2782 need all of the register classes that are combinations of different
2783 classes, just the simple ones that have constraint letters. */
2784 for (c
= 0; c
< N_REG_CLASSES
; c
++)
2785 reg_class_to_reg_type
[c
] = NO_REG_TYPE
;
2787 reg_class_to_reg_type
[(int)GENERAL_REGS
] = GPR_REG_TYPE
;
2788 reg_class_to_reg_type
[(int)BASE_REGS
] = GPR_REG_TYPE
;
2789 reg_class_to_reg_type
[(int)VSX_REGS
] = VSX_REG_TYPE
;
2790 reg_class_to_reg_type
[(int)VRSAVE_REGS
] = SPR_REG_TYPE
;
2791 reg_class_to_reg_type
[(int)VSCR_REGS
] = SPR_REG_TYPE
;
2792 reg_class_to_reg_type
[(int)LINK_REGS
] = SPR_REG_TYPE
;
2793 reg_class_to_reg_type
[(int)CTR_REGS
] = SPR_REG_TYPE
;
2794 reg_class_to_reg_type
[(int)LINK_OR_CTR_REGS
] = SPR_REG_TYPE
;
2795 reg_class_to_reg_type
[(int)CR_REGS
] = CR_REG_TYPE
;
2796 reg_class_to_reg_type
[(int)CR0_REGS
] = CR_REG_TYPE
;
2800 reg_class_to_reg_type
[(int)FLOAT_REGS
] = VSX_REG_TYPE
;
2801 reg_class_to_reg_type
[(int)ALTIVEC_REGS
] = VSX_REG_TYPE
;
2805 reg_class_to_reg_type
[(int)FLOAT_REGS
] = FPR_REG_TYPE
;
2806 reg_class_to_reg_type
[(int)ALTIVEC_REGS
] = ALTIVEC_REG_TYPE
;
2809 /* Precalculate the valid memory formats as well as the vector information,
     this must be set up before the rs6000_hard_regno_nregs_internal calls
     below.  */
2812 gcc_assert ((int)VECTOR_NONE
== 0);
2813 memset ((void *) &rs6000_vector_unit
[0], '\0', sizeof (rs6000_vector_unit
));
2814 memset ((void *) &rs6000_vector_mem
[0], '\0', sizeof (rs6000_vector_mem
));
2816 gcc_assert ((int)CODE_FOR_nothing
== 0);
2817 memset ((void *) ®_addr
[0], '\0', sizeof (reg_addr
));
2819 gcc_assert ((int)NO_REGS
== 0);
2820 memset ((void *) &rs6000_constraints
[0], '\0', sizeof (rs6000_constraints
));
2822 /* The VSX hardware allows native alignment for vectors, but control whether the compiler
2823 believes it can use native alignment or still uses 128-bit alignment. */
2824 if (TARGET_VSX
&& !TARGET_VSX_ALIGN_128
)
2835 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2836 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2837 if (TARGET_FLOAT128_TYPE
)
2839 rs6000_vector_mem
[KFmode
] = VECTOR_VSX
;
2840 rs6000_vector_align
[KFmode
] = 128;
2842 if (FLOAT128_IEEE_P (TFmode
))
2844 rs6000_vector_mem
[TFmode
] = VECTOR_VSX
;
2845 rs6000_vector_align
[TFmode
] = 128;
2849 /* V2DF mode, VSX only. */
2852 rs6000_vector_unit
[V2DFmode
] = VECTOR_VSX
;
2853 rs6000_vector_mem
[V2DFmode
] = VECTOR_VSX
;
2854 rs6000_vector_align
[V2DFmode
] = align64
;
2857 /* V4SF mode, either VSX or Altivec. */
2860 rs6000_vector_unit
[V4SFmode
] = VECTOR_VSX
;
2861 rs6000_vector_mem
[V4SFmode
] = VECTOR_VSX
;
2862 rs6000_vector_align
[V4SFmode
] = align32
;
2864 else if (TARGET_ALTIVEC
)
2866 rs6000_vector_unit
[V4SFmode
] = VECTOR_ALTIVEC
;
2867 rs6000_vector_mem
[V4SFmode
] = VECTOR_ALTIVEC
;
2868 rs6000_vector_align
[V4SFmode
] = align32
;
  /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
     and stores.  */
2875 rs6000_vector_unit
[V4SImode
] = VECTOR_ALTIVEC
;
2876 rs6000_vector_unit
[V8HImode
] = VECTOR_ALTIVEC
;
2877 rs6000_vector_unit
[V16QImode
] = VECTOR_ALTIVEC
;
2878 rs6000_vector_align
[V4SImode
] = align32
;
2879 rs6000_vector_align
[V8HImode
] = align32
;
2880 rs6000_vector_align
[V16QImode
] = align32
;
2884 rs6000_vector_mem
[V4SImode
] = VECTOR_VSX
;
2885 rs6000_vector_mem
[V8HImode
] = VECTOR_VSX
;
2886 rs6000_vector_mem
[V16QImode
] = VECTOR_VSX
;
2890 rs6000_vector_mem
[V4SImode
] = VECTOR_ALTIVEC
;
2891 rs6000_vector_mem
[V8HImode
] = VECTOR_ALTIVEC
;
2892 rs6000_vector_mem
[V16QImode
] = VECTOR_ALTIVEC
;
2896 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2897 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2900 rs6000_vector_mem
[V2DImode
] = VECTOR_VSX
;
2901 rs6000_vector_unit
[V2DImode
]
2902 = (TARGET_P8_VECTOR
) ? VECTOR_P8_VECTOR
: VECTOR_NONE
;
2903 rs6000_vector_align
[V2DImode
] = align64
;
2905 rs6000_vector_mem
[V1TImode
] = VECTOR_VSX
;
2906 rs6000_vector_unit
[V1TImode
]
2907 = (TARGET_P8_VECTOR
) ? VECTOR_P8_VECTOR
: VECTOR_NONE
;
2908 rs6000_vector_align
[V1TImode
] = 128;
2911 /* DFmode, see if we want to use the VSX unit. Memory is handled
2912 differently, so don't set rs6000_vector_mem. */
2915 rs6000_vector_unit
[DFmode
] = VECTOR_VSX
;
2916 rs6000_vector_align
[DFmode
] = 64;
2919 /* SFmode, see if we want to use the VSX unit. */
2920 if (TARGET_P8_VECTOR
)
2922 rs6000_vector_unit
[SFmode
] = VECTOR_VSX
;
2923 rs6000_vector_align
[SFmode
] = 32;
2926 /* Allow TImode in VSX register and set the VSX memory macros. */
2929 rs6000_vector_mem
[TImode
] = VECTOR_VSX
;
2930 rs6000_vector_align
[TImode
] = align64
;
2933 /* Add support for vector pairs and vector quad registers. */
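  /* OOmode (a vector pair) occupies 32 bytes and XOmode (an MMA accumulator,
     i.e. a vector quad) occupies 64 bytes, so they are given 256-bit and
     512-bit alignment respectively below.  */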
2936 rs6000_vector_unit
[OOmode
] = VECTOR_NONE
;
2937 rs6000_vector_mem
[OOmode
] = VECTOR_VSX
;
2938 rs6000_vector_align
[OOmode
] = 256;
2940 rs6000_vector_unit
[XOmode
] = VECTOR_NONE
;
2941 rs6000_vector_mem
[XOmode
] = VECTOR_VSX
;
2942 rs6000_vector_align
[XOmode
] = 512;
2945 /* Register class constraints for the constraints that depend on compile
2946 switches. When the VSX code was added, different constraints were added
2947 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2948 of the VSX registers are used. The register classes for scalar floating
2949 point types is set, based on whether we allow that type into the upper
2950 (Altivec) registers. GCC has register classes to target the Altivec
2951 registers for load/store operations, to select using a VSX memory
2952 operation instead of the traditional floating point operation. The
2955 d - Register class to use with traditional DFmode instructions.
2956 f - Register class to use with traditional SFmode instructions.
2957 v - Altivec register.
2958 wa - Any VSX register.
2959 wc - Reserved to represent individual CR bits (used in LLVM).
2960 wn - always NO_REGS.
2961 wr - GPR if 64-bit mode is permitted.
2962 wx - Float register if we can do 32-bit int stores. */
2964 if (TARGET_HARD_FLOAT
)
2966 rs6000_constraints
[RS6000_CONSTRAINT_f
] = FLOAT_REGS
; /* SFmode */
2967 rs6000_constraints
[RS6000_CONSTRAINT_d
] = FLOAT_REGS
; /* DFmode */
2971 rs6000_constraints
[RS6000_CONSTRAINT_wa
] = VSX_REGS
;
2973 /* Add conditional constraints based on various options, to allow us to
2974 collapse multiple insn patterns. */
2976 rs6000_constraints
[RS6000_CONSTRAINT_v
] = ALTIVEC_REGS
;
2978 if (TARGET_POWERPC64
)
2980 rs6000_constraints
[RS6000_CONSTRAINT_wr
] = GENERAL_REGS
;
2981 rs6000_constraints
[RS6000_CONSTRAINT_wA
] = BASE_REGS
;
2985 rs6000_constraints
[RS6000_CONSTRAINT_wx
] = FLOAT_REGS
; /* DImode */
2987 /* Support for new direct moves (ISA 3.0 + 64bit). */
2988 if (TARGET_DIRECT_MOVE_128
)
2989 rs6000_constraints
[RS6000_CONSTRAINT_we
] = VSX_REGS
;
2991 /* Set up the reload helper and direct move functions. */
2992 if (TARGET_VSX
|| TARGET_ALTIVEC
)
2996 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_di_store
;
2997 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_di_load
;
2998 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_di_store
;
2999 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_di_load
;
3000 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_di_store
;
3001 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_di_load
;
3002 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_di_store
;
3003 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_di_load
;
3004 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_di_store
;
3005 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_di_load
;
3006 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_di_store
;
3007 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_di_load
;
3008 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_di_store
;
3009 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_di_load
;
3010 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_di_store
;
3011 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_di_load
;
3012 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_di_store
;
3013 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_di_load
;
3014 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_di_store
;
3015 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_di_load
;
3017 if (FLOAT128_VECTOR_P (KFmode
))
3019 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_di_store
;
3020 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_di_load
;
3023 if (FLOAT128_VECTOR_P (TFmode
))
3025 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_di_store
;
3026 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_di_load
;
	  /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
	     available.  */
3031 if (TARGET_NO_SDMODE_STACK
)
3033 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_di_store
;
3034 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_di_load
;
3039 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_di_store
;
3040 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_di_load
;
3043 if (TARGET_DIRECT_MOVE
&& !TARGET_DIRECT_MOVE_128
)
3045 reg_addr
[TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxti
;
3046 reg_addr
[V1TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv1ti
;
3047 reg_addr
[V2DFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2df
;
3048 reg_addr
[V2DImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2di
;
3049 reg_addr
[V4SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4sf
;
3050 reg_addr
[V4SImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4si
;
3051 reg_addr
[V8HImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv8hi
;
3052 reg_addr
[V16QImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv16qi
;
3053 reg_addr
[SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxsf
;
3055 reg_addr
[TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprti
;
3056 reg_addr
[V1TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv1ti
;
3057 reg_addr
[V2DFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2df
;
3058 reg_addr
[V2DImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2di
;
3059 reg_addr
[V4SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4sf
;
3060 reg_addr
[V4SImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4si
;
3061 reg_addr
[V8HImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv8hi
;
3062 reg_addr
[V16QImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv16qi
;
3063 reg_addr
[SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprsf
;
3065 if (FLOAT128_VECTOR_P (KFmode
))
3067 reg_addr
[KFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxkf
;
3068 reg_addr
[KFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprkf
;
3071 if (FLOAT128_VECTOR_P (TFmode
))
3073 reg_addr
[TFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxtf
;
3074 reg_addr
[TFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprtf
;
3079 reg_addr
[OOmode
].reload_store
= CODE_FOR_reload_oo_di_store
;
3080 reg_addr
[OOmode
].reload_load
= CODE_FOR_reload_oo_di_load
;
3081 reg_addr
[XOmode
].reload_store
= CODE_FOR_reload_xo_di_store
;
3082 reg_addr
[XOmode
].reload_load
= CODE_FOR_reload_xo_di_load
;
3088 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_si_store
;
3089 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_si_load
;
3090 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_si_store
;
3091 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_si_load
;
3092 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_si_store
;
3093 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_si_load
;
3094 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_si_store
;
3095 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_si_load
;
3096 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_si_store
;
3097 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_si_load
;
3098 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_si_store
;
3099 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_si_load
;
3100 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_si_store
;
3101 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_si_load
;
3102 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_si_store
;
3103 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_si_load
;
3104 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_si_store
;
3105 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_si_load
;
3106 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_si_store
;
3107 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_si_load
;
3109 if (FLOAT128_VECTOR_P (KFmode
))
3111 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_si_store
;
3112 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_si_load
;
3115 if (FLOAT128_IEEE_P (TFmode
))
3117 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_si_store
;
3118 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_si_load
;
	  /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
	     available.  */
3123 if (TARGET_NO_SDMODE_STACK
)
3125 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_si_store
;
3126 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_si_load
;
3131 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_si_store
;
3132 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_si_load
;
3135 if (TARGET_DIRECT_MOVE
)
3137 reg_addr
[DImode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdi
;
3138 reg_addr
[DDmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdd
;
3139 reg_addr
[DFmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdf
;
3143 reg_addr
[DFmode
].scalar_in_vmx_p
= true;
3144 reg_addr
[DImode
].scalar_in_vmx_p
= true;
3146 if (TARGET_P8_VECTOR
)
3148 reg_addr
[SFmode
].scalar_in_vmx_p
= true;
3149 reg_addr
[SImode
].scalar_in_vmx_p
= true;
3151 if (TARGET_P9_VECTOR
)
3153 reg_addr
[HImode
].scalar_in_vmx_p
= true;
3154 reg_addr
[QImode
].scalar_in_vmx_p
= true;
3159 /* Precalculate HARD_REGNO_NREGS. */
3160 for (r
= 0; HARD_REGISTER_NUM_P (r
); ++r
)
3161 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3162 rs6000_hard_regno_nregs
[m
][r
]
3163 = rs6000_hard_regno_nregs_internal (r
, (machine_mode
) m
);
3165 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3166 for (r
= 0; HARD_REGISTER_NUM_P (r
); ++r
)
3167 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3168 rs6000_hard_regno_mode_ok_p
[m
][r
]
3169 = rs6000_hard_regno_mode_ok_uncached (r
, (machine_mode
) m
);
3171 /* Precalculate CLASS_MAX_NREGS sizes. */
3172 for (c
= 0; c
< LIM_REG_CLASSES
; ++c
)
3176 if (TARGET_VSX
&& VSX_REG_CLASS_P (c
))
3177 reg_size
= UNITS_PER_VSX_WORD
;
3179 else if (c
== ALTIVEC_REGS
)
3180 reg_size
= UNITS_PER_ALTIVEC_WORD
;
3182 else if (c
== FLOAT_REGS
)
3183 reg_size
= UNITS_PER_FP_WORD
;
3186 reg_size
= UNITS_PER_WORD
;
3188 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3190 machine_mode m2
= (machine_mode
)m
;
3191 int reg_size2
= reg_size
;
3193 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3195 if (TARGET_VSX
&& VSX_REG_CLASS_P (c
) && FLOAT128_2REG_P (m
))
3196 reg_size2
= UNITS_PER_FP_WORD
;
3198 rs6000_class_max_nregs
[m
][c
]
3199 = (GET_MODE_SIZE (m2
) + reg_size2
- 1) / reg_size2
;
  /* Calculate which modes to automatically generate code to use the
     reciprocal divide and square root instructions.  In the future, possibly
     automatically generate the instructions even if the user did not specify
     -mrecip.  The older machines' double precision reciprocal sqrt estimate is
     not accurate enough.  */
3208 memset (rs6000_recip_bits
, 0, sizeof (rs6000_recip_bits
));
3210 rs6000_recip_bits
[SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3212 rs6000_recip_bits
[DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3213 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3214 rs6000_recip_bits
[V4SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3215 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3216 rs6000_recip_bits
[V2DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3218 if (TARGET_FRSQRTES
)
3219 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3221 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3222 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3223 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3224 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3225 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3227 if (rs6000_recip_control
)
3229 if (!flag_finite_math_only
)
3230 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3232 if (flag_trapping_math
)
3233 warning (0, "%qs requires %qs or %qs", "-mrecip",
3234 "-fno-trapping-math", "-ffast-math");
3235 if (!flag_reciprocal_math
)
3236 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3238 if (flag_finite_math_only
&& !flag_trapping_math
&& flag_reciprocal_math
)
3240 if (RS6000_RECIP_HAVE_RE_P (SFmode
)
3241 && (rs6000_recip_control
& RECIP_SF_DIV
) != 0)
3242 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3244 if (RS6000_RECIP_HAVE_RE_P (DFmode
)
3245 && (rs6000_recip_control
& RECIP_DF_DIV
) != 0)
3246 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3248 if (RS6000_RECIP_HAVE_RE_P (V4SFmode
)
3249 && (rs6000_recip_control
& RECIP_V4SF_DIV
) != 0)
3250 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3252 if (RS6000_RECIP_HAVE_RE_P (V2DFmode
)
3253 && (rs6000_recip_control
& RECIP_V2DF_DIV
) != 0)
3254 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3256 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode
)
3257 && (rs6000_recip_control
& RECIP_SF_RSQRT
) != 0)
3258 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3260 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode
)
3261 && (rs6000_recip_control
& RECIP_DF_RSQRT
) != 0)
3262 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3264 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode
)
3265 && (rs6000_recip_control
& RECIP_V4SF_RSQRT
) != 0)
3266 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3268 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode
)
3269 && (rs6000_recip_control
& RECIP_V2DF_RSQRT
) != 0)
3270 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
  /* Update the addr mask bits in reg_addr to help secondary reload and the
     legitimate address support figure out the appropriate addressing to
     use.  */
3277 rs6000_setup_reg_addr_masks ();
3279 if (global_init_p
|| TARGET_DEBUG_TARGET
)
3281 if (TARGET_DEBUG_REG
)
3282 rs6000_debug_reg_global ();
3284 if (TARGET_DEBUG_COST
|| TARGET_DEBUG_REG
)
3286 "SImode variable mult cost = %d\n"
3287 "SImode constant mult cost = %d\n"
3288 "SImode short constant mult cost = %d\n"
3289 "DImode multipliciation cost = %d\n"
3290 "SImode division cost = %d\n"
3291 "DImode division cost = %d\n"
3292 "Simple fp operation cost = %d\n"
3293 "DFmode multiplication cost = %d\n"
3294 "SFmode division cost = %d\n"
3295 "DFmode division cost = %d\n"
3296 "cache line size = %d\n"
3297 "l1 cache size = %d\n"
3298 "l2 cache size = %d\n"
3299 "simultaneous prefetches = %d\n"
3302 rs6000_cost
->mulsi_const
,
3303 rs6000_cost
->mulsi_const9
,
3311 rs6000_cost
->cache_line_size
,
3312 rs6000_cost
->l1_cache_size
,
3313 rs6000_cost
->l2_cache_size
,
3314 rs6000_cost
->simultaneous_prefetches
);
3319 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3322 darwin_rs6000_override_options (void)
  /* The Darwin ABI always includes AltiVec, can't be (validly) turned
     off.  */
3326 rs6000_altivec_abi
= 1;
3327 TARGET_ALTIVEC_VRSAVE
= 1;
3328 rs6000_current_abi
= ABI_DARWIN
;
3330 if (DEFAULT_ABI
== ABI_DARWIN
3332 darwin_one_byte_bool
= 1;
3334 if (TARGET_64BIT
&& ! TARGET_POWERPC64
)
3336 rs6000_isa_flags
|= OPTION_MASK_POWERPC64
;
3337 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3340 /* The linkers [ld64] that support 64Bit do not need the JBSR longcall
3341 optimisation, and will not work with the most generic case (where the
     symbol is undefined external, but there is no symbol stub).  */
3344 rs6000_default_long_calls
= 0;
3346 /* ld_classic is (so far) still used for kernel (static) code, and supports
3347 the JBSR longcall / branch islands. */
3350 rs6000_default_long_calls
= 1;
3352 /* Allow a kext author to do -mkernel -mhard-float. */
3353 if (! (rs6000_isa_flags_explicit
& OPTION_MASK_SOFT_FLOAT
))
3354 rs6000_isa_flags
|= OPTION_MASK_SOFT_FLOAT
;
3357 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3359 if (!flag_mkernel
&& !flag_apple_kext
3361 && ! (rs6000_isa_flags_explicit
& OPTION_MASK_ALTIVEC
))
3362 rs6000_isa_flags
|= OPTION_MASK_ALTIVEC
;
3364 /* Unless the user (not the configurer) has explicitly overridden
3365 it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
3366 G4 unless targeting the kernel. */
3369 && strverscmp (darwin_macosx_version_min
, "10.5") >= 0
3370 && ! (rs6000_isa_flags_explicit
& OPTION_MASK_ALTIVEC
)
3371 && ! OPTION_SET_P (rs6000_cpu_index
))
3373 rs6000_isa_flags
|= OPTION_MASK_ALTIVEC
;
3378 /* If not otherwise specified by a target, make 'long double' equivalent to
3381 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3382 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3385 /* Return the builtin mask of the various options used that could affect which
3386 builtins were used. In the past we used target_flags, but we've run out of
3387 bits, and some options are no longer in target_flags. */
3390 rs6000_builtin_mask_calculate (void)
3392 return (((TARGET_ALTIVEC
) ? RS6000_BTM_ALTIVEC
: 0)
3393 | ((TARGET_CMPB
) ? RS6000_BTM_CMPB
: 0)
3394 | ((TARGET_VSX
) ? RS6000_BTM_VSX
: 0)
3395 | ((TARGET_FRE
) ? RS6000_BTM_FRE
: 0)
3396 | ((TARGET_FRES
) ? RS6000_BTM_FRES
: 0)
3397 | ((TARGET_FRSQRTE
) ? RS6000_BTM_FRSQRTE
: 0)
3398 | ((TARGET_FRSQRTES
) ? RS6000_BTM_FRSQRTES
: 0)
3399 | ((TARGET_POPCNTD
) ? RS6000_BTM_POPCNTD
: 0)
3400 | ((rs6000_cpu
== PROCESSOR_CELL
) ? RS6000_BTM_CELL
: 0)
3401 | ((TARGET_P8_VECTOR
) ? RS6000_BTM_P8_VECTOR
: 0)
3402 | ((TARGET_P9_VECTOR
) ? RS6000_BTM_P9_VECTOR
: 0)
3403 | ((TARGET_P9_MISC
) ? RS6000_BTM_P9_MISC
: 0)
3404 | ((TARGET_MODULO
) ? RS6000_BTM_MODULO
: 0)
3405 | ((TARGET_64BIT
) ? RS6000_BTM_64BIT
: 0)
3406 | ((TARGET_POWERPC64
) ? RS6000_BTM_POWERPC64
: 0)
3407 | ((TARGET_CRYPTO
) ? RS6000_BTM_CRYPTO
: 0)
3408 | ((TARGET_HTM
) ? RS6000_BTM_HTM
: 0)
3409 | ((TARGET_DFP
) ? RS6000_BTM_DFP
: 0)
3410 | ((TARGET_HARD_FLOAT
) ? RS6000_BTM_HARD_FLOAT
: 0)
3411 | ((TARGET_LONG_DOUBLE_128
3412 && TARGET_HARD_FLOAT
3413 && !TARGET_IEEEQUAD
) ? RS6000_BTM_LDBL128
: 0)
3414 | ((TARGET_FLOAT128_TYPE
) ? RS6000_BTM_FLOAT128
: 0)
3415 | ((TARGET_FLOAT128_HW
) ? RS6000_BTM_FLOAT128_HW
: 0)
3416 | ((TARGET_MMA
) ? RS6000_BTM_MMA
: 0)
3417 | ((TARGET_POWER10
) ? RS6000_BTM_P10
: 0));
3420 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3421 to clobber the XER[CA] bit because clobbering that bit without telling
3422 the compiler worked just fine with versions of GCC before GCC 5, and
3423 breaking a lot of older code in ways that are hard to track down is
3424 not such a great idea. */
3427 rs6000_md_asm_adjust (vec
<rtx
> & /*outputs*/, vec
<rtx
> & /*inputs*/,
3428 vec
<machine_mode
> & /*input_modes*/,
3429 vec
<const char *> & /*constraints*/, vec
<rtx
> &clobbers
,
3430 HARD_REG_SET
&clobbered_regs
, location_t
/*loc*/)
3432 clobbers
.safe_push (gen_rtx_REG (SImode
, CA_REGNO
));
3433 SET_HARD_REG_BIT (clobbered_regs
, CA_REGNO
);
3437 /* This target function is similar to the hook TARGET_OPTION_OVERRIDE
3438 but is called when the optimize level is changed via an attribute or
3439 pragma or when it is reset at the end of the code affected by the
3440 attribute or pragma. It is not called at the beginning of compilation
3441 when TARGET_OPTION_OVERRIDE is called so if you want to perform these
3442 actions then, you should have TARGET_OPTION_OVERRIDE call
3443 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
3446 rs6000_override_options_after_change (void)
3448 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3449 turns -frename-registers on. */
3450 if ((OPTION_SET_P (flag_unroll_loops
) && flag_unroll_loops
)
3451 || (OPTION_SET_P (flag_unroll_all_loops
)
3452 && flag_unroll_all_loops
))
3454 if (!OPTION_SET_P (unroll_only_small_loops
))
3455 unroll_only_small_loops
= 0;
3456 if (!OPTION_SET_P (flag_rename_registers
))
3457 flag_rename_registers
= 1;
3458 if (!OPTION_SET_P (flag_cunroll_grow_size
))
3459 flag_cunroll_grow_size
= 1;
3461 else if (!OPTION_SET_P (flag_cunroll_grow_size
))
3462 flag_cunroll_grow_size
= flag_peel_loops
|| optimize
>= 3;
3464 /* If we are inserting ROP-protect instructions, disable shrink wrap. */
3465 if (rs6000_rop_protect
)
3466 flag_shrink_wrap
= 0;
3469 #ifdef TARGET_USES_LINUX64_OPT
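
/* Validate the option combinations for 64-bit PowerPC Linux style targets
   (TARGET_USES_LINUX64_OPT): diagnose ABI settings that are incompatible
   with -m64/-m32 and choose default code model and TOC settings.  */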
3471 rs6000_linux64_override_options ()
3473 if (!OPTION_SET_P (rs6000_alignment_flags
))
3474 rs6000_alignment_flags
= MASK_ALIGN_NATURAL
;
3475 if (rs6000_isa_flags
& OPTION_MASK_64BIT
)
3477 if (DEFAULT_ABI
!= ABI_AIX
)
3479 rs6000_current_abi
= ABI_AIX
;
3480 error (INVALID_64BIT
, "call");
3482 dot_symbols
= !strcmp (rs6000_abi_name
, "aixdesc");
3483 if (ELFv2_ABI_CHECK
)
3485 rs6000_current_abi
= ABI_ELFv2
;
3487 error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
3489 if (rs6000_isa_flags
& OPTION_MASK_RELOCATABLE
)
3491 rs6000_isa_flags
&= ~OPTION_MASK_RELOCATABLE
;
3492 error (INVALID_64BIT
, "relocatable");
3494 if (rs6000_isa_flags
& OPTION_MASK_EABI
)
3496 rs6000_isa_flags
&= ~OPTION_MASK_EABI
;
3497 error (INVALID_64BIT
, "eabi");
3499 if (TARGET_PROTOTYPE
)
3501 target_prototype
= 0;
3502 error (INVALID_64BIT
, "prototype");
3504 if ((rs6000_isa_flags
& OPTION_MASK_POWERPC64
) == 0)
3506 rs6000_isa_flags
|= OPTION_MASK_POWERPC64
;
3507 error ("%<-m64%> requires a PowerPC64 cpu");
3509 if (!OPTION_SET_P (rs6000_current_cmodel
))
3510 SET_CMODEL (CMODEL_MEDIUM
);
3511 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MINIMAL_TOC
) != 0)
3513 if (OPTION_SET_P (rs6000_current_cmodel
)
3514 && rs6000_current_cmodel
!= CMODEL_SMALL
)
3515 error ("%<-mcmodel incompatible with other toc options%>");
3516 if (TARGET_MINIMAL_TOC
)
3517 SET_CMODEL (CMODEL_SMALL
);
3518 else if (TARGET_PCREL
3519 || (PCREL_SUPPORTED_BY_OS
3520 && (rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) == 0))
3521 /* Ignore -mno-minimal-toc. */
3524 SET_CMODEL (CMODEL_SMALL
);
3526 if (rs6000_current_cmodel
!= CMODEL_SMALL
)
3528 if (!OPTION_SET_P (TARGET_NO_FP_IN_TOC
))
3529 TARGET_NO_FP_IN_TOC
= rs6000_current_cmodel
== CMODEL_MEDIUM
;
3530 if (!OPTION_SET_P (TARGET_NO_SUM_IN_TOC
))
3531 TARGET_NO_SUM_IN_TOC
= 0;
3533 if (TARGET_PLTSEQ
&& DEFAULT_ABI
!= ABI_ELFv2
)
3535 if (OPTION_SET_P (rs6000_pltseq
))
3536 warning (0, "%qs unsupported for this ABI",
3538 rs6000_pltseq
= false;
3541 else if (TARGET_64BIT
)
3542 error (INVALID_32BIT
, "32");
3545 if (TARGET_PROFILE_KERNEL
)
3548 error (INVALID_32BIT
, "profile-kernel");
3550 if (OPTION_SET_P (rs6000_current_cmodel
))
3552 SET_CMODEL (CMODEL_SMALL
);
3553 error (INVALID_32BIT
, "cmodel");
3559 /* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
3560 This support is only in little endian GLIBC 2.32 or newer. */
3562 glibc_supports_ieee_128bit (void)
3565 if (OPTION_GLIBC
&& !BYTES_BIG_ENDIAN
3566 && ((TARGET_GLIBC_MAJOR
* 1000) + TARGET_GLIBC_MINOR
) >= 2032)
3568 #endif /* OPTION_GLIBC. */
3573 /* Override command line options.
3575 Combine build-specific configuration information with options
3576 specified on the command line to set various state variables which
3577 influence code generation, optimization, and expansion of built-in
3578 functions. Assure that command-line configuration preferences are
3579 compatible with each other and with the build configuration; issue
3580 warnings while adjusting configuration or error messages while
3581 rejecting configuration.
3583 Upon entry to this function:
3585 This function is called once at the beginning of
3586 compilation, and then again at the start and end of compiling
3587 each section of code that has a different configuration, as
3588 indicated, for example, by adding the
3590 __attribute__((__target__("cpu=power9")))
3592 qualifier to a function definition or, for example, by bracketing
3595 #pragma GCC target("altivec")
3599 #pragma GCC reset_options
3601 directives. Parameter global_init_p is true for the initial
3602 invocation, which initializes global variables, and false for all
3603 subsequent invocations.
3606 Various global state information is assumed to be valid. This
3607 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3608 default CPU specified at build configure time, TARGET_DEFAULT,
3609 representing the default set of option flags for the default
3610 target, and OPTION_SET_P (rs6000_isa_flags), representing
3611 which options were requested on the command line.
3613 Upon return from this function:
3615 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3616 was set by name on the command line. Additionally, if certain
3617 attributes are automatically enabled or disabled by this function
3618 in order to assure compatibility between options and
3619 configuration, the flags associated with those attributes are
3620 also set. By setting these "explicit bits", we avoid the risk
3621 that other code might accidentally overwrite these particular
3622 attributes with "default values".
3624 The various bits of rs6000_isa_flags are set to indicate the
3625 target options that have been selected for the most current
3626 compilation efforts. This has the effect of also turning on the
3627 associated TARGET_XXX values since these are macros which are
3628 generally defined to test the corresponding bit of the
3629 rs6000_isa_flags variable.
3631 The variable rs6000_builtin_mask is set to represent the target
3632 options for the most current compilation efforts, consistent with
3633 the current contents of rs6000_isa_flags. This variable controls
3634 expansion of built-in functions.
3636 Various other global variables and fields of global structures
3637 (over 50 in all) are initialized to reflect the desired options
3638 for the most current compilation efforts. */
static bool
rs6000_option_override_internal (bool global_init_p)
{
  bool ret = true;

  HOST_WIDE_INT set_masks;
  HOST_WIDE_INT ignore_masks;
  int cpu_index = -1;
  int tune_index;
  struct cl_target_option *main_target_opt
    = ((global_init_p || target_option_default_node == NULL)
       ? NULL : TREE_TARGET_OPTION (target_option_default_node));

  /* Print defaults.  */
  if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
    rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);

  /* Remember the explicit arguments.  */
  rs6000_isa_flags_explicit = OPTION_SET_P (rs6000_isa_flags);
  /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
     library functions, so warn about it.  The flag may be useful for
     performance studies from time to time though, so don't disable it
     entirely.  */
  if (OPTION_SET_P (rs6000_alignment_flags)
      && rs6000_alignment_flags == MASK_ALIGN_POWER
      && DEFAULT_ABI == ABI_DARWIN
      && TARGET_64BIT)
    warning (0, "%qs is not supported for 64-bit Darwin;"
             " it is incompatible with the installed C and C++ libraries",
             "-malign-power");

  /* Numerous experiments show that IRA-based loop pressure
     calculation works better for RTL loop invariant motion on targets
     with enough (>= 32) registers.  It is an expensive optimization.
     So it is on only for peak performance.  */
  if (optimize >= 3 && global_init_p
      && !OPTION_SET_P (flag_ira_loop_pressure))
    flag_ira_loop_pressure = 1;
  /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
     for tracebacks to be complete but not if any -fasynchronous-unwind-tables
     options were already specified.  */
  if (flag_sanitize & SANITIZE_USER_ADDRESS
      && !OPTION_SET_P (flag_asynchronous_unwind_tables))
    flag_asynchronous_unwind_tables = 1;

  /* -fvariable-expansion-in-unroller is a win for POWER whenever the
     loop unroller is active.  It is only checked during unrolling, so
     we can just set it on by default.  */
  if (!OPTION_SET_P (flag_variable_expansion_in_unroller))
    flag_variable_expansion_in_unroller = 1;
  /* Set the pointer size.  */
  if (TARGET_64BIT)
    {
      rs6000_pmode = DImode;
      rs6000_pointer_size = 64;
    }
  else
    {
      rs6000_pmode = SImode;
      rs6000_pointer_size = 32;
    }
  /* Some OSs don't support saving the high part of 64-bit registers on context
     switch.  Other OSs don't support saving Altivec registers.  On those OSs,
     we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
     if the user wants either, the user must explicitly specify them and we
     won't interfere with the user's specification.  */

  set_masks = POWERPC_MASKS;
#ifdef OS_MISSING_POWERPC64
  if (OS_MISSING_POWERPC64)
    set_masks &= ~OPTION_MASK_POWERPC64;
#endif

#ifdef OS_MISSING_ALTIVEC
  if (OS_MISSING_ALTIVEC)
    set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
                   | OTHER_VSX_VECTOR_MASKS);
#endif

  /* Don't override by the processor default if given explicitly.  */
  set_masks &= ~rs6000_isa_flags_explicit;
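  /* Illustrative example of the masking above (not part of the original
     source): if the user passed -mno-altivec, OPTION_MASK_ALTIVEC is set in
     rs6000_isa_flags_explicit, so it is removed from set_masks here and a
     later -mcpu=power9 default cannot silently re-enable AltiVec.  */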
  /* Process the -mcpu=<xxx> and -mtune=<xxx> argument.  If the user changed
     the cpu in a target attribute or pragma, but did not specify a tuning
     option, use the cpu for the tuning option rather than the option specified
     with -mtune on the command line.  Process a '--with-cpu' configuration
     request as an implicit --cpu.  */
  if (rs6000_cpu_index >= 0)
    cpu_index = rs6000_cpu_index;
  else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
    cpu_index = main_target_opt->x_rs6000_cpu_index;
  else if (OPTION_TARGET_CPU_DEFAULT)
    cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);

  /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
     compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
     with those from the cpu, except for options that were explicitly set.  If
     we don't have a cpu, do not override the target bits set in
     TARGET_DEFAULT.  */
  if (cpu_index >= 0)
    {
      rs6000_cpu_index = cpu_index;
      rs6000_isa_flags &= ~set_masks;
      rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
                           & set_masks);
    }
  else
    {
      /* If no -mcpu=<xxx>, inherit any default options that were cleared via
         POWERPC_MASKS.  Originally, TARGET_DEFAULT was used to initialize
         target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook.  When we
         switched to using rs6000_isa_flags, we need to do the initialization
         here.

         If there is a TARGET_DEFAULT, use that.  Otherwise fall back to using
         -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults.  */
      HOST_WIDE_INT flags;
      if (TARGET_DEFAULT)
        flags = TARGET_DEFAULT;
      else
        {
          /* PowerPC 64-bit LE requires at least ISA 2.07.  */
          const char *default_cpu = (!TARGET_POWERPC64
                                     ? "powerpc"
                                     : (BYTES_BIG_ENDIAN
                                        ? "powerpc64"
                                        : "powerpc64le"));
          int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
          flags = processor_target_table[default_cpu_index].target_enable;
        }

      rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
    }
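  /* Illustrative example of the fallback above (not part of the original
     source): with no -mcpu and no TARGET_DEFAULT, a 64-bit little-endian
     configuration (TARGET_POWERPC64 && !BYTES_BIG_ENDIAN) uses the
     "powerpc64le" entry of processor_target_table, which carries the ISA 2.07
     baseline noted in the comment, while a 32-bit configuration falls back to
     plain "powerpc".  */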
  if (rs6000_tune_index >= 0)
    tune_index = rs6000_tune_index;
  else if (cpu_index >= 0)
    rs6000_tune_index = tune_index = cpu_index;
  else
    {
      size_t i;
      enum processor_type tune_proc
        = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);

      tune_index = -1;
      for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
        if (processor_target_table[i].processor == tune_proc)
          {
            tune_index = (int) i;
            break;
          }
    }

  if (cpu_index >= 0)
    rs6000_cpu = processor_target_table[cpu_index].processor;
  else
    rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;

  gcc_assert (tune_index >= 0);
  rs6000_tune = processor_target_table[tune_index].processor;
  if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
      || rs6000_cpu == PROCESSOR_PPCE500MC
      || rs6000_cpu == PROCESSOR_PPCE500MC64
      || rs6000_cpu == PROCESSOR_PPCE5500)
    {
      if (TARGET_ALTIVEC)
        error ("AltiVec not supported in this target");
    }
  /* If we are optimizing big endian systems for space, use the load/store
     multiple instructions.  */
  if (BYTES_BIG_ENDIAN && optimize_size)
    rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;

  /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
     because the hardware doesn't support the instructions used in little
     endian mode, and causes an alignment trap.  The 750 does not cause an
     alignment trap (except when the target is unaligned).  */

  if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
    {
      rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
      if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
        warning (0, "%qs is not supported on little endian systems",
                 "-mmultiple");
    }
  /* If little-endian, default to -mstrict-align on older processors.
     Testing for direct_move matches power8 and later.  */
  if (!BYTES_BIG_ENDIAN
      && !(processor_target_table[tune_index].target_enable
           & OPTION_MASK_DIRECT_MOVE))
    rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;

  if (!rs6000_fold_gimple)
    fprintf (stderr,
             "gimple folding of rs6000 builtins has been disabled.\n");
  /* Add some warnings for VSX.  */
  if (TARGET_VSX)
    {
      const char *msg = NULL;
      if (!TARGET_HARD_FLOAT)
        {
          if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
            msg = N_("%<-mvsx%> requires hardware floating point");
          else
            {
              rs6000_isa_flags &= ~OPTION_MASK_VSX;
              rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
            }
        }
      else if (TARGET_AVOID_XFORM > 0)
        msg = N_("%<-mvsx%> needs indexed addressing");
      else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
                                   & OPTION_MASK_ALTIVEC))
        {
          if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
            msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
          else
            msg = N_("%<-mno-altivec%> disables vsx");
        }

      if (msg)
        {
          warning (0, msg);
          rs6000_isa_flags &= ~OPTION_MASK_VSX;
          rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
        }
    }
  /* If hard-float/altivec/vsx were explicitly turned off then don't allow
     the -mcpu setting to enable options that conflict.  */
  if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
      && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
                                       | OPTION_MASK_ALTIVEC
                                       | OPTION_MASK_VSX)) != 0)
    rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
                           | OPTION_MASK_DIRECT_MOVE)
                          & ~rs6000_isa_flags_explicit);

  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
    rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);

#ifdef XCOFF_DEBUGGING_INFO
  /* For AIX default to 64-bit DWARF.  */
  if (!OPTION_SET_P (dwarf_offset_size))
    dwarf_offset_size = POINTER_SIZE_UNITS;
#endif
  /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
     off all of the options that depend on those flags.  */
  ignore_masks = rs6000_disable_incompatible_switches ();

  /* For the newer switches (vsx, dfp, etc.) set some of the older options,
     unless the user explicitly used the -mno-<option> to disable the code.  */
  if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
    rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);

  else if (TARGET_P9_MINMAX)
    {
      if (cpu_index >= 0)
        {
          if (cpu_index == PROCESSOR_POWER9)
            {
              /* legacy behavior: allow -mcpu=power9 with certain
                 capabilities explicitly disabled.  */
              rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
            }
          else
            error ("power9 target option is incompatible with %<%s=<xxx>%> "
                   "for <xxx> less than power9", "-mcpu");
        }
      else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
               != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
                   & rs6000_isa_flags_explicit))
        {
          /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
             were explicitly cleared.  */
          error ("%qs incompatible with explicitly disabled options",
                 "-mpower9-minmax");
        }
      else
        rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
    }
  else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
    rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
  else if (TARGET_VSX)
    rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
  else if (TARGET_POPCNTD)
    rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
  else if (TARGET_DFP)
    rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
  else if (TARGET_CMPB)
    rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
  else if (TARGET_FPRND)
    rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
  else if (TARGET_POPCNTB)
    rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
  else if (TARGET_ALTIVEC)
    rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
  if (TARGET_CRYPTO && !TARGET_ALTIVEC)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
        error ("%qs requires %qs", "-mcrypto", "-maltivec");
      rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
    }

  if (!TARGET_FPRND && TARGET_VSX)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
        /* TARGET_VSX = 1 implies Power 7 and newer.  */
        error ("%qs requires %qs", "-mvsx", "-mfprnd");
      rs6000_isa_flags &= ~OPTION_MASK_FPRND;
    }

  if (TARGET_DIRECT_MOVE && !TARGET_VSX)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
        error ("%qs requires %qs", "-mdirect-move", "-mvsx");
      rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
    }

  if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
        error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
      rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
    }

  if (TARGET_P8_VECTOR && !TARGET_VSX)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
          && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
        error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
      else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
        {
          rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
          if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
            rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
        }
      else
        {
          /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
             not explicit.  */
          rs6000_isa_flags |= OPTION_MASK_VSX;
          rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
        }
    }

  if (TARGET_DFP && !TARGET_HARD_FLOAT)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
        error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
      rs6000_isa_flags &= ~OPTION_MASK_DFP;
    }
  /* The quad memory instructions only work in 64-bit mode.  In 32-bit mode,
     silently turn off quad memory mode.  */
  if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
        warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));

      if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
        warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));

      rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
                            | OPTION_MASK_QUAD_MEMORY_ATOMIC);
    }

  /* Non-atomic quad memory load/store are disabled for little endian, since
     the words are reversed, but atomic operations can still be done by
     swapping the words.  */
  if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
        warning (0, N_("%<-mquad-memory%> is not available in little endian "
                       "mode"));

      rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
    }

  /* Assume if the user asked for normal quad memory instructions, they want
     the atomic versions as well, unless they explicitly told us not to use
     quad word atomic instructions.  */
  if (TARGET_QUAD_MEMORY
      && !TARGET_QUAD_MEMORY_ATOMIC
      && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
    rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
  /* If we can shrink-wrap the TOC register save separately, then use
     -msave-toc-indirect unless explicitly disabled.  */
  if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
      && flag_shrink_wrap_separate
      && optimize_function_for_speed_p (cfun))
    rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;

  /* Enable power8 fusion if we are tuning for power8, even if we aren't
     generating power8 instructions.  Power9 does not optimize power8 fusion
     cases.  */
  if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
    {
      if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
        rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
      else
        rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
    }

  /* Setting additional fusion flags turns on base fusion.  */
  if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
    {
      if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
        {
          if (TARGET_P8_FUSION_SIGN)
            error ("%qs requires %qs", "-mpower8-fusion-sign",
                   "-mpower8-fusion");

          rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
        }
      else
        rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
    }

  /* Power8 does not fuse sign extended loads with the addis.  If we are
     optimizing at high levels for speed, convert a sign extended load into a
     zero extending load, and an explicit sign extension.  */
  if (TARGET_P8_FUSION
      && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
      && optimize_function_for_speed_p (cfun)
      && optimize >= 3)
    rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
  /* ISA 3.0 vector instructions include ISA 2.07.  */
  if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
    {
      /* We prefer to not mention undocumented options in
         error messages.  However, if users have managed to select
         power9-vector without selecting power8-vector, they
         already know about undocumented flags.  */
      if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
          && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
        error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
      else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
        {
          rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
          if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
            rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
        }
      else
        {
          /* OPTION_MASK_P9_VECTOR is explicit and
             OPTION_MASK_P8_VECTOR is not explicit.  */
          rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
          rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
        }
    }
  /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
     support.  If we only have ISA 2.06 support, and the user did not specify
     the switch, leave it set to -1 so the movmisalign patterns are enabled,
     but we don't enable the full vectorization support.  */
  if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
    TARGET_ALLOW_MOVMISALIGN = 1;

  else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
    {
      if (TARGET_ALLOW_MOVMISALIGN > 0
          && OPTION_SET_P (TARGET_ALLOW_MOVMISALIGN))
        error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");

      TARGET_ALLOW_MOVMISALIGN = 0;
    }

  /* Determine when unaligned vector accesses are permitted, and when
     they are preferred over masked Altivec loads.  Note that if
     TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
     TARGET_EFFICIENT_UNALIGNED_VSX must be as well.  The converse is
     not true.  */
  if (TARGET_EFFICIENT_UNALIGNED_VSX)
    {
      if (!TARGET_VSX)
        {
          if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
            error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");

          rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
        }

      else if (!TARGET_ALLOW_MOVMISALIGN)
        {
          if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
            error ("%qs requires %qs", "-munefficient-unaligned-vsx",
                   "-mallow-movmisalign");

          rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
        }
    }

  if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX))
    {
      if (TARGET_EFFICIENT_UNALIGNED_VSX)
        rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
      else
        rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
    }

  if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_VECTOR_PAIR))
    {
      if (TARGET_MMA && TARGET_EFFICIENT_UNALIGNED_VSX)
        rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
      else
        rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
    }
  /* Use long double size to select the appropriate long double.  We use
     TYPE_PRECISION to differentiate the 3 different long double types.  We map
     128 into the precision used for TFmode.  */
  int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
                                  ? 64
                                  : FLOAT_PRECISION_TFmode);

  /* Set long double size before the IEEE 128-bit tests.  */
  if (!OPTION_SET_P (rs6000_long_double_type_size))
    {
      if (main_target_opt != NULL
          && (main_target_opt->x_rs6000_long_double_type_size
              != default_long_double_size))
        error ("target attribute or pragma changes %<long double%> size");
      else
        rs6000_long_double_type_size = default_long_double_size;
    }
  else if (rs6000_long_double_type_size == FLOAT_PRECISION_TFmode)
    ; /* The option value can be seen when cl_target_option_restore is called.  */
  else if (rs6000_long_double_type_size == 128)
    rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
  else if (OPTION_SET_P (rs6000_ieeequad))
    {
      if (global_options.x_rs6000_ieeequad)
        error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
      else
        error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
    }

  /* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
     systems will also set long double to be IEEE 128-bit.  AIX and Darwin
     explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
     those systems will not pick up this default.  Warn if the user changes the
     default unless -Wno-psabi.  */
  if (!OPTION_SET_P (rs6000_ieeequad))
    rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;

  else
    {
      if (global_options.x_rs6000_ieeequad
          && (!TARGET_POPCNTD || !TARGET_VSX))
        error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");

      if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
        {
          /* Determine if the user can change the default long double type at
             compilation time.  You need GLIBC 2.32 or newer to be able to
             change the long double type.  Only issue one warning.  */
          static bool warned_change_long_double;

          if (!warned_change_long_double && !glibc_supports_ieee_128bit ())
            {
              warned_change_long_double = true;
              if (TARGET_IEEEQUAD)
                warning (OPT_Wpsabi, "Using IEEE extended precision "
                         "%<long double%>");
              else
                warning (OPT_Wpsabi, "Using IBM extended precision "
                         "%<long double%>");
            }
        }
    }
  /* Enable the default support for IEEE 128-bit floating point on Linux VSX
     systems.  In GCC 7, we would enable the IEEE 128-bit floating point
     infrastructure (-mfloat128-type) but not enable the actual __float128 type
     unless the user used the explicit -mfloat128.  In GCC 8, we enable both
     the keyword as well as the type.  */
  TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;

  /* IEEE 128-bit floating point requires VSX support.  */
  if (TARGET_FLOAT128_KEYWORD)
    {
      if (!TARGET_VSX)
        {
          if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
            error ("%qs requires VSX support", "-mfloat128");

          TARGET_FLOAT128_TYPE = 0;
          rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
                                | OPTION_MASK_FLOAT128_HW);
        }
      else if (!TARGET_FLOAT128_TYPE)
        {
          TARGET_FLOAT128_TYPE = 1;
          warning (0, "The %<-mfloat128%> option may not be fully supported");
        }
    }

  /* Enable the __float128 keyword under Linux by default.  */
  if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
      && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
    rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;

  /* If we are supporting the float128 type and full ISA 3.0 support,
     enable -mfloat128-hardware by default.  However, don't enable the
     __float128 keyword if it was explicitly turned off.  64-bit mode is needed
     because sometimes the compiler wants to put things in an integer
     container, and if we don't have __int128 support, it is impossible.  */
  if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
      && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
      && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
    rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;

  if (TARGET_FLOAT128_HW
      && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
        error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");

      rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
    }

  if (TARGET_FLOAT128_HW && !TARGET_64BIT)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
        error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");

      rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
    }
  /* Enable -mprefixed by default on power10 systems.  */
  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
    rs6000_isa_flags |= OPTION_MASK_PREFIXED;

  /* -mprefixed requires -mcpu=power10 (or later).  */
  else if (TARGET_PREFIXED && !TARGET_POWER10)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
        error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");

      rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
    }

  /* -mpcrel requires prefixed load/store addressing.  */
  if (TARGET_PCREL && !TARGET_PREFIXED)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
        error ("%qs requires %qs", "-mpcrel", "-mprefixed");

      rs6000_isa_flags &= ~OPTION_MASK_PCREL;
    }

  /* Print the options after updating the defaults.  */
  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
    rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);

  /* E500mc does "better" if we inline more aggressively.  Respect the
     user's opinion, though.  */
  if (rs6000_block_move_inline_limit == 0
      && (rs6000_tune == PROCESSOR_PPCE500MC
          || rs6000_tune == PROCESSOR_PPCE500MC64
          || rs6000_tune == PROCESSOR_PPCE5500
          || rs6000_tune == PROCESSOR_PPCE6500))
    rs6000_block_move_inline_limit = 128;

  /* store_one_arg depends on expand_block_move to handle at least the
     size of reg_parm_stack_space.  */
  if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
    rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
  /* If the appropriate debug option is enabled, replace the target hooks
     with debug versions that call the real version and then print
     debugging information.  */
  if (TARGET_DEBUG_COST)
    {
      targetm.rtx_costs = rs6000_debug_rtx_costs;
      targetm.address_cost = rs6000_debug_address_cost;
      targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
    }

  if (TARGET_DEBUG_ADDR)
    {
      targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
      targetm.legitimize_address = rs6000_debug_legitimize_address;
      rs6000_secondary_reload_class_ptr
        = rs6000_debug_secondary_reload_class;
      targetm.secondary_memory_needed
        = rs6000_debug_secondary_memory_needed;
      targetm.can_change_mode_class
        = rs6000_debug_can_change_mode_class;
      rs6000_preferred_reload_class_ptr
        = rs6000_debug_preferred_reload_class;
      rs6000_mode_dependent_address_ptr
        = rs6000_debug_mode_dependent_address;
    }

  if (rs6000_veclibabi_name)
    {
      if (strcmp (rs6000_veclibabi_name, "mass") == 0)
        rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
      else
        {
          error ("unknown vectorization library ABI type (%qs) for "
                 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
          ret = false;
        }
    }
  /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
     target attribute or pragma which automatically enables both options,
     unless the altivec ABI was set.  This is set by default for 64-bit, but
     not for 32-bit.  */
  if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
    {
      TARGET_FLOAT128_TYPE = 0;
      rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
                             | OPTION_MASK_FLOAT128_KEYWORD)
                            & ~rs6000_isa_flags_explicit);
    }

  /* Enable Altivec ABI for AIX -maltivec.  */
  if (TARGET_XCOFF
      && (TARGET_ALTIVEC || TARGET_VSX)
      && !OPTION_SET_P (rs6000_altivec_abi))
    {
      if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
        error ("target attribute or pragma changes AltiVec ABI");
      else
        rs6000_altivec_abi = 1;
    }

  /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux.  For
     PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI.  It can
     be explicitly overridden in either case.  */
  if (TARGET_ELF)
    {
      if (!OPTION_SET_P (rs6000_altivec_abi)
          && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
        {
          if (main_target_opt != NULL &&
              !main_target_opt->x_rs6000_altivec_abi)
            error ("target attribute or pragma changes AltiVec ABI");
          else
            rs6000_altivec_abi = 1;
        }
    }

  /* Set the Darwin64 ABI as default for 64-bit Darwin.
     So far, the only darwin64 targets are also MACH-O.  */
  if (TARGET_MACHO
      && DEFAULT_ABI == ABI_DARWIN
      && TARGET_64BIT)
    {
      if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
        error ("target attribute or pragma changes darwin64 ABI");
      else
        {
          rs6000_darwin64_abi = 1;
          /* Default to natural alignment, for better performance.  */
          rs6000_alignment_flags = MASK_ALIGN_NATURAL;
        }
    }

  /* Place FP constants in the constant pool instead of TOC
     if section anchors enabled.  */
  if (flag_section_anchors
      && !OPTION_SET_P (TARGET_NO_FP_IN_TOC))
    TARGET_NO_FP_IN_TOC = 1;

  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
    rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifdef SUB3TARGET_OVERRIDE_OPTIONS
  SUB3TARGET_OVERRIDE_OPTIONS;
#endif

  /* If the ABI has support for PC-relative relocations, enable it by default.
     This test depends on the sub-target tests above setting the code model to
     medium for ELF v2 systems.  */
  if (PCREL_SUPPORTED_BY_OS
      && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
    rs6000_isa_flags |= OPTION_MASK_PCREL;

  /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
     after the subtarget override options are done.  */
  else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
        error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");

      rs6000_isa_flags &= ~OPTION_MASK_PCREL;
    }
  /* Enable -mmma by default on power10 systems.  */
  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
    rs6000_isa_flags |= OPTION_MASK_MMA;

  if (TARGET_POWER10
      && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) == 0)
    rs6000_isa_flags |= OPTION_MASK_P10_FUSION;

  if (TARGET_POWER10 &&
      (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_LD_CMPI) == 0)
    rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LD_CMPI;

  if (TARGET_POWER10
      && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2LOGICAL) == 0)
    rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2LOGICAL;

  if (TARGET_POWER10
      && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_LOGADD) == 0)
    rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LOGADD;

  if (TARGET_POWER10
      && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_ADDLOG) == 0)
    rs6000_isa_flags |= OPTION_MASK_P10_FUSION_ADDLOG;

  if (TARGET_POWER10
      && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2ADD) == 0)
    rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2ADD;

  if (TARGET_POWER10
      && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2STORE) == 0)
    rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2STORE;

  /* Turn off vector pair/mma options on non-power10 systems.  */
  else if (!TARGET_POWER10 && TARGET_MMA)
    {
      if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
        error ("%qs requires %qs", "-mmma", "-mcpu=power10");

      rs6000_isa_flags &= ~OPTION_MASK_MMA;
    }

  if (!TARGET_PCREL && TARGET_PCREL_OPT)
    rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;

  if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
    rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
  rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
                        && rs6000_tune != PROCESSOR_POWER5
                        && rs6000_tune != PROCESSOR_POWER6
                        && rs6000_tune != PROCESSOR_POWER7
                        && rs6000_tune != PROCESSOR_POWER8
                        && rs6000_tune != PROCESSOR_POWER9
                        && rs6000_tune != PROCESSOR_POWER10
                        && rs6000_tune != PROCESSOR_PPCA2
                        && rs6000_tune != PROCESSOR_CELL
                        && rs6000_tune != PROCESSOR_PPC476);
  rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
                         || rs6000_tune == PROCESSOR_POWER5
                         || rs6000_tune == PROCESSOR_POWER7
                         || rs6000_tune == PROCESSOR_POWER8);
  rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
                                 || rs6000_tune == PROCESSOR_POWER5
                                 || rs6000_tune == PROCESSOR_POWER6
                                 || rs6000_tune == PROCESSOR_POWER7
                                 || rs6000_tune == PROCESSOR_POWER8
                                 || rs6000_tune == PROCESSOR_POWER9
                                 || rs6000_tune == PROCESSOR_POWER10
                                 || rs6000_tune == PROCESSOR_PPCE500MC
                                 || rs6000_tune == PROCESSOR_PPCE500MC64
                                 || rs6000_tune == PROCESSOR_PPCE5500
                                 || rs6000_tune == PROCESSOR_PPCE6500);

  /* Allow debug switches to override the above settings.  These are set to -1
     in rs6000.opt to indicate the user hasn't directly set the switch.  */
  if (TARGET_ALWAYS_HINT >= 0)
    rs6000_always_hint = TARGET_ALWAYS_HINT;

  if (TARGET_SCHED_GROUPS >= 0)
    rs6000_sched_groups = TARGET_SCHED_GROUPS;

  if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
    rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;

  rs6000_sched_restricted_insns_priority
    = (rs6000_sched_groups ? 1 : 0);
  /* Handle -msched-costly-dep option.  */
  rs6000_sched_costly_dep
    = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);

  if (rs6000_sched_costly_dep_str)
    {
      if (! strcmp (rs6000_sched_costly_dep_str, "no"))
        rs6000_sched_costly_dep = no_dep_costly;
      else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
        rs6000_sched_costly_dep = all_deps_costly;
      else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
        rs6000_sched_costly_dep = true_store_to_load_dep_costly;
      else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
        rs6000_sched_costly_dep = store_to_load_dep_costly;
      else
        rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
                                   atoi (rs6000_sched_costly_dep_str));
    }

  /* Handle -minsert-sched-nops option.  */
  rs6000_sched_insert_nops
    = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);

  if (rs6000_sched_insert_nops_str)
    {
      if (! strcmp (rs6000_sched_insert_nops_str, "no"))
        rs6000_sched_insert_nops = sched_finish_none;
      else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
        rs6000_sched_insert_nops = sched_finish_pad_groups;
      else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
        rs6000_sched_insert_nops = sched_finish_regroup_exact;
      else
        rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
                                    atoi (rs6000_sched_insert_nops_str));
    }
  /* Handle stack protector.  */
  if (!OPTION_SET_P (rs6000_stack_protector_guard))
#ifdef TARGET_THREAD_SSP_OFFSET
    rs6000_stack_protector_guard = SSP_TLS;
#else
    rs6000_stack_protector_guard = SSP_GLOBAL;
#endif

#ifdef TARGET_THREAD_SSP_OFFSET
  rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
  rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
#endif

  if (OPTION_SET_P (rs6000_stack_protector_guard_offset_str))
    {
      char *endp;
      const char *str = rs6000_stack_protector_guard_offset_str;

      errno = 0;
      long offset = strtol (str, &endp, 0);
      if (!*str || *endp || errno)
        error ("%qs is not a valid number in %qs", str,
               "-mstack-protector-guard-offset=");

      if (!IN_RANGE (offset, -0x8000, 0x7fff)
          || (TARGET_64BIT && (offset & 3)))
        error ("%qs is not a valid offset in %qs", str,
               "-mstack-protector-guard-offset=");

      rs6000_stack_protector_guard_offset = offset;
    }

  if (OPTION_SET_P (rs6000_stack_protector_guard_reg_str))
    {
      const char *str = rs6000_stack_protector_guard_reg_str;
      int reg = decode_reg_name (str);

      if (!IN_RANGE (reg, 1, 31))
        error ("%qs is not a valid base register in %qs", str,
               "-mstack-protector-guard-reg=");

      rs6000_stack_protector_guard_reg = reg;
    }

  if (rs6000_stack_protector_guard == SSP_TLS
      && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
    error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
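  /* Illustrative command-line usage of the options parsed above (not part of
     the original source; the register name and offset are only example
     values):

       gcc -mstack-protector-guard=tls \
           -mstack-protector-guard-reg=r13 \
           -mstack-protector-guard-offset=0x28 foo.c

     The offset must fit in a signed 16-bit displacement and, in 64-bit mode,
     must be 4-byte aligned, per the IN_RANGE and alignment checks above.  */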
#ifdef TARGET_REGNAMES
  /* If the user desires alternate register names, copy in the
     alternate names now.  */
  if (TARGET_REGNAMES)
    memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
#endif

  /* Set aix_struct_return last, after the ABI is determined.
     If -maix-struct-return or -msvr4-struct-return was explicitly
     used, don't override with the ABI default.  */
  if (!OPTION_SET_P (aix_struct_return))
    aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);

  /* IBM XL compiler defaults to unsigned bitfields.  */
  if (TARGET_XL_COMPAT)
    flag_signed_bitfields = 0;

  if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
    REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;

  ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);

  /* We can only guarantee the availability of DI pseudo-ops when
     assembling for 64-bit targets.  */
  if (!TARGET_64BIT)
    {
      targetm.asm_out.aligned_op.di = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }
  /* Set branch target alignment, if not optimizing for size.  */
  if (!optimize_size)
    {
      /* Cell wants to be aligned 8byte for dual issue.  Titan wants to be
         aligned 8byte to avoid misprediction by the branch predictor.  */
      if (rs6000_tune == PROCESSOR_TITAN
          || rs6000_tune == PROCESSOR_CELL)
        {
          if (flag_align_functions && !str_align_functions)
            str_align_functions = "8";
          if (flag_align_jumps && !str_align_jumps)
            str_align_jumps = "8";
          if (flag_align_loops && !str_align_loops)
            str_align_loops = "8";
        }
      if (rs6000_align_branch_targets)
        {
          if (flag_align_functions && !str_align_functions)
            str_align_functions = "16";
          if (flag_align_jumps && !str_align_jumps)
            str_align_jumps = "16";
          if (flag_align_loops && !str_align_loops)
            {
              can_override_loop_align = 1;
              str_align_loops = "16";
            }
        }
    }

  /* Arrange to save and restore machine status around nested functions.  */
  init_machine_status = rs6000_init_machine_status;
  /* We should always be splitting complex arguments, but we can't break
     Linux and Darwin ABIs at the moment.  For now, only AIX is fixed.  */
  if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
    targetm.calls.split_complex_arg = NULL;

  /* The AIX and ELFv1 ABIs define standard function descriptors.  */
  if (DEFAULT_ABI == ABI_AIX)
    targetm.calls.custom_function_descriptors = 0;
  /* Initialize rs6000_cost with the appropriate target costs.  */
  if (optimize_size)
    rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
  else
    switch (rs6000_tune)
      {
      case PROCESSOR_RS64A:
        rs6000_cost = &rs64a_cost;
        break;

      case PROCESSOR_MPCCORE:
        rs6000_cost = &mpccore_cost;
        break;

      case PROCESSOR_PPC403:
        rs6000_cost = &ppc403_cost;
        break;

      case PROCESSOR_PPC405:
        rs6000_cost = &ppc405_cost;
        break;

      case PROCESSOR_PPC440:
        rs6000_cost = &ppc440_cost;
        break;

      case PROCESSOR_PPC476:
        rs6000_cost = &ppc476_cost;
        break;

      case PROCESSOR_PPC601:
        rs6000_cost = &ppc601_cost;
        break;

      case PROCESSOR_PPC603:
        rs6000_cost = &ppc603_cost;
        break;

      case PROCESSOR_PPC604:
        rs6000_cost = &ppc604_cost;
        break;

      case PROCESSOR_PPC604e:
        rs6000_cost = &ppc604e_cost;
        break;

      case PROCESSOR_PPC620:
        rs6000_cost = &ppc620_cost;
        break;

      case PROCESSOR_PPC630:
        rs6000_cost = &ppc630_cost;
        break;

      case PROCESSOR_CELL:
        rs6000_cost = &ppccell_cost;
        break;

      case PROCESSOR_PPC750:
      case PROCESSOR_PPC7400:
        rs6000_cost = &ppc750_cost;
        break;

      case PROCESSOR_PPC7450:
        rs6000_cost = &ppc7450_cost;
        break;

      case PROCESSOR_PPC8540:
      case PROCESSOR_PPC8548:
        rs6000_cost = &ppc8540_cost;
        break;

      case PROCESSOR_PPCE300C2:
      case PROCESSOR_PPCE300C3:
        rs6000_cost = &ppce300c2c3_cost;
        break;

      case PROCESSOR_PPCE500MC:
        rs6000_cost = &ppce500mc_cost;
        break;

      case PROCESSOR_PPCE500MC64:
        rs6000_cost = &ppce500mc64_cost;
        break;

      case PROCESSOR_PPCE5500:
        rs6000_cost = &ppce5500_cost;
        break;

      case PROCESSOR_PPCE6500:
        rs6000_cost = &ppce6500_cost;
        break;

      case PROCESSOR_TITAN:
        rs6000_cost = &titan_cost;
        break;

      case PROCESSOR_POWER4:
      case PROCESSOR_POWER5:
        rs6000_cost = &power4_cost;
        break;

      case PROCESSOR_POWER6:
        rs6000_cost = &power6_cost;
        break;

      case PROCESSOR_POWER7:
        rs6000_cost = &power7_cost;
        break;

      case PROCESSOR_POWER8:
        rs6000_cost = &power8_cost;
        break;

      case PROCESSOR_POWER9:
        rs6000_cost = &power9_cost;
        break;

      case PROCESSOR_POWER10:
        rs6000_cost = &power10_cost;
        break;

      case PROCESSOR_PPCA2:
        rs6000_cost = &ppca2_cost;
        break;

      default:
        gcc_unreachable ();
      }
  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                       param_simultaneous_prefetches,
                       rs6000_cost->simultaneous_prefetches);
  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                       param_l1_cache_size,
                       rs6000_cost->l1_cache_size);
  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                       param_l1_cache_line_size,
                       rs6000_cost->cache_line_size);
  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                       param_l2_cache_size,
                       rs6000_cost->l2_cache_size);

  /* Increase loop peeling limits based on performance analysis.  */
  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                       param_max_peeled_insns, 400);
  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                       param_max_completely_peeled_insns, 400);

  /* The lxvl/stxvl instructions don't perform well before Power10.  */
  if (TARGET_POWER10)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                         param_vect_partial_vector_usage, 1);
  else
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                         param_vect_partial_vector_usage, 0);

  /* Use the 'model' -fsched-pressure algorithm by default.  */
  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                       param_sched_pressure_algorithm,
                       SCHED_PRESSURE_MODEL);
  /* If using typedef char *va_list, signal that
     __builtin_va_start (&ap, 0) can be optimized to
     ap = __builtin_next_arg (0).  */
  if (DEFAULT_ABI != ABI_V4)
    targetm.expand_builtin_va_start = NULL;

  rs6000_override_options_after_change ();

  /* If not explicitly specified via option, decide whether to generate indexed
     load/store instructions.  A value of -1 indicates that the
     initial value of this variable has not been overwritten.  During
     compilation, TARGET_AVOID_XFORM is either 0 or 1.  */
  if (TARGET_AVOID_XFORM == -1)
    /* Avoid indexed addressing when targeting Power6 in order to avoid the
       DERAT mispredict penalty.  However the LVE and STVE altivec instructions
       need indexed accesses and the type used is the scalar type of the
       element being loaded or stored.  */
    TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
                          && !TARGET_ALTIVEC);
  /* Set the -mrecip options.  */
  if (rs6000_recip_name)
    {
      char *p = ASTRDUP (rs6000_recip_name);
      char *q;
      unsigned int mask, i;
      bool invert;

      while ((q = strtok (p, ",")) != NULL)
        {
          p = NULL;
          if (*q == '!')
            {
              invert = true;
              q++;
            }
          else
            invert = false;

          if (!strcmp (q, "default"))
            mask = ((TARGET_RECIP_PRECISION)
                    ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
          else
            {
              for (i = 0; i < ARRAY_SIZE (recip_options); i++)
                if (!strcmp (q, recip_options[i].string))
                  {
                    mask = recip_options[i].mask;
                    break;
                  }

              if (i == ARRAY_SIZE (recip_options))
                {
                  error ("unknown option for %<%s=%s%>", "-mrecip", q);
                  invert = false;
                  mask = 0;
                  ret = false;
                }
            }

          if (invert)
            rs6000_recip_control &= ~mask;
          else
            rs6000_recip_control |= mask;
        }
    }
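  /* Illustrative usage of the option parsed above (not part of the original
     source).  The option takes a comma-separated list, and a leading '!'
     clears an entry rather than setting it, for example:

       gcc -O3 -mrecip=all,!rsqrtd ...

     which would enable the reciprocal-estimate forms except double-precision
     reciprocal square root, assuming both keywords are present in the
     recip_options table.  */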
  /* Set the builtin mask of the various options used that could affect which
     builtins were used.  In the past we used target_flags, but we've run out
     of bits, and some options are no longer in target_flags.  */
  rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
  if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
    rs6000_print_builtin_options (stderr, 0, "builtin mask",
                                  rs6000_builtin_mask);

  /* Initialize all of the registers.  */
  rs6000_init_hard_regno_mode_ok (global_init_p);

  /* Save the initial options in case the user does function specific
     options.  */
  if (global_init_p)
    target_option_default_node = target_option_current_node
      = build_target_option_node (&global_options, &global_options_set);

  /* If not explicitly specified via option, decide whether to generate the
     extra blr's required to preserve the link stack on some cpus (eg, 476).  */
  if (TARGET_LINK_STACK == -1)
    SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);

  /* Deprecate use of -mno-speculate-indirect-jumps.  */
  if (!rs6000_speculate_indirect_jumps)
    warning (0, "%qs is deprecated and not recommended in any circumstances",
             "-mno-speculate-indirect-jumps");

  return ret;
}
/* Implement TARGET_OPTION_OVERRIDE.  On the RS/6000 this is used to
   define the target cpu type.  */

static void
rs6000_option_override (void)
{
  (void) rs6000_option_override_internal (true);
}


/* Implement LOOP_ALIGN.  */
align_flags
rs6000_loop_align (rtx label)
{
  basic_block bb;
  int ninsns;

  /* Don't override loop alignment if -falign-loops was specified.  */
  if (!can_override_loop_align)
    return align_flags ();

  bb = BLOCK_FOR_INSN (label);
  ninsns = num_loop_insns (bb->loop_father);

  /* Align small loops to 32 bytes to fit in an icache sector, otherwise
     return the default.  */
  if (ninsns > 4 && ninsns <= 8
      && (rs6000_tune == PROCESSOR_POWER4
          || rs6000_tune == PROCESSOR_POWER5
          || rs6000_tune == PROCESSOR_POWER6
          || rs6000_tune == PROCESSOR_POWER7
          || rs6000_tune == PROCESSOR_POWER8))
    return align_flags (5);
  else
    return align_flags ();
}
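/* Worked example for the alignment choice above (illustrative, not part of
   the original source): align_flags (5) requests a 2^5 = 32-byte alignment,
   so a 5..8 instruction loop on the listed Power tunings starts on a 32-byte
   boundary and fits within a single icache sector.  */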
4993 /* Return true iff, data reference of TYPE can reach vector alignment (16)
4994 after applying N number of iterations. This routine does not determine
4995 how may iterations are required to reach desired alignment. */
4998 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED
, bool is_packed
)
5005 if (rs6000_alignment_flags
== MASK_ALIGN_NATURAL
)
5008 if (rs6000_alignment_flags
== MASK_ALIGN_POWER
)
5018 /* Assuming that all other types are naturally aligned. CHECKME! */
/* Return true if the vector misalignment factor is supported by the
   hardware.  */
static bool
rs6000_builtin_support_vector_misalignment (machine_mode mode,
                                            const_tree type,
                                            int misalignment,
                                            bool is_packed)
{
  if (TARGET_VSX)
    {
      if (TARGET_EFFICIENT_UNALIGNED_VSX)
        return true;

      /* Return if movmisalign pattern is not supported for this mode.  */
      if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
        return false;

      if (misalignment == -1)
        {
          /* Misalignment factor is unknown at compile time but we know
             it's word aligned.  */
          if (rs6000_vector_alignment_reachable (type, is_packed))
            {
              int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));

              if (element_size == 64 || element_size == 32)
                return true;
            }

          return false;
        }

      /* VSX supports word-aligned vector.  */
      if (misalignment % 4 == 0)
        return true;
    }
  return false;
}
5062 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5064 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
5065 tree vectype
, int misalign
)
5070 switch (type_of_cost
)
5078 case cond_branch_not_taken
:
5082 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5086 /* Power7 has only one permute unit, make it a bit expensive. */
5087 if (TARGET_VSX
&& rs6000_tune
== PROCESSOR_POWER7
)
5092 case vec_promote_demote
:
5093 /* Power7 has only one permute/pack unit, make it a bit expensive. */
5094 if (TARGET_VSX
&& rs6000_tune
== PROCESSOR_POWER7
)
5099 case cond_branch_taken
:
5102 case unaligned_load
:
5103 case vector_gather_load
:
5104 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5105 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5108 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
5110 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
5111 /* See PR102767, consider V1TI to keep consistency. */
5112 if (elements
== 2 || elements
== 1)
5113 /* Double word aligned. */
5121 /* Double word aligned. */
5125 /* Unknown misalignment. */
5138 /* Misaligned loads are not supported. */
5141 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5144 case unaligned_store
:
5145 case vector_scatter_store
:
5146 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5149 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
5151 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
5152 /* See PR102767, consider V1TI to keep consistency. */
5153 if (elements
== 2 || elements
== 1)
5154 /* Double word aligned. */
5162 /* Double word aligned. */
5166 /* Unknown misalignment. */
5179 /* Misaligned stores are not supported. */
5185 /* This is a rough approximation assuming non-constant elements
5186 constructed into a vector via element insertion. FIXME:
5187 vec_construct is not granular enough for uniformly good
5188 decisions. If the initialization is a splat, this is
5189 cheaper than we estimate. Improve this someday. */
5190 elem_type
= TREE_TYPE (vectype
);
5191 /* 32-bit vectors loaded into registers are stored as double
5192 precision, so we need 2 permutes, 2 converts, and 1 merge
5193 to construct a vector of short floats from them. */
5194 if (SCALAR_FLOAT_TYPE_P (elem_type
)
5195 && TYPE_PRECISION (elem_type
) == 32)
5197 /* On POWER9, integer vector types are built up in GPRs and then
5198 use a direct move (2 cycles). For POWER8 this is even worse,
5199 as we need two direct moves and a merge, and the direct moves
5201 else if (INTEGRAL_TYPE_P (elem_type
))
5203 if (TARGET_P9_VECTOR
)
5204 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 2;
5206 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 5;
5209 /* V2DFmode doesn't need a direct move. */
/* Implement targetm.vectorize.preferred_simd_mode.  */

static machine_mode
rs6000_preferred_simd_mode (scalar_mode mode)
{
  opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));

  if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
    return vmode.require ();

  return word_mode;
}
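/* Illustrative mapping performed above (not part of the original source):
   for the 16-byte vector registers, an 8-byte DFmode element asks for
   mode_for_vector (DFmode, 16 / 8) = V2DFmode and a 4-byte SFmode element
   asks for V4SFmode; if the resulting vector mode is not handled by the
   enabled vector units, word_mode is returned and the loop stays scalar.  */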
class rs6000_cost_data : public vector_costs
{
public:
  using vector_costs::vector_costs;

  unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
                              stmt_vec_info stmt_info, tree vectype,
                              int misalign,
                              vect_cost_model_location where) override;
  void finish_cost (const vector_costs *) override;

protected:
  void update_target_cost_per_stmt (vect_cost_for_stmt, stmt_vec_info,
                                    vect_cost_model_location, unsigned int);
  void density_test (loop_vec_info);
  void adjust_vect_cost_per_loop (loop_vec_info);

  /* Total number of vectorized stmts (loop only).  */
  unsigned m_nstmts = 0;
  /* Total number of loads (loop only).  */
  unsigned m_nloads = 0;
  /* Possible extra penalized cost on vector construction (loop only).  */
  unsigned m_extra_ctor_cost = 0;
  /* For each vectorized loop, this var holds TRUE iff a non-memory vector
     instruction is needed by the vectorization.  */
  bool m_vect_nonmem = false;
};
/* Test for likely overcommitment of vector hardware resources.  If a
   loop iteration is relatively large, and too large a percentage of
   instructions in the loop are vectorized, the cost model may not
   adequately reflect delays from unavailable vector resources.
   Penalize the loop body cost for this case.  */
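/* Worked example of the density heuristic implemented below (illustrative
   only; the actual thresholds and penalty come from the rs6000_density_*
   --params, not from this comment): with vec_cost = 90 and not_vec_cost = 10,
   density_pct = (90 * 100) / (90 + 10) = 90%.  If that exceeds the density
   percentage threshold and the total cost exceeds the size threshold, the
   vect_body cost is scaled by (100 + penalty) / 100.  */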
void
rs6000_cost_data::density_test (loop_vec_info loop_vinfo)
{
  /* This density test only cares about the cost of vector version of the
     loop, so immediately return if we are passed costing for the scalar
     version (namely computing single scalar iteration cost).  */
  if (m_costing_for_scalar)
    return;

  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = get_loop_body (loop);
  int nbbs = loop->num_nodes;
  int vec_cost = m_costs[vect_body], not_vec_cost = 0;

  for (int i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          if (is_gimple_debug (stmt))
            continue;

          stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_IN_PATTERN_P (stmt_info))
            not_vec_cost++;
        }
    }

  free (bbs);
  int density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);

  if (density_pct > rs6000_density_pct_threshold
      && vec_cost + not_vec_cost > rs6000_density_size_threshold)
    {
      m_costs[vect_body] = vec_cost * (100 + rs6000_density_penalty) / 100;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "density %d%%, cost %d exceeds threshold, penalizing "
                         "loop body cost by %u%%\n", density_pct,
                         vec_cost + not_vec_cost, rs6000_density_penalty);
    }

  /* Check whether we need to penalize the body cost to account
     for excess strided or elementwise loads.  */
  if (m_extra_ctor_cost > 0)
    {
      gcc_assert (m_nloads <= m_nstmts);
      unsigned int load_pct = (m_nloads * 100) / m_nstmts;

      /* It's likely to be bounded by latency and execution resources
         from many scalar loads which are strided or elementwise loads
         into a vector if both conditions below are found:
           1. there are many loads, it's easy to result in a long wait
              for load units;
           2. load has a big proportion of all vectorized statements,
              it's not easy to schedule other statements to spread among
              the loads.
         One typical case is the innermost loop of the hotspot of SPEC2017
         503.bwaves_r without loop interchange.  */
      if (m_nloads > (unsigned int) rs6000_density_load_num_threshold
          && load_pct > (unsigned int) rs6000_density_load_pct_threshold)
        {
          m_costs[vect_body] += m_extra_ctor_cost;
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Found %u loads and "
                             "load pct. %u%% exceed "
                             "the threshold, "
                             "penalizing loop body "
                             "cost by extra cost %u "
                             "for ctor.\n",
                             m_nloads, load_pct,
                             m_extra_ctor_cost);
        }
    }
}
/* Implement targetm.vectorize.create_costs.  */

static vector_costs *
rs6000_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
{
  return new rs6000_cost_data (vinfo, costing_for_scalar);
}
/* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
   For some statement, we would like to further fine-grain tweak the cost on
   top of rs6000_builtin_vectorization_cost handling which doesn't have any
   information on statement operation codes etc.  One typical case here is
   COND_EXPR, it takes the same cost as a simple FXU instruction when
   evaluating for scalar cost, but it should be priced more whatever it is
   transformed to, either compare + branch or compare + isel instructions.  */

static unsigned
rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
                                  struct _stmt_vec_info *stmt_info)
{
  if (kind == scalar_stmt && stmt_info && stmt_info->stmt
      && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
    {
      tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
      if (subcode == COND_EXPR)
        return 1;
    }

  return 0;
}
/* Helper function for add_stmt_cost.  Check each statement cost
   entry, gather information and update the target_cost fields
   accordingly.  */
void
rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
                                               stmt_vec_info stmt_info,
                                               vect_cost_model_location where,
                                               unsigned int orig_count)
{
  /* Check whether we're doing something other than just a copy loop.
     Not all such loops may be profitably vectorized; see
     rs6000_finish_cost.  */
  if (kind == vec_to_scalar
      || kind == vec_perm
      || kind == vec_promote_demote
      || kind == vec_construct
      || kind == scalar_to_vec
      || (where == vect_body && kind == vector_stmt))
    m_vect_nonmem = true;

  /* Gather some information when we are costing the vectorized instruction
     for the statements located in a loop body.  */
  if (!m_costing_for_scalar
      && is_a <loop_vec_info> (m_vinfo)
      && where == vect_body)
    {
      m_nstmts += orig_count;

      if (kind == scalar_load || kind == vector_load
          || kind == unaligned_load || kind == vector_gather_load)
        m_nloads += orig_count;

      /* Power processors do not currently have instructions for strided
         and elementwise loads, and instead we must generate multiple
         scalar loads.  This leads to undercounting of the cost.  We
         account for this by scaling the construction cost by the number
         of elements involved, and saving this as extra cost that we may
         or may not need to apply.  When finalizing the cost of the loop,
         the extra penalty is applied when the load density heuristics
         are exceeded.  */
      if (kind == vec_construct && stmt_info
          && STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
          && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
              || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_STRIDED_SLP))
        {
          tree vectype = STMT_VINFO_VECTYPE (stmt_info);
          unsigned int nunits = vect_nunits_for_cost (vectype);
          /* As PR103702 shows, it's possible that vectorizer wants to do
             costings for only one unit here, it's no need to do any
             penalization for it, so simply early return here.  */
          if (nunits == 1)
            return;
          /* i386 port adopts nunits * stmt_cost as the penalized cost
             for this kind of penalization, we used to follow it but
             found it could result in an unreliable body cost especially
             for V16QI/V8HI modes.  To make it better, we choose this
             new heuristic: for each scalar load, we use 2 as penalized
             cost for the case with 2 nunits and use 1 for the other
             cases.  It's without much supporting theory, mainly
             concluded from the broad performance evaluations on Power8,
             Power9 and Power10.  One possibly related point is that:
             vector construction for more units would use more insns,
             it has more chances to schedule them better (even run in
             parallel when enough units are available at that time), so
             it seems reasonable not to penalize that much for them.  */
          unsigned int adjusted_cost = (nunits == 2) ? 2 : 1;
          unsigned int extra_cost = nunits * adjusted_cost;
          m_extra_ctor_cost += extra_cost;
        }
    }
}
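/* Worked example of the penalty above (illustrative, not part of the original
   source): an elementwise V2DI construction has nunits == 2, so it records
   2 * 2 = 4 of extra constructor cost, while a V4SI construction records
   4 * 1 = 4 and a V16QI construction records 16 * 1 = 16; the accumulated
   m_extra_ctor_cost is only applied later if the load density thresholds in
   density_test are exceeded.  */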
unsigned int
rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind,
                                 stmt_vec_info stmt_info, tree vectype,
                                 int misalign, vect_cost_model_location where)
{
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
                                                         misalign);
      stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info);
      /* Statements in an inner loop relative to the loop being
         vectorized are weighted more heavily.  The value here is
         arbitrary and could potentially be improved with analysis.  */
      unsigned int orig_count = count;
      retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
      m_costs[where] += retval;

      update_target_cost_per_stmt (kind, stmt_info, where, orig_count);
    }

  return retval;
}
/* For some target-specific vectorization costs which can't be handled per
   stmt, we check the requisite conditions and adjust the vectorization cost
   accordingly if they are satisfied.  One typical example is to model the
   shift cost for vectors with length by counting the number of required
   lengths under condition LOOP_VINFO_FULLY_WITH_LENGTH_P.  */

void
rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo)
{
  if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
    {
      rgroup_controls *rgc;
      unsigned int num_vectors_m1;
      unsigned int shift_cnt = 0;
      FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
        /* Each length needs one shift to fill into bits 0-7.  */
        shift_cnt += num_vectors_m1 + 1;

      add_stmt_cost (shift_cnt, scalar_stmt, NULL, NULL_TREE, 0, vect_body);
    }
}
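/* For instance, a fully-with-length loop with two rgroups whose
   num_vectors_m1 values are 0 and 1 needs (0 + 1) + (1 + 1) = 3 shifts, so
   three scalar_stmt costs are added to the vect_body cost above.  */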
void
rs6000_cost_data::finish_cost (const vector_costs *scalar_costs)
{
  if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo))
    {
      adjust_vect_cost_per_loop (loop_vinfo);
      density_test (loop_vinfo);

      /* Don't vectorize minimum-vectorization-factor, simple copy loops
         that require versioning for any reason.  The vectorization is at
         best a wash inside the loop, and the versioning checks make
         profitability highly unlikely and potentially quite harmful.  */
      if (LOOP_VINFO_VECT_FACTOR (loop_vinfo) == 2
          && LOOP_REQUIRES_VERSIONING (loop_vinfo))
        m_costs[vect_body] += 10000;
    }

  vector_costs::finish_cost (scalar_costs);
}
/* Implement targetm.loop_unroll_adjust.  */

static unsigned
rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
{
  if (unroll_only_small_loops)
    {
      /* TODO: These are hardcoded values right now.  We probably should use
         a PARAM here.  */
      if (loop->ninsns <= 6)
        return MIN (4, nunroll);
      if (loop->ninsns <= 10)
        return MIN (2, nunroll);

      return 0;
    }

  return nunroll;
}
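/* For example, with small-loop-only unrolling in effect a 5-insn loop is
   unrolled at most 4 times and an 8-insn loop at most twice, while larger
   loops are not unrolled at all; otherwise the requested NUNROLL is kept.  */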
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.

   Implement targetm.vectorize.builtin_vectorized_function.  */

static tree
rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
                                    tree type_in)
{
  machine_mode in_mode, out_mode;
  int in_n, out_n;

  if (TARGET_DEBUG_BUILTIN)
    fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
             combined_fn_name (combined_fn (fn)),
             GET_MODE_NAME (TYPE_MODE (type_out)),
             GET_MODE_NAME (TYPE_MODE (type_in)));

  /* TODO: Should this be gcc_assert?  */
  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  switch (fn)
    {
    CASE_CFN_COPYSIGN:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
          && out_mode == DFmode && out_n == 2
          && in_mode == DFmode && in_n == 2)
        return rs6000_builtin_decls[RS6000_BIF_CPSGNDP];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_CPSGNSP];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_COPYSIGN_V4SF];
      break;
    CASE_CFN_CEIL:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
          && out_mode == DFmode && out_n == 2
          && in_mode == DFmode && in_n == 2)
        return rs6000_builtin_decls[RS6000_BIF_XVRDPIP];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_XVRSPIP];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_VRFIP];
      break;
    CASE_CFN_FLOOR:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
          && out_mode == DFmode && out_n == 2
          && in_mode == DFmode && in_n == 2)
        return rs6000_builtin_decls[RS6000_BIF_XVRDPIM];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_XVRSPIM];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_VRFIM];
      break;
    CASE_CFN_FMA:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
          && out_mode == DFmode && out_n == 2
          && in_mode == DFmode && in_n == 2)
        return rs6000_builtin_decls[RS6000_BIF_XVMADDDP];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_XVMADDSP];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_VMADDFP];
      break;
    CASE_CFN_TRUNC:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
          && out_mode == DFmode && out_n == 2
          && in_mode == DFmode && in_n == 2)
        return rs6000_builtin_decls[RS6000_BIF_XVRDPIZ];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_XVRSPIZ];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_VRFIZ];
      break;
    CASE_CFN_NEARBYINT:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
          && flag_unsafe_math_optimizations
          && out_mode == DFmode && out_n == 2
          && in_mode == DFmode && in_n == 2)
        return rs6000_builtin_decls[RS6000_BIF_XVRDPI];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
          && flag_unsafe_math_optimizations
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_XVRSPI];
      break;
    CASE_CFN_RINT:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
          && !flag_trapping_math
          && out_mode == DFmode && out_n == 2
          && in_mode == DFmode && in_n == 2)
        return rs6000_builtin_decls[RS6000_BIF_XVRDPIC];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
          && !flag_trapping_math
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_XVRSPIC];
      break;
    default:
      break;
    }
  /* Generate calls to libmass if appropriate.  */
  if (rs6000_veclib_handler)
    return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);

  return NULL_TREE;
}
/* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
   library with vectorized intrinsics.  */

static tree
rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
                                   tree type_in)
{
  char name[32];
  const char *suffix = NULL;
  tree fntype, new_fndecl, bdecl = NULL_TREE;
  int n_args = 1;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* Libmass is suitable for unsafe math only as it does not correctly support
     parts of IEEE with the required precision such as denormals.  Only support
     it if we have VSX to use the simd d2 or f4 functions.
     XXX: Add variable length support.  */
  if (!flag_unsafe_math_optimizations || !TARGET_VSX)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  if (el_mode == DFmode && n == 2)
    {
      bdecl = mathfn_built_in (double_type_node, fn);
      suffix = "d2";				/* pow -> powd2 */
    }
  else if (el_mode == SFmode && n == 4)
    {
      bdecl = mathfn_built_in (float_type_node, fn);
      suffix = "4";				/* powf -> powf4 */
    }
  else
    return NULL_TREE;

  gcc_assert (suffix != NULL);
  bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
  strcpy (name, bname + strlen ("__builtin_"));
  strcat (name, suffix);

  if (n_args == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else if (n_args == 2)
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
                           FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
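/* For example, with -ffast-math and VSX enabled a V2DF pow is redirected to
   the MASS routine "powd2": the "__builtin_" prefix is stripped from
   "__builtin_pow" and the "d2" suffix appended, while a V4SF powf becomes
   "powf4".  */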
/* Default CPU string for rs6000*_file_start functions.  */
static const char *rs6000_default_cpu;

#ifdef USING_ELFOS_H
const char *rs6000_machine;
const char *
rs6000_machine_from_flags (void)
{
  /* For some CPUs, the machine cannot be determined by ISA flags.  We have to
     check them first.  */
  switch (rs6000_cpu)
    {
    case PROCESSOR_PPC8540:
    case PROCESSOR_PPC8548:
      return "e500";

    case PROCESSOR_PPCE300C2:
    case PROCESSOR_PPCE300C3:
      return "e300";

    case PROCESSOR_PPCE500MC:
      return "e500mc";

    case PROCESSOR_PPCE500MC64:
      return "e500mc64";

    case PROCESSOR_PPCE5500:
      return "e5500";

    case PROCESSOR_PPCE6500:
      return "e6500";

    default:
      break;
    }

  HOST_WIDE_INT flags = rs6000_isa_flags;

  /* Disable the flags that should never influence the .machine selection.  */
  flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT | OPTION_MASK_ISEL);

  if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
    return "power10";
  if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
    return "power9";
  if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
    return "power8";
  if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
    return "power7";
  if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
    return "power6";
  if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
    return "power5";
  if ((flags & ISA_2_1_MASKS) != 0)
    return "power4";
  if ((flags & OPTION_MASK_POWERPC64) != 0)
    return "ppc64";
  return "ppc";
}
static void
emit_asm_machine (void)
{
  fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
}
#endif
5852 /* Do anything needed at the start of the asm file. */
5855 rs6000_file_start (void)
5858 const char *start
= buffer
;
5859 FILE *file
= asm_out_file
;
5861 rs6000_default_cpu
= TARGET_CPU_DEFAULT
;
5863 default_file_start ();
5865 if (flag_verbose_asm
)
5867 sprintf (buffer
, "\n%s rs6000/powerpc options:", ASM_COMMENT_START
);
5869 if (rs6000_default_cpu
!= 0 && rs6000_default_cpu
[0] != '\0')
5871 fprintf (file
, "%s --with-cpu=%s", start
, rs6000_default_cpu
);
5875 if (OPTION_SET_P (rs6000_cpu_index
))
5877 fprintf (file
, "%s -mcpu=%s", start
,
5878 processor_target_table
[rs6000_cpu_index
].name
);
5882 if (OPTION_SET_P (rs6000_tune_index
))
5884 fprintf (file
, "%s -mtune=%s", start
,
5885 processor_target_table
[rs6000_tune_index
].name
);
5889 if (PPC405_ERRATUM77
)
5891 fprintf (file
, "%s PPC405CR_ERRATUM77", start
);
5895 #ifdef USING_ELFOS_H
5896 switch (rs6000_sdata
)
5898 case SDATA_NONE
: fprintf (file
, "%s -msdata=none", start
); start
= ""; break;
5899 case SDATA_DATA
: fprintf (file
, "%s -msdata=data", start
); start
= ""; break;
5900 case SDATA_SYSV
: fprintf (file
, "%s -msdata=sysv", start
); start
= ""; break;
5901 case SDATA_EABI
: fprintf (file
, "%s -msdata=eabi", start
); start
= ""; break;
5904 if (rs6000_sdata
&& g_switch_value
)
5906 fprintf (file
, "%s -G %d", start
,
5916 #ifdef USING_ELFOS_H
5917 rs6000_machine
= rs6000_machine_from_flags ();
5918 emit_asm_machine ();
5921 if (DEFAULT_ABI
== ABI_ELFv2
)
5922 fprintf (file
, "\t.abiversion 2\n");
5926 /* Return nonzero if this function is known to have a null epilogue. */
5929 direct_return (void)
5931 if (reload_completed
)
5933 rs6000_stack_t
*info
= rs6000_stack_info ();
5935 if (info
->first_gp_reg_save
== 32
5936 && info
->first_fp_reg_save
== 64
5937 && info
->first_altivec_reg_save
== LAST_ALTIVEC_REGNO
+ 1
5938 && ! info
->lr_save_p
5939 && ! info
->cr_save_p
5940 && info
->vrsave_size
== 0
/* Helper for num_insns_constant.  Calculate the number of instructions to
   load VALUE into a single gpr using combinations of addi, addis, ori,
   oris, sldi and rldimi instructions.  */

static int
num_insns_constant_gpr (HOST_WIDE_INT value)
{
  /* signed constant loadable with addi */
  if (SIGNED_INTEGER_16BIT_P (value))
    return 1;

  /* constant loadable with addis */
  else if ((value & 0xffff) == 0
           && (value >> 31 == -1 || value >> 31 == 0))
    return 1;

  /* PADDI can support up to 34 bit signed integers.  */
  else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
    return 1;

  else if (TARGET_POWERPC64)
    {
      HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
      HOST_WIDE_INT high = value >> 31;

      if (high == 0 || high == -1)
        return 2;

      high >>= 1;

      if (low == 0 || low == high)
        return num_insns_constant_gpr (high) + 1;
      else if (high == 0)
        return num_insns_constant_gpr (low) + 1;
      else
        return (num_insns_constant_gpr (high)
                + num_insns_constant_gpr (low) + 1);
    }

  else
    return 2;
}
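/* Worked example: without prefixed (PADDI) support, the 64-bit constant
   0x100000000 reaches the TARGET_POWERPC64 path; its low 32 bits are zero,
   so the count is num_insns_constant_gpr of the high part plus one shift,
   i.e. two instructions (li followed by sldi).  */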
5991 /* Helper for num_insns_constant. Allow constants formed by the
5992 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5993 and handle modes that require multiple gprs. */
5996 num_insns_constant_multi (HOST_WIDE_INT value
, machine_mode mode
)
5998 int nregs
= (GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6002 HOST_WIDE_INT low
= sext_hwi (value
, BITS_PER_WORD
);
6003 int insns
= num_insns_constant_gpr (low
);
6005 /* We won't get more than 2 from num_insns_constant_gpr
6006 except when TARGET_POWERPC64 and mode is DImode or
6007 wider, so the register mode must be DImode. */
6008 && rs6000_is_valid_and_mask (GEN_INT (low
), DImode
))
6011 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
6012 it all at once would be UB. */
6013 value
>>= (BITS_PER_WORD
- 1);
6019 /* Return the number of instructions it takes to form a constant in as
6020 many gprs are needed for MODE. */
6023 num_insns_constant (rtx op
, machine_mode mode
)
6027 switch (GET_CODE (op
))
6033 case CONST_WIDE_INT
:
6036 for (int i
= 0; i
< CONST_WIDE_INT_NUNITS (op
); i
++)
6037 insns
+= num_insns_constant_multi (CONST_WIDE_INT_ELT (op
, i
),
6044 const struct real_value
*rv
= CONST_DOUBLE_REAL_VALUE (op
);
6046 if (mode
== SFmode
|| mode
== SDmode
)
6051 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv
, l
);
6053 REAL_VALUE_TO_TARGET_SINGLE (*rv
, l
);
6054 /* See the first define_split in rs6000.md handling a
6055 const_double_operand. */
6059 else if (mode
== DFmode
|| mode
== DDmode
)
6064 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv
, l
);
6066 REAL_VALUE_TO_TARGET_DOUBLE (*rv
, l
);
6068 /* See the second (32-bit) and third (64-bit) define_split
6069 in rs6000.md handling a const_double_operand. */
6070 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 0 : 1] << 32;
6071 val
|= l
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffffUL
;
6074 else if (mode
== TFmode
|| mode
== TDmode
6075 || mode
== KFmode
|| mode
== IFmode
)
6081 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv
, l
);
6083 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv
, l
);
6085 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 0 : 3] << 32;
6086 val
|= l
[WORDS_BIG_ENDIAN
? 1 : 2] & 0xffffffffUL
;
6087 insns
= num_insns_constant_multi (val
, DImode
);
6088 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 2 : 1] << 32;
6089 val
|= l
[WORDS_BIG_ENDIAN
? 3 : 0] & 0xffffffffUL
;
6090 insns
+= num_insns_constant_multi (val
, DImode
);
6102 return num_insns_constant_multi (val
, mode
);
/* Interpret element ELT of the CONST_VECTOR OP as an integer value.
   If the mode of OP is MODE_VECTOR_INT, this simply returns the
   corresponding element of the vector, but for V4SFmode, the
   corresponding "float" is interpreted as an SImode integer.  */

HOST_WIDE_INT
const_vector_elt_as_int (rtx op, unsigned int elt)
{
  rtx tmp;

  /* We can't handle V2DImode and V2DFmode vector constants here yet.  */
  gcc_assert (GET_MODE (op) != V2DImode
              && GET_MODE (op) != V2DFmode);

  tmp = CONST_VECTOR_ELT (op, elt);
  if (GET_MODE (op) == V4SFmode)
    tmp = gen_lowpart (SImode, tmp);
  return INTVAL (tmp);
}
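/* For instance, the element 1.0f of a V4SF constant is returned as
   0x3f800000, the IEEE single-precision bit pattern reinterpreted as an
   SImode value.  */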
6125 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6126 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6127 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6128 all items are set to the same value and contain COPIES replicas of the
6129 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6130 operand and the others are set to the value of the operand's msb. */
6133 vspltis_constant (rtx op
, unsigned step
, unsigned copies
)
6135 machine_mode mode
= GET_MODE (op
);
6136 machine_mode inner
= GET_MODE_INNER (mode
);
6144 HOST_WIDE_INT splat_val
;
6145 HOST_WIDE_INT msb_val
;
6147 if (mode
== V2DImode
|| mode
== V2DFmode
|| mode
== V1TImode
)
6150 nunits
= GET_MODE_NUNITS (mode
);
6151 bitsize
= GET_MODE_BITSIZE (inner
);
6152 mask
= GET_MODE_MASK (inner
);
6154 val
= const_vector_elt_as_int (op
, BYTES_BIG_ENDIAN
? nunits
- 1 : 0);
6156 msb_val
= val
>= 0 ? 0 : -1;
6158 if (val
== 0 && step
> 1)
6160 /* Special case for loading most significant bit with step > 1.
6161 In that case, match 0s in all but step-1s elements, where match
6163 for (i
= 1; i
< nunits
; ++i
)
6165 unsigned elt
= BYTES_BIG_ENDIAN
? nunits
- 1 - i
: i
;
6166 HOST_WIDE_INT elt_val
= const_vector_elt_as_int (op
, elt
);
6167 if ((i
& (step
- 1)) == step
- 1)
6169 if (!EASY_VECTOR_MSB (elt_val
, inner
))
6179 /* Construct the value to be splatted, if possible. If not, return 0. */
6180 for (i
= 2; i
<= copies
; i
*= 2)
6182 HOST_WIDE_INT small_val
;
6184 small_val
= splat_val
>> bitsize
;
6186 if (splat_val
!= ((HOST_WIDE_INT
)
6187 ((unsigned HOST_WIDE_INT
) small_val
<< bitsize
)
6188 | (small_val
& mask
)))
6190 splat_val
= small_val
;
6191 inner
= smallest_int_mode_for_size (bitsize
);
6194 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6195 if (EASY_VECTOR_15 (splat_val
))
6198 /* Also check if we can splat, and then add the result to itself. Do so if
6199 the value is positive, of if the splat instruction is using OP's mode;
6200 for splat_val < 0, the splat and the add should use the same mode. */
6201 else if (EASY_VECTOR_15_ADD_SELF (splat_val
)
6202 && (splat_val
>= 0 || (step
== 1 && copies
== 1)))
6205 /* Also check if are loading up the most significant bit which can be done by
6206 loading up -1 and shifting the value left by -1. Only do this for
6207 step 1 here, for larger steps it is done earlier. */
6208 else if (EASY_VECTOR_MSB (splat_val
, inner
) && step
== 1)
6214 /* Check if VAL is present in every STEP-th element, and the
6215 other elements are filled with its most significant bit. */
6216 for (i
= 1; i
< nunits
; ++i
)
6218 HOST_WIDE_INT desired_val
;
6219 unsigned elt
= BYTES_BIG_ENDIAN
? nunits
- 1 - i
: i
;
6220 if ((i
& (step
- 1)) == 0)
6223 desired_val
= msb_val
;
6225 if (desired_val
!= const_vector_elt_as_int (op
, elt
))
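/* Two concrete cases of the STEP/COPIES matching above: a V16QI constant
   whose big-endian bytes are { 0,0,0,5, 0,0,0,5, 0,0,0,5, 0,0,0,5 } is
   matched with step 4 and generated by a single vspltisw 5, while a V8HI
   constant with every element equal to 0x0101 is matched with copies 2 and
   generated by vspltisb 1.  */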
6232 /* Like vsplitis_constant, but allow the value to be shifted left with a VSLDOI
6233 instruction, filling in the bottom elements with 0 or -1.
6235 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6236 for the number of zeroes to shift in, or negative for the number of 0xff
6239 OP is a CONST_VECTOR. */
6242 vspltis_shifted (rtx op
)
6244 machine_mode mode
= GET_MODE (op
);
6245 machine_mode inner
= GET_MODE_INNER (mode
);
6253 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
)
6256 /* We need to create pseudo registers to do the shift, so don't recognize
6257 shift vector constants after reload. */
6258 if (!can_create_pseudo_p ())
6261 nunits
= GET_MODE_NUNITS (mode
);
6262 mask
= GET_MODE_MASK (inner
);
6264 val
= const_vector_elt_as_int (op
, BYTES_BIG_ENDIAN
? 0 : nunits
- 1);
6266 /* Check if the value can really be the operand of a vspltis[bhw]. */
6267 if (EASY_VECTOR_15 (val
))
6270 /* Also check if we are loading up the most significant bit which can be done
6271 by loading up -1 and shifting the value left by -1. */
6272 else if (EASY_VECTOR_MSB (val
, inner
))
6278 /* Check if VAL is present in every STEP-th element until we find elements
6279 that are 0 or all 1 bits. */
6280 for (i
= 1; i
< nunits
; ++i
)
6282 unsigned elt
= BYTES_BIG_ENDIAN
? i
: nunits
- 1 - i
;
6283 HOST_WIDE_INT elt_val
= const_vector_elt_as_int (op
, elt
);
6285 /* If the value isn't the splat value, check for the remaining elements
6291 for (j
= i
+1; j
< nunits
; ++j
)
6293 unsigned elt2
= BYTES_BIG_ENDIAN
? j
: nunits
- 1 - j
;
6294 if (const_vector_elt_as_int (op
, elt2
) != 0)
6298 return (nunits
- i
) * GET_MODE_SIZE (inner
);
6301 else if ((elt_val
& mask
) == mask
)
6303 for (j
= i
+1; j
< nunits
; ++j
)
6305 unsigned elt2
= BYTES_BIG_ENDIAN
? j
: nunits
- 1 - j
;
6306 if ((const_vector_elt_as_int (op
, elt2
) & mask
) != mask
)
6310 return -((nunits
- i
) * GET_MODE_SIZE (inner
));
6318 /* If all elements are equal, we don't need to do VSLDOI. */
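/* Example: a V16QI constant whose leading (big-endian) byte is 5 and whose
   remaining fifteen bytes are zero can be built as vspltisb 5 followed by a
   VSLDOI that shifts in fifteen zero bytes, so the function returns 15.  */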
6323 /* Return non-zero (element mode byte size) if OP is of the given MODE
6324 and can be synthesized with a vspltisb, vspltish or vspltisw. */
6327 easy_altivec_constant (rtx op
, machine_mode mode
)
6329 unsigned step
, copies
;
6331 if (mode
== VOIDmode
)
6332 mode
= GET_MODE (op
);
6333 else if (mode
!= GET_MODE (op
))
6336 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6338 if (mode
== V2DFmode
)
6339 return zero_constant (op
, mode
) ? 8 : 0;
6341 else if (mode
== V2DImode
)
6343 if (!CONST_INT_P (CONST_VECTOR_ELT (op
, 0))
6344 || !CONST_INT_P (CONST_VECTOR_ELT (op
, 1)))
6347 if (zero_constant (op
, mode
))
6350 if (INTVAL (CONST_VECTOR_ELT (op
, 0)) == -1
6351 && INTVAL (CONST_VECTOR_ELT (op
, 1)) == -1)
6357 /* V1TImode is a special container for TImode. Ignore for now. */
6358 else if (mode
== V1TImode
)
6361 /* Start with a vspltisw. */
6362 step
= GET_MODE_NUNITS (mode
) / 4;
6365 if (vspltis_constant (op
, step
, copies
))
6368 /* Then try with a vspltish. */
6374 if (vspltis_constant (op
, step
, copies
))
6377 /* And finally a vspltisb. */
6383 if (vspltis_constant (op
, step
, copies
))
6386 if (vspltis_shifted (op
) != 0)
6387 return GET_MODE_SIZE (GET_MODE_INNER (mode
));
6392 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6393 result is OP. Abort if it is not possible. */
6396 gen_easy_altivec_constant (rtx op
)
6398 machine_mode mode
= GET_MODE (op
);
6399 int nunits
= GET_MODE_NUNITS (mode
);
6400 rtx val
= CONST_VECTOR_ELT (op
, BYTES_BIG_ENDIAN
? nunits
- 1 : 0);
6401 unsigned step
= nunits
/ 4;
6402 unsigned copies
= 1;
6404 /* Start with a vspltisw. */
6405 if (vspltis_constant (op
, step
, copies
))
6406 return gen_rtx_VEC_DUPLICATE (V4SImode
, gen_lowpart (SImode
, val
));
6408 /* Then try with a vspltish. */
6414 if (vspltis_constant (op
, step
, copies
))
6415 return gen_rtx_VEC_DUPLICATE (V8HImode
, gen_lowpart (HImode
, val
));
6417 /* And finally a vspltisb. */
6423 if (vspltis_constant (op
, step
, copies
))
6424 return gen_rtx_VEC_DUPLICATE (V16QImode
, gen_lowpart (QImode
, val
));
6429 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6430 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6432 Return the number of instructions needed (1 or 2) into the address pointed
6435 Return the constant that is being split via CONSTANT_PTR. */
6438 xxspltib_constant_p (rtx op
,
6443 size_t nunits
= GET_MODE_NUNITS (mode
);
6445 HOST_WIDE_INT value
;
6448 /* Set the returned values to out of bound values. */
6449 *num_insns_ptr
= -1;
6450 *constant_ptr
= 256;
6452 if (!TARGET_P9_VECTOR
)
6455 if (mode
== VOIDmode
)
6456 mode
= GET_MODE (op
);
6458 else if (mode
!= GET_MODE (op
) && GET_MODE (op
) != VOIDmode
)
6461 /* Handle (vec_duplicate <constant>). */
6462 if (GET_CODE (op
) == VEC_DUPLICATE
)
6464 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6465 && mode
!= V2DImode
)
6468 element
= XEXP (op
, 0);
6469 if (!CONST_INT_P (element
))
6472 value
= INTVAL (element
);
6473 if (!IN_RANGE (value
, -128, 127))
6477 /* Handle (const_vector [...]). */
6478 else if (GET_CODE (op
) == CONST_VECTOR
)
6480 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6481 && mode
!= V2DImode
)
6484 element
= CONST_VECTOR_ELT (op
, 0);
6485 if (!CONST_INT_P (element
))
6488 value
= INTVAL (element
);
6489 if (!IN_RANGE (value
, -128, 127))
6492 for (i
= 1; i
< nunits
; i
++)
6494 element
= CONST_VECTOR_ELT (op
, i
);
6495 if (!CONST_INT_P (element
))
6498 if (value
!= INTVAL (element
))
6503 /* Handle integer constants being loaded into the upper part of the VSX
6504 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6505 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLITIB. */
6506 else if (CONST_INT_P (op
))
6508 if (!SCALAR_INT_MODE_P (mode
))
6511 value
= INTVAL (op
);
6512 if (!IN_RANGE (value
, -128, 127))
6515 if (!IN_RANGE (value
, -1, 0))
6517 if (!(reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_VALID
))
6520 if (EASY_VECTOR_15 (value
))
6528 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6529 sign extend. Special case 0/-1 to allow getting any VSX register instead
6530 of an Altivec register. */
6531 if ((mode
== V4SImode
|| mode
== V8HImode
) && !IN_RANGE (value
, -1, 0)
6532 && EASY_VECTOR_15 (value
))
6535 /* Return # of instructions and the constant byte for XXSPLTIB. */
6536 if (mode
== V16QImode
)
6539 else if (IN_RANGE (value
, -1, 0))
6542 /* Do not generate XXSPLTIB and a sign extend operation if we can generate a
6543 single XXSPLTIW or XXSPLTIDP instruction. */
6544 else if (vsx_prefixed_constant (op
, mode
))
6547 /* Return XXSPLITB followed by a sign extend operation to convert the
6548 constant to V8HImode or V4SImode. */
6552 *constant_ptr
= (int) value
;
6557 output_vec_const_move (rtx
*operands
)
6565 mode
= GET_MODE (dest
);
6569 bool dest_vmx_p
= ALTIVEC_REGNO_P (REGNO (dest
));
6570 int xxspltib_value
= 256;
6573 if (zero_constant (vec
, mode
))
6575 if (TARGET_P9_VECTOR
)
6576 return "xxspltib %x0,0";
6578 else if (dest_vmx_p
)
6579 return "vspltisw %0,0";
6582 return "xxlxor %x0,%x0,%x0";
6585 if (all_ones_constant (vec
, mode
))
6587 if (TARGET_P9_VECTOR
)
6588 return "xxspltib %x0,255";
6590 else if (dest_vmx_p
)
6591 return "vspltisw %0,-1";
6593 else if (TARGET_P8_VECTOR
)
6594 return "xxlorc %x0,%x0,%x0";
6600 vec_const_128bit_type vsx_const
;
6601 if (TARGET_POWER10
&& vec_const_128bit_to_bytes (vec
, mode
, &vsx_const
))
6603 unsigned imm
= constant_generates_lxvkq (&vsx_const
);
6606 operands
[2] = GEN_INT (imm
);
6607 return "lxvkq %x0,%2";
6610 imm
= constant_generates_xxspltiw (&vsx_const
);
6613 operands
[2] = GEN_INT (imm
);
6614 return "xxspltiw %x0,%2";
6617 imm
= constant_generates_xxspltidp (&vsx_const
);
6620 operands
[2] = GEN_INT (imm
);
6621 return "xxspltidp %x0,%2";
6625 if (TARGET_P9_VECTOR
6626 && xxspltib_constant_p (vec
, mode
, &num_insns
, &xxspltib_value
))
6630 operands
[2] = GEN_INT (xxspltib_value
& 0xff);
6631 return "xxspltib %x0,%2";
6642 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest
)));
6643 if (zero_constant (vec
, mode
))
6644 return "vspltisw %0,0";
6646 if (all_ones_constant (vec
, mode
))
6647 return "vspltisw %0,-1";
6649 /* Do we need to construct a value using VSLDOI? */
6650 shift
= vspltis_shifted (vec
);
6654 splat_vec
= gen_easy_altivec_constant (vec
);
6655 gcc_assert (GET_CODE (splat_vec
) == VEC_DUPLICATE
);
6656 operands
[1] = XEXP (splat_vec
, 0);
6657 if (!EASY_VECTOR_15 (INTVAL (operands
[1])))
6660 switch (GET_MODE (splat_vec
))
6663 return "vspltisw %0,%1";
6666 return "vspltish %0,%1";
6669 return "vspltisb %0,%1";
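/* Summary of the selection above: a zero vector is emitted as
   "xxspltib %x0,0" on Power9, "vspltisw %0,0" for an Altivec destination, or
   "xxlxor %x0,%x0,%x0" otherwise; an all-ones vector likewise becomes
   "xxspltib %x0,255", "vspltisw %0,-1" or "xxlorc %x0,%x0,%x0"; other easy
   constants use lxvkq/xxspltiw/xxspltidp on Power10, xxspltib on Power9, or
   the vspltis[bhw] forms printed just above.  */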
6679 /* Initialize vector TARGET to VALS. */
6682 rs6000_expand_vector_init (rtx target
, rtx vals
)
6684 machine_mode mode
= GET_MODE (target
);
6685 machine_mode inner_mode
= GET_MODE_INNER (mode
);
6686 unsigned int n_elts
= GET_MODE_NUNITS (mode
);
6687 int n_var
= 0, one_var
= -1;
6688 bool all_same
= true, all_const_zero
= true;
6692 for (i
= 0; i
< n_elts
; ++i
)
6694 x
= XVECEXP (vals
, 0, i
);
6695 if (!(CONST_SCALAR_INT_P (x
) || CONST_DOUBLE_P (x
) || CONST_FIXED_P (x
)))
6696 ++n_var
, one_var
= i
;
6697 else if (x
!= CONST0_RTX (inner_mode
))
6698 all_const_zero
= false;
6700 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
6706 rtx const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
6707 bool int_vector_p
= (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
);
6708 if ((int_vector_p
|| TARGET_VSX
) && all_const_zero
)
6710 /* Zero register. */
6711 emit_move_insn (target
, CONST0_RTX (mode
));
6714 else if (int_vector_p
&& easy_vector_constant (const_vec
, mode
))
6716 /* Splat immediate. */
6717 emit_insn (gen_rtx_SET (target
, const_vec
));
6722 /* Load from constant pool. */
6723 emit_move_insn (target
, const_vec
);
6728 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6729 if (VECTOR_MEM_VSX_P (mode
) && (mode
== V2DFmode
|| mode
== V2DImode
))
6733 size_t num_elements
= all_same
? 1 : 2;
6734 for (i
= 0; i
< num_elements
; i
++)
6736 op
[i
] = XVECEXP (vals
, 0, i
);
6737 /* Just in case there is a SUBREG with a smaller mode, do a
6739 if (GET_MODE (op
[i
]) != inner_mode
)
6741 rtx tmp
= gen_reg_rtx (inner_mode
);
6742 convert_move (tmp
, op
[i
], 0);
6745 /* Allow load with splat double word. */
6746 else if (MEM_P (op
[i
]))
6749 op
[i
] = force_reg (inner_mode
, op
[i
]);
6751 else if (!REG_P (op
[i
]))
6752 op
[i
] = force_reg (inner_mode
, op
[i
]);
6757 if (mode
== V2DFmode
)
6758 emit_insn (gen_vsx_splat_v2df (target
, op
[0]));
6760 emit_insn (gen_vsx_splat_v2di (target
, op
[0]));
6764 if (mode
== V2DFmode
)
6765 emit_insn (gen_vsx_concat_v2df (target
, op
[0], op
[1]));
6767 emit_insn (gen_vsx_concat_v2di (target
, op
[0], op
[1]));
6772 /* Special case initializing vector int if we are on 64-bit systems with
6773 direct move or we have the ISA 3.0 instructions. */
6774 if (mode
== V4SImode
&& VECTOR_MEM_VSX_P (V4SImode
)
6775 && TARGET_DIRECT_MOVE_64BIT
)
6779 rtx element0
= XVECEXP (vals
, 0, 0);
6780 if (MEM_P (element0
))
6781 element0
= rs6000_force_indexed_or_indirect_mem (element0
);
6783 element0
= force_reg (SImode
, element0
);
6785 if (TARGET_P9_VECTOR
)
6786 emit_insn (gen_vsx_splat_v4si (target
, element0
));
6789 rtx tmp
= gen_reg_rtx (DImode
);
6790 emit_insn (gen_zero_extendsidi2 (tmp
, element0
));
6791 emit_insn (gen_vsx_splat_v4si_di (target
, tmp
));
6800 for (i
= 0; i
< 4; i
++)
6801 elements
[i
] = force_reg (SImode
, XVECEXP (vals
, 0, i
));
6803 emit_insn (gen_vsx_init_v4si (target
, elements
[0], elements
[1],
6804 elements
[2], elements
[3]));
6809 /* With single precision floating point on VSX, know that internally single
6810 precision is actually represented as a double, and either make 2 V2DF
6811 vectors, and convert these vectors to single precision, or do one
6812 conversion, and splat the result to the other elements. */
6813 if (mode
== V4SFmode
&& VECTOR_MEM_VSX_P (V4SFmode
))
6817 rtx element0
= XVECEXP (vals
, 0, 0);
6819 if (TARGET_P9_VECTOR
)
6821 if (MEM_P (element0
))
6822 element0
= rs6000_force_indexed_or_indirect_mem (element0
);
6824 emit_insn (gen_vsx_splat_v4sf (target
, element0
));
6829 rtx freg
= gen_reg_rtx (V4SFmode
);
6830 rtx sreg
= force_reg (SFmode
, element0
);
6831 rtx cvt
= (TARGET_XSCVDPSPN
6832 ? gen_vsx_xscvdpspn_scalar (freg
, sreg
)
6833 : gen_vsx_xscvdpsp_scalar (freg
, sreg
));
6836 emit_insn (gen_vsx_xxspltw_v4sf_direct (target
, freg
,
6842 if (TARGET_P8_VECTOR
&& TARGET_POWERPC64
)
6848 for (i
= 0; i
< 4; i
++)
6850 tmp_si
[i
] = gen_reg_rtx (SImode
);
6851 tmp_di
[i
] = gen_reg_rtx (DImode
);
6852 mrg_di
[i
] = gen_reg_rtx (DImode
);
6853 tmp_sf
[i
] = force_reg (SFmode
, XVECEXP (vals
, 0, i
));
6854 emit_insn (gen_movsi_from_sf (tmp_si
[i
], tmp_sf
[i
]));
6855 emit_insn (gen_zero_extendsidi2 (tmp_di
[i
], tmp_si
[i
]));
6858 if (!BYTES_BIG_ENDIAN
)
6860 std::swap (tmp_di
[0], tmp_di
[1]);
6861 std::swap (tmp_di
[2], tmp_di
[3]);
6864 emit_insn (gen_ashldi3 (mrg_di
[0], tmp_di
[0], GEN_INT (32)));
6865 emit_insn (gen_iordi3 (mrg_di
[1], mrg_di
[0], tmp_di
[1]));
6866 emit_insn (gen_ashldi3 (mrg_di
[2], tmp_di
[2], GEN_INT (32)));
6867 emit_insn (gen_iordi3 (mrg_di
[3], mrg_di
[2], tmp_di
[3]));
6869 rtx tmp_v2di
= gen_reg_rtx (V2DImode
);
6870 emit_insn (gen_vsx_concat_v2di (tmp_v2di
, mrg_di
[1], mrg_di
[3]));
6871 emit_move_insn (target
, gen_lowpart (V4SFmode
, tmp_v2di
));
6875 rtx dbl_even
= gen_reg_rtx (V2DFmode
);
6876 rtx dbl_odd
= gen_reg_rtx (V2DFmode
);
6877 rtx flt_even
= gen_reg_rtx (V4SFmode
);
6878 rtx flt_odd
= gen_reg_rtx (V4SFmode
);
6879 rtx op0
= force_reg (SFmode
, XVECEXP (vals
, 0, 0));
6880 rtx op1
= force_reg (SFmode
, XVECEXP (vals
, 0, 1));
6881 rtx op2
= force_reg (SFmode
, XVECEXP (vals
, 0, 2));
6882 rtx op3
= force_reg (SFmode
, XVECEXP (vals
, 0, 3));
6884 emit_insn (gen_vsx_concat_v2sf (dbl_even
, op0
, op1
));
6885 emit_insn (gen_vsx_concat_v2sf (dbl_odd
, op2
, op3
));
6886 emit_insn (gen_vsx_xvcvdpsp (flt_even
, dbl_even
));
6887 emit_insn (gen_vsx_xvcvdpsp (flt_odd
, dbl_odd
));
6888 rs6000_expand_extract_even (target
, flt_even
, flt_odd
);
6894 /* Special case initializing vector short/char that are splats if we are on
6895 64-bit systems with direct move. */
6896 if (all_same
&& TARGET_DIRECT_MOVE_64BIT
6897 && (mode
== V16QImode
|| mode
== V8HImode
))
6899 rtx op0
= XVECEXP (vals
, 0, 0);
6900 rtx di_tmp
= gen_reg_rtx (DImode
);
6903 op0
= force_reg (GET_MODE_INNER (mode
), op0
);
6905 if (mode
== V16QImode
)
6907 emit_insn (gen_zero_extendqidi2 (di_tmp
, op0
));
6908 emit_insn (gen_vsx_vspltb_di (target
, di_tmp
));
6912 if (mode
== V8HImode
)
6914 emit_insn (gen_zero_extendhidi2 (di_tmp
, op0
));
6915 emit_insn (gen_vsx_vsplth_di (target
, di_tmp
));
6920 /* Store value to stack temp. Load vector element. Splat. However, splat
6921 of 64-bit items is not supported on Altivec. */
6922 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
6924 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
6925 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0),
6926 XVECEXP (vals
, 0, 0));
6927 x
= gen_rtx_UNSPEC (VOIDmode
,
6928 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
6929 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
6931 gen_rtx_SET (target
, mem
),
6933 x
= gen_rtx_VEC_SELECT (inner_mode
, target
,
6934 gen_rtx_PARALLEL (VOIDmode
,
6935 gen_rtvec (1, const0_rtx
)));
6936 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
6940 /* One field is non-constant. Load constant then overwrite
6944 rtx copy
= copy_rtx (vals
);
6946 /* Load constant part of vector, substitute neighboring value for
6948 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
6949 rs6000_expand_vector_init (target
, copy
);
6951 /* Insert variable. */
6952 rs6000_expand_vector_set (target
, XVECEXP (vals
, 0, one_var
),
6957 if (TARGET_DIRECT_MOVE
&& (mode
== V16QImode
|| mode
== V8HImode
))
6960 /* Force the values into word_mode registers. */
6961 for (i
= 0; i
< n_elts
; i
++)
6963 rtx tmp
= force_reg (inner_mode
, XVECEXP (vals
, 0, i
));
6964 machine_mode tmode
= TARGET_POWERPC64
? DImode
: SImode
;
6965 op
[i
] = simplify_gen_subreg (tmode
, tmp
, inner_mode
, 0);
6968 /* Take unsigned char big endianness on 64bit as example for below
6969 construction, the input values are: A, B, C, D, ..., O, P. */
6971 if (TARGET_DIRECT_MOVE_128
)
6973 /* Move to VSX register with vec_concat, each has 2 values.
6974 eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
6975 vr1[1] = { xxxxxxxC, xxxxxxxD };
6977 vr1[7] = { xxxxxxxO, xxxxxxxP }; */
6979 for (i
= 0; i
< n_elts
/ 2; i
++)
6981 vr1
[i
] = gen_reg_rtx (V2DImode
);
6982 emit_insn (gen_vsx_concat_v2di (vr1
[i
], op
[i
* 2],
6986 /* Pack vectors with 2 values into vectors with 4 values.
6987 eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
6988 vr2[1] = { xxxExxxF, xxxGxxxH };
6989 vr2[1] = { xxxIxxxJ, xxxKxxxL };
6990 vr2[3] = { xxxMxxxN, xxxOxxxP }; */
6992 for (i
= 0; i
< n_elts
/ 4; i
++)
6994 vr2
[i
] = gen_reg_rtx (V4SImode
);
6995 emit_insn (gen_altivec_vpkudum (vr2
[i
], vr1
[i
* 2],
6999 /* Pack vectors with 4 values into vectors with 8 values.
7000 eg: vr3[0] = { xAxBxCxD, xExFxGxH };
7001 vr3[1] = { xIxJxKxL, xMxNxOxP }; */
7003 for (i
= 0; i
< n_elts
/ 8; i
++)
7005 vr3
[i
] = gen_reg_rtx (V8HImode
);
7006 emit_insn (gen_altivec_vpkuwum (vr3
[i
], vr2
[i
* 2],
7010 /* If it's V8HImode, it's done and return it. */
7011 if (mode
== V8HImode
)
7013 emit_insn (gen_rtx_SET (target
, vr3
[0]));
7017 /* Pack vectors with 8 values into 16 values. */
7018 rtx res
= gen_reg_rtx (V16QImode
);
7019 emit_insn (gen_altivec_vpkuhum (res
, vr3
[0], vr3
[1]));
7020 emit_insn (gen_rtx_SET (target
, res
));
7024 rtx (*merge_v16qi
) (rtx
, rtx
, rtx
) = NULL
;
7025 rtx (*merge_v8hi
) (rtx
, rtx
, rtx
) = NULL
;
7026 rtx (*merge_v4si
) (rtx
, rtx
, rtx
) = NULL
;
7029 /* Set up some common gen routines and values. */
7030 if (BYTES_BIG_ENDIAN
)
7032 if (mode
== V16QImode
)
7034 merge_v16qi
= gen_altivec_vmrghb
;
7035 merge_v8hi
= gen_altivec_vmrglh
;
7038 merge_v8hi
= gen_altivec_vmrghh
;
7040 merge_v4si
= gen_altivec_vmrglw
;
7041 perm_idx
= GEN_INT (3);
7045 if (mode
== V16QImode
)
7047 merge_v16qi
= gen_altivec_vmrglb
;
7048 merge_v8hi
= gen_altivec_vmrghh
;
7051 merge_v8hi
= gen_altivec_vmrglh
;
7053 merge_v4si
= gen_altivec_vmrghw
;
7054 perm_idx
= GEN_INT (0);
7057 /* Move to VSX register with direct move.
7058 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
7059 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
7061 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
7063 for (i
= 0; i
< n_elts
; i
++)
7065 vr_qi
[i
] = gen_reg_rtx (V16QImode
);
7066 if (TARGET_POWERPC64
)
7067 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi
[i
], op
[i
]));
7069 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi
[i
], op
[i
]));
7072 /* Merge/move to vector short.
7073 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
7074 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
7076 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
7078 for (i
= 0; i
< 8; i
++)
7081 if (mode
== V16QImode
)
7083 tmp
= gen_reg_rtx (V16QImode
);
7084 emit_insn (merge_v16qi (tmp
, vr_qi
[2 * i
], vr_qi
[2 * i
+ 1]));
7086 vr_hi
[i
] = gen_reg_rtx (V8HImode
);
7087 emit_move_insn (vr_hi
[i
], gen_lowpart (V8HImode
, tmp
));
7090 /* Merge vector short to vector int.
7091 eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
7092 vr_si[1] = { xxxxxxxx, xxxxEFGH };
7094 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
7096 for (i
= 0; i
< 4; i
++)
7098 rtx tmp
= gen_reg_rtx (V8HImode
);
7099 emit_insn (merge_v8hi (tmp
, vr_hi
[2 * i
], vr_hi
[2 * i
+ 1]));
7100 vr_si
[i
] = gen_reg_rtx (V4SImode
);
7101 emit_move_insn (vr_si
[i
], gen_lowpart (V4SImode
, tmp
));
7104 /* Merge vector int to vector long.
7105 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
7106 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
7108 for (i
= 0; i
< 2; i
++)
7110 rtx tmp
= gen_reg_rtx (V4SImode
);
7111 emit_insn (merge_v4si (tmp
, vr_si
[2 * i
], vr_si
[2 * i
+ 1]));
7112 vr_di
[i
] = gen_reg_rtx (V2DImode
);
7113 emit_move_insn (vr_di
[i
], gen_lowpart (V2DImode
, tmp
));
7116 rtx res
= gen_reg_rtx (V2DImode
);
7117 emit_insn (gen_vsx_xxpermdi_v2di (res
, vr_di
[0], vr_di
[1], perm_idx
));
7118 emit_insn (gen_rtx_SET (target
, gen_lowpart (mode
, res
)));
7124 /* Construct the vector in memory one field at a time
7125 and load the whole vector. */
7126 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
7127 for (i
= 0; i
< n_elts
; i
++)
7128 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
7129 i
* GET_MODE_SIZE (inner_mode
)),
7130 XVECEXP (vals
, 0, i
));
7131 emit_move_insn (target
, mem
);
7134 /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
7135 is variable and also counts by vector element size for p9 and above. */
7138 rs6000_expand_vector_set_var_p9 (rtx target
, rtx val
, rtx idx
)
7140 machine_mode mode
= GET_MODE (target
);
7142 gcc_assert (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (idx
));
7144 machine_mode inner_mode
= GET_MODE (val
);
7146 int width
= GET_MODE_SIZE (inner_mode
);
7148 gcc_assert (width
>= 1 && width
<= 8);
7150 int shift
= exact_log2 (width
);
7152 machine_mode idx_mode
= GET_MODE (idx
);
7154 machine_mode shift_mode
;
7155 rtx (*gen_ashl
)(rtx
, rtx
, rtx
);
7156 rtx (*gen_lvsl
)(rtx
, rtx
);
7157 rtx (*gen_lvsr
)(rtx
, rtx
);
7159 if (TARGET_POWERPC64
)
7161 shift_mode
= DImode
;
7162 gen_ashl
= gen_ashldi3
;
7163 gen_lvsl
= gen_altivec_lvsl_reg_di
;
7164 gen_lvsr
= gen_altivec_lvsr_reg_di
;
7168 shift_mode
= SImode
;
7169 gen_ashl
= gen_ashlsi3
;
7170 gen_lvsl
= gen_altivec_lvsl_reg_si
;
7171 gen_lvsr
= gen_altivec_lvsr_reg_si
;
7173 /* Generate the IDX for permute shift, width is the vector element size.
7174 idx = idx * width. */
7175 rtx tmp
= gen_reg_rtx (shift_mode
);
7176 idx
= convert_modes (shift_mode
, idx_mode
, idx
, 1);
7178 emit_insn (gen_ashl (tmp
, idx
, GEN_INT (shift
)));
7180 /* lvsr v1,0,idx. */
7181 rtx pcvr
= gen_reg_rtx (V16QImode
);
7182 emit_insn (gen_lvsr (pcvr
, tmp
));
7184 /* lvsl v2,0,idx. */
7185 rtx pcvl
= gen_reg_rtx (V16QImode
);
7186 emit_insn (gen_lvsl (pcvl
, tmp
));
7188 rtx sub_target
= simplify_gen_subreg (V16QImode
, target
, mode
, 0);
7191 = gen_altivec_vperm_v8hiv16qi (sub_target
, sub_target
, sub_target
, pcvr
);
7194 rs6000_expand_vector_set (target
, val
, const0_rtx
);
7197 = gen_altivec_vperm_v8hiv16qi (sub_target
, sub_target
, sub_target
, pcvl
);
7201 /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
7202 is variable and also counts by vector element size for p7 & p8. */
7205 rs6000_expand_vector_set_var_p7 (rtx target
, rtx val
, rtx idx
)
7207 machine_mode mode
= GET_MODE (target
);
7209 gcc_assert (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (idx
));
7211 machine_mode inner_mode
= GET_MODE (val
);
7212 HOST_WIDE_INT mode_mask
= GET_MODE_MASK (inner_mode
);
7214 int width
= GET_MODE_SIZE (inner_mode
);
7215 gcc_assert (width
>= 1 && width
<= 4);
7217 int shift
= exact_log2 (width
);
7219 machine_mode idx_mode
= GET_MODE (idx
);
7221 machine_mode shift_mode
;
7222 rtx (*gen_ashl
)(rtx
, rtx
, rtx
);
7223 rtx (*gen_add
)(rtx
, rtx
, rtx
);
7224 rtx (*gen_sub
)(rtx
, rtx
, rtx
);
7225 rtx (*gen_lvsl
)(rtx
, rtx
);
7227 if (TARGET_POWERPC64
)
7229 shift_mode
= DImode
;
7230 gen_ashl
= gen_ashldi3
;
7231 gen_add
= gen_adddi3
;
7232 gen_sub
= gen_subdi3
;
7233 gen_lvsl
= gen_altivec_lvsl_reg_di
;
7237 shift_mode
= SImode
;
7238 gen_ashl
= gen_ashlsi3
;
7239 gen_add
= gen_addsi3
;
7240 gen_sub
= gen_subsi3
;
7241 gen_lvsl
= gen_altivec_lvsl_reg_si
;
7244 /* idx = idx * width. */
7245 rtx tmp
= gen_reg_rtx (shift_mode
);
7246 idx
= convert_modes (shift_mode
, idx_mode
, idx
, 1);
7248 emit_insn (gen_ashl (tmp
, idx
, GEN_INT (shift
)));
7250 /* For LE: idx = idx + 8. */
7251 if (!BYTES_BIG_ENDIAN
)
7252 emit_insn (gen_add (tmp
, tmp
, GEN_INT (8)));
7254 emit_insn (gen_sub (tmp
, GEN_INT (24 - width
), tmp
));
7257 DImode: 0xffffffffffffffff0000000000000000
7258 SImode: 0x00000000ffffffff0000000000000000
7259 HImode: 0x000000000000ffff0000000000000000.
7260 QImode: 0x00000000000000ff0000000000000000. */
7261 rtx mask
= gen_reg_rtx (V16QImode
);
7262 rtx mask_v2di
= gen_reg_rtx (V2DImode
);
7263 rtvec v
= rtvec_alloc (2);
7264 if (!BYTES_BIG_ENDIAN
)
7266 RTVEC_ELT (v
, 0) = gen_rtx_CONST_INT (DImode
, 0);
7267 RTVEC_ELT (v
, 1) = gen_rtx_CONST_INT (DImode
, mode_mask
);
7271 RTVEC_ELT (v
, 0) = gen_rtx_CONST_INT (DImode
, mode_mask
);
7272 RTVEC_ELT (v
, 1) = gen_rtx_CONST_INT (DImode
, 0);
7274 emit_insn (gen_vec_initv2didi (mask_v2di
, gen_rtx_PARALLEL (V2DImode
, v
)));
7275 rtx sub_mask
= simplify_gen_subreg (V16QImode
, mask_v2di
, V2DImode
, 0);
7276 emit_insn (gen_rtx_SET (mask
, sub_mask
));
7278 /* mtvsrd[wz] f0,tmp_val. */
7279 rtx tmp_val
= gen_reg_rtx (SImode
);
7280 if (inner_mode
== E_SFmode
)
7281 if (TARGET_DIRECT_MOVE_64BIT
)
7282 emit_insn (gen_movsi_from_sf (tmp_val
, val
));
7285 rtx stack
= rs6000_allocate_stack_temp (SFmode
, false, true);
7286 emit_insn (gen_movsf_hardfloat (stack
, val
));
7287 rtx stack2
= copy_rtx (stack
);
7288 PUT_MODE (stack2
, SImode
);
7289 emit_move_insn (tmp_val
, stack2
);
7292 tmp_val
= force_reg (SImode
, val
);
7294 rtx val_v16qi
= gen_reg_rtx (V16QImode
);
7295 rtx val_v2di
= gen_reg_rtx (V2DImode
);
7296 rtvec vec_val
= rtvec_alloc (2);
7297 if (!BYTES_BIG_ENDIAN
)
7299 RTVEC_ELT (vec_val
, 0) = gen_rtx_CONST_INT (DImode
, 0);
7300 RTVEC_ELT (vec_val
, 1) = tmp_val
;
7304 RTVEC_ELT (vec_val
, 0) = tmp_val
;
7305 RTVEC_ELT (vec_val
, 1) = gen_rtx_CONST_INT (DImode
, 0);
7308 gen_vec_initv2didi (val_v2di
, gen_rtx_PARALLEL (V2DImode
, vec_val
)));
7309 rtx sub_val
= simplify_gen_subreg (V16QImode
, val_v2di
, V2DImode
, 0);
7310 emit_insn (gen_rtx_SET (val_v16qi
, sub_val
));
7312 /* lvsl 13,0,idx. */
7313 rtx pcv
= gen_reg_rtx (V16QImode
);
7314 emit_insn (gen_lvsl (pcv
, tmp
));
7316 /* vperm 1,1,1,13. */
7317 /* vperm 0,0,0,13. */
7318 rtx val_perm
= gen_reg_rtx (V16QImode
);
7319 rtx mask_perm
= gen_reg_rtx (V16QImode
);
7320 emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm
, val_v16qi
, val_v16qi
, pcv
));
7321 emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm
, mask
, mask
, pcv
));
7323 rtx target_v16qi
= simplify_gen_subreg (V16QImode
, target
, mode
, 0);
7325 /* xxsel 34,34,32,33. */
7327 gen_vector_select_v16qi (target_v16qi
, target_v16qi
, val_perm
, mask_perm
));
7330 /* Set field ELT_RTX of TARGET to VAL. */
7333 rs6000_expand_vector_set (rtx target
, rtx val
, rtx elt_rtx
)
7335 machine_mode mode
= GET_MODE (target
);
7336 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7337 rtx reg
= gen_reg_rtx (mode
);
7339 int width
= GET_MODE_SIZE (inner_mode
);
7342 val
= force_reg (GET_MODE (val
), val
);
7344 if (VECTOR_MEM_VSX_P (mode
))
7346 if (!CONST_INT_P (elt_rtx
))
7348 /* For V2DI/V2DF, could leverage the P9 version to generate xxpermdi
7349 when elt_rtx is variable. */
7350 if ((TARGET_P9_VECTOR
&& TARGET_POWERPC64
) || width
== 8)
7352 rs6000_expand_vector_set_var_p9 (target
, val
, elt_rtx
);
7355 else if (TARGET_VSX
)
7357 rs6000_expand_vector_set_var_p7 (target
, val
, elt_rtx
);
7361 gcc_assert (CONST_INT_P (elt_rtx
));
7364 rtx insn
= NULL_RTX
;
7366 if (mode
== V2DFmode
)
7367 insn
= gen_vsx_set_v2df (target
, target
, val
, elt_rtx
);
7369 else if (mode
== V2DImode
)
7370 insn
= gen_vsx_set_v2di (target
, target
, val
, elt_rtx
);
7372 else if (TARGET_P9_VECTOR
&& TARGET_POWERPC64
)
7374 if (mode
== V4SImode
)
7375 insn
= gen_vsx_set_v4si_p9 (target
, target
, val
, elt_rtx
);
7376 else if (mode
== V8HImode
)
7377 insn
= gen_vsx_set_v8hi_p9 (target
, target
, val
, elt_rtx
);
7378 else if (mode
== V16QImode
)
7379 insn
= gen_vsx_set_v16qi_p9 (target
, target
, val
, elt_rtx
);
7380 else if (mode
== V4SFmode
)
7381 insn
= gen_vsx_set_v4sf_p9 (target
, target
, val
, elt_rtx
);
7391 /* Simplify setting single element vectors like V1TImode. */
7392 if (GET_MODE_SIZE (mode
) == GET_MODE_SIZE (inner_mode
)
7393 && INTVAL (elt_rtx
) == 0)
7395 emit_move_insn (target
, gen_lowpart (mode
, val
));
7399 /* Load single variable value. */
7400 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
7401 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0), val
);
7402 x
= gen_rtx_UNSPEC (VOIDmode
,
7403 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
7404 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
7406 gen_rtx_SET (reg
, mem
),
7409 /* Linear sequence. */
7410 mask
= gen_rtx_PARALLEL (V16QImode
, rtvec_alloc (16));
7411 for (i
= 0; i
< 16; ++i
)
7412 XVECEXP (mask
, 0, i
) = GEN_INT (i
);
7414 /* Set permute mask to insert element into target. */
7415 for (i
= 0; i
< width
; ++i
)
7416 XVECEXP (mask
, 0, INTVAL (elt_rtx
) * width
+ i
) = GEN_INT (i
+ 0x10);
7417 x
= gen_rtx_CONST_VECTOR (V16QImode
, XVEC (mask
, 0));
7419 if (BYTES_BIG_ENDIAN
)
7420 x
= gen_rtx_UNSPEC (mode
,
7421 gen_rtvec (3, target
, reg
,
7422 force_reg (V16QImode
, x
)),
7426 if (TARGET_P9_VECTOR
)
7427 x
= gen_rtx_UNSPEC (mode
,
7428 gen_rtvec (3, reg
, target
,
7429 force_reg (V16QImode
, x
)),
7433 /* Invert selector. We prefer to generate VNAND on P8 so
7434 that future fusion opportunities can kick in, but must
7435 generate VNOR elsewhere. */
7436 rtx notx
= gen_rtx_NOT (V16QImode
, force_reg (V16QImode
, x
));
7437 rtx iorx
= (TARGET_P8_VECTOR
7438 ? gen_rtx_IOR (V16QImode
, notx
, notx
)
7439 : gen_rtx_AND (V16QImode
, notx
, notx
));
7440 rtx tmp
= gen_reg_rtx (V16QImode
);
7441 emit_insn (gen_rtx_SET (tmp
, iorx
));
7443 /* Permute with operands reversed and adjusted selector. */
7444 x
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, reg
, target
, tmp
),
7449 emit_insn (gen_rtx_SET (target
, x
));
7452 /* Extract field ELT from VEC into TARGET. */
7455 rs6000_expand_vector_extract (rtx target
, rtx vec
, rtx elt
)
7457 machine_mode mode
= GET_MODE (vec
);
7458 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7461 if (VECTOR_MEM_VSX_P (mode
) && CONST_INT_P (elt
))
7468 emit_move_insn (target
, gen_lowpart (TImode
, vec
));
7471 emit_insn (gen_vsx_extract_v2df (target
, vec
, elt
));
7474 emit_insn (gen_vsx_extract_v2di (target
, vec
, elt
));
7477 emit_insn (gen_vsx_extract_v4sf (target
, vec
, elt
));
7480 if (TARGET_DIRECT_MOVE_64BIT
)
7482 emit_insn (gen_vsx_extract_v16qi (target
, vec
, elt
));
7488 if (TARGET_DIRECT_MOVE_64BIT
)
7490 emit_insn (gen_vsx_extract_v8hi (target
, vec
, elt
));
7496 if (TARGET_DIRECT_MOVE_64BIT
)
7498 emit_insn (gen_vsx_extract_v4si (target
, vec
, elt
));
7504 else if (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (elt
)
7505 && TARGET_DIRECT_MOVE_64BIT
)
7507 if (GET_MODE (elt
) != DImode
)
7509 rtx tmp
= gen_reg_rtx (DImode
);
7510 convert_move (tmp
, elt
, 0);
7513 else if (!REG_P (elt
))
7514 elt
= force_reg (DImode
, elt
);
7519 emit_move_insn (target
, gen_lowpart (TImode
, vec
));
7523 emit_insn (gen_vsx_extract_v2df_var (target
, vec
, elt
));
7527 emit_insn (gen_vsx_extract_v2di_var (target
, vec
, elt
));
7531 emit_insn (gen_vsx_extract_v4sf_var (target
, vec
, elt
));
7535 emit_insn (gen_vsx_extract_v4si_var (target
, vec
, elt
));
7539 emit_insn (gen_vsx_extract_v8hi_var (target
, vec
, elt
));
7543 emit_insn (gen_vsx_extract_v16qi_var (target
, vec
, elt
));
7551 /* Allocate mode-sized buffer. */
7552 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
7554 emit_move_insn (mem
, vec
);
7555 if (CONST_INT_P (elt
))
7557 int modulo_elt
= INTVAL (elt
) % GET_MODE_NUNITS (mode
);
7559 /* Add offset to field within buffer matching vector element. */
7560 mem
= adjust_address_nv (mem
, inner_mode
,
7561 modulo_elt
* GET_MODE_SIZE (inner_mode
));
7562 emit_move_insn (target
, adjust_address_nv (mem
, inner_mode
, 0));
7566 unsigned int ele_size
= GET_MODE_SIZE (inner_mode
);
7567 rtx num_ele_m1
= GEN_INT (GET_MODE_NUNITS (mode
) - 1);
7568 rtx new_addr
= gen_reg_rtx (Pmode
);
7570 elt
= gen_rtx_AND (Pmode
, elt
, num_ele_m1
);
7572 elt
= gen_rtx_MULT (Pmode
, elt
, GEN_INT (ele_size
));
7573 new_addr
= gen_rtx_PLUS (Pmode
, XEXP (mem
, 0), elt
);
7574 new_addr
= change_address (mem
, inner_mode
, new_addr
);
7575 emit_move_insn (target
, new_addr
);
7579 /* Return the offset within a memory object (MEM) of a vector type to a given
7580 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
7581 the element is constant, we return a constant integer.
7583 Otherwise, we use a base register temporary to calculate the offset after
7584 masking it to fit within the bounds of the vector and scaling it. The
7585 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7586 built-in function. */
7589 get_vector_offset (rtx mem
, rtx element
, rtx base_tmp
, unsigned scalar_size
)
7591 if (CONST_INT_P (element
))
7592 return GEN_INT (INTVAL (element
) * scalar_size
);
7594 /* All insns should use the 'Q' constraint (address is a single register) if
7595 the element number is not a constant. */
7596 gcc_assert (satisfies_constraint_Q (mem
));
7598 /* Mask the element to make sure the element number is between 0 and the
7599 maximum number of elements - 1 so that we don't generate an address
7600 outside the vector. */
7601 rtx num_ele_m1
= GEN_INT (GET_MODE_NUNITS (GET_MODE (mem
)) - 1);
7602 rtx and_op
= gen_rtx_AND (Pmode
, element
, num_ele_m1
);
7603 emit_insn (gen_rtx_SET (base_tmp
, and_op
));
7605 /* Shift the element to get the byte offset from the element number. */
7606 int shift
= exact_log2 (scalar_size
);
7607 gcc_assert (shift
>= 0);
7611 rtx shift_op
= gen_rtx_ASHIFT (Pmode
, base_tmp
, GEN_INT (shift
));
7612 emit_insn (gen_rtx_SET (base_tmp
, shift_op
));
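/* For example, requesting element 3 of a V4SI vector yields the constant
   offset GEN_INT (12), while a variable element number is first masked with
   the number of elements minus one and then shifted left by two in BASE_TMP
   before being used as the byte offset.  */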
7618 /* Helper function update PC-relative addresses when we are adjusting a memory
7619 address (ADDR) to a vector to point to a scalar field within the vector with
7620 a constant offset (ELEMENT_OFFSET). If the address is not valid, we can
7621 use the base register temporary (BASE_TMP) to form the address. */
7624 adjust_vec_address_pcrel (rtx addr
, rtx element_offset
, rtx base_tmp
)
7626 rtx new_addr
= NULL
;
7628 gcc_assert (CONST_INT_P (element_offset
));
7630 if (GET_CODE (addr
) == CONST
)
7631 addr
= XEXP (addr
, 0);
7633 if (GET_CODE (addr
) == PLUS
)
7635 rtx op0
= XEXP (addr
, 0);
7636 rtx op1
= XEXP (addr
, 1);
7638 if (CONST_INT_P (op1
))
7640 HOST_WIDE_INT offset
7641 = INTVAL (XEXP (addr
, 1)) + INTVAL (element_offset
);
7648 rtx plus
= gen_rtx_PLUS (Pmode
, op0
, GEN_INT (offset
));
7649 new_addr
= gen_rtx_CONST (Pmode
, plus
);
7655 emit_move_insn (base_tmp
, addr
);
7656 new_addr
= gen_rtx_PLUS (Pmode
, base_tmp
, element_offset
);
7660 else if (SYMBOL_REF_P (addr
) || LABEL_REF_P (addr
))
7662 rtx plus
= gen_rtx_PLUS (Pmode
, addr
, element_offset
);
7663 new_addr
= gen_rtx_CONST (Pmode
, plus
);
7672 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7673 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
   temporary (BASE_TMP) to fixup the address.  Return the new memory address
   that is valid for reads or writes to a given register (SCALAR_REG).

   This function is expected to be called after reload is completed when we are
   splitting insns.  The temporary BASE_TMP might be set multiple times with
   this code.  */

static rtx
rs6000_adjust_vec_address (rtx scalar_reg,
			   rtx mem,
			   rtx element,
			   rtx base_tmp,
			   machine_mode scalar_mode)
{
  unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
  rtx addr = XEXP (mem, 0);
  rtx new_addr;

  gcc_assert (!reg_mentioned_p (base_tmp, addr));
  gcc_assert (!reg_mentioned_p (base_tmp, element));

  /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
  gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);

  /* Calculate what we need to add to the address to get the element
     address.  */
  rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);

  /* Create the new address pointing to the element within the vector.  If we
     are adding 0, we don't have to change the address.  */
  if (element_offset == const0_rtx)
    new_addr = addr;

  /* A simple indirect address can be converted into a reg + offset
     address.  */
  else if (REG_P (addr) || SUBREG_P (addr))
    new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);

  /* For references to local static variables, fold a constant offset into the
     address.  */
  else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
    new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);

  /* Optimize D-FORM addresses with constant offset with a constant element, to
     include the element offset in the address directly.  */
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);

      gcc_assert (REG_P (op0) || SUBREG_P (op0));
      if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
	{
	  /* op0 should never be r0, because r0+offset is not valid.  But it
	     doesn't hurt to make sure it is not r0.  */
	  gcc_assert (reg_or_subregno (op0) != 0);

	  /* D-FORM address with constant element number.  */
	  HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
	  rtx offset_rtx = GEN_INT (offset);
	  new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
	}
      else
	{
	  /* If we don't have a D-FORM address with a constant element number,
	     add the two elements in the current address.  Then add the offset.

	     Previously, we tried to add the offset to OP1 and change the
	     address to an X-FORM format adding OP0 and BASE_TMP, but it became
	     complicated because we had to verify that op1 was not GPR0 and we
	     had a constant element offset (due to the way ADDI is defined).
	     By doing the add of OP0 and OP1 first, and then adding in the
	     offset, it has the benefit that if D-FORM instructions are
	     allowed, the offset is part of the memory access to the vector
	     element.  */
	  emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
	}
    }

  else
    {
      emit_move_insn (base_tmp, addr);
      new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
    }

  /* If the address isn't valid, move the address into the temporary base
     register.  Some reasons it could not be valid include:

     The address offset overflowed the 16 or 34 bit offset size;
     We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
     We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
     Only X_FORM loads can be done, and the address is D_FORM.  */

  enum insn_form iform
    = address_to_insn_form (new_addr, scalar_mode,
			    reg_to_non_prefixed (scalar_reg, scalar_mode));

  if (iform == INSN_FORM_BAD)
    {
      emit_move_insn (base_tmp, new_addr);
      new_addr = base_tmp;
    }

  return change_address (mem, scalar_mode, new_addr);
}
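/* Worked example of the D-form path above (an editor's illustration, ignoring
   the little-endian element renumbering that get_vector_offset performs):
   extracting element 2 of a V4SImode vector whose address is
   (plus (reg r9) (const_int 16)) gives scalar_size = 4 and a constant
   element_offset of 8, so the two constants are folded and NEW_ADDR becomes
   (plus (reg r9) (const_int 24)), letting the scalar load use a single
   D-form memory reference.  */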
7781 /* Split a variable vec_extract operation into the component instructions. */
7784 rs6000_split_vec_extract_var (rtx dest
, rtx src
, rtx element
, rtx tmp_gpr
,
7787 machine_mode mode
= GET_MODE (src
);
7788 machine_mode scalar_mode
= GET_MODE_INNER (GET_MODE (src
));
7789 unsigned scalar_size
= GET_MODE_SIZE (scalar_mode
);
7790 int byte_shift
= exact_log2 (scalar_size
);
7792 gcc_assert (byte_shift
>= 0);
7794 /* If we are given a memory address, optimize to load just the element. We
7795 don't have to adjust the vector element number on little endian
7799 emit_move_insn (dest
,
7800 rs6000_adjust_vec_address (dest
, src
, element
, tmp_gpr
,
7805 else if (REG_P (src
) || SUBREG_P (src
))
7807 int num_elements
= GET_MODE_NUNITS (mode
);
7808 int bits_in_element
= mode_to_bits (GET_MODE_INNER (mode
));
7809 int bit_shift
= 7 - exact_log2 (num_elements
);
7811 unsigned int dest_regno
= reg_or_subregno (dest
);
7812 unsigned int src_regno
= reg_or_subregno (src
);
7813 unsigned int element_regno
= reg_or_subregno (element
);
7815 gcc_assert (REG_P (tmp_gpr
));
7817 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7818 a general purpose register. */
7819 if (TARGET_P9_VECTOR
7820 && (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
7821 && INT_REGNO_P (dest_regno
)
7822 && ALTIVEC_REGNO_P (src_regno
)
7823 && INT_REGNO_P (element_regno
))
7825 rtx dest_si
= gen_rtx_REG (SImode
, dest_regno
);
7826 rtx element_si
= gen_rtx_REG (SImode
, element_regno
);
7828 if (mode
== V16QImode
)
7829 emit_insn (BYTES_BIG_ENDIAN
7830 ? gen_vextublx (dest_si
, element_si
, src
)
7831 : gen_vextubrx (dest_si
, element_si
, src
));
7833 else if (mode
== V8HImode
)
7835 rtx tmp_gpr_si
= gen_rtx_REG (SImode
, REGNO (tmp_gpr
));
7836 emit_insn (gen_ashlsi3 (tmp_gpr_si
, element_si
, const1_rtx
));
7837 emit_insn (BYTES_BIG_ENDIAN
7838 ? gen_vextuhlx (dest_si
, tmp_gpr_si
, src
)
7839 : gen_vextuhrx (dest_si
, tmp_gpr_si
, src
));
7845 rtx tmp_gpr_si
= gen_rtx_REG (SImode
, REGNO (tmp_gpr
));
7846 emit_insn (gen_ashlsi3 (tmp_gpr_si
, element_si
, const2_rtx
));
7847 emit_insn (BYTES_BIG_ENDIAN
7848 ? gen_vextuwlx (dest_si
, tmp_gpr_si
, src
)
7849 : gen_vextuwrx (dest_si
, tmp_gpr_si
, src
));
7856 gcc_assert (REG_P (tmp_altivec
));
7858 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7859 an XOR, otherwise we need to subtract. The shift amount is so VSLO
7860 will shift the element into the upper position (adding 3 to convert a
7861 byte shift into a bit shift). */
7862 if (scalar_size
== 8)
7864 if (!BYTES_BIG_ENDIAN
)
7866 emit_insn (gen_xordi3 (tmp_gpr
, element
, const1_rtx
));
7872 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7874 emit_insn (gen_rtx_SET (tmp_gpr
,
7875 gen_rtx_AND (DImode
,
7876 gen_rtx_ASHIFT (DImode
,
7883 if (!BYTES_BIG_ENDIAN
)
7885 rtx num_ele_m1
= GEN_INT (num_elements
- 1);
7887 emit_insn (gen_anddi3 (tmp_gpr
, element
, num_ele_m1
));
7888 emit_insn (gen_subdi3 (tmp_gpr
, num_ele_m1
, tmp_gpr
));
7894 emit_insn (gen_ashldi3 (tmp_gpr
, element2
, GEN_INT (bit_shift
)));
7897 /* Get the value into the lower byte of the Altivec register where VSLO
7899 if (TARGET_P9_VECTOR
)
7900 emit_insn (gen_vsx_splat_v2di (tmp_altivec
, tmp_gpr
));
7901 else if (can_create_pseudo_p ())
7902 emit_insn (gen_vsx_concat_v2di (tmp_altivec
, tmp_gpr
, tmp_gpr
));
7905 rtx tmp_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
7906 emit_move_insn (tmp_di
, tmp_gpr
);
7907 emit_insn (gen_vsx_concat_v2di (tmp_altivec
, tmp_di
, tmp_di
));
7910 /* Do the VSLO to get the value into the final location. */
7914 emit_insn (gen_vsx_vslo_v2df (dest
, src
, tmp_altivec
));
7918 emit_insn (gen_vsx_vslo_v2di (dest
, src
, tmp_altivec
));
7923 rtx tmp_altivec_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
7924 rtx tmp_altivec_v4sf
= gen_rtx_REG (V4SFmode
, REGNO (tmp_altivec
));
7925 rtx src_v2di
= gen_rtx_REG (V2DImode
, REGNO (src
));
7926 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di
, src_v2di
,
7929 emit_insn (gen_vsx_xscvspdp_scalar2 (dest
, tmp_altivec_v4sf
));
7937 rtx tmp_altivec_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
7938 rtx src_v2di
= gen_rtx_REG (V2DImode
, REGNO (src
));
7939 rtx tmp_gpr_di
= gen_rtx_REG (DImode
, REGNO (dest
));
7940 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di
, src_v2di
,
7942 emit_move_insn (tmp_gpr_di
, tmp_altivec_di
);
7943 emit_insn (gen_lshrdi3 (tmp_gpr_di
, tmp_gpr_di
,
7944 GEN_INT (64 - bits_in_element
)));
/* Return alignment of TYPE.  Existing alignment is ALIGN.  HOW
   selects whether the alignment is abi mandated, optional, or
   both abi and optional alignment.  */

unsigned int
rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
{
  if (how != align_opt)
    {
      if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
	align = 128;
    }

  if (how != align_abi)
    {
      if (TREE_CODE (type) == ARRAY_TYPE
	  && TYPE_MODE (TREE_TYPE (type)) == QImode)
	{
	  if (align < BITS_PER_WORD)
	    align = BITS_PER_WORD;
	}
    }

  return align;
}
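/* For example, when both kinds of alignment are requested, a vector type
   whose existing alignment is 64 bits is raised to 128 bits by the ABI
   clause above, while an array of QImode elements is raised to
   BITS_PER_WORD only by the optional (non-ABI) clause.  */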
/* Implement TARGET_SLOW_UNALIGNED_ACCESS.  Altivec vector memory
   instructions simply ignore the low bits; VSX memory instructions
   are aligned to 4 or 8 bytes.  */

static bool
rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
{
  return (STRICT_ALIGNMENT
	  || (!TARGET_EFFICIENT_UNALIGNED_VSX
	      && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
		  || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
		      && (int) align < VECTOR_ALIGN (mode)))));
}
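/* Example (with !STRICT_ALIGNMENT): when TARGET_EFFICIENT_UNALIGNED_VSX is
   off, a V4SImode access that is only 64-bit aligned is reported as slow by
   the test above because 64 is below the required vector alignment
   (typically 128), while a DFmode access aligned to 32 bits or more is
   not.  */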
/* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints.  */

unsigned int
rs6000_special_adjust_field_align (tree type, unsigned int computed)
{
  if (computed <= 32 || TYPE_PACKED (type))
    return computed;

  /* Strip initial arrays.  */
  while (TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  /* If RECORD or UNION, recursively find the first field. */
  while (AGGREGATE_TYPE_P (type))
    {
      tree field = TYPE_FIELDS (type);

      /* Skip all non field decls */
      while (field != NULL
	     && (TREE_CODE (field) != FIELD_DECL
		 || DECL_FIELD_ABI_IGNORED (field)))
	field = DECL_CHAIN (field);

      if (! field)
	break;

      /* A packed field does not contribute any extra alignment.  */
      if (DECL_PACKED (field))
	return computed;

      type = TREE_TYPE (field);

      /* Strip arrays.  */
      while (TREE_CODE (type) == ARRAY_TYPE)
	type = TREE_TYPE (type);
    }

  if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
      && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
    computed = MIN (computed, 32);

  return computed;
}
/* AIX increases natural record alignment to doubleword if the innermost first
   field is an FP double while the FP fields remain word aligned.
   Only called if TYPE initially is a RECORD or UNION.  */

unsigned int
rs6000_special_round_type_align (tree type, unsigned int computed,
				 unsigned int specified)
{
  unsigned int align = MAX (computed, specified);

  if (TYPE_PACKED (type) || align >= 64)
    return align;

  /* If RECORD or UNION, recursively find the first field. */
  do
    {
      tree field = TYPE_FIELDS (type);

      /* Skip all non field decls */
      while (field != NULL
	     && (TREE_CODE (field) != FIELD_DECL
		 || DECL_FIELD_ABI_IGNORED (field)))
	field = DECL_CHAIN (field);

      if (! field)
	break;

      /* A packed field does not contribute any extra alignment.  */
      if (DECL_PACKED (field))
	return align;

      type = TREE_TYPE (field);

      /* Strip arrays.  */
      while (TREE_CODE (type) == ARRAY_TYPE)
	type = TREE_TYPE (type);
    } while (AGGREGATE_TYPE_P (type));

  if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
      && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
    align = MAX (align, 64);

  return align;
}
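/* Illustration: for struct { double d; int i; } the innermost first field is
   a DFmode double, so the record alignment is raised to 64 bits here, even
   though the double field itself keeps its word (32-bit) alignment from
   rs6000_special_adjust_field_align above.  */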
/* Darwin increases record alignment to the natural alignment of
   the first field.  */

unsigned int
darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
					unsigned int specified)
{
  unsigned int align = MAX (computed, specified);

  if (TYPE_PACKED (type))
    return align;

  /* Find the first field, looking down into aggregates.  */
  do {
    tree field = TYPE_FIELDS (type);
    /* Skip all non field decls */
    while (field != NULL
	   && (TREE_CODE (field) != FIELD_DECL
	       || DECL_FIELD_ABI_IGNORED (field)))
      field = DECL_CHAIN (field);
    if (! field)
      break;
    /* A packed field does not contribute any extra alignment.  */
    if (DECL_PACKED (field))
      return align;
    type = TREE_TYPE (field);
    while (TREE_CODE (type) == ARRAY_TYPE)
      type = TREE_TYPE (type);
  } while (AGGREGATE_TYPE_P (type));

  if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
    align = MAX (align, TYPE_ALIGN (type));

  return align;
}
/* Return 1 for an operand in small memory on V.4/eabi.  */

int
small_data_operand (rtx op ATTRIBUTE_UNUSED,
		    machine_mode mode ATTRIBUTE_UNUSED)
{
#if TARGET_ELF
  rtx sym_ref;

  if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
    return 0;

  if (DEFAULT_ABI != ABI_V4)
    return 0;

  if (SYMBOL_REF_P (op))
    sym_ref = op;

  else if (GET_CODE (op) != CONST
	   || GET_CODE (XEXP (op, 0)) != PLUS
	   || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
	   || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
    return 0;

  else
    {
      rtx sum = XEXP (op, 0);
      HOST_WIDE_INT summand;

      /* We have to be careful here, because it is the referenced address
	 that must be 32k from _SDA_BASE_, not just the symbol.  */
      summand = INTVAL (XEXP (sum, 1));
      if (summand < 0 || summand > g_switch_value)
	return 0;

      sym_ref = XEXP (sum, 0);
    }

  return SYMBOL_REF_SMALL_P (sym_ref);
#else
  return 0;
#endif
}
/* Return true if either operand is a general purpose register.  */

bool
gpr_or_gpr_p (rtx op0, rtx op1)
{
  return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
	  || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
}
/* Return true if this is a move direct operation between GPR registers and
   floating point/VSX registers.  */

bool
direct_move_p (rtx op0, rtx op1)
{
  if (!REG_P (op0) || !REG_P (op1))
    return false;

  if (!TARGET_DIRECT_MOVE)
    return false;

  int regno0 = REGNO (op0);
  int regno1 = REGNO (op1);
  if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
    return false;

  if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
    return true;

  if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
    return true;

  return false;
}
/* Return true if the ADDR is an acceptable address for a quad memory
   operation of mode MODE (either LQ/STQ for general purpose registers, or
   LXV/STXV for vector registers under ISA 3.0.  GPR_P is true if this address
   is intended for LQ/STQ.  If it is false, the address is intended for the ISA
   3.0 LXV/STXV instruction.  */

bool
quad_address_p (rtx addr, machine_mode mode, bool strict)
{
  rtx op0, op1;

  if (GET_MODE_SIZE (mode) < 16)
    return false;

  if (legitimate_indirect_address_p (addr, strict))
    return true;

  if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
    return false;

  /* Is this a valid prefixed address?  If the bottom four bits of the offset
     are non-zero, we could use a prefixed instruction (which does not have the
     DQ-form constraint that the traditional instruction had) instead of
     forcing the unaligned offset to a GPR.  */
  if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
    return true;

  if (GET_CODE (addr) != PLUS)
    return false;

  op0 = XEXP (addr, 0);
  if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
    return false;

  op1 = XEXP (addr, 1);
  if (!CONST_INT_P (op1))
    return false;

  return quad_address_offset_p (INTVAL (op1));
}
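/* For example, a 16-byte access at (plus (reg r4) (const_int 17)) fails the
   DQ-form offset check above (the low four bits of 17 are non-zero), but
   when prefixed instructions are available address_is_prefixed accepts it,
   since the prefixed forms such as plxv/pstxv do not carry the DQ alignment
   restriction.  */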
/* Return true if this is a load or store quad operation.  This function does
   not handle the atomic quad memory instructions.  */

bool
quad_load_store_p (rtx op0, rtx op1)
{
  bool ret;

  if (!TARGET_QUAD_MEMORY)
    ret = false;

  else if (REG_P (op0) && MEM_P (op1))
    ret = (quad_int_reg_operand (op0, GET_MODE (op0))
	   && quad_memory_operand (op1, GET_MODE (op1))
	   && !reg_overlap_mentioned_p (op0, op1));

  else if (MEM_P (op0) && REG_P (op1))
    ret = (quad_memory_operand (op0, GET_MODE (op0))
	   && quad_int_reg_operand (op1, GET_MODE (op1)));

  else
    ret = false;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n========== quad_load_store, return %s\n",
	       ret ? "true" : "false");
      debug_rtx (gen_rtx_SET (op0, op1));
    }

  return ret;
}
/* Given an address, return a constant offset term if one exists.  */

rtx
address_offset (rtx op)
{
  if (GET_CODE (op) == PRE_INC
      || GET_CODE (op) == PRE_DEC)
    op = XEXP (op, 0);
  else if (GET_CODE (op) == PRE_MODIFY
	   || GET_CODE (op) == LO_SUM)
    op = XEXP (op, 1);

  if (GET_CODE (op) == CONST)
    op = XEXP (op, 0);

  if (GET_CODE (op) == PLUS)
    op = XEXP (op, 1);

  if (CONST_INT_P (op))
    return op;

  return NULL_RTX;
}
8300 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
8301 the mode. If we can't find (or don't know) the alignment of the symbol
8302 we assume (optimistically) that it's sufficiently aligned [??? maybe we
8303 should be pessimistic]. Offsets are validated in the same way as for
8306 darwin_rs6000_legitimate_lo_sum_const_p (rtx x
, machine_mode mode
)
8308 /* We should not get here with this. */
8309 gcc_checking_assert (! mode_supports_dq_form (mode
));
8311 if (GET_CODE (x
) == CONST
)
8314 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_MACHOPIC_OFFSET
)
8315 x
= XVECEXP (x
, 0, 0);
8318 unsigned HOST_WIDE_INT offset
= 0;
8320 if (GET_CODE (x
) == PLUS
)
8323 if (! SYMBOL_REF_P (sym
))
8325 if (!CONST_INT_P (XEXP (x
, 1)))
8327 offset
= INTVAL (XEXP (x
, 1));
8329 else if (SYMBOL_REF_P (x
))
8331 else if (CONST_INT_P (x
))
8332 offset
= INTVAL (x
);
8333 else if (GET_CODE (x
) == LABEL_REF
)
8334 offset
= 0; // We assume code labels are Pmode aligned
8336 return false; // not sure what we have here.
8338 /* If we don't know the alignment of the thing to which the symbol refers,
8339 we assume optimistically it is "enough".
8340 ??? maybe we should be pessimistic instead. */
8345 tree decl
= SYMBOL_REF_DECL (sym
);
8347 if (MACHO_SYMBOL_INDIRECTION_P (sym
))
8348 /* The decl in an indirection symbol is the original one, which might
8349 be less aligned than the indirection. Our indirections are always
8354 if (decl
&& DECL_ALIGN (decl
))
8355 align
= DECL_ALIGN_UNIT (decl
);
8358 unsigned int extra
= 0;
8364 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8366 if (VECTOR_MEM_VSX_P (mode
))
8369 if (!TARGET_POWERPC64
)
8371 else if ((offset
& 3) || (align
& 3))
8382 if (!TARGET_POWERPC64
)
8384 else if ((offset
& 3) || (align
& 3))
8392 /* We only care if the access(es) would cause a change to the high part. */
8393 offset
= ((offset
& 0xffff) ^ 0x8000) - 0x8000;
8394 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
/* Return true if the MEM operand is a memory operand suitable for use
   with a (full width, possibly multiple) gpr load/store.  On
   powerpc64 this means the offset must be divisible by 4.
   Implements 'Y' constraint.

   Accept direct, indexed, offset, lo_sum and tocref.  Since this is
   a constraint function we know the operand has satisfied a suitable
   memory predicate.

   Offsetting a lo_sum should not be allowed, except where we know by
   alignment that a 32k boundary is not crossed.  Note that by
   "offsetting" here we mean a further offset to access parts of the
   MEM.  It's fine to have a lo_sum where the inner address is offset
   from a sym, since the same sym+offset will appear in the high part
   of the address calculation.  */

bool
mem_operand_gpr (rtx op, machine_mode mode)
{
  unsigned HOST_WIDE_INT offset;
  int extra;
  rtx addr = XEXP (op, 0);

  /* PR85755: Allow PRE_INC and PRE_DEC addresses.  */
  if (TARGET_UPDATE
      && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
      && mode_supports_pre_incdec_p (mode)
      && legitimate_indirect_address_p (XEXP (addr, 0), false))
    return true;

  /* Allow prefixed instructions if supported.  If the bottom two bits of the
     offset are non-zero, we could use a prefixed instruction (which does not
     have the DS-form constraint that the traditional instruction had) instead
     of forcing the unaligned offset to a GPR.  */
  if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
    return true;

  /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
     really OK.  Doing this early avoids teaching all the other machinery
     about them.  */
  if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
    return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);

  /* Only allow offsettable addresses.  See PRs 83969 and 84279.  */
  if (!rs6000_offsettable_memref_p (op, mode, false))
    return false;

  op = address_offset (addr);
  if (op == NULL_RTX)
    return true;

  offset = INTVAL (op);
  if (TARGET_POWERPC64 && (offset & 3) != 0)
    return false;

  extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
  if (extra < 0)
    extra = 0;

  if (GET_CODE (addr) == LO_SUM)
    /* For lo_sum addresses, we must allow any offset except one that
       causes a wrap, so test only the low 16 bits.  */
    offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;

  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
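/* Illustration of the lo_sum offset adjustment above: it sign-extends the low
   16 bits, so an offset of 0x12345678 is reduced to 0x5678 while 0x1234ffff
   becomes -1.  The high bits can be ignored because the same sym+offset
   appears in the high part of the address, so only a wrap of the low
   halfword matters for SIGNED_16BIT_OFFSET_EXTRA_P.  */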
8464 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8465 enforce an offset divisible by 4 even for 32-bit. */
8468 mem_operand_ds_form (rtx op
, machine_mode mode
)
8470 unsigned HOST_WIDE_INT offset
;
8472 rtx addr
= XEXP (op
, 0);
8474 /* Allow prefixed instructions if supported. If the bottom two bits of the
8475 offset are non-zero, we could use a prefixed instruction (which does not
8476 have the DS-form constraint that the traditional instruction had) instead
8477 of forcing the unaligned offset to a GPR. */
8478 if (address_is_prefixed (addr
, mode
, NON_PREFIXED_DS
))
8481 if (!offsettable_address_p (false, mode
, addr
))
8484 op
= address_offset (addr
);
8488 offset
= INTVAL (op
);
8489 if ((offset
& 3) != 0)
8492 extra
= GET_MODE_SIZE (mode
) - UNITS_PER_WORD
;
8496 if (GET_CODE (addr
) == LO_SUM
)
8497 /* For lo_sum addresses, we must allow any offset except one that
8498 causes a wrap, so test only the low 16 bits. */
8499 offset
= ((offset
& 0xffff) ^ 0x8000) - 0x8000;
8501 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
8504 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8507 reg_offset_addressing_ok_p (machine_mode mode
)
8521 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8522 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8523 a vector mode, if we want to use the VSX registers to move it around,
8524 we need to restrict ourselves to reg+reg addressing. Similarly for
8525 IEEE 128-bit floating point that is passed in a single vector
8527 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
))
8528 return mode_supports_dq_form (mode
);
8531 /* The vector pair/quad types support offset addressing if the
8532 underlying vectors support offset addressing. */
8538 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8539 addressing for the LFIWZX and STFIWX instructions. */
8540 if (TARGET_NO_SDMODE_STACK
)
static bool
virtual_stack_registers_memory_p (rtx op)
{
  int regnum;

  if (REG_P (op))
    regnum = REGNO (op);

  else if (GET_CODE (op) == PLUS
	   && REG_P (XEXP (op, 0))
	   && CONST_INT_P (XEXP (op, 1)))
    regnum = REGNO (XEXP (op, 0));

  else
    return false;

  return (regnum >= FIRST_VIRTUAL_REGISTER
	  && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
}
8571 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8572 is known to not straddle a 32k boundary. This function is used
8573 to determine whether -mcmodel=medium code can use TOC pointer
8574 relative addressing for OP. This means the alignment of the TOC
8575 pointer must also be taken into account, and unfortunately that is
8578 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8579 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8583 offsettable_ok_by_alignment (rtx op
, HOST_WIDE_INT offset
,
8587 unsigned HOST_WIDE_INT dsize
, dalign
, lsb
, mask
;
8589 if (!SYMBOL_REF_P (op
))
8592 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8594 if (mode_supports_dq_form (mode
))
8597 dsize
= GET_MODE_SIZE (mode
);
8598 decl
= SYMBOL_REF_DECL (op
);
8604 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8605 replacing memory addresses with an anchor plus offset. We
8606 could find the decl by rummaging around in the block->objects
8607 VEC for the given offset but that seems like too much work. */
8608 dalign
= BITS_PER_UNIT
;
8609 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op
)
8610 && SYMBOL_REF_ANCHOR_P (op
)
8611 && SYMBOL_REF_BLOCK (op
) != NULL
)
8613 struct object_block
*block
= SYMBOL_REF_BLOCK (op
);
8615 dalign
= block
->alignment
;
8616 offset
+= SYMBOL_REF_BLOCK_OFFSET (op
);
8618 else if (CONSTANT_POOL_ADDRESS_P (op
))
8620 /* It would be nice to have get_pool_align().. */
8621 machine_mode cmode
= get_pool_mode (op
);
8623 dalign
= GET_MODE_ALIGNMENT (cmode
);
8626 else if (DECL_P (decl
))
8628 dalign
= DECL_ALIGN (decl
);
8632 /* Allow BLKmode when the entire object is known to not
8633 cross a 32k boundary. */
8634 if (!DECL_SIZE_UNIT (decl
))
8637 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl
)))
8640 dsize
= tree_to_uhwi (DECL_SIZE_UNIT (decl
));
8644 dalign
/= BITS_PER_UNIT
;
8645 if (dalign
> POWERPC64_TOC_POINTER_ALIGNMENT
)
8646 dalign
= POWERPC64_TOC_POINTER_ALIGNMENT
;
8647 return dalign
>= dsize
;
8653 /* Find how many bits of the alignment we know for this access. */
8654 dalign
/= BITS_PER_UNIT
;
8655 if (dalign
> POWERPC64_TOC_POINTER_ALIGNMENT
)
8656 dalign
= POWERPC64_TOC_POINTER_ALIGNMENT
;
8658 lsb
= offset
& -offset
;
8662 return dalign
>= dsize
;
static bool
constant_pool_expr_p (rtx op)
{
  rtx base, offset;

  split_const (op, &base, &offset);
  return (SYMBOL_REF_P (base)
	  && CONSTANT_POOL_ADDRESS_P (base)
	  && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
}
8676 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
8677 use that as the register to put the HIGH value into if register allocation
8681 create_TOC_reference (rtx symbol
, rtx largetoc_reg
)
8683 rtx tocrel
, tocreg
, hi
;
8685 gcc_assert (TARGET_TOC
);
8687 if (TARGET_DEBUG_ADDR
)
8689 if (SYMBOL_REF_P (symbol
))
8690 fprintf (stderr
, "\ncreate_TOC_reference, (symbol_ref %s)\n",
8694 fprintf (stderr
, "\ncreate_TOC_reference, code %s:\n",
8695 GET_RTX_NAME (GET_CODE (symbol
)));
8700 if (!can_create_pseudo_p ())
8701 df_set_regs_ever_live (TOC_REGISTER
, true);
8703 tocreg
= gen_rtx_REG (Pmode
, TOC_REGISTER
);
8704 tocrel
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, symbol
, tocreg
), UNSPEC_TOCREL
);
8705 if (TARGET_CMODEL
== CMODEL_SMALL
|| can_create_pseudo_p ())
8708 hi
= gen_rtx_HIGH (Pmode
, copy_rtx (tocrel
));
8709 if (largetoc_reg
!= NULL
)
8711 emit_move_insn (largetoc_reg
, hi
);
8714 return gen_rtx_LO_SUM (Pmode
, hi
, tocrel
);
/* These are only used to pass through from print_operand/print_operand_address
   to rs6000_output_addr_const_extra over the intervening function
   output_addr_const which is not target code.  */
static const_rtx tocrel_base_oac, tocrel_offset_oac;
8722 /* Return true if OP is a toc pointer relative address (the output
8723 of create_TOC_reference). If STRICT, do not match non-split
8724 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8725 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8726 TOCREL_OFFSET_RET respectively. */
8729 toc_relative_expr_p (const_rtx op
, bool strict
, const_rtx
*tocrel_base_ret
,
8730 const_rtx
*tocrel_offset_ret
)
8735 if (TARGET_CMODEL
!= CMODEL_SMALL
)
8737 /* When strict ensure we have everything tidy. */
8739 && !(GET_CODE (op
) == LO_SUM
8740 && REG_P (XEXP (op
, 0))
8741 && INT_REG_OK_FOR_BASE_P (XEXP (op
, 0), strict
)))
8744 /* When not strict, allow non-split TOC addresses and also allow
8745 (lo_sum (high ..)) TOC addresses created during reload. */
8746 if (GET_CODE (op
) == LO_SUM
)
8750 const_rtx tocrel_base
= op
;
8751 const_rtx tocrel_offset
= const0_rtx
;
8753 if (GET_CODE (op
) == PLUS
&& add_cint_operand (XEXP (op
, 1), GET_MODE (op
)))
8755 tocrel_base
= XEXP (op
, 0);
8756 tocrel_offset
= XEXP (op
, 1);
8759 if (tocrel_base_ret
)
8760 *tocrel_base_ret
= tocrel_base
;
8761 if (tocrel_offset_ret
)
8762 *tocrel_offset_ret
= tocrel_offset
;
8764 return (GET_CODE (tocrel_base
) == UNSPEC
8765 && XINT (tocrel_base
, 1) == UNSPEC_TOCREL
8766 && REG_P (XVECEXP (tocrel_base
, 0, 1))
8767 && REGNO (XVECEXP (tocrel_base
, 0, 1)) == TOC_REGISTER
);
/* Return true if X is a constant pool address, and also for cmodel=medium
   if X is a toc-relative address known to be offsettable within MODE.  */

bool
legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
				    bool strict)
{
  const_rtx tocrel_base, tocrel_offset;
  return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
	  && (TARGET_CMODEL != CMODEL_MEDIUM
	      || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
	      || mode == QImode
	      || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
					      INTVAL (tocrel_offset), mode)));
}

static bool
legitimate_small_data_p (machine_mode mode, rtx x)
{
  return (DEFAULT_ABI == ABI_V4
	  && !flag_pic && !TARGET_TOC
	  && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
	  && small_data_operand (x, mode));
}
8796 rs6000_legitimate_offset_address_p (machine_mode mode
, rtx x
,
8797 bool strict
, bool worst_case
)
8799 unsigned HOST_WIDE_INT offset
;
8802 if (GET_CODE (x
) != PLUS
)
8804 if (!REG_P (XEXP (x
, 0)))
8806 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), strict
))
8808 if (mode_supports_dq_form (mode
))
8809 return quad_address_p (x
, mode
, strict
);
8810 if (!reg_offset_addressing_ok_p (mode
))
8811 return virtual_stack_registers_memory_p (x
);
8812 if (legitimate_constant_pool_address_p (x
, mode
, strict
|| lra_in_progress
))
8814 if (!CONST_INT_P (XEXP (x
, 1)))
8817 offset
= INTVAL (XEXP (x
, 1));
8824 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8826 if (VECTOR_MEM_VSX_P (mode
))
8831 if (!TARGET_POWERPC64
)
8833 else if (offset
& 3)
8846 if (!TARGET_POWERPC64
)
8848 else if (offset
& 3)
8856 if (TARGET_PREFIXED
)
8857 return SIGNED_34BIT_OFFSET_EXTRA_P (offset
, extra
);
8859 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
bool
legitimate_indexed_address_p (rtx x, int strict)
{
  rtx op0, op1;

  if (GET_CODE (x) != PLUS)
    return false;

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  return (REG_P (op0) && REG_P (op1)
	  && ((INT_REG_OK_FOR_BASE_P (op0, strict)
	       && INT_REG_OK_FOR_INDEX_P (op1, strict))
	      || (INT_REG_OK_FOR_BASE_P (op1, strict)
		  && INT_REG_OK_FOR_INDEX_P (op0, strict))));
}
bool
avoiding_indexed_address_p (machine_mode mode)
{
  unsigned int msize = GET_MODE_SIZE (mode);

  /* Avoid indexed addressing for modes that have non-indexed load/store
     instruction forms.  On power10, vector pairs have an indexed
     form, but vector quads don't.  */
  if (msize > 16)
    return msize != 32;

  return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
}
bool
legitimate_indirect_address_p (rtx x, int strict)
{
  return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
}
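/* Return true if X is a MEM whose address is a Mach-O PIC (lo_sum base
   constant) reference of the kind generated for SImode accesses.  */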
bool
macho_lo_sum_memory_operand (rtx x, machine_mode mode)
{
  if (!TARGET_MACHO || !flag_pic
      || mode != SImode || !MEM_P (x))
    return false;
  x = XEXP (x, 0);

  if (GET_CODE (x) != LO_SUM)
    return false;
  if (!REG_P (XEXP (x, 0)))
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
    return false;
  x = XEXP (x, 1);

  return CONSTANT_P (x);
}
8920 legitimate_lo_sum_address_p (machine_mode mode
, rtx x
, int strict
)
8922 if (GET_CODE (x
) != LO_SUM
)
8924 if (!REG_P (XEXP (x
, 0)))
8926 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), strict
))
8928 /* quad word addresses are restricted, and we can't use LO_SUM. */
8929 if (mode_supports_dq_form (mode
))
8933 if (TARGET_ELF
|| TARGET_MACHO
)
8937 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
)
8939 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8940 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8941 recognizes some LO_SUM addresses as valid although this
8942 function says opposite. In most cases, LRA through different
8943 transformations can generate correct code for address reloads.
8944 It cannot manage only some LO_SUM cases. So we need to add
8945 code here saying that some addresses are still valid. */
8946 large_toc_ok
= (lra_in_progress
&& TARGET_CMODEL
!= CMODEL_SMALL
8947 && small_toc_ref (x
, VOIDmode
));
8948 if (TARGET_TOC
&& ! large_toc_ok
)
8950 if (GET_MODE_NUNITS (mode
) != 1)
8952 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
8953 && !(/* ??? Assume floating point reg based on mode? */
8954 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
)))
8957 return CONSTANT_P (x
) || large_toc_ok
;
8964 /* Try machine-dependent ways of modifying an illegitimate address
8965 to be legitimate. If we find one, return the new, valid address.
8966 This is used from only one place: `memory_address' in explow.cc.
8968 OLDX is the address as it was before break_out_memory_refs was
8969 called. In some cases it is useful to look at this to decide what
8972 It is always safe for this function to do nothing. It exists to
8973 recognize opportunities to optimize the output.
8975 On RS/6000, first check for the sum of a register with a constant
8976 integer that is out of range. If so, generate code to add the
8977 constant with the low-order 16 bits masked to the register and force
8978 this result into another register (this can be done with `cau').
8979 Then generate an address of REG+(CONST&0xffff), allowing for the
8980 possibility of bit 16 being a one.
8982 Then check for the sum of a register and something not constant, try to
8983 load the other things into a register and return the sum. */
8986 rs6000_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
8991 if (!reg_offset_addressing_ok_p (mode
)
8992 || mode_supports_dq_form (mode
))
8994 if (virtual_stack_registers_memory_p (x
))
8997 /* In theory we should not be seeing addresses of the form reg+0,
8998 but just in case it is generated, optimize it away. */
8999 if (GET_CODE (x
) == PLUS
&& XEXP (x
, 1) == const0_rtx
)
9000 return force_reg (Pmode
, XEXP (x
, 0));
9002 /* For TImode with load/store quad, restrict addresses to just a single
9003 pointer, so it works with both GPRs and VSX registers. */
9004 /* Make sure both operands are registers. */
9005 else if (GET_CODE (x
) == PLUS
9006 && (mode
!= TImode
|| !TARGET_VSX
))
9007 return gen_rtx_PLUS (Pmode
,
9008 force_reg (Pmode
, XEXP (x
, 0)),
9009 force_reg (Pmode
, XEXP (x
, 1)));
9011 return force_reg (Pmode
, x
);
9013 if (SYMBOL_REF_P (x
))
9015 enum tls_model model
= SYMBOL_REF_TLS_MODEL (x
);
9017 return rs6000_legitimize_tls_address (x
, model
);
9029 /* As in legitimate_offset_address_p we do not assume
9030 worst-case. The mode here is just a hint as to the registers
9031 used. A TImode is usually in gprs, but may actually be in
9032 fprs. Leave worst-case scenario for reload to handle via
9033 insn constraints. PTImode is only GPRs. */
9040 if (GET_CODE (x
) == PLUS
9041 && REG_P (XEXP (x
, 0))
9042 && CONST_INT_P (XEXP (x
, 1))
9043 && ((unsigned HOST_WIDE_INT
) (INTVAL (XEXP (x
, 1)) + 0x8000)
9044 >= 0x10000 - extra
))
9046 HOST_WIDE_INT high_int
, low_int
;
9048 low_int
= ((INTVAL (XEXP (x
, 1)) & 0xffff) ^ 0x8000) - 0x8000;
9049 if (low_int
>= 0x8000 - extra
)
9051 high_int
= INTVAL (XEXP (x
, 1)) - low_int
;
9052 sum
= force_operand (gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
9053 gen_int_mode (high_int
, Pmode
)), 0);
9054 return plus_constant (Pmode
, sum
, low_int
);
9056 else if (GET_CODE (x
) == PLUS
9057 && REG_P (XEXP (x
, 0))
9058 && !CONST_INT_P (XEXP (x
, 1))
9059 && GET_MODE_NUNITS (mode
) == 1
9060 && (GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
9061 || (/* ??? Assume floating point reg based on mode? */
9062 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
)))
9063 && !avoiding_indexed_address_p (mode
))
9065 return gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
9066 force_reg (Pmode
, force_operand (XEXP (x
, 1), 0)));
9068 else if ((TARGET_ELF
9070 || !MACHO_DYNAMIC_NO_PIC_P
9074 && TARGET_NO_TOC_OR_PCREL
9077 && !CONST_WIDE_INT_P (x
)
9078 && !CONST_DOUBLE_P (x
)
9080 && GET_MODE_NUNITS (mode
) == 1
9081 && (GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
9082 || (/* ??? Assume floating point reg based on mode? */
9083 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
))))
9085 rtx reg
= gen_reg_rtx (Pmode
);
9087 emit_insn (gen_elf_high (reg
, x
));
9089 emit_insn (gen_macho_high (Pmode
, reg
, x
));
9090 return gen_rtx_LO_SUM (Pmode
, reg
, x
);
9094 && constant_pool_expr_p (x
)
9095 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x
), Pmode
))
9096 return create_TOC_reference (x
, NULL_RTX
);
9101 /* Debug version of rs6000_legitimize_address. */
9103 rs6000_debug_legitimize_address (rtx x
, rtx oldx
, machine_mode mode
)
9109 ret
= rs6000_legitimize_address (x
, oldx
, mode
);
9110 insns
= get_insns ();
9116 "\nrs6000_legitimize_address: mode %s, old code %s, "
9117 "new code %s, modified\n",
9118 GET_MODE_NAME (mode
), GET_RTX_NAME (GET_CODE (x
)),
9119 GET_RTX_NAME (GET_CODE (ret
)));
9121 fprintf (stderr
, "Original address:\n");
9124 fprintf (stderr
, "oldx:\n");
9127 fprintf (stderr
, "New address:\n");
9132 fprintf (stderr
, "Insns added:\n");
9133 debug_rtx_list (insns
, 20);
9139 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9140 GET_MODE_NAME (mode
), GET_RTX_NAME (GET_CODE (x
)));
/* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void
rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  switch (size)
    {
    case 4:
      fputs ("\t.long\t", file);
      break;
    case 8:
      fputs (DOUBLE_INT_ASM_OP, file);
      break;
    default:
      gcc_unreachable ();
    }
  output_addr_const (file, x);
  fputs ("@dtprel+0x8000", file);
}

/* Return true if X is a symbol that refers to real (rather than emulated)
   TLS.  */

static bool
rs6000_real_tls_symbol_ref_p (rtx x)
{
  return (SYMBOL_REF_P (x)
	  && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
}
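/* For a 4-byte entry, the routine above emits, e.g., ".long x@dtprel+0x8000"
   for symbol x; the +0x8000 reflects the bias the PowerPC TLS ABI applies to
   DTP-relative offsets.  */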
9184 /* In the name of slightly smaller debug output, and to cater to
9185 general assembler lossage, recognize various UNSPEC sequences
9186 and turn them back into a direct symbol reference. */
9189 rs6000_delegitimize_address (rtx orig_x
)
9193 /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion. It
9194 encodes loading up the high part of the address of a TOC reference along
9195 with a load of a GPR using the same base register used for the load. We
9196 return the original SYMBOL_REF.
9198 (set (reg:INT1 <reg>
9199 (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR)))
9201 UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass. These
9202 UNSPECs include the external SYMBOL_REF along with the value being loaded.
9203 We return the original SYMBOL_REF.
9205 (parallel [(set (reg:DI <base-reg>)
9206 (unspec:DI [(symbol_ref <symbol>)
9207 (const_int <marker>)]
9208 UNSPEC_PCREL_OPT_LD_ADDR))
9209 (set (reg:DI <load-reg>)
9210 (unspec:DI [(const_int 0)]
9211 UNSPEC_PCREL_OPT_LD_DATA))])
9213 UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the
9214 GPR being loaded is the same as the GPR used to hold the external address.
9216 (set (reg:DI <base-reg>)
9217 (unspec:DI [(symbol_ref <symbol>)
9218 (const_int <marker>)]
9219 UNSPEC_PCREL_OPT_LD_SAME_REG))
9221 UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This
9222 UNSPEC include the external SYMBOL_REF along with the value being loaded.
9223 We return the original SYMBOL_REF.
9225 (parallel [(set (reg:DI <base-reg>)
9226 (unspec:DI [(symbol_ref <symbol>)
9227 (const_int <marker>)]
9228 UNSPEC_PCREL_OPT_ST_ADDR))
9229 (use (reg <store-reg>))]) */
9231 if (GET_CODE (orig_x
) == UNSPEC
)
9232 switch (XINT (orig_x
, 1))
9234 case UNSPEC_FUSION_GPR
:
9235 case UNSPEC_PCREL_OPT_LD_ADDR
:
9236 case UNSPEC_PCREL_OPT_LD_SAME_REG
:
9237 case UNSPEC_PCREL_OPT_ST_ADDR
:
9238 orig_x
= XVECEXP (orig_x
, 0, 0);
9245 orig_x
= delegitimize_mem_from_attrs (orig_x
);
9252 if (TARGET_CMODEL
!= CMODEL_SMALL
&& GET_CODE (y
) == LO_SUM
)
9256 if (GET_CODE (y
) == PLUS
9257 && GET_MODE (y
) == Pmode
9258 && CONST_INT_P (XEXP (y
, 1)))
9260 offset
= XEXP (y
, 1);
9264 if (GET_CODE (y
) == UNSPEC
&& XINT (y
, 1) == UNSPEC_TOCREL
)
9266 y
= XVECEXP (y
, 0, 0);
9269 /* Do not associate thread-local symbols with the original
9270 constant pool symbol. */
9273 && CONSTANT_POOL_ADDRESS_P (y
)
9274 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y
)))
9278 if (offset
!= NULL_RTX
)
9279 y
= gen_rtx_PLUS (Pmode
, y
, offset
);
9280 if (!MEM_P (orig_x
))
9283 return replace_equiv_address_nv (orig_x
, y
);
9287 && GET_CODE (orig_x
) == LO_SUM
9288 && GET_CODE (XEXP (orig_x
, 1)) == CONST
)
9290 y
= XEXP (XEXP (orig_x
, 1), 0);
9291 if (GET_CODE (y
) == UNSPEC
&& XINT (y
, 1) == UNSPEC_MACHOPIC_OFFSET
)
9292 return XVECEXP (y
, 0, 0);
9298 /* Return true if X shouldn't be emitted into the debug info.
9299 The linker doesn't like .toc section references from
9300 .debug_* sections, so reject .toc section symbols. */
9303 rs6000_const_not_ok_for_debug_p (rtx x
)
9305 if (GET_CODE (x
) == UNSPEC
)
9307 if (SYMBOL_REF_P (x
)
9308 && CONSTANT_POOL_ADDRESS_P (x
))
9310 rtx c
= get_pool_constant (x
);
9311 machine_mode cmode
= get_pool_mode (x
);
9312 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c
, cmode
))
9319 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9322 rs6000_legitimate_combined_insn (rtx_insn
*insn
)
9324 int icode
= INSN_CODE (insn
);
9326 /* Reject creating doloop insns. Combine should not be allowed
9327 to create these for a number of reasons:
9328 1) In a nested loop, if combine creates one of these in an
9329 outer loop and the register allocator happens to allocate ctr
9330 to the outer loop insn, then the inner loop can't use ctr.
9331 Inner loops ought to be more highly optimized.
9332 2) Combine often wants to create one of these from what was
9333 originally a three insn sequence, first combining the three
9334 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9335 allocated ctr, the splitter takes use back to the three insn
9336 sequence. It's better to stop combine at the two insn
9338 3) Faced with not being able to allocate ctr for ctrsi/crtdi
9339 insns, the register allocator sometimes uses floating point
9340 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9341 jump insn and output reloads are not implemented for jumps,
9342 the ctrsi/ctrdi splitters need to handle all possible cases.
9343 That's a pain, and it gets to be seriously difficult when a
9344 splitter that runs after reload needs memory to transfer from
9345 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9346 for the difficult case. It's better to not create problems
9347 in the first place. */
9348 if (icode
!= CODE_FOR_nothing
9349 && (icode
== CODE_FOR_bdz_si
9350 || icode
== CODE_FOR_bdz_di
9351 || icode
== CODE_FOR_bdnz_si
9352 || icode
== CODE_FOR_bdnz_di
9353 || icode
== CODE_FOR_bdztf_si
9354 || icode
== CODE_FOR_bdztf_di
9355 || icode
== CODE_FOR_bdnztf_si
9356 || icode
== CODE_FOR_bdnztf_di
))
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx rs6000_tls_symbol;
static rtx
rs6000_tls_get_addr (void)
{
  if (!rs6000_tls_symbol)
    rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");

  return rs6000_tls_symbol;
}

/* Construct the SYMBOL_REF for TLS GOT references.  */

static GTY(()) rtx rs6000_got_symbol;
rtx
rs6000_got_sym (void)
{
  if (!rs6000_got_symbol)
    {
      rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
      SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
      SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
    }

  return rs6000_got_symbol;
}
9390 /* AIX Thread-Local Address support. */
9393 rs6000_legitimize_tls_address_aix (rtx addr
, enum tls_model model
)
9395 rtx sym
, mem
, tocref
, tlsreg
, tmpreg
, dest
;
9399 /* Place addr into TOC constant pool. */
9400 sym
= force_const_mem (GET_MODE (addr
), addr
);
9402 /* Output the TOC entry and create the MEM referencing the value. */
9403 if (constant_pool_expr_p (XEXP (sym
, 0))
9404 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym
, 0)), Pmode
))
9406 tocref
= create_TOC_reference (XEXP (sym
, 0), NULL_RTX
);
9407 mem
= gen_const_mem (Pmode
, tocref
);
9408 set_mem_alias_set (mem
, get_TOC_alias_set ());
9413 /* Use global-dynamic for local-dynamic. */
9414 if (model
== TLS_MODEL_GLOBAL_DYNAMIC
9415 || model
== TLS_MODEL_LOCAL_DYNAMIC
)
9417 /* Create new TOC reference for @m symbol. */
9418 name
= XSTR (XVECEXP (XEXP (mem
, 0), 0, 0), 0);
9419 tlsname
= XALLOCAVEC (char, strlen (name
) + 1);
9420 strcpy (tlsname
, "*LCM");
9421 strcat (tlsname
, name
+ 3);
9422 rtx modaddr
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (tlsname
));
9423 SYMBOL_REF_FLAGS (modaddr
) |= SYMBOL_FLAG_LOCAL
;
9424 tocref
= create_TOC_reference (modaddr
, NULL_RTX
);
9425 rtx modmem
= gen_const_mem (Pmode
, tocref
);
9426 set_mem_alias_set (modmem
, get_TOC_alias_set ());
9428 rtx modreg
= gen_reg_rtx (Pmode
);
9429 emit_insn (gen_rtx_SET (modreg
, modmem
));
9431 tmpreg
= gen_reg_rtx (Pmode
);
9432 emit_insn (gen_rtx_SET (tmpreg
, mem
));
9434 dest
= gen_reg_rtx (Pmode
);
9436 emit_insn (gen_tls_get_addrsi (dest
, modreg
, tmpreg
));
9438 emit_insn (gen_tls_get_addrdi (dest
, modreg
, tmpreg
));
9441 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9442 else if (TARGET_32BIT
)
9444 tlsreg
= gen_reg_rtx (SImode
);
9445 emit_insn (gen_tls_get_tpointer (tlsreg
));
9449 tlsreg
= gen_rtx_REG (DImode
, 13);
9450 xcoff_tls_exec_model_detected
= true;
9453 /* Load the TOC value into temporary register. */
9454 tmpreg
= gen_reg_rtx (Pmode
);
9455 emit_insn (gen_rtx_SET (tmpreg
, mem
));
9456 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
9457 gen_rtx_MINUS (Pmode
, addr
, tlsreg
));
9459 /* Add TOC symbol value to TLS pointer. */
9460 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tmpreg
, tlsreg
));
9465 /* Passes the tls arg value for global dynamic and local dynamic
9466 emit_library_call_value in rs6000_legitimize_tls_address to
9467 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
9468 marker relocs put on __tls_get_addr calls. */
9469 static rtx global_tlsarg
;
9471 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9472 this (thread-local) address. */
9475 rs6000_legitimize_tls_address (rtx addr
, enum tls_model model
)
9480 return rs6000_legitimize_tls_address_aix (addr
, model
);
9482 dest
= gen_reg_rtx (Pmode
);
9483 if (model
== TLS_MODEL_LOCAL_EXEC
9484 && (rs6000_tls_size
== 16 || rs6000_pcrel_p ()))
9490 tlsreg
= gen_rtx_REG (Pmode
, 13);
9491 insn
= gen_tls_tprel_64 (dest
, tlsreg
, addr
);
9495 tlsreg
= gen_rtx_REG (Pmode
, 2);
9496 insn
= gen_tls_tprel_32 (dest
, tlsreg
, addr
);
9500 else if (model
== TLS_MODEL_LOCAL_EXEC
&& rs6000_tls_size
== 32)
9504 tmp
= gen_reg_rtx (Pmode
);
9507 tlsreg
= gen_rtx_REG (Pmode
, 13);
9508 insn
= gen_tls_tprel_ha_64 (tmp
, tlsreg
, addr
);
9512 tlsreg
= gen_rtx_REG (Pmode
, 2);
9513 insn
= gen_tls_tprel_ha_32 (tmp
, tlsreg
, addr
);
9517 insn
= gen_tls_tprel_lo_64 (dest
, tmp
, addr
);
9519 insn
= gen_tls_tprel_lo_32 (dest
, tmp
, addr
);
9524 rtx got
, tga
, tmp1
, tmp2
;
9526 /* We currently use relocations like @got@tlsgd for tls, which
9527 means the linker will handle allocation of tls entries, placing
9528 them in the .got section. So use a pointer to the .got section,
9529 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9530 or to secondary GOT sections used by 32-bit -fPIC. */
9531 if (rs6000_pcrel_p ())
9533 else if (TARGET_64BIT
)
9534 got
= gen_rtx_REG (Pmode
, 2);
9538 got
= gen_rtx_REG (Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
9541 rtx gsym
= rs6000_got_sym ();
9542 got
= gen_reg_rtx (Pmode
);
9544 rs6000_emit_move (got
, gsym
, Pmode
);
9549 tmp1
= gen_reg_rtx (Pmode
);
9550 tmp2
= gen_reg_rtx (Pmode
);
9551 mem
= gen_const_mem (Pmode
, tmp1
);
9552 lab
= gen_label_rtx ();
9553 emit_insn (gen_load_toc_v4_PIC_1b (gsym
, lab
));
9554 emit_move_insn (tmp1
, gen_rtx_REG (Pmode
, LR_REGNO
));
9555 if (TARGET_LINK_STACK
)
9556 emit_insn (gen_addsi3 (tmp1
, tmp1
, GEN_INT (4)));
9557 emit_move_insn (tmp2
, mem
);
9558 rtx_insn
*last
= emit_insn (gen_addsi3 (got
, tmp1
, tmp2
));
9559 set_unique_reg_note (last
, REG_EQUAL
, gsym
);
9564 if (model
== TLS_MODEL_GLOBAL_DYNAMIC
)
9566 rtx arg
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, addr
, got
),
9568 tga
= rs6000_tls_get_addr ();
9569 rtx argreg
= gen_rtx_REG (Pmode
, 3);
9570 emit_insn (gen_rtx_SET (argreg
, arg
));
9571 global_tlsarg
= arg
;
9572 emit_library_call_value (tga
, dest
, LCT_CONST
, Pmode
, argreg
, Pmode
);
9573 global_tlsarg
= NULL_RTX
;
9575 /* Make a note so that the result of this call can be CSEd. */
9576 rtvec vec
= gen_rtvec (1, copy_rtx (arg
));
9577 rtx uns
= gen_rtx_UNSPEC (Pmode
, vec
, UNSPEC_TLS_GET_ADDR
);
9578 set_unique_reg_note (get_last_insn (), REG_EQUAL
, uns
);
9580 else if (model
== TLS_MODEL_LOCAL_DYNAMIC
)
9582 rtx arg
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, got
), UNSPEC_TLSLD
);
9583 tga
= rs6000_tls_get_addr ();
9584 tmp1
= gen_reg_rtx (Pmode
);
9585 rtx argreg
= gen_rtx_REG (Pmode
, 3);
9586 emit_insn (gen_rtx_SET (argreg
, arg
));
9587 global_tlsarg
= arg
;
9588 emit_library_call_value (tga
, tmp1
, LCT_CONST
, Pmode
, argreg
, Pmode
);
9589 global_tlsarg
= NULL_RTX
;
9591 /* Make a note so that the result of this call can be CSEd. */
9592 rtvec vec
= gen_rtvec (1, copy_rtx (arg
));
9593 rtx uns
= gen_rtx_UNSPEC (Pmode
, vec
, UNSPEC_TLS_GET_ADDR
);
9594 set_unique_reg_note (get_last_insn (), REG_EQUAL
, uns
);
9596 if (rs6000_tls_size
== 16 || rs6000_pcrel_p ())
9599 insn
= gen_tls_dtprel_64 (dest
, tmp1
, addr
);
9601 insn
= gen_tls_dtprel_32 (dest
, tmp1
, addr
);
9603 else if (rs6000_tls_size
== 32)
9605 tmp2
= gen_reg_rtx (Pmode
);
9607 insn
= gen_tls_dtprel_ha_64 (tmp2
, tmp1
, addr
);
9609 insn
= gen_tls_dtprel_ha_32 (tmp2
, tmp1
, addr
);
9612 insn
= gen_tls_dtprel_lo_64 (dest
, tmp2
, addr
);
9614 insn
= gen_tls_dtprel_lo_32 (dest
, tmp2
, addr
);
9618 tmp2
= gen_reg_rtx (Pmode
);
9620 insn
= gen_tls_got_dtprel_64 (tmp2
, got
, addr
);
9622 insn
= gen_tls_got_dtprel_32 (tmp2
, got
, addr
);
9624 insn
= gen_rtx_SET (dest
, gen_rtx_PLUS (Pmode
, tmp2
, tmp1
));
9630 /* IE, or 64-bit offset LE. */
9631 tmp2
= gen_reg_rtx (Pmode
);
9633 insn
= gen_tls_got_tprel_64 (tmp2
, got
, addr
);
9635 insn
= gen_tls_got_tprel_32 (tmp2
, got
, addr
);
9637 if (rs6000_pcrel_p ())
9640 insn
= gen_tls_tls_pcrel_64 (dest
, tmp2
, addr
);
9642 insn
= gen_tls_tls_pcrel_32 (dest
, tmp2
, addr
);
9644 else if (TARGET_64BIT
)
9645 insn
= gen_tls_tls_64 (dest
, tmp2
, addr
);
9647 insn
= gen_tls_tls_32 (dest
, tmp2
, addr
);
/* Only create the global variable for the stack protect guard if we are using
   the global flavor of that guard.  */
static tree
rs6000_init_stack_protect_guard (void)
{
  if (rs6000_stack_protector_guard == SSP_GLOBAL)
    return default_stack_protect_guard ();

  return NULL_TREE;
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  if (GET_CODE (x) == HIGH
      && GET_CODE (XEXP (x, 0)) == UNSPEC)
    return true;

  /* A TLS symbol in the TOC cannot contain a sum.  */
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
      && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
    return true;

  /* Allow AIX TOC TLS symbols in the constant pool,
     but not ELF TLS symbols.  */
  return TARGET_ELF && tls_referenced_p (x);
}
/* Return true iff the given SYMBOL_REF refers to a constant pool entry
   that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
   can be addressed relative to the toc pointer.  */

static bool
use_toc_relative_ref (rtx sym, machine_mode mode)
{
  return ((constant_pool_expr_p (sym)
	   && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
					       get_pool_mode (sym)))
	  || (TARGET_CMODEL == CMODEL_MEDIUM
	      && SYMBOL_REF_LOCAL_P (sym)
	      && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
}
9702 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9703 that is a valid memory address for an instruction.
9704 The MODE argument is the machine mode for the MEM expression
9705 that wants to use this address.
9707 On the RS/6000, there are four valid address: a SYMBOL_REF that
9708 refers to a constant pool entry of an address (or the sum of it
9709 plus a constant), a short (16-bit signed) constant plus a register,
9710 the sum of two registers, or a register indirect, possibly with an
9711 auto-increment. For DFmode, DDmode and DImode with a constant plus
9712 register, we must ensure that both words are addressable or PowerPC64
9713 with offset word aligned.
9715 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9716 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9717 because adjacent memory cells are accessed by adding word-sized offsets
9718 during assembly output. */
9720 rs6000_legitimate_address_p (machine_mode mode
, rtx x
, bool reg_ok_strict
)
9722 bool reg_offset_p
= reg_offset_addressing_ok_p (mode
);
9723 bool quad_offset_p
= mode_supports_dq_form (mode
);
9725 if (TARGET_ELF
&& RS6000_SYMBOL_REF_TLS_P (x
))
9728 /* Handle unaligned altivec lvx/stvx type addresses. */
9729 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
)
9730 && GET_CODE (x
) == AND
9731 && CONST_INT_P (XEXP (x
, 1))
9732 && INTVAL (XEXP (x
, 1)) == -16)
9735 return (legitimate_indirect_address_p (x
, reg_ok_strict
)
9736 || legitimate_indexed_address_p (x
, reg_ok_strict
)
9737 || virtual_stack_registers_memory_p (x
));
9740 if (legitimate_indirect_address_p (x
, reg_ok_strict
))
9743 && (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == PRE_DEC
)
9744 && mode_supports_pre_incdec_p (mode
)
9745 && legitimate_indirect_address_p (XEXP (x
, 0), reg_ok_strict
))
9748 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
9749 if (address_is_prefixed (x
, mode
, NON_PREFIXED_DEFAULT
))
9752 /* Handle restricted vector d-form offsets in ISA 3.0. */
9755 if (quad_address_p (x
, mode
, reg_ok_strict
))
9758 else if (virtual_stack_registers_memory_p (x
))
9761 else if (reg_offset_p
)
9763 if (legitimate_small_data_p (mode
, x
))
9765 if (legitimate_constant_pool_address_p (x
, mode
,
9766 reg_ok_strict
|| lra_in_progress
))
9770 /* For TImode, if we have TImode in VSX registers, only allow register
9771 indirect addresses. This will allow the values to go in either GPRs
9772 or VSX registers without reloading. The vector types would tend to
9773 go into VSX registers, so we allow REG+REG, while TImode seems
9774 somewhat split, in that some uses are GPR based, and some VSX based. */
9775 /* FIXME: We could loosen this by changing the following to
9776 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9777 but currently we cannot allow REG+REG addressing for TImode. See
9778 PR72827 for complete details on how this ends up hoodwinking DSE. */
9779 if (mode
== TImode
&& TARGET_VSX
)
9781 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9784 && GET_CODE (x
) == PLUS
9785 && REG_P (XEXP (x
, 0))
9786 && (XEXP (x
, 0) == virtual_stack_vars_rtx
9787 || XEXP (x
, 0) == arg_pointer_rtx
)
9788 && CONST_INT_P (XEXP (x
, 1)))
9790 if (rs6000_legitimate_offset_address_p (mode
, x
, reg_ok_strict
, false))
9792 if (!FLOAT128_2REG_P (mode
)
9793 && (TARGET_HARD_FLOAT
9795 || (mode
!= DFmode
&& mode
!= DDmode
))
9796 && (TARGET_POWERPC64
|| mode
!= DImode
)
9797 && (mode
!= TImode
|| VECTOR_MEM_VSX_P (TImode
))
9799 && !avoiding_indexed_address_p (mode
)
9800 && legitimate_indexed_address_p (x
, reg_ok_strict
))
9802 if (TARGET_UPDATE
&& GET_CODE (x
) == PRE_MODIFY
9803 && mode_supports_pre_modify_p (mode
)
9804 && legitimate_indirect_address_p (XEXP (x
, 0), reg_ok_strict
)
9805 && (rs6000_legitimate_offset_address_p (mode
, XEXP (x
, 1),
9806 reg_ok_strict
, false)
9807 || (!avoiding_indexed_address_p (mode
)
9808 && legitimate_indexed_address_p (XEXP (x
, 1), reg_ok_strict
)))
9809 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
9811 /* There is no prefixed version of the load/store with update. */
9812 rtx addr
= XEXP (x
, 1);
9813 return !address_is_prefixed (addr
, mode
, NON_PREFIXED_DEFAULT
);
9815 if (reg_offset_p
&& !quad_offset_p
9816 && legitimate_lo_sum_address_p (mode
, x
, reg_ok_strict
))
9821 /* Debug version of rs6000_legitimate_address_p. */
9823 rs6000_debug_legitimate_address_p (machine_mode mode
, rtx x
,
9826 bool ret
= rs6000_legitimate_address_p (mode
, x
, reg_ok_strict
);
9828 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9829 "strict = %d, reload = %s, code = %s\n",
9830 ret
? "true" : "false",
9831 GET_MODE_NAME (mode
),
9833 (reload_completed
? "after" : "before"),
9834 GET_RTX_NAME (GET_CODE (x
)));
/* Implement TARGET_MODE_DEPENDENT_ADDRESS_P.  */

static bool
rs6000_mode_dependent_address_p (const_rtx addr,
				 addr_space_t as ATTRIBUTE_UNUSED)
{
  return rs6000_mode_dependent_address_ptr (addr);
}
9849 /* Go to LABEL if ADDR (a legitimate address expression)
9850 has an effect that depends on the machine mode it is used for.
9852 On the RS/6000 this is true of all integral offsets (since AltiVec
9853 and VSX modes don't allow them) or is a pre-increment or decrement.
9855 ??? Except that due to conceptual problems in offsettable_address_p
9856 we can't really report the problems of integral offsets. So leave
9857 this assuming that the adjustable offset must be valid for the
9858 sub-words of a TFmode operand, which is what we had before. */
9861 rs6000_mode_dependent_address (const_rtx addr
)
9863 switch (GET_CODE (addr
))
9866 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9867 is considered a legitimate address before reload, so there
9868 are no offset restrictions in that case. Note that this
9869 condition is safe in strict mode because any address involving
9870 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9871 been rejected as illegitimate. */
9872 if (XEXP (addr
, 0) != virtual_stack_vars_rtx
9873 && XEXP (addr
, 0) != arg_pointer_rtx
9874 && CONST_INT_P (XEXP (addr
, 1)))
9876 HOST_WIDE_INT val
= INTVAL (XEXP (addr
, 1));
9877 HOST_WIDE_INT extra
= TARGET_POWERPC64
? 8 : 12;
9878 if (TARGET_PREFIXED
)
9879 return !SIGNED_34BIT_OFFSET_EXTRA_P (val
, extra
);
9881 return !SIGNED_16BIT_OFFSET_EXTRA_P (val
, extra
);
9886 /* Anything in the constant pool is sufficiently aligned that
9887 all bytes have the same high part address. */
9888 return !legitimate_constant_pool_address_p (addr
, QImode
, false);
9890 /* Auto-increment cases are now treated generically in recog.cc. */
9892 return TARGET_UPDATE
;
9894 /* AND is only allowed in Altivec loads. */
9905 /* Debug version of rs6000_mode_dependent_address. */
9907 rs6000_debug_mode_dependent_address (const_rtx addr
)
9909 bool ret
= rs6000_mode_dependent_address (addr
);
9911 fprintf (stderr
, "\nrs6000_mode_dependent_address: ret = %s\n",
9912 ret
? "true" : "false");
/* Implement FIND_BASE_TERM.  */

rtx
rs6000_find_base_term (rtx op)
{
  rtx base;

  base = op;
  if (GET_CODE (base) == CONST)
    base = XEXP (base, 0);
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  if (GET_CODE (base) == UNSPEC)
    switch (XINT (base, 1))
      {
      case UNSPEC_TOCREL:
      case UNSPEC_MACHOPIC_OFFSET:
	/* OP represents SYM [+ OFFSET] - ANCHOR.  SYM is the base term
	   for aliasing purposes.  */
	return XVECEXP (base, 0, 0);
      }

  return op;
}
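/* Example: for OP of a form such as
     (const (plus (unspec [(symbol_ref "x") (reg 2)] UNSPEC_TOCREL)
		  (const_int 8)))
   the CONST and PLUS wrappers are stripped above and the SYMBOL_REF "x"
   inside the UNSPEC_TOCREL is returned as the base term for alias
   analysis.  */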
/* More elaborate version of recog's offsettable_memref_p predicate
   that works around the ??? note of rs6000_mode_dependent_address.
   In particular it accepts

   (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))

   in 32-bit mode, that the recog predicate rejects.  */

static bool
rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
{
  bool worst_case;

  if (!MEM_P (op))
    return false;

  /* First mimic offsettable_memref_p.  */
  if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
    return true;

  /* offsettable_address_p invokes rs6000_mode_dependent_address, but
     the latter predicate knows nothing about the mode of the memory
     reference and, therefore, assumes that it is the largest supported
     mode (TFmode).  As a consequence, legitimate offsettable memory
     references are rejected.  rs6000_legitimate_offset_address_p contains
     the correct logic for the PLUS case of rs6000_mode_dependent_address,
     at least with a little bit of help here given that we know the
     actual registers used.  */
  worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
		|| GET_MODE_SIZE (reg_mode) == 4);
  return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
					     strict, worst_case);
}
9977 /* Determine the reassociation width to be used in reassociate_bb.
9978 This takes into account how many parallel operations we
9979 can actually do of a given type, and also the latency.
9983 vect add/sub/mul 2/cycle
9984 fp add/sub/mul 2/cycle
9989 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
9992 switch (rs6000_tune
)
9994 case PROCESSOR_POWER8
:
9995 case PROCESSOR_POWER9
:
9996 case PROCESSOR_POWER10
:
9997 if (DECIMAL_FLOAT_MODE_P (mode
))
9999 if (VECTOR_MODE_P (mode
))
10001 if (INTEGRAL_MODE_P (mode
))
10003 if (FLOAT_MODE_P (mode
))
10012 /* Change register usage conditional on target flags. */
10014 rs6000_conditional_register_usage (void)
10018 if (TARGET_DEBUG_TARGET
)
10019 fprintf (stderr
, "rs6000_conditional_register_usage called\n");
10021 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10023 fixed_regs
[13] = call_used_regs
[13] = 1;
10025 /* Conditionally disable FPRs. */
10026 if (TARGET_SOFT_FLOAT
)
10027 for (i
= 32; i
< 64; i
++)
10028 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10030 /* The TOC register is not killed across calls in a way that is
10031 visible to the compiler. */
10032 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
10033 call_used_regs
[2] = 0;
10035 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
== 2)
10036 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10038 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
== 1)
10039 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10040 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10042 if (DEFAULT_ABI
== ABI_DARWIN
&& flag_pic
)
10043 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10044 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10046 if (TARGET_TOC
&& TARGET_MINIMAL_TOC
)
10047 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10049 if (!TARGET_ALTIVEC
&& !TARGET_VSX
)
10051 for (i
= FIRST_ALTIVEC_REGNO
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
10052 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10053 call_used_regs
[VRSAVE_REGNO
] = 1;
10056 if (TARGET_ALTIVEC
|| TARGET_VSX
)
10057 global_regs
[VSCR_REGNO
] = 1;
10059 if (TARGET_ALTIVEC_ABI
)
10061 for (i
= FIRST_ALTIVEC_REGNO
; i
< FIRST_ALTIVEC_REGNO
+ 20; ++i
)
10062 call_used_regs
[i
] = 1;
10064 /* AIX reserves VR20:31 in non-extended ABI mode. */
10065 if (TARGET_XCOFF
&& !rs6000_aix_extabi
)
10066 for (i
= FIRST_ALTIVEC_REGNO
+ 20; i
< FIRST_ALTIVEC_REGNO
+ 32; ++i
)
10067 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10072 /* Output insns to set DEST equal to the constant SOURCE as a series of
10073 lis, ori and shl instructions and return TRUE. */
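/* As an illustration of the SImode path below (grounded in the SET/IOR pair
   emitted there): a constant such as 0x12345678 is built by first setting
   the high half, roughly "lis rT,0x1234" for C & ~0xffff, and then filling
   in the low half, roughly "ori rD,rT,0x5678" for C & 0xffff.  */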
bool
rs6000_emit_set_const (rtx dest, rtx source)
{
  machine_mode mode = GET_MODE (dest);
10083 gcc_checking_assert (CONST_INT_P (source
));
10084 c
= INTVAL (source
);
10089 emit_insn (gen_rtx_SET (dest
, source
));
10093 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (SImode
);
10095 emit_insn (gen_rtx_SET (copy_rtx (temp
),
10096 GEN_INT (c
& ~(HOST_WIDE_INT
) 0xffff)));
10097 emit_insn (gen_rtx_SET (dest
,
10098 gen_rtx_IOR (SImode
, copy_rtx (temp
),
10099 GEN_INT (c
& 0xffff))));
10103 if (!TARGET_POWERPC64
)
10107 hi
= operand_subword_force (copy_rtx (dest
), WORDS_BIG_ENDIAN
== 0,
10109 lo
= operand_subword_force (dest
, WORDS_BIG_ENDIAN
!= 0,
10111 emit_move_insn (hi
, GEN_INT (c
>> 32));
10112 c
= ((c
& 0xffffffff) ^ 0x80000000) - 0x80000000;
10113 emit_move_insn (lo
, GEN_INT (c
));
10116 rs6000_emit_set_long_const (dest
, c
);
10120 gcc_unreachable ();
10123 insn
= get_last_insn ();
10124 set
= single_set (insn
);
10125 if (! CONSTANT_P (SET_SRC (set
)))
10126 set_unique_reg_note (insn
, REG_EQUAL
, GEN_INT (c
));
10131 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10132 Output insns to set DEST equal to the constant C as a series of
10133 lis, ori and shl instructions. */
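/* For illustration: UD1..UD4 below are the four 16-bit chunks of C, with UD1
   the least significant and UD4 the most significant.  For example,
   C = 0x123456789abcdef0 splits into ud4 = 0x1234, ud3 = 0x5678,
   ud2 = 0x9abc and ud1 = 0xdef0; the cases below choose a short
   lis/ori/shift sequence based on which chunks are zero, all ones, or
   repeated.  */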
static void
rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
{
10139 HOST_WIDE_INT ud1
, ud2
, ud3
, ud4
;
10149 if ((ud4
== 0xffff && ud3
== 0xffff && ud2
== 0xffff && (ud1
& 0x8000))
10150 || (ud4
== 0 && ud3
== 0 && ud2
== 0 && ! (ud1
& 0x8000)))
10151 emit_move_insn (dest
, GEN_INT ((ud1
^ 0x8000) - 0x8000));
10153 else if ((ud4
== 0xffff && ud3
== 0xffff && (ud2
& 0x8000))
10154 || (ud4
== 0 && ud3
== 0 && ! (ud2
& 0x8000)))
10156 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10158 emit_move_insn (ud1
!= 0 ? copy_rtx (temp
) : dest
,
10159 GEN_INT (((ud2
<< 16) ^ 0x80000000) - 0x80000000));
10161 emit_move_insn (dest
,
10162 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10165 else if (ud3
== 0 && ud4
== 0)
10167 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10169 gcc_assert (ud2
& 0x8000);
10170 emit_move_insn (copy_rtx (temp
),
10171 GEN_INT (((ud2
<< 16) ^ 0x80000000) - 0x80000000));
10173 emit_move_insn (copy_rtx (temp
),
10174 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10176 emit_move_insn (dest
,
10177 gen_rtx_ZERO_EXTEND (DImode
,
10178 gen_lowpart (SImode
,
10179 copy_rtx (temp
))));
10181 else if (ud1
== ud3
&& ud2
== ud4
)
10183 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10184 HOST_WIDE_INT num
= (ud2
<< 16) | ud1
;
10185 rs6000_emit_set_long_const (temp
, (num
^ 0x80000000) - 0x80000000);
10186 rtx one
= gen_rtx_AND (DImode
, temp
, GEN_INT (0xffffffff));
10187 rtx two
= gen_rtx_ASHIFT (DImode
, temp
, GEN_INT (32));
10188 emit_move_insn (dest
, gen_rtx_IOR (DImode
, one
, two
));
10190 else if ((ud4
== 0xffff && (ud3
& 0x8000))
10191 || (ud4
== 0 && ! (ud3
& 0x8000)))
10193 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10195 emit_move_insn (copy_rtx (temp
),
10196 GEN_INT (((ud3
<< 16) ^ 0x80000000) - 0x80000000));
10198 emit_move_insn (copy_rtx (temp
),
10199 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10201 emit_move_insn (ud1
!= 0 ? copy_rtx (temp
) : dest
,
10202 gen_rtx_ASHIFT (DImode
, copy_rtx (temp
),
10205 emit_move_insn (dest
,
10206 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10211 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10213 emit_move_insn (copy_rtx (temp
),
10214 GEN_INT (((ud4
<< 16) ^ 0x80000000) - 0x80000000));
10216 emit_move_insn (copy_rtx (temp
),
10217 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10220 emit_move_insn (ud2
!= 0 || ud1
!= 0 ? copy_rtx (temp
) : dest
,
10221 gen_rtx_ASHIFT (DImode
, copy_rtx (temp
),
10224 emit_move_insn (ud1
!= 0 ? copy_rtx (temp
) : dest
,
10225 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10226 GEN_INT (ud2
<< 16)));
10228 emit_move_insn (dest
,
10229 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10234 /* Helper for the following. Get rid of [r+r] memory refs
10235 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
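/* For example, an operand such as (mem:TF (plus:DI (reg:DI 3) (reg:DI 4)))
   has its address forced into a fresh base register, so the move pattern
   only ever sees a simple (mem:TF (reg)) for these modes.  */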
10238 rs6000_eliminate_indexed_memrefs (rtx operands
[2])
10240 if (MEM_P (operands
[0])
10241 && !REG_P (XEXP (operands
[0], 0))
10242 && ! legitimate_constant_pool_address_p (XEXP (operands
[0], 0),
10243 GET_MODE (operands
[0]), false))
10245 = replace_equiv_address (operands
[0],
10246 copy_addr_to_reg (XEXP (operands
[0], 0)));
10248 if (MEM_P (operands
[1])
10249 && !REG_P (XEXP (operands
[1], 0))
10250 && ! legitimate_constant_pool_address_p (XEXP (operands
[1], 0),
10251 GET_MODE (operands
[1]), false))
10253 = replace_equiv_address (operands
[1],
10254 copy_addr_to_reg (XEXP (operands
[1], 0)));
10257 /* Generate a vector of constants to permute MODE for a little-endian
10258 storage operation by swapping the two halves of a vector. */
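/* For example, with 4 subparts (V4SImode) the loops below build the permute
   selector {2, 3, 0, 1}, and with 2 subparts (V2DImode) they build {1, 0};
   each element index is rotated by half the vector length.  */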
10260 rs6000_const_vec (machine_mode mode
)
10288 v
= rtvec_alloc (subparts
);
10290 for (i
= 0; i
< subparts
/ 2; ++i
)
10291 RTVEC_ELT (v
, i
) = gen_rtx_CONST_INT (DImode
, i
+ subparts
/ 2);
10292 for (i
= subparts
/ 2; i
< subparts
; ++i
)
10293 RTVEC_ELT (v
, i
) = gen_rtx_CONST_INT (DImode
, i
- subparts
/ 2);
10298 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10299 store operation. */
static void
rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
{
10303 gcc_assert (!altivec_indexed_or_indirect_operand (dest
, mode
));
10304 gcc_assert (!altivec_indexed_or_indirect_operand (source
, mode
));
10306 /* Scalar permutations are easier to express in integer modes rather than
10307 floating-point modes, so cast them here. We use V1TImode instead
10308 of TImode to ensure that the values don't go through GPRs. */
10309 if (FLOAT128_VECTOR_P (mode
))
10311 dest
= gen_lowpart (V1TImode
, dest
);
10312 source
= gen_lowpart (V1TImode
, source
);
/* Use ROTATE instead of VEC_SELECT if the mode contains only a single
   element.  */
10318 if (mode
== TImode
|| mode
== V1TImode
)
10319 emit_insn (gen_rtx_SET (dest
, gen_rtx_ROTATE (mode
, source
,
10323 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rs6000_const_vec (mode
));
10324 emit_insn (gen_rtx_SET (dest
, gen_rtx_VEC_SELECT (mode
, source
, par
)));
10328 /* Emit a little-endian load from vector memory location SOURCE to VSX
10329 register DEST in mode MODE. The load is done with two permuting
insns that represent an lxvd2x and an xxpermdi.  */
static void
rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
{
  /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
     V1TImode).  */
10336 if (mode
== TImode
|| mode
== V1TImode
)
10339 dest
= gen_lowpart (V2DImode
, dest
);
10340 source
= adjust_address (source
, V2DImode
, 0);
10343 rtx tmp
= can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest
) : dest
;
10344 rs6000_emit_le_vsx_permute (tmp
, source
, mode
);
10345 rs6000_emit_le_vsx_permute (dest
, tmp
, mode
);
10348 /* Emit a little-endian store to vector memory location DEST from VSX
10349 register SOURCE in mode MODE. The store is done with two permuting
insns that represent an xxpermdi and an stxvd2x.  */
static void
rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
{
10354 /* This should never be called after LRA. */
10355 gcc_assert (can_create_pseudo_p ());
  /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
     V1TImode).  */
10359 if (mode
== TImode
|| mode
== V1TImode
)
10362 dest
= adjust_address (dest
, V2DImode
, 0);
10363 source
= gen_lowpart (V2DImode
, source
);
10366 rtx tmp
= gen_reg_rtx_and_attrs (source
);
10367 rs6000_emit_le_vsx_permute (tmp
, source
, mode
);
10368 rs6000_emit_le_vsx_permute (dest
, tmp
, mode
);
10371 /* Emit a sequence representing a little-endian VSX load or store,
10372 moving data from SOURCE to DEST in mode MODE. This is done
10373 separately from rs6000_emit_move to ensure it is called only
10374 during expand. LE VSX loads and stores introduced later are
10375 handled with a split. The expand-time RTL generation allows
10376 us to optimize away redundant pairs of register-permutes. */
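/* A typical case where that pays off: copying a vector from one memory
   location to another expands to permute(load) followed by permute(store);
   the two adjacent register permutes undo each other and can be optimized
   away, leaving just the lxvd2x/stxvd2x pair.  */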
void
rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
{
10380 gcc_assert (!BYTES_BIG_ENDIAN
10381 && VECTOR_MEM_VSX_P (mode
)
10382 && !TARGET_P9_VECTOR
10383 && !gpr_or_gpr_p (dest
, source
)
10384 && (MEM_P (source
) ^ MEM_P (dest
)));
10386 if (MEM_P (source
))
10388 gcc_assert (REG_P (dest
) || SUBREG_P (dest
));
10389 rs6000_emit_le_vsx_load (dest
, source
, mode
);
10393 if (!REG_P (source
))
10394 source
= force_reg (mode
, source
);
10395 rs6000_emit_le_vsx_store (dest
, source
, mode
);
10399 /* Return whether a SFmode or SImode move can be done without converting one
mode to another.  This arises when we have:
10402 (SUBREG:SF (REG:SI ...))
10403 (SUBREG:SI (REG:SF ...))
10405 and one of the values is in a floating point/vector register, where SFmode
10406 scalars are stored in DFmode format. */
10409 valid_sf_si_move (rtx dest
, rtx src
, machine_mode mode
)
10411 if (TARGET_ALLOW_SF_SUBREG
)
10414 if (mode
!= SFmode
&& GET_MODE_CLASS (mode
) != MODE_INT
)
10417 if (!SUBREG_P (src
) || !sf_subreg_operand (src
, mode
))
/* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))).  */
10421 if (SUBREG_P (dest
))
10423 rtx dest_subreg
= SUBREG_REG (dest
);
10424 rtx src_subreg
= SUBREG_REG (src
);
10425 return GET_MODE (dest_subreg
) == GET_MODE (src_subreg
);
10432 /* Helper function to change moves with:
10434 (SUBREG:SF (REG:SI)) and
10435 (SUBREG:SI (REG:SF))
10437 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10438 values are stored as DFmode values in the VSX registers. We need to convert
10439 the bits before we can use a direct move or operate on the bits in the
10440 vector register as an integer type.
Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))).  */
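/* For example, reading the raw bits of a float as an integer, i.e.
   (set (reg:SI) (subreg:SI (reg:SF))), is rewritten below to go through the
   movsi_from_sf pattern, so the SFmode value (kept in DFmode format in a
   VSX register) is converted to its 32-bit image before the direct move.  */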
10445 rs6000_emit_move_si_sf_subreg (rtx dest
, rtx source
, machine_mode mode
)
10447 if (TARGET_DIRECT_MOVE_64BIT
&& !reload_completed
10448 && (!SUBREG_P (dest
) || !sf_subreg_operand (dest
, mode
))
10449 && SUBREG_P (source
) && sf_subreg_operand (source
, mode
))
10451 rtx inner_source
= SUBREG_REG (source
);
10452 machine_mode inner_mode
= GET_MODE (inner_source
);
10454 if (mode
== SImode
&& inner_mode
== SFmode
)
10456 emit_insn (gen_movsi_from_sf (dest
, inner_source
));
10460 if (mode
== SFmode
&& inner_mode
== SImode
)
10462 emit_insn (gen_movsf_from_si (dest
, inner_source
));
10470 /* Emit a move from SOURCE to DEST in mode MODE. */
void
rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
{
  rtx operands[2];

  operands[0] = dest;
  operands[1] = source;
10478 if (TARGET_DEBUG_ADDR
)
10481 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10482 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10483 GET_MODE_NAME (mode
),
10486 can_create_pseudo_p ());
10488 fprintf (stderr
, "source:\n");
10489 debug_rtx (source
);
10492 /* Check that we get CONST_WIDE_INT only when we should. */
10493 if (CONST_WIDE_INT_P (operands
[1])
10494 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
10495 gcc_unreachable ();
10497 #ifdef HAVE_AS_GNU_ATTRIBUTE
  /* If we use a long double type, set the flags in .gnu_attribute that say
     what the long double type is.  This is to allow the linker's warning
     message for the wrong long double to be useful, even if the function does
     not do a call (for example, doing a 128-bit add on power9 if the long
     double type is IEEE 128-bit).  Do not set this if __ibm128 or __float128
     are used and they aren't the default long double type.  */
10504 if (rs6000_gnu_attr
&& (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
|| TARGET_64BIT
))
10506 if (TARGET_LONG_DOUBLE_128
&& (mode
== TFmode
|| mode
== TCmode
))
10507 rs6000_passes_float
= rs6000_passes_long_double
= true;
10509 else if (!TARGET_LONG_DOUBLE_128
&& (mode
== DFmode
|| mode
== DCmode
))
10510 rs6000_passes_float
= rs6000_passes_long_double
= true;
10514 /* See if we need to special case SImode/SFmode SUBREG moves. */
10515 if ((mode
== SImode
|| mode
== SFmode
) && SUBREG_P (source
)
10516 && rs6000_emit_move_si_sf_subreg (dest
, source
, mode
))
10519 /* Check if GCC is setting up a block move that will end up using FP
10520 registers as temporaries. We must make sure this is acceptable. */
10521 if (MEM_P (operands
[0])
10522 && MEM_P (operands
[1])
10524 && (rs6000_slow_unaligned_access (DImode
, MEM_ALIGN (operands
[0]))
10525 || rs6000_slow_unaligned_access (DImode
, MEM_ALIGN (operands
[1])))
10526 && ! (rs6000_slow_unaligned_access (SImode
,
10527 (MEM_ALIGN (operands
[0]) > 32
10528 ? 32 : MEM_ALIGN (operands
[0])))
10529 || rs6000_slow_unaligned_access (SImode
,
10530 (MEM_ALIGN (operands
[1]) > 32
10531 ? 32 : MEM_ALIGN (operands
[1]))))
10532 && ! MEM_VOLATILE_P (operands
[0])
10533 && ! MEM_VOLATILE_P (operands
[1]))
10535 emit_move_insn (adjust_address (operands
[0], SImode
, 0),
10536 adjust_address (operands
[1], SImode
, 0));
10537 emit_move_insn (adjust_address (copy_rtx (operands
[0]), SImode
, 4),
10538 adjust_address (copy_rtx (operands
[1]), SImode
, 4));
10542 if (can_create_pseudo_p () && MEM_P (operands
[0])
10543 && !gpc_reg_operand (operands
[1], mode
))
10544 operands
[1] = force_reg (mode
, operands
[1]);
10546 /* Recognize the case where operand[1] is a reference to thread-local
10547 data and load its address to a register. */
10548 if (tls_referenced_p (operands
[1]))
10550 enum tls_model model
;
10551 rtx tmp
= operands
[1];
10554 if (GET_CODE (tmp
) == CONST
&& GET_CODE (XEXP (tmp
, 0)) == PLUS
)
10556 addend
= XEXP (XEXP (tmp
, 0), 1);
10557 tmp
= XEXP (XEXP (tmp
, 0), 0);
10560 gcc_assert (SYMBOL_REF_P (tmp
));
10561 model
= SYMBOL_REF_TLS_MODEL (tmp
);
10562 gcc_assert (model
!= 0);
10564 tmp
= rs6000_legitimize_tls_address (tmp
, model
);
10567 tmp
= gen_rtx_PLUS (mode
, tmp
, addend
);
10568 tmp
= force_operand (tmp
, operands
[0]);
10573 /* 128-bit constant floating-point values on Darwin should really be loaded
10574 as two parts. However, this premature splitting is a problem when DFmode
10575 values can go into Altivec registers. */
10576 if (TARGET_MACHO
&& CONST_DOUBLE_P (operands
[1]) && FLOAT128_IBM_P (mode
)
10577 && !reg_addr
[DFmode
].scalar_in_vmx_p
)
10579 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
, 0),
10580 simplify_gen_subreg (DFmode
, operands
[1], mode
, 0),
10582 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
,
10583 GET_MODE_SIZE (DFmode
)),
10584 simplify_gen_subreg (DFmode
, operands
[1], mode
,
10585 GET_MODE_SIZE (DFmode
)),
10590 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10591 p1:SD) if p1 is not of floating point class and p0 is spilled as
10592 we can have no analogous movsd_store for this. */
10593 if (lra_in_progress
&& mode
== DDmode
10594 && REG_P (operands
[0]) && !HARD_REGISTER_P (operands
[0])
10595 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
10596 && SUBREG_P (operands
[1]) && REG_P (SUBREG_REG (operands
[1]))
10597 && GET_MODE (SUBREG_REG (operands
[1])) == SDmode
)
10600 int regno
= REGNO (SUBREG_REG (operands
[1]));
10602 if (!HARD_REGISTER_NUM_P (regno
))
10604 cl
= reg_preferred_class (regno
);
10605 regno
= reg_renumber
[regno
];
10607 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][1];
10609 if (regno
>= 0 && ! FP_REGNO_P (regno
))
10612 operands
[0] = gen_lowpart_SUBREG (SDmode
, operands
[0]);
10613 operands
[1] = SUBREG_REG (operands
[1]);
10616 if (lra_in_progress
10618 && REG_P (operands
[0]) && !HARD_REGISTER_P (operands
[0])
10619 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
10620 && (REG_P (operands
[1])
10621 || (SUBREG_P (operands
[1]) && REG_P (SUBREG_REG (operands
[1])))))
10623 int regno
= reg_or_subregno (operands
[1]);
10626 if (!HARD_REGISTER_NUM_P (regno
))
10628 cl
= reg_preferred_class (regno
);
10629 gcc_assert (cl
!= NO_REGS
);
10630 regno
= reg_renumber
[regno
];
10632 regno
= ira_class_hard_regs
[cl
][0];
10634 if (FP_REGNO_P (regno
))
10636 if (GET_MODE (operands
[0]) != DDmode
)
10637 operands
[0] = gen_rtx_SUBREG (DDmode
, operands
[0], 0);
10638 emit_insn (gen_movsd_store (operands
[0], operands
[1]));
10640 else if (INT_REGNO_P (regno
))
10641 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
10646 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10647 p:DD)) if p0 is not of floating point class and p1 is spilled as
10648 we can have no analogous movsd_load for this. */
10649 if (lra_in_progress
&& mode
== DDmode
10650 && SUBREG_P (operands
[0]) && REG_P (SUBREG_REG (operands
[0]))
10651 && GET_MODE (SUBREG_REG (operands
[0])) == SDmode
10652 && REG_P (operands
[1]) && !HARD_REGISTER_P (operands
[1])
10653 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
10656 int regno
= REGNO (SUBREG_REG (operands
[0]));
10658 if (!HARD_REGISTER_NUM_P (regno
))
10660 cl
= reg_preferred_class (regno
);
10661 regno
= reg_renumber
[regno
];
10663 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][0];
10665 if (regno
>= 0 && ! FP_REGNO_P (regno
))
10668 operands
[0] = SUBREG_REG (operands
[0]);
10669 operands
[1] = gen_lowpart_SUBREG (SDmode
, operands
[1]);
10672 if (lra_in_progress
10674 && (REG_P (operands
[0])
10675 || (SUBREG_P (operands
[0]) && REG_P (SUBREG_REG (operands
[0]))))
10676 && REG_P (operands
[1]) && !HARD_REGISTER_P (operands
[1])
10677 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
10679 int regno
= reg_or_subregno (operands
[0]);
10682 if (!HARD_REGISTER_NUM_P (regno
))
10684 cl
= reg_preferred_class (regno
);
10685 gcc_assert (cl
!= NO_REGS
);
10686 regno
= reg_renumber
[regno
];
10688 regno
= ira_class_hard_regs
[cl
][0];
10690 if (FP_REGNO_P (regno
))
10692 if (GET_MODE (operands
[1]) != DDmode
)
10693 operands
[1] = gen_rtx_SUBREG (DDmode
, operands
[1], 0);
10694 emit_insn (gen_movsd_load (operands
[0], operands
[1]));
10696 else if (INT_REGNO_P (regno
))
10697 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
10703 /* FIXME: In the long term, this switch statement should go away
10704 and be replaced by a sequence of tests based on things like
10710 if (CONSTANT_P (operands
[1])
10711 && !CONST_INT_P (operands
[1]))
10712 operands
[1] = force_const_mem (mode
, operands
[1]);
10719 if (FLOAT128_2REG_P (mode
))
10720 rs6000_eliminate_indexed_memrefs (operands
);
10727 if (CONSTANT_P (operands
[1])
10728 && ! easy_fp_constant (operands
[1], mode
))
10729 operands
[1] = force_const_mem (mode
, operands
[1]);
10739 if (CONSTANT_P (operands
[1])
10740 && !easy_vector_constant (operands
[1], mode
))
10741 operands
[1] = force_const_mem (mode
, operands
[1]);
10746 if (CONST_INT_P (operands
[1]) && INTVAL (operands
[1]) != 0)
10747 error ("%qs is an opaque type, and you cannot set it to other values",
10748 (mode
== OOmode
) ? "__vector_pair" : "__vector_quad");
10753 /* Use default pattern for address of ELF small data */
10756 && DEFAULT_ABI
== ABI_V4
10757 && (SYMBOL_REF_P (operands
[1])
10758 || GET_CODE (operands
[1]) == CONST
)
10759 && small_data_operand (operands
[1], mode
))
10761 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
10765 /* Use the default pattern for loading up PC-relative addresses. */
10766 if (TARGET_PCREL
&& mode
== Pmode
10767 && pcrel_local_or_external_address (operands
[1], Pmode
))
10769 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
10773 if (DEFAULT_ABI
== ABI_V4
10774 && mode
== Pmode
&& mode
== SImode
10775 && flag_pic
== 1 && got_operand (operands
[1], mode
))
10777 emit_insn (gen_movsi_got (operands
[0], operands
[1]));
10781 if ((TARGET_ELF
|| DEFAULT_ABI
== ABI_DARWIN
)
10782 && TARGET_NO_TOC_OR_PCREL
10785 && CONSTANT_P (operands
[1])
10786 && GET_CODE (operands
[1]) != HIGH
10787 && !CONST_INT_P (operands
[1]))
10789 rtx target
= (!can_create_pseudo_p ()
10791 : gen_reg_rtx (mode
));
10793 /* If this is a function address on -mcall-aixdesc,
10794 convert it to the address of the descriptor. */
10795 if (DEFAULT_ABI
== ABI_AIX
10796 && SYMBOL_REF_P (operands
[1])
10797 && XSTR (operands
[1], 0)[0] == '.')
10799 const char *name
= XSTR (operands
[1], 0);
10801 while (*name
== '.')
10803 new_ref
= gen_rtx_SYMBOL_REF (Pmode
, name
);
10804 CONSTANT_POOL_ADDRESS_P (new_ref
)
10805 = CONSTANT_POOL_ADDRESS_P (operands
[1]);
10806 SYMBOL_REF_FLAGS (new_ref
) = SYMBOL_REF_FLAGS (operands
[1]);
10807 SYMBOL_REF_USED (new_ref
) = SYMBOL_REF_USED (operands
[1]);
10808 SYMBOL_REF_DATA (new_ref
) = SYMBOL_REF_DATA (operands
[1]);
10809 operands
[1] = new_ref
;
10812 if (DEFAULT_ABI
== ABI_DARWIN
)
10815 /* This is not PIC code, but could require the subset of
10816 indirections used by mdynamic-no-pic. */
10817 if (MACHO_DYNAMIC_NO_PIC_P
)
10819 /* Take care of any required data indirection. */
10820 operands
[1] = rs6000_machopic_legitimize_pic_address (
10821 operands
[1], mode
, operands
[0]);
10822 if (operands
[0] != operands
[1])
10823 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
10827 emit_insn (gen_macho_high (Pmode
, target
, operands
[1]));
10828 emit_insn (gen_macho_low (Pmode
, operands
[0],
10829 target
, operands
[1]));
10833 emit_insn (gen_elf_high (target
, operands
[1]));
10834 emit_insn (gen_elf_low (operands
[0], target
, operands
[1]));
10838 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10839 and we have put it in the TOC, we just need to make a TOC-relative
10840 reference to it. */
10842 && SYMBOL_REF_P (operands
[1])
10843 && use_toc_relative_ref (operands
[1], mode
))
10844 operands
[1] = create_TOC_reference (operands
[1], operands
[0]);
10845 else if (mode
== Pmode
10846 && CONSTANT_P (operands
[1])
10847 && GET_CODE (operands
[1]) != HIGH
10848 && ((REG_P (operands
[0])
10849 && FP_REGNO_P (REGNO (operands
[0])))
10850 || !CONST_INT_P (operands
[1])
10851 || (num_insns_constant (operands
[1], mode
)
10852 > (TARGET_CMODEL
!= CMODEL_SMALL
? 3 : 2)))
10853 && !toc_relative_expr_p (operands
[1], false, NULL
, NULL
)
10854 && (TARGET_CMODEL
== CMODEL_SMALL
10855 || can_create_pseudo_p ()
10856 || (REG_P (operands
[0])
10857 && INT_REG_OK_FOR_BASE_P (operands
[0], true))))
10861 /* Darwin uses a special PIC legitimizer. */
10862 if (DEFAULT_ABI
== ABI_DARWIN
&& MACHOPIC_INDIRECT
)
10865 rs6000_machopic_legitimize_pic_address (operands
[1], mode
,
10867 if (operands
[0] != operands
[1])
10868 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
10873 /* If we are to limit the number of things we put in the TOC and
10874 this is a symbol plus a constant we can add in one insn,
10875 just put the symbol in the TOC and add the constant. */
10876 if (GET_CODE (operands
[1]) == CONST
10877 && TARGET_NO_SUM_IN_TOC
10878 && GET_CODE (XEXP (operands
[1], 0)) == PLUS
10879 && add_operand (XEXP (XEXP (operands
[1], 0), 1), mode
)
10880 && (GET_CODE (XEXP (XEXP (operands
[1], 0), 0)) == LABEL_REF
10881 || SYMBOL_REF_P (XEXP (XEXP (operands
[1], 0), 0)))
10882 && ! side_effects_p (operands
[0]))
10885 force_const_mem (mode
, XEXP (XEXP (operands
[1], 0), 0));
10886 rtx other
= XEXP (XEXP (operands
[1], 0), 1);
10888 sym
= force_reg (mode
, sym
);
10889 emit_insn (gen_add3_insn (operands
[0], sym
, other
));
10893 operands
[1] = force_const_mem (mode
, operands
[1]);
10896 && SYMBOL_REF_P (XEXP (operands
[1], 0))
10897 && use_toc_relative_ref (XEXP (operands
[1], 0), mode
))
10899 rtx tocref
= create_TOC_reference (XEXP (operands
[1], 0),
10901 operands
[1] = gen_const_mem (mode
, tocref
);
10902 set_mem_alias_set (operands
[1], get_TOC_alias_set ());
10908 if (!VECTOR_MEM_VSX_P (TImode
))
10909 rs6000_eliminate_indexed_memrefs (operands
);
10913 rs6000_eliminate_indexed_memrefs (operands
);
10917 fatal_insn ("bad move", gen_rtx_SET (dest
, source
));
10920 /* Above, we may have called force_const_mem which may have returned
10921 an invalid address. If we can, fix this up; otherwise, reload will
10922 have to deal with it. */
10923 if (MEM_P (operands
[1]))
10924 operands
[1] = validize_mem (operands
[1]);
10926 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
10930 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
static void
init_float128_ibm (machine_mode mode)
{
10934 if (!TARGET_XL_COMPAT
)
10936 set_optab_libfunc (add_optab
, mode
, "__gcc_qadd");
10937 set_optab_libfunc (sub_optab
, mode
, "__gcc_qsub");
10938 set_optab_libfunc (smul_optab
, mode
, "__gcc_qmul");
10939 set_optab_libfunc (sdiv_optab
, mode
, "__gcc_qdiv");
10941 if (!TARGET_HARD_FLOAT
)
10943 set_optab_libfunc (neg_optab
, mode
, "__gcc_qneg");
10944 set_optab_libfunc (eq_optab
, mode
, "__gcc_qeq");
10945 set_optab_libfunc (ne_optab
, mode
, "__gcc_qne");
10946 set_optab_libfunc (gt_optab
, mode
, "__gcc_qgt");
10947 set_optab_libfunc (ge_optab
, mode
, "__gcc_qge");
10948 set_optab_libfunc (lt_optab
, mode
, "__gcc_qlt");
10949 set_optab_libfunc (le_optab
, mode
, "__gcc_qle");
10950 set_optab_libfunc (unord_optab
, mode
, "__gcc_qunord");
10952 set_conv_libfunc (sext_optab
, mode
, SFmode
, "__gcc_stoq");
10953 set_conv_libfunc (sext_optab
, mode
, DFmode
, "__gcc_dtoq");
10954 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "__gcc_qtos");
10955 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "__gcc_qtod");
10956 set_conv_libfunc (sfix_optab
, SImode
, mode
, "__gcc_qtoi");
10957 set_conv_libfunc (ufix_optab
, SImode
, mode
, "__gcc_qtou");
10958 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "__gcc_itoq");
10959 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "__gcc_utoq");
10964 set_optab_libfunc (add_optab
, mode
, "_xlqadd");
10965 set_optab_libfunc (sub_optab
, mode
, "_xlqsub");
10966 set_optab_libfunc (smul_optab
, mode
, "_xlqmul");
10967 set_optab_libfunc (sdiv_optab
, mode
, "_xlqdiv");
  /* Add various conversions for IFmode to use the traditional TFmode
     names.  */
10972 if (mode
== IFmode
)
10974 set_conv_libfunc (sext_optab
, mode
, SDmode
, "__dpd_extendsdtf");
10975 set_conv_libfunc (sext_optab
, mode
, DDmode
, "__dpd_extendddtf");
10976 set_conv_libfunc (trunc_optab
, mode
, TDmode
, "__dpd_trunctdtf");
10977 set_conv_libfunc (trunc_optab
, SDmode
, mode
, "__dpd_trunctfsd");
10978 set_conv_libfunc (trunc_optab
, DDmode
, mode
, "__dpd_trunctfdd");
10979 set_conv_libfunc (sext_optab
, TDmode
, mode
, "__dpd_extendtftd");
10981 if (TARGET_POWERPC64
)
10983 set_conv_libfunc (sfix_optab
, TImode
, mode
, "__fixtfti");
10984 set_conv_libfunc (ufix_optab
, TImode
, mode
, "__fixunstfti");
10985 set_conv_libfunc (sfloat_optab
, mode
, TImode
, "__floattitf");
10986 set_conv_libfunc (ufloat_optab
, mode
, TImode
, "__floatuntitf");
10991 /* Create a decl for either complex long double multiply or complex long double
10992 divide when long double is IEEE 128-bit floating point. We can't use
10993 __multc3 and __divtc3 because the original long double using IBM extended
10994 double used those names. The complex multiply/divide functions are encoded
10995 as builtin functions with a complex result and 4 scalar inputs. */
10998 create_complex_muldiv (const char *name
, built_in_function fncode
, tree fntype
)
11000 tree fndecl
= add_builtin_function (name
, fntype
, fncode
, BUILT_IN_NORMAL
,
11003 set_builtin_decl (fncode
, fndecl
, true);
11005 if (TARGET_DEBUG_BUILTIN
)
11006 fprintf (stderr
, "create complex %s, fncode: %d\n", name
, (int) fncode
);
11011 /* Set up IEEE 128-bit floating point routines. Use different names if the
11012 arguments can be passed in a vector register. The historical PowerPC
11013 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
11014 continue to use that if we aren't using vector registers to pass IEEE
11015 128-bit floating point. */
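/* For example, when the mode can live in vector registers (the
   FLOAT128_VECTOR_P branch below) an IEEE 128-bit add resolves to __addkf3,
   while otherwise the historical _q_add name is used.  */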
static void
init_float128_ieee (machine_mode mode)
{
11020 if (FLOAT128_VECTOR_P (mode
))
11022 static bool complex_muldiv_init_p
= false;
11024 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
11025 we have clone or target attributes, this will be called a second
11026 time. We want to create the built-in function only once. */
11027 if (mode
== TFmode
&& TARGET_IEEEQUAD
&& !complex_muldiv_init_p
)
11029 complex_muldiv_init_p
= true;
11030 built_in_function fncode_mul
=
11031 (built_in_function
) (BUILT_IN_COMPLEX_MUL_MIN
+ TCmode
11032 - MIN_MODE_COMPLEX_FLOAT
);
11033 built_in_function fncode_div
=
11034 (built_in_function
) (BUILT_IN_COMPLEX_DIV_MIN
+ TCmode
11035 - MIN_MODE_COMPLEX_FLOAT
);
11037 tree fntype
= build_function_type_list (complex_long_double_type_node
,
11038 long_double_type_node
,
11039 long_double_type_node
,
11040 long_double_type_node
,
11041 long_double_type_node
,
11044 create_complex_muldiv ("__mulkc3", fncode_mul
, fntype
);
11045 create_complex_muldiv ("__divkc3", fncode_div
, fntype
);
11048 set_optab_libfunc (add_optab
, mode
, "__addkf3");
11049 set_optab_libfunc (sub_optab
, mode
, "__subkf3");
11050 set_optab_libfunc (neg_optab
, mode
, "__negkf2");
11051 set_optab_libfunc (smul_optab
, mode
, "__mulkf3");
11052 set_optab_libfunc (sdiv_optab
, mode
, "__divkf3");
11053 set_optab_libfunc (sqrt_optab
, mode
, "__sqrtkf2");
11054 set_optab_libfunc (abs_optab
, mode
, "__abskf2");
11055 set_optab_libfunc (powi_optab
, mode
, "__powikf2");
11057 set_optab_libfunc (eq_optab
, mode
, "__eqkf2");
11058 set_optab_libfunc (ne_optab
, mode
, "__nekf2");
11059 set_optab_libfunc (gt_optab
, mode
, "__gtkf2");
11060 set_optab_libfunc (ge_optab
, mode
, "__gekf2");
11061 set_optab_libfunc (lt_optab
, mode
, "__ltkf2");
11062 set_optab_libfunc (le_optab
, mode
, "__lekf2");
11063 set_optab_libfunc (unord_optab
, mode
, "__unordkf2");
11065 set_conv_libfunc (sext_optab
, mode
, SFmode
, "__extendsfkf2");
11066 set_conv_libfunc (sext_optab
, mode
, DFmode
, "__extenddfkf2");
11067 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "__trunckfsf2");
11068 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "__trunckfdf2");
11070 set_conv_libfunc (sext_optab
, mode
, IFmode
, "__trunctfkf2");
11071 if (mode
!= TFmode
&& FLOAT128_IBM_P (TFmode
))
11072 set_conv_libfunc (sext_optab
, mode
, TFmode
, "__trunctfkf2");
11074 set_conv_libfunc (trunc_optab
, IFmode
, mode
, "__extendkftf2");
11075 if (mode
!= TFmode
&& FLOAT128_IBM_P (TFmode
))
11076 set_conv_libfunc (trunc_optab
, TFmode
, mode
, "__extendkftf2");
11078 set_conv_libfunc (sext_optab
, mode
, SDmode
, "__dpd_extendsdkf");
11079 set_conv_libfunc (sext_optab
, mode
, DDmode
, "__dpd_extendddkf");
11080 set_conv_libfunc (trunc_optab
, mode
, TDmode
, "__dpd_trunctdkf");
11081 set_conv_libfunc (trunc_optab
, SDmode
, mode
, "__dpd_trunckfsd");
11082 set_conv_libfunc (trunc_optab
, DDmode
, mode
, "__dpd_trunckfdd");
11083 set_conv_libfunc (sext_optab
, TDmode
, mode
, "__dpd_extendkftd");
11085 set_conv_libfunc (sfix_optab
, SImode
, mode
, "__fixkfsi");
11086 set_conv_libfunc (ufix_optab
, SImode
, mode
, "__fixunskfsi");
11087 set_conv_libfunc (sfix_optab
, DImode
, mode
, "__fixkfdi");
11088 set_conv_libfunc (ufix_optab
, DImode
, mode
, "__fixunskfdi");
11090 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "__floatsikf");
11091 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "__floatunsikf");
11092 set_conv_libfunc (sfloat_optab
, mode
, DImode
, "__floatdikf");
11093 set_conv_libfunc (ufloat_optab
, mode
, DImode
, "__floatundikf");
11095 if (TARGET_POWERPC64
)
11097 set_conv_libfunc (sfix_optab
, TImode
, mode
, "__fixkfti_sw");
11098 set_conv_libfunc (ufix_optab
, TImode
, mode
, "__fixunskfti_sw");
11099 set_conv_libfunc (sfloat_optab
, mode
, TImode
, "__floattikf_sw");
11100 set_conv_libfunc (ufloat_optab
, mode
, TImode
, "__floatuntikf_sw");
11106 set_optab_libfunc (add_optab
, mode
, "_q_add");
11107 set_optab_libfunc (sub_optab
, mode
, "_q_sub");
11108 set_optab_libfunc (neg_optab
, mode
, "_q_neg");
11109 set_optab_libfunc (smul_optab
, mode
, "_q_mul");
11110 set_optab_libfunc (sdiv_optab
, mode
, "_q_div");
11111 if (TARGET_PPC_GPOPT
)
11112 set_optab_libfunc (sqrt_optab
, mode
, "_q_sqrt");
11114 set_optab_libfunc (eq_optab
, mode
, "_q_feq");
11115 set_optab_libfunc (ne_optab
, mode
, "_q_fne");
11116 set_optab_libfunc (gt_optab
, mode
, "_q_fgt");
11117 set_optab_libfunc (ge_optab
, mode
, "_q_fge");
11118 set_optab_libfunc (lt_optab
, mode
, "_q_flt");
11119 set_optab_libfunc (le_optab
, mode
, "_q_fle");
11121 set_conv_libfunc (sext_optab
, mode
, SFmode
, "_q_stoq");
11122 set_conv_libfunc (sext_optab
, mode
, DFmode
, "_q_dtoq");
11123 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "_q_qtos");
11124 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "_q_qtod");
11125 set_conv_libfunc (sfix_optab
, SImode
, mode
, "_q_qtoi");
11126 set_conv_libfunc (ufix_optab
, SImode
, mode
, "_q_qtou");
11127 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "_q_itoq");
11128 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "_q_utoq");
static void
rs6000_init_libfuncs (void)
{
11135 /* __float128 support. */
11136 if (TARGET_FLOAT128_TYPE
)
11138 init_float128_ibm (IFmode
);
11139 init_float128_ieee (KFmode
);
11142 /* AIX/Darwin/64-bit Linux quad floating point routines. */
11143 if (TARGET_LONG_DOUBLE_128
)
11145 if (!TARGET_IEEEQUAD
)
11146 init_float128_ibm (TFmode
);
11148 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
11150 init_float128_ieee (TFmode
);
11154 /* Emit a potentially record-form instruction, setting DST from SRC.
11155 If DOT is 0, that is all; otherwise, set CCREG to the result of the
11156 signed comparison of DST with zero. If DOT is 1, the generated RTL
11157 doesn't care about the DST result; if DOT is 2, it does. If CCREG
11158 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
11159 a separate COMPARE. */
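/* For illustration: with DOT nonzero and CCREG being CR0, the operation and
   the compare are emitted as one PARALLEL, which matches a single
   record-form instruction (something like "and." or "add.").  With any other
   CR field the code below falls back to the plain operation followed by a
   separate compare against zero.  */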
void
rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
{
  if (dot == 0)
    {
11166 emit_move_insn (dst
, src
);
11170 if (cc_reg_not_cr0_operand (ccreg
, CCmode
))
11172 emit_move_insn (dst
, src
);
11173 emit_move_insn (ccreg
, gen_rtx_COMPARE (CCmode
, dst
, const0_rtx
));
11177 rtx ccset
= gen_rtx_SET (ccreg
, gen_rtx_COMPARE (CCmode
, src
, const0_rtx
));
11180 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, dst
);
11181 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, ccset
, clobber
)));
11185 rtx set
= gen_rtx_SET (dst
, src
);
11186 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, ccset
, set
)));
11191 /* A validation routine: say whether CODE, a condition code, and MODE
11192 match. The other alternatives either don't make sense or should
11193 never be generated. */
11196 validate_condition_mode (enum rtx_code code
, machine_mode mode
)
11198 gcc_assert ((GET_RTX_CLASS (code
) == RTX_COMPARE
11199 || GET_RTX_CLASS (code
) == RTX_COMM_COMPARE
)
11200 && GET_MODE_CLASS (mode
) == MODE_CC
);
11202 /* These don't make sense. */
11203 gcc_assert ((code
!= GT
&& code
!= LT
&& code
!= GE
&& code
!= LE
)
11204 || mode
!= CCUNSmode
);
11206 gcc_assert ((code
!= GTU
&& code
!= LTU
&& code
!= GEU
&& code
!= LEU
)
11207 || mode
== CCUNSmode
);
11209 gcc_assert (mode
== CCFPmode
11210 || (code
!= ORDERED
&& code
!= UNORDERED
11211 && code
!= UNEQ
&& code
!= LTGT
11212 && code
!= UNGT
&& code
!= UNLT
11213 && code
!= UNGE
&& code
!= UNLE
));
11215 /* These are invalid; the information is not there. */
11216 gcc_assert (mode
!= CCEQmode
|| code
== EQ
|| code
== NE
);
11220 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
11221 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
11222 not zero, store there the bit offset (counted from the right) where
11223 the single stretch of 1 bits begins; and similarly for B, the bit
11224 offset where it ends. */
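/* For example, MASK = 0x00fffff0 is a single stretch of ones running from
   bit 4 up to bit 23, so *E is set to 4 and *B to 23.  Masks whose run of
   ones wraps around the most significant bit are also accepted, since the
   rotate-and-mask instructions allow wrapping masks.  */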
11227 rs6000_is_valid_mask (rtx mask
, int *b
, int *e
, machine_mode mode
)
11229 unsigned HOST_WIDE_INT val
= INTVAL (mask
);
11230 unsigned HOST_WIDE_INT bit
;
11232 int n
= GET_MODE_PRECISION (mode
);
11234 if (mode
!= DImode
&& mode
!= SImode
)
11237 if (INTVAL (mask
) >= 0)
11240 ne
= exact_log2 (bit
);
11241 nb
= exact_log2 (val
+ bit
);
11243 else if (val
+ 1 == 0)
11252 nb
= exact_log2 (bit
);
11253 ne
= exact_log2 (val
+ bit
);
11258 ne
= exact_log2 (bit
);
11259 if (val
+ bit
== 0)
11267 if (nb
< 0 || ne
< 0 || nb
>= n
|| ne
>= n
)
11279 rs6000_is_valid_rotate_dot_mask (rtx mask
, machine_mode mode
)
11282 return rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
) && nb
>= ne
&& ne
> 0;
11285 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
11286 or rldicr instruction, to implement an AND with it in mode MODE. */
11289 rs6000_is_valid_and_mask (rtx mask
, machine_mode mode
)
11293 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
  /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
     does not wrap.  */
11298 if (mode
== DImode
)
11299 return (ne
== 0 || nb
== 63 || (nb
< 32 && ne
<= nb
));
11301 /* For SImode, rlwinm can do everything. */
11302 if (mode
== SImode
)
11303 return (nb
< 32 && ne
< 32);
11308 /* Return the instruction template for an AND with mask in mode MODE, with
11309 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11312 rs6000_insn_for_and_mask (machine_mode mode
, rtx
*operands
, bool dot
)
11316 if (!rs6000_is_valid_mask (operands
[2], &nb
, &ne
, mode
))
11317 gcc_unreachable ();
11319 if (mode
== DImode
&& ne
== 0)
11321 operands
[3] = GEN_INT (63 - nb
);
11323 return "rldicl. %0,%1,0,%3";
11324 return "rldicl %0,%1,0,%3";
11327 if (mode
== DImode
&& nb
== 63)
11329 operands
[3] = GEN_INT (63 - ne
);
11331 return "rldicr. %0,%1,0,%3";
11332 return "rldicr %0,%1,0,%3";
11335 if (nb
< 32 && ne
< 32)
11337 operands
[3] = GEN_INT (31 - nb
);
11338 operands
[4] = GEN_INT (31 - ne
);
11340 return "rlwinm. %0,%1,0,%3,%4";
11341 return "rlwinm %0,%1,0,%3,%4";
11344 gcc_unreachable ();
11347 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
11348 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
11349 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
11352 rs6000_is_valid_shift_mask (rtx mask
, rtx shift
, machine_mode mode
)
11356 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
11359 int n
= GET_MODE_PRECISION (mode
);
11362 if (CONST_INT_P (XEXP (shift
, 1)))
11364 sh
= INTVAL (XEXP (shift
, 1));
11365 if (sh
< 0 || sh
>= n
)
11369 rtx_code code
= GET_CODE (shift
);
11371 /* Convert any shift by 0 to a rotate, to simplify below code. */
11375 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11376 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
11378 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
11384 /* DImode rotates need rld*. */
11385 if (mode
== DImode
&& code
== ROTATE
)
11386 return (nb
== 63 || ne
== 0 || ne
== sh
);
11388 /* SImode rotates need rlw*. */
11389 if (mode
== SImode
&& code
== ROTATE
)
11390 return (nb
< 32 && ne
< 32 && sh
< 32);
11392 /* Wrap-around masks are only okay for rotates. */
11396 /* Variable shifts are only okay for rotates. */
11400 /* Don't allow ASHIFT if the mask is wrong for that. */
11401 if (code
== ASHIFT
&& ne
< sh
)
11404 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
11405 if the mask is wrong for that. */
11406 if (nb
< 32 && ne
< 32 && sh
< 32
11407 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
11410 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
11411 if the mask is wrong for that. */
11412 if (code
== LSHIFTRT
)
11414 if (nb
== 63 || ne
== 0 || ne
== sh
)
11415 return !(code
== LSHIFTRT
&& nb
>= sh
);
11420 /* Return the instruction template for a shift with mask in mode MODE, with
11421 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11424 rs6000_insn_for_shift_mask (machine_mode mode
, rtx
*operands
, bool dot
)
11428 if (!rs6000_is_valid_mask (operands
[3], &nb
, &ne
, mode
))
11429 gcc_unreachable ();
11431 if (mode
== DImode
&& ne
== 0)
11433 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
11434 operands
[2] = GEN_INT (64 - INTVAL (operands
[2]));
11435 operands
[3] = GEN_INT (63 - nb
);
11437 return "rld%I2cl. %0,%1,%2,%3";
11438 return "rld%I2cl %0,%1,%2,%3";
11441 if (mode
== DImode
&& nb
== 63)
11443 operands
[3] = GEN_INT (63 - ne
);
11445 return "rld%I2cr. %0,%1,%2,%3";
11446 return "rld%I2cr %0,%1,%2,%3";
11450 && GET_CODE (operands
[4]) != LSHIFTRT
11451 && CONST_INT_P (operands
[2])
11452 && ne
== INTVAL (operands
[2]))
11454 operands
[3] = GEN_INT (63 - nb
);
11456 return "rld%I2c. %0,%1,%2,%3";
11457 return "rld%I2c %0,%1,%2,%3";
11460 if (nb
< 32 && ne
< 32)
11462 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
11463 operands
[2] = GEN_INT (32 - INTVAL (operands
[2]));
11464 operands
[3] = GEN_INT (31 - nb
);
11465 operands
[4] = GEN_INT (31 - ne
);
11466 /* This insn can also be a 64-bit rotate with mask that really makes
11467 it just a shift right (with mask); the %h below are to adjust for
11468 that situation (shift count is >= 32 in that case). */
11470 return "rlw%I2nm. %0,%1,%h2,%3,%4";
11471 return "rlw%I2nm %0,%1,%h2,%3,%4";
11474 gcc_unreachable ();
11477 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
11478 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
11479 ASHIFT, or LSHIFTRT) in mode MODE. */
11482 rs6000_is_valid_insert_mask (rtx mask
, rtx shift
, machine_mode mode
)
11486 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
11489 int n
= GET_MODE_PRECISION (mode
);
11491 int sh
= INTVAL (XEXP (shift
, 1));
11492 if (sh
< 0 || sh
>= n
)
11495 rtx_code code
= GET_CODE (shift
);
11497 /* Convert any shift by 0 to a rotate, to simplify below code. */
11501 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11502 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
11504 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
11510 /* DImode rotates need rldimi. */
11511 if (mode
== DImode
&& code
== ROTATE
)
11514 /* SImode rotates need rlwimi. */
11515 if (mode
== SImode
&& code
== ROTATE
)
11516 return (nb
< 32 && ne
< 32 && sh
< 32);
11518 /* Wrap-around masks are only okay for rotates. */
11522 /* Don't allow ASHIFT if the mask is wrong for that. */
11523 if (code
== ASHIFT
&& ne
< sh
)
11526 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
11527 if the mask is wrong for that. */
11528 if (nb
< 32 && ne
< 32 && sh
< 32
11529 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
11532 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
11533 if the mask is wrong for that. */
11534 if (code
== LSHIFTRT
)
11537 return !(code
== LSHIFTRT
&& nb
>= sh
);
11542 /* Return the instruction template for an insert with mask in mode MODE, with
11543 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11546 rs6000_insn_for_insert_mask (machine_mode mode
, rtx
*operands
, bool dot
)
11550 if (!rs6000_is_valid_mask (operands
[3], &nb
, &ne
, mode
))
11551 gcc_unreachable ();
11553 /* Prefer rldimi because rlwimi is cracked. */
11554 if (TARGET_POWERPC64
11555 && (!dot
|| mode
== DImode
)
11556 && GET_CODE (operands
[4]) != LSHIFTRT
11557 && ne
== INTVAL (operands
[2]))
11559 operands
[3] = GEN_INT (63 - nb
);
11561 return "rldimi. %0,%1,%2,%3";
11562 return "rldimi %0,%1,%2,%3";
11565 if (nb
< 32 && ne
< 32)
11567 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
11568 operands
[2] = GEN_INT (32 - INTVAL (operands
[2]));
11569 operands
[3] = GEN_INT (31 - nb
);
11570 operands
[4] = GEN_INT (31 - ne
);
11572 return "rlwimi. %0,%1,%2,%3,%4";
11573 return "rlwimi %0,%1,%2,%3,%4";
11576 gcc_unreachable ();
11579 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
11580 using two machine instructions. */
11583 rs6000_is_valid_2insn_and (rtx c
, machine_mode mode
)
11585 /* There are two kinds of AND we can handle with two insns:
11586 1) those we can do with two rl* insn;
11589 We do not handle that last case yet. */
11591 /* If there is just one stretch of ones, we can do it. */
11592 if (rs6000_is_valid_mask (c
, NULL
, NULL
, mode
))
11595 /* Otherwise, fill in the lowest "hole"; if we can do the result with
11596 one insn, we can do the whole thing with two. */
11597 unsigned HOST_WIDE_INT val
= INTVAL (c
);
11598 unsigned HOST_WIDE_INT bit1
= val
& -val
;
11599 unsigned HOST_WIDE_INT bit2
= (val
+ bit1
) & ~val
;
11600 unsigned HOST_WIDE_INT val1
= (val
+ bit1
) & val
;
11601 unsigned HOST_WIDE_INT bit3
= val1
& -val1
;
11602 return rs6000_is_valid_and_mask (GEN_INT (val
+ bit3
- bit2
), mode
);
11605 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
11606 If EXPAND is true, split rotate-and-mask instructions we generate to
11607 their constituent parts as well (this is used during expand); if DOT
11608 is 1, make the last insn a record-form instruction clobbering the
11609 destination GPR and setting the CC reg (from operands[3]); if 2, set
11610 that GPR as well as the CC reg. */
11613 rs6000_emit_2insn_and (machine_mode mode
, rtx
*operands
, bool expand
, int dot
)
11615 gcc_assert (!(expand
&& dot
));
11617 unsigned HOST_WIDE_INT val
= INTVAL (operands
[2]);
11619 /* If it is one stretch of ones, it is DImode; shift left, mask, then
11620 shift right. This generates better code than doing the masks without
11621 shifts, or shifting first right and then left. */
11623 if (rs6000_is_valid_mask (operands
[2], &nb
, &ne
, mode
) && nb
>= ne
)
11625 gcc_assert (mode
== DImode
);
11627 int shift
= 63 - nb
;
11630 rtx tmp1
= gen_reg_rtx (DImode
);
11631 rtx tmp2
= gen_reg_rtx (DImode
);
11632 emit_insn (gen_ashldi3 (tmp1
, operands
[1], GEN_INT (shift
)));
11633 emit_insn (gen_anddi3 (tmp2
, tmp1
, GEN_INT (val
<< shift
)));
11634 emit_insn (gen_lshrdi3 (operands
[0], tmp2
, GEN_INT (shift
)));
11638 rtx tmp
= gen_rtx_ASHIFT (mode
, operands
[1], GEN_INT (shift
));
11639 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (val
<< shift
));
11640 emit_move_insn (operands
[0], tmp
);
11641 tmp
= gen_rtx_LSHIFTRT (mode
, operands
[0], GEN_INT (shift
));
11642 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
11647 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
11648 that does the rest. */
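  /* Worked example: VAL = 0xff0ff has a hole at bits 8..11.  Then
     bit1 = 0x1 (lowest set bit), bit2 = 0x100 (lowest bit of the hole) and
     bit3 = 0x1000 (lowest set bit above the hole), giving
     mask2 = VAL + bit3 - bit2 = 0xfffff (the hole filled in) and
     mask1 = bit2 - bit3 - 1 = ~0xf00 (all ones except the hole), so that
     mask1 & mask2 == VAL; the cases below then choose how to apply the two
     masks with two rl*-style instructions.  */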
11649 unsigned HOST_WIDE_INT bit1
= val
& -val
;
11650 unsigned HOST_WIDE_INT bit2
= (val
+ bit1
) & ~val
;
11651 unsigned HOST_WIDE_INT val1
= (val
+ bit1
) & val
;
11652 unsigned HOST_WIDE_INT bit3
= val1
& -val1
;
11654 unsigned HOST_WIDE_INT mask1
= -bit3
+ bit2
- 1;
11655 unsigned HOST_WIDE_INT mask2
= val
+ bit3
- bit2
;
11657 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2
), mode
));
11659 /* Two "no-rotate"-and-mask instructions, for SImode. */
11660 if (rs6000_is_valid_and_mask (GEN_INT (mask1
), mode
))
11662 gcc_assert (mode
== SImode
);
11664 rtx reg
= expand
? gen_reg_rtx (mode
) : operands
[0];
11665 rtx tmp
= gen_rtx_AND (mode
, operands
[1], GEN_INT (mask1
));
11666 emit_move_insn (reg
, tmp
);
11667 tmp
= gen_rtx_AND (mode
, reg
, GEN_INT (mask2
));
11668 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
11672 gcc_assert (mode
== DImode
);
11674 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
11675 insns; we have to do the first in SImode, because it wraps. */
11676 if (mask2
<= 0xffffffff
11677 && rs6000_is_valid_and_mask (GEN_INT (mask1
), SImode
))
11679 rtx reg
= expand
? gen_reg_rtx (mode
) : operands
[0];
11680 rtx tmp
= gen_rtx_AND (SImode
, gen_lowpart (SImode
, operands
[1]),
11682 rtx reg_low
= gen_lowpart (SImode
, reg
);
11683 emit_move_insn (reg_low
, tmp
);
11684 tmp
= gen_rtx_AND (mode
, reg
, GEN_INT (mask2
));
11685 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
11689 /* Two rld* insns: rotate, clear the hole in the middle (which now is
11690 at the top end), rotate back and clear the other hole. */
11691 int right
= exact_log2 (bit3
);
11692 int left
= 64 - right
;
11694 /* Rotate the mask too. */
11695 mask1
= (mask1
>> right
) | ((bit2
- 1) << left
);
11699 rtx tmp1
= gen_reg_rtx (DImode
);
11700 rtx tmp2
= gen_reg_rtx (DImode
);
11701 rtx tmp3
= gen_reg_rtx (DImode
);
11702 emit_insn (gen_rotldi3 (tmp1
, operands
[1], GEN_INT (left
)));
11703 emit_insn (gen_anddi3 (tmp2
, tmp1
, GEN_INT (mask1
)));
11704 emit_insn (gen_rotldi3 (tmp3
, tmp2
, GEN_INT (right
)));
11705 emit_insn (gen_anddi3 (operands
[0], tmp3
, GEN_INT (mask2
)));
11709 rtx tmp
= gen_rtx_ROTATE (mode
, operands
[1], GEN_INT (left
));
11710 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (mask1
));
11711 emit_move_insn (operands
[0], tmp
);
11712 tmp
= gen_rtx_ROTATE (mode
, operands
[0], GEN_INT (right
));
11713 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (mask2
));
11714 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
11718 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
11719 for lfq and stfq insns iff the registers are hard registers. */
11722 registers_ok_for_quad_peep (rtx reg1
, rtx reg2
)
11724 /* We might have been passed a SUBREG. */
11725 if (!REG_P (reg1
) || !REG_P (reg2
))
11728 /* We might have been passed non floating point registers. */
11729 if (!FP_REGNO_P (REGNO (reg1
))
11730 || !FP_REGNO_P (REGNO (reg2
)))
11733 return (REGNO (reg1
) == REGNO (reg2
) - 1);
11736 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
11737 addr1 and addr2 must be in consecutive memory locations
11738 (addr2 == addr1 + 8). */
11741 mems_ok_for_quad_peep (rtx mem1
, rtx mem2
)
11744 unsigned int reg1
, reg2
;
11745 int offset1
, offset2
;
11747 /* The mems cannot be volatile. */
11748 if (MEM_VOLATILE_P (mem1
) || MEM_VOLATILE_P (mem2
))
11751 addr1
= XEXP (mem1
, 0);
11752 addr2
= XEXP (mem2
, 0);
11754 /* Extract an offset (if used) from the first addr. */
11755 if (GET_CODE (addr1
) == PLUS
)
11757 /* If not a REG, return zero. */
11758 if (!REG_P (XEXP (addr1
, 0)))
11762 reg1
= REGNO (XEXP (addr1
, 0));
11763 /* The offset must be constant! */
11764 if (!CONST_INT_P (XEXP (addr1
, 1)))
11766 offset1
= INTVAL (XEXP (addr1
, 1));
11769 else if (!REG_P (addr1
))
11773 reg1
= REGNO (addr1
);
11774 /* This was a simple (mem (reg)) expression. Offset is 0. */
11778 /* And now for the second addr. */
11779 if (GET_CODE (addr2
) == PLUS
)
11781 /* If not a REG, return zero. */
11782 if (!REG_P (XEXP (addr2
, 0)))
11786 reg2
= REGNO (XEXP (addr2
, 0));
11787 /* The offset must be constant. */
11788 if (!CONST_INT_P (XEXP (addr2
, 1)))
11790 offset2
= INTVAL (XEXP (addr2
, 1));
11793 else if (!REG_P (addr2
))
11797 reg2
= REGNO (addr2
);
11798 /* This was a simple (mem (reg)) expression. Offset is 0. */
11802 /* Both of these must have the same base register. */
11806 /* The offset for the second addr must be 8 more than the first addr. */
11807 if (offset2
!= offset1
+ 8)
  /* All the tests passed.  addr1 and addr2 are valid for lfq or stfq
     insns.  */
/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.  For SDmode values we
   need to use DDmode; in all other cases we can use the same mode.  */
11817 static machine_mode
11818 rs6000_secondary_memory_needed_mode (machine_mode mode
)
11820 if (lra_in_progress
&& mode
== SDmode
)
11825 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
11826 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
only work on the traditional altivec registers, note if an altivec register
   was used.  */
11830 static enum rs6000_reg_type
11831 register_to_reg_type (rtx reg
, bool *is_altivec
)
11833 HOST_WIDE_INT regno
;
11834 enum reg_class rclass
;
11836 if (SUBREG_P (reg
))
11837 reg
= SUBREG_REG (reg
);
11840 return NO_REG_TYPE
;
11842 regno
= REGNO (reg
);
11843 if (!HARD_REGISTER_NUM_P (regno
))
11845 if (!lra_in_progress
&& !reload_completed
)
11846 return PSEUDO_REG_TYPE
;
11848 regno
= true_regnum (reg
);
11849 if (regno
< 0 || !HARD_REGISTER_NUM_P (regno
))
11850 return PSEUDO_REG_TYPE
;
11853 gcc_assert (regno
>= 0);
11855 if (is_altivec
&& ALTIVEC_REGNO_P (regno
))
11856 *is_altivec
= true;
11858 rclass
= rs6000_regno_regclass
[regno
];
11859 return reg_class_to_reg_type
[(int)rclass
];
11862 /* Helper function to return the cost of adding a TOC entry address. */
11865 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask
)
11869 if (TARGET_CMODEL
!= CMODEL_SMALL
)
11870 ret
= ((addr_mask
& RELOAD_REG_OFFSET
) == 0) ? 1 : 2;
11873 ret
= (TARGET_MINIMAL_TOC
) ? 6 : 3;
11878 /* Helper function for rs6000_secondary_reload to determine whether the memory
11879 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
11880 needs reloading. Return negative if the memory is not handled by the memory
11881 helper functions and to try a different reload method, 0 if no additional
instructions are needed, and positive to give the extra cost for the
   memory.  */
11886 rs6000_secondary_reload_memory (rtx addr
,
11887 enum reg_class rclass
,
11890 int extra_cost
= 0;
11891 rtx reg
, and_arg
, plus_arg0
, plus_arg1
;
11892 addr_mask_type addr_mask
;
11893 const char *type
= NULL
;
11894 const char *fail_msg
= NULL
;
11896 if (GPR_REG_CLASS_P (rclass
))
11897 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_GPR
];
11899 else if (rclass
== FLOAT_REGS
)
11900 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
];
11902 else if (rclass
== ALTIVEC_REGS
)
11903 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
];
11905 /* For the combined VSX_REGS, turn off Altivec AND -16. */
11906 else if (rclass
== VSX_REGS
)
11907 addr_mask
= (reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
]
11908 & ~RELOAD_REG_AND_M16
);
11910 /* If the register allocator hasn't made up its mind yet on the register
11911 class to use, settle on defaults to use. */
11912 else if (rclass
== NO_REGS
)
11914 addr_mask
= (reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
]
11915 & ~RELOAD_REG_AND_M16
);
11917 if ((addr_mask
& RELOAD_REG_MULTIPLE
) != 0)
11918 addr_mask
&= ~(RELOAD_REG_INDEXED
11919 | RELOAD_REG_PRE_INCDEC
11920 | RELOAD_REG_PRE_MODIFY
);
11926 /* If the register isn't valid in this register class, just return now. */
11927 if ((addr_mask
& RELOAD_REG_VALID
) == 0)
11929 if (TARGET_DEBUG_ADDR
)
11932 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11933 "not valid in class\n",
11934 GET_MODE_NAME (mode
), reg_class_names
[rclass
]);
11941 switch (GET_CODE (addr
))
/* Does the register class support auto-update forms for this mode?  We
11944 don't need a scratch register, since the powerpc only supports
11945 PRE_INC, PRE_DEC, and PRE_MODIFY. */
11948 reg
= XEXP (addr
, 0);
11949 if (!base_reg_operand (addr
, GET_MODE (reg
)))
11951 fail_msg
= "no base register #1";
11955 else if ((addr_mask
& RELOAD_REG_PRE_INCDEC
) == 0)
11963 reg
= XEXP (addr
, 0);
11964 plus_arg1
= XEXP (addr
, 1);
11965 if (!base_reg_operand (reg
, GET_MODE (reg
))
11966 || GET_CODE (plus_arg1
) != PLUS
11967 || !rtx_equal_p (reg
, XEXP (plus_arg1
, 0)))
11969 fail_msg
= "bad PRE_MODIFY";
11973 else if ((addr_mask
& RELOAD_REG_PRE_MODIFY
) == 0)
11980 /* Do we need to simulate AND -16 to clear the bottom address bits used
11981 in VMX load/stores? Only allow the AND for vector sizes. */
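      /* For illustration: Altivec loads and stores (lvx/stvx) ignore the low
	 four address bits, so such a vector access may appear as
	   (mem:V4SI (and (plus (reg) (reg)) (const_int -16)))
	 The AND form is only accepted here for 16-byte modes.  */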
11983 and_arg
= XEXP (addr
, 0);
11984 if (GET_MODE_SIZE (mode
) != 16
11985 || !CONST_INT_P (XEXP (addr
, 1))
11986 || INTVAL (XEXP (addr
, 1)) != -16)
11988 fail_msg
= "bad Altivec AND #1";
11992 if (rclass
!= ALTIVEC_REGS
)
11994 if (legitimate_indirect_address_p (and_arg
, false))
11997 else if (legitimate_indexed_address_p (and_arg
, false))
12002 fail_msg
= "bad Altivec AND #2";
12010 /* If this is an indirect address, make sure it is a base register. */
12013 if (!legitimate_indirect_address_p (addr
, false))
12020 /* If this is an indexed address, make sure the register class can handle
12021 indexed addresses for this mode. */
12023 plus_arg0
= XEXP (addr
, 0);
12024 plus_arg1
= XEXP (addr
, 1);
12026 /* (plus (plus (reg) (constant)) (constant)) is generated during
12027 push_reload processing, so handle it now. */
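	  /* For illustration (hypothetical values): reloading a spill slot
	     far from the stack pointer can produce an address such as
	       (plus (plus (reg 1) (const_int 32768)) (const_int 8))
	     which needs an extra instruction when the class cannot handle an
	     offset address directly.  */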
12028 if (GET_CODE (plus_arg0
) == PLUS
&& CONST_INT_P (plus_arg1
))
12030 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12037 /* (plus (plus (reg) (constant)) (reg)) is also generated during
12038 push_reload processing, so handle it now. */
12039 else if (GET_CODE (plus_arg0
) == PLUS
&& REG_P (plus_arg1
))
12041 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
12044 type
= "indexed #2";
12048 else if (!base_reg_operand (plus_arg0
, GET_MODE (plus_arg0
)))
12050 fail_msg
= "no base register #2";
12054 else if (int_reg_operand (plus_arg1
, GET_MODE (plus_arg1
)))
12056 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0
12057 || !legitimate_indexed_address_p (addr
, false))
12064 else if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0
12065 && CONST_INT_P (plus_arg1
))
12067 if (!quad_address_offset_p (INTVAL (plus_arg1
)))
12070 type
= "vector d-form offset";
12074 /* Make sure the register class can handle offset addresses. */
12075 else if (rs6000_legitimate_offset_address_p (mode
, addr
, false, true))
12077 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12080 type
= "offset #2";
12086 fail_msg
= "bad PLUS";
12093 /* Quad offsets are restricted and can't handle normal addresses. */
12094 if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
12097 type
= "vector d-form lo_sum";
12100 else if (!legitimate_lo_sum_address_p (mode
, addr
, false))
12102 fail_msg
= "bad LO_SUM";
12106 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12113 /* Static addresses need to create a TOC entry. */
12117 if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
12120 type
= "vector d-form lo_sum #2";
12126 extra_cost
= rs6000_secondary_reload_toc_costs (addr_mask
);
12130 /* TOC references look like offsetable memory. */
12132 if (TARGET_CMODEL
== CMODEL_SMALL
|| XINT (addr
, 1) != UNSPEC_TOCREL
)
12134 fail_msg
= "bad UNSPEC";
12138 else if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
12141 type
= "vector d-form lo_sum #3";
12144 else if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12147 type
= "toc reference";
12153 fail_msg
= "bad address";
12158 if (TARGET_DEBUG_ADDR
/* && extra_cost != 0 */)
12160 if (extra_cost
< 0)
12162 "rs6000_secondary_reload_memory error: mode = %s, "
12163 "class = %s, addr_mask = '%s', %s\n",
12164 GET_MODE_NAME (mode
),
12165 reg_class_names
[rclass
],
12166 rs6000_debug_addr_mask (addr_mask
, false),
12167 (fail_msg
!= NULL
) ? fail_msg
: "<bad address>");
12171 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12172 "addr_mask = '%s', extra cost = %d, %s\n",
12173 GET_MODE_NAME (mode
),
12174 reg_class_names
[rclass
],
12175 rs6000_debug_addr_mask (addr_mask
, false),
12177 (type
) ? type
: "<none>");
/* Helper function for rs6000_secondary_reload to return true if a move to a
   different register class is really a simple move.  */
12189 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type
,
12190 enum rs6000_reg_type from_type
,
12193 int size
= GET_MODE_SIZE (mode
);
12195 /* Add support for various direct moves available. In this function, we only
12196 look at cases where we don't need any extra registers, and one or more
12197 simple move insns are issued. Originally small integers are not allowed
12198 in FPR/VSX registers. Single precision binary floating is not a simple
12199 move because we need to convert to the single precision memory layout.
12200 The 4-byte SDmode can be moved. TDmode values are disallowed since they
12201 need special direct move handling, which we do not support yet. */
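  /* For illustration: on a 64-bit target, a DImode move between a GPR and a
     VSX register is a single mtvsrd/mfvsrd and so counts as simple, while an
     SFmode move is not simple because the value must also be converted
     to/from the single-precision layout (xscvdpspn/xscvspdpn).  */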
12202 if (TARGET_DIRECT_MOVE
12203 && ((to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
12204 || (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
12206 if (TARGET_POWERPC64
)
	  /* ISA 2.07: MTVSRD or MFVSRD.  */
12212 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
12213 if (size
== 16 && TARGET_P9_VECTOR
&& mode
!= TDmode
)
12217 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12218 if (TARGET_P8_VECTOR
)
12220 if (mode
== SImode
)
12223 if (TARGET_P9_VECTOR
&& (mode
== HImode
|| mode
== QImode
))
12227 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12228 if (mode
== SDmode
)
12232 /* Move to/from SPR. */
12233 else if ((size
== 4 || (TARGET_POWERPC64
&& size
== 8))
12234 && ((to_type
== GPR_REG_TYPE
&& from_type
== SPR_REG_TYPE
)
12235 || (to_type
== SPR_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
12241 /* Direct move helper function for rs6000_secondary_reload, handle all of the
12242 special direct moves that involve allocating an extra register, return the
12243 insn code of the helper function if there is such a function or
12244 CODE_FOR_nothing if not. */
12247 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type
,
12248 enum rs6000_reg_type from_type
,
12250 secondary_reload_info
*sri
,
12254 enum insn_code icode
= CODE_FOR_nothing
;
12256 int size
= GET_MODE_SIZE (mode
);
12258 if (TARGET_POWERPC64
&& size
== 16)
      /* Handle moving 128-bit values from GPRs to VSX registers on
	 ISA 2.07 (power8, power9) when running in 64-bit mode using
	 XXPERMDI to glue the two 64-bit values back together.  */
12263 if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)
12265 cost
= 3; /* 2 mtvsrd's, 1 xxpermdi. */
12266 icode
= reg_addr
[mode
].reload_vsx_gpr
;
      /* Handle moving 128-bit values from VSX registers to GPRs on
	 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to
	 the bottom 64-bit value.  */
12272 else if (to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
12274 cost
= 3; /* 2 mfvsrd's, 1 xxpermdi. */
12275 icode
= reg_addr
[mode
].reload_gpr_vsx
;
12279 else if (TARGET_POWERPC64
&& mode
== SFmode
)
12281 if (to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
12283 cost
= 3; /* xscvdpspn, mfvsrd, and. */
12284 icode
= reg_addr
[mode
].reload_gpr_vsx
;
12287 else if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)
12289 cost
= 2; /* mtvsrz, xscvspdpn. */
12290 icode
= reg_addr
[mode
].reload_vsx_gpr
;
12294 else if (!TARGET_POWERPC64
&& size
== 8)
12296 /* Handle moving 64-bit values from GPRs to floating point registers on
12297 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
12298 32-bit values back together. Altivec register classes must be handled
12299 specially since a different instruction is used, and the secondary
12300 reload support requires a single instruction class in the scratch
12301 register constraint. However, right now TFmode is not allowed in
12302 Altivec registers, so the pattern will never match. */
12303 if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
&& !altivec_p
)
12305 cost
= 3; /* 2 mtvsrwz's, 1 fmrgow. */
12306 icode
= reg_addr
[mode
].reload_fpr_gpr
;
12310 if (icode
!= CODE_FOR_nothing
)
12315 sri
->icode
= icode
;
12316 sri
->extra_cost
= cost
;
/* Return whether a move between two register classes can be done either
   directly (simple move) or via a pattern that uses a single extra temporary
   (using ISA 2.07's direct move in this case).  */
12328 rs6000_secondary_reload_move (enum rs6000_reg_type to_type
,
12329 enum rs6000_reg_type from_type
,
12331 secondary_reload_info
*sri
,
12334 /* Fall back to load/store reloads if either type is not a register. */
12335 if (to_type
== NO_REG_TYPE
|| from_type
== NO_REG_TYPE
)
12338 /* If we haven't allocated registers yet, assume the move can be done for the
12339 standard register types. */
12340 if ((to_type
== PSEUDO_REG_TYPE
&& from_type
== PSEUDO_REG_TYPE
)
12341 || (to_type
== PSEUDO_REG_TYPE
&& IS_STD_REG_TYPE (from_type
))
12342 || (from_type
== PSEUDO_REG_TYPE
&& IS_STD_REG_TYPE (to_type
)))
  /* Moves to the same set of registers are simple moves for non-specialized
     registers.  */
12347 if (to_type
== from_type
&& IS_STD_REG_TYPE (to_type
))
12350 /* Check whether a simple move can be done directly. */
12351 if (rs6000_secondary_reload_simple_move (to_type
, from_type
, mode
))
12355 sri
->icode
= CODE_FOR_nothing
;
12356 sri
->extra_cost
= 0;
12361 /* Now check if we can do it in a few steps. */
12362 return rs6000_secondary_reload_direct_move (to_type
, from_type
, mode
, sri
,
12366 /* Inform reload about cases where moving X with a mode MODE to a register in
12367 RCLASS requires an extra scratch or immediate register. Return the class
12368 needed for the immediate register.
   For VSX and Altivec, we may need a register to convert sp+offset into
   reg+sp.

   For misaligned 64-bit gpr loads and stores we need a register to
   convert an offset address to indirect.  */
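/* For illustration: an Altivec-class reload of a V4SImode value at sp+offset
   cannot use the offset form directly (lvx/stvx only take reg+reg or
   register-indirect addresses), so the offset is first added into the
   scratch register and the vector access then goes through it.  */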
12377 rs6000_secondary_reload (bool in_p
,
12379 reg_class_t rclass_i
,
12381 secondary_reload_info
*sri
)
12383 enum reg_class rclass
= (enum reg_class
) rclass_i
;
12384 reg_class_t ret
= ALL_REGS
;
12385 enum insn_code icode
;
12386 bool default_p
= false;
12387 bool done_p
= false;
12389 /* Allow subreg of memory before/during reload. */
12390 bool memory_p
= (MEM_P (x
)
12391 || (!reload_completed
&& SUBREG_P (x
)
12392 && MEM_P (SUBREG_REG (x
))));
12394 sri
->icode
= CODE_FOR_nothing
;
12395 sri
->t_icode
= CODE_FOR_nothing
;
12396 sri
->extra_cost
= 0;
12398 ? reg_addr
[mode
].reload_load
12399 : reg_addr
[mode
].reload_store
);
12401 if (REG_P (x
) || register_operand (x
, mode
))
12403 enum rs6000_reg_type to_type
= reg_class_to_reg_type
[(int)rclass
];
12404 bool altivec_p
= (rclass
== ALTIVEC_REGS
);
12405 enum rs6000_reg_type from_type
= register_to_reg_type (x
, &altivec_p
);
12408 std::swap (to_type
, from_type
);
12410 /* Can we do a direct move of some sort? */
12411 if (rs6000_secondary_reload_move (to_type
, from_type
, mode
, sri
,
12414 icode
= (enum insn_code
)sri
->icode
;
12421 /* Make sure 0.0 is not reloaded or forced into memory. */
12422 if (x
== CONST0_RTX (mode
) && VSX_REG_CLASS_P (rclass
))
12429 /* If this is a scalar floating point value and we want to load it into the
12430 traditional Altivec registers, do it via a move via a traditional floating
12431 point register, unless we have D-form addressing. Also make sure that
12432 non-zero constants use a FPR. */
12433 if (!done_p
&& reg_addr
[mode
].scalar_in_vmx_p
12434 && !mode_supports_vmx_dform (mode
)
12435 && (rclass
== VSX_REGS
|| rclass
== ALTIVEC_REGS
)
12436 && (memory_p
|| CONST_DOUBLE_P (x
)))
12443 /* Handle reload of load/stores if we have reload helper functions. */
12444 if (!done_p
&& icode
!= CODE_FOR_nothing
&& memory_p
)
12446 int extra_cost
= rs6000_secondary_reload_memory (XEXP (x
, 0), rclass
,
12449 if (extra_cost
>= 0)
12453 if (extra_cost
> 0)
12455 sri
->extra_cost
= extra_cost
;
12456 sri
->icode
= icode
;
12461 /* Handle unaligned loads and stores of integer registers. */
12462 if (!done_p
&& TARGET_POWERPC64
12463 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
12465 && GET_MODE_SIZE (GET_MODE (x
)) >= UNITS_PER_WORD
)
12467 rtx addr
= XEXP (x
, 0);
12468 rtx off
= address_offset (addr
);
12470 if (off
!= NULL_RTX
)
12472 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
12473 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
12475 /* We need a secondary reload when our legitimate_address_p
12476 says the address is good (as otherwise the entire address
12477 will be reloaded), and the offset is not a multiple of
12478 four or we have an address wrap. Address wrap will only
12479 occur for LO_SUMs since legitimate_offset_address_p
12480 rejects addresses for 16-byte mems that will wrap. */
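	      /* For example, a DImode access whose LO_SUM offset works out
		 to 0x8002 needs this reload: ld/std are DS-form instructions
		 whose displacement must be a multiple of four, so the
		 address is formed in the scratch register instead.  */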
12481 if (GET_CODE (addr
) == LO_SUM
12482 ? (1 /* legitimate_address_p allows any offset for lo_sum */
12483 && ((offset
& 3) != 0
12484 || ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
))
12485 : (offset
+ 0x8000 < 0x10000 - extra
/* legitimate_address_p */
12486 && (offset
& 3) != 0))
12488 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
12490 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_load
12491 : CODE_FOR_reload_di_load
);
12493 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_store
12494 : CODE_FOR_reload_di_store
);
12495 sri
->extra_cost
= 2;
12506 if (!done_p
&& !TARGET_POWERPC64
12507 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
12509 && GET_MODE_SIZE (GET_MODE (x
)) > UNITS_PER_WORD
)
12511 rtx addr
= XEXP (x
, 0);
12512 rtx off
= address_offset (addr
);
12514 if (off
!= NULL_RTX
)
12516 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
12517 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
12519 /* We need a secondary reload when our legitimate_address_p
12520 says the address is good (as otherwise the entire address
12521 will be reloaded), and we have a wrap.
12523 legitimate_lo_sum_address_p allows LO_SUM addresses to
12524 have any offset so test for wrap in the low 16 bits.
12526 legitimate_offset_address_p checks for the range
12527 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
12528 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
12529 [0x7ff4,0x7fff] respectively, so test for the
12530 intersection of these ranges, [0x7ffc,0x7fff] and
12531 [0x7ff4,0x7ff7] respectively.
12533 Note that the address we see here may have been
12534 manipulated by legitimize_reload_address. */
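	      /* Worked example: with -m32, a DImode value lives in two GPRs,
		 so a load at offset 0x7ffe would need its second word at
		 0x8002, past the reach of a signed 16-bit displacement; the
		 wrap is handled by switching to an indirect address via the
		 scratch register.  */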
12535 if (GET_CODE (addr
) == LO_SUM
12536 ? ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
12537 : offset
- (0x8000 - extra
) < UNITS_PER_WORD
)
12540 sri
->icode
= CODE_FOR_reload_si_load
;
12542 sri
->icode
= CODE_FOR_reload_si_store
;
12543 sri
->extra_cost
= 2;
12558 ret
= default_secondary_reload (in_p
, x
, rclass
, mode
, sri
);
12560 gcc_assert (ret
!= ALL_REGS
);
12562 if (TARGET_DEBUG_ADDR
)
12565 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
12567 reg_class_names
[ret
],
12568 in_p
? "true" : "false",
12569 reg_class_names
[rclass
],
12570 GET_MODE_NAME (mode
));
12572 if (reload_completed
)
12573 fputs (", after reload", stderr
);
12576 fputs (", done_p not set", stderr
);
12579 fputs (", default secondary reload", stderr
);
12581 if (sri
->icode
!= CODE_FOR_nothing
)
12582 fprintf (stderr
, ", reload func = %s, extra cost = %d",
12583 insn_data
[sri
->icode
].name
, sri
->extra_cost
);
12585 else if (sri
->extra_cost
> 0)
12586 fprintf (stderr
, ", extra cost = %d", sri
->extra_cost
);
12588 fputs ("\n", stderr
);
12595 /* Better tracing for rs6000_secondary_reload_inner. */
12598 rs6000_secondary_reload_trace (int line
, rtx reg
, rtx mem
, rtx scratch
,
12603 gcc_assert (reg
!= NULL_RTX
&& mem
!= NULL_RTX
&& scratch
!= NULL_RTX
);
12605 fprintf (stderr
, "rs6000_secondary_reload_inner:%d, type = %s\n", line
,
12606 store_p
? "store" : "load");
12609 set
= gen_rtx_SET (mem
, reg
);
12611 set
= gen_rtx_SET (reg
, mem
);
12613 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
12614 debug_rtx (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
12617 static void rs6000_secondary_reload_fail (int, rtx
, rtx
, rtx
, bool)
12618 ATTRIBUTE_NORETURN
;
12621 rs6000_secondary_reload_fail (int line
, rtx reg
, rtx mem
, rtx scratch
,
12624 rs6000_secondary_reload_trace (line
, reg
, mem
, scratch
, store_p
);
12625 gcc_unreachable ();
12628 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
12629 reload helper functions. These were identified in
12630 rs6000_secondary_reload_memory, and if reload decided to use the secondary
12631 reload, it calls the insns:
12632 reload_<RELOAD:mode>_<P:mptrsize>_store
12633 reload_<RELOAD:mode>_<P:mptrsize>_load
   which in turn call this function, to do whatever is necessary to create
12636 valid addresses. */
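/* Rough flow (illustrative): reload allocates the scratch register demanded
   by the reload_* pattern's constraint, the pattern's expander calls this
   function with (REG, MEM, SCRATCH), the address inside MEM is rewritten
   (often by copying it into SCRATCH) until it is valid for REG's register
   class, and then the actual load or store is emitted.  */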
12639 rs6000_secondary_reload_inner (rtx reg
, rtx mem
, rtx scratch
, bool store_p
)
12641 int regno
= true_regnum (reg
);
12642 machine_mode mode
= GET_MODE (reg
);
12643 addr_mask_type addr_mask
;
12646 rtx op_reg
, op0
, op1
;
12651 if (regno
< 0 || !HARD_REGISTER_NUM_P (regno
) || !MEM_P (mem
)
12652 || !base_reg_operand (scratch
, GET_MODE (scratch
)))
12653 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12655 if (IN_RANGE (regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
))
12656 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_GPR
];
12658 else if (IN_RANGE (regno
, FIRST_FPR_REGNO
, LAST_FPR_REGNO
))
12659 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
];
12661 else if (IN_RANGE (regno
, FIRST_ALTIVEC_REGNO
, LAST_ALTIVEC_REGNO
))
12662 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
];
12665 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12667 /* Make sure the mode is valid in this register class. */
12668 if ((addr_mask
& RELOAD_REG_VALID
) == 0)
12669 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12671 if (TARGET_DEBUG_ADDR
)
12672 rs6000_secondary_reload_trace (__LINE__
, reg
, mem
, scratch
, store_p
);
12674 new_addr
= addr
= XEXP (mem
, 0);
12675 switch (GET_CODE (addr
))
12677 /* Does the register class support auto update forms for this mode? If
12678 not, do the update now. We don't need a scratch register, since the
12679 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
12682 op_reg
= XEXP (addr
, 0);
12683 if (!base_reg_operand (op_reg
, Pmode
))
12684 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12686 if ((addr_mask
& RELOAD_REG_PRE_INCDEC
) == 0)
12688 int delta
= GET_MODE_SIZE (mode
);
12689 if (GET_CODE (addr
) == PRE_DEC
)
12691 emit_insn (gen_add2_insn (op_reg
, GEN_INT (delta
)));
12697 op0
= XEXP (addr
, 0);
12698 op1
= XEXP (addr
, 1);
12699 if (!base_reg_operand (op0
, Pmode
)
12700 || GET_CODE (op1
) != PLUS
12701 || !rtx_equal_p (op0
, XEXP (op1
, 0)))
12702 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12704 if ((addr_mask
& RELOAD_REG_PRE_MODIFY
) == 0)
12706 emit_insn (gen_rtx_SET (op0
, op1
));
12711 /* Do we need to simulate AND -16 to clear the bottom address bits used
12712 in VMX load/stores? */
12714 op0
= XEXP (addr
, 0);
12715 op1
= XEXP (addr
, 1);
12716 if ((addr_mask
& RELOAD_REG_AND_M16
) == 0)
12718 if (REG_P (op0
) || SUBREG_P (op0
))
12721 else if (GET_CODE (op1
) == PLUS
)
12723 emit_insn (gen_rtx_SET (scratch
, op1
));
12728 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12730 and_op
= gen_rtx_AND (GET_MODE (scratch
), op_reg
, op1
);
12731 cc_clobber
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (CCmode
));
12732 rv
= gen_rtvec (2, gen_rtx_SET (scratch
, and_op
), cc_clobber
);
12733 emit_insn (gen_rtx_PARALLEL (VOIDmode
, rv
));
12734 new_addr
= scratch
;
12738 /* If this is an indirect address, make sure it is a base register. */
12741 if (!base_reg_operand (addr
, GET_MODE (addr
)))
12743 emit_insn (gen_rtx_SET (scratch
, addr
));
12744 new_addr
= scratch
;
12748 /* If this is an indexed address, make sure the register class can handle
12749 indexed addresses for this mode. */
12751 op0
= XEXP (addr
, 0);
12752 op1
= XEXP (addr
, 1);
12753 if (!base_reg_operand (op0
, Pmode
))
12754 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12756 else if (int_reg_operand (op1
, Pmode
))
12758 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
12760 emit_insn (gen_rtx_SET (scratch
, addr
));
12761 new_addr
= scratch
;
12765 else if (mode_supports_dq_form (mode
) && CONST_INT_P (op1
))
12767 if (((addr_mask
& RELOAD_REG_QUAD_OFFSET
) == 0)
12768 || !quad_address_p (addr
, mode
, false))
12770 emit_insn (gen_rtx_SET (scratch
, addr
));
12771 new_addr
= scratch
;
12775 /* Make sure the register class can handle offset addresses. */
12776 else if (rs6000_legitimate_offset_address_p (mode
, addr
, false, true))
12778 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12780 emit_insn (gen_rtx_SET (scratch
, addr
));
12781 new_addr
= scratch
;
12786 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12791 op0
= XEXP (addr
, 0);
12792 op1
= XEXP (addr
, 1);
12793 if (!base_reg_operand (op0
, Pmode
))
12794 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12796 else if (int_reg_operand (op1
, Pmode
))
12798 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
12800 emit_insn (gen_rtx_SET (scratch
, addr
));
12801 new_addr
= scratch
;
12805 /* Quad offsets are restricted and can't handle normal addresses. */
12806 else if (mode_supports_dq_form (mode
))
12808 emit_insn (gen_rtx_SET (scratch
, addr
));
12809 new_addr
= scratch
;
12812 /* Make sure the register class can handle offset addresses. */
12813 else if (legitimate_lo_sum_address_p (mode
, addr
, false))
12815 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12817 emit_insn (gen_rtx_SET (scratch
, addr
));
12818 new_addr
= scratch
;
12823 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12830 rs6000_emit_move (scratch
, addr
, Pmode
);
12831 new_addr
= scratch
;
12835 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12838 /* Adjust the address if it changed. */
12839 if (addr
!= new_addr
)
12841 mem
= replace_equiv_address_nv (mem
, new_addr
);
12842 if (TARGET_DEBUG_ADDR
)
12843 fprintf (stderr
, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
12846 /* Now create the move. */
12848 emit_insn (gen_rtx_SET (mem
, reg
));
12850 emit_insn (gen_rtx_SET (reg
, mem
));
12855 /* Convert reloads involving 64-bit gprs and misaligned offset
12856 addressing, or multiple 32-bit gprs and offsets that are too large,
12857 to use indirect addressing. */
12860 rs6000_secondary_reload_gpr (rtx reg
, rtx mem
, rtx scratch
, bool store_p
)
12862 int regno
= true_regnum (reg
);
12863 enum reg_class rclass
;
12865 rtx scratch_or_premodify
= scratch
;
12867 if (TARGET_DEBUG_ADDR
)
12869 fprintf (stderr
, "\nrs6000_secondary_reload_gpr, type = %s\n",
12870 store_p
? "store" : "load");
12871 fprintf (stderr
, "reg:\n");
12873 fprintf (stderr
, "mem:\n");
12875 fprintf (stderr
, "scratch:\n");
12876 debug_rtx (scratch
);
12879 gcc_assert (regno
>= 0 && HARD_REGISTER_NUM_P (regno
));
12880 gcc_assert (MEM_P (mem
));
12881 rclass
= REGNO_REG_CLASS (regno
);
12882 gcc_assert (rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
);
12883 addr
= XEXP (mem
, 0);
12885 if (GET_CODE (addr
) == PRE_MODIFY
)
12887 gcc_assert (REG_P (XEXP (addr
, 0))
12888 && GET_CODE (XEXP (addr
, 1)) == PLUS
12889 && XEXP (XEXP (addr
, 1), 0) == XEXP (addr
, 0));
12890 scratch_or_premodify
= XEXP (addr
, 0);
12891 addr
= XEXP (addr
, 1);
12893 gcc_assert (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
);
12895 rs6000_emit_move (scratch_or_premodify
, addr
, Pmode
);
12897 mem
= replace_equiv_address_nv (mem
, scratch_or_premodify
);
12899 /* Now create the move. */
12901 emit_insn (gen_rtx_SET (mem
, reg
));
12903 emit_insn (gen_rtx_SET (reg
, mem
));
12908 /* Given an rtx X being reloaded into a reg required to be
12909 in class CLASS, return the class of reg to actually use.
12910 In general this is just CLASS; but on some machines
12911 in some cases it is preferable to use a more restrictive class.
12913 On the RS/6000, we have to return NO_REGS when we want to reload a
12914 floating-point CONST_DOUBLE to force it to be copied to memory.
12916 We also don't want to reload integer values into floating-point
12917 registers if we can at all help it. In fact, this can
12918 cause reload to die, if it tries to generate a reload of CTR
12919 into a FP register and discovers it doesn't have the memory location
12922 ??? Would it be a good idea to have reload do the converse, that is
12923 try to reload floating modes into FP registers if possible?
12926 static enum reg_class
12927 rs6000_preferred_reload_class (rtx x
, enum reg_class rclass
)
12929 machine_mode mode
= GET_MODE (x
);
12930 bool is_constant
= CONSTANT_P (x
);
12932 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
12933 reload class for it. */
12934 if ((rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
12935 && (reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_VALID
) == 0)
12938 if ((rclass
== FLOAT_REGS
|| rclass
== VSX_REGS
)
12939 && (reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
] & RELOAD_REG_VALID
) == 0)
12942 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
12943 the reloading of address expressions using PLUS into floating point
12945 if (TARGET_VSX
&& VSX_REG_CLASS_P (rclass
) && GET_CODE (x
) != PLUS
)
12949 /* Zero is always allowed in all VSX registers. */
12950 if (x
== CONST0_RTX (mode
))
12953 /* If this is a vector constant that can be formed with a few Altivec
12954 instructions, we want altivec registers. */
12955 if (GET_CODE (x
) == CONST_VECTOR
&& easy_vector_constant (x
, mode
))
12956 return ALTIVEC_REGS
;
12958 /* If this is an integer constant that can easily be loaded into
12959 vector registers, allow it. */
12960 if (CONST_INT_P (x
))
12962 HOST_WIDE_INT value
= INTVAL (x
);
12964 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
12965 2.06 can generate it in the Altivec registers with
12969 if (TARGET_P8_VECTOR
)
12971 else if (rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
12972 return ALTIVEC_REGS
;
12977 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
12978 a sign extend in the Altivec registers. */
12979 if (IN_RANGE (value
, -128, 127) && TARGET_P9_VECTOR
12980 && (rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
))
12981 return ALTIVEC_REGS
;
12984 /* Force constant to memory. */
12988 /* D-form addressing can easily reload the value. */
12989 if (mode_supports_vmx_dform (mode
)
12990 || mode_supports_dq_form (mode
))
12993 /* If this is a scalar floating point value and we don't have D-form
12994 addressing, prefer the traditional floating point registers so that we
12995 can use D-form (register+offset) addressing. */
12996 if (rclass
== VSX_REGS
12997 && (mode
== SFmode
|| GET_MODE_SIZE (mode
) == 8))
13000 /* Prefer the Altivec registers if Altivec is handling the vector
13001 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
13003 if (VECTOR_UNIT_ALTIVEC_P (mode
) || VECTOR_MEM_ALTIVEC_P (mode
)
13004 || mode
== V1TImode
)
13005 return ALTIVEC_REGS
;
13010 if (is_constant
|| GET_CODE (x
) == PLUS
)
13012 if (reg_class_subset_p (GENERAL_REGS
, rclass
))
13013 return GENERAL_REGS
;
13014 if (reg_class_subset_p (BASE_REGS
, rclass
))
13019 /* For the vector pair and vector quad modes, prefer their natural register
13020 (VSX or FPR) rather than GPR registers. For other integer types, prefer
13021 the GPR registers. */
13022 if (rclass
== GEN_OR_FLOAT_REGS
)
13024 if (mode
== OOmode
)
13027 if (mode
== XOmode
)
13030 if (GET_MODE_CLASS (mode
) == MODE_INT
)
13031 return GENERAL_REGS
;
13037 /* Debug version of rs6000_preferred_reload_class. */
13038 static enum reg_class
13039 rs6000_debug_preferred_reload_class (rtx x
, enum reg_class rclass
)
13041 enum reg_class ret
= rs6000_preferred_reload_class (x
, rclass
);
13044 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
13046 reg_class_names
[ret
], reg_class_names
[rclass
],
13047 GET_MODE_NAME (GET_MODE (x
)));
13053 /* If we are copying between FP or AltiVec registers and anything else, we need
13054 a memory location. The exception is when we are targeting ppc64 and the
13055 move to/from fpr to gpr instructions are available. Also, under VSX, you
13056 can copy vector registers from the FP register set to the Altivec register
13057 set and vice versa. */
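/* For illustration: without the ISA 2.07 direct-move instructions, copying a
   DImode value between a GPR and an FPR has to go through memory (typically
   a stack slot), so this hook returns true for that pair; with direct moves,
   or for a GPR-to-GPR copy, it returns false.  */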
13060 rs6000_secondary_memory_needed (machine_mode mode
,
13061 reg_class_t from_class
,
13062 reg_class_t to_class
)
13064 enum rs6000_reg_type from_type
, to_type
;
13065 bool altivec_p
= ((from_class
== ALTIVEC_REGS
)
13066 || (to_class
== ALTIVEC_REGS
));
13068 /* If a simple/direct move is available, we don't need secondary memory */
13069 from_type
= reg_class_to_reg_type
[(int)from_class
];
13070 to_type
= reg_class_to_reg_type
[(int)to_class
];
13072 if (rs6000_secondary_reload_move (to_type
, from_type
, mode
,
13073 (secondary_reload_info
*)0, altivec_p
))
13076 /* If we have a floating point or vector register class, we need to use
13077 memory to transfer the data. */
13078 if (IS_FP_VECT_REG_TYPE (from_type
) || IS_FP_VECT_REG_TYPE (to_type
))
13084 /* Debug version of rs6000_secondary_memory_needed. */
13086 rs6000_debug_secondary_memory_needed (machine_mode mode
,
13087 reg_class_t from_class
,
13088 reg_class_t to_class
)
13090 bool ret
= rs6000_secondary_memory_needed (mode
, from_class
, to_class
);
13093 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
13094 "to_class = %s, mode = %s\n",
13095 ret
? "true" : "false",
13096 reg_class_names
[from_class
],
13097 reg_class_names
[to_class
],
13098 GET_MODE_NAME (mode
));
13103 /* Return the register class of a scratch register needed to copy IN into
13104 or out of a register in RCLASS in MODE. If it can be done directly,
13105 NO_REGS is returned. */
13107 static enum reg_class
13108 rs6000_secondary_reload_class (enum reg_class rclass
, machine_mode mode
,
13113 if (TARGET_ELF
|| (DEFAULT_ABI
== ABI_DARWIN
13115 && MACHOPIC_INDIRECT
13119 /* We cannot copy a symbolic operand directly into anything
13120 other than BASE_REGS for TARGET_ELF. So indicate that a
13121 register from BASE_REGS is needed as an intermediate
13124 On Darwin, pic addresses require a load from memory, which
13125 needs a base register. */
13126 if (rclass
!= BASE_REGS
13127 && (SYMBOL_REF_P (in
)
13128 || GET_CODE (in
) == HIGH
13129 || GET_CODE (in
) == LABEL_REF
13130 || GET_CODE (in
) == CONST
))
13136 regno
= REGNO (in
);
13137 if (!HARD_REGISTER_NUM_P (regno
))
13139 regno
= true_regnum (in
);
13140 if (!HARD_REGISTER_NUM_P (regno
))
13144 else if (SUBREG_P (in
))
13146 regno
= true_regnum (in
);
13147 if (!HARD_REGISTER_NUM_P (regno
))
13153 /* If we have VSX register moves, prefer moving scalar values between
13154 Altivec registers and GPR by going via an FPR (and then via memory)
13155 instead of reloading the secondary memory address for Altivec moves. */
13157 && GET_MODE_SIZE (mode
) < 16
13158 && !mode_supports_vmx_dform (mode
)
13159 && (((rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
)
13160 && (regno
>= 0 && ALTIVEC_REGNO_P (regno
)))
13161 || ((rclass
== VSX_REGS
|| rclass
== ALTIVEC_REGS
)
13162 && (regno
>= 0 && INT_REGNO_P (regno
)))))
13165 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
13167 if (rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
13168 || (regno
>= 0 && INT_REGNO_P (regno
)))
13171 /* Constants, memory, and VSX registers can go into VSX registers (both the
13172 traditional floating point and the altivec registers). */
13173 if (rclass
== VSX_REGS
13174 && (regno
== -1 || VSX_REGNO_P (regno
)))
13177 /* Constants, memory, and FP registers can go into FP registers. */
13178 if ((regno
== -1 || FP_REGNO_P (regno
))
13179 && (rclass
== FLOAT_REGS
|| rclass
== GEN_OR_FLOAT_REGS
))
13180 return (mode
!= SDmode
|| lra_in_progress
) ? NO_REGS
: GENERAL_REGS
;
13182 /* Memory, and AltiVec registers can go into AltiVec registers. */
13183 if ((regno
== -1 || ALTIVEC_REGNO_P (regno
))
13184 && rclass
== ALTIVEC_REGS
)
13187 /* We can copy among the CR registers. */
13188 if ((rclass
== CR_REGS
|| rclass
== CR0_REGS
)
13189 && regno
>= 0 && CR_REGNO_P (regno
))
13192 /* Otherwise, we need GENERAL_REGS. */
13193 return GENERAL_REGS
;
13196 /* Debug version of rs6000_secondary_reload_class. */
13197 static enum reg_class
13198 rs6000_debug_secondary_reload_class (enum reg_class rclass
,
13199 machine_mode mode
, rtx in
)
13201 enum reg_class ret
= rs6000_secondary_reload_class (rclass
, mode
, in
);
13203 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
13204 "mode = %s, input rtx:\n",
13205 reg_class_names
[ret
], reg_class_names
[rclass
],
13206 GET_MODE_NAME (mode
));
13212 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
13215 rs6000_can_change_mode_class (machine_mode from
,
13217 reg_class_t rclass
)
13219 unsigned from_size
= GET_MODE_SIZE (from
);
13220 unsigned to_size
= GET_MODE_SIZE (to
);
13222 if (from_size
!= to_size
)
13224 enum reg_class xclass
= (TARGET_VSX
) ? VSX_REGS
: FLOAT_REGS
;
13226 if (reg_classes_intersect_p (xclass
, rclass
))
13228 unsigned to_nregs
= hard_regno_nregs (FIRST_FPR_REGNO
, to
);
13229 unsigned from_nregs
= hard_regno_nregs (FIRST_FPR_REGNO
, from
);
13230 bool to_float128_vector_p
= FLOAT128_VECTOR_P (to
);
13231 bool from_float128_vector_p
= FLOAT128_VECTOR_P (from
);
13233 /* Don't allow 64-bit types to overlap with 128-bit types that take a
13234 single register under VSX because the scalar part of the register
13235 is in the upper 64-bits, and not the lower 64-bits. Types like
13236 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
13237 IEEE floating point can't overlap, and neither can small
13240 if (to_float128_vector_p
&& from_float128_vector_p
)
13243 else if (to_float128_vector_p
|| from_float128_vector_p
)
13246 /* TDmode in floating-mode registers must always go into a register
13247 pair with the most significant word in the even-numbered register
13248 to match ISA requirements. In little-endian mode, this does not
13249 match subreg numbering, so we cannot allow subregs. */
13250 if (!BYTES_BIG_ENDIAN
&& (to
== TDmode
|| from
== TDmode
))
13253 /* Allow SD<->DD changes, since SDmode values are stored in
13254 the low half of the DDmode, just like target-independent
13255 code expects. We need to allow at least SD->DD since
13256 rs6000_secondary_memory_needed_mode asks for that change
13257 to be made for SD reloads. */
13258 if ((to
== DDmode
&& from
== SDmode
)
13259 || (to
== SDmode
&& from
== DDmode
))
13262 if (from_size
< 8 || to_size
< 8)
13265 if (from_size
== 8 && (8 * to_nregs
) != to_size
)
13268 if (to_size
== 8 && (8 * from_nregs
) != from_size
)
13277 /* Since the VSX register set includes traditional floating point registers
13278 and altivec registers, just check for the size being different instead of
13279 trying to check whether the modes are vector modes. Otherwise it won't
13280 allow say DF and DI to change classes. For types like TFmode and TDmode
13281 that take 2 64-bit registers, rather than a single 128-bit register, don't
13282 allow subregs of those types to other 128 bit types. */
13283 if (TARGET_VSX
&& VSX_REG_CLASS_P (rclass
))
13285 unsigned num_regs
= (from_size
+ 15) / 16;
13286 if (hard_regno_nregs (FIRST_FPR_REGNO
, to
) > num_regs
13287 || hard_regno_nregs (FIRST_FPR_REGNO
, from
) > num_regs
)
13290 return (from_size
== 8 || from_size
== 16);
13293 if (TARGET_ALTIVEC
&& rclass
== ALTIVEC_REGS
13294 && (ALTIVEC_VECTOR_MODE (from
) + ALTIVEC_VECTOR_MODE (to
)) == 1)
13300 /* Debug version of rs6000_can_change_mode_class. */
13302 rs6000_debug_can_change_mode_class (machine_mode from
,
13304 reg_class_t rclass
)
13306 bool ret
= rs6000_can_change_mode_class (from
, to
, rclass
);
13309 "rs6000_can_change_mode_class, return %s, from = %s, "
13310 "to = %s, rclass = %s\n",
13311 ret
? "true" : "false",
13312 GET_MODE_NAME (from
), GET_MODE_NAME (to
),
13313 reg_class_names
[rclass
]);
13318 /* Return a string to do a move operation of 128 bits of data. */
13321 rs6000_output_move_128bit (rtx operands
[])
13323 rtx dest
= operands
[0];
13324 rtx src
= operands
[1];
13325 machine_mode mode
= GET_MODE (dest
);
13328 bool dest_gpr_p
, dest_fp_p
, dest_vmx_p
, dest_vsx_p
;
13329 bool src_gpr_p
, src_fp_p
, src_vmx_p
, src_vsx_p
;
13333 dest_regno
= REGNO (dest
);
13334 dest_gpr_p
= INT_REGNO_P (dest_regno
);
13335 dest_fp_p
= FP_REGNO_P (dest_regno
);
13336 dest_vmx_p
= ALTIVEC_REGNO_P (dest_regno
);
13337 dest_vsx_p
= dest_fp_p
| dest_vmx_p
;
13342 dest_gpr_p
= dest_fp_p
= dest_vmx_p
= dest_vsx_p
= false;
13347 src_regno
= REGNO (src
);
13348 src_gpr_p
= INT_REGNO_P (src_regno
);
13349 src_fp_p
= FP_REGNO_P (src_regno
);
13350 src_vmx_p
= ALTIVEC_REGNO_P (src_regno
);
13351 src_vsx_p
= src_fp_p
| src_vmx_p
;
13356 src_gpr_p
= src_fp_p
= src_vmx_p
= src_vsx_p
= false;
13359 /* Register moves. */
13360 if (dest_regno
>= 0 && src_regno
>= 0)
13367 if (TARGET_DIRECT_MOVE_128
&& src_vsx_p
)
13368 return (WORDS_BIG_ENDIAN
13369 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
13370 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
13372 else if (TARGET_VSX
&& TARGET_DIRECT_MOVE
&& src_vsx_p
)
13376 else if (TARGET_VSX
&& dest_vsx_p
)
13379 return "xxlor %x0,%x1,%x1";
13381 else if (TARGET_DIRECT_MOVE_128
&& src_gpr_p
)
13382 return (WORDS_BIG_ENDIAN
13383 ? "mtvsrdd %x0,%1,%L1"
13384 : "mtvsrdd %x0,%L1,%1");
13386 else if (TARGET_DIRECT_MOVE
&& src_gpr_p
)
13390 else if (TARGET_ALTIVEC
&& dest_vmx_p
&& src_vmx_p
)
13391 return "vor %0,%1,%1";
13393 else if (dest_fp_p
&& src_fp_p
)
13398 else if (dest_regno
>= 0 && MEM_P (src
))
13402 if (TARGET_QUAD_MEMORY
&& quad_load_store_p (dest
, src
))
13408 else if (TARGET_ALTIVEC
&& dest_vmx_p
13409 && altivec_indexed_or_indirect_operand (src
, mode
))
13410 return "lvx %0,%y1";
13412 else if (TARGET_VSX
&& dest_vsx_p
)
13414 if (mode_supports_dq_form (mode
)
13415 && quad_address_p (XEXP (src
, 0), mode
, true))
13416 return "lxv %x0,%1";
13418 else if (TARGET_P9_VECTOR
)
13419 return "lxvx %x0,%y1";
13421 else if (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
13422 return "lxvw4x %x0,%y1";
13425 return "lxvd2x %x0,%y1";
13428 else if (TARGET_ALTIVEC
&& dest_vmx_p
)
13429 return "lvx %0,%y1";
13431 else if (dest_fp_p
)
13436 else if (src_regno
>= 0 && MEM_P (dest
))
13440 if (TARGET_QUAD_MEMORY
&& quad_load_store_p (dest
, src
))
13441 return "stq %1,%0";
13446 else if (TARGET_ALTIVEC
&& src_vmx_p
13447 && altivec_indexed_or_indirect_operand (dest
, mode
))
13448 return "stvx %1,%y0";
13450 else if (TARGET_VSX
&& src_vsx_p
)
13452 if (mode_supports_dq_form (mode
)
13453 && quad_address_p (XEXP (dest
, 0), mode
, true))
13454 return "stxv %x1,%0";
13456 else if (TARGET_P9_VECTOR
)
13457 return "stxvx %x1,%y0";
13459 else if (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
13460 return "stxvw4x %x1,%y0";
13463 return "stxvd2x %x1,%y0";
13466 else if (TARGET_ALTIVEC
&& src_vmx_p
)
13467 return "stvx %1,%y0";
13474 else if (dest_regno
>= 0
13475 && (CONST_INT_P (src
)
13476 || CONST_WIDE_INT_P (src
)
13477 || CONST_DOUBLE_P (src
)
13478 || GET_CODE (src
) == CONST_VECTOR
))
13483 else if ((dest_vmx_p
&& TARGET_ALTIVEC
)
13484 || (dest_vsx_p
&& TARGET_VSX
))
13485 return output_vec_const_move (operands
);
13488 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest
, src
));
13491 /* Validate a 128-bit move. */
13493 rs6000_move_128bit_ok_p (rtx operands
[])
13495 machine_mode mode
= GET_MODE (operands
[0]);
13496 return (gpc_reg_operand (operands
[0], mode
)
13497 || gpc_reg_operand (operands
[1], mode
));
13500 /* Return true if a 128-bit move needs to be split. */
13502 rs6000_split_128bit_ok_p (rtx operands
[])
13504 if (!reload_completed
)
13507 if (!gpr_or_gpr_p (operands
[0], operands
[1]))
13510 if (quad_load_store_p (operands
[0], operands
[1]))
13517 /* Given a comparison operation, return the bit number in CCR to test. We
13518 know this is a valid comparison.
13520 SCC_P is 1 if this is for an scc. That means that %D will have been
13521 used instead of %C, so the bits will be in different places.
13523 Return -1 if OP isn't a valid comparison for some reason. */
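/* For illustration: each CR field occupies four consecutive bits, so a GT
   test against field CR2 uses bit 4*2 + 1 == 9; for an scc of a negated
   condition such as NE, a cror has already moved the result into the
   "unordered" slot, so base_bit + 3 is tested instead.  */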
13526 ccr_bit (rtx op
, int scc_p
)
13528 enum rtx_code code
= GET_CODE (op
);
13529 machine_mode cc_mode
;
13534 if (!COMPARISON_P (op
))
13537 reg
= XEXP (op
, 0);
13539 if (!REG_P (reg
) || !CR_REGNO_P (REGNO (reg
)))
13542 cc_mode
= GET_MODE (reg
);
13543 cc_regnum
= REGNO (reg
);
13544 base_bit
= 4 * (cc_regnum
- CR0_REGNO
);
13546 validate_condition_mode (code
, cc_mode
);
13548 /* When generating a sCOND operation, only positive conditions are
13567 return scc_p
? base_bit
+ 3 : base_bit
+ 2;
13569 return base_bit
+ 2;
13570 case GT
: case GTU
: case UNLE
:
13571 return base_bit
+ 1;
13572 case LT
: case LTU
: case UNGE
:
13574 case ORDERED
: case UNORDERED
:
13575 return base_bit
+ 3;
13578 /* If scc, we will have done a cror to put the bit in the
13579 unordered position. So test that bit. For integer, this is ! LT
13580 unless this is an scc insn. */
13581 return scc_p
? base_bit
+ 3 : base_bit
;
13584 return scc_p
? base_bit
+ 3 : base_bit
+ 1;
13591 /* Return the GOT register. */
13594 rs6000_got_register (rtx value ATTRIBUTE_UNUSED
)
13596 /* The second flow pass currently (June 1999) can't update
13597 regs_ever_live without disturbing other parts of the compiler, so
13598 update it here to make the prolog/epilogue code happy. */
13599 if (!can_create_pseudo_p ()
13600 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM
))
13601 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM
, true);
13603 crtl
->uses_pic_offset_table
= 1;
13605 return pic_offset_table_rtx
;
13608 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
13610 /* Write out a function code label. */
13613 rs6000_output_function_entry (FILE *file
, const char *fname
)
13615 if (fname
[0] != '.')
13617 switch (DEFAULT_ABI
)
13620 gcc_unreachable ();
13626 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "L.");
13636 RS6000_OUTPUT_BASENAME (file
, fname
);
13639 /* Print an operand. Recognize special options, documented below. */
13642 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
13643 only introduced by the linker, when applying the sda21
13645 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
13646 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
13648 #define SMALL_DATA_RELOC "sda21"
13649 #define SMALL_DATA_REG 0
13653 print_operand (FILE *file
, rtx x
, int code
)
13656 unsigned HOST_WIDE_INT uval
;
13660 /* %a is output_address. */
13662 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
13666 /* Write the MMA accumulator number associated with VSX register X. */
13667 if (!REG_P (x
) || !FP_REGNO_P (REGNO (x
)) || (REGNO (x
) % 4) != 0)
13668 output_operand_lossage ("invalid %%A value");
13670 fprintf (file
, "%d", (REGNO (x
) - FIRST_FPR_REGNO
) / 4);
13674 /* Like 'J' but get to the GT bit only. */
13675 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13677 output_operand_lossage ("invalid %%D value");
13681 /* Bit 1 is GT bit. */
13682 i
= 4 * (REGNO (x
) - CR0_REGNO
) + 1;
13684 /* Add one for shift count in rlinm for scc. */
13685 fprintf (file
, "%d", i
+ 1);
13689 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
13692 output_operand_lossage ("invalid %%e value");
13697 if ((uval
& 0xffff) == 0 && uval
!= 0)
13702 /* X is a CR register. Print the number of the EQ bit of the CR */
13703 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13704 output_operand_lossage ("invalid %%E value");
13706 fprintf (file
, "%d", 4 * (REGNO (x
) - CR0_REGNO
) + 2);
13710 /* X is a CR register. Print the shift count needed to move it
13711 to the high-order four bits. */
13712 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13713 output_operand_lossage ("invalid %%f value");
13715 fprintf (file
, "%d", 4 * (REGNO (x
) - CR0_REGNO
));
13719 /* Similar, but print the count for the rotate in the opposite
13721 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13722 output_operand_lossage ("invalid %%F value");
13724 fprintf (file
, "%d", 32 - 4 * (REGNO (x
) - CR0_REGNO
));
13728 /* X is a constant integer. If it is negative, print "m",
13729 otherwise print "z". This is to make an aze or ame insn. */
13730 if (!CONST_INT_P (x
))
13731 output_operand_lossage ("invalid %%G value");
13732 else if (INTVAL (x
) >= 0)
13739 /* If constant, output low-order five bits. Otherwise, write
13742 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 31);
13744 print_operand (file
, x
, 0);
13748 /* If constant, output low-order six bits. Otherwise, write
13751 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 63);
13753 print_operand (file
, x
, 0);
13757 /* Print `i' if this is a constant, else nothing. */
13763 /* Write the bit number in CCR for jump. */
13764 i
= ccr_bit (x
, 0);
13766 output_operand_lossage ("invalid %%j code");
13768 fprintf (file
, "%d", i
);
13772 /* Similar, but add one for shift count in rlinm for scc and pass
13773 scc flag to `ccr_bit'. */
13774 i
= ccr_bit (x
, 1);
13776 output_operand_lossage ("invalid %%J code");
13778 /* If we want bit 31, write a shift count of zero, not 32. */
13779 fprintf (file
, "%d", i
== 31 ? 0 : i
+ 1);
13783 /* X must be a constant. Write the 1's complement of the
13786 output_operand_lossage ("invalid %%k value");
13788 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ~ INTVAL (x
));
13792 /* X must be a symbolic constant on ELF. Write an
13793 expression suitable for an 'addi' that adds in the low 16
13794 bits of the MEM. */
13795 if (GET_CODE (x
) == CONST
)
13797 if (GET_CODE (XEXP (x
, 0)) != PLUS
13798 || (!SYMBOL_REF_P (XEXP (XEXP (x
, 0), 0))
13799 && GET_CODE (XEXP (XEXP (x
, 0), 0)) != LABEL_REF
)
13800 || !CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
13801 output_operand_lossage ("invalid %%K value");
13803 print_operand_address (file
, x
);
13804 fputs ("@l", file
);
13807 /* %l is output_asm_label. */
13810 /* Write second word of DImode or DFmode reference. Works on register
13811 or non-indexed memory only. */
13813 fputs (reg_names
[REGNO (x
) + 1], file
);
13814 else if (MEM_P (x
))
13816 machine_mode mode
= GET_MODE (x
);
13817 /* Handle possible auto-increment. Since it is pre-increment and
13818 we have already done it, we can just use an offset of word. */
13819 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
13820 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
13821 output_address (mode
, plus_constant (Pmode
, XEXP (XEXP (x
, 0), 0),
13823 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
13824 output_address (mode
, plus_constant (Pmode
, XEXP (XEXP (x
, 0), 0),
13827 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
,
13831 if (small_data_operand (x
, GET_MODE (x
)))
13832 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
13833 reg_names
[SMALL_DATA_REG
]);
13837 case 'N': /* Unused */
13838 /* Write the number of elements in the vector times 4. */
13839 if (GET_CODE (x
) != PARALLEL
)
13840 output_operand_lossage ("invalid %%N value");
13842 fprintf (file
, "%d", XVECLEN (x
, 0) * 4);
13845 case 'O': /* Unused */
13846 /* Similar, but subtract 1 first. */
13847 if (GET_CODE (x
) != PARALLEL
)
13848 output_operand_lossage ("invalid %%O value");
13850 fprintf (file
, "%d", (XVECLEN (x
, 0) - 1) * 4);
13854 /* X is a CONST_INT that is a power of two. Output the logarithm. */
13857 || (i
= exact_log2 (INTVAL (x
))) < 0)
13858 output_operand_lossage ("invalid %%p value");
13860 fprintf (file
, "%d", i
);
13864 /* The operand must be an indirect memory reference. The result
13865 is the register name. */
13866 if (!MEM_P (x
) || !REG_P (XEXP (x
, 0))
13867 || REGNO (XEXP (x
, 0)) >= 32)
13868 output_operand_lossage ("invalid %%P value");
13870 fputs (reg_names
[REGNO (XEXP (x
, 0))], file
);
13874 /* This outputs the logical code corresponding to a boolean
13875 expression. The expression may have one or both operands
13876 negated (if one, only the first one). For condition register
13877 logical operations, it will also treat the negated
13878 CR codes as NOTs, but not handle NOTs of them. */
13880 const char *const *t
= 0;
13882 enum rtx_code code
= GET_CODE (x
);
13883 static const char * const tbl
[3][3] = {
13884 { "and", "andc", "nor" },
13885 { "or", "orc", "nand" },
13886 { "xor", "eqv", "xor" } };
13890 else if (code
== IOR
)
13892 else if (code
== XOR
)
13895 output_operand_lossage ("invalid %%q value");
13897 if (GET_CODE (XEXP (x
, 0)) != NOT
)
13901 if (GET_CODE (XEXP (x
, 1)) == NOT
)
13912 if (! TARGET_MFCRF
)
13918 /* X is a CR register. Print the mask for `mtcrf'. */
13919 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13920 output_operand_lossage ("invalid %%R value");
13922 fprintf (file
, "%d", 128 >> (REGNO (x
) - CR0_REGNO
));
13926 /* Low 5 bits of 32 - value */
13928 output_operand_lossage ("invalid %%s value");
13930 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, (32 - INTVAL (x
)) & 31);
13934 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
13935 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13937 output_operand_lossage ("invalid %%t value");
13941 /* Bit 3 is OV bit. */
13942 i
= 4 * (REGNO (x
) - CR0_REGNO
) + 3;
13944 /* If we want bit 31, write a shift count of zero, not 32. */
13945 fprintf (file
, "%d", i
== 31 ? 0 : i
+ 1);
13949 /* Print the symbolic name of a branch target register. */
13950 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
13951 x
= XVECEXP (x
, 0, 0);
13952 if (!REG_P (x
) || (REGNO (x
) != LR_REGNO
13953 && REGNO (x
) != CTR_REGNO
))
13954 output_operand_lossage ("invalid %%T value");
13955 else if (REGNO (x
) == LR_REGNO
)
13956 fputs ("lr", file
);
13958 fputs ("ctr", file
);
13962 /* High-order or low-order 16 bits of constant, whichever is non-zero,
13963 for use in unsigned operand. */
13966 output_operand_lossage ("invalid %%u value");
13971 if ((uval
& 0xffff) == 0)
13974 fprintf (file
, HOST_WIDE_INT_PRINT_HEX
, uval
& 0xffff);
13978 /* High-order 16 bits of constant for use in signed operand. */
13980 output_operand_lossage ("invalid %%v value");
13982 fprintf (file
, HOST_WIDE_INT_PRINT_HEX
,
13983 (INTVAL (x
) >> 16) & 0xffff);
13987 /* Print `u' if this has an auto-increment or auto-decrement. */
13989 && (GET_CODE (XEXP (x
, 0)) == PRE_INC
13990 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
13991 || GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
))
13996 /* Print the trap code for this operand. */
13997 switch (GET_CODE (x
))
14000 fputs ("eq", file
); /* 4 */
14003 fputs ("ne", file
); /* 24 */
14006 fputs ("lt", file
); /* 16 */
14009 fputs ("le", file
); /* 20 */
14012 fputs ("gt", file
); /* 8 */
14015 fputs ("ge", file
); /* 12 */
14018 fputs ("llt", file
); /* 2 */
14021 fputs ("lle", file
); /* 6 */
14024 fputs ("lgt", file
); /* 1 */
14027 fputs ("lge", file
); /* 5 */
14030 output_operand_lossage ("invalid %%V value");
14035 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
14038 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
14039 ((INTVAL (x
) & 0xffff) ^ 0x8000) - 0x8000);
14041 print_operand (file
, x
, 0);
14045 /* X is a FPR or Altivec register used in a VSX context. */
14046 if (!REG_P (x
) || !VSX_REGNO_P (REGNO (x
)))
14047 output_operand_lossage ("invalid %%x value");
14050 int reg
= REGNO (x
);
14051 int vsx_reg
= (FP_REGNO_P (reg
)
14053 : reg
- FIRST_ALTIVEC_REGNO
+ 32);
14055 #ifdef TARGET_REGNAMES
14056 if (TARGET_REGNAMES
)
14057 fprintf (file
, "%%vs%d", vsx_reg
);
14060 fprintf (file
, "%d", vsx_reg
);
14066 && (legitimate_indexed_address_p (XEXP (x
, 0), 0)
14067 || (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
14068 && legitimate_indexed_address_p (XEXP (XEXP (x
, 0), 1), 0))))
14073 /* Like 'L', for third word of TImode/PTImode */
14075 fputs (reg_names
[REGNO (x
) + 2], file
);
14076 else if (MEM_P (x
))
14078 machine_mode mode
= GET_MODE (x
);
14079 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
14080 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14081 output_address (mode
, plus_constant (Pmode
,
14082 XEXP (XEXP (x
, 0), 0), 8));
14083 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14084 output_address (mode
, plus_constant (Pmode
,
14085 XEXP (XEXP (x
, 0), 0), 8));
14087 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
, 8), 0));
14088 if (small_data_operand (x
, GET_MODE (x
)))
14089 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
14090 reg_names
[SMALL_DATA_REG
]);
14095 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
14096 x
= XVECEXP (x
, 0, 1);
14097 /* X is a SYMBOL_REF. Write out the name preceded by a
14098 period and without any trailing data in brackets. Used for function
14099 names. If we are configured for System V (or the embedded ABI) on
14100 the PowerPC, do not emit the period, since those systems do not use
14101 TOCs and the like. */
14102 if (!SYMBOL_REF_P (x
))
14104 output_operand_lossage ("invalid %%z value");
14108 /* For macho, check to see if we need a stub. */
14111 const char *name
= XSTR (x
, 0);
14113 if (darwin_symbol_stubs
14114 && MACHOPIC_INDIRECT
14115 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
14116 name
= machopic_indirection_name (x
, /*stub_p=*/true);
14118 assemble_name (file
, name
);
14120 else if (!DOT_SYMBOLS
)
14121 assemble_name (file
, XSTR (x
, 0));
14123 rs6000_output_function_entry (file
, XSTR (x
, 0));
14127 /* Like 'L', for last word of TImode/PTImode. */
14129 fputs (reg_names
[REGNO (x
) + 3], file
);
14130 else if (MEM_P (x
))
14132 machine_mode mode
= GET_MODE (x
);
14133 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
14134 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14135 output_address (mode
, plus_constant (Pmode
,
14136 XEXP (XEXP (x
, 0), 0), 12));
14137 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14138 output_address (mode
, plus_constant (Pmode
,
14139 XEXP (XEXP (x
, 0), 0), 12));
14141 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
, 12), 0));
14142 if (small_data_operand (x
, GET_MODE (x
)))
14143 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
14144 reg_names
[SMALL_DATA_REG
]);
14148 /* Print AltiVec memory operand. */
14153 gcc_assert (MEM_P (x
));
14157 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x
))
14158 && GET_CODE (tmp
) == AND
14159 && CONST_INT_P (XEXP (tmp
, 1))
14160 && INTVAL (XEXP (tmp
, 1)) == -16)
14161 tmp
= XEXP (tmp
, 0);
14162 else if (VECTOR_MEM_VSX_P (GET_MODE (x
))
14163 && GET_CODE (tmp
) == PRE_MODIFY
)
14164 tmp
= XEXP (tmp
, 1);
14166 fprintf (file
, "0,%s", reg_names
[REGNO (tmp
)]);
14169 if (GET_CODE (tmp
) != PLUS
14170 || !REG_P (XEXP (tmp
, 0))
14171 || !REG_P (XEXP (tmp
, 1)))
14173 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
14177 if (REGNO (XEXP (tmp
, 0)) == 0)
14178 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (tmp
, 1)) ],
14179 reg_names
[ REGNO (XEXP (tmp
, 0)) ]);
14181 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (tmp
, 0)) ],
14182 reg_names
[ REGNO (XEXP (tmp
, 1)) ]);
14189 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
14190 else if (MEM_P (x
))
14192 /* We need to handle PRE_INC and PRE_DEC here, since we need to
14193 know the width from the mode. */
14194 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
)
14195 fprintf (file
, "%d(%s)", GET_MODE_SIZE (GET_MODE (x
)),
14196 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
14197 else if (GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14198 fprintf (file
, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x
)),
14199 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
14200 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14201 output_address (GET_MODE (x
), XEXP (XEXP (x
, 0), 1));
14203 output_address (GET_MODE (x
), XEXP (x
, 0));
14205 else if (toc_relative_expr_p (x
, false,
14206 &tocrel_base_oac
, &tocrel_offset_oac
))
14207 /* This hack along with a corresponding hack in
14208 rs6000_output_addr_const_extra arranges to output addends
14209 where the assembler expects to find them. eg.
14210 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
14211 without this hack would be output as "x@toc+4". We
14213 output_addr_const (file
, CONST_CAST_RTX (tocrel_base_oac
));
14214 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLSGD
)
14215 output_addr_const (file
, XVECEXP (x
, 0, 0));
14216 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
14217 output_addr_const (file
, XVECEXP (x
, 0, 1));
14219 output_addr_const (file
, x
);
14223 if (const char *name
= get_some_local_dynamic_name ())
14224 assemble_name (file
, name
);
14226 output_operand_lossage ("'%%&' used without any "
14227 "local dynamic TLS references");
14231 output_operand_lossage ("invalid %%xn code");
/* Print the address of an operand.  */

void
print_operand_address (FILE *file, rtx x)
{
  if (REG_P (x))
    fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);

  /* Is it a PC-relative address?  */
  else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
    {
      HOST_WIDE_INT offset;

      if (GET_CODE (x) == CONST)
	x = XEXP (x, 0);

      offset = 0;
      if (GET_CODE (x) == PLUS)
	{
	  offset = INTVAL (XEXP (x, 1));
	  x = XEXP (x, 0);
	}

      output_addr_const (file, x);

      if (offset)
	fprintf (file, "%+" PRId64, offset);

      if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
	fprintf (file, "@got");

      fprintf (file, "@pcrel");
    }
  else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
	   || GET_CODE (x) == LABEL_REF)
    {
      output_addr_const (file, x);
      if (small_data_operand (x, GET_MODE (x)))
	fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
		 reg_names[SMALL_DATA_REG]);
      else
	gcc_assert (!TARGET_TOC);
    }
  else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
	   && REG_P (XEXP (x, 1)))
    {
      if (REGNO (XEXP (x, 0)) == 0)
	fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
		 reg_names[ REGNO (XEXP (x, 0)) ]);
      else
	fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
		 reg_names[ REGNO (XEXP (x, 1)) ]);
    }
  else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
	   && CONST_INT_P (XEXP (x, 1)))
    fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
	     INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
#if TARGET_MACHO
  else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
	   && CONSTANT_P (XEXP (x, 1)))
    {
      fprintf (file, "lo16(");
      output_addr_const (file, XEXP (x, 1));
      fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
    }
#endif
#if TARGET_ELF
  else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
	   && CONSTANT_P (XEXP (x, 1)))
    {
      output_addr_const (file, XEXP (x, 1));
      fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
    }
#endif
  else if (toc_relative_expr_p (x, false, &tocrel_base_oac,
				&tocrel_offset_oac))
    {
      /* This hack along with a corresponding hack in
	 rs6000_output_addr_const_extra arranges to output addends
	 where the assembler expects to find them.  eg.
	 (lo_sum (reg 9)
	 .       (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
	 without this hack would be output as "x@toc+8@l(9)".  We
	 want "x+8@toc@l(9)".  */
      output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
      if (GET_CODE (x) == LO_SUM)
	fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
      else
	fprintf (file, "(%s)",
		 reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
    }
  else
    output_addr_const (file, x);
}
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
rs6000_output_addr_const_extra (FILE *file, rtx x)
{
  if (GET_CODE (x) == UNSPEC)
    switch (XINT (x, 1))
      {
      case UNSPEC_TOCREL:
	gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
			     && REG_P (XVECEXP (x, 0, 1))
			     && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
	output_addr_const (file, XVECEXP (x, 0, 0));
	if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
	  {
	    if (INTVAL (tocrel_offset_oac) >= 0)
	      fprintf (file, "+");
	    output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
	  }
	if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
	  {
	    putc ('-', file);
	    assemble_name (file, toc_label_name);
	  }
	else if (TARGET_ELF)
	  fputs ("@toc", file);
	return true;

#if TARGET_MACHO
      case UNSPEC_MACHOPIC_OFFSET:
	output_addr_const (file, XVECEXP (x, 0, 0));
	putc ('-', file);
	machopic_output_function_base_name (file);
	return true;
#endif
      }
  return false;
}
/* Target hook for assembling integer objects.  The PowerPC version has
   to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
   is defined.  It also needs to handle DI-mode objects on 64-bit
   targets.  */

static bool
rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
#ifdef RELOCATABLE_NEEDS_FIXUP
  /* Special handling for SI values.  */
  if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
    {
      static int recurse = 0;

      /* For -mrelocatable, we mark all addresses that need to be fixed up in
	 the .fixup section.  Since the TOC section is already relocated, we
	 don't need to mark it here.  We used to skip the text section, but it
	 should never be valid for relocated addresses to be placed in the text
	 section.  */
      if (DEFAULT_ABI == ABI_V4
	  && (TARGET_RELOCATABLE || flag_pic > 1)
	  && in_section != toc_section
	  && !recurse
	  && !CONST_SCALAR_INT_P (x)
	  && CONSTANT_P (x))
	{
	  char buf[256];

	  recurse = 1;
	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
	  fixuplabelno++;
	  ASM_OUTPUT_LABEL (asm_out_file, buf);
	  fprintf (asm_out_file, "\t.long\t(");
	  output_addr_const (asm_out_file, x);
	  fprintf (asm_out_file, ")@fixup\n");
	  fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
	  ASM_OUTPUT_ALIGN (asm_out_file, 2);
	  fprintf (asm_out_file, "\t.long\t");
	  assemble_name (asm_out_file, buf);
	  fprintf (asm_out_file, "\n\t.previous\n");
	  recurse = 0;
	  return true;
	}
      /* Remove initial .'s to turn a -mcall-aixdesc function
	 address into the address of the descriptor, not the function
	 itself.  */
      else if (SYMBOL_REF_P (x)
	       && XSTR (x, 0)[0] == '.'
	       && DEFAULT_ABI == ABI_AIX)
	{
	  const char *name = XSTR (x, 0);
	  while (*name == '.')
	    name++;

	  fprintf (asm_out_file, "\t.long\t%s\n", name);
	  return true;
	}
    }
#endif /* RELOCATABLE_NEEDS_FIXUP */
  return default_assemble_integer (x, size, aligned_p);
}
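
/* Illustrative note (added, not from the original sources): with
   -mrelocatable on ELF V4, emitting the address of a symbol "foo" as data
   produces roughly the following, assuming the internal label generated
   above happens to be .LCP7:

	.LCP7:
		.long (foo)@fixup
		.section ".fixup","aw"
		.align 2
		.long .LCP7
		.previous

   so the startup code can find and relocate the word at .LCP7 at load
   time.  The exact label number is of course hypothetical.  */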
/* Return a template string for assembly to emit when making an
   external call.  FUNOP is the call mem argument operand number.  */

static const char *
rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
{
  /* -Wformat-overflow workaround, without which gcc thinks that %u
     might produce 10 digits.  */
  gcc_assert (funop <= MAX_RECOG_OPERANDS);

  char arg[12];
  arg[0] = 0;
  if (GET_CODE (operands[funop + 1]) == UNSPEC)
    {
      if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
	sprintf (arg, "(%%%u@tlsgd)", funop + 1);
      else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
	sprintf (arg, "(%%&@tlsld)");
    }

  /* The magic 32768 offset here corresponds to the offset of
     r30 in .got2, as given by LCTOC1.  See sysv4.h:toc_section.  */
  char z[11];
  sprintf (z, "%%z%u%s", funop,
	   (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
	    ? "+32768" : ""));

  static char str[32];  /* 1 spare */
  if (rs6000_pcrel_p ())
    sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
	     sibcall ? "" : "\n\tnop");
  else if (DEFAULT_ABI == ABI_V4)
    sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
	     flag_pic ? "@plt" : "");
#if TARGET_MACHO
  /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case.  */
  else if (DEFAULT_ABI == ABI_DARWIN)
    {
      /* The cookie is in operand func+2.  */
      gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
      int cookie = INTVAL (operands[funop + 2]);
      if (cookie & CALL_LONG)
	{
	  tree funname = get_identifier (XSTR (operands[funop], 0));
	  tree labelname = get_prev_label (funname);
	  gcc_checking_assert (labelname && !sibcall);

	  /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
	     instruction will reach 'foo', otherwise link as 'bl L42'".
	     "L42" should be a 'branch island', that will do a far jump to
	     'foo'.  Branch islands are generated in
	     macho_branch_islands().  */
	  sprintf (str, "jbsr %%z%u,%.10s", funop,
		   IDENTIFIER_POINTER (labelname));
	}
      else
	/* Same as AIX or ELFv2, except to keep backwards compat, no nop
	   after the call.  */
	sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
    }
#endif
  else
    gcc_unreachable ();

  return str;
}

const char *
rs6000_call_template (rtx *operands, unsigned int funop)
{
  return rs6000_call_template_1 (operands, funop, false);
}

const char *
rs6000_sibcall_template (rtx *operands, unsigned int funop)
{
  return rs6000_call_template_1 (operands, funop, true);
}
14511 /* As above, for indirect calls. */
14513 static const char *
14514 rs6000_indirect_call_template_1 (rtx
*operands
, unsigned int funop
,
14517 /* -Wformat-overflow workaround, without which gcc thinks that %u
14518 might produce 10 digits. Note that -Wformat-overflow will not
14519 currently warn here for str[], so do not rely on a warning to
14520 ensure str[] is correctly sized. */
14521 gcc_assert (funop
<= MAX_RECOG_OPERANDS
);
14523 /* Currently, funop is either 0 or 1. The maximum string is always
14524 a !speculate 64-bit __tls_get_addr call.
14527 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14528 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14530 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14531 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14538 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14539 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14541 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14542 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14549 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14550 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14552 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14553 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14560 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14561 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
14563 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14564 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
14568 static char str
[160]; /* 8 spare */
14570 const char *ptrload
= TARGET_64BIT
? "d" : "wz";
14572 if (DEFAULT_ABI
== ABI_AIX
)
14575 ptrload
, funop
+ 3);
14577 /* We don't need the extra code to stop indirect call speculation if
14579 bool speculate
= (TARGET_MACHO
14580 || rs6000_speculate_indirect_jumps
14581 || (REG_P (operands
[funop
])
14582 && REGNO (operands
[funop
]) == LR_REGNO
));
14584 if (TARGET_PLTSEQ
&& GET_CODE (operands
[funop
]) == UNSPEC
)
14586 const char *rel64
= TARGET_64BIT
? "64" : "";
14589 if (GET_CODE (operands
[funop
+ 1]) == UNSPEC
)
14591 if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSGD
)
14592 sprintf (tls
, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
14594 else if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSLD
)
14595 sprintf (tls
, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
14599 const char *notoc
= rs6000_pcrel_p () ? "_NOTOC" : "";
14600 const char *addend
= (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
14601 && flag_pic
== 2 ? "+32768" : "");
14605 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
14606 tls
, rel64
, notoc
, funop
, addend
);
14607 s
+= sprintf (s
, "crset 2\n\t");
14610 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
14611 tls
, rel64
, notoc
, funop
, addend
);
14613 else if (!speculate
)
14614 s
+= sprintf (s
, "crset 2\n\t");
14616 if (rs6000_pcrel_p ())
14619 sprintf (s
, "b%%T%ul", funop
);
14621 sprintf (s
, "beq%%T%ul-", funop
);
14623 else if (DEFAULT_ABI
== ABI_AIX
)
14629 funop
, ptrload
, funop
+ 4);
14634 funop
, ptrload
, funop
+ 4);
14636 else if (DEFAULT_ABI
== ABI_ELFv2
)
14642 funop
, ptrload
, funop
+ 3);
14647 funop
, ptrload
, funop
+ 3);
14654 funop
, sibcall
? "" : "l");
14658 funop
, sibcall
? "" : "l", sibcall
? "\n\tb $" : "");
14664 rs6000_indirect_call_template (rtx
*operands
, unsigned int funop
)
14666 return rs6000_indirect_call_template_1 (operands
, funop
, false);
14670 rs6000_indirect_sibcall_template (rtx
*operands
, unsigned int funop
)
14672 return rs6000_indirect_call_template_1 (operands
, funop
, true);
14676 /* Output indirect call insns. WHICH identifies the type of sequence. */
14678 rs6000_pltseq_template (rtx
*operands
, int which
)
14680 const char *rel64
= TARGET_64BIT
? "64" : "";
14683 if (GET_CODE (operands
[3]) == UNSPEC
)
14685 char off
= which
== RS6000_PLTSEQ_PLT_PCREL34
? '8' : '4';
14686 if (XINT (operands
[3], 1) == UNSPEC_TLSGD
)
14687 sprintf (tls
, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
14689 else if (XINT (operands
[3], 1) == UNSPEC_TLSLD
)
14690 sprintf (tls
, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
14694 gcc_assert (DEFAULT_ABI
== ABI_ELFv2
|| DEFAULT_ABI
== ABI_V4
);
14695 static char str
[96]; /* 10 spare */
14696 char off
= WORDS_BIG_ENDIAN
? '2' : '4';
14697 const char *addend
= (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
14698 && flag_pic
== 2 ? "+32768" : "");
14701 case RS6000_PLTSEQ_TOCSAVE
:
14704 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
14705 TARGET_64BIT
? "d 2,24(1)" : "w 2,12(1)",
14708 case RS6000_PLTSEQ_PLT16_HA
:
14709 if (DEFAULT_ABI
== ABI_V4
&& !flag_pic
)
14712 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
14716 "addis %%0,%%1,0\n\t"
14717 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
14718 tls
, off
, rel64
, addend
);
14720 case RS6000_PLTSEQ_PLT16_LO
:
14722 "l%s %%0,0(%%1)\n\t"
14723 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
14724 TARGET_64BIT
? "d" : "wz",
14725 tls
, off
, rel64
, TARGET_64BIT
? "_DS" : "", addend
);
14727 case RS6000_PLTSEQ_MTCTR
:
14730 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
14731 tls
, rel64
, addend
);
14733 case RS6000_PLTSEQ_PLT_PCREL34
:
14735 "pl%s %%0,0(0),1\n\t"
14736 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
14737 TARGET_64BIT
? "d" : "wz",
14741 gcc_unreachable ();
#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
/* Emit an assembler directive to set symbol visibility for DECL to
   VISIBILITY_TYPE.  */

static void
rs6000_assemble_visibility (tree decl, int vis)
{
  /* Functions need to have their entry point symbol visibility set as
     well as their descriptor symbol visibility.  */
  if (DEFAULT_ABI == ABI_AIX
      && DOT_SYMBOLS
      && TREE_CODE (decl) == FUNCTION_DECL)
    {
      static const char * const visibility_types[] = {
	NULL, "protected", "hidden", "internal"
      };

      const char *name, *type;

      name = ((* targetm.strip_name_encoding)
	      (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
      type = visibility_types[vis];

      fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
      fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
    }
  else
    default_assemble_visibility (decl, vis);
}
#endif
/* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
   entry.  If RECORD_P is true and the target supports named sections,
   the location of the NOPs will be recorded in a special object section
   called "__patchable_function_entries".  This routine may be called
   twice per function to put NOPs before and after the function
   entry.  */

void
rs6000_print_patchable_function_entry (FILE *file,
				       unsigned HOST_WIDE_INT patch_area_size,
				       bool record_p)
{
  unsigned int flags = SECTION_WRITE | SECTION_RELRO;
  /* When the .opd section is emitted, default_print_patchable_function_entry_1
     emits the function symbol into the .opd section while the patchable area
     is emitted into the function section.  Don't use SECTION_LINK_ORDER in
     that case.  */
  if (!(TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
      && HAVE_GAS_SECTION_LINK_ORDER)
    flags |= SECTION_LINK_ORDER;
  default_print_patchable_function_entry_1 (file, patch_area_size, record_p,
					    flags);
}

enum rtx_code
rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
{
  /* Reversal of FP compares takes care -- an ordered compare
     becomes an unordered compare and vice versa.  */
  if (mode == CCFPmode
      && (!flag_finite_math_only
	  || code == UNLT || code == UNLE || code == UNGT || code == UNGE
	  || code == UNEQ || code == LTGT))
    return reverse_condition_maybe_unordered (code);
  else
    return reverse_condition (code);
}
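
/* Illustrative note (added, not from the original sources): reversing a
   floating-point comparison must flip its behaviour on unordered operands.
   With x or y a NaN,

	x >  y   is false, so the reverse of GT must be true  -> UNLE
	x >= y   is false, so the reverse of GE must be true  -> UNLT

   reverse_condition_maybe_unordered performs exactly this mapping, while
   plain reverse_condition would return LE/LT and mishandle NaNs.  */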
14819 /* Generate a compare for CODE. Return a brand-new rtx that
14820 represents the result of the compare. */
14823 rs6000_generate_compare (rtx cmp
, machine_mode mode
)
14825 machine_mode comp_mode
;
14826 rtx compare_result
;
14827 enum rtx_code code
= GET_CODE (cmp
);
14828 rtx op0
= XEXP (cmp
, 0);
14829 rtx op1
= XEXP (cmp
, 1);
14831 if (!TARGET_FLOAT128_HW
&& FLOAT128_VECTOR_P (mode
))
14832 comp_mode
= CCmode
;
14833 else if (FLOAT_MODE_P (mode
))
14834 comp_mode
= CCFPmode
;
14835 else if (code
== GTU
|| code
== LTU
14836 || code
== GEU
|| code
== LEU
)
14837 comp_mode
= CCUNSmode
;
14838 else if ((code
== EQ
|| code
== NE
)
14839 && unsigned_reg_p (op0
)
14840 && (unsigned_reg_p (op1
)
14841 || (CONST_INT_P (op1
) && INTVAL (op1
) != 0)))
14842 /* These are unsigned values, perhaps there will be a later
14843 ordering compare that can be shared with this one. */
14844 comp_mode
= CCUNSmode
;
14846 comp_mode
= CCmode
;
14848 /* If we have an unsigned compare, make sure we don't have a signed value as
14850 if (comp_mode
== CCUNSmode
&& CONST_INT_P (op1
)
14851 && INTVAL (op1
) < 0)
14853 op0
= copy_rtx_if_shared (op0
);
14854 op1
= force_reg (GET_MODE (op0
), op1
);
14855 cmp
= gen_rtx_fmt_ee (code
, GET_MODE (cmp
), op0
, op1
);
14858 /* First, the compare. */
14859 compare_result
= gen_reg_rtx (comp_mode
);
14861 /* IEEE 128-bit support in VSX registers when we do not have hardware
14863 if (!TARGET_FLOAT128_HW
&& FLOAT128_VECTOR_P (mode
))
14865 rtx libfunc
= NULL_RTX
;
14866 bool check_nan
= false;
14873 libfunc
= optab_libfunc (eq_optab
, mode
);
14878 libfunc
= optab_libfunc (ge_optab
, mode
);
14883 libfunc
= optab_libfunc (le_optab
, mode
);
14888 libfunc
= optab_libfunc (unord_optab
, mode
);
14889 code
= (code
== UNORDERED
) ? NE
: EQ
;
14895 libfunc
= optab_libfunc (ge_optab
, mode
);
14896 code
= (code
== UNGE
) ? GE
: GT
;
14902 libfunc
= optab_libfunc (le_optab
, mode
);
14903 code
= (code
== UNLE
) ? LE
: LT
;
14909 libfunc
= optab_libfunc (eq_optab
, mode
);
	  code = (code == UNEQ) ? EQ : NE;
14914 gcc_unreachable ();
14917 gcc_assert (libfunc
);
14920 dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
14921 SImode
, op0
, mode
, op1
, mode
);
14923 /* The library signals an exception for signalling NaNs, so we need to
14924 handle isgreater, etc. by first checking isordered. */
14927 rtx ne_rtx
, normal_dest
, unord_dest
;
14928 rtx unord_func
= optab_libfunc (unord_optab
, mode
);
14929 rtx join_label
= gen_label_rtx ();
14930 rtx join_ref
= gen_rtx_LABEL_REF (VOIDmode
, join_label
);
14931 rtx unord_cmp
= gen_reg_rtx (comp_mode
);
14934 /* Test for either value being a NaN. */
14935 gcc_assert (unord_func
);
14936 unord_dest
= emit_library_call_value (unord_func
, NULL_RTX
, LCT_CONST
,
14937 SImode
, op0
, mode
, op1
, mode
);
14939 /* Set value (0) if either value is a NaN, and jump to the join
14941 dest
= gen_reg_rtx (SImode
);
14942 emit_move_insn (dest
, const1_rtx
);
14943 emit_insn (gen_rtx_SET (unord_cmp
,
14944 gen_rtx_COMPARE (comp_mode
, unord_dest
,
14947 ne_rtx
= gen_rtx_NE (comp_mode
, unord_cmp
, const0_rtx
);
14948 emit_jump_insn (gen_rtx_SET (pc_rtx
,
14949 gen_rtx_IF_THEN_ELSE (VOIDmode
, ne_rtx
,
14953 /* Do the normal comparison, knowing that the values are not
14955 normal_dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
14956 SImode
, op0
, mode
, op1
, mode
);
14958 emit_insn (gen_cstoresi4 (dest
,
14959 gen_rtx_fmt_ee (code
, SImode
, normal_dest
,
14961 normal_dest
, const0_rtx
));
14963 /* Join NaN and non-Nan paths. Compare dest against 0. */
14964 emit_label (join_label
);
14968 emit_insn (gen_rtx_SET (compare_result
,
14969 gen_rtx_COMPARE (comp_mode
, dest
, const0_rtx
)));
14974 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
14975 CLOBBERs to match cmptf_internal2 pattern. */
14976 if (comp_mode
== CCFPmode
&& TARGET_XL_COMPAT
14977 && FLOAT128_IBM_P (GET_MODE (op0
))
14978 && TARGET_HARD_FLOAT
)
14979 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
14981 gen_rtx_SET (compare_result
,
14982 gen_rtx_COMPARE (comp_mode
, op0
, op1
)),
14983 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
14984 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
14985 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
14986 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
14987 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
14988 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
14989 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
14990 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
14991 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (Pmode
)))));
14992 else if (GET_CODE (op1
) == UNSPEC
14993 && XINT (op1
, 1) == UNSPEC_SP_TEST
)
14995 rtx op1b
= XVECEXP (op1
, 0, 0);
14996 comp_mode
= CCEQmode
;
14997 compare_result
= gen_reg_rtx (CCEQmode
);
14999 emit_insn (gen_stack_protect_testdi (compare_result
, op0
, op1b
));
15001 emit_insn (gen_stack_protect_testsi (compare_result
, op0
, op1b
));
15004 emit_insn (gen_rtx_SET (compare_result
,
15005 gen_rtx_COMPARE (comp_mode
, op0
, op1
)));
15008 validate_condition_mode (code
, GET_MODE (compare_result
));
15010 return gen_rtx_fmt_ee (code
, VOIDmode
, compare_result
, const0_rtx
);
/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */

static const char *
rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
			  const_tree type1,
			  const_tree type2)
{
  machine_mode mode1 = TYPE_MODE (type1);
  machine_mode mode2 = TYPE_MODE (type2);

  /* For complex modes, use the inner type.  */
  if (COMPLEX_MODE_P (mode1))
    mode1 = GET_MODE_INNER (mode1);

  if (COMPLEX_MODE_P (mode2))
    mode2 = GET_MODE_INNER (mode2);

  /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
     double to intermix unless -mfloat128-convert.  */
  if (mode1 == mode2)
    return NULL;

  if (!TARGET_FLOAT128_CVT)
    {
      if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
	  || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
	return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
		  "point types");
    }

  return NULL;
}
15049 /* Expand floating point conversion to/from __float128 and __ibm128. */
15052 rs6000_expand_float128_convert (rtx dest
, rtx src
, bool unsigned_p
)
15054 machine_mode dest_mode
= GET_MODE (dest
);
15055 machine_mode src_mode
= GET_MODE (src
);
15056 convert_optab cvt
= unknown_optab
;
15057 bool do_move
= false;
15058 rtx libfunc
= NULL_RTX
;
15060 typedef rtx (*rtx_2func_t
) (rtx
, rtx
);
15061 rtx_2func_t hw_convert
= (rtx_2func_t
)0;
15065 rtx_2func_t from_df
;
15066 rtx_2func_t from_sf
;
15067 rtx_2func_t from_si_sign
;
15068 rtx_2func_t from_si_uns
;
15069 rtx_2func_t from_di_sign
;
15070 rtx_2func_t from_di_uns
;
15073 rtx_2func_t to_si_sign
;
15074 rtx_2func_t to_si_uns
;
15075 rtx_2func_t to_di_sign
;
15076 rtx_2func_t to_di_uns
;
15077 } hw_conversions
[2] = {
15078 /* convertions to/from KFmode */
15080 gen_extenddfkf2_hw
, /* KFmode <- DFmode. */
15081 gen_extendsfkf2_hw
, /* KFmode <- SFmode. */
15082 gen_float_kfsi2_hw
, /* KFmode <- SImode (signed). */
15083 gen_floatuns_kfsi2_hw
, /* KFmode <- SImode (unsigned). */
15084 gen_float_kfdi2_hw
, /* KFmode <- DImode (signed). */
15085 gen_floatuns_kfdi2_hw
, /* KFmode <- DImode (unsigned). */
15086 gen_trunckfdf2_hw
, /* DFmode <- KFmode. */
15087 gen_trunckfsf2_hw
, /* SFmode <- KFmode. */
15088 gen_fix_kfsi2_hw
, /* SImode <- KFmode (signed). */
15089 gen_fixuns_kfsi2_hw
, /* SImode <- KFmode (unsigned). */
15090 gen_fix_kfdi2_hw
, /* DImode <- KFmode (signed). */
15091 gen_fixuns_kfdi2_hw
, /* DImode <- KFmode (unsigned). */
15094 /* convertions to/from TFmode */
15096 gen_extenddftf2_hw
, /* TFmode <- DFmode. */
15097 gen_extendsftf2_hw
, /* TFmode <- SFmode. */
15098 gen_float_tfsi2_hw
, /* TFmode <- SImode (signed). */
15099 gen_floatuns_tfsi2_hw
, /* TFmode <- SImode (unsigned). */
15100 gen_float_tfdi2_hw
, /* TFmode <- DImode (signed). */
15101 gen_floatuns_tfdi2_hw
, /* TFmode <- DImode (unsigned). */
15102 gen_trunctfdf2_hw
, /* DFmode <- TFmode. */
15103 gen_trunctfsf2_hw
, /* SFmode <- TFmode. */
15104 gen_fix_tfsi2_hw
, /* SImode <- TFmode (signed). */
15105 gen_fixuns_tfsi2_hw
, /* SImode <- TFmode (unsigned). */
15106 gen_fix_tfdi2_hw
, /* DImode <- TFmode (signed). */
15107 gen_fixuns_tfdi2_hw
, /* DImode <- TFmode (unsigned). */
15111 if (dest_mode
== src_mode
)
15112 gcc_unreachable ();
15114 /* Eliminate memory operations. */
15116 src
= force_reg (src_mode
, src
);
15120 rtx tmp
= gen_reg_rtx (dest_mode
);
15121 rs6000_expand_float128_convert (tmp
, src
, unsigned_p
);
15122 rs6000_emit_move (dest
, tmp
, dest_mode
);
15126 /* Convert to IEEE 128-bit floating point. */
15127 if (FLOAT128_IEEE_P (dest_mode
))
15129 if (dest_mode
== KFmode
)
15131 else if (dest_mode
== TFmode
)
15134 gcc_unreachable ();
15140 hw_convert
= hw_conversions
[kf_or_tf
].from_df
;
15145 hw_convert
= hw_conversions
[kf_or_tf
].from_sf
;
15151 if (FLOAT128_IBM_P (src_mode
))
15160 cvt
= ufloat_optab
;
15161 hw_convert
= hw_conversions
[kf_or_tf
].from_si_uns
;
15165 cvt
= sfloat_optab
;
15166 hw_convert
= hw_conversions
[kf_or_tf
].from_si_sign
;
15173 cvt
= ufloat_optab
;
15174 hw_convert
= hw_conversions
[kf_or_tf
].from_di_uns
;
15178 cvt
= sfloat_optab
;
15179 hw_convert
= hw_conversions
[kf_or_tf
].from_di_sign
;
15184 gcc_unreachable ();
15188 /* Convert from IEEE 128-bit floating point. */
15189 else if (FLOAT128_IEEE_P (src_mode
))
15191 if (src_mode
== KFmode
)
15193 else if (src_mode
== TFmode
)
15196 gcc_unreachable ();
15202 hw_convert
= hw_conversions
[kf_or_tf
].to_df
;
15207 hw_convert
= hw_conversions
[kf_or_tf
].to_sf
;
15213 if (FLOAT128_IBM_P (dest_mode
))
15223 hw_convert
= hw_conversions
[kf_or_tf
].to_si_uns
;
15228 hw_convert
= hw_conversions
[kf_or_tf
].to_si_sign
;
15236 hw_convert
= hw_conversions
[kf_or_tf
].to_di_uns
;
15241 hw_convert
= hw_conversions
[kf_or_tf
].to_di_sign
;
15246 gcc_unreachable ();
15250 /* Both IBM format. */
15251 else if (FLOAT128_IBM_P (dest_mode
) && FLOAT128_IBM_P (src_mode
))
15255 gcc_unreachable ();
15257 /* Handle conversion between TFmode/KFmode/IFmode. */
15259 emit_insn (gen_rtx_SET (dest
, gen_rtx_FLOAT_EXTEND (dest_mode
, src
)));
15261 /* Handle conversion if we have hardware support. */
15262 else if (TARGET_FLOAT128_HW
&& hw_convert
)
15263 emit_insn ((hw_convert
) (dest
, src
));
15265 /* Call an external function to do the conversion. */
15266 else if (cvt
!= unknown_optab
)
15268 libfunc
= convert_optab_libfunc (cvt
, dest_mode
, src_mode
);
15269 gcc_assert (libfunc
!= NULL_RTX
);
15271 dest2
= emit_library_call_value (libfunc
, dest
, LCT_CONST
, dest_mode
,
15274 gcc_assert (dest2
!= NULL_RTX
);
15275 if (!rtx_equal_p (dest
, dest2
))
15276 emit_move_insn (dest
, dest2
);
15280 gcc_unreachable ();
/* Emit RTL that sets a register to zero if OP1 and OP2 are equal.  SCRATCH
   can be used as that dest register.  Return the dest register.  */

rtx
rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
{
  if (op2 == const0_rtx)
    return op1;

  if (GET_CODE (scratch) == SCRATCH)
    scratch = gen_reg_rtx (mode);

  if (logical_operand (op2, mode))
    emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
  else
    emit_insn (gen_rtx_SET (scratch,
			    gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));

  return scratch;
}
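
/* Illustrative sketch (added, not from the original sources): for
   "r = (a == 1000)" the helper above does not emit the final compare
   itself; it only produces a scratch value that is zero iff the operands
   are equal, e.g. (pseudo RTL, hypothetical registers):

	(set (reg scratch) (xor (reg a) (const_int 1000)))   ; constant is a
							      ; logical_operand
   or, otherwise,
	(set (reg scratch) (plus (reg a) (const_int -1000)))

   and the caller then compares the scratch register against zero.  */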
/* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
   requires this.  The result is mode MODE.  */

rtx
rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
{
  rtx cond[2];
  int n = 0;

  if (code == LTGT || code == LE || code == UNLT)
    cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
  if (code == LTGT || code == GE || code == UNGT)
    cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
  if (code == LE || code == GE || code == UNEQ)
    cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
  if (code == UNLT || code == UNGT || code == UNEQ)
    cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);

  gcc_assert (n == 2);

  rtx cc = gen_reg_rtx (CCEQmode);
  rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
  emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));

  return cc;
}
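
/* Illustrative example (added, not from the original sources): a
   floating-point "a <= b" needs two CR bits, LT and EQ, combined with a
   cror.  A possible instruction sequence, with the combined bit placed in
   cr7.eq (register choice hypothetical):

	fcmpu  cr0,f1,f2
	cror   30,0,2        # cr7.eq = cr0.lt | cr0.eq

   The consumer then tests only the EQ bit of the fresh CR field.  */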
15333 rs6000_emit_sCOND (machine_mode mode
, rtx operands
[])
15335 rtx condition_rtx
= rs6000_generate_compare (operands
[1], mode
);
15336 rtx_code cond_code
= GET_CODE (condition_rtx
);
15338 if (FLOAT_MODE_P (mode
) && HONOR_NANS (mode
)
15339 && !(FLOAT128_VECTOR_P (mode
) && !TARGET_FLOAT128_HW
))
15341 else if (cond_code
== NE
15342 || cond_code
== GE
|| cond_code
== LE
15343 || cond_code
== GEU
|| cond_code
== LEU
15344 || cond_code
== ORDERED
|| cond_code
== UNGE
|| cond_code
== UNLE
)
15346 rtx not_result
= gen_reg_rtx (CCEQmode
);
15347 rtx not_op
, rev_cond_rtx
;
15348 machine_mode cc_mode
;
15350 cc_mode
= GET_MODE (XEXP (condition_rtx
, 0));
15352 rev_cond_rtx
= gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode
, cond_code
),
15353 SImode
, XEXP (condition_rtx
, 0), const0_rtx
);
15354 not_op
= gen_rtx_COMPARE (CCEQmode
, rev_cond_rtx
, const0_rtx
);
15355 emit_insn (gen_rtx_SET (not_result
, not_op
));
15356 condition_rtx
= gen_rtx_EQ (VOIDmode
, not_result
, const0_rtx
);
15359 machine_mode op_mode
= GET_MODE (XEXP (operands
[1], 0));
15360 if (op_mode
== VOIDmode
)
15361 op_mode
= GET_MODE (XEXP (operands
[1], 1));
15363 if (TARGET_POWERPC64
&& (op_mode
== DImode
|| FLOAT_MODE_P (mode
)))
15365 PUT_MODE (condition_rtx
, DImode
);
15366 convert_move (operands
[0], condition_rtx
, 0);
15370 PUT_MODE (condition_rtx
, SImode
);
15371 emit_insn (gen_rtx_SET (operands
[0], condition_rtx
));
/* Emit a branch of kind CODE to location LOC.  */

void
rs6000_emit_cbranch (machine_mode mode, rtx operands[])
{
  rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
  rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
  rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
  emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
}
/* Return the string to output a conditional branch to LABEL, which is
   the operand template of the label, or NULL if the branch is really a
   conditional return.

   OP is the conditional expression.  XEXP (OP, 0) is assumed to be a
   condition code register and its mode specifies what kind of
   comparison we made.

   REVERSED is nonzero if we should reverse the sense of the comparison.

   INSN is the insn.  */

char *
output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
{
  static char string[64];
  enum rtx_code code = GET_CODE (op);
  rtx cc_reg = XEXP (op, 0);
  machine_mode mode = GET_MODE (cc_reg);
  int cc_regno = REGNO (cc_reg) - CR0_REGNO;
  int need_longbranch = label != NULL && get_attr_length (insn) == 8;
  int really_reversed = reversed ^ need_longbranch;
  char *s = string;
  const char *ccode;
  const char *pred = "";
  rtx note;

  validate_condition_mode (code, mode);

  /* Work out which way this really branches.  We could use
     reverse_condition_maybe_unordered here always but this
     makes the resulting assembler clearer.  */
  if (really_reversed)
    {
      /* Reversal of FP compares takes care -- an ordered compare
	 becomes an unordered compare and vice versa.  */
      if (mode == CCFPmode)
	code = reverse_condition_maybe_unordered (code);
      else
	code = reverse_condition (code);
    }

  switch (code)
    {
      /* Not all of these are actually distinct opcodes, but
	 we distinguish them for clarity of the resulting assembler.  */
    case NE: case LTGT:
      ccode = "ne"; break;
    case EQ: case UNEQ:
      ccode = "eq"; break;
    case GE: case GEU:
      ccode = "ge"; break;
    case GT: case GTU: case UNGT:
      ccode = "gt"; break;
    case LE: case LEU:
      ccode = "le"; break;
    case LT: case LTU: case UNLT:
      ccode = "lt"; break;
    case UNORDERED: ccode = "un"; break;
    case ORDERED: ccode = "nu"; break;
    case UNGE: ccode = "nl"; break;
    case UNLE: ccode = "ng"; break;
    default:
      gcc_unreachable ();
    }

  /* Maybe we have a guess as to how likely the branch is.  */
  note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
  if (note != NULL_RTX)
    {
      /* PROB is the difference from 50%.  */
      int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
		   .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;

      /* Only hint for highly probable/improbable branches on newer cpus when
	 we have real profile data, as static prediction overrides processor
	 dynamic prediction.  For older cpus we may as well always hint, but
	 assume not taken for branches that are very close to 50% as a
	 mispredicted taken branch is more expensive than a
	 mispredicted not-taken branch.  */
      if (rs6000_always_hint
	  || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
	      && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
	      && br_prob_note_reliable_p (note)))
	{
	  if (abs (prob) > REG_BR_PROB_BASE / 20
	      && ((prob > 0) ^ need_longbranch))
	    pred = "+";
	  else
	    pred = "-";
	}
    }

  if (label == NULL)
    s += sprintf (s, "b%slr%s ", ccode, pred);
  else
    s += sprintf (s, "b%s%s ", ccode, pred);

  /* We need to escape any '%' characters in the reg_names string.
     Assume they'd only be the first character....  */
  if (reg_names[cc_regno + CR0_REGNO][0] == '%')
    *s++ = '%';
  s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);

  if (label != NULL)
    {
      /* If the branch distance was too far, we may have to use an
	 unconditional branch to go the distance.  */
      if (need_longbranch)
	s += sprintf (s, ",$+8\n\tb %s", label);
      else
	s += sprintf (s, ",%s", label);
    }

  return string;
}
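
/* Example of the output (added for illustration, not from the original
   sources): a likely branch on the EQ bit of cr0 to label .L5 prints as

	beq+ 0,.L5

   while the long-branch form inverts the condition and hops over an
   unconditional branch:

	bne- 0,$+8
	b .L5

   The "+"/"-" suffixes are the static branch-prediction hints chosen
   above; the label name is hypothetical.  */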
15504 /* Return insn for VSX or Altivec comparisons. */
15507 rs6000_emit_vector_compare_inner (enum rtx_code code
, rtx op0
, rtx op1
)
15510 machine_mode mode
= GET_MODE (op0
);
15518 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
15529 mask
= gen_reg_rtx (mode
);
15530 emit_insn (gen_rtx_SET (mask
, gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
15537 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
15538 DMODE is expected destination mode. This is a recursive function. */
15541 rs6000_emit_vector_compare (enum rtx_code rcode
,
15543 machine_mode dmode
)
15546 bool swap_operands
= false;
15547 bool try_again
= false;
15549 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode
));
15550 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
15552 /* See if the comparison works as is. */
15553 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
);
15561 swap_operands
= true;
15566 swap_operands
= true;
15574 /* Invert condition and try again.
15575 e.g., A != B becomes ~(A==B). */
15577 enum rtx_code rev_code
;
15578 enum insn_code nor_code
;
15581 rev_code
= reverse_condition_maybe_unordered (rcode
);
15582 if (rev_code
== UNKNOWN
)
15585 nor_code
= optab_handler (one_cmpl_optab
, dmode
);
15586 if (nor_code
== CODE_FOR_nothing
)
15589 mask2
= rs6000_emit_vector_compare (rev_code
, op0
, op1
, dmode
);
15593 mask
= gen_reg_rtx (dmode
);
15594 emit_insn (GEN_FCN (nor_code
) (mask
, mask2
));
15602 /* Try GT/GTU/LT/LTU OR EQ */
15605 enum insn_code ior_code
;
15606 enum rtx_code new_code
;
15627 gcc_unreachable ();
15630 ior_code
= optab_handler (ior_optab
, dmode
);
15631 if (ior_code
== CODE_FOR_nothing
)
15634 c_rtx
= rs6000_emit_vector_compare (new_code
, op0
, op1
, dmode
);
15638 eq_rtx
= rs6000_emit_vector_compare (EQ
, op0
, op1
, dmode
);
15642 mask
= gen_reg_rtx (dmode
);
15643 emit_insn (GEN_FCN (ior_code
) (mask
, c_rtx
, eq_rtx
));
15654 std::swap (op0
, op1
);
15656 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
);
15661 /* You only get two chances. */
15665 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
15666 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
15667 operands for the relation operation COND. */
15670 rs6000_emit_vector_cond_expr (rtx dest
, rtx op_true
, rtx op_false
,
15671 rtx cond
, rtx cc_op0
, rtx cc_op1
)
15673 machine_mode dest_mode
= GET_MODE (dest
);
15674 machine_mode mask_mode
= GET_MODE (cc_op0
);
15675 enum rtx_code rcode
= GET_CODE (cond
);
15677 bool invert_move
= false;
15679 if (VECTOR_UNIT_NONE_P (dest_mode
))
15682 gcc_assert (GET_MODE_SIZE (dest_mode
) == GET_MODE_SIZE (mask_mode
)
15683 && GET_MODE_NUNITS (dest_mode
) == GET_MODE_NUNITS (mask_mode
));
15687 /* Swap operands if we can, and fall back to doing the operation as
15688 specified, and doing a NOR to invert the test. */
15694 /* Invert condition and try again.
15695 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
15696 invert_move
= true;
15697 rcode
= reverse_condition_maybe_unordered (rcode
);
15698 if (rcode
== UNKNOWN
)
15704 if (GET_MODE_CLASS (mask_mode
) == MODE_VECTOR_INT
)
15706 /* Invert condition to avoid compound test. */
15707 invert_move
= true;
15708 rcode
= reverse_condition (rcode
);
15717 /* Invert condition to avoid compound test if necessary. */
15718 if (rcode
== GEU
|| rcode
== LEU
)
15720 invert_move
= true;
15721 rcode
= reverse_condition (rcode
);
15729 /* Get the vector mask for the given relational operations. */
15730 mask
= rs6000_emit_vector_compare (rcode
, cc_op0
, cc_op1
, mask_mode
);
15735 if (mask_mode
!= dest_mode
)
15736 mask
= simplify_gen_subreg (dest_mode
, mask
, mask_mode
, 0);
15739 std::swap (op_true
, op_false
);
15741 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
15742 if (GET_MODE_CLASS (dest_mode
) == MODE_VECTOR_INT
15743 && (GET_CODE (op_true
) == CONST_VECTOR
15744 || GET_CODE (op_false
) == CONST_VECTOR
))
15746 rtx constant_0
= CONST0_RTX (dest_mode
);
15747 rtx constant_m1
= CONSTM1_RTX (dest_mode
);
15749 if (op_true
== constant_m1
&& op_false
== constant_0
)
15751 emit_move_insn (dest
, mask
);
15755 else if (op_true
== constant_0
&& op_false
== constant_m1
)
15757 emit_insn (gen_rtx_SET (dest
, gen_rtx_NOT (dest_mode
, mask
)));
15761 /* If we can't use the vector comparison directly, perhaps we can use
15762 the mask for the true or false fields, instead of loading up a
15764 if (op_true
== constant_m1
)
15767 if (op_false
== constant_0
)
15771 if (!REG_P (op_true
) && !SUBREG_P (op_true
))
15772 op_true
= force_reg (dest_mode
, op_true
);
15774 if (!REG_P (op_false
) && !SUBREG_P (op_false
))
15775 op_false
= force_reg (dest_mode
, op_false
);
15777 rtx tmp
= gen_rtx_IOR (dest_mode
,
15778 gen_rtx_AND (dest_mode
, gen_rtx_NOT (dest_mode
, mask
),
15780 gen_rtx_AND (dest_mode
, mask
, op_true
));
15781 emit_insn (gen_rtx_SET (dest
, tmp
));
/* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to emit a
   maximum or minimum with "C" semantics.

   Unless you use -ffast-math, you can't use these instructions to replace
   conditions that implicitly reverse the condition because the comparison
   might generate a NaN or signed zero.

   I.e. the following can be replaced all of the time
	ret = (op1 >  op2) ? op1 : op2	; generate xsmaxcdp
	ret = (op1 >= op2) ? op1 : op2	; generate xsmaxcdp
	ret = (op1 <  op2) ? op1 : op2	; generate xsmincdp
	ret = (op1 <= op2) ? op1 : op2	; generate xsmincdp

   The following can be replaced only if -ffast-math is used:
	ret = (op1 <  op2) ? op2 : op1	; generate xsmaxcdp
	ret = (op1 <= op2) ? op2 : op1	; generate xsmaxcdp
	ret = (op1 >  op2) ? op2 : op1	; generate xsmincdp
	ret = (op1 >= op2) ? op2 : op1	; generate xsmincdp

   Move TRUE_COND to DEST if OP of the operands of the last comparison is
   nonzero/true, FALSE_COND if it is zero/false.

   Return false if we can't generate the appropriate minimum or maximum, and
   true if we did generate the minimum or maximum.  */

static bool
rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  enum rtx_code code = GET_CODE (op);
  rtx op0 = XEXP (op, 0);
  rtx op1 = XEXP (op, 1);
  machine_mode compare_mode = GET_MODE (op0);
  machine_mode result_mode = GET_MODE (dest);
  bool max_p = false;

  if (result_mode != compare_mode)
    return false;

  if (code == GE || code == GT)
    max_p = true;
  else if (code == LE || code == LT)
    max_p = false;
  else
    return false;

  if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
    ;

  /* Only when NaNs and signed-zeros are not in effect, smax could be
     used for `op0 < op1 ? op1 : op0`, and smin could be used for
     `op0 > op1 ? op1 : op0`.  */
  else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
	   && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
    max_p = !max_p;

  else
    return false;

  rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
  return true;
}
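
/* Illustrative example (added, not from the original sources): with a
   power9-class target the "C" semantics select

	double f (double a, double b) { return a > b ? a : b; }

   can become a single instruction instead of a compare and branch,
   e.g. (hypothetical register assignment):

	xsmaxcdp 1,1,2

   which is what the SMAX path above produces via rs6000_emit_minmax.  */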
15847 /* Possibly emit a floating point conditional move by generating a compare that
15848 sets a mask instruction and a XXSEL select instruction.
15850 Move TRUE_COND to DEST if OP of the operands of the last comparison is
15851 nonzero/true, FALSE_COND if it is zero/false.
15853 Return false if the operation cannot be generated, and true if we could
15854 generate the instruction. */
15857 rs6000_maybe_emit_fp_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
15859 enum rtx_code code
= GET_CODE (op
);
15860 rtx op0
= XEXP (op
, 0);
15861 rtx op1
= XEXP (op
, 1);
15862 machine_mode compare_mode
= GET_MODE (op0
);
15863 machine_mode result_mode
= GET_MODE (dest
);
15868 if (!can_create_pseudo_p ())
15871 /* We allow the comparison to be either SFmode/DFmode and the true/false
15872 condition to be either SFmode/DFmode. I.e. we allow:
15877 r = (a == b) ? c : d;
15884 r = (a == b) ? c : d;
15886 but we don't allow intermixing the IEEE 128-bit floating point types with
15887 the 32/64-bit scalar types. */
15889 if (!(compare_mode
== result_mode
15890 || (compare_mode
== SFmode
&& result_mode
== DFmode
)
15891 || (compare_mode
== DFmode
&& result_mode
== SFmode
)))
15904 code
= swap_condition (code
);
15905 std::swap (op0
, op1
);
15912 /* Generate: [(parallel [(set (dest)
15913 (if_then_else (op (cmp1) (cmp2))
15916 (clobber (scratch))])]. */
15918 compare_rtx
= gen_rtx_fmt_ee (code
, CCFPmode
, op0
, op1
);
15919 cmove_rtx
= gen_rtx_SET (dest
,
15920 gen_rtx_IF_THEN_ELSE (result_mode
,
15925 clobber_rtx
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (V2DImode
));
15926 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
15927 gen_rtvec (2, cmove_rtx
, clobber_rtx
)));
/* Helper function to return true if the target has instructions to do a
   compare and set mask instruction that can be used with XXSEL to implement a
   conditional move.  It is also assumed that such a target also supports the
   "C" minimum and maximum instructions.  */

static bool
have_compare_and_set_mask (machine_mode mode)
{
  switch (mode)
    {
    case E_SFmode:
    case E_DFmode:
      return TARGET_P9_MINMAX;

    case E_KFmode:
    case E_TFmode:
      return TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode);

    default:
      return false;
    }
}
15957 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
15958 operands of the last comparison is nonzero/true, FALSE_COND if it
15959 is zero/false. Return 0 if the hardware has no such operation. */
15962 rs6000_emit_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
15964 enum rtx_code code
= GET_CODE (op
);
15965 rtx op0
= XEXP (op
, 0);
15966 rtx op1
= XEXP (op
, 1);
15967 machine_mode compare_mode
= GET_MODE (op0
);
15968 machine_mode result_mode
= GET_MODE (dest
);
15970 bool is_against_zero
;
15972 /* These modes should always match. */
15973 if (GET_MODE (op1
) != compare_mode
15974 /* In the isel case however, we can use a compare immediate, so
15975 op1 may be a small constant. */
15976 && (!TARGET_ISEL
|| !short_cint_operand (op1
, VOIDmode
)))
15978 if (GET_MODE (true_cond
) != result_mode
)
15980 if (GET_MODE (false_cond
) != result_mode
)
15983 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
15985 if (have_compare_and_set_mask (compare_mode
)
15986 && have_compare_and_set_mask (result_mode
))
15988 if (rs6000_maybe_emit_maxc_minc (dest
, op
, true_cond
, false_cond
))
15991 if (rs6000_maybe_emit_fp_cmove (dest
, op
, true_cond
, false_cond
))
15995 /* Don't allow using floating point comparisons for integer results for
15997 if (FLOAT_MODE_P (compare_mode
) && !FLOAT_MODE_P (result_mode
))
16000 /* First, work out if the hardware can do this at all, or
16001 if it's too slow.... */
16002 if (!FLOAT_MODE_P (compare_mode
))
16005 return rs6000_emit_int_cmove (dest
, op
, true_cond
, false_cond
);
16009 is_against_zero
= op1
== CONST0_RTX (compare_mode
);
16011 /* A floating-point subtract might overflow, underflow, or produce
16012 an inexact result, thus changing the floating-point flags, so it
16013 can't be generated if we care about that. It's safe if one side
16014 of the construct is zero, since then no subtract will be
16016 if (SCALAR_FLOAT_MODE_P (compare_mode
)
16017 && flag_trapping_math
&& ! is_against_zero
)
16020 /* Eliminate half of the comparisons by switching operands, this
16021 makes the remaining code simpler. */
16022 if (code
== UNLT
|| code
== UNGT
|| code
== UNORDERED
|| code
== NE
16023 || code
== LTGT
|| code
== LT
|| code
== UNLE
)
16025 code
= reverse_condition_maybe_unordered (code
);
16027 true_cond
= false_cond
;
16031 /* UNEQ and LTGT take four instructions for a comparison with zero,
16032 it'll probably be faster to use a branch here too. */
16033 if (code
== UNEQ
&& HONOR_NANS (compare_mode
))
16036 /* We're going to try to implement comparisons by performing
16037 a subtract, then comparing against zero. Unfortunately,
16038 Inf - Inf is NaN which is not zero, and so if we don't
16039 know that the operand is finite and the comparison
16040 would treat EQ different to UNORDERED, we can't do it. */
16041 if (HONOR_INFINITIES (compare_mode
)
16042 && code
!= GT
&& code
!= UNGE
16043 && (!CONST_DOUBLE_P (op1
)
16044 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1
)))
16045 /* Constructs of the form (a OP b ? a : b) are safe. */
16046 && ((! rtx_equal_p (op0
, false_cond
) && ! rtx_equal_p (op1
, false_cond
))
16047 || (! rtx_equal_p (op0
, true_cond
)
16048 && ! rtx_equal_p (op1
, true_cond
))))
16051 /* At this point we know we can use fsel. */
16053 /* Don't allow compare_mode other than SFmode or DFmode, for others there
16054 is no fsel instruction. */
16055 if (compare_mode
!= SFmode
&& compare_mode
!= DFmode
)
16058 /* Reduce the comparison to a comparison against zero. */
16059 if (! is_against_zero
)
16061 temp
= gen_reg_rtx (compare_mode
);
16062 emit_insn (gen_rtx_SET (temp
, gen_rtx_MINUS (compare_mode
, op0
, op1
)));
16064 op1
= CONST0_RTX (compare_mode
);
16067 /* If we don't care about NaNs we can reduce some of the comparisons
16068 down to faster ones. */
16069 if (! HONOR_NANS (compare_mode
))
16075 true_cond
= false_cond
;
16088 /* Now, reduce everything down to a GE. */
16095 temp
= gen_reg_rtx (compare_mode
);
16096 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
16101 temp
= gen_reg_rtx (compare_mode
);
16102 emit_insn (gen_rtx_SET (temp
, gen_rtx_ABS (compare_mode
, op0
)));
16107 temp
= gen_reg_rtx (compare_mode
);
16108 emit_insn (gen_rtx_SET (temp
,
16109 gen_rtx_NEG (compare_mode
,
16110 gen_rtx_ABS (compare_mode
, op0
))));
16115 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
16116 temp
= gen_reg_rtx (result_mode
);
16117 emit_insn (gen_rtx_SET (temp
,
16118 gen_rtx_IF_THEN_ELSE (result_mode
,
16119 gen_rtx_GE (VOIDmode
,
16121 true_cond
, false_cond
)));
16122 false_cond
= true_cond
;
16125 temp
= gen_reg_rtx (compare_mode
);
16126 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
16131 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
16132 temp
= gen_reg_rtx (result_mode
);
16133 emit_insn (gen_rtx_SET (temp
,
16134 gen_rtx_IF_THEN_ELSE (result_mode
,
16135 gen_rtx_GE (VOIDmode
,
16137 true_cond
, false_cond
)));
16138 true_cond
= false_cond
;
16141 temp
= gen_reg_rtx (compare_mode
);
16142 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
16147 gcc_unreachable ();
16150 emit_insn (gen_rtx_SET (dest
,
16151 gen_rtx_IF_THEN_ELSE (result_mode
,
16152 gen_rtx_GE (VOIDmode
,
16154 true_cond
, false_cond
)));
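
/* Illustrative note (added, not from the original sources): the fsel path
   above reduces every comparison to "x >= 0".  For example

	d = (a >= b) ? t : f;

   becomes, in pseudo assembly with a hypothetical register choice,

	fsub   f0,f1,f2        # f0 = a - b
	fsel   f3,f0,f4,f5     # d = (f0 >= 0.0) ? t : f

   which is why the code above refuses the transformation when the subtract
   could trap or when NaN/Inf behaviour would change the result.  */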
/* Same as above, but for ints (isel).  */

int
rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  rtx condition_rtx, cr;
  machine_mode mode = GET_MODE (dest);
  enum rtx_code cond_code;
  rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
  bool signedp;

  if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
    return 0;

  /* We still have to do the compare, because isel doesn't do a
     compare, it just looks at the CRx bits set by a previous compare
     instruction.  */
  condition_rtx = rs6000_generate_compare (op, mode);
  cond_code = GET_CODE (condition_rtx);
  cr = XEXP (condition_rtx, 0);
  signedp = GET_MODE (cr) == CCmode;

  isel_func = (mode == SImode
	       ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
	       : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));

  switch (cond_code)
    {
    case LT: case GT: case LTU: case GTU: case EQ:
      /* isel handles these directly.  */
      break;

    default:
      /* We need to swap the sense of the comparison.  */
      {
	std::swap (false_cond, true_cond);
	PUT_CODE (condition_rtx, reverse_condition (cond_code));
      }
      break;
    }

  false_cond = force_reg (mode, false_cond);
  if (true_cond != const0_rtx)
    true_cond = force_reg (mode, true_cond);

  emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));

  return 1;
}
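
/* Illustrative example (added, not from the original sources): for

	r = (a < b) ? x : y;

   with 32-bit signed operands the expander emits a compare followed by
   isel, e.g. (hypothetical register assignment):

	cmpw   cr0,r4,r5
	isel   r3,r6,r7,0      # r3 = cr0.lt ? r6 : r7

   isel itself performs no comparison; it only tests the CR bit produced by
   the preceding compare, which is why the compare is still generated
   above.  */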

void
rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);
  enum rtx_code c;
  rtx target;

  /* VSX/altivec have direct min/max insns.  */
  if ((code == SMAX || code == SMIN)
      && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
	  || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))
	  || (TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode))))
    {
      emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
      return;
    }

  if (code == SMAX || code == SMIN)
    c = GE;
  else
    c = GEU;

  if (code == SMAX || code == UMAX)
    target = emit_conditional_move (dest, { c, op0, op1, mode },
				    op0, op1, mode, 0);
  else
    target = emit_conditional_move (dest, { c, op0, op1, mode },
				    op1, op0, mode, 0);
  gcc_assert (target);
  if (target != dest)
    emit_move_insn (dest, target);
}
/* A subroutine of the atomic operation splitters.  Jump to LABEL if
   COND is true.  Mark the jump as unlikely to be taken.  */

static void
emit_unlikely_jump (rtx cond, rtx label)
{
  rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
  rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
  add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
}
/* A subroutine of the atomic operation splitters.  Emit a load-locked
   instruction in MODE.  For QI/HImode, possibly use a pattern that includes
   the zero_extend operation.  */

static void
emit_load_locked (machine_mode mode, rtx reg, rtx mem)
{
  rtx (*fn) (rtx, rtx) = NULL;

  switch (mode)
    {
    case E_QImode:
      fn = gen_load_lockedqi;
      break;
    case E_HImode:
      fn = gen_load_lockedhi;
      break;
    case E_SImode:
      if (GET_MODE (mem) == QImode)
	fn = gen_load_lockedqi_si;
      else if (GET_MODE (mem) == HImode)
	fn = gen_load_lockedhi_si;
      else
	fn = gen_load_lockedsi;
      break;
    case E_DImode:
      fn = gen_load_lockeddi;
      break;
    case E_TImode:
      fn = gen_load_lockedti;
      break;
    default:
      gcc_unreachable ();
    }
  emit_insn (fn (reg, mem));
}
/* A subroutine of the atomic operation splitters.  Emit a store-conditional
   instruction in MODE.  */

static void
emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
{
  rtx (*fn) (rtx, rtx, rtx) = NULL;

  switch (mode)
    {
    case E_QImode:
      fn = gen_store_conditionalqi;
      break;
    case E_HImode:
      fn = gen_store_conditionalhi;
      break;
    case E_SImode:
      fn = gen_store_conditionalsi;
      break;
    case E_DImode:
      fn = gen_store_conditionaldi;
      break;
    case E_TImode:
      fn = gen_store_conditionalti;
      break;
    default:
      gcc_unreachable ();
    }

  /* Emit sync before stwcx. to address PPC405 Erratum.  */
  if (PPC405_ERRATUM77)
    emit_insn (gen_hwsync ());

  emit_insn (fn (res, mem, val));
}
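
/* Illustrative sketch (added, not from the original sources): the
   load-locked/store-conditional helpers above are used to build retry loops
   of the usual form, e.g. for an SImode atomic add (registers hypothetical):

	.L1:	lwarx   r9,0,r3        # load-locked from [r3]
		add     r9,r9,r4
		stwcx.  r9,0,r3        # store-conditional back to [r3]
		bne-    0,.L1          # reservation lost: retry

   The PPC405_ERRATUM77 sync emitted above goes immediately before the
   stwcx. in such loops.  */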
/* Expand barriers before and after a load_locked/store_cond sequence.  */

static rtx
rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
{
  rtx addr = XEXP (mem, 0);

  if (!legitimate_indirect_address_p (addr, reload_completed)
      && !legitimate_indexed_address_p (addr, reload_completed))
    {
      addr = force_reg (Pmode, addr);
      mem = replace_equiv_address_nv (mem, addr);
    }

  switch (model)
    {
    case MEMMODEL_RELAXED:
    case MEMMODEL_CONSUME:
    case MEMMODEL_ACQUIRE:
      break;
    case MEMMODEL_RELEASE:
    case MEMMODEL_ACQ_REL:
      emit_insn (gen_lwsync ());
      break;
    case MEMMODEL_SEQ_CST:
      emit_insn (gen_hwsync ());
      break;
    default:
      gcc_unreachable ();
    }
  return mem;
}

static void
rs6000_post_atomic_barrier (enum memmodel model)
{
  switch (model)
    {
    case MEMMODEL_RELAXED:
    case MEMMODEL_CONSUME:
    case MEMMODEL_RELEASE:
      break;
    case MEMMODEL_ACQUIRE:
    case MEMMODEL_ACQ_REL:
    case MEMMODEL_SEQ_CST:
      emit_insn (gen_isync ());
      break;
    default:
      gcc_unreachable ();
    }
}
/* A subroutine of the various atomic expanders.  For sub-word operations,
   we must adjust things to operate on SImode.  Given the original MEM,
   return a new aligned memory.  Also build and return the quantities by
   which to shift and mask.  */

static rtx
rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
{
  rtx addr, align, shift, mask, mem;
  HOST_WIDE_INT shift_mask;
  machine_mode mode = GET_MODE (orig_mem);

  /* For smaller modes, we have to implement this via SImode.  */
  shift_mask = (mode == QImode ? 0x18 : 0x10);

  addr = XEXP (orig_mem, 0);
  addr = force_reg (GET_MODE (addr), addr);

  /* Aligned memory containing subword.  Generate a new memory.  We
     do not want any of the existing MEM_ATTR data, as we're now
     accessing memory outside the original object.  */
  align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
  mem = gen_rtx_MEM (SImode, align);
  MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
  if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
    set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);

  /* Shift amount for subword relative to aligned word.  */
  shift = gen_reg_rtx (SImode);
  addr = gen_lowpart (SImode, addr);
  rtx tmp = gen_reg_rtx (SImode);
  emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
  emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
  if (BYTES_BIG_ENDIAN)
    shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
				 shift, 1, OPTAB_LIB_WIDEN);
  *pshift = shift;

  /* Mask for insertion.  */
  mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
			      shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
  *pmask = mask;

  return mem;
}
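
/* Worked example (added for illustration, not from the original sources):
   for a QImode atomic at address ADDR = 0x1003 the code above computes

	align = ADDR & -4          = 0x1000      (SImode container)
	shift = (ADDR << 3) & 0x18 = 24          (little endian)
	shift ^= 0x18              -> 0          (big endian)
	mask  = 0xff << shift

   so the byte can be updated inside the aligned word with AND/IOR while
   the larx/stcx. pair operates on the full SImode word.  */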
/* A subroutine of the various atomic expanders.  For sub-word operands,
   combine OLDVAL and NEWVAL via MASK.  Returns a new pseudo.  */

static rtx
rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
{
  rtx x;

  x = gen_reg_rtx (SImode);
  emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
					  gen_rtx_NOT (SImode, mask),
					  oldval)));

  x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);

  return x;
}
/* A subroutine of the various atomic expanders.  For sub-word operands,
   extract WIDE to NARROW via SHIFT.  */

static void
rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
{
  wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
			      wide, 1, OPTAB_LIB_WIDEN);
  emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
}
/* Expand an atomic compare and swap operation.  */

void
rs6000_expand_atomic_compare_and_swap (rtx operands[])
{
  rtx boolval, retval, mem, oldval, newval, cond;
  rtx label1, label2, x, mask, shift;
  machine_mode mode, orig_mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;

  boolval = operands[0];
  retval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (INTVAL (operands[5]) != 0);
  mod_s = memmodel_base (INTVAL (operands[6]));
  mod_f = memmodel_base (INTVAL (operands[7]));
  orig_mode = mode = GET_MODE (mem);

  mask = shift = NULL_RTX;
  if (mode == QImode || mode == HImode)
    {
      /* Before power8, we didn't have access to lbarx/lharx, so generate a
	 lwarx and shift/mask operations.  With power8, we need to do the
	 comparison in SImode, but the store is still done in QI/HImode.  */
      oldval = convert_modes (SImode, mode, oldval, 1);

      if (!TARGET_SYNC_HI_QI)
	{
	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

	  /* Shift and mask OLDVAL into position with the word.  */
	  oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
					NULL_RTX, 1, OPTAB_LIB_WIDEN);

	  /* Shift and mask NEWVAL into position within the word.  */
	  newval = convert_modes (SImode, mode, newval, 1);
	  newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
					NULL_RTX, 1, OPTAB_LIB_WIDEN);
	}

      /* Prepare to adjust the return value.  */
      retval = gen_reg_rtx (SImode);
      mode = SImode;
    }
  else if (reg_overlap_mentioned_p (retval, oldval))
    oldval = copy_to_reg (oldval);

  if (mode != TImode && !reg_or_short_operand (oldval, mode))
    oldval = copy_to_mode_reg (mode, oldval);

  if (reg_overlap_mentioned_p (retval, newval))
    newval = copy_to_reg (newval);

  mem = rs6000_pre_atomic_barrier (mem, mod_s);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
      emit_label (XEXP (label1, 0));
    }
  label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());

  emit_load_locked (mode, retval, mem);

  x = retval;
  if (mask)
    x = expand_simple_binop (SImode, AND, retval, mask,
			     NULL_RTX, 1, OPTAB_LIB_WIDEN);

  cond = gen_reg_rtx (CCmode);
  /* If we have TImode, synthesize a comparison.  */
  if (mode != TImode)
    x = gen_rtx_COMPARE (CCmode, x, oldval);
  else
    {
      rtx xor1_result = gen_reg_rtx (DImode);
      rtx xor2_result = gen_reg_rtx (DImode);
      rtx or_result = gen_reg_rtx (DImode);
      rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
      rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
      rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
      rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);

      emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
      emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
      emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
      x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
    }

  emit_insn (gen_rtx_SET (cond, x));

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label2);

  x = newval;
  if (mask)
    x = rs6000_mask_atomic_subword (retval, newval, mask);

  emit_store_conditional (orig_mode, cond, mem, x);

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      emit_unlikely_jump (x, label1);
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));

  rs6000_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));

  if (shift)
    rs6000_finish_atomic_subword (operands[1], retval, shift);
  else if (mode != GET_MODE (operands[1]))
    convert_move (operands[1], retval, 1);

  /* In all cases, CR0 contains EQ on success, and NE on failure.  */
  x = gen_rtx_EQ (SImode, cond, const0_rtx);
  emit_insn (gen_rtx_SET (boolval, x));
}
/* Expand an atomic exchange operation.  */

void
rs6000_expand_atomic_exchange (rtx operands[])
{
  rtx retval, mem, val, cond;
  machine_mode mode;
  enum memmodel model;
  rtx label, x, mask, shift;

  retval = operands[0];
  mem = operands[1];
  val = operands[2];
  model = memmodel_base (INTVAL (operands[3]));
  mode = GET_MODE (mem);

  mask = shift = NULL_RTX;
  if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
    {
      mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

      /* Shift and mask VAL into position with the word.  */
      val = convert_modes (SImode, mode, val, 1);
      val = expand_simple_binop (SImode, ASHIFT, val, shift,
				 NULL_RTX, 1, OPTAB_LIB_WIDEN);

      /* Prepare to adjust the return value.  */
      retval = gen_reg_rtx (SImode);
      mode = SImode;
    }

  mem = rs6000_pre_atomic_barrier (mem, model);

  label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
  emit_label (XEXP (label, 0));

  emit_load_locked (mode, retval, mem);

  x = val;
  if (mask)
    x = rs6000_mask_atomic_subword (retval, val, mask);

  cond = gen_reg_rtx (CCmode);
  emit_store_conditional (mode, cond, mem, x);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label);

  rs6000_post_atomic_barrier (model);

  if (shift)
    rs6000_finish_atomic_subword (operands[0], retval, shift);
}
/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
   to perform.  MEM is the memory on which to operate.  VAL is the second
   operand of the binary operator.  BEFORE and AFTER are optional locations to
   return the value of MEM either before or after the operation.  MODEL_RTX
   is a CONST_INT containing the memory model to use.  */

void
rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
			 rtx orig_before, rtx orig_after, rtx model_rtx)
{
  enum memmodel model = memmodel_base (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode store_mode = mode;
  rtx label, x, cond, mask, shift;
  rtx before = orig_before, after = orig_after;

  mask = shift = NULL_RTX;
  /* On power8, we want to use SImode for the operation.  On previous systems,
     use the operation in a subword and shift/mask to get the proper byte or
     halfword.  */
  if (mode == QImode || mode == HImode)
    {
      if (TARGET_SYNC_HI_QI)
	{
	  val = convert_modes (SImode, mode, val, 1);

	  /* Prepare to adjust the return value.  */
	  before = gen_reg_rtx (SImode);
	  if (after)
	    after = gen_reg_rtx (SImode);
	  mode = SImode;
	}
      else
	{
	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

	  /* Shift and mask VAL into position with the word.  */
	  val = convert_modes (SImode, mode, val, 1);
	  val = expand_simple_binop (SImode, ASHIFT, val, shift,
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);

	  switch (code)
	    {
	    case IOR:
	    case XOR:
	      /* We've already zero-extended VAL.  That is sufficient to
		 make certain that it does not affect other bits.  */
	      mask = NULL;
	      break;

	    case AND:
	      /* If we make certain that all of the other bits in VAL are
		 set, that will be sufficient to not affect other bits.  */
	      x = gen_rtx_NOT (SImode, mask);
	      x = gen_rtx_IOR (SImode, x, val);
	      emit_insn (gen_rtx_SET (val, x));
	      mask = NULL;
	      break;

	    case NOT:
	    case PLUS:
	    case MINUS:
	      /* These will all affect bits outside the field and need
		 adjustment via MASK within the loop.  */
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  /* Prepare to adjust the return value.  */
	  before = gen_reg_rtx (SImode);
	  if (after)
	    after = gen_reg_rtx (SImode);
	  store_mode = mode = SImode;
	}
    }

  mem = rs6000_pre_atomic_barrier (mem, model);

  label = gen_label_rtx ();
  emit_label (label);
  label = gen_rtx_LABEL_REF (VOIDmode, label);

  if (before == NULL_RTX)
    before = gen_reg_rtx (mode);

  emit_load_locked (mode, before, mem);

  if (code == NOT)
    {
      x = expand_simple_binop (mode, AND, before, val,
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      after = expand_simple_unop (mode, NOT, x, after, 1);
    }
  else
    after = expand_simple_binop (mode, code, before, val,
				 after, 1, OPTAB_LIB_WIDEN);

  x = after;
  if (mask)
    {
      x = expand_simple_binop (SImode, AND, after, mask,
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      x = rs6000_mask_atomic_subword (before, x, mask);
    }
  else if (store_mode != mode)
    x = convert_modes (store_mode, mode, x, 1);

  cond = gen_reg_rtx (CCmode);
  emit_store_conditional (store_mode, cond, mem, x);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label);

  rs6000_post_atomic_barrier (model);

  if (shift)
    {
      /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
	 then do the calculations in a SImode register.  */
      if (orig_before)
	rs6000_finish_atomic_subword (orig_before, before, shift);
      if (orig_after)
	rs6000_finish_atomic_subword (orig_after, after, shift);
    }
  else if (store_mode != mode)
    {
      /* QImode/HImode on machines with lbarx/lharx where we do the native
	 operation and then do the calculations in a SImode register.  */
      if (orig_before)
	convert_move (orig_before, before, 1);
      if (orig_after)
	convert_move (orig_after, after, 1);
    }
  else if (orig_after && after != orig_after)
    emit_move_insn (orig_after, after);
}
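/* For illustration only (assumed instruction sequence, not emitted verbatim):
   with CODE == PLUS, a full-word SImode __atomic_fetch_add under a seq_cst
   model expands along the lines of

	sync			# rs6000_pre_atomic_barrier
     1:	lwarx   rBEFORE,0,rMEM	# emit_load_locked
	add     rAFTER,rBEFORE,rVAL
	stwcx.  rAFTER,0,rMEM	# emit_store_conditional
	bne-    1b		# emit_unlikely_jump back to the label
	isync			# rs6000_post_atomic_barrier

   where rBEFORE/rAFTER/rVAL/rMEM are placeholder register names.  */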
static GTY(()) alias_set_type TOC_alias_set = -1;

alias_set_type
get_TOC_alias_set (void)
{
  if (TOC_alias_set == -1)
    TOC_alias_set = new_alias_set ();
  return TOC_alias_set;
}

/* The mode the ABI uses for a word.  This is not the same as word_mode
   for -m32 -mpowerpc64.  This is used to implement various target hooks.  */

static scalar_int_mode
rs6000_abi_word_mode (void)
{
  return TARGET_32BIT ? SImode : DImode;
}

/* Implement the TARGET_OFFLOAD_OPTIONS hook.  */

static char *
rs6000_offload_options (void)
{
  if (TARGET_64BIT)
    return xstrdup ("-foffload-abi=lp64");
  else
    return xstrdup ("-foffload-abi=ilp32");
}
/* A quick summary of the various types of 'constant-pool tables'
   under PowerPC:

	Target		Flags		Name		One table per
	AIX		(none)		AIX TOC		object file
	AIX		-mfull-toc	AIX TOC		object file
	AIX		-mminimal-toc	AIX minimal TOC	translation unit
	SVR4/EABI	(none)		SVR4 SDATA	object file
	SVR4/EABI	-fpic		SVR4 pic	object file
	SVR4/EABI	-fPIC		SVR4 PIC	translation unit
	SVR4/EABI	-mrelocatable	EABI TOC	function
	SVR4/EABI	-maix		AIX TOC		object file
	SVR4/EABI	-maix -mminimal-toc
					AIX minimal TOC	translation unit

	Name		Reg.	Set by	entries	      contains:
				made by	 addrs?	fp?	sum?

	AIX TOC		2	crt0	as	Y	option	option
	AIX minimal TOC	30	prolog	gcc	Y	Y	option
	SVR4 SDATA	13	crt0	gcc	N	Y	N
	SVR4 pic	30	prolog	ld	Y	not yet	N
	SVR4 PIC	30	prolog	gcc	Y	option	option
	EABI TOC	30	prolog	gcc	Y	option	option  */
/* Hash functions for the hash table.  */

static unsigned
rs6000_hash_constant (rtx k)
{
  enum rtx_code code = GET_CODE (k);
  machine_mode mode = GET_MODE (k);
  unsigned result = (code << 3) ^ mode;
  const char *format;
  int flen, fidx;

  format = GET_RTX_FORMAT (code);
  flen = strlen (format);
  fidx = 0;

  switch (code)
    {
    case LABEL_REF:
      return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));

    case CONST_WIDE_INT:
      {
	int i;
	flen = CONST_WIDE_INT_NUNITS (k);
	for (i = 0; i < flen; i++)
	  result = result * 613 + CONST_WIDE_INT_ELT (k, i);
	return result;
      }

    case CONST_DOUBLE:
      return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;

    case CODE_LABEL:
      fidx = 3;
      break;

    default:
      break;
    }

  for (; fidx < flen; fidx++)
    switch (format[fidx])
      {
      case 's':
	{
	  unsigned i, len;
	  const char *str = XSTR (k, fidx);
	  len = strlen (str);
	  result = result * 613 + len;
	  for (i = 0; i < len; i++)
	    result = result * 613 + (unsigned) str[i];
	  break;
	}
      case 'u':
      case 'e':
	result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
	break;
      case 'i':
      case 'n':
	result = result * 613 + (unsigned) XINT (k, fidx);
	break;
      case 'w':
	if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
	  result = result * 613 + (unsigned) XWINT (k, fidx);
	else
	  {
	    size_t i;
	    for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
	      result = result * 613 + (unsigned) (XWINT (k, fidx)
						  >> CHAR_BIT * i);
	  }
	break;
      case '0':
	break;
      default:
	gcc_unreachable ();
      }

  return result;
}
hashval_t
toc_hasher::hash (toc_hash_struct *thc)
{
  return rs6000_hash_constant (thc->key) ^ thc->key_mode;
}

/* Compare H1 and H2 for equivalence.  */

bool
toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
{
  rtx r1 = h1->key;
  rtx r2 = h2->key;

  if (h1->key_mode != h2->key_mode)
    return 0;

  return rtx_equal_p (r1, r2);
}

/* These are the names given by the C++ front-end to vtables, and
   vtable-like objects.  Ideally, this logic should not be here;
   instead, there should be some programmatic way of inquiring as
   to whether or not an object is a vtable.  */

#define VTABLE_NAME_P(NAME)			\
  (startswith (name, "_vt.")			\
   || startswith (name, "_ZTV")			\
   || startswith (name, "_ZTT")			\
   || startswith (name, "_ZTI")			\
   || startswith (name, "_ZTC"))
#ifdef NO_DOLLAR_IN_LABEL
/* Return a GGC-allocated character string translating dollar signs in
   input NAME to underscores.  Used by XCOFF ASM_OUTPUT_LABELREF.  */

const char *
rs6000_xcoff_strip_dollar (const char *name)
{
  char *strip, *p;
  const char *q;
  size_t len;

  q = (const char *) strchr (name, '$');

  if (q == 0 || q == name)
    return name;

  len = strlen (name);
  strip = XALLOCAVEC (char, len + 1);
  strcpy (strip, name);
  p = strip + (q - name);
  while (p)
    {
      *p = '_';
      p = strchr (p + 1, '$');
    }

  return ggc_alloc_string (strip, len);
}
#endif
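/* For example (illustrative): with NO_DOLLAR_IN_LABEL defined, a reference to
   "foo$stub" is written out as "foo_stub"; a name containing no '$', or one
   that starts with '$', is returned unchanged.  */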
void
rs6000_output_symbol_ref (FILE *file, rtx x)
{
  const char *name = XSTR (x, 0);

  /* Currently C++ toc references to vtables can be emitted before it
     is decided whether the vtable is public or private.  If this is
     the case, then the linker will eventually complain that there is
     a reference to an unknown section.  Thus, for vtables only,
     we emit the TOC reference to reference the identifier and not the
     symbol.  */
  if (VTABLE_NAME_P (name))
    RS6000_OUTPUT_BASENAME (file, name);
  else
    assemble_name (file, name);
}
16995 /* Output a TOC entry. We derive the entry name from what is being
16999 output_toc (FILE *file
, rtx x
, int labelno
, machine_mode mode
)
17002 const char *name
= buf
;
17004 HOST_WIDE_INT offset
= 0;
17006 gcc_assert (!TARGET_NO_TOC_OR_PCREL
);
17008 /* When the linker won't eliminate them, don't output duplicate
17009 TOC entries (this happens on AIX if there is any kind of TOC,
17010 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
17012 if (TARGET_TOC
&& GET_CODE (x
) != LABEL_REF
)
17014 struct toc_hash_struct
*h
;
17016 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
17017 time because GGC is not initialized at that point. */
17018 if (toc_hash_table
== NULL
)
17019 toc_hash_table
= hash_table
<toc_hasher
>::create_ggc (1021);
17021 h
= ggc_alloc
<toc_hash_struct
> ();
17023 h
->key_mode
= mode
;
17024 h
->labelno
= labelno
;
17026 toc_hash_struct
**found
= toc_hash_table
->find_slot (h
, INSERT
);
17027 if (*found
== NULL
)
17029 else /* This is indeed a duplicate.
17030 Set this label equal to that label. */
17032 fputs ("\t.set ", file
);
17033 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LC");
17034 fprintf (file
, "%d,", labelno
);
17035 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LC");
17036 fprintf (file
, "%d\n", ((*found
)->labelno
));
17039 if (TARGET_XCOFF
&& SYMBOL_REF_P (x
)
17040 && (SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_GLOBAL_DYNAMIC
17041 || SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
))
17043 fputs ("\t.set ", file
);
17044 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LCM");
17045 fprintf (file
, "%d,", labelno
);
17046 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LCM");
17047 fprintf (file
, "%d\n", ((*found
)->labelno
));
17054 /* If we're going to put a double constant in the TOC, make sure it's
17055 aligned properly when strict alignment is on. */
17056 if ((CONST_DOUBLE_P (x
) || CONST_WIDE_INT_P (x
))
17057 && STRICT_ALIGNMENT
17058 && GET_MODE_BITSIZE (mode
) >= 64
17059 && ! (TARGET_NO_FP_IN_TOC
&& ! TARGET_MINIMAL_TOC
)) {
17060 ASM_OUTPUT_ALIGN (file
, 3);
17063 (*targetm
.asm_out
.internal_label
) (file
, "LC", labelno
);
17065 /* Handle FP constants specially. Note that if we have a minimal
17066 TOC, things we put here aren't actually in the TOC, so we can allow
17068 if (CONST_DOUBLE_P (x
)
17069 && (GET_MODE (x
) == TFmode
|| GET_MODE (x
) == TDmode
17070 || GET_MODE (x
) == IFmode
|| GET_MODE (x
) == KFmode
))
17074 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
17075 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17077 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17081 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17082 fputs (DOUBLE_INT_ASM_OP
, file
);
17084 fprintf (file
, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17085 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
17086 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
17087 fprintf (file
, "0x%lx%08lx,0x%lx%08lx\n",
17088 k
[WORDS_BIG_ENDIAN
? 0 : 1] & 0xffffffff,
17089 k
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffff,
17090 k
[WORDS_BIG_ENDIAN
? 2 : 3] & 0xffffffff,
17091 k
[WORDS_BIG_ENDIAN
? 3 : 2] & 0xffffffff);
17096 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17097 fputs ("\t.long ", file
);
17099 fprintf (file
, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17100 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
17101 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
17102 fprintf (file
, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17103 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
17104 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
17108 else if (CONST_DOUBLE_P (x
)
17109 && (GET_MODE (x
) == DFmode
|| GET_MODE (x
) == DDmode
))
17113 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
17114 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17116 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17120 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17121 fputs (DOUBLE_INT_ASM_OP
, file
);
17123 fprintf (file
, "\t.tc FD_%lx_%lx[TC],",
17124 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17125 fprintf (file
, "0x%lx%08lx\n",
17126 k
[WORDS_BIG_ENDIAN
? 0 : 1] & 0xffffffff,
17127 k
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffff);
17132 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17133 fputs ("\t.long ", file
);
17135 fprintf (file
, "\t.tc FD_%lx_%lx[TC],",
17136 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17137 fprintf (file
, "0x%lx,0x%lx\n",
17138 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17142 else if (CONST_DOUBLE_P (x
)
17143 && (GET_MODE (x
) == SFmode
|| GET_MODE (x
) == SDmode
))
17147 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
17148 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x
), l
);
17150 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
17154 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17155 fputs (DOUBLE_INT_ASM_OP
, file
);
17157 fprintf (file
, "\t.tc FS_%lx[TC],", l
& 0xffffffff);
17158 if (WORDS_BIG_ENDIAN
)
17159 fprintf (file
, "0x%lx00000000\n", l
& 0xffffffff);
17161 fprintf (file
, "0x%lx\n", l
& 0xffffffff);
17166 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17167 fputs ("\t.long ", file
);
17169 fprintf (file
, "\t.tc FS_%lx[TC],", l
& 0xffffffff);
17170 fprintf (file
, "0x%lx\n", l
& 0xffffffff);
17174 else if (GET_MODE (x
) == VOIDmode
&& CONST_INT_P (x
))
17176 unsigned HOST_WIDE_INT low
;
17177 HOST_WIDE_INT high
;
17179 low
= INTVAL (x
) & 0xffffffff;
17180 high
= (HOST_WIDE_INT
) INTVAL (x
) >> 32;
17182 /* TOC entries are always Pmode-sized, so when big-endian
17183 smaller integer constants in the TOC need to be padded.
17184 (This is still a win over putting the constants in
17185 a separate constant pool, because then we'd have
17186 to have both a TOC entry _and_ the actual constant.)
17188 For a 32-bit target, CONST_INT values are loaded and shifted
17189 entirely within `low' and can be stored in one TOC entry. */
17191 /* It would be easy to make this work, but it doesn't now. */
17192 gcc_assert (!TARGET_64BIT
|| POINTER_SIZE
>= GET_MODE_BITSIZE (mode
));
17194 if (WORDS_BIG_ENDIAN
&& POINTER_SIZE
> GET_MODE_BITSIZE (mode
))
17197 low
<<= POINTER_SIZE
- GET_MODE_BITSIZE (mode
);
17198 high
= (HOST_WIDE_INT
) low
>> 32;
17204 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17205 fputs (DOUBLE_INT_ASM_OP
, file
);
17207 fprintf (file
, "\t.tc ID_%lx_%lx[TC],",
17208 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17209 fprintf (file
, "0x%lx%08lx\n",
17210 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17215 if (POINTER_SIZE
< GET_MODE_BITSIZE (mode
))
17217 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17218 fputs ("\t.long ", file
);
17220 fprintf (file
, "\t.tc ID_%lx_%lx[TC],",
17221 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17222 fprintf (file
, "0x%lx,0x%lx\n",
17223 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17227 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17228 fputs ("\t.long ", file
);
17230 fprintf (file
, "\t.tc IS_%lx[TC],", (long) low
& 0xffffffff);
17231 fprintf (file
, "0x%lx\n", (long) low
& 0xffffffff);
17237 if (GET_CODE (x
) == CONST
)
17239 gcc_assert (GET_CODE (XEXP (x
, 0)) == PLUS
17240 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)));
17242 base
= XEXP (XEXP (x
, 0), 0);
17243 offset
= INTVAL (XEXP (XEXP (x
, 0), 1));
17246 switch (GET_CODE (base
))
17249 name
= XSTR (base
, 0);
17253 ASM_GENERATE_INTERNAL_LABEL (buf
, "L",
17254 CODE_LABEL_NUMBER (XEXP (base
, 0)));
17258 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (base
));
17262 gcc_unreachable ();
17265 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17266 fputs (TARGET_32BIT
? "\t.long " : DOUBLE_INT_ASM_OP
, file
);
17269 fputs ("\t.tc ", file
);
17270 RS6000_OUTPUT_BASENAME (file
, name
);
17273 fprintf (file
, ".N" HOST_WIDE_INT_PRINT_UNSIGNED
, - offset
);
17275 fprintf (file
, ".P" HOST_WIDE_INT_PRINT_UNSIGNED
, offset
);
17277 /* Mark large TOC symbols on AIX with [TE] so they are mapped
17278 after other TOC symbols, reducing overflow of small TOC access
17279 to [TC] symbols. */
17280 fputs (TARGET_XCOFF
&& TARGET_CMODEL
!= CMODEL_SMALL
17281 ? "[TE]," : "[TC],", file
);
17284 /* Currently C++ toc references to vtables can be emitted before it
17285 is decided whether the vtable is public or private. If this is
17286 the case, then the linker will eventually complain that there is
17287 a TOC reference to an unknown section. Thus, for vtables only,
17288 we emit the TOC reference to reference the symbol and not the
17290 if (VTABLE_NAME_P (name
))
17292 RS6000_OUTPUT_BASENAME (file
, name
);
17294 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, offset
);
17295 else if (offset
> 0)
17296 fprintf (file
, "+" HOST_WIDE_INT_PRINT_DEC
, offset
);
17299 output_addr_const (file
, x
);
17302 if (TARGET_XCOFF
&& SYMBOL_REF_P (base
))
17304 switch (SYMBOL_REF_TLS_MODEL (base
))
17308 case TLS_MODEL_LOCAL_EXEC
:
17309 fputs ("@le", file
);
17311 case TLS_MODEL_INITIAL_EXEC
:
17312 fputs ("@ie", file
);
17314 /* Use global-dynamic for local-dynamic. */
17315 case TLS_MODEL_GLOBAL_DYNAMIC
:
17316 case TLS_MODEL_LOCAL_DYNAMIC
:
17318 (*targetm
.asm_out
.internal_label
) (file
, "LCM", labelno
);
17319 fputs ("\t.tc .", file
);
17320 RS6000_OUTPUT_BASENAME (file
, name
);
17321 fputs ("[TC],", file
);
17322 output_addr_const (file
, x
);
17323 fputs ("@m", file
);
17326 gcc_unreachable ();
/* Output an assembler pseudo-op to write an ASCII string of N characters
   starting at P to FILE.

   On the RS/6000, we have to do this using the .byte operation and
   write out special characters outside the quoted string.
   Also, the assembler is broken; very long strings are truncated,
   so we must artificially break them up early.  */

void
output_ascii (FILE *file, const char *p, int n)
{
  char c;
  int i, count_string;
  const char *for_string = "\t.byte \"";
  const char *for_decimal = "\t.byte ";
  const char *to_close = NULL;

  count_string = 0;
  for (i = 0; i < n; i++)
    {
      c = *p++;
      if (c >= ' ' && c < 0177)
	{
	  if (for_string)
	    fputs (for_string, file);
	  putc (c, file);

	  /* Write two quotes to get one.  */
	  if (c == '"')
	    {
	      putc (c, file);
	      ++count_string;
	    }

	  for_string = NULL;
	  for_decimal = "\"\n\t.byte ";
	  to_close = "\"\n";
	  ++count_string;

	  if (count_string >= 512)
	    {
	      fputs (to_close, file);

	      for_string = "\t.byte \"";
	      for_decimal = "\t.byte ";
	      to_close = NULL;
	      count_string = 0;
	    }
	}
      else
	{
	  if (for_decimal)
	    fputs (for_decimal, file);
	  fprintf (file, "%d", c);

	  for_string = "\n\t.byte \"";
	  for_decimal = ", ";
	  to_close = "\n";
	  count_string = 0;
	}
    }

  /* Now close the string if we have written one.  Then end the line.  */
  if (to_close)
    fputs (to_close, file);
}
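/* As a concrete illustration (assumed input, output shown only roughly): for
   the six input bytes "hi", '\n', "ok", '\0' this routine emits

	.byte "hi"
	.byte 10
	.byte "ok"
	.byte 0

   with printable runs kept inside quoted strings and everything else written
   as decimal .byte values.  */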
/* Generate a unique section name for FILENAME for a section type
   represented by SECTION_DESC.  Output goes into BUF.

   SECTION_DESC can be any string, as long as it is different for each
   possible section type.

   We name the section in the same manner as xlc.  The name begins with an
   underscore followed by the filename (after stripping any leading directory
   names) with the last period replaced by the string SECTION_DESC.  If
   FILENAME does not contain a period, SECTION_DESC is appended to the end of
   the name.  */

void
rs6000_gen_section_name (char **buf, const char *filename,
			 const char *section_desc)
{
  const char *q, *after_last_slash, *last_period = 0;
  char *p;
  int len;

  after_last_slash = filename;
  for (q = filename; *q; q++)
    {
      if (*q == '/')
	after_last_slash = q + 1;
      else if (*q == '.')
	last_period = q;
    }

  len = strlen (after_last_slash) + strlen (section_desc) + 2;
  *buf = (char *) xmalloc (len);

  p = *buf;
  *p++ = '_';

  for (q = after_last_slash; *q; q++)
    {
      if (q == last_period)
	{
	  strcpy (p, section_desc);
	  p += strlen (section_desc);
	  break;
	}

      else if (ISALNUM (*q))
	*p++ = *q;
    }

  if (last_period == 0)
    strcpy (p, section_desc);
  else
    *p = '\0';
}
17455 /* Emit profile function. */
17458 output_profile_hook (int labelno ATTRIBUTE_UNUSED
)
17460 /* Non-standard profiling for kernels, which just saves LR then calls
17461 _mcount without worrying about arg saves. The idea is to change
17462 the function prologue as little as possible as it isn't easy to
17463 account for arg save/restore code added just for _mcount. */
17464 if (TARGET_PROFILE_KERNEL
)
17467 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
17469 #ifndef NO_PROFILE_COUNTERS
17470 # define NO_PROFILE_COUNTERS 0
17472 if (NO_PROFILE_COUNTERS
)
17473 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
17474 LCT_NORMAL
, VOIDmode
);
17478 const char *label_name
;
17481 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
17482 label_name
= ggc_strdup ((*targetm
.strip_name_encoding
) (buf
));
17483 fun
= gen_rtx_SYMBOL_REF (Pmode
, label_name
);
17485 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
17486 LCT_NORMAL
, VOIDmode
, fun
, Pmode
);
17489 else if (DEFAULT_ABI
== ABI_DARWIN
)
17491 const char *mcount_name
= RS6000_MCOUNT
;
17492 int caller_addr_regno
= LR_REGNO
;
17494 /* Be conservative and always set this, at least for now. */
17495 crtl
->uses_pic_offset_table
= 1;
17498 /* For PIC code, set up a stub and collect the caller's address
17499 from r0, which is where the prologue puts it. */
17500 if (MACHOPIC_INDIRECT
17501 && crtl
->uses_pic_offset_table
)
17502 caller_addr_regno
= 0;
17504 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, mcount_name
),
17505 LCT_NORMAL
, VOIDmode
,
17506 gen_rtx_REG (Pmode
, caller_addr_regno
), Pmode
);
17510 /* Write function profiler code. */
17513 output_function_profiler (FILE *file
, int labelno
)
17517 switch (DEFAULT_ABI
)
17520 gcc_unreachable ();
17525 warning (0, "no profiling of 64-bit code for this ABI");
17528 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
17529 fprintf (file
, "\tmflr %s\n", reg_names
[0]);
17530 if (NO_PROFILE_COUNTERS
)
17532 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17533 reg_names
[0], reg_names
[1]);
17535 else if (TARGET_SECURE_PLT
&& flag_pic
)
17537 if (TARGET_LINK_STACK
)
17540 get_ppc476_thunk_name (name
);
17541 asm_fprintf (file
, "\tbl %s\n", name
);
17544 asm_fprintf (file
, "\tbcl 20,31,1f\n1:\n");
17545 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17546 reg_names
[0], reg_names
[1]);
17547 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
17548 asm_fprintf (file
, "\taddis %s,%s,",
17549 reg_names
[12], reg_names
[12]);
17550 assemble_name (file
, buf
);
17551 asm_fprintf (file
, "-1b@ha\n\tla %s,", reg_names
[0]);
17552 assemble_name (file
, buf
);
17553 asm_fprintf (file
, "-1b@l(%s)\n", reg_names
[12]);
17555 else if (flag_pic
== 1)
17557 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file
);
17558 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17559 reg_names
[0], reg_names
[1]);
17560 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
17561 asm_fprintf (file
, "\tlwz %s,", reg_names
[0]);
17562 assemble_name (file
, buf
);
17563 asm_fprintf (file
, "@got(%s)\n", reg_names
[12]);
17565 else if (flag_pic
> 1)
17567 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17568 reg_names
[0], reg_names
[1]);
17569 /* Now, we need to get the address of the label. */
17570 if (TARGET_LINK_STACK
)
17573 get_ppc476_thunk_name (name
);
17574 asm_fprintf (file
, "\tbl %s\n\tb 1f\n\t.long ", name
);
17575 assemble_name (file
, buf
);
17576 fputs ("-.\n1:", file
);
17577 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
17578 asm_fprintf (file
, "\taddi %s,%s,4\n",
17579 reg_names
[11], reg_names
[11]);
17583 fputs ("\tbcl 20,31,1f\n\t.long ", file
);
17584 assemble_name (file
, buf
);
17585 fputs ("-.\n1:", file
);
17586 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
17588 asm_fprintf (file
, "\tlwz %s,0(%s)\n",
17589 reg_names
[0], reg_names
[11]);
17590 asm_fprintf (file
, "\tadd %s,%s,%s\n",
17591 reg_names
[0], reg_names
[0], reg_names
[11]);
17595 asm_fprintf (file
, "\tlis %s,", reg_names
[12]);
17596 assemble_name (file
, buf
);
17597 fputs ("@ha\n", file
);
17598 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17599 reg_names
[0], reg_names
[1]);
17600 asm_fprintf (file
, "\tla %s,", reg_names
[0]);
17601 assemble_name (file
, buf
);
17602 asm_fprintf (file
, "@l(%s)\n", reg_names
[12]);
17605 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
17606 fprintf (file
, "\tbl %s%s\n",
17607 RS6000_MCOUNT
, flag_pic
? "@plt" : "");
17613 /* Don't do anything, done in output_profile_hook (). */
/* The following variable value is the last issued insn.  */

static rtx_insn *last_scheduled_insn;

/* The following variable helps to balance issuing of load and
   store instructions.  */

static int load_store_pendulum;

/* The following variable helps pair divide insns during scheduling.  */

static int divide_cnt;

/* The following variable helps pair and alternate vector and vector load
   insns during scheduling.  */

static int vec_pairing;

/* Power4 load update and store update instructions are cracked into a
   load or store and an integer insn which are executed in the same cycle.
   Branches have their own dispatch slot which does not count against the
   GCC issue rate, but it changes the program flow so there are no other
   instructions to issue in this cycle.  */
17643 rs6000_variable_issue_1 (rtx_insn
*insn
, int more
)
17645 last_scheduled_insn
= insn
;
17646 if (GET_CODE (PATTERN (insn
)) == USE
17647 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
17649 cached_can_issue_more
= more
;
17650 return cached_can_issue_more
;
17653 if (insn_terminates_group_p (insn
, current_group
))
17655 cached_can_issue_more
= 0;
17656 return cached_can_issue_more
;
17659 /* If no reservation, but reach here */
17660 if (recog_memoized (insn
) < 0)
17663 if (rs6000_sched_groups
)
17665 if (is_microcoded_insn (insn
))
17666 cached_can_issue_more
= 0;
17667 else if (is_cracked_insn (insn
))
17668 cached_can_issue_more
= more
> 2 ? more
- 2 : 0;
17670 cached_can_issue_more
= more
- 1;
17672 return cached_can_issue_more
;
17675 if (rs6000_tune
== PROCESSOR_CELL
&& is_nonpipeline_insn (insn
))
17678 cached_can_issue_more
= more
- 1;
17679 return cached_can_issue_more
;
17683 rs6000_variable_issue (FILE *stream
, int verbose
, rtx_insn
*insn
, int more
)
17685 int r
= rs6000_variable_issue_1 (insn
, more
);
17687 fprintf (stream
, "// rs6000_variable_issue (more = %d) = %d\n", more
, r
);
17691 /* Adjust the cost of a scheduling dependency. Return the new cost of
17692 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
17695 rs6000_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
, int cost
,
17698 enum attr_type attr_type
;
17700 if (recog_memoized (insn
) < 0 || recog_memoized (dep_insn
) < 0)
17707 /* Data dependency; DEP_INSN writes a register that INSN reads
17708 some cycles later. */
17710 /* Separate a load from a narrower, dependent store. */
17711 if ((rs6000_sched_groups
|| rs6000_tune
== PROCESSOR_POWER9
17712 || rs6000_tune
== PROCESSOR_POWER10
)
17713 && GET_CODE (PATTERN (insn
)) == SET
17714 && GET_CODE (PATTERN (dep_insn
)) == SET
17715 && MEM_P (XEXP (PATTERN (insn
), 1))
17716 && MEM_P (XEXP (PATTERN (dep_insn
), 0))
17717 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn
), 1)))
17718 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn
), 0)))))
17721 attr_type
= get_attr_type (insn
);
17726 /* Tell the first scheduling pass about the latency between
17727 a mtctr and bctr (and mtlr and br/blr). The first
17728 scheduling pass will not know about this latency since
17729 the mtctr instruction, which has the latency associated
17730 to it, will be generated by reload. */
17733 /* Leave some extra cycles between a compare and its
17734 dependent branch, to inhibit expensive mispredicts. */
17735 if ((rs6000_tune
== PROCESSOR_PPC603
17736 || rs6000_tune
== PROCESSOR_PPC604
17737 || rs6000_tune
== PROCESSOR_PPC604e
17738 || rs6000_tune
== PROCESSOR_PPC620
17739 || rs6000_tune
== PROCESSOR_PPC630
17740 || rs6000_tune
== PROCESSOR_PPC750
17741 || rs6000_tune
== PROCESSOR_PPC7400
17742 || rs6000_tune
== PROCESSOR_PPC7450
17743 || rs6000_tune
== PROCESSOR_PPCE5500
17744 || rs6000_tune
== PROCESSOR_PPCE6500
17745 || rs6000_tune
== PROCESSOR_POWER4
17746 || rs6000_tune
== PROCESSOR_POWER5
17747 || rs6000_tune
== PROCESSOR_POWER7
17748 || rs6000_tune
== PROCESSOR_POWER8
17749 || rs6000_tune
== PROCESSOR_POWER9
17750 || rs6000_tune
== PROCESSOR_POWER10
17751 || rs6000_tune
== PROCESSOR_CELL
)
17752 && recog_memoized (dep_insn
)
17753 && (INSN_CODE (dep_insn
) >= 0))
17755 switch (get_attr_type (dep_insn
))
17758 case TYPE_FPCOMPARE
:
17759 case TYPE_CR_LOGICAL
:
17763 if (get_attr_dot (dep_insn
) == DOT_YES
)
17768 if (get_attr_dot (dep_insn
) == DOT_YES
17769 && get_attr_var_shift (dep_insn
) == VAR_SHIFT_NO
)
17780 if ((rs6000_tune
== PROCESSOR_POWER6
)
17781 && recog_memoized (dep_insn
)
17782 && (INSN_CODE (dep_insn
) >= 0))
17785 if (GET_CODE (PATTERN (insn
)) != SET
)
17786 /* If this happens, we have to extend this to schedule
17787 optimally. Return default for now. */
17790 /* Adjust the cost for the case where the value written
17791 by a fixed point operation is used as the address
17792 gen value on a store. */
17793 switch (get_attr_type (dep_insn
))
17798 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
17799 return get_attr_sign_extend (dep_insn
)
17800 == SIGN_EXTEND_YES
? 6 : 4;
17805 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
17806 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
17816 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
17824 if (get_attr_update (dep_insn
) == UPDATE_YES
17825 && ! rs6000_store_data_bypass_p (dep_insn
, insn
))
17831 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
17837 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
17838 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
17848 if ((rs6000_tune
== PROCESSOR_POWER6
)
17849 && recog_memoized (dep_insn
)
17850 && (INSN_CODE (dep_insn
) >= 0))
17853 /* Adjust the cost for the case where the value written
17854 by a fixed point instruction is used within the address
17855 gen portion of a subsequent load(u)(x) */
17856 switch (get_attr_type (dep_insn
))
17861 if (set_to_load_agen (dep_insn
, insn
))
17862 return get_attr_sign_extend (dep_insn
)
17863 == SIGN_EXTEND_YES
? 6 : 4;
17868 if (set_to_load_agen (dep_insn
, insn
))
17869 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
17879 if (set_to_load_agen (dep_insn
, insn
))
17887 if (get_attr_update (dep_insn
) == UPDATE_YES
17888 && set_to_load_agen (dep_insn
, insn
))
17894 if (set_to_load_agen (dep_insn
, insn
))
17900 if (set_to_load_agen (dep_insn
, insn
))
17901 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
17914 /* Fall out to return default cost. */
17918 case REG_DEP_OUTPUT
:
17919 /* Output dependency; DEP_INSN writes a register that INSN writes some
17921 if ((rs6000_tune
== PROCESSOR_POWER6
)
17922 && recog_memoized (dep_insn
)
17923 && (INSN_CODE (dep_insn
) >= 0))
17925 attr_type
= get_attr_type (insn
);
17930 case TYPE_FPSIMPLE
:
17931 if (get_attr_type (dep_insn
) == TYPE_FP
17932 || get_attr_type (dep_insn
) == TYPE_FPSIMPLE
)
17939 /* Fall through, no cost for output dependency. */
17943 /* Anti dependency; DEP_INSN reads a register that INSN writes some
17948 gcc_unreachable ();
17954 /* Debug version of rs6000_adjust_cost. */
17957 rs6000_debug_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
,
17958 int cost
, unsigned int dw
)
17960 int ret
= rs6000_adjust_cost (insn
, dep_type
, dep_insn
, cost
, dw
);
    default:		 dep = "unknown dependency"; break;
    case REG_DEP_TRUE:	 dep = "data dependency";    break;
    case REG_DEP_OUTPUT: dep = "output dependency";  break;
    case REG_DEP_ANTI:	 dep = "anti dependency";    break;
17975 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
17976 "%s, insn:\n", ret
, cost
, dep
);
/* Return true if INSN is microcoded; return false otherwise.  */
17988 is_microcoded_insn (rtx_insn
*insn
)
17990 if (!insn
|| !NONDEBUG_INSN_P (insn
)
17991 || GET_CODE (PATTERN (insn
)) == USE
17992 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
17995 if (rs6000_tune
== PROCESSOR_CELL
)
17996 return get_attr_cell_micro (insn
) == CELL_MICRO_ALWAYS
;
17998 if (rs6000_sched_groups
17999 && (rs6000_tune
== PROCESSOR_POWER4
|| rs6000_tune
== PROCESSOR_POWER5
))
18001 enum attr_type type
= get_attr_type (insn
);
18002 if ((type
== TYPE_LOAD
18003 && get_attr_update (insn
) == UPDATE_YES
18004 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
)
18005 || ((type
== TYPE_LOAD
|| type
== TYPE_STORE
)
18006 && get_attr_update (insn
) == UPDATE_YES
18007 && get_attr_indexed (insn
) == INDEXED_YES
)
18008 || type
== TYPE_MFCR
)
18015 /* The function returns true if INSN is cracked into 2 instructions
18016 by the processor (and therefore occupies 2 issue slots). */
18019 is_cracked_insn (rtx_insn
*insn
)
18021 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18022 || GET_CODE (PATTERN (insn
)) == USE
18023 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18026 if (rs6000_sched_groups
18027 && (rs6000_tune
== PROCESSOR_POWER4
|| rs6000_tune
== PROCESSOR_POWER5
))
18029 enum attr_type type
= get_attr_type (insn
);
18030 if ((type
== TYPE_LOAD
18031 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
18032 && get_attr_update (insn
) == UPDATE_NO
)
18033 || (type
== TYPE_LOAD
18034 && get_attr_sign_extend (insn
) == SIGN_EXTEND_NO
18035 && get_attr_update (insn
) == UPDATE_YES
18036 && get_attr_indexed (insn
) == INDEXED_NO
)
18037 || (type
== TYPE_STORE
18038 && get_attr_update (insn
) == UPDATE_YES
18039 && get_attr_indexed (insn
) == INDEXED_NO
)
18040 || ((type
== TYPE_FPLOAD
|| type
== TYPE_FPSTORE
)
18041 && get_attr_update (insn
) == UPDATE_YES
)
18042 || (type
== TYPE_CR_LOGICAL
18043 && get_attr_cr_logical_3op (insn
) == CR_LOGICAL_3OP_YES
)
18044 || (type
== TYPE_EXTS
18045 && get_attr_dot (insn
) == DOT_YES
)
18046 || (type
== TYPE_SHIFT
18047 && get_attr_dot (insn
) == DOT_YES
18048 && get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
18049 || (type
== TYPE_MUL
18050 && get_attr_dot (insn
) == DOT_YES
)
18051 || type
== TYPE_DIV
18052 || (type
== TYPE_INSERT
18053 && get_attr_size (insn
) == SIZE_32
))
18060 /* The function returns true if INSN can be issued only from
18061 the branch slot. */
18064 is_branch_slot_insn (rtx_insn
*insn
)
18066 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18067 || GET_CODE (PATTERN (insn
)) == USE
18068 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18071 if (rs6000_sched_groups
)
18073 enum attr_type type
= get_attr_type (insn
);
18074 if (type
== TYPE_BRANCH
|| type
== TYPE_JMPREG
)
18082 /* The function returns true if out_inst sets a value that is
18083 used in the address generation computation of in_insn */
18085 set_to_load_agen (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
18087 rtx out_set
, in_set
;
18089 /* For performance reasons, only handle the simple case where
18090 both loads are a single_set. */
18091 out_set
= single_set (out_insn
);
18094 in_set
= single_set (in_insn
);
18096 return reg_mentioned_p (SET_DEST (out_set
), SET_SRC (in_set
));
/* Try to determine base/offset/size parts of the given MEM.
   Return true if successful, false if all the values couldn't
   be determined.

   This function only looks for REG or REG+CONST address forms.
   REG+REG address form will return false.  */

static bool
get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
		  HOST_WIDE_INT *size)
{
  rtx addr_rtx;
  if (MEM_SIZE_KNOWN_P (mem))
    *size = MEM_SIZE (mem);
  else
    return false;

  addr_rtx = (XEXP (mem, 0));
  if (GET_CODE (addr_rtx) == PRE_MODIFY)
    addr_rtx = XEXP (addr_rtx, 1);

  *offset = 0;
  while (GET_CODE (addr_rtx) == PLUS
	 && CONST_INT_P (XEXP (addr_rtx, 1)))
    {
      *offset += INTVAL (XEXP (addr_rtx, 1));
      addr_rtx = XEXP (addr_rtx, 0);
    }

  if (!REG_P (addr_rtx))
    return false;

  *base = addr_rtx;
  return true;
}
/* If the target storage locations of arguments MEM1 and MEM2 are
   adjacent, then return the argument that has the lower address.
   Otherwise, return NULL_RTX.  */

static rtx
adjacent_mem_locations (rtx mem1, rtx mem2)
{
  rtx reg1, reg2;
  HOST_WIDE_INT off1, size1, off2, size2;

  if (MEM_P (mem1)
      && MEM_P (mem2)
      && get_memref_parts (mem1, &reg1, &off1, &size1)
      && get_memref_parts (mem2, &reg2, &off2, &size2)
      && REGNO (reg1) == REGNO (reg2))
    {
      if (off1 + size1 == off2)
	return mem1;
      else if (off2 + size2 == off1)
	return mem2;
    }

  return NULL_RTX;
}
/* This function returns true if it can be determined that the two MEM
   locations overlap by at least 1 byte based on base reg/offset/size.  */

static bool
mem_locations_overlap (rtx mem1, rtx mem2)
{
  rtx reg1, reg2;
  HOST_WIDE_INT off1, size1, off2, size2;

  if (get_memref_parts (mem1, &reg1, &off1, &size1)
      && get_memref_parts (mem2, &reg2, &off2, &size2))
    return ((REGNO (reg1) == REGNO (reg2))
	    && (((off1 <= off2) && (off1 + size1 > off2))
		|| ((off2 <= off1) && (off2 + size2 > off1))));

  return false;
}
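/* For example (illustrative): off1 = 0, size1 = 8 and off2 = 4, size2 = 4 on
   the same base register overlap, because off1 <= off2 and off1 + size1 (8)
   is greater than off2 (4); with off2 = 8 the first condition's second test
   fails and there is no overlap.  */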
18180 /* A C statement (sans semicolon) to update the integer scheduling
18181 priority INSN_PRIORITY (INSN). Increase the priority to execute the
18182 INSN earlier, reduce the priority to execute INSN later. Do not
18183 define this macro if you do not need to adjust the scheduling
18184 priorities of insns. */
18187 rs6000_adjust_priority (rtx_insn
*insn ATTRIBUTE_UNUSED
, int priority
)
18189 rtx load_mem
, str_mem
;
18190 /* On machines (like the 750) which have asymmetric integer units,
18191 where one integer unit can do multiply and divides and the other
18192 can't, reduce the priority of multiply/divide so it is scheduled
18193 before other integer operations. */
18196 if (! INSN_P (insn
))
18199 if (GET_CODE (PATTERN (insn
)) == USE
)
18202 switch (rs6000_tune
) {
18203 case PROCESSOR_PPC750
:
18204 switch (get_attr_type (insn
))
18211 fprintf (stderr
, "priority was %#x (%d) before adjustment\n",
18212 priority
, priority
);
18213 if (priority
>= 0 && priority
< 0x01000000)
18220 if (insn_must_be_first_in_group (insn
)
18221 && reload_completed
18222 && current_sched_info
->sched_max_insns_priority
18223 && rs6000_sched_restricted_insns_priority
)
18226 /* Prioritize insns that can be dispatched only in the first
18228 if (rs6000_sched_restricted_insns_priority
== 1)
18229 /* Attach highest priority to insn. This means that in
18230 haifa-sched.cc:ready_sort(), dispatch-slot restriction considerations
18231 precede 'priority' (critical path) considerations. */
18232 return current_sched_info
->sched_max_insns_priority
;
18233 else if (rs6000_sched_restricted_insns_priority
== 2)
18234 /* Increase priority of insn by a minimal amount. This means that in
18235 haifa-sched.cc:ready_sort(), only 'priority' (critical path)
18236 considerations precede dispatch-slot restriction considerations. */
18237 return (priority
+ 1);
18240 if (rs6000_tune
== PROCESSOR_POWER6
18241 && ((load_store_pendulum
== -2 && is_load_insn (insn
, &load_mem
))
18242 || (load_store_pendulum
== 2 && is_store_insn (insn
, &str_mem
))))
18243 /* Attach highest priority to insn if the scheduler has just issued two
18244 stores and this instruction is a load, or two loads and this instruction
18245 is a store. Power6 wants loads and stores scheduled alternately
18247 return current_sched_info
->sched_max_insns_priority
;
18252 /* Return true if the instruction is nonpipelined on the Cell. */
18254 is_nonpipeline_insn (rtx_insn
*insn
)
18256 enum attr_type type
;
18257 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18258 || GET_CODE (PATTERN (insn
)) == USE
18259 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18262 type
= get_attr_type (insn
);
18263 if (type
== TYPE_MUL
18264 || type
== TYPE_DIV
18265 || type
== TYPE_SDIV
18266 || type
== TYPE_DDIV
18267 || type
== TYPE_SSQRT
18268 || type
== TYPE_DSQRT
18269 || type
== TYPE_MFCR
18270 || type
== TYPE_MFCRF
18271 || type
== TYPE_MFJMPR
)
18279 /* Return how many instructions the machine can issue per cycle. */
18282 rs6000_issue_rate (void)
18284 /* Unless scheduling for register pressure, use issue rate of 1 for
18285 first scheduling pass to decrease degradation. */
18286 if (!reload_completed
&& !flag_sched_pressure
)
18289 switch (rs6000_tune
) {
18290 case PROCESSOR_RS64A
:
18291 case PROCESSOR_PPC601
: /* ? */
18292 case PROCESSOR_PPC7450
:
18294 case PROCESSOR_PPC440
:
18295 case PROCESSOR_PPC603
:
18296 case PROCESSOR_PPC750
:
18297 case PROCESSOR_PPC7400
:
18298 case PROCESSOR_PPC8540
:
18299 case PROCESSOR_PPC8548
:
18300 case PROCESSOR_CELL
:
18301 case PROCESSOR_PPCE300C2
:
18302 case PROCESSOR_PPCE300C3
:
18303 case PROCESSOR_PPCE500MC
:
18304 case PROCESSOR_PPCE500MC64
:
18305 case PROCESSOR_PPCE5500
:
18306 case PROCESSOR_PPCE6500
:
18307 case PROCESSOR_TITAN
:
18309 case PROCESSOR_PPC476
:
18310 case PROCESSOR_PPC604
:
18311 case PROCESSOR_PPC604e
:
18312 case PROCESSOR_PPC620
:
18313 case PROCESSOR_PPC630
:
18315 case PROCESSOR_POWER4
:
18316 case PROCESSOR_POWER5
:
18317 case PROCESSOR_POWER6
:
18318 case PROCESSOR_POWER7
:
18320 case PROCESSOR_POWER8
:
18322 case PROCESSOR_POWER9
:
18324 case PROCESSOR_POWER10
:
18331 /* Return how many instructions to look ahead for better insn
18335 rs6000_use_sched_lookahead (void)
18337 switch (rs6000_tune
)
18339 case PROCESSOR_PPC8540
:
18340 case PROCESSOR_PPC8548
:
18343 case PROCESSOR_CELL
:
18344 return (reload_completed
? 8 : 0);
18351 /* We are choosing insn from the ready queue. Return zero if INSN can be
18354 rs6000_use_sched_lookahead_guard (rtx_insn
*insn
, int ready_index
)
18356 if (ready_index
== 0)
18359 if (rs6000_tune
!= PROCESSOR_CELL
)
18362 gcc_assert (insn
!= NULL_RTX
&& INSN_P (insn
));
18364 if (!reload_completed
18365 || is_nonpipeline_insn (insn
)
18366 || is_microcoded_insn (insn
))
18372 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18373 and return true. */
18376 find_mem_ref (rtx pat
, rtx
*mem_ref
)
18381 /* stack_tie does not produce any real memory traffic. */
18382 if (tie_operand (pat
, VOIDmode
))
18391 /* Recursively process the pattern. */
18392 fmt
= GET_RTX_FORMAT (GET_CODE (pat
));
18394 for (i
= GET_RTX_LENGTH (GET_CODE (pat
)) - 1; i
>= 0; i
--)
18398 if (find_mem_ref (XEXP (pat
, i
), mem_ref
))
18401 else if (fmt
[i
] == 'E')
18402 for (j
= XVECLEN (pat
, i
) - 1; j
>= 0; j
--)
18404 if (find_mem_ref (XVECEXP (pat
, i
, j
), mem_ref
))
18412 /* Determine if PAT is a PATTERN of a load insn. */
18415 is_load_insn1 (rtx pat
, rtx
*load_mem
)
18417 if (!pat
|| pat
== NULL_RTX
)
18420 if (GET_CODE (pat
) == SET
)
18422 if (REG_P (SET_DEST (pat
)))
18423 return find_mem_ref (SET_SRC (pat
), load_mem
);
18428 if (GET_CODE (pat
) == PARALLEL
)
18432 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
18433 if (is_load_insn1 (XVECEXP (pat
, 0, i
), load_mem
))
18440 /* Determine if INSN loads from memory. */
18443 is_load_insn (rtx insn
, rtx
*load_mem
)
18445 if (!insn
|| !INSN_P (insn
))
18451 return is_load_insn1 (PATTERN (insn
), load_mem
);
18454 /* Determine if PAT is a PATTERN of a store insn. */
18457 is_store_insn1 (rtx pat
, rtx
*str_mem
)
18459 if (!pat
|| pat
== NULL_RTX
)
18462 if (GET_CODE (pat
) == SET
)
18464 if (REG_P (SET_SRC (pat
)) || SUBREG_P (SET_SRC (pat
)))
18465 return find_mem_ref (SET_DEST (pat
), str_mem
);
18470 if (GET_CODE (pat
) == PARALLEL
)
18474 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
18475 if (is_store_insn1 (XVECEXP (pat
, 0, i
), str_mem
))
18482 /* Determine if INSN stores to memory. */
18485 is_store_insn (rtx insn
, rtx
*str_mem
)
18487 if (!insn
|| !INSN_P (insn
))
18490 return is_store_insn1 (PATTERN (insn
), str_mem
);
18493 /* Return whether TYPE is a Power9 pairable vector instruction type. */
18496 is_power9_pairable_vec_type (enum attr_type type
)
18500 case TYPE_VECSIMPLE
:
18501 case TYPE_VECCOMPLEX
:
18505 case TYPE_VECFLOAT
:
18507 case TYPE_VECDOUBLE
:
18515 /* Returns whether the dependence between INSN and NEXT is considered
18516 costly by the given target. */
18519 rs6000_is_costly_dependence (dep_t dep
, int cost
, int distance
)
18523 rtx load_mem
, str_mem
;
18525 /* If the flag is not enabled - no dependence is considered costly;
18526 allow all dependent insns in the same group.
18527 This is the most aggressive option. */
18528 if (rs6000_sched_costly_dep
== no_dep_costly
)
18531 /* If the flag is set to 1 - a dependence is always considered costly;
18532 do not allow dependent instructions in the same group.
18533 This is the most conservative option. */
18534 if (rs6000_sched_costly_dep
== all_deps_costly
)
18537 insn
= DEP_PRO (dep
);
18538 next
= DEP_CON (dep
);
18540 if (rs6000_sched_costly_dep
== store_to_load_dep_costly
18541 && is_load_insn (next
, &load_mem
)
18542 && is_store_insn (insn
, &str_mem
))
18543 /* Prevent load after store in the same group. */
18546 if (rs6000_sched_costly_dep
== true_store_to_load_dep_costly
18547 && is_load_insn (next
, &load_mem
)
18548 && is_store_insn (insn
, &str_mem
)
18549 && DEP_TYPE (dep
) == REG_DEP_TRUE
18550 && mem_locations_overlap(str_mem
, load_mem
))
18551 /* Prevent load after store in the same group if it is a true
18555 /* The flag is set to X; dependences with latency >= X are considered costly,
18556 and will not be scheduled in the same group. */
18557 if (rs6000_sched_costly_dep
<= max_dep_latency
18558 && ((cost
- distance
) >= (int)rs6000_sched_costly_dep
))
18564 /* Return the next insn after INSN that is found before TAIL is reached,
18565 skipping any "non-active" insns - insns that will not actually occupy
18566 an issue slot. Return NULL_RTX if such an insn is not found. */
18569 get_next_active_insn (rtx_insn
*insn
, rtx_insn
*tail
)
18571 if (insn
== NULL_RTX
|| insn
== tail
)
18576 insn
= NEXT_INSN (insn
);
18577 if (insn
== NULL_RTX
|| insn
== tail
)
18581 || JUMP_P (insn
) || JUMP_TABLE_DATA_P (insn
)
18582 || (NONJUMP_INSN_P (insn
)
18583 && GET_CODE (PATTERN (insn
)) != USE
18584 && GET_CODE (PATTERN (insn
)) != CLOBBER
18585 && INSN_CODE (insn
) != CODE_FOR_stack_tie
))
/* Move instruction at POS to the end of the READY list.  */

static void
move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
{
  rtx_insn *tmp;
  int i;

  tmp = ready[pos];
  for (i = pos; i < lastpos; i++)
    ready[i] = ready[i + 1];
  ready[lastpos] = tmp;
}
/* Do Power6 specific sched_reorder2 reordering of ready list.  */

static int
power6_sched_reorder2 (rtx_insn **ready, int lastpos)
{
  /* For Power6, we need to handle some special cases to try and keep the
     store queue from overflowing and triggering expensive flushes.

     This code monitors how load and store instructions are being issued
     and skews the ready list one way or the other to increase the likelihood
     that a desired instruction is issued at the proper time.

     A couple of things are done.  First, we maintain a "load_store_pendulum"
     to track the current state of load/store issue.

       - If the pendulum is at zero, then no loads or stores have been
	 issued in the current cycle so we do nothing.

       - If the pendulum is 1, then a single load has been issued in this
	 cycle and we attempt to locate another load in the ready list to
	 issue with it.

       - If the pendulum is -2, then two stores have already been
	 issued in this cycle, so we increase the priority of the first load
	 in the ready list to increase its likelihood of being chosen first
	 in the next cycle.

       - If the pendulum is -1, then a single store has been issued in this
	 cycle and we attempt to locate another store in the ready list to
	 issue with it, preferring a store to an adjacent memory location to
	 facilitate store pairing in the store queue.

       - If the pendulum is 2, then two loads have already been
	 issued in this cycle, so we increase the priority of the first store
	 in the ready list to increase its likelihood of being chosen first
	 in the next cycle.

       - If the pendulum < -2 or > 2, then do nothing.

     Note: This code covers the most common scenarios.  There exist non
	   load/store instructions which make use of the LSU and which
	   would need to be accounted for to strictly model the behavior
	   of the machine.  Those instructions are currently unaccounted
	   for to help minimize compile time overhead of this code.  */
18651 rtx load_mem
, str_mem
;
18653 if (is_store_insn (last_scheduled_insn
, &str_mem
))
18654 /* Issuing a store, swing the load_store_pendulum to the left */
18655 load_store_pendulum
--;
18656 else if (is_load_insn (last_scheduled_insn
, &load_mem
))
18657 /* Issuing a load, swing the load_store_pendulum to the right */
18658 load_store_pendulum
++;
18660 return cached_can_issue_more
;
18662 /* If the pendulum is balanced, or there is only one instruction on
18663 the ready list, then all is well, so return. */
18664 if ((load_store_pendulum
== 0) || (lastpos
<= 0))
18665 return cached_can_issue_more
;
18667 if (load_store_pendulum
== 1)
18669 /* A load has been issued in this cycle. Scan the ready list
18670 for another load to issue with it */
18675 if (is_load_insn (ready
[pos
], &load_mem
))
18677 /* Found a load. Move it to the head of the ready list,
18678 and adjust it's priority so that it is more likely to
18680 move_to_end_of_ready (ready
, pos
, lastpos
);
18682 if (!sel_sched_p ()
18683 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
18684 INSN_PRIORITY (ready
[lastpos
])++;
18690 else if (load_store_pendulum
== -2)
18692 /* Two stores have been issued in this cycle. Increase the
18693 priority of the first load in the ready list to favor it for
18694 issuing in the next cycle. */
18699 if (is_load_insn (ready
[pos
], &load_mem
)
18701 && INSN_PRIORITY_KNOWN (ready
[pos
]))
18703 INSN_PRIORITY (ready
[pos
])++;
18705 /* Adjust the pendulum to account for the fact that a load
18706 was found and increased in priority. This is to prevent
18707 increasing the priority of multiple loads */
18708 load_store_pendulum
--;
18715 else if (load_store_pendulum
== -1)
18717 /* A store has been issued in this cycle. Scan the ready list for
18718 another store to issue with it, preferring a store to an adjacent
18720 int first_store_pos
= -1;
18726 if (is_store_insn (ready
[pos
], &str_mem
))
18729 /* Maintain the index of the first store found on the
18731 if (first_store_pos
== -1)
18732 first_store_pos
= pos
;
18734 if (is_store_insn (last_scheduled_insn
, &str_mem2
)
18735 && adjacent_mem_locations (str_mem
, str_mem2
))
18737 /* Found an adjacent store. Move it to the head of the
18738 ready list, and adjust it's priority so that it is
18739 more likely to stay there */
18740 move_to_end_of_ready (ready
, pos
, lastpos
);
18742 if (!sel_sched_p ()
18743 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
18744 INSN_PRIORITY (ready
[lastpos
])++;
18746 first_store_pos
= -1;
18754 if (first_store_pos
>= 0)
18756 /* An adjacent store wasn't found, but a non-adjacent store was,
18757 so move the non-adjacent store to the front of the ready
18758 list, and adjust its priority so that it is more likely to
18760 move_to_end_of_ready (ready
, first_store_pos
, lastpos
);
18761 if (!sel_sched_p ()
18762 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
18763 INSN_PRIORITY (ready
[lastpos
])++;
18766 else if (load_store_pendulum
== 2)
18768 /* Two loads have been issued in this cycle. Increase the priority
18769 of the first store in the ready list to favor it for issuing in
18775 if (is_store_insn (ready
[pos
], &str_mem
)
18777 && INSN_PRIORITY_KNOWN (ready
[pos
]))
18779 INSN_PRIORITY (ready
[pos
])++;
18781 /* Adjust the pendulum to account for the fact that a store
18782 was found and increased in priority. This is to prevent
18783 increasing the priority of multiple stores */
18784 load_store_pendulum
++;
18792 return cached_can_issue_more
;
/* Do Power9 specific sched_reorder2 reordering of ready list.  */

static int
power9_sched_reorder2 (rtx_insn **ready, int lastpos)
{
  int pos;
  enum attr_type type, type2;

  type = get_attr_type (last_scheduled_insn);

  /* Try to issue fixed point divides back-to-back in pairs so they will be
     routed to separate execution units and execute in parallel.  */
  if (type == TYPE_DIV && divide_cnt == 0)
    {
      /* First divide has been scheduled.  */
      divide_cnt++;

      /* Scan the ready list looking for another divide, if found move it
	 to the end of the list so it is chosen next.  */
      pos = lastpos;
      while (pos >= 0)
	{
	  if (recog_memoized (ready[pos]) >= 0
	      && get_attr_type (ready[pos]) == TYPE_DIV)
	    {
	      move_to_end_of_ready (ready, pos, lastpos);
	      break;
	    }
	  pos--;
	}
    }
  else
    {
      /* Last insn was the 2nd divide or not a divide, reset the counter.  */
      divide_cnt = 0;

      /* The best dispatch throughput for vector and vector load insns can be
	 achieved by interleaving a vector and vector load such that they'll
	 dispatch to the same superslice.  If this pairing cannot be achieved
	 then it is best to pair vector insns together and vector load insns
	 together.

	 To aid in this pairing, vec_pairing maintains the current state with
	 the following values:

	    0 : Initial state, no vecload/vector pairing has been started.

	    1 : A vecload or vector insn has been issued and a candidate for
		pairing has been found and moved to the end of the ready
		list.  */
      if (type == TYPE_VECLOAD)
	{
	  /* Issued a vecload.  */
	  if (vec_pairing == 0)
	    {
	      int vecload_pos = -1;
	      /* We issued a single vecload, look for a vector insn to pair it
		 with.  If one isn't found, try to pair another vecload.  */
	      pos = lastpos;
	      while (pos >= 0)
		{
		  if (recog_memoized (ready[pos]) >= 0)
		    {
		      type2 = get_attr_type (ready[pos]);
		      if (is_power9_pairable_vec_type (type2))
			{
			  /* Found a vector insn to pair with, move it to the
			     end of the ready list so it is scheduled next.  */
			  move_to_end_of_ready (ready, pos, lastpos);
			  vec_pairing = 1;
			  return cached_can_issue_more;
			}
		      else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
			/* Remember position of first vecload seen.  */
			vecload_pos = pos;
		    }
		  pos--;
		}
	      if (vecload_pos >= 0)
		{
		  /* Didn't find a vector to pair with but did find a vecload,
		     move it to the end of the ready list.  */
		  move_to_end_of_ready (ready, vecload_pos, lastpos);
		  vec_pairing = 1;
		  return cached_can_issue_more;
		}
	    }
	}
      else if (is_power9_pairable_vec_type (type))
	{
	  /* Issued a vector operation.  */
	  if (vec_pairing == 0)
	    {
	      int vec_pos = -1;
	      /* We issued a single vector insn, look for a vecload to pair it
		 with.  If one isn't found, try to pair another vector.  */
	      pos = lastpos;
	      while (pos >= 0)
		{
		  if (recog_memoized (ready[pos]) >= 0)
		    {
		      type2 = get_attr_type (ready[pos]);
		      if (type2 == TYPE_VECLOAD)
			{
			  /* Found a vecload insn to pair with, move it to the
			     end of the ready list so it is scheduled next.  */
			  move_to_end_of_ready (ready, pos, lastpos);
			  vec_pairing = 1;
			  return cached_can_issue_more;
			}
		      else if (is_power9_pairable_vec_type (type2)
			       && vec_pos == -1)
			/* Remember position of first vector insn seen.  */
			vec_pos = pos;
		    }
		  pos--;
		}
	      if (vec_pos >= 0)
		{
		  /* Didn't find a vecload to pair with but did find a vector
		     insn, move it to the end of the ready list.  */
		  move_to_end_of_ready (ready, vec_pos, lastpos);
		  vec_pairing = 1;
		  return cached_can_issue_more;
		}
	    }
	}

      /* We've either finished a vec/vecload pair, couldn't find an insn to
	 continue the current pair, or the last insn had nothing to do with
	 pairing.  In any case, reset the state.  */
      vec_pairing = 0;
    }

  return cached_can_issue_more;
}
/* Determine if INSN is a store to memory that can be fused with a similar
   adjacent store.  */

static bool
is_fusable_store (rtx_insn *insn, rtx *str_mem)
{
  /* Insn must be a non-prefixed base+disp form store.  */
  if (is_store_insn (insn, str_mem)
      && get_attr_prefixed (insn) == PREFIXED_NO
      && get_attr_update (insn) == UPDATE_NO
      && get_attr_indexed (insn) == INDEXED_NO)
    {
      /* Further restrictions by mode and size.  */
      if (!MEM_SIZE_KNOWN_P (*str_mem))
	return false;

      machine_mode mode = GET_MODE (*str_mem);
      HOST_WIDE_INT size = MEM_SIZE (*str_mem);

      if (INTEGRAL_MODE_P (mode))
	/* Must be word or dword size.  */
	return (size == 4 || size == 8);
      else if (FLOAT_MODE_P (mode))
	/* Must be dword size.  */
	return (size == 8);
    }

  return false;
}
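/* Illustrative note (register numbers below are hypothetical, not taken from
   the surrounding code): a pair such as
     std 4,0(9)
     std 5,8(9)
   satisfies the checks above -- both are non-prefixed, non-update,
   non-indexed dword integer stores at adjacent offsets -- and is the kind of
   pair the Power10 store-fusion reordering below tries to schedule back to
   back.  */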
/* Do Power10 specific reordering of the ready list.  */

static int
power10_sched_reorder (rtx_insn **ready, int lastpos)
{
  int pos;
  rtx mem1, mem2;

  /* Do store fusion during sched2 only.  */
  if (!reload_completed)
    return cached_can_issue_more;

  /* If the prior insn finished off a store fusion pair then simply
     reset the counter and return, nothing more to do.  */
  if (load_store_pendulum != 0)
    {
      load_store_pendulum = 0;
      return cached_can_issue_more;
    }

  /* Try to pair certain store insns to adjacent memory locations
     so that the hardware will fuse them to a single operation.  */
  if (TARGET_P10_FUSION && TARGET_P10_FUSION_2STORE
      && is_fusable_store (last_scheduled_insn, &mem1))
    {
      /* A fusable store was just scheduled.  Scan the ready list for another
	 store that it can fuse with.  */
      pos = lastpos;
      while (pos >= 0)
	{
	  /* GPR stores can be ascending or descending offsets, FPR/VSR stores
	     must be ascending only.  */
	  if (is_fusable_store (ready[pos], &mem2)
	      && ((INTEGRAL_MODE_P (GET_MODE (mem1))
		   && adjacent_mem_locations (mem1, mem2))
		  || (FLOAT_MODE_P (GET_MODE (mem1))
		      && (adjacent_mem_locations (mem1, mem2) == mem1))))
	    {
	      /* Found a fusable store.  Move it to the end of the ready list
		 so it is scheduled next.  */
	      move_to_end_of_ready (ready, pos, lastpos);

	      load_store_pendulum = -1;
	      break;
	    }
	  pos--;
	}
    }

  return cached_can_issue_more;
}
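/* Note on the load_store_pendulum convention used here:
   power10_sched_reorder sets it to -1 once it has found and re-queued a
   fusion partner for the store that was just scheduled; the next call then
   sees a nonzero value, resets it to 0 and returns, so a freshly formed
   pair is not immediately extended with yet another store.  */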
/* We are about to begin issuing insns for this clock cycle.  */

static int
rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
		      rtx_insn **ready ATTRIBUTE_UNUSED,
		      int *pn_ready ATTRIBUTE_UNUSED,
		      int clock_var ATTRIBUTE_UNUSED)
{
  int n_ready = *pn_ready;

  if (sched_verbose)
    fprintf (dump, "// rs6000_sched_reorder :\n");

  /* Reorder the ready list, if the second to last ready insn
     is a nonpipelined insn.  */
  if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
    {
      if (is_nonpipeline_insn (ready[n_ready - 1])
	  && (recog_memoized (ready[n_ready - 2]) > 0))
	/* Simply swap first two insns.  */
	std::swap (ready[n_ready - 1], ready[n_ready - 2]);
    }

  if (rs6000_tune == PROCESSOR_POWER6)
    load_store_pendulum = 0;

  /* Do Power10 dependent reordering.  */
  if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
    power10_sched_reorder (ready, n_ready - 1);

  return rs6000_issue_rate ();
}
/* Like rs6000_sched_reorder, but called after issuing each insn.  */

static int
rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
		       int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
{
  if (sched_verbose)
    fprintf (dump, "// rs6000_sched_reorder2 :\n");

  /* Do Power6 dependent reordering if necessary.  */
  if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
    return power6_sched_reorder2 (ready, *pn_ready - 1);

  /* Do Power9 dependent reordering if necessary.  */
  if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
      && recog_memoized (last_scheduled_insn) >= 0)
    return power9_sched_reorder2 (ready, *pn_ready - 1);

  /* Do Power10 dependent reordering.  */
  if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
    return power10_sched_reorder (ready, *pn_ready - 1);

  return cached_can_issue_more;
}
/* Return whether the presence of INSN causes a dispatch group termination
   of group WHICH_GROUP.

   If WHICH_GROUP == current_group, this function will return true if INSN
   causes the termination of the current group (i.e. the dispatch group to
   which INSN belongs).  This means that INSN will be the last insn in the
   group it belongs to.

   If WHICH_GROUP == previous_group, this function will return true if INSN
   causes the termination of the previous group (i.e. the dispatch group that
   precedes the group to which INSN belongs).  This means that INSN will be
   the first insn in the group it belongs to.  */

static bool
insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
{
  bool first, last;

  first = insn_must_be_first_in_group (insn);
  last = insn_must_be_last_in_group (insn);

  if (which_group == current_group)
    return last;
  else if (which_group == previous_group)
    return first;

  return false;
}
19110 insn_must_be_first_in_group (rtx_insn
*insn
)
19112 enum attr_type type
;
19116 || DEBUG_INSN_P (insn
)
19117 || GET_CODE (PATTERN (insn
)) == USE
19118 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
19121 switch (rs6000_tune
)
19123 case PROCESSOR_POWER5
:
19124 if (is_cracked_insn (insn
))
19127 case PROCESSOR_POWER4
:
19128 if (is_microcoded_insn (insn
))
19131 if (!rs6000_sched_groups
)
19134 type
= get_attr_type (insn
);
19141 case TYPE_CR_LOGICAL
:
19154 case PROCESSOR_POWER6
:
19155 type
= get_attr_type (insn
);
19164 case TYPE_FPCOMPARE
:
19175 if (get_attr_dot (insn
) == DOT_NO
19176 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
19181 if (get_attr_size (insn
) == SIZE_32
)
19189 if (get_attr_update (insn
) == UPDATE_YES
)
19197 case PROCESSOR_POWER7
:
19198 type
= get_attr_type (insn
);
19202 case TYPE_CR_LOGICAL
:
19216 if (get_attr_dot (insn
) == DOT_YES
)
19221 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19222 || get_attr_update (insn
) == UPDATE_YES
)
19229 if (get_attr_update (insn
) == UPDATE_YES
)
19237 case PROCESSOR_POWER8
:
19238 type
= get_attr_type (insn
);
19242 case TYPE_CR_LOGICAL
:
19250 case TYPE_VECSTORE
:
19257 if (get_attr_dot (insn
) == DOT_YES
)
19262 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19263 || get_attr_update (insn
) == UPDATE_YES
)
19268 if (get_attr_update (insn
) == UPDATE_YES
19269 && get_attr_indexed (insn
) == INDEXED_YES
)
19285 insn_must_be_last_in_group (rtx_insn
*insn
)
19287 enum attr_type type
;
19291 || DEBUG_INSN_P (insn
)
19292 || GET_CODE (PATTERN (insn
)) == USE
19293 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
19296 switch (rs6000_tune
) {
19297 case PROCESSOR_POWER4
:
19298 case PROCESSOR_POWER5
:
19299 if (is_microcoded_insn (insn
))
19302 if (is_branch_slot_insn (insn
))
19306 case PROCESSOR_POWER6
:
19307 type
= get_attr_type (insn
);
19315 case TYPE_FPCOMPARE
:
19326 if (get_attr_dot (insn
) == DOT_NO
19327 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
19332 if (get_attr_size (insn
) == SIZE_32
)
19340 case PROCESSOR_POWER7
:
19341 type
= get_attr_type (insn
);
19351 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19352 && get_attr_update (insn
) == UPDATE_YES
)
19357 if (get_attr_update (insn
) == UPDATE_YES
19358 && get_attr_indexed (insn
) == INDEXED_YES
)
19366 case PROCESSOR_POWER8
:
19367 type
= get_attr_type (insn
);
19379 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19380 && get_attr_update (insn
) == UPDATE_YES
)
19385 if (get_attr_update (insn
) == UPDATE_YES
19386 && get_attr_indexed (insn
) == INDEXED_YES
)
/* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
   dispatch group) from the insns in GROUP_INSNS.  Return false otherwise.  */

static bool
is_costly_group (rtx *group_insns, rtx next_insn)
{
  int i;
  int issue_rate = rs6000_issue_rate ();

  for (i = 0; i < issue_rate; i++)
    {
      sd_iterator_def sd_it;
      dep_t dep;
      rtx insn = group_insns[i];

      if (!insn)
	continue;

      FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
	{
	  rtx next = DEP_CON (dep);

	  if (next == next_insn
	      && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
	    return true;
	}
    }

  return false;
}
/* Utility of the function redefine_groups.
   Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
   in the same dispatch group.  If so, insert nops before NEXT_INSN, in order
   to keep it "far" (in a separate group) from GROUP_INSNS, following
   one of the following schemes, depending on the value of the flag
   -minsert_sched_nops = X:
   (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
       in order to force NEXT_INSN into a separate group.
   (2) X < sched_finish_regroup_exact: insert exactly X nops.
   GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
   insertion (has a group just ended, how many vacant issue slots remain in
   the last group, and how many dispatch groups were encountered so far).  */
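/* Worked example for the schemes above (issue rate of 4 assumed for
   illustration): if three issue slots are still vacant (can_issue_more == 3)
   and NEXT_INSN is not a branch, scheme (1) emits can_issue_more - 1 = 2
   nops, since the branch-only last slot could not have held NEXT_INSN
   anyway; if NEXT_INSN is a branch, all 3 nops are emitted so that the last
   nop opens a new group and the branch lands in it.  */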
19446 force_new_group (int sched_verbose
, FILE *dump
, rtx
*group_insns
,
19447 rtx_insn
*next_insn
, bool *group_end
, int can_issue_more
,
19452 int issue_rate
= rs6000_issue_rate ();
19453 bool end
= *group_end
;
19456 if (next_insn
== NULL_RTX
|| DEBUG_INSN_P (next_insn
))
19457 return can_issue_more
;
19459 if (rs6000_sched_insert_nops
> sched_finish_regroup_exact
)
19460 return can_issue_more
;
19462 force
= is_costly_group (group_insns
, next_insn
);
19464 return can_issue_more
;
19466 if (sched_verbose
> 6)
19467 fprintf (dump
,"force: group count = %d, can_issue_more = %d\n",
19468 *group_count
,can_issue_more
);
19470 if (rs6000_sched_insert_nops
== sched_finish_regroup_exact
)
19473 can_issue_more
= 0;
19475 /* Since only a branch can be issued in the last issue_slot, it is
19476 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
19477 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
19478 in this case the last nop will start a new group and the branch
19479 will be forced to the new group. */
19480 if (can_issue_more
&& !is_branch_slot_insn (next_insn
))
19483 /* Do we have a special group ending nop? */
19484 if (rs6000_tune
== PROCESSOR_POWER6
|| rs6000_tune
== PROCESSOR_POWER7
19485 || rs6000_tune
== PROCESSOR_POWER8
)
19487 nop
= gen_group_ending_nop ();
19488 emit_insn_before (nop
, next_insn
);
19489 can_issue_more
= 0;
19492 while (can_issue_more
> 0)
19495 emit_insn_before (nop
, next_insn
);
19503 if (rs6000_sched_insert_nops
< sched_finish_regroup_exact
)
19505 int n_nops
= rs6000_sched_insert_nops
;
19507 /* Nops can't be issued from the branch slot, so the effective
19508 issue_rate for nops is 'issue_rate - 1'. */
19509 if (can_issue_more
== 0)
19510 can_issue_more
= issue_rate
;
19512 if (can_issue_more
== 0)
19514 can_issue_more
= issue_rate
- 1;
19517 for (i
= 0; i
< issue_rate
; i
++)
19519 group_insns
[i
] = 0;
19526 emit_insn_before (nop
, next_insn
);
19527 if (can_issue_more
== issue_rate
- 1) /* new group begins */
19530 if (can_issue_more
== 0)
19532 can_issue_more
= issue_rate
- 1;
19535 for (i
= 0; i
< issue_rate
; i
++)
19537 group_insns
[i
] = 0;
19543 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
19546 /* Is next_insn going to start a new group? */
19549 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
19550 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
19551 || (can_issue_more
< issue_rate
&&
19552 insn_terminates_group_p (next_insn
, previous_group
)));
19553 if (*group_end
&& end
)
19556 if (sched_verbose
> 6)
19557 fprintf (dump
, "done force: group count = %d, can_issue_more = %d\n",
19558 *group_count
, can_issue_more
);
19559 return can_issue_more
;
19562 return can_issue_more
;
/* This function tries to synch the dispatch groups that the compiler "sees"
   with the dispatch groups that the processor dispatcher is expected to
   form in practice.  It tries to achieve this synchronization by forcing the
   estimated processor grouping on the compiler (as opposed to the function
   'pad_groups' which tries to force the scheduler's grouping on the
   processor).

   The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
   examines the (estimated) dispatch groups that will be formed by the
   processor dispatcher.  It marks these group boundaries to reflect the
   estimated processor grouping, overriding the grouping that the scheduler
   had marked.  Depending on the value of the flag '-minsert-sched-nops'
   this function can force certain insns into separate groups or force a
   certain distance between them by inserting nops, for example, if there
   exists a "costly dependence" between the insns.

   The function estimates the group boundaries that the processor will form
   as follows: It keeps track of how many vacant issue slots are available
   after each insn.  A subsequent insn will start a new group if one of the
   following holds:
   - no more vacant issue slots remain in the current dispatch group.
   - only the last issue slot, which is the branch slot, is vacant, but the
     next insn is not a branch.
   - only the last 2 or less issue slots, including the branch slot, are
     vacant, which means that a cracked insn (which occupies two issue slots)
     can't be issued in this group.
   - less than 'issue_rate' slots are vacant, and the next insn always needs
     to start a new group.  */
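/* Illustrative example of the estimate above (issue rate of 4 assumed):
   after three ordinary single-slot insns only the branch slot remains
   vacant, so a following non-branch insn is predicted to start a new
   dispatch group, whereas a following branch could still join the current
   one.  Similarly, with only two slots left (one of them the branch slot) a
   cracked insn, which needs two ordinary slots, is predicted to start a new
   group.  */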
19594 redefine_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
19597 rtx_insn
*insn
, *next_insn
;
19599 int can_issue_more
;
19602 int group_count
= 0;
19606 issue_rate
= rs6000_issue_rate ();
19607 group_insns
= XALLOCAVEC (rtx
, issue_rate
);
19608 for (i
= 0; i
< issue_rate
; i
++)
19610 group_insns
[i
] = 0;
19612 can_issue_more
= issue_rate
;
19614 insn
= get_next_active_insn (prev_head_insn
, tail
);
19617 while (insn
!= NULL_RTX
)
19619 slot
= (issue_rate
- can_issue_more
);
19620 group_insns
[slot
] = insn
;
19622 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
19623 if (insn_terminates_group_p (insn
, current_group
))
19624 can_issue_more
= 0;
19626 next_insn
= get_next_active_insn (insn
, tail
);
19627 if (next_insn
== NULL_RTX
)
19628 return group_count
+ 1;
19630 /* Is next_insn going to start a new group? */
19632 = (can_issue_more
== 0
19633 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
19634 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
19635 || (can_issue_more
< issue_rate
&&
19636 insn_terminates_group_p (next_insn
, previous_group
)));
19638 can_issue_more
= force_new_group (sched_verbose
, dump
, group_insns
,
19639 next_insn
, &group_end
, can_issue_more
,
19645 can_issue_more
= 0;
19646 for (i
= 0; i
< issue_rate
; i
++)
19648 group_insns
[i
] = 0;
19652 if (GET_MODE (next_insn
) == TImode
&& can_issue_more
)
19653 PUT_MODE (next_insn
, VOIDmode
);
19654 else if (!can_issue_more
&& GET_MODE (next_insn
) != TImode
)
19655 PUT_MODE (next_insn
, TImode
);
19658 if (can_issue_more
== 0)
19659 can_issue_more
= issue_rate
;
19662 return group_count
;
/* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
   dispatch group boundaries that the scheduler had marked.  Pad with nops
   any dispatch groups which have vacant issue slots, in order to force the
   scheduler's grouping on the processor dispatcher.  The function
   returns the number of dispatch groups found.  */
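/* Note: pad_groups is the counterpart of redefine_groups above.  Instead of
   adopting the processor's estimated grouping, it fills every vacant issue
   slot at a scheduler-marked boundary with a nop so that the grouping chosen
   by the scheduler is what the dispatcher actually sees.  It is used when
   -minsert-sched-nops requests group padding (rs6000_sched_insert_nops ==
   sched_finish_pad_groups); see rs6000_sched_finish further below.  */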
19672 pad_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
19675 rtx_insn
*insn
, *next_insn
;
19678 int can_issue_more
;
19680 int group_count
= 0;
19682 /* Initialize issue_rate. */
19683 issue_rate
= rs6000_issue_rate ();
19684 can_issue_more
= issue_rate
;
19686 insn
= get_next_active_insn (prev_head_insn
, tail
);
19687 next_insn
= get_next_active_insn (insn
, tail
);
19689 while (insn
!= NULL_RTX
)
19692 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
19694 group_end
= (next_insn
== NULL_RTX
|| GET_MODE (next_insn
) == TImode
);
19696 if (next_insn
== NULL_RTX
)
19701 /* If the scheduler had marked group termination at this location
19702 (between insn and next_insn), and neither insn nor next_insn will
19703 force group termination, pad the group with nops to force group
19706 && (rs6000_sched_insert_nops
== sched_finish_pad_groups
)
19707 && !insn_terminates_group_p (insn
, current_group
)
19708 && !insn_terminates_group_p (next_insn
, previous_group
))
19710 if (!is_branch_slot_insn (next_insn
))
19713 while (can_issue_more
)
19716 emit_insn_before (nop
, next_insn
);
19721 can_issue_more
= issue_rate
;
19726 next_insn
= get_next_active_insn (insn
, tail
);
19729 return group_count
;
/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		   int sched_verbose ATTRIBUTE_UNUSED,
		   int max_ready ATTRIBUTE_UNUSED)
{
  last_scheduled_insn = NULL;
  load_store_pendulum = 0;
  divide_cnt = 0;
  vec_pairing = 0;
}
/* The following function is called at the end of scheduling BB.
   After reload, it inserts nops at insn group bundling.  */

static void
rs6000_sched_finish (FILE *dump, int sched_verbose)
{
  int n_groups;

  if (sched_verbose)
    fprintf (dump, "=== Finishing schedule.\n");

  if (reload_completed && rs6000_sched_groups)
    {
      /* Do not run sched_finish hook when selective scheduling enabled.  */
      if (sel_sched_p ())
	return;

      if (rs6000_sched_insert_nops == sched_finish_none)
	return;

      if (rs6000_sched_insert_nops == sched_finish_pad_groups)
	n_groups = pad_groups (dump, sched_verbose,
			       current_sched_info->prev_head,
			       current_sched_info->next_tail);
      else
	n_groups = redefine_groups (dump, sched_verbose,
				    current_sched_info->prev_head,
				    current_sched_info->next_tail);

      if (sched_verbose >= 6)
	{
	  fprintf (dump, "ngroups = %d\n", n_groups);
	  print_rtl (dump, current_sched_info->prev_head);
	  fprintf (dump, "Done finish_sched\n");
	}
    }
}
struct rs6000_sched_context
{
  short cached_can_issue_more;
  rtx_insn *last_scheduled_insn;
  int load_store_pendulum;
  int divide_cnt;
  int vec_pairing;
};

typedef struct rs6000_sched_context rs6000_sched_context_def;
typedef rs6000_sched_context_def *rs6000_sched_context_t;

/* Allocate store for new scheduling context.  */
static void *
rs6000_alloc_sched_context (void)
{
  return xmalloc (sizeof (rs6000_sched_context_def));
}

/* If CLEAN_P is true then initializes _SC with clean data,
   and from the global context otherwise.  */
static void
rs6000_init_sched_context (void *_sc, bool clean_p)
{
  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;

  if (clean_p)
    {
      sc->cached_can_issue_more = 0;
      sc->last_scheduled_insn = NULL;
      sc->load_store_pendulum = 0;
      sc->divide_cnt = 0;
      sc->vec_pairing = 0;
    }
  else
    {
      sc->cached_can_issue_more = cached_can_issue_more;
      sc->last_scheduled_insn = last_scheduled_insn;
      sc->load_store_pendulum = load_store_pendulum;
      sc->divide_cnt = divide_cnt;
      sc->vec_pairing = vec_pairing;
    }
}

/* Sets the global scheduling context to the one pointed to by _SC.  */
static void
rs6000_set_sched_context (void *_sc)
{
  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;

  gcc_assert (sc != NULL);

  cached_can_issue_more = sc->cached_can_issue_more;
  last_scheduled_insn = sc->last_scheduled_insn;
  load_store_pendulum = sc->load_store_pendulum;
  divide_cnt = sc->divide_cnt;
  vec_pairing = sc->vec_pairing;
}

static void
rs6000_free_sched_context (void *_sc)
{
  gcc_assert (_sc != NULL);

  free (_sc);
}
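/* Note: the four routines above are the backing for the
   TARGET_SCHED_ALLOC_SCHED_CONTEXT, TARGET_SCHED_INIT_SCHED_CONTEXT,
   TARGET_SCHED_SET_SCHED_CONTEXT and TARGET_SCHED_FREE_SCHED_CONTEXT hooks,
   which the selective scheduler uses to save and restore the backend
   scheduling state mirrored in struct rs6000_sched_context.  */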
19852 rs6000_sched_can_speculate_insn (rtx_insn
*insn
)
19854 switch (get_attr_type (insn
))
19869 /* Length in units of the trampoline for entering a nested function. */
19872 rs6000_trampoline_size (void)
19876 switch (DEFAULT_ABI
)
19879 gcc_unreachable ();
19882 ret
= (TARGET_32BIT
) ? 12 : 24;
19886 gcc_assert (!TARGET_32BIT
);
19892 ret
= (TARGET_32BIT
) ? 40 : 48;
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */

static void
rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
{
  int regsize = (TARGET_32BIT) ? 4 : 8;
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx ctx_reg = force_reg (Pmode, cxt);
  rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));

  switch (DEFAULT_ABI)
    {
    default:
      gcc_unreachable ();

    /* Under AIX, just build the 3 word function descriptor.  */
    case ABI_AIX:
      {
	rtx fnmem, fn_reg, toc_reg;

	if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
	  error ("you cannot take the address of a nested function if you use "
		 "the %qs option", "-mno-pointers-to-nested-functions");

	fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
	fn_reg = gen_reg_rtx (Pmode);
	toc_reg = gen_reg_rtx (Pmode);

	/* Macro to shorten the code expansions below.  */
# define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)

	m_tramp = replace_equiv_address (m_tramp, addr);

	emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
	emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
	emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
	emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
	emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
# undef MEM_PLUS
      }
      break;

    /* Under V.4/eabi/darwin, __trampoline_setup does the real work.  */
    case ABI_ELFv2:
    case ABI_DARWIN:
    case ABI_V4:
      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
			 LCT_NORMAL, VOIDmode,
			 addr, Pmode,
			 GEN_INT (rs6000_trampoline_size ()), SImode,
			 fnaddr, Pmode,
			 ctx_reg, Pmode);
      break;
    }
}
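/* For reference, the AIX branch above fills in a 3-word function descriptor
   at the trampoline: word 0 holds the entry address, word 1 the TOC pointer,
   and word 2 the static chain (CXT), each word REGSIZE (4 or 8) bytes
   wide.  */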
19959 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
19960 identifier as an argument, so the front end shouldn't look it up. */
19963 rs6000_attribute_takes_identifier_p (const_tree attr_id
)
19965 return is_attribute_p ("altivec", attr_id
);
19968 /* Handle the "altivec" attribute. The attribute may have
19969 arguments as follows:
19971 __attribute__((altivec(vector__)))
19972 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
19973 __attribute__((altivec(bool__))) (always followed by 'unsigned')
19975 and may appear more than once (e.g., 'vector bool char') in a
19976 given declaration. */
19979 rs6000_handle_altivec_attribute (tree
*node
,
19980 tree name ATTRIBUTE_UNUSED
,
19982 int flags ATTRIBUTE_UNUSED
,
19983 bool *no_add_attrs
)
19985 tree type
= *node
, result
= NULL_TREE
;
19989 = ((args
&& TREE_CODE (args
) == TREE_LIST
&& TREE_VALUE (args
)
19990 && TREE_CODE (TREE_VALUE (args
)) == IDENTIFIER_NODE
)
19991 ? *IDENTIFIER_POINTER (TREE_VALUE (args
))
19994 while (POINTER_TYPE_P (type
)
19995 || TREE_CODE (type
) == FUNCTION_TYPE
19996 || TREE_CODE (type
) == METHOD_TYPE
19997 || TREE_CODE (type
) == ARRAY_TYPE
)
19998 type
= TREE_TYPE (type
);
20000 mode
= TYPE_MODE (type
);
20002 /* Check for invalid AltiVec type qualifiers. */
20003 if (type
== long_double_type_node
)
20004 error ("use of %<long double%> in AltiVec types is invalid");
20005 else if (type
== boolean_type_node
)
20006 error ("use of boolean types in AltiVec types is invalid");
20007 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
20008 error ("use of %<complex%> in AltiVec types is invalid");
20009 else if (DECIMAL_FLOAT_MODE_P (mode
))
20010 error ("use of decimal floating-point types in AltiVec types is invalid");
20011 else if (!TARGET_VSX
)
20013 if (type
== long_unsigned_type_node
|| type
== long_integer_type_node
)
20016 error ("use of %<long%> in AltiVec types is invalid for "
20017 "64-bit code without %qs", "-mvsx");
20018 else if (rs6000_warn_altivec_long
)
20019 warning (0, "use of %<long%> in AltiVec types is deprecated; "
20022 else if (type
== long_long_unsigned_type_node
20023 || type
== long_long_integer_type_node
)
20024 error ("use of %<long long%> in AltiVec types is invalid without %qs",
20026 else if (type
== double_type_node
)
20027 error ("use of %<double%> in AltiVec types is invalid without %qs",
20031 switch (altivec_type
)
20034 unsigned_p
= TYPE_UNSIGNED (type
);
20038 result
= (unsigned_p
? unsigned_V1TI_type_node
: V1TI_type_node
);
20041 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
20044 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
20047 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
20050 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
20052 case E_SFmode
: result
= V4SF_type_node
; break;
20053 case E_DFmode
: result
= V2DF_type_node
; break;
20054 /* If the user says 'vector int bool', we may be handed the 'bool'
20055 attribute _before_ the 'vector' attribute, and so select the
20056 proper type in the 'b' case below. */
20057 case E_V4SImode
: case E_V8HImode
: case E_V16QImode
: case E_V4SFmode
:
20058 case E_V2DImode
: case E_V2DFmode
:
20066 case E_TImode
: case E_V1TImode
: result
= bool_V1TI_type_node
; break;
20067 case E_DImode
: case E_V2DImode
: result
= bool_V2DI_type_node
; break;
20068 case E_SImode
: case E_V4SImode
: result
= bool_V4SI_type_node
; break;
20069 case E_HImode
: case E_V8HImode
: result
= bool_V8HI_type_node
; break;
20070 case E_QImode
: case E_V16QImode
: result
= bool_V16QI_type_node
;
20077 case E_V8HImode
: result
= pixel_V8HI_type_node
;
20083 /* Propagate qualifiers attached to the element type
20084 onto the vector type. */
20085 if (result
&& result
!= type
&& TYPE_QUALS (type
))
20086 result
= build_qualified_type (result
, TYPE_QUALS (type
));
20088 *no_add_attrs
= true; /* No need to hang on to the attribute. */
20091 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
/* AltiVec defines five built-in scalar types that serve as vector
   elements; we must teach the compiler how to mangle them.  The 128-bit
   floating point mangling is target-specific as well.  MMA defines
   two built-in types to be used as opaque vector types.  */

static const char *
rs6000_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
      && TREE_CODE (type) != OPAQUE_TYPE)
    return NULL;

  if (type == bool_char_type_node) return "U6__boolc";
  if (type == bool_short_type_node) return "U6__bools";
  if (type == pixel_type_node) return "u7__pixel";
  if (type == bool_int_type_node) return "U6__booli";
  if (type == bool_long_long_type_node) return "U6__boolx";

  if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
    return "g";
  if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
    return "u9__ieee128";

  if (type == vector_pair_type_node)
    return "u13__vector_pair";
  if (type == vector_quad_type_node)
    return "u13__vector_quad";

  /* For all other types, use the default mangling.  */
  return NULL;
}
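/* Example (hypothetical declaration): for a C++ function
     void f (__vector __bool int);
   the element type contributes the "U6__booli" string above, so the vector
   parameter mangles as roughly Dv4_U6__booli under the usual Itanium vector
   mangling.  */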
/* Handle a "longcall" or "shortcall" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
rs6000_handle_longcall_attribute (tree *node, tree name,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Set longcall attributes on all functions declared when
   rs6000_default_long_calls is true.  */

static void
rs6000_set_default_type_attributes (tree type)
{
  if (rs6000_default_long_calls
      && (TREE_CODE (type) == FUNCTION_TYPE
	  || TREE_CODE (type) == METHOD_TYPE))
    TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
					NULL_TREE,
					TYPE_ATTRIBUTES (type));

#if TARGET_MACHO
  darwin_set_default_type_attributes (type);
#endif
}
20169 /* Return a reference suitable for calling a function with the
20170 longcall attribute. */
20173 rs6000_longcall_ref (rtx call_ref
, rtx arg
)
20175 /* System V adds '.' to the internal name, so skip them. */
20176 const char *call_name
= XSTR (call_ref
, 0);
20177 if (*call_name
== '.')
20179 while (*call_name
== '.')
20182 tree node
= get_identifier (call_name
);
20183 call_ref
= gen_rtx_SYMBOL_REF (VOIDmode
, IDENTIFIER_POINTER (node
));
20188 rtx base
= const0_rtx
;
20190 if (rs6000_pcrel_p ())
20192 rtx reg
= gen_rtx_REG (Pmode
, regno
);
20193 rtx u
= gen_rtx_UNSPEC_VOLATILE (Pmode
,
20194 gen_rtvec (3, base
, call_ref
, arg
),
20195 UNSPECV_PLT_PCREL
);
20196 emit_insn (gen_rtx_SET (reg
, u
));
20200 if (DEFAULT_ABI
== ABI_ELFv2
)
20201 base
= gen_rtx_REG (Pmode
, TOC_REGISTER
);
20205 base
= gen_rtx_REG (Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
20208 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
20209 may be used by a function global entry point. For SysV4, r11
20210 is used by __glink_PLTresolve lazy resolver entry. */
20211 rtx reg
= gen_rtx_REG (Pmode
, regno
);
20212 rtx hi
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (3, base
, call_ref
, arg
),
20214 rtx lo
= gen_rtx_UNSPEC_VOLATILE (Pmode
,
20215 gen_rtvec (3, reg
, call_ref
, arg
),
20217 emit_insn (gen_rtx_SET (reg
, hi
));
20218 emit_insn (gen_rtx_SET (reg
, lo
));
20222 return force_reg (Pmode
, call_ref
);
20225 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20226 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
20229 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20230 struct attribute_spec.handler. */
20232 rs6000_handle_struct_attribute (tree
*node
, tree name
,
20233 tree args ATTRIBUTE_UNUSED
,
20234 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
20237 if (DECL_P (*node
))
20239 if (TREE_CODE (*node
) == TYPE_DECL
)
20240 type
= &TREE_TYPE (*node
);
20245 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
20246 || TREE_CODE (*type
) == UNION_TYPE
)))
20248 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
20249 *no_add_attrs
= true;
20252 else if ((is_attribute_p ("ms_struct", name
)
20253 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
20254 || ((is_attribute_p ("gcc_struct", name
)
20255 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
20257 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
20259 *no_add_attrs
= true;
20266 rs6000_ms_bitfield_layout_p (const_tree record_type
)
20268 return (TARGET_USE_MS_BITFIELD_LAYOUT
&&
20269 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
20270 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
20273 #ifdef USING_ELFOS_H
20275 /* A get_unnamed_section callback, used for switching to toc_section. */
20278 rs6000_elf_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED
)
20280 if ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
20281 && TARGET_MINIMAL_TOC
)
20283 if (!toc_initialized
)
20285 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
20286 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20287 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "LCTOC", 0);
20288 fprintf (asm_out_file
, "\t.tc ");
20289 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1[TC],");
20290 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20291 fprintf (asm_out_file
, "\n");
20293 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20294 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20295 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20296 fprintf (asm_out_file
, " = .+32768\n");
20297 toc_initialized
= 1;
20300 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20302 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
20304 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
20305 if (!toc_initialized
)
20307 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20308 toc_initialized
= 1;
20313 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20314 if (!toc_initialized
)
20316 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20317 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20318 fprintf (asm_out_file
, " = .+32768\n");
20319 toc_initialized
= 1;
20324 /* Implement TARGET_ASM_INIT_SECTIONS. */
20327 rs6000_elf_asm_init_sections (void)
20330 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op
, NULL
);
20333 = get_unnamed_section (SECTION_WRITE
, output_section_asm_op
,
20334 SDATA2_SECTION_ASM_OP
);
20337 /* Implement TARGET_SELECT_RTX_SECTION. */
20340 rs6000_elf_select_rtx_section (machine_mode mode
, rtx x
,
20341 unsigned HOST_WIDE_INT align
)
20343 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
))
20344 return toc_section
;
20346 return default_elf_select_rtx_section (mode
, x
, align
);
20349 /* For a SYMBOL_REF, set generic flags and then perform some
20350 target-specific processing.
20352 When the AIX ABI is requested on a non-AIX system, replace the
20353 function name with the real name (with a leading .) rather than the
20354 function descriptor name. This saves a lot of overriding code to
20355 read the prefixes. */
20357 static void rs6000_elf_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
20359 rs6000_elf_encode_section_info (tree decl
, rtx rtl
, int first
)
20361 default_encode_section_info (decl
, rtl
, first
);
20364 && TREE_CODE (decl
) == FUNCTION_DECL
20366 && DEFAULT_ABI
== ABI_AIX
)
20368 rtx sym_ref
= XEXP (rtl
, 0);
20369 size_t len
= strlen (XSTR (sym_ref
, 0));
20370 char *str
= XALLOCAVEC (char, len
+ 2);
20372 memcpy (str
+ 1, XSTR (sym_ref
, 0), len
+ 1);
20373 XSTR (sym_ref
, 0) = ggc_alloc_string (str
, len
+ 1);
static int
compare_section_name (const char *section, const char *templ)
{
  size_t len;

  len = strlen (templ);
  return (strncmp (section, templ, len) == 0
	  && (section[len] == 0 || section[len] == '.'));
}

static bool
rs6000_elf_in_small_data_p (const_tree decl)
{
  if (rs6000_sdata == SDATA_NONE)
    return false;

  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (decl) == STRING_CST)
    return false;

  /* Functions are never in the small data area.  */
  if (TREE_CODE (decl) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
    {
      const char *section = DECL_SECTION_NAME (decl);
      if (compare_section_name (section, ".sdata")
	  || compare_section_name (section, ".sdata2")
	  || compare_section_name (section, ".gnu.linkonce.s")
	  || compare_section_name (section, ".sbss")
	  || compare_section_name (section, ".sbss2")
	  || compare_section_name (section, ".gnu.linkonce.sb")
	  || strcmp (section, ".PPC.EMB.sdata0") == 0
	  || strcmp (section, ".PPC.EMB.sbss0") == 0)
	return true;
    }
  else
    {
      /* If we are told not to put readonly data in sdata, then don't.  */
      if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
	  && !rs6000_readonly_in_sdata)
	return false;

      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));

      if (size > 0
	  && size <= g_switch_value
	  /* If it's not public, and we're not going to reference it there,
	     there's no need to put it in the small data section.  */
	  && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
	return true;
    }

  return false;
}
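/* Worked example (option values assumed for illustration): with
   -msdata=data and a small-data threshold of 8 bytes (g_switch_value == 8),
   a public file-scope "double counter;" passes the size test above and is
   treated as small data, while a 16-byte object with the same linkage is
   not.  */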
#endif /* USING_ELFOS_H */
/* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P.  */

static bool
rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
{
  return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
}

/* Do not place thread-local symbols refs in the object blocks.  */

static bool
rs6000_use_blocks_for_decl_p (const_tree decl)
{
  return !DECL_THREAD_LOCAL_P (decl);
}
/* Return a REG that occurs in ADDR with coefficient 1.
   ADDR can be effectively incremented by incrementing REG.

   r0 is special and we must not select it as an address
   register by this routine since our caller will try to
   increment the returned register via an "la" instruction.  */

rtx
find_addr_reg (rtx addr)
{
  while (GET_CODE (addr) == PLUS)
    {
      if (REG_P (XEXP (addr, 0))
	  && REGNO (XEXP (addr, 0)) != 0)
	addr = XEXP (addr, 0);
      else if (REG_P (XEXP (addr, 1))
	       && REGNO (XEXP (addr, 1)) != 0)
	addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 0)))
	addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 1)))
	addr = XEXP (addr, 0);
      else
	gcc_unreachable ();
    }
  gcc_assert (REG_P (addr) && REGNO (addr) != 0);
  return addr;
}

void
rs6000_fatal_bad_address (rtx op)
{
  fatal_insn ("bad address", op);
}
vec<branch_island, va_gc> *branch_islands;

/* Remember to generate a branch island for far calls to the given
   function.  */

static void
add_compiler_branch_island (tree label_name, tree function_name,
			    int line_number)
{
  branch_island bi = {function_name, label_name, line_number};
  vec_safe_push (branch_islands, bi);
}

/* NO_PREVIOUS_DEF checks in the link list whether the function name is
   already there or not.  */

static bool
no_previous_def (tree function_name)
{
  branch_island *bi;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
    if (function_name == bi->function_name)
      return false;
  return true;
}

/* GET_PREV_LABEL gets the label name from the previous definition of
   the function.  */

static tree
get_prev_label (tree function_name)
{
  branch_island *bi;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
    if (function_name == bi->function_name)
      return bi->label_name;
  return NULL_TREE;
}
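/* Background: on Darwin a direct "bl" only reaches targets within roughly
   +/-32MB, so a call whose target may end up farther away is redirected to a
   locally emitted branch-island stub; the vector above records the islands
   that still have to be emitted for the current function.  */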
20532 /* Generate external symbol indirection stubs (PIC and non-PIC). */
20535 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
20537 unsigned int length
;
20538 char *symbol_name
, *lazy_ptr_name
;
20539 char *local_label_0
;
20540 static unsigned label
= 0;
20542 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20543 symb
= (*targetm
.strip_name_encoding
) (symb
);
20545 length
= strlen (symb
);
20546 symbol_name
= XALLOCAVEC (char, length
+ 32);
20547 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
20549 lazy_ptr_name
= XALLOCAVEC (char, length
+ 32);
20550 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name
, symb
, length
);
20554 switch_to_section (darwin_sections
[machopic_picsymbol_stub1_section
]);
20555 fprintf (file
, "\t.align 5\n");
20557 fprintf (file
, "%s:\n", stub
);
20558 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
20561 local_label_0
= XALLOCAVEC (char, 16);
20562 sprintf (local_label_0
, "L%u$spb", label
);
20564 fprintf (file
, "\tmflr r0\n");
20565 fprintf (file
, "\tbcl 20,31,%s\n", local_label_0
);
20566 fprintf (file
, "%s:\n\tmflr r11\n", local_label_0
);
20567 fprintf (file
, "\taddis r11,r11,ha16(%s-%s)\n",
20568 lazy_ptr_name
, local_label_0
);
20569 fprintf (file
, "\tmtlr r0\n");
20570 fprintf (file
, "\t%s r12,lo16(%s-%s)(r11)\n",
20571 (TARGET_64BIT
? "ldu" : "lwzu"),
20572 lazy_ptr_name
, local_label_0
);
20573 fprintf (file
, "\tmtctr r12\n");
20574 fprintf (file
, "\tbctr\n");
20576 else /* mdynamic-no-pic or mkernel. */
20578 switch_to_section (darwin_sections
[machopic_symbol_stub1_section
]);
20579 fprintf (file
, "\t.align 4\n");
20581 fprintf (file
, "%s:\n", stub
);
20582 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
20584 fprintf (file
, "\tlis r11,ha16(%s)\n", lazy_ptr_name
);
20585 fprintf (file
, "\t%s r12,lo16(%s)(r11)\n",
20586 (TARGET_64BIT
? "ldu" : "lwzu"),
20588 fprintf (file
, "\tmtctr r12\n");
20589 fprintf (file
, "\tbctr\n");
20592 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
20593 fprintf (file
, "%s:\n", lazy_ptr_name
);
20594 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
20595 fprintf (file
, "%sdyld_stub_binding_helper\n",
20596 (TARGET_64BIT
? DOUBLE_INT_ASM_OP
: "\t.long\t"));
20599 /* Legitimize PIC addresses. If the address is already
20600 position-independent, we return ORIG. Newly generated
20601 position-independent addresses go into a reg. This is REG if non
20602 zero, otherwise we allocate register(s) as necessary. */
20604 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
20607 rs6000_machopic_legitimize_pic_address (rtx orig
, machine_mode mode
,
20612 if (reg
== NULL
&& !reload_completed
)
20613 reg
= gen_reg_rtx (Pmode
);
20615 if (GET_CODE (orig
) == CONST
)
20619 if (GET_CODE (XEXP (orig
, 0)) == PLUS
20620 && XEXP (XEXP (orig
, 0), 0) == pic_offset_table_rtx
)
20623 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
20625 /* Use a different reg for the intermediate value, as
20626 it will be marked UNCHANGING. */
20627 reg_temp
= !can_create_pseudo_p () ? reg
: gen_reg_rtx (Pmode
);
20628 base
= rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig
, 0), 0),
20631 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig
, 0), 1),
20634 if (CONST_INT_P (offset
))
20636 if (SMALL_INT (offset
))
20637 return plus_constant (Pmode
, base
, INTVAL (offset
));
20638 else if (!reload_completed
)
20639 offset
= force_reg (Pmode
, offset
);
20642 rtx mem
= force_const_mem (Pmode
, orig
);
20643 return machopic_legitimize_pic_address (mem
, Pmode
, reg
);
20646 return gen_rtx_PLUS (Pmode
, base
, offset
);
20649 /* Fall back on generic machopic code. */
20650 return machopic_legitimize_pic_address (orig
, mode
, reg
);
20653 /* Output a .machine directive for the Darwin assembler, and call
20654 the generic start_file routine. */
20657 rs6000_darwin_file_start (void)
20659 static const struct
20663 HOST_WIDE_INT if_set
;
20665 { "ppc64", "ppc64", MASK_64BIT
},
20666 { "970", "ppc970", MASK_PPC_GPOPT
| MASK_MFCRF
| MASK_POWERPC64
},
20667 { "power4", "ppc970", 0 },
20668 { "G5", "ppc970", 0 },
20669 { "7450", "ppc7450", 0 },
20670 { "7400", "ppc7400", MASK_ALTIVEC
},
20671 { "G4", "ppc7400", 0 },
20672 { "750", "ppc750", 0 },
20673 { "740", "ppc750", 0 },
20674 { "G3", "ppc750", 0 },
20675 { "604e", "ppc604e", 0 },
20676 { "604", "ppc604", 0 },
20677 { "603e", "ppc603", 0 },
20678 { "603", "ppc603", 0 },
20679 { "601", "ppc601", 0 },
20680 { NULL
, "ppc", 0 } };
20681 const char *cpu_id
= "";
20684 rs6000_file_start ();
20685 darwin_file_start ();
20687 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
20689 if (rs6000_default_cpu
!= 0 && rs6000_default_cpu
[0] != '\0')
20690 cpu_id
= rs6000_default_cpu
;
20692 if (OPTION_SET_P (rs6000_cpu_index
))
20693 cpu_id
= processor_target_table
[rs6000_cpu_index
].name
;
20695 /* Look through the mapping array. Pick the first name that either
20696 matches the argument, has a bit set in IF_SET that is also set
20697 in the target flags, or has a NULL name. */
20700 while (mapping
[i
].arg
!= NULL
20701 && strcmp (mapping
[i
].arg
, cpu_id
) != 0
20702 && (mapping
[i
].if_set
& rs6000_isa_flags
) == 0)
20705 fprintf (asm_out_file
, "\t.machine %s\n", mapping
[i
].name
);
20708 #endif /* TARGET_MACHO */
20712 rs6000_elf_reloc_rw_mask (void)
20716 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
20722 /* Record an element in the table of global constructors. SYMBOL is
20723 a SYMBOL_REF of the function to be called; PRIORITY is a number
20724 between 0 and MAX_INIT_PRIORITY.
20726 This differs from default_named_section_asm_out_constructor in
20727 that we have special handling for -mrelocatable. */
20729 static void rs6000_elf_asm_out_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
20731 rs6000_elf_asm_out_constructor (rtx symbol
, int priority
)
20733 const char *section
= ".ctors";
20736 if (priority
!= DEFAULT_INIT_PRIORITY
)
20738 sprintf (buf
, ".ctors.%.5u",
20739 /* Invert the numbering so the linker puts us in the proper
20740 order; constructors are run from right to left, and the
20741 linker sorts in increasing order. */
20742 MAX_INIT_PRIORITY
- priority
);
20746 switch_to_section (get_section (section
, SECTION_WRITE
, NULL
));
20747 assemble_align (POINTER_SIZE
);
20749 if (DEFAULT_ABI
== ABI_V4
20750 && (TARGET_RELOCATABLE
|| flag_pic
> 1))
20752 fputs ("\t.long (", asm_out_file
);
20753 output_addr_const (asm_out_file
, symbol
);
20754 fputs (")@fixup\n", asm_out_file
);
20757 assemble_integer (symbol
, POINTER_SIZE
/ BITS_PER_UNIT
, POINTER_SIZE
, 1);
20760 static void rs6000_elf_asm_out_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
20762 rs6000_elf_asm_out_destructor (rtx symbol
, int priority
)
20764 const char *section
= ".dtors";
20767 if (priority
!= DEFAULT_INIT_PRIORITY
)
20769 sprintf (buf
, ".dtors.%.5u",
20770 /* Invert the numbering so the linker puts us in the proper
20771 order; constructors are run from right to left, and the
20772 linker sorts in increasing order. */
20773 MAX_INIT_PRIORITY
- priority
);
20777 switch_to_section (get_section (section
, SECTION_WRITE
, NULL
));
20778 assemble_align (POINTER_SIZE
);
20780 if (DEFAULT_ABI
== ABI_V4
20781 && (TARGET_RELOCATABLE
|| flag_pic
> 1))
20783 fputs ("\t.long (", asm_out_file
);
20784 output_addr_const (asm_out_file
, symbol
);
20785 fputs (")@fixup\n", asm_out_file
);
20788 assemble_integer (symbol
, POINTER_SIZE
/ BITS_PER_UNIT
, POINTER_SIZE
, 1);
20792 rs6000_elf_declare_function_name (FILE *file
, const char *name
, tree decl
)
20794 if (TARGET_64BIT
&& DEFAULT_ABI
!= ABI_ELFv2
)
20796 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file
);
20797 ASM_OUTPUT_LABEL (file
, name
);
20798 fputs (DOUBLE_INT_ASM_OP
, file
);
20799 rs6000_output_function_entry (file
, name
);
20800 fputs (",.TOC.@tocbase,0\n\t.previous\n", file
);
20803 fputs ("\t.size\t", file
);
20804 assemble_name (file
, name
);
20805 fputs (",24\n\t.type\t.", file
);
20806 assemble_name (file
, name
);
20807 fputs (",@function\n", file
);
20808 if (TREE_PUBLIC (decl
) && ! DECL_WEAK (decl
))
20810 fputs ("\t.globl\t.", file
);
20811 assemble_name (file
, name
);
20816 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
20817 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
20818 rs6000_output_function_entry (file
, name
);
20819 fputs (":\n", file
);
20824 if (DEFAULT_ABI
== ABI_V4
20825 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
20826 && !TARGET_SECURE_PLT
20827 && (!constant_pool_empty_p () || crtl
->profile
)
20828 && (uses_toc
= uses_TOC ()))
20833 switch_to_other_text_partition ();
20834 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
20836 fprintf (file
, "\t.long ");
20837 assemble_name (file
, toc_label_name
);
20840 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
20841 assemble_name (file
, buf
);
20844 switch_to_other_text_partition ();
20847 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
20848 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
20850 if (TARGET_CMODEL
== CMODEL_LARGE
20851 && rs6000_global_entry_point_prologue_needed_p ())
20855 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
20857 fprintf (file
, "\t.quad .TOC.-");
20858 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
20859 assemble_name (file
, buf
);
20863 if (DEFAULT_ABI
== ABI_AIX
)
20865 const char *desc_name
, *orig_name
;
20867 orig_name
= (*targetm
.strip_name_encoding
) (name
);
20868 desc_name
= orig_name
;
20869 while (*desc_name
== '.')
20872 if (TREE_PUBLIC (decl
))
20873 fprintf (file
, "\t.globl %s\n", desc_name
);
20875 fprintf (file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20876 fprintf (file
, "%s:\n", desc_name
);
20877 fprintf (file
, "\t.long %s\n", orig_name
);
20878 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file
);
20879 fputs ("\t.long 0\n", file
);
20880 fprintf (file
, "\t.previous\n");
20882 ASM_OUTPUT_LABEL (file
, name
);
20885 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED
;
20887 rs6000_elf_file_end (void)
20889 #ifdef HAVE_AS_GNU_ATTRIBUTE
20890 /* ??? The value emitted depends on options active at file end.
20891 Assume anyone using #pragma or attributes that might change
20892 options knows what they are doing. */
20893 if ((TARGET_64BIT
|| DEFAULT_ABI
== ABI_V4
)
20894 && rs6000_passes_float
)
20898 if (TARGET_HARD_FLOAT
)
20902 if (rs6000_passes_long_double
)
20904 if (!TARGET_LONG_DOUBLE_128
)
20906 else if (TARGET_IEEEQUAD
)
20911 fprintf (asm_out_file
, "\t.gnu_attribute 4, %d\n", fp
);
20913 if (TARGET_32BIT
&& DEFAULT_ABI
== ABI_V4
)
20915 if (rs6000_passes_vector
)
20916 fprintf (asm_out_file
, "\t.gnu_attribute 8, %d\n",
20917 (TARGET_ALTIVEC_ABI
? 2 : 1));
20918 if (rs6000_returns_struct
)
20919 fprintf (asm_out_file
, "\t.gnu_attribute 12, %d\n",
20920 aix_struct_return
? 2 : 1);
20923 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
20924 if (TARGET_32BIT
|| DEFAULT_ABI
== ABI_ELFv2
)
20925 file_end_indicate_exec_stack ();
20928 if (flag_split_stack
)
20929 file_end_indicate_split_stack ();
20933 /* We have expanded a CPU builtin, so we need to emit a reference to
20934 the special symbol that LIBC uses to declare it supports the
20935 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
20936 switch_to_section (data_section
);
20937 fprintf (asm_out_file
, "\t.align %u\n", TARGET_32BIT
? 2 : 3);
20938 fprintf (asm_out_file
, "\t%s %s\n",
20939 TARGET_32BIT
? ".long" : ".quad", tcb_verification_symbol
);
20946 #ifndef HAVE_XCOFF_DWARF_EXTRAS
20947 #define HAVE_XCOFF_DWARF_EXTRAS 0
20950 static enum unwind_info_type
20951 rs6000_xcoff_debug_unwind_info (void)
20957 rs6000_xcoff_asm_output_anchor (rtx symbol
)
20961 sprintf (buffer
, "$ + " HOST_WIDE_INT_PRINT_DEC
,
20962 SYMBOL_REF_BLOCK_OFFSET (symbol
));
20963 fprintf (asm_out_file
, "%s", SET_ASM_OP
);
20964 RS6000_OUTPUT_BASENAME (asm_out_file
, XSTR (symbol
, 0));
20965 fprintf (asm_out_file
, ",");
20966 RS6000_OUTPUT_BASENAME (asm_out_file
, buffer
);
20967 fprintf (asm_out_file
, "\n");
20971 rs6000_xcoff_asm_globalize_label (FILE *stream
, const char *name
)
20973 fputs (GLOBAL_ASM_OP
, stream
);
20974 RS6000_OUTPUT_BASENAME (stream
, name
);
20975 putc ('\n', stream
);
20978 /* A get_unnamed_decl callback, used for read-only sections. PTR
20979 points to the section string variable. */
20982 rs6000_xcoff_output_readonly_section_asm_op (const char *directive
)
20984 fprintf (asm_out_file
, "\t.csect %s[RO],%s\n",
20986 ? xcoff_private_rodata_section_name
20987 : xcoff_read_only_section_name
,
20988 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
20991 /* Likewise for read-write sections. */
20994 rs6000_xcoff_output_readwrite_section_asm_op (const char *)
20996 fprintf (asm_out_file
, "\t.csect %s[RW],%s\n",
20997 xcoff_private_data_section_name
,
20998 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
21002 rs6000_xcoff_output_tls_section_asm_op (const char *directive
)
21004 fprintf (asm_out_file
, "\t.csect %s[TL],%s\n",
21006 ? xcoff_private_data_section_name
21007 : xcoff_tls_data_section_name
,
21008 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
21011 /* A get_unnamed_section callback, used for switching to toc_section. */
21014 rs6000_xcoff_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED
)
21016 if (TARGET_MINIMAL_TOC
)
21018 /* toc_section is always selected at least once from
21019 rs6000_xcoff_file_start, so this is guaranteed to
21020 always be defined once and only once in each file. */
21021 if (!toc_initialized
)
21023 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file
);
21024 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file
);
21025 toc_initialized
= 1;
21027 fprintf (asm_out_file
, "\t.csect toc_table[RW]%s\n",
21028 (TARGET_32BIT
? "" : ",3"));
21031 fputs ("\t.toc\n", asm_out_file
);
21034 /* Implement TARGET_ASM_INIT_SECTIONS. */
21037 rs6000_xcoff_asm_init_sections (void)
21039 read_only_data_section
21040 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
21043 private_data_section
21044 = get_unnamed_section (SECTION_WRITE
,
21045 rs6000_xcoff_output_readwrite_section_asm_op
,
21048 read_only_private_data_section
21049 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
21053 = get_unnamed_section (SECTION_TLS
,
21054 rs6000_xcoff_output_tls_section_asm_op
,
21057 tls_private_data_section
21058 = get_unnamed_section (SECTION_TLS
,
21059 rs6000_xcoff_output_tls_section_asm_op
,
21063 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op
, NULL
);
21065 readonly_data_section
= read_only_data_section
;
21069 rs6000_xcoff_reloc_rw_mask (void)
21075 rs6000_xcoff_asm_named_section (const char *name
, unsigned int flags
,
21076 tree decl ATTRIBUTE_UNUSED
)
21079 static const char * const suffix
[7]
21080 = { "PR", "RO", "RW", "BS", "TL", "UL", "XO" };
21082 if (flags
& SECTION_EXCLUDE
)
21084 else if (flags
& SECTION_DEBUG
)
21086 fprintf (asm_out_file
, "\t.dwsect %s\n", name
);
21089 else if (flags
& SECTION_CODE
)
21091 else if (flags
& SECTION_TLS
)
21093 if (flags
& SECTION_BSS
)
21098 else if (flags
& SECTION_WRITE
)
21100 if (flags
& SECTION_BSS
)
21108 fprintf (asm_out_file
, "\t.csect %s%s[%s],%u\n",
21109 (flags
& SECTION_CODE
) ? "." : "",
21110 name
, suffix
[smclass
], flags
& SECTION_ENTSIZE
);
21113 #define IN_NAMED_SECTION(DECL) \
21114 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
21115 && DECL_SECTION_NAME (DECL) != NULL)
21118 rs6000_xcoff_select_section (tree decl
, int reloc
,
21119 unsigned HOST_WIDE_INT align
)
21121 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
21123 if (align
> BIGGEST_ALIGNMENT
&& VAR_OR_FUNCTION_DECL_P (decl
))
21125 resolve_unique_section (decl
, reloc
, true);
21126 if (IN_NAMED_SECTION (decl
))
21127 return get_named_section (decl
, NULL
, reloc
);
21130 if (decl_readonly_section (decl
, reloc
))
21132 if (TREE_PUBLIC (decl
))
21133 return read_only_data_section
;
21135 return read_only_private_data_section
;
21140 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL_P (decl
))
21142 if (bss_initializer_p (decl
))
21143 return tls_comm_section
;
21144 else if (TREE_PUBLIC (decl
))
21145 return tls_data_section
;
21147 return tls_private_data_section
;
21151 if (TREE_PUBLIC (decl
))
21152 return data_section
;
21154 return private_data_section
;
21159 rs6000_xcoff_unique_section (tree decl
, int reloc ATTRIBUTE_UNUSED
)
21163 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
21164 name
= (*targetm
.strip_name_encoding
) (name
);
21165 set_decl_section_name (decl
, name
);
/* Select section for constant in constant pool.

   On RS/6000, all constants are in the private read-only data area.
   However, if this is being placed in the TOC it must be output as a
   toc entry.  */

static section *
rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
				 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
    return toc_section;
  else
    return read_only_private_data_section;
}

/* Remove any trailing [DS] or the like from the symbol name.  */

static const char *
rs6000_xcoff_strip_name_encoding (const char *name)
{
  size_t len;

  len = strlen (name);
  if (name[len - 1] == ']')
    return ggc_alloc_string (name, len - 4);
  else
    return name;
}
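/* Example: an XCOFF csect symbol such as "foo[DS]" comes back as "foo"; the
   test above assumes the trailing mapping-class suffix is exactly four
   characters long (e.g. "[DS]", "[RW]").  */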
21199 /* Section attributes. AIX is always PIC. */
21201 static unsigned int
21202 rs6000_xcoff_section_type_flags (tree decl
, const char *name
, int reloc
)
21204 unsigned int align
;
21205 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
21207 if (decl
&& DECL_P (decl
) && VAR_P (decl
) && bss_initializer_p (decl
))
21208 flags
|= SECTION_BSS
;
21210 /* Align to at least UNIT size. */
21211 if (!decl
|| !DECL_P (decl
))
21212 align
= MIN_UNITS_PER_WORD
;
21213 /* Align code CSECT to at least 32 bytes. */
21214 else if ((flags
& SECTION_CODE
) != 0)
21215 align
= MAX ((DECL_ALIGN (decl
) / BITS_PER_UNIT
), 32);
21217 /* Increase alignment of large objects if not already stricter. */
21218 align
= MAX ((DECL_ALIGN (decl
) / BITS_PER_UNIT
),
21219 int_size_in_bytes (TREE_TYPE (decl
)) > MIN_UNITS_PER_WORD
21220 ? UNITS_PER_FP_WORD
: MIN_UNITS_PER_WORD
);
21222 return flags
| (exact_log2 (align
) & SECTION_ENTSIZE
);
/* Output at beginning of assembler file.

   Initialize the section names for the RS/6000 at this point.

   Specify filename, including full path, to assembler.

   We want to go into the TOC section so at least one .toc will be emitted.
   Also, in order to output proper .bs/.es pairs, we need at least one static
   [RW] section emitted.

   Finally, declare mcount when profiling to make the assembler happy.  */

static void
rs6000_xcoff_file_start (void)
{
  rs6000_gen_section_name (&xcoff_bss_section_name,
			   main_input_filename, ".bss_");
  rs6000_gen_section_name (&xcoff_private_data_section_name,
			   main_input_filename, ".rw_");
  rs6000_gen_section_name (&xcoff_private_rodata_section_name,
			   main_input_filename, ".rop_");
  rs6000_gen_section_name (&xcoff_read_only_section_name,
			   main_input_filename, ".ro_");
  rs6000_gen_section_name (&xcoff_tls_data_section_name,
			   main_input_filename, ".tls_");

  fputs ("\t.file\t", asm_out_file);
  output_quoted_string (asm_out_file, main_input_filename);
  fputc ('\n', asm_out_file);
  if (write_symbols != NO_DEBUG)
    switch_to_section (private_data_section);
  switch_to_section (toc_section);
  switch_to_section (text_section);
  if (profile_flag)
    fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
  rs6000_file_start ();
}
/* Output at end of assembler file.
   On the RS/6000, referencing data should automatically pull in text.  */

static void
rs6000_xcoff_file_end (void)
{
  switch_to_section (text_section);
  if (xcoff_tls_exec_model_detected)
    {
      /* Add a .ref to __tls_get_addr to force libpthread dependency.  */
      fputs ("\t.extern __tls_get_addr\n\t.ref __tls_get_addr\n", asm_out_file);
    }
  fputs ("_section_.text:\n", asm_out_file);
  switch_to_section (data_section);
  fputs (TARGET_32BIT
	 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
	 asm_out_file);
}
21283 struct declare_alias_data
21286 bool function_descriptor
;
21289 /* Declare alias N. A helper function for for_node_and_aliases. */
21292 rs6000_declare_alias (struct symtab_node
*n
, void *d
)
21294 struct declare_alias_data
*data
= (struct declare_alias_data
*)d
;
21295 /* Main symbol is output specially, because varasm machinery does part of
21296 the job for us - we do not need to declare .globl/lglobs and such. */
21297 if (!n
->alias
|| n
->weakref
)
21300 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n
->decl
)))
21303 /* Prevent assemble_alias from trying to use .set pseudo operation
21304 that does not behave as expected by the middle-end. */
21305 TREE_ASM_WRITTEN (n
->decl
) = true;
21307 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n
->decl
));
21308 char *buffer
= (char *) alloca (strlen (name
) + 2);
21310 int dollar_inside
= 0;
21312 strcpy (buffer
, name
);
21313 p
= strchr (buffer
, '$');
21317 p
= strchr (p
+ 1, '$');
21319 if (TREE_PUBLIC (n
->decl
))
21321 if (!RS6000_WEAK
|| !DECL_WEAK (n
->decl
))
21323 if (dollar_inside
) {
21324 if (data
->function_descriptor
)
21325 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21326 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21328 if (data
->function_descriptor
)
21330 fputs ("\t.globl .", data
->file
);
21331 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
21332 putc ('\n', data
->file
);
21334 fputs ("\t.globl ", data
->file
);
21335 assemble_name (data
->file
, buffer
);
21336 putc ('\n', data
->file
);
21338 #ifdef ASM_WEAKEN_DECL
21339 else if (DECL_WEAK (n
->decl
) && !data
->function_descriptor
)
21340 ASM_WEAKEN_DECL (data
->file
, n
->decl
, name
, NULL
);
21347 if (data
->function_descriptor
)
21348 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21349 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21351 if (data
->function_descriptor
)
21353 fputs ("\t.lglobl .", data
->file
);
21354 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
21355 putc ('\n', data
->file
);
21357 fputs ("\t.lglobl ", data
->file
);
21358 assemble_name (data
->file
, buffer
);
21359 putc ('\n', data
->file
);
21361 if (data
->function_descriptor
)
21362 putc ('.', data
->file
);
21363 ASM_OUTPUT_LABEL (data
->file
, buffer
);
21368 #ifdef HAVE_GAS_HIDDEN
21369 /* Helper function to calculate visibility of a DECL
21370 and return the value as a const string. */
21372 static const char *
21373 rs6000_xcoff_visibility (tree decl
)
21375 static const char * const visibility_types
[] = {
21376 "", ",protected", ",hidden", ",internal"
21379 enum symbol_visibility vis
= DECL_VISIBILITY (decl
);
21380 return visibility_types
[vis
];
21385 /* This macro produces the initial definition of a function name.
21386 On the RS/6000, we need to place an extra '.' in the function name and
21387 output the function descriptor.
21388 Dollar signs are converted to underscores.
21390 The csect for the function will have already been created when
21391 text_section was selected. We do have to go back to that csect, however.
21393 The third and fourth parameters to the .function pseudo-op (16 and 044)
21394 are placeholders which no longer have any use.
21396 Because AIX assembler's .set command has unexpected semantics, we output
21397 all aliases as alternative labels in front of the definition. */
21400 rs6000_xcoff_declare_function_name (FILE *file
, const char *name
, tree decl
)
21402 char *buffer
= (char *) alloca (strlen (name
) + 1);
21404 int dollar_inside
= 0;
21405 struct declare_alias_data data
= {file
, false};
21407 strcpy (buffer
, name
);
21408 p
= strchr (buffer
, '$');
21412 p
= strchr (p
+ 1, '$');
21414 if (TREE_PUBLIC (decl
))
21416 if (!RS6000_WEAK
|| !DECL_WEAK (decl
))
21418 if (dollar_inside
) {
21419 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21420 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21422 fputs ("\t.globl .", file
);
21423 RS6000_OUTPUT_BASENAME (file
, buffer
);
21424 #ifdef HAVE_GAS_HIDDEN
21425 fputs (rs6000_xcoff_visibility (decl
), file
);
21432 if (dollar_inside
) {
21433 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21434 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21436 fputs ("\t.lglobl .", file
);
21437 RS6000_OUTPUT_BASENAME (file
, buffer
);
21441 fputs ("\t.csect ", file
);
21442 assemble_name (file
, buffer
);
21443 fputs (TARGET_32BIT
? "\n" : ",3\n", file
);
21445 ASM_OUTPUT_LABEL (file
, buffer
);
21447 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
21449 fputs (TARGET_32BIT
? "\t.long ." : "\t.llong .", file
);
21450 RS6000_OUTPUT_BASENAME (file
, buffer
);
21451 fputs (", TOC[tc0], 0\n", file
);
21454 switch_to_section (function_section (decl
));
21456 ASM_OUTPUT_LABEL (file
, buffer
);
21458 data
.function_descriptor
= true;
21459 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
21461 if (!DECL_IGNORED_P (decl
))
21463 if (write_symbols
== DBX_DEBUG
|| write_symbols
== XCOFF_DEBUG
)
21464 xcoffout_declare_function (file
, decl
, buffer
);
21465 else if (dwarf_debuginfo_p ())
21467 name
= (*targetm
.strip_name_encoding
) (name
);
21468 fprintf (file
, "\t.function .%s,.%s,2,0\n", name
, name
);
21475 /* Output assembly language to globalize a symbol from a DECL,
21476 possibly with visibility. */
21479 rs6000_xcoff_asm_globalize_decl_name (FILE *stream
, tree decl
)
21481 const char *name
= XSTR (XEXP (DECL_RTL (decl
), 0), 0);
21482 fputs (GLOBAL_ASM_OP
, stream
);
21483 assemble_name (stream
, name
);
21484 #ifdef HAVE_GAS_HIDDEN
21485 fputs (rs6000_xcoff_visibility (decl
), stream
);
21487 putc ('\n', stream
);
21490 /* Output assembly language to define a symbol as COMMON from a DECL,
21491 possibly with visibility. */
21494 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream
,
21495 tree decl ATTRIBUTE_UNUSED
,
21497 unsigned HOST_WIDE_INT size
,
21498 unsigned int align
)
21500 unsigned int align2
= 2;
21503 align
= DATA_ABI_ALIGNMENT (TREE_TYPE (decl
), DECL_ALIGN (decl
));
21506 align2
= floor_log2 (align
/ BITS_PER_UNIT
);
21510 if (! DECL_COMMON (decl
))
21512 /* Forget section. */
21515 /* Globalize TLS BSS. */
21516 if (TREE_PUBLIC (decl
) && DECL_THREAD_LOCAL_P (decl
))
21518 fputs (GLOBAL_ASM_OP
, stream
);
21519 assemble_name (stream
, name
);
21520 fputc ('\n', stream
);
21523 /* Switch to section and skip space. */
21524 fputs ("\t.csect ", stream
);
21525 assemble_name (stream
, name
);
21526 fprintf (stream
, ",%u\n", align2
);
21527 ASM_DECLARE_OBJECT_NAME (stream
, name
, decl
);
21528 ASM_OUTPUT_SKIP (stream
, size
? size
: 1);
21532 if (TREE_PUBLIC (decl
))
21535 "\t.comm %s," HOST_WIDE_INT_PRINT_UNSIGNED
",%u" ,
21536 name
, size
, align2
);
21538 #ifdef HAVE_GAS_HIDDEN
21540 fputs (rs6000_xcoff_visibility (decl
), stream
);
21542 putc ('\n', stream
);
21546 "\t.lcomm %s," HOST_WIDE_INT_PRINT_UNSIGNED
",%s,%u\n",
21547 (*targetm
.strip_name_encoding
) (name
), size
, name
, align2
);
21550 /* This macro produces the initial definition of a object (variable) name.
21551 Because AIX assembler's .set command has unexpected semantics, we output
21552 all aliases as alternative labels in front of the definition. */
21555 rs6000_xcoff_declare_object_name (FILE *file
, const char *name
, tree decl
)
21557 struct declare_alias_data data
= {file
, false};
21558 ASM_OUTPUT_LABEL (file
, name
);
21559 symtab_node::get_create (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
/* Override the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'.  */

void
rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
{
  fputs (integer_asm_op (size, FALSE), file);
  assemble_name (file, label);
  fputs ("-$", file);
}
/* Output a symbol offset relative to the dbase for the current object.
   We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
   __gcc_unwind_dbase is embedded in all executables/libraries through
   libgcc/config/rs6000/crtdbase.S.  */

void
rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
{
  fputs (integer_asm_op (size, FALSE), file);
  assemble_name (file, label);
  fputs ("-__gcc_unwind_dbase", file);
}
21590 rs6000_xcoff_encode_section_info (tree decl
, rtx rtl
, int first
)
21594 const char *symname
;
21596 default_encode_section_info (decl
, rtl
, first
);
21598 /* Careful not to prod global register variables. */
21601 symbol
= XEXP (rtl
, 0);
21602 if (!SYMBOL_REF_P (symbol
))
21605 flags
= SYMBOL_REF_FLAGS (symbol
);
21607 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL_P (decl
))
21608 flags
&= ~SYMBOL_FLAG_HAS_BLOCK_INFO
;
21610 SYMBOL_REF_FLAGS (symbol
) = flags
;
21612 symname
= XSTR (symbol
, 0);
21614 /* Append CSECT mapping class, unless the symbol already is qualified.
21615 Aliases are implemented as labels, so the symbol name should not add
21616 a mapping class. */
21619 && VAR_OR_FUNCTION_DECL_P (decl
)
21620 && (symtab_node::get (decl
) == NULL
21621 || symtab_node::get (decl
)->alias
== 0)
21622 && symname
[strlen (symname
) - 1] != ']')
21624 const char *smclass
= NULL
;
21626 if (TREE_CODE (decl
) == FUNCTION_DECL
)
21628 else if (DECL_THREAD_LOCAL_P (decl
))
21630 if (bss_initializer_p (decl
))
21632 else if (flag_data_sections
)
21635 else if (DECL_EXTERNAL (decl
))
21637 else if (bss_initializer_p (decl
))
21639 else if (flag_data_sections
)
21641 /* This must exactly match the logic of select section. */
21642 if (decl_readonly_section (decl
, compute_reloc_for_var (decl
)))
21648 if (smclass
!= NULL
)
21650 char *newname
= XALLOCAVEC (char, strlen (symname
) + 5);
21652 strcpy (newname
, symname
);
21653 strcat (newname
, smclass
);
21654 XSTR (symbol
, 0) = ggc_strdup (newname
);
21658 #endif /* HAVE_AS_TLS */
21659 #endif /* TARGET_XCOFF */
21662 rs6000_asm_weaken_decl (FILE *stream
, tree decl
,
21663 const char *name
, const char *val
)
21665 fputs ("\t.weak\t", stream
);
21666 assemble_name (stream
, name
);
21667 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
21668 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
21670 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21672 fputs (rs6000_xcoff_visibility (decl
), stream
);
21674 fputs ("\n\t.weak\t.", stream
);
21675 RS6000_OUTPUT_BASENAME (stream
, name
);
21677 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21679 fputs (rs6000_xcoff_visibility (decl
), stream
);
21681 fputc ('\n', stream
);
21685 #ifdef ASM_OUTPUT_DEF
21686 ASM_OUTPUT_DEF (stream
, name
, val
);
21688 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
21689 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
21691 fputs ("\t.set\t.", stream
);
21692 RS6000_OUTPUT_BASENAME (stream
, name
);
21693 fputs (",.", stream
);
21694 RS6000_OUTPUT_BASENAME (stream
, val
);
21695 fputc ('\n', stream
);
/* Return true if INSN should not be copied.  */

static bool
rs6000_cannot_copy_insn_p (rtx_insn *insn)
{
  return recog_memoized (insn) >= 0
	 && get_attr_cannot_copy (insn);
}
21710 /* Compute a (partial) cost for rtx X. Return true if the complete
21711 cost has been computed, and false if subexpressions should be
21712 scanned. In either case, *TOTAL contains the cost result. */
21715 rs6000_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
21716 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
21718 int code
= GET_CODE (x
);
21722 /* On the RS/6000, if it is valid in the insn, it is free. */
21724 if (((outer_code
== SET
21725 || outer_code
== PLUS
21726 || outer_code
== MINUS
)
21727 && (satisfies_constraint_I (x
)
21728 || satisfies_constraint_L (x
)))
21729 || (outer_code
== AND
21730 && (satisfies_constraint_K (x
)
21732 ? satisfies_constraint_L (x
)
21733 : satisfies_constraint_J (x
))))
21734 || ((outer_code
== IOR
|| outer_code
== XOR
)
21735 && (satisfies_constraint_K (x
)
21737 ? satisfies_constraint_L (x
)
21738 : satisfies_constraint_J (x
))))
21739 || outer_code
== ASHIFT
21740 || outer_code
== ASHIFTRT
21741 || outer_code
== LSHIFTRT
21742 || outer_code
== ROTATE
21743 || outer_code
== ROTATERT
21744 || outer_code
== ZERO_EXTRACT
21745 || (outer_code
== MULT
21746 && satisfies_constraint_I (x
))
21747 || ((outer_code
== DIV
|| outer_code
== UDIV
21748 || outer_code
== MOD
|| outer_code
== UMOD
)
21749 && exact_log2 (INTVAL (x
)) >= 0)
21750 || (outer_code
== COMPARE
21751 && (satisfies_constraint_I (x
)
21752 || satisfies_constraint_K (x
)))
21753 || ((outer_code
== EQ
|| outer_code
== NE
)
21754 && (satisfies_constraint_I (x
)
21755 || satisfies_constraint_K (x
)
21757 ? satisfies_constraint_L (x
)
21758 : satisfies_constraint_J (x
))))
21759 || (outer_code
== GTU
21760 && satisfies_constraint_I (x
))
21761 || (outer_code
== LTU
21762 && satisfies_constraint_P (x
)))
21767 else if ((outer_code
== PLUS
21768 && reg_or_add_cint_operand (x
, mode
))
21769 || (outer_code
== MINUS
21770 && reg_or_sub_cint_operand (x
, mode
))
21771 || ((outer_code
== SET
21772 || outer_code
== IOR
21773 || outer_code
== XOR
)
21775 & ~ (unsigned HOST_WIDE_INT
) 0xffffffff) == 0))
21777 *total
= COSTS_N_INSNS (1);
21783 case CONST_WIDE_INT
:
21787 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21791 /* When optimizing for size, MEM should be slightly more expensive
21792 than generating address, e.g., (plus (reg) (const)).
21793 L1 cache latency is about two instructions. */
21794 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21795 if (rs6000_slow_unaligned_access (mode
, MEM_ALIGN (x
)))
21796 *total
+= COSTS_N_INSNS (100);
21805 if (FLOAT_MODE_P (mode
))
21806 *total
= rs6000_cost
->fp
;
21808 *total
= COSTS_N_INSNS (1);
21812 if (CONST_INT_P (XEXP (x
, 1))
21813 && satisfies_constraint_I (XEXP (x
, 1)))
21815 if (INTVAL (XEXP (x
, 1)) >= -256
21816 && INTVAL (XEXP (x
, 1)) <= 255)
21817 *total
= rs6000_cost
->mulsi_const9
;
21819 *total
= rs6000_cost
->mulsi_const
;
21821 else if (mode
== SFmode
)
21822 *total
= rs6000_cost
->fp
;
21823 else if (FLOAT_MODE_P (mode
))
21824 *total
= rs6000_cost
->dmul
;
21825 else if (mode
== DImode
)
21826 *total
= rs6000_cost
->muldi
;
21828 *total
= rs6000_cost
->mulsi
;
21832 if (mode
== SFmode
)
21833 *total
= rs6000_cost
->fp
;
21835 *total
= rs6000_cost
->dmul
;
21840 if (FLOAT_MODE_P (mode
))
21842 *total
= mode
== DFmode
? rs6000_cost
->ddiv
21843 : rs6000_cost
->sdiv
;
21850 if (CONST_INT_P (XEXP (x
, 1))
21851 && exact_log2 (INTVAL (XEXP (x
, 1))) >= 0)
21853 if (code
== DIV
|| code
== MOD
)
21855 *total
= COSTS_N_INSNS (2);
21858 *total
= COSTS_N_INSNS (1);
21862 if (GET_MODE (XEXP (x
, 1)) == DImode
)
21863 *total
= rs6000_cost
->divdi
;
21865 *total
= rs6000_cost
->divsi
;
21867 /* Add in shift and subtract for MOD unless we have a mod instruction. */
21868 if (!TARGET_MODULO
&& (code
== MOD
|| code
== UMOD
))
21869 *total
+= COSTS_N_INSNS (2);
21873 *total
= COSTS_N_INSNS (TARGET_CTZ
? 1 : 4);
21877 *total
= COSTS_N_INSNS (4);
21881 *total
= COSTS_N_INSNS (TARGET_POPCNTD
? 1 : 6);
21885 *total
= COSTS_N_INSNS (TARGET_CMPB
? 2 : 6);
21889 if (outer_code
== AND
|| outer_code
== IOR
|| outer_code
== XOR
)
21892 *total
= COSTS_N_INSNS (1);
21896 if (CONST_INT_P (XEXP (x
, 1)))
21898 rtx left
= XEXP (x
, 0);
21899 rtx_code left_code
= GET_CODE (left
);
21901 /* rotate-and-mask: 1 insn. */
21902 if ((left_code
== ROTATE
21903 || left_code
== ASHIFT
21904 || left_code
== LSHIFTRT
)
21905 && rs6000_is_valid_shift_mask (XEXP (x
, 1), left
, mode
))
21907 *total
= rtx_cost (XEXP (left
, 0), mode
, left_code
, 0, speed
);
21908 if (!CONST_INT_P (XEXP (left
, 1)))
21909 *total
+= rtx_cost (XEXP (left
, 1), SImode
, left_code
, 1, speed
);
21910 *total
+= COSTS_N_INSNS (1);
21914 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
21915 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
21916 if (rs6000_is_valid_and_mask (XEXP (x
, 1), mode
)
21917 || (val
& 0xffff) == val
21918 || (val
& 0xffff0000) == val
21919 || ((val
& 0xffff) == 0 && mode
== SImode
))
21921 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
21922 *total
+= COSTS_N_INSNS (1);
21927 if (rs6000_is_valid_2insn_and (XEXP (x
, 1), mode
))
21929 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
21930 *total
+= COSTS_N_INSNS (2);
21935 *total
= COSTS_N_INSNS (1);
21940 *total
= COSTS_N_INSNS (1);
21946 *total
= COSTS_N_INSNS (1);
21950 /* The EXTSWSLI instruction is a combined instruction. Don't count both
21951 the sign extend and shift separately within the insn. */
21952 if (TARGET_EXTSWSLI
&& mode
== DImode
21953 && GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
21954 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
)
21965 /* Handle mul_highpart. */
21966 if (outer_code
== TRUNCATE
21967 && GET_CODE (XEXP (x
, 0)) == MULT
)
21969 if (mode
== DImode
)
21970 *total
= rs6000_cost
->muldi
;
21972 *total
= rs6000_cost
->mulsi
;
21975 else if (outer_code
== AND
)
21978 *total
= COSTS_N_INSNS (1);
21983 if (MEM_P (XEXP (x
, 0)))
21986 *total
= COSTS_N_INSNS (1);
21992 if (!FLOAT_MODE_P (mode
))
21994 *total
= COSTS_N_INSNS (1);
22000 case UNSIGNED_FLOAT
:
22003 case FLOAT_TRUNCATE
:
22004 *total
= rs6000_cost
->fp
;
22008 if (mode
== DFmode
)
22009 *total
= rs6000_cost
->sfdf_convert
;
22011 *total
= rs6000_cost
->fp
;
22018 *total
= COSTS_N_INSNS (1);
22021 else if (FLOAT_MODE_P (mode
) && TARGET_PPC_GFXOPT
&& TARGET_HARD_FLOAT
)
22023 *total
= rs6000_cost
->fp
;
22032 /* Carry bit requires mode == Pmode.
22033 NEG or PLUS already counted so only add one. */
22035 && (outer_code
== NEG
|| outer_code
== PLUS
))
22037 *total
= COSTS_N_INSNS (1);
22045 if (outer_code
== SET
)
22047 if (XEXP (x
, 1) == const0_rtx
)
22049 *total
= COSTS_N_INSNS (2);
22054 *total
= COSTS_N_INSNS (3);
22059 if (outer_code
== COMPARE
)
22067 if (XINT (x
, 1) == UNSPECV_MMA_XXSETACCZ
)
22081 /* Debug form of r6000_rtx_costs that is selected if -mdebug=cost. */
22084 rs6000_debug_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
22085 int opno
, int *total
, bool speed
)
22087 bool ret
= rs6000_rtx_costs (x
, mode
, outer_code
, opno
, total
, speed
);
22090 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
22091 "opno = %d, total = %d, speed = %s, x:\n",
22092 ret
? "complete" : "scan inner",
22093 GET_MODE_NAME (mode
),
22094 GET_RTX_NAME (outer_code
),
22097 speed
? "true" : "false");
22105 rs6000_insn_cost (rtx_insn
*insn
, bool speed
)
22107 if (recog_memoized (insn
) < 0)
22110 /* If we are optimizing for size, just use the length. */
22112 return get_attr_length (insn
);
22114 /* Use the cost if provided. */
22115 int cost
= get_attr_cost (insn
);
22119 /* If the insn tells us how many insns there are, use that. Otherwise use
22120 the length/4. Adjust the insn length to remove the extra size that
22121 prefixed instructions take. */
22122 int n
= get_attr_num_insns (insn
);
22125 int length
= get_attr_length (insn
);
22126 if (get_attr_prefixed (insn
) == PREFIXED_YES
)
22129 ADJUST_INSN_LENGTH (insn
, adjust
);
22136 enum attr_type type
= get_attr_type (insn
);
22143 cost
= COSTS_N_INSNS (n
+ 1);
22147 switch (get_attr_size (insn
))
22150 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi_const9
;
22153 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi_const
;
22156 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi
;
22159 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->muldi
;
22162 gcc_unreachable ();
22166 switch (get_attr_size (insn
))
22169 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->divsi
;
22172 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->divdi
;
22175 gcc_unreachable ();
22180 cost
= n
* rs6000_cost
->fp
;
22183 cost
= n
* rs6000_cost
->dmul
;
22186 cost
= n
* rs6000_cost
->sdiv
;
22189 cost
= n
* rs6000_cost
->ddiv
;
22196 cost
= COSTS_N_INSNS (n
+ 2);
22200 cost
= COSTS_N_INSNS (n
);
/* Debug form of ADDRESS_COST that is selected if -mdebug=cost.  */

static int
rs6000_debug_address_cost (rtx x, machine_mode mode,
			   addr_space_t as, bool speed)
{
  int ret = TARGET_ADDRESS_COST (x, mode, as, speed);

  fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
	   ret, speed ? "true" : "false");
  debug_rtx (x);

  return ret;
}
22222 /* A C expression returning the cost of moving data from a register of class
22223 CLASS1 to one of CLASS2. */
22226 rs6000_register_move_cost (machine_mode mode
,
22227 reg_class_t from
, reg_class_t to
)
22230 reg_class_t rclass
;
22232 if (TARGET_DEBUG_COST
)
22235 /* If we have VSX, we can easily move between FPR or Altivec registers,
22236 otherwise we can only easily move within classes.
22237 Do this first so we give best-case answers for union classes
22238 containing both gprs and vsx regs. */
22239 HARD_REG_SET to_vsx
, from_vsx
;
22240 to_vsx
= reg_class_contents
[to
] & reg_class_contents
[VSX_REGS
];
22241 from_vsx
= reg_class_contents
[from
] & reg_class_contents
[VSX_REGS
];
22242 if (!hard_reg_set_empty_p (to_vsx
)
22243 && !hard_reg_set_empty_p (from_vsx
)
22245 || hard_reg_set_intersect_p (to_vsx
, from_vsx
)))
22247 int reg
= FIRST_FPR_REGNO
;
22249 || (TEST_HARD_REG_BIT (to_vsx
, FIRST_ALTIVEC_REGNO
)
22250 && TEST_HARD_REG_BIT (from_vsx
, FIRST_ALTIVEC_REGNO
)))
22251 reg
= FIRST_ALTIVEC_REGNO
;
22252 ret
= 2 * hard_regno_nregs (reg
, mode
);
22255 /* Moves from/to GENERAL_REGS. */
22256 else if ((rclass
= from
, reg_classes_intersect_p (to
, GENERAL_REGS
))
22257 || (rclass
= to
, reg_classes_intersect_p (from
, GENERAL_REGS
)))
22259 if (rclass
== FLOAT_REGS
|| rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
22261 if (TARGET_DIRECT_MOVE
)
22263 /* Keep the cost for direct moves above that for within
22264 a register class even if the actual processor cost is
22265 comparable. We do this because a direct move insn
22266 can't be a nop, whereas with ideal register
22267 allocation a move within the same class might turn
22268 out to be a nop. */
22269 if (rs6000_tune
== PROCESSOR_POWER9
22270 || rs6000_tune
== PROCESSOR_POWER10
)
22271 ret
= 3 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22273 ret
= 4 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22274 /* SFmode requires a conversion when moving between gprs
22276 if (mode
== SFmode
)
22280 ret
= (rs6000_memory_move_cost (mode
, rclass
, false)
22281 + rs6000_memory_move_cost (mode
, GENERAL_REGS
, false));
22284 /* It's more expensive to move CR_REGS than CR0_REGS because of the
22286 else if (rclass
== CR_REGS
)
22289 /* For those processors that have slow LR/CTR moves, make them more
22290 expensive than memory in order to bias spills to memory .*/
22291 else if ((rs6000_tune
== PROCESSOR_POWER6
22292 || rs6000_tune
== PROCESSOR_POWER7
22293 || rs6000_tune
== PROCESSOR_POWER8
22294 || rs6000_tune
== PROCESSOR_POWER9
)
22295 && reg_class_subset_p (rclass
, SPECIAL_REGS
))
22296 ret
= 6 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22299 /* A move will cost one instruction per GPR moved. */
22300 ret
= 2 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22303 /* Everything else has to go through GENERAL_REGS. */
22305 ret
= (rs6000_register_move_cost (mode
, GENERAL_REGS
, to
)
22306 + rs6000_register_move_cost (mode
, from
, GENERAL_REGS
));
22308 if (TARGET_DEBUG_COST
)
22310 if (dbg_cost_ctrl
== 1)
22312 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22313 ret
, GET_MODE_NAME (mode
), reg_class_names
[from
],
22314 reg_class_names
[to
]);
22321 /* A C expressions returning the cost of moving data of MODE from a register to
22325 rs6000_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
22326 bool in ATTRIBUTE_UNUSED
)
22330 if (TARGET_DEBUG_COST
)
22333 if (reg_classes_intersect_p (rclass
, GENERAL_REGS
))
22334 ret
= 4 * hard_regno_nregs (0, mode
);
22335 else if ((reg_classes_intersect_p (rclass
, FLOAT_REGS
)
22336 || reg_classes_intersect_p (rclass
, VSX_REGS
)))
22337 ret
= 4 * hard_regno_nregs (32, mode
);
22338 else if (reg_classes_intersect_p (rclass
, ALTIVEC_REGS
))
22339 ret
= 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO
, mode
);
22341 ret
= 4 + rs6000_register_move_cost (mode
, rclass
, GENERAL_REGS
);
22343 if (TARGET_DEBUG_COST
)
22345 if (dbg_cost_ctrl
== 1)
22347 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22348 ret
, GET_MODE_NAME (mode
), reg_class_names
[rclass
], in
);
22355 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22357 The register allocator chooses GEN_OR_VSX_REGS for the allocno
22358 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22359 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
22360 move cost between GENERAL_REGS and VSX_REGS low.
22362 It might seem reasonable to use a union class. After all, if usage
22363 of vsr is low and gpr high, it might make sense to spill gpr to vsr
22364 rather than memory. However, in cases where register pressure of
22365 both is high, like the cactus_adm spec test, allowing
22366 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22367 the first scheduling pass. This is partly due to an allocno of
22368 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22369 class, which gives too high a pressure for GENERAL_REGS and too low
22370 for VSX_REGS. So, force a choice of the subclass here.
22372 The best class is also the union if GENERAL_REGS and VSX_REGS have
22373 the same cost. In that case we do use GEN_OR_VSX_REGS as the
22374 allocno class, since trying to narrow down the class by regno mode
22375 is prone to error. For example, SImode is allowed in VSX regs and
22376 in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
22377 it would be wrong to choose an allocno of GENERAL_REGS based on
22381 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED
,
22382 reg_class_t allocno_class
,
22383 reg_class_t best_class
)
22385 switch (allocno_class
)
22387 case GEN_OR_VSX_REGS
:
22388 /* best_class must be a subset of allocno_class. */
22389 gcc_checking_assert (best_class
== GEN_OR_VSX_REGS
22390 || best_class
== GEN_OR_FLOAT_REGS
22391 || best_class
== VSX_REGS
22392 || best_class
== ALTIVEC_REGS
22393 || best_class
== FLOAT_REGS
22394 || best_class
== GENERAL_REGS
22395 || best_class
== BASE_REGS
);
22396 /* Use best_class but choose wider classes when copying from the
22397 wider class to best_class is cheap. This mimics IRA choice
22398 of allocno class. */
22399 if (best_class
== BASE_REGS
)
22400 return GENERAL_REGS
;
22401 if (TARGET_VSX
&& best_class
== FLOAT_REGS
)
22406 if (best_class
== ALTIVEC_REGS
)
22407 return ALTIVEC_REGS
;
22413 return allocno_class
;
/* Load up a constant.  If the mode is a vector mode, splat the value across
   all of the vector elements.  */

static rtx
rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
{
  rtx reg;

  if (mode == SFmode || mode == DFmode)
    {
      rtx d = const_double_from_real_value (dconst, mode);
      reg = force_reg (mode, d);
    }
  else if (mode == V4SFmode)
    {
      rtx d = const_double_from_real_value (dconst, SFmode);
      rtvec v = gen_rtvec (4, d, d, d, d);
      reg = gen_reg_rtx (mode);
      rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
    }
  else if (mode == V2DFmode)
    {
      rtx d = const_double_from_real_value (dconst, DFmode);
      rtvec v = gen_rtvec (2, d, d);
      reg = gen_reg_rtx (mode);
      rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
    }
  else
    gcc_unreachable ();

  return reg;
}
/* Generate an FMA instruction.  */

static void
rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
{
  machine_mode mode = GET_MODE (target);
  rtx dst;

  dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
  gcc_assert (dst != NULL);

  if (dst != target)
    emit_move_insn (target, dst);
}
/* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a).  */

static void
rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
{
  machine_mode mode = GET_MODE (dst);
  rtx r;

  /* This is a tad more complicated, since the fnma_optab is for
     a different expression: fma(-m1, m2, a), which is the same
     thing except in the case of signed zeros.

     Fortunately we know that if FMA is supported that FNMSUB is
     also supported in the ISA.  Just expand it directly.  */

  gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);

  r = gen_rtx_NEG (mode, a);
  r = gen_rtx_FMA (mode, m1, m2, r);
  r = gen_rtx_NEG (mode, r);
  emit_insn (gen_rtx_SET (dst, r));
}
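/* Illustrative sketch, not part of GCC: the identity relied on above,
   written in plain C for scalars and ignoring the extra rounding of the
   unfused form.  -fma(m1, m2, -a) equals a - m1*m2 for all finite inputs;
   the two differ only in the sign of a zero result, which is why
   fnma_optab cannot be used directly.  The helper name is hypothetical.  */
static inline double
fnmsub_reference (double m1, double m2, double a)
{
  /* What the PowerPC fnmsub operation computes (modulo fusion).  */
  return -(m1 * m2 - a);
}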
/* Newton-Raphson approximation of floating point divide DST = N/D.  If NOTE_P,
   add a reg_note saying that this was a division.  Support both scalar and
   vector divide.  Assumes no trapping math and finite arguments.  */

void
rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
{
  machine_mode mode = GET_MODE (dst);
  rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
  int i;

  /* Low precision estimates guarantee 5 bits of accuracy.  High
     precision estimates guarantee 14 bits of accuracy.  SFmode
     requires 23 bits of accuracy.  DFmode requires 52 bits of
     accuracy.  Each pass at least doubles the accuracy, leading
     to the following.  */
  int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
  if (mode == DFmode || mode == V2DFmode)
    passes += 2;

  enum insn_code code = optab_handler (smul_optab, mode);
  insn_gen_fn gen_mul = GEN_FCN (code);

  gcc_assert (code != CODE_FOR_nothing);

  one = rs6000_load_constant_and_splat (mode, dconst1);

  /* x0 = 1./d estimate */
  x0 = gen_reg_rtx (mode);
  emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
					      UNSPEC_FRES)));

  /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i).  */
  if (passes > 1) {

    /* e0 = 1. - d * x0  */
    e0 = gen_reg_rtx (mode);
    rs6000_emit_nmsub (e0, d, x0, one);

    /* x1 = x0 + e0 * x0  */
    x1 = gen_reg_rtx (mode);
    rs6000_emit_madd (x1, e0, x0, x0);

    for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
	 ++i, xprev = xnext, eprev = enext) {

      /* enext = eprev * eprev  */
      enext = gen_reg_rtx (mode);
      emit_insn (gen_mul (enext, eprev, eprev));

      /* xnext = xprev + enext * xprev  */
      xnext = gen_reg_rtx (mode);
      rs6000_emit_madd (xnext, enext, xprev, xprev);
    }

  } else
    xprev = x0;

  /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i).  */

  /* u = n * xprev  */
  u = gen_reg_rtx (mode);
  emit_insn (gen_mul (u, n, xprev));

  /* v = n - (d * u)  */
  v = gen_reg_rtx (mode);
  rs6000_emit_nmsub (v, d, u, n);

  /* dst = (v * xprev) + u  */
  rs6000_emit_madd (dst, v, xprev, u);

  if (note_p)
    add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
}
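/* Illustrative sketch, not part of GCC: the refinement sequence emitted
   above, written in plain C for doubles with a single refinement pass.
   x0 stands in for the hardware reciprocal estimate; the helper name is
   hypothetical.  */
static double
swdiv_reference (double n, double d)
{
  double x0 = 1.0 / d;		/* reciprocal estimate			*/
  double e0 = 1.0 - d * x0;	/* rs6000_emit_nmsub (e0, d, x0, one)	*/
  double x1 = x0 + e0 * x0;	/* rs6000_emit_madd (x1, e0, x0, x0)	*/
  double u  = n * x1;		/* u = n * xprev			*/
  double v  = n - d * u;	/* rs6000_emit_nmsub (v, d, u, n)	*/
  return v * x1 + u;		/* rs6000_emit_madd (dst, v, xprev, u)	*/
}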
/* Goldschmidt's Algorithm for single/double-precision floating point
   sqrt and rsqrt.  Assumes no trapping math and finite arguments.  */

void
rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
{
  machine_mode mode = GET_MODE (src);
  rtx e = gen_reg_rtx (mode);
  rtx g = gen_reg_rtx (mode);
  rtx h = gen_reg_rtx (mode);

  /* Low precision estimates guarantee 5 bits of accuracy.  High
     precision estimates guarantee 14 bits of accuracy.  SFmode
     requires 23 bits of accuracy.  DFmode requires 52 bits of
     accuracy.  Each pass at least doubles the accuracy, leading
     to the following.  */
  int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
  if (mode == DFmode || mode == V2DFmode)
    passes += 2;

  int i;
  rtx mhalf;
  enum insn_code code = optab_handler (smul_optab, mode);
  insn_gen_fn gen_mul = GEN_FCN (code);

  gcc_assert (code != CODE_FOR_nothing);

  mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);

  /* e = rsqrt estimate */
  emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
					     UNSPEC_RSQRT)));

  /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero = force_reg (mode, CONST0_RTX (mode));

      if (mode == SFmode)
	{
	  rtx target = emit_conditional_move (e, { GT, src, zero, mode },
					      e, zero, mode, 0);
	  if (target != e)
	    emit_move_insn (e, target);
	}
      else
	{
	  rtx cond = gen_rtx_GT (VOIDmode, e, zero);
	  rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
	}
    }

  /* g = sqrt estimate.  */
  emit_insn (gen_mul (g, e, src));
  /* h = 1/(2*sqrt) estimate.  */
  emit_insn (gen_mul (h, e, mhalf));

  if (recip)
    {
      if (passes == 1)
	{
	  rtx t = gen_reg_rtx (mode);
	  rs6000_emit_nmsub (t, g, h, mhalf);
	  /* Apply correction directly to 1/rsqrt estimate.  */
	  rs6000_emit_madd (dst, e, t, e);
	}
      else
	{
	  for (i = 0; i < passes; i++)
	    {
	      rtx t1 = gen_reg_rtx (mode);
	      rtx g1 = gen_reg_rtx (mode);
	      rtx h1 = gen_reg_rtx (mode);

	      rs6000_emit_nmsub (t1, g, h, mhalf);
	      rs6000_emit_madd (g1, g, t1, g);
	      rs6000_emit_madd (h1, h, t1, h);

	      g = g1;
	      h = h1;
	    }
	  /* Multiply by 2 for 1/rsqrt.  */
	  emit_insn (gen_add3_insn (dst, h, h));
	}
    }
  else
    {
      rtx t = gen_reg_rtx (mode);
      rs6000_emit_nmsub (t, g, h, mhalf);
      rs6000_emit_madd (dst, g, t, g);
    }
}
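/* Illustrative sketch, not part of GCC: the Goldschmidt iteration used
   above, in plain C for doubles.  e stands in for the hardware rsqrt
   estimate; all names are hypothetical and the recip path is simplified
   to the looped form.  */
static double
swsqrt_reference (double src, int recip, int passes)
{
  double e = 1.0 / __builtin_sqrt (src);	/* rsqrt estimate	*/
  double g = e * src;				/* sqrt estimate	*/
  double h = e * 0.5;				/* 1/(2*sqrt) estimate	*/
  for (int i = 0; i < passes; i++)
    {
      double t = 0.5 - g * h;	/* rs6000_emit_nmsub (t1, g, h, mhalf)	*/
      g = g + t * g;		/* rs6000_emit_madd (g1, g, t1, g)	*/
      h = h + t * h;		/* rs6000_emit_madd (h1, h, t1, h)	*/
    }
  return recip ? h + h : g;	/* 2*h converges to 1/sqrt(src)		*/
}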
/* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
   (Power7) targets.  DST is the target, and SRC is the argument operand.  */

void
rs6000_emit_popcount (rtx dst, rtx src)
{
  machine_mode mode = GET_MODE (dst);
  rtx tmp1, tmp2;

  /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can.  */
  if (TARGET_POPCNTD)
    {
      if (mode == SImode)
	emit_insn (gen_popcntdsi2 (dst, src));
      else
	emit_insn (gen_popcntddi2 (dst, src));
      return;
    }

  tmp1 = gen_reg_rtx (mode);

  if (mode == SImode)
    {
      emit_insn (gen_popcntbsi2 (tmp1, src));
      tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
			  NULL_RTX, 0);
      tmp2 = force_reg (SImode, tmp2);
      emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
    }
  else
    {
      emit_insn (gen_popcntbdi2 (tmp1, src));
      tmp2 = expand_mult (DImode, tmp1,
			  GEN_INT ((HOST_WIDE_INT)
				   0x01010101 << 32 | 0x01010101),
			  NULL_RTX, 0);
      tmp2 = force_reg (DImode, tmp2);
      emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
    }
}
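/* Illustrative sketch, not part of GCC: how the popcntb fallback above
   turns per-byte counts into a full population count.  Multiplying the
   per-byte counts by 0x01010101 accumulates all byte sums into the most
   significant byte, which the final shift extracts.  The helper name is
   hypothetical and assumes a 32-bit value.  */
static inline unsigned int
popcount32_from_byte_counts (unsigned int byte_counts)
{
  /* byte_counts holds a popcntb-style count (0..8) in each byte.  */
  return (byte_counts * 0x01010101u) >> 24;
}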
/* Emit parity intrinsic on TARGET_POPCNTB targets.  DST is the
   target, and SRC is the argument operand.  */

void
rs6000_emit_parity (rtx dst, rtx src)
{
  machine_mode mode = GET_MODE (dst);
  rtx tmp;

  tmp = gen_reg_rtx (mode);

  /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can.  */
  if (TARGET_CMPB)
    {
      if (mode == SImode)
	{
	  emit_insn (gen_popcntbsi2 (tmp, src));
	  emit_insn (gen_paritysi2_cmpb (dst, tmp));
	}
      else
	{
	  emit_insn (gen_popcntbdi2 (tmp, src));
	  emit_insn (gen_paritydi2_cmpb (dst, tmp));
	}
      return;
    }

  if (mode == SImode)
    {
      /* Is mult+shift >= shift+xor+shift+xor?  */
      if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
	{
	  rtx tmp1, tmp2, tmp3, tmp4;

	  tmp1 = gen_reg_rtx (SImode);
	  emit_insn (gen_popcntbsi2 (tmp1, src));

	  tmp2 = gen_reg_rtx (SImode);
	  emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
	  tmp3 = gen_reg_rtx (SImode);
	  emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));

	  tmp4 = gen_reg_rtx (SImode);
	  emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
	  emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
	}
      else
	rs6000_emit_popcount (tmp, src);
      emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
    }
  else
    {
      /* Is mult+shift >= shift+xor+shift+xor+shift+xor?  */
      if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
	{
	  rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;

	  tmp1 = gen_reg_rtx (DImode);
	  emit_insn (gen_popcntbdi2 (tmp1, src));

	  tmp2 = gen_reg_rtx (DImode);
	  emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
	  tmp3 = gen_reg_rtx (DImode);
	  emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));

	  tmp4 = gen_reg_rtx (DImode);
	  emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
	  tmp5 = gen_reg_rtx (DImode);
	  emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));

	  tmp6 = gen_reg_rtx (DImode);
	  emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
	  emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
	}
      else
	rs6000_emit_popcount (tmp, src);
      emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
    }
}
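/* Illustrative sketch, not part of GCC: the shift-and-xor fallback used
   above when the multiply is too expensive.  Folding the per-byte counts
   onto themselves with xor preserves the parity of the total, so only the
   low bit of the final byte is needed.  Names are hypothetical.  */
static inline unsigned int
parity32_from_byte_counts (unsigned int byte_counts)
{
  unsigned int t = byte_counts;
  t ^= t >> 16;		/* fold upper half onto lower half */
  t ^= t >> 8;		/* fold the remaining two bytes	   */
  return t & 1;		/* parity of the original word	   */
}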
22779 /* Expand an Altivec constant permutation for little endian mode.
22780 OP0 and OP1 are the input vectors and TARGET is the output vector.
22781 SEL specifies the constant permutation vector.
22783 There are two issues: First, the two input operands must be
22784 swapped so that together they form a double-wide array in LE
22785 order. Second, the vperm instruction has surprising behavior
22786 in LE mode: it interprets the elements of the source vectors
22787 in BE mode ("left to right") and interprets the elements of
22788 the destination vector in LE mode ("right to left"). To
22789 correct for this, we must subtract each element of the permute
22790 control vector from 31.
22792 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
22793 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
22794 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
22795 serve as the permute control vector. Then, in BE mode,
22799 places the desired result in vr9. However, in LE mode the
22800 vector contents will be
22802 vr10 = 00000003 00000002 00000001 00000000
22803 vr11 = 00000007 00000006 00000005 00000004
22805 The result of the vperm using the same permute control vector is
22807 vr9 = 05000000 07000000 01000000 03000000
22809 That is, the leftmost 4 bytes of vr10 are interpreted as the
22810 source for the rightmost 4 bytes of vr9, and so on.
22812 If we change the permute control vector to
22814 vr12 = {31,20,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
22822 vr9 = 00000006 00000004 00000002 00000000. */
22825 altivec_expand_vec_perm_const_le (rtx target
, rtx op0
, rtx op1
,
22826 const vec_perm_indices
&sel
)
22830 rtx constv
, unspec
;
22832 /* Unpack and adjust the constant selector. */
22833 for (i
= 0; i
< 16; ++i
)
22835 unsigned int elt
= 31 - (sel
[i
] & 31);
22836 perm
[i
] = GEN_INT (elt
);
22839 /* Expand to a permute, swapping the inputs and using the
22840 adjusted selector. */
22842 op0
= force_reg (V16QImode
, op0
);
22844 op1
= force_reg (V16QImode
, op1
);
22846 constv
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, perm
));
22847 constv
= force_reg (V16QImode
, constv
);
22848 unspec
= gen_rtx_UNSPEC (V16QImode
, gen_rtvec (3, op1
, op0
, constv
),
22850 if (!REG_P (target
))
22852 rtx tmp
= gen_reg_rtx (V16QImode
);
22853 emit_move_insn (tmp
, unspec
);
22857 emit_move_insn (target
, unspec
);
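/* Illustrative sketch, not part of GCC: the selector adjustment performed
   above for little-endian vperm, in plain C.  Each element index is
   reflected across the 32-byte double-wide input (31 - index), and the
   caller swaps the two input vectors.  Names are hypothetical.  */
static void
adjust_vperm_selector_for_le (unsigned char adjusted[16],
			      const unsigned char sel[16])
{
  for (int i = 0; i < 16; i++)
    adjusted[i] = 31 - (sel[i] & 31);
}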
22860 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
22861 permute control vector. But here it's not a constant, so we must
22862 generate a vector NAND or NOR to do the adjustment. */
22865 altivec_expand_vec_perm_le (rtx operands
[4])
22867 rtx notx
, iorx
, unspec
;
22868 rtx target
= operands
[0];
22869 rtx op0
= operands
[1];
22870 rtx op1
= operands
[2];
22871 rtx sel
= operands
[3];
22873 rtx norreg
= gen_reg_rtx (V16QImode
);
22874 machine_mode mode
= GET_MODE (target
);
22876 /* Get everything in regs so the pattern matches. */
22878 op0
= force_reg (mode
, op0
);
22880 op1
= force_reg (mode
, op1
);
22882 sel
= force_reg (V16QImode
, sel
);
22883 if (!REG_P (target
))
22884 tmp
= gen_reg_rtx (mode
);
22886 if (TARGET_P9_VECTOR
)
22888 unspec
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op0
, sel
),
22893 /* Invert the selector with a VNAND if available, else a VNOR.
22894 The VNAND is preferred for future fusion opportunities. */
22895 notx
= gen_rtx_NOT (V16QImode
, sel
);
22896 iorx
= (TARGET_P8_VECTOR
22897 ? gen_rtx_IOR (V16QImode
, notx
, notx
)
22898 : gen_rtx_AND (V16QImode
, notx
, notx
));
22899 emit_insn (gen_rtx_SET (norreg
, iorx
));
22901 /* Permute with operands reversed and adjusted selector. */
22902 unspec
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op0
, norreg
),
22906 /* Copy into target, possibly by way of a register. */
22907 if (!REG_P (target
))
22909 emit_move_insn (tmp
, unspec
);
22913 emit_move_insn (target
, unspec
);
22916 /* Expand an Altivec constant permutation. Return true if we match
22917 an efficient implementation; false to fall back to VPERM.
22919 OP0 and OP1 are the input vectors and TARGET is the output vector.
22920 SEL specifies the constant permutation vector. */
22923 altivec_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
,
22924 const vec_perm_indices
&sel
)
22926 struct altivec_perm_insn
{
22927 HOST_WIDE_INT mask
;
22928 enum insn_code impl
;
22929 unsigned char perm
[16];
22931 static const struct altivec_perm_insn patterns
[] = {
22932 {OPTION_MASK_ALTIVEC
,
22933 CODE_FOR_altivec_vpkuhum_direct
,
22934 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}},
22935 {OPTION_MASK_ALTIVEC
,
22936 CODE_FOR_altivec_vpkuwum_direct
,
22937 {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
22938 {OPTION_MASK_ALTIVEC
,
22939 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghb_direct
22940 : CODE_FOR_altivec_vmrglb_direct
,
22941 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
22942 {OPTION_MASK_ALTIVEC
,
22943 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghh_direct
22944 : CODE_FOR_altivec_vmrglh_direct
,
22945 {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
22946 {OPTION_MASK_ALTIVEC
,
22947 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghw_direct_v4si
22948 : CODE_FOR_altivec_vmrglw_direct_v4si
,
22949 {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
22950 {OPTION_MASK_ALTIVEC
,
22951 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglb_direct
22952 : CODE_FOR_altivec_vmrghb_direct
,
22953 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
22954 {OPTION_MASK_ALTIVEC
,
22955 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglh_direct
22956 : CODE_FOR_altivec_vmrghh_direct
,
22957 {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
22958 {OPTION_MASK_ALTIVEC
,
22959 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglw_direct_v4si
22960 : CODE_FOR_altivec_vmrghw_direct_v4si
,
22961 {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}},
22962 {OPTION_MASK_P8_VECTOR
,
22963 BYTES_BIG_ENDIAN
? CODE_FOR_p8_vmrgew_v4sf_direct
22964 : CODE_FOR_p8_vmrgow_v4sf_direct
,
22965 {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}},
22966 {OPTION_MASK_P8_VECTOR
,
22967 BYTES_BIG_ENDIAN
? CODE_FOR_p8_vmrgow_v4sf_direct
22968 : CODE_FOR_p8_vmrgew_v4sf_direct
,
22969 {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}},
22970 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
22971 {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}},
22972 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
22973 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
22974 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
22975 {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}},
22976 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
22977 {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}}};
22979 unsigned int i
, j
, elt
, which
;
22980 unsigned char perm
[16];
22984 /* Unpack the constant selector. */
22985 for (i
= which
= 0; i
< 16; ++i
)
22988 which
|= (elt
< 16 ? 1 : 2);
22992 /* Simplify the constant selector based on operands. */
22996 gcc_unreachable ();
23000 if (!rtx_equal_p (op0
, op1
))
23005 for (i
= 0; i
< 16; ++i
)
23017 /* Look for splat patterns. */
23022 for (i
= 0; i
< 16; ++i
)
23023 if (perm
[i
] != elt
)
23027 if (!BYTES_BIG_ENDIAN
)
23029 emit_insn (gen_altivec_vspltb_direct (target
, op0
, GEN_INT (elt
)));
23035 for (i
= 0; i
< 16; i
+= 2)
23036 if (perm
[i
] != elt
|| perm
[i
+ 1] != elt
+ 1)
23040 int field
= BYTES_BIG_ENDIAN
? elt
/ 2 : 7 - elt
/ 2;
23041 x
= gen_reg_rtx (V8HImode
);
23042 emit_insn (gen_altivec_vsplth_direct (x
, gen_lowpart (V8HImode
, op0
),
23044 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
23051 for (i
= 0; i
< 16; i
+= 4)
23053 || perm
[i
+ 1] != elt
+ 1
23054 || perm
[i
+ 2] != elt
+ 2
23055 || perm
[i
+ 3] != elt
+ 3)
23059 int field
= BYTES_BIG_ENDIAN
? elt
/ 4 : 3 - elt
/ 4;
23060 x
= gen_reg_rtx (V4SImode
);
23061 emit_insn (gen_altivec_vspltw_direct (x
, gen_lowpart (V4SImode
, op0
),
23063 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
23069 /* Look for merge and pack patterns. */
23070 for (j
= 0; j
< ARRAY_SIZE (patterns
); ++j
)
23074 if ((patterns
[j
].mask
& rs6000_isa_flags
) == 0)
23077 elt
= patterns
[j
].perm
[0];
23078 if (perm
[0] == elt
)
23080 else if (perm
[0] == elt
+ 16)
23084 for (i
= 1; i
< 16; ++i
)
23086 elt
= patterns
[j
].perm
[i
];
23088 elt
= (elt
>= 16 ? elt
- 16 : elt
+ 16);
23089 else if (one_vec
&& elt
>= 16)
23091 if (perm
[i
] != elt
)
23096 enum insn_code icode
= patterns
[j
].impl
;
23097 machine_mode omode
= insn_data
[icode
].operand
[0].mode
;
23098 machine_mode imode
= insn_data
[icode
].operand
[1].mode
;
23100 rtx perm_idx
= GEN_INT (0);
23101 if (icode
== CODE_FOR_vsx_xxpermdi_v16qi
)
23118 perm_idx
= GEN_INT (perm_val
);
23121 /* For little-endian, don't use vpkuwum and vpkuhum if the
23122 underlying vector type is not V4SI and V8HI, respectively.
23123 For example, using vpkuwum with a V8HI picks up the even
23124 halfwords (BE numbering) when the even halfwords (LE
23125 numbering) are what we need. */
23126 if (!BYTES_BIG_ENDIAN
23127 && icode
== CODE_FOR_altivec_vpkuwum_direct
23129 && GET_MODE (op0
) != V4SImode
)
23131 && GET_MODE (XEXP (op0
, 0)) != V4SImode
)))
23133 if (!BYTES_BIG_ENDIAN
23134 && icode
== CODE_FOR_altivec_vpkuhum_direct
23136 && GET_MODE (op0
) != V8HImode
)
23138 && GET_MODE (XEXP (op0
, 0)) != V8HImode
)))
23141 /* For little-endian, the two input operands must be swapped
23142 (or swapped back) to ensure proper right-to-left numbering
23144 if (swapped
^ !BYTES_BIG_ENDIAN
23145 && icode
!= CODE_FOR_vsx_xxpermdi_v16qi
)
23146 std::swap (op0
, op1
);
23147 if (imode
!= V16QImode
)
23149 op0
= gen_lowpart (imode
, op0
);
23150 op1
= gen_lowpart (imode
, op1
);
23152 if (omode
== V16QImode
)
23155 x
= gen_reg_rtx (omode
);
23156 if (icode
== CODE_FOR_vsx_xxpermdi_v16qi
)
23157 emit_insn (GEN_FCN (icode
) (x
, op0
, op1
, perm_idx
));
23159 emit_insn (GEN_FCN (icode
) (x
, op0
, op1
));
23160 if (omode
!= V16QImode
)
23161 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
23166 if (!BYTES_BIG_ENDIAN
)
23168 altivec_expand_vec_perm_const_le (target
, op0
, op1
, sel
);
23175 /* Expand a VSX Permute Doubleword constant permutation.
23176 Return true if we match an efficient implementation. */
23179 rs6000_expand_vec_perm_const_1 (rtx target
, rtx op0
, rtx op1
,
23180 unsigned char perm0
, unsigned char perm1
)
23184 /* If both selectors come from the same operand, fold to single op. */
23185 if ((perm0
& 2) == (perm1
& 2))
23192 /* If both operands are equal, fold to simpler permutation. */
23193 if (rtx_equal_p (op0
, op1
))
23196 perm1
= (perm1
& 1) + 2;
23198 /* If the first selector comes from the second operand, swap. */
23199 else if (perm0
& 2)
23205 std::swap (op0
, op1
);
23207 /* If the second selector does not come from the second operand, fail. */
23208 else if ((perm1
& 2) == 0)
23212 if (target
!= NULL
)
23214 machine_mode vmode
, dmode
;
23217 vmode
= GET_MODE (target
);
23218 gcc_assert (GET_MODE_NUNITS (vmode
) == 2);
23219 dmode
= mode_for_vector (GET_MODE_INNER (vmode
), 4).require ();
23220 x
= gen_rtx_VEC_CONCAT (dmode
, op0
, op1
);
23221 v
= gen_rtvec (2, GEN_INT (perm0
), GEN_INT (perm1
));
23222 x
= gen_rtx_VEC_SELECT (vmode
, x
, gen_rtx_PARALLEL (VOIDmode
, v
));
23223 emit_insn (gen_rtx_SET (target
, x
));
23228 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
23231 rs6000_vectorize_vec_perm_const (machine_mode vmode
, rtx target
, rtx op0
,
23232 rtx op1
, const vec_perm_indices
&sel
)
23234 bool testing_p
= !target
;
23236 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
23237 if (TARGET_ALTIVEC
&& testing_p
)
23242 rtx nop0
= force_reg (vmode
, op0
);
23248 op1
= force_reg (vmode
, op1
);
23250 /* Check for ps_merge* or xxpermdi insns. */
23251 if ((vmode
== V2DFmode
|| vmode
== V2DImode
) && VECTOR_MEM_VSX_P (vmode
))
23255 op0
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 1);
23256 op1
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 2);
23258 if (rs6000_expand_vec_perm_const_1 (target
, op0
, op1
, sel
[0], sel
[1]))
23262 if (TARGET_ALTIVEC
)
23264 /* Force the target-independent code to lower to V16QImode. */
23265 if (vmode
!= V16QImode
)
23267 if (altivec_expand_vec_perm_const (target
, op0
, op1
, sel
))
/* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
   OP0 and OP1 are the input vectors and TARGET is the output vector.
   PERM specifies the constant permutation vector.  */

static void
rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
			   machine_mode vmode, const vec_perm_builder &perm)
{
  rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
  if (x != target)
    emit_move_insn (target, x);
}

/* Expand an extract even operation.  */

void
rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
{
  machine_mode vmode = GET_MODE (target);
  unsigned i, nelt = GET_MODE_NUNITS (vmode);
  vec_perm_builder perm (nelt, nelt, 1);

  for (i = 0; i < nelt; i++)
    perm.quick_push (i * 2);

  rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
}
/* Expand a vector interleave operation.  */

void
rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
{
  machine_mode vmode = GET_MODE (target);
  unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
  vec_perm_builder perm (nelt, nelt, 1);

  high = (highp ? 0 : nelt / 2);
  for (i = 0; i < nelt / 2; i++)
    {
      perm.quick_push (i + high);
      perm.quick_push (i + nelt + high);
    }

  rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
}
/* Scale a V2DF vector SRC by two to the SCALE and place in TGT.  */
void
rs6000_scale_v2df (rtx tgt, rtx src, int scale)
{
  HOST_WIDE_INT hwi_scale (scale);
  REAL_VALUE_TYPE r_pow;
  rtvec v = rtvec_alloc (2);
  rtx elt;
  rtx scale_vec = gen_reg_rtx (V2DFmode);
  (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
  elt = const_double_from_real_value (r_pow, DFmode);
  RTVEC_ELT (v, 0) = elt;
  RTVEC_ELT (v, 1) = elt;
  rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
  emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
}
23338 /* Return an RTX representing where to find the function value of a
23339 function returning MODE. */
23341 rs6000_complex_function_value (machine_mode mode
)
23343 unsigned int regno
;
23345 machine_mode inner
= GET_MODE_INNER (mode
);
23346 unsigned int inner_bytes
= GET_MODE_UNIT_SIZE (mode
);
23348 if (TARGET_FLOAT128_TYPE
23350 || (mode
== TCmode
&& TARGET_IEEEQUAD
)))
23351 regno
= ALTIVEC_ARG_RETURN
;
23353 else if (FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
23354 regno
= FP_ARG_RETURN
;
23358 regno
= GP_ARG_RETURN
;
23360 /* 32-bit is OK since it'll go in r3/r4. */
23361 if (TARGET_32BIT
&& inner_bytes
>= 4)
23362 return gen_rtx_REG (mode
, regno
);
23365 if (inner_bytes
>= 8)
23366 return gen_rtx_REG (mode
, regno
);
23368 r1
= gen_rtx_EXPR_LIST (inner
, gen_rtx_REG (inner
, regno
),
23370 r2
= gen_rtx_EXPR_LIST (inner
, gen_rtx_REG (inner
, regno
+ 1),
23371 GEN_INT (inner_bytes
));
23372 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, r1
, r2
));
/* Return an rtx describing a return value of MODE as a PARALLEL
   in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
   stride REG_STRIDE.  */

static rtx
rs6000_parallel_return (machine_mode mode,
			int n_elts, machine_mode elt_mode,
			unsigned int regno, unsigned int reg_stride)
{
  rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
  int i;

  for (i = 0; i < n_elts; i++)
    {
      rtx r = gen_rtx_REG (elt_mode, regno);
      rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
      XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
      regno += reg_stride;
    }

  return par;
}
23398 /* Target hook for TARGET_FUNCTION_VALUE.
23400 An integer value is in r3 and a floating-point value is in fp1,
23401 unless -msoft-float. */
23404 rs6000_function_value (const_tree valtype
,
23405 const_tree fn_decl_or_type ATTRIBUTE_UNUSED
,
23406 bool outgoing ATTRIBUTE_UNUSED
)
23409 unsigned int regno
;
23410 machine_mode elt_mode
;
23413 /* Special handling for structs in darwin64. */
23415 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype
), valtype
))
23417 CUMULATIVE_ARGS valcum
;
23421 valcum
.fregno
= FP_ARG_MIN_REG
;
23422 valcum
.vregno
= ALTIVEC_ARG_MIN_REG
;
23423 /* Do a trial code generation as if this were going to be passed as
23424 an argument; if any part goes in memory, we return NULL. */
23425 valret
= rs6000_darwin64_record_arg (&valcum
, valtype
, true, /* retval= */ true);
23428 /* Otherwise fall through to standard ABI rules. */
23431 mode
= TYPE_MODE (valtype
);
23433 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
23434 if (rs6000_discover_homogeneous_aggregate (mode
, valtype
, &elt_mode
, &n_elts
))
23436 int first_reg
, n_regs
;
23438 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode
))
23440 /* _Decimal128 must use even/odd register pairs. */
23441 first_reg
= (elt_mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
23442 n_regs
= (GET_MODE_SIZE (elt_mode
) + 7) >> 3;
23446 first_reg
= ALTIVEC_ARG_RETURN
;
23450 return rs6000_parallel_return (mode
, n_elts
, elt_mode
, first_reg
, n_regs
);
23453 /* Some return value types need be split in -mpowerpc64, 32bit ABI. */
23454 if (TARGET_32BIT
&& TARGET_POWERPC64
)
23463 int count
= GET_MODE_SIZE (mode
) / 4;
23464 return rs6000_parallel_return (mode
, count
, SImode
, GP_ARG_RETURN
, 1);
23467 if ((INTEGRAL_TYPE_P (valtype
)
23468 && GET_MODE_BITSIZE (mode
) < (TARGET_32BIT
? 32 : 64))
23469 || POINTER_TYPE_P (valtype
))
23470 mode
= TARGET_32BIT
? SImode
: DImode
;
23472 if (DECIMAL_FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
23473 /* _Decimal128 must use an even/odd register pair. */
23474 regno
= (mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
23475 else if (SCALAR_FLOAT_TYPE_P (valtype
) && TARGET_HARD_FLOAT
23476 && !FLOAT128_VECTOR_P (mode
))
23477 regno
= FP_ARG_RETURN
;
23478 else if (TREE_CODE (valtype
) == COMPLEX_TYPE
23479 && targetm
.calls
.split_complex_arg
)
23480 return rs6000_complex_function_value (mode
);
23481 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23482 return register is used in both cases, and we won't see V2DImode/V2DFmode
23483 for pure altivec, combine the two cases. */
23484 else if ((TREE_CODE (valtype
) == VECTOR_TYPE
|| VECTOR_ALIGNMENT_P (mode
))
23485 && TARGET_ALTIVEC
&& TARGET_ALTIVEC_ABI
23486 && ALTIVEC_OR_VSX_VECTOR_MODE (mode
))
23487 regno
= ALTIVEC_ARG_RETURN
;
23489 regno
= GP_ARG_RETURN
;
23491 return gen_rtx_REG (mode
, regno
);
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */
static rtx
rs6000_libcall_value (machine_mode mode)
{
  unsigned int regno;

  /* Long long return value need be split in -mpowerpc64, 32bit ABI.  */
  if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
    return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);

  if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
    /* _Decimal128 must use an even/odd register pair.  */
    regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
  else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
    regno = FP_ARG_RETURN;
  /* VSX is a superset of Altivec and adds V2DImode/V2DFmode.  Since the same
     return register is used in both cases, and we won't see V2DImode/V2DFmode
     for pure altivec, combine the two cases.  */
  else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
	   && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
    regno = ALTIVEC_ARG_RETURN;
  else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
    return rs6000_complex_function_value (mode);
  else
    regno = GP_ARG_RETURN;

  return gen_rtx_REG (mode, regno);
}
/* Compute register pressure classes.  We implement the target hook to avoid
   IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
   lead to incorrect estimates of the number of available registers and
   therefore increased register pressure/spill.  */

static int
rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
{
  int n;

  n = 0;
  pressure_classes[n++] = GENERAL_REGS;
  if (TARGET_ALTIVEC)
    pressure_classes[n++] = ALTIVEC_REGS;
  if (TARGET_VSX)
    pressure_classes[n++] = VSX_REGS;
  else
    {
      if (TARGET_HARD_FLOAT)
        pressure_classes[n++] = FLOAT_REGS;
    }
  pressure_classes[n++] = CR_REGS;
  pressure_classes[n++] = SPECIAL_REGS;

  return n;
}
/* Given FROM and TO register numbers, say whether this elimination is allowed.
   Frame pointer elimination is automatically handled.

   For the RS/6000, if frame pointer elimination is being done, we would like
   to convert ap into fp, not sp.

   We need r30 if -mminimal-toc was specified, and there are constant pool
   references.  */

static bool
rs6000_can_eliminate (const int from, const int to)
{
  return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
          ? ! frame_pointer_needed
          : from == RS6000_PIC_OFFSET_TABLE_REGNUM
            ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
              || constant_pool_empty_p ()
            : true);
}
/* Define the offset between two registers, FROM to be eliminated and its
   replacement TO, at the start of a routine.  */

HOST_WIDE_INT
rs6000_initial_elimination_offset (int from, int to)
{
  rs6000_stack_t *info = rs6000_stack_info ();
  HOST_WIDE_INT offset;

  if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    offset = info->push_p ? 0 : -info->total_size;
  else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    {
      offset = info->push_p ? 0 : -info->total_size;
      if (FRAME_GROWS_DOWNWARD)
        offset += info->fixed_size + info->vars_size + info->parm_size;
    }
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = FRAME_GROWS_DOWNWARD
             ? info->fixed_size + info->vars_size + info->parm_size
             : 0;
  else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = info->total_size;
  else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    offset = info->push_p ? info->total_size : 0;
  else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
    offset = 0;
  else
    gcc_unreachable ();

  return offset;
}
/* Fill in sizes of registers used by unwinder.  */

static void
rs6000_init_dwarf_reg_sizes_extra (tree address)
{
  if (TARGET_MACHO && ! TARGET_ALTIVEC)
    {
      int i;
      machine_mode mode = TYPE_MODE (char_type_node);
      rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
      rtx mem = gen_rtx_MEM (BLKmode, addr);
      rtx value = gen_int_mode (16, mode);

      /* On Darwin, libgcc may be built to run on both G3 and G4/5.
         The unwinder still needs to know the size of Altivec registers.  */

      for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
        {
          int column = DWARF_REG_TO_UNWIND_COLUMN
                (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
          HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);

          emit_move_insn (adjust_address (mem, mode, offset), value);
        }
    }
}
23629 /* Map internal gcc register numbers to debug format register numbers.
23630 FORMAT specifies the type of debug register number to use:
23631 0 -- debug information, except for frame-related sections
23632 1 -- DWARF .debug_frame section
23633 2 -- DWARF .eh_frame section */
23636 rs6000_dbx_register_number (unsigned int regno
, unsigned int format
)
23638 /* On some platforms, we use the standard DWARF register
23639 numbering for .debug_info and .debug_frame. */
23640 if ((format
== 0 && dwarf_debuginfo_p ()) || format
== 1)
23642 #ifdef RS6000_USE_DWARF_NUMBERING
23645 if (FP_REGNO_P (regno
))
23646 return regno
- FIRST_FPR_REGNO
+ 32;
23647 if (ALTIVEC_REGNO_P (regno
))
23648 return regno
- FIRST_ALTIVEC_REGNO
+ 1124;
23649 if (regno
== LR_REGNO
)
23651 if (regno
== CTR_REGNO
)
23653 if (regno
== CA_REGNO
)
23654 return 101; /* XER */
23655 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
23656 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
23657 The actual code emitted saves the whole of CR, so we map CR2_REGNO
23658 to the DWARF reg for CR. */
23659 if (format
== 1 && regno
== CR2_REGNO
)
23661 if (CR_REGNO_P (regno
))
23662 return regno
- CR0_REGNO
+ 86;
23663 if (regno
== VRSAVE_REGNO
)
23665 if (regno
== VSCR_REGNO
)
23668 /* These do not make much sense. */
23669 if (regno
== FRAME_POINTER_REGNUM
)
23671 if (regno
== ARG_POINTER_REGNUM
)
23676 gcc_unreachable ();
23680 /* We use the GCC 7 (and before) internal number for non-DWARF debug
23681 information, and also for .eh_frame. */
23682 /* Translate the regnos to their numbers in GCC 7 (and before). */
23685 if (FP_REGNO_P (regno
))
23686 return regno
- FIRST_FPR_REGNO
+ 32;
23687 if (ALTIVEC_REGNO_P (regno
))
23688 return regno
- FIRST_ALTIVEC_REGNO
+ 77;
23689 if (regno
== LR_REGNO
)
23691 if (regno
== CTR_REGNO
)
23693 if (regno
== CA_REGNO
)
23694 return 76; /* XER */
23695 if (CR_REGNO_P (regno
))
23696 return regno
- CR0_REGNO
+ 68;
23697 if (regno
== VRSAVE_REGNO
)
23699 if (regno
== VSCR_REGNO
)
23702 if (regno
== FRAME_POINTER_REGNUM
)
23704 if (regno
== ARG_POINTER_REGNUM
)
23709 gcc_unreachable ();
/* target hook eh_return_filter_mode */
static scalar_int_mode
rs6000_eh_return_filter_mode (void)
{
  return TARGET_32BIT ? SImode : word_mode;
}
/* Target hook for translate_mode_attribute.  */
static machine_mode
rs6000_translate_mode_attribute (machine_mode mode)
{
  if ((FLOAT128_IEEE_P (mode)
       && ieee128_float_type_node == long_double_type_node)
      || (FLOAT128_IBM_P (mode)
          && ibm128_float_type_node == long_double_type_node))
    return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
  return mode;
}
/* Target hook for scalar_mode_supported_p.  */
static bool
rs6000_scalar_mode_supported_p (scalar_mode mode)
{
  /* -m32 does not support TImode.  This is the default, from
     default_scalar_mode_supported_p.  For -m32 -mpowerpc64 we want the
     same ABI as for -m32.  But default_scalar_mode_supported_p allows
     integer modes of precision 2 * BITS_PER_WORD, which matches TImode
     for -mpowerpc64.  */
  if (TARGET_32BIT && mode == TImode)
    return false;

  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
23751 /* Target hook for libgcc_floating_mode_supported_p. */
23754 rs6000_libgcc_floating_mode_supported_p (scalar_float_mode mode
)
23763 /* We only return true for KFmode if IEEE 128-bit types are supported, and
23764 if long double does not use the IEEE 128-bit format. If long double
23765 uses the IEEE 128-bit format, it will use TFmode and not KFmode.
23766 Because the code will not use KFmode in that case, there will be aborts
23767 because it can't find KFmode in the Floatn types. */
23769 return TARGET_FLOAT128_TYPE
&& !TARGET_IEEEQUAD
;
23776 /* Target hook for vector_mode_supported_p. */
23778 rs6000_vector_mode_supported_p (machine_mode mode
)
23780 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
23781 128-bit, the compiler might try to widen IEEE 128-bit to IBM
23783 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
) && !FLOAT128_IEEE_P (mode
))
23790 /* Target hook for floatn_mode. */
23791 static opt_scalar_float_mode
23792 rs6000_floatn_mode (int n
, bool extended
)
23802 if (TARGET_FLOAT128_TYPE
)
23803 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
23805 return opt_scalar_float_mode ();
23808 return opt_scalar_float_mode ();
23811 /* Those are the only valid _FloatNx types. */
23812 gcc_unreachable ();
23826 if (TARGET_FLOAT128_TYPE
)
23827 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
23829 return opt_scalar_float_mode ();
23832 return opt_scalar_float_mode ();
/* Target hook for c_mode_for_suffix.  */
static machine_mode
rs6000_c_mode_for_suffix (char suffix)
{
  if (TARGET_FLOAT128_TYPE)
    {
      if (suffix == 'q' || suffix == 'Q')
        return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;

      /* At the moment, we are not defining a suffix for IBM extended double.
         If/when the default for -mabi=ieeelongdouble is changed, and we want
         to support __ibm128 constants in legacy library code, we may need to
         re-evaluate this decision.  Currently, c-lex.cc only supports 'w' and
         'q' as machine dependent suffixes.  The x86_64 port uses 'w' for
         __float80 constants.  */
    }

  return VOIDmode;
}
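/* Illustrative sketch (not part of the original source): the 'q'/'Q' suffix
   handled above is what lets users write __float128 constants directly when
   -mfloat128 is in effect.  A minimal example, assuming libquadmath is
   available (link with -lquadmath); not compiled here.  */
#if 0
#include <quadmath.h>
#include <stdio.h>

int
main (void)
{
  __float128 x = 1.5Q;             /* 'Q' suffix -> IEEE 128-bit constant */
  char buf[64];
  quadmath_snprintf (buf, sizeof buf, "%.30Qg", x * 2.0Q);
  puts (buf);
  return 0;
}
#endif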
/* Target hook for invalid_arg_for_unprototyped_fn.  */
static const char *
invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
{
  return (!rs6000_darwin64_abi
          && typelist == 0
          && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
          && (funcdecl == NULL_TREE
              || (TREE_CODE (funcdecl) == FUNCTION_DECL
                  && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
         ? N_("AltiVec argument passed to unprototyped function")
         : NULL;
}
/* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
   setup by using __stack_chk_fail_local hidden function instead of
   calling __stack_chk_fail directly.  Otherwise it is better to call
   __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
rs6000_stack_protect_fail (void)
{
  return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
         ? default_hidden_stack_protect_fail ()
         : default_external_stack_protect_fail ();
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
rs6000_asan_shadow_offset (void)
{
  return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
}
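/* Illustrative sketch (not part of the original source): the constant above
   is the offset AddressSanitizer adds after shifting an address right by 3
   to locate its shadow byte.  A sketch of that mapping, with the 64-bit and
   32-bit offsets returned by the hook; not compiled here.  */
#if 0
#include <stdint.h>

static inline uintptr_t
asan_shadow_addr (uintptr_t addr, int is_64bit)
{
  uintptr_t offset = (uintptr_t) 1 << (is_64bit ? 41 : 29);
  return (addr >> 3) + offset;   /* one shadow byte covers 8 bytes of memory */
}
#endif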
23895 /* Mask options that we want to support inside of attribute((target)) and
23896 #pragma GCC target operations. Note, we do not include things like
23897 64/32-bit, endianness, hard/soft floating point, etc. that would have
23898 different calling sequences. */
23900 struct rs6000_opt_mask
{
23901 const char *name
; /* option name */
23902 HOST_WIDE_INT mask
; /* mask to set */
23903 bool invert
; /* invert sense of mask */
23904 bool valid_target
; /* option is a target option */
23907 static struct rs6000_opt_mask
const rs6000_opt_masks
[] =
23909 { "altivec", OPTION_MASK_ALTIVEC
, false, true },
23910 { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
,
23912 { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR
,
23914 { "cmpb", OPTION_MASK_CMPB
, false, true },
23915 { "crypto", OPTION_MASK_CRYPTO
, false, true },
23916 { "direct-move", OPTION_MASK_DIRECT_MOVE
, false, true },
23917 { "dlmzb", OPTION_MASK_DLMZB
, false, true },
23918 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX
,
23920 { "float128", OPTION_MASK_FLOAT128_KEYWORD
, false, true },
23921 { "float128-hardware", OPTION_MASK_FLOAT128_HW
, false, true },
23922 { "fprnd", OPTION_MASK_FPRND
, false, true },
23923 { "power10", OPTION_MASK_POWER10
, false, true },
23924 { "hard-dfp", OPTION_MASK_DFP
, false, true },
23925 { "htm", OPTION_MASK_HTM
, false, true },
23926 { "isel", OPTION_MASK_ISEL
, false, true },
23927 { "mfcrf", OPTION_MASK_MFCRF
, false, true },
23928 { "mfpgpr", 0, false, true },
23929 { "mma", OPTION_MASK_MMA
, false, true },
23930 { "modulo", OPTION_MASK_MODULO
, false, true },
23931 { "mulhw", OPTION_MASK_MULHW
, false, true },
23932 { "multiple", OPTION_MASK_MULTIPLE
, false, true },
23933 { "pcrel", OPTION_MASK_PCREL
, false, true },
23934 { "pcrel-opt", OPTION_MASK_PCREL_OPT
, false, true },
23935 { "popcntb", OPTION_MASK_POPCNTB
, false, true },
23936 { "popcntd", OPTION_MASK_POPCNTD
, false, true },
23937 { "power8-fusion", OPTION_MASK_P8_FUSION
, false, true },
23938 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN
, false, true },
23939 { "power8-vector", OPTION_MASK_P8_VECTOR
, false, true },
23940 { "power9-minmax", OPTION_MASK_P9_MINMAX
, false, true },
23941 { "power9-misc", OPTION_MASK_P9_MISC
, false, true },
23942 { "power9-vector", OPTION_MASK_P9_VECTOR
, false, true },
23943 { "power10-fusion", OPTION_MASK_P10_FUSION
, false, true },
23944 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT
, false, true },
23945 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT
, false, true },
23946 { "prefixed", OPTION_MASK_PREFIXED
, false, true },
23947 { "quad-memory", OPTION_MASK_QUAD_MEMORY
, false, true },
23948 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC
, false, true },
23949 { "recip-precision", OPTION_MASK_RECIP_PRECISION
, false, true },
23950 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT
, false, true },
23951 { "string", 0, false, true },
23952 { "update", OPTION_MASK_NO_UPDATE
, true , true },
23953 { "vsx", OPTION_MASK_VSX
, false, true },
23954 #ifdef OPTION_MASK_64BIT
23956 { "aix64", OPTION_MASK_64BIT
, false, false },
23957 { "aix32", OPTION_MASK_64BIT
, true, false },
23959 { "64", OPTION_MASK_64BIT
, false, false },
23960 { "32", OPTION_MASK_64BIT
, true, false },
23963 #ifdef OPTION_MASK_EABI
23964 { "eabi", OPTION_MASK_EABI
, false, false },
23966 #ifdef OPTION_MASK_LITTLE_ENDIAN
23967 { "little", OPTION_MASK_LITTLE_ENDIAN
, false, false },
23968 { "big", OPTION_MASK_LITTLE_ENDIAN
, true, false },
23970 #ifdef OPTION_MASK_RELOCATABLE
23971 { "relocatable", OPTION_MASK_RELOCATABLE
, false, false },
23973 #ifdef OPTION_MASK_STRICT_ALIGN
23974 { "strict-align", OPTION_MASK_STRICT_ALIGN
, false, false },
23976 { "soft-float", OPTION_MASK_SOFT_FLOAT
, false, false },
23977 { "string", 0, false, false },
23980 /* Builtin mask mapping for printing the flags. */
23981 static struct rs6000_opt_mask
const rs6000_builtin_mask_names
[] =
23983 { "altivec", RS6000_BTM_ALTIVEC
, false, false },
23984 { "vsx", RS6000_BTM_VSX
, false, false },
23985 { "fre", RS6000_BTM_FRE
, false, false },
23986 { "fres", RS6000_BTM_FRES
, false, false },
23987 { "frsqrte", RS6000_BTM_FRSQRTE
, false, false },
23988 { "frsqrtes", RS6000_BTM_FRSQRTES
, false, false },
23989 { "popcntd", RS6000_BTM_POPCNTD
, false, false },
23990 { "cell", RS6000_BTM_CELL
, false, false },
23991 { "power8-vector", RS6000_BTM_P8_VECTOR
, false, false },
23992 { "power9-vector", RS6000_BTM_P9_VECTOR
, false, false },
23993 { "power9-misc", RS6000_BTM_P9_MISC
, false, false },
23994 { "crypto", RS6000_BTM_CRYPTO
, false, false },
23995 { "htm", RS6000_BTM_HTM
, false, false },
23996 { "hard-dfp", RS6000_BTM_DFP
, false, false },
23997 { "hard-float", RS6000_BTM_HARD_FLOAT
, false, false },
23998 { "long-double-128", RS6000_BTM_LDBL128
, false, false },
23999 { "powerpc64", RS6000_BTM_POWERPC64
, false, false },
24000 { "float128", RS6000_BTM_FLOAT128
, false, false },
24001 { "float128-hw", RS6000_BTM_FLOAT128_HW
,false, false },
24002 { "mma", RS6000_BTM_MMA
, false, false },
24003 { "power10", RS6000_BTM_P10
, false, false },
24006 /* Option variables that we want to support inside attribute((target)) and
24007 #pragma GCC target operations. */
24009 struct rs6000_opt_var
{
24010 const char *name
; /* option name */
24011 size_t global_offset
; /* offset of the option in global_options. */
24012 size_t target_offset
; /* offset of the option in target options. */
24015 static struct rs6000_opt_var
const rs6000_opt_vars
[] =
24018 offsetof (struct gcc_options
, x_TARGET_FRIZ
),
24019 offsetof (struct cl_target_option
, x_TARGET_FRIZ
), },
24020 { "avoid-indexed-addresses",
24021 offsetof (struct gcc_options
, x_TARGET_AVOID_XFORM
),
24022 offsetof (struct cl_target_option
, x_TARGET_AVOID_XFORM
) },
24024 offsetof (struct gcc_options
, x_rs6000_default_long_calls
),
24025 offsetof (struct cl_target_option
, x_rs6000_default_long_calls
), },
24026 { "optimize-swaps",
24027 offsetof (struct gcc_options
, x_rs6000_optimize_swaps
),
24028 offsetof (struct cl_target_option
, x_rs6000_optimize_swaps
), },
24029 { "allow-movmisalign",
24030 offsetof (struct gcc_options
, x_TARGET_ALLOW_MOVMISALIGN
),
24031 offsetof (struct cl_target_option
, x_TARGET_ALLOW_MOVMISALIGN
), },
24033 offsetof (struct gcc_options
, x_TARGET_SCHED_GROUPS
),
24034 offsetof (struct cl_target_option
, x_TARGET_SCHED_GROUPS
), },
24036 offsetof (struct gcc_options
, x_TARGET_ALWAYS_HINT
),
24037 offsetof (struct cl_target_option
, x_TARGET_ALWAYS_HINT
), },
24038 { "align-branch-targets",
24039 offsetof (struct gcc_options
, x_TARGET_ALIGN_BRANCH_TARGETS
),
24040 offsetof (struct cl_target_option
, x_TARGET_ALIGN_BRANCH_TARGETS
), },
24042 offsetof (struct gcc_options
, x_TARGET_SCHED_PROLOG
),
24043 offsetof (struct cl_target_option
, x_TARGET_SCHED_PROLOG
), },
24045 offsetof (struct gcc_options
, x_TARGET_SCHED_PROLOG
),
24046 offsetof (struct cl_target_option
, x_TARGET_SCHED_PROLOG
), },
24047 { "speculate-indirect-jumps",
24048 offsetof (struct gcc_options
, x_rs6000_speculate_indirect_jumps
),
24049 offsetof (struct cl_target_option
, x_rs6000_speculate_indirect_jumps
), },
24052 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
24053 parsing. Return true if there were no errors. */
24056 rs6000_inner_target_options (tree args
, bool attr_p
)
24060 if (args
== NULL_TREE
)
24063 else if (TREE_CODE (args
) == STRING_CST
)
24065 char *p
= ASTRDUP (TREE_STRING_POINTER (args
));
24068 while ((q
= strtok (p
, ",")) != NULL
)
24070 bool error_p
= false;
24071 bool not_valid_p
= false;
24072 const char *cpu_opt
= NULL
;
24075 if (startswith (q
, "cpu="))
24077 int cpu_index
= rs6000_cpu_name_lookup (q
+4);
24078 if (cpu_index
>= 0)
24079 rs6000_cpu_index
= cpu_index
;
24086 else if (startswith (q
, "tune="))
24088 int tune_index
= rs6000_cpu_name_lookup (q
+5);
24089 if (tune_index
>= 0)
24090 rs6000_tune_index
= tune_index
;
24100 bool invert
= false;
24104 if (startswith (r
, "no-"))
24110 for (i
= 0; i
< ARRAY_SIZE (rs6000_opt_masks
); i
++)
24111 if (strcmp (r
, rs6000_opt_masks
[i
].name
) == 0)
24113 HOST_WIDE_INT mask
= rs6000_opt_masks
[i
].mask
;
24115 if (!rs6000_opt_masks
[i
].valid_target
)
24116 not_valid_p
= true;
24120 rs6000_isa_flags_explicit
|= mask
;
24122 /* VSX needs altivec, so -mvsx automagically sets
24123 altivec and disables -mavoid-indexed-addresses. */
24126 if (mask
== OPTION_MASK_VSX
)
24128 mask
|= OPTION_MASK_ALTIVEC
;
24129 TARGET_AVOID_XFORM
= 0;
24133 if (rs6000_opt_masks
[i
].invert
)
24137 rs6000_isa_flags
&= ~mask
;
24139 rs6000_isa_flags
|= mask
;
24144 if (error_p
&& !not_valid_p
)
24146 for (i
= 0; i
< ARRAY_SIZE (rs6000_opt_vars
); i
++)
24147 if (strcmp (r
, rs6000_opt_vars
[i
].name
) == 0)
24149 size_t j
= rs6000_opt_vars
[i
].global_offset
;
24150 *((int *) ((char *)&global_options
+ j
)) = !invert
;
24152 not_valid_p
= false;
24160 const char *eprefix
, *esuffix
;
24165 eprefix
= "__attribute__((__target__(";
24170 eprefix
= "#pragma GCC target ";
24175 error ("invalid cpu %qs for %s%qs%s", cpu_opt
, eprefix
,
24177 else if (not_valid_p
)
24178 error ("%s%qs%s is not allowed", eprefix
, q
, esuffix
);
24180 error ("%s%qs%s is invalid", eprefix
, q
, esuffix
);
24185 else if (TREE_CODE (args
) == TREE_LIST
)
24189 tree value
= TREE_VALUE (args
);
24192 bool ret2
= rs6000_inner_target_options (value
, attr_p
);
24196 args
= TREE_CHAIN (args
);
24198 while (args
!= NULL_TREE
);
24203 error ("attribute %<target%> argument not a string");
24210 /* Print out the target options as a list for -mdebug=target. */
24213 rs6000_debug_target_options (tree args
, const char *prefix
)
24215 if (args
== NULL_TREE
)
24216 fprintf (stderr
, "%s<NULL>", prefix
);
24218 else if (TREE_CODE (args
) == STRING_CST
)
24220 char *p
= ASTRDUP (TREE_STRING_POINTER (args
));
24223 while ((q
= strtok (p
, ",")) != NULL
)
24226 fprintf (stderr
, "%s\"%s\"", prefix
, q
);
24231 else if (TREE_CODE (args
) == TREE_LIST
)
24235 tree value
= TREE_VALUE (args
);
24238 rs6000_debug_target_options (value
, prefix
);
24241 args
= TREE_CHAIN (args
);
24243 while (args
!= NULL_TREE
);
24247 gcc_unreachable ();
24253 /* Hook to validate attribute((target("..."))). */
24256 rs6000_valid_attribute_p (tree fndecl
,
24257 tree
ARG_UNUSED (name
),
24261 struct cl_target_option cur_target
;
24264 tree new_target
, new_optimize
;
24265 tree func_optimize
;
24267 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
24269 if (TARGET_DEBUG_TARGET
)
24271 tree tname
= DECL_NAME (fndecl
);
24272 fprintf (stderr
, "\n==================== rs6000_valid_attribute_p:\n");
24274 fprintf (stderr
, "function: %.*s\n",
24275 (int) IDENTIFIER_LENGTH (tname
),
24276 IDENTIFIER_POINTER (tname
));
24278 fprintf (stderr
, "function: unknown\n");
24280 fprintf (stderr
, "args:");
24281 rs6000_debug_target_options (args
, " ");
24282 fprintf (stderr
, "\n");
24285 fprintf (stderr
, "flags: 0x%x\n", flags
);
24287 fprintf (stderr
, "--------------------\n");
24290 /* attribute((target("default"))) does nothing, beyond
24291 affecting multi-versioning. */
24292 if (TREE_VALUE (args
)
24293 && TREE_CODE (TREE_VALUE (args
)) == STRING_CST
24294 && TREE_CHAIN (args
) == NULL_TREE
24295 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args
)), "default") == 0)
24298 old_optimize
= build_optimization_node (&global_options
,
24299 &global_options_set
);
24300 func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
24302 /* If the function changed the optimization levels as well as setting target
24303 options, start with the optimizations specified. */
24304 if (func_optimize
&& func_optimize
!= old_optimize
)
24305 cl_optimization_restore (&global_options
, &global_options_set
,
24306 TREE_OPTIMIZATION (func_optimize
));
24308 /* The target attributes may also change some optimization flags, so update
24309 the optimization options if necessary. */
24310 cl_target_option_save (&cur_target
, &global_options
, &global_options_set
);
24311 rs6000_cpu_index
= rs6000_tune_index
= -1;
24312 ret
= rs6000_inner_target_options (args
, true);
24314 /* Set up any additional state. */
24317 ret
= rs6000_option_override_internal (false);
24318 new_target
= build_target_option_node (&global_options
,
24319 &global_options_set
);
24324 new_optimize
= build_optimization_node (&global_options
,
24325 &global_options_set
);
24332 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
24334 if (old_optimize
!= new_optimize
)
24335 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
24338 cl_target_option_restore (&global_options
, &global_options_set
, &cur_target
);
24340 if (old_optimize
!= new_optimize
)
24341 cl_optimization_restore (&global_options
, &global_options_set
,
24342 TREE_OPTIMIZATION (old_optimize
));
24348 /* Hook to validate the current #pragma GCC target and set the state, and
24349 update the macros based on what was changed. If ARGS is NULL, then
24350 POP_TARGET is used to reset the options. */
24353 rs6000_pragma_target_parse (tree args
, tree pop_target
)
24355 tree prev_tree
= build_target_option_node (&global_options
,
24356 &global_options_set
);
24358 struct cl_target_option
*prev_opt
, *cur_opt
;
24359 HOST_WIDE_INT prev_flags
, cur_flags
, diff_flags
;
24360 HOST_WIDE_INT prev_bumask
, cur_bumask
, diff_bumask
;
24362 if (TARGET_DEBUG_TARGET
)
24364 fprintf (stderr
, "\n==================== rs6000_pragma_target_parse\n");
24365 fprintf (stderr
, "args:");
24366 rs6000_debug_target_options (args
, " ");
24367 fprintf (stderr
, "\n");
24371 fprintf (stderr
, "pop_target:\n");
24372 debug_tree (pop_target
);
24375 fprintf (stderr
, "pop_target: <NULL>\n");
24377 fprintf (stderr
, "--------------------\n");
24382 cur_tree
= ((pop_target
)
24384 : target_option_default_node
);
24385 cl_target_option_restore (&global_options
, &global_options_set
,
24386 TREE_TARGET_OPTION (cur_tree
));
24390 rs6000_cpu_index
= rs6000_tune_index
= -1;
24391 if (!rs6000_inner_target_options (args
, false)
24392 || !rs6000_option_override_internal (false)
24393 || (cur_tree
= build_target_option_node (&global_options
,
24394 &global_options_set
))
24397 if (TARGET_DEBUG_BUILTIN
|| TARGET_DEBUG_TARGET
)
24398 fprintf (stderr
, "invalid pragma\n");
24404 target_option_current_node
= cur_tree
;
24405 rs6000_activate_target_options (target_option_current_node
);
24407 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
24408 change the macros that are defined. */
24409 if (rs6000_target_modify_macros_ptr
)
24411 prev_opt
= TREE_TARGET_OPTION (prev_tree
);
24412 prev_bumask
= prev_opt
->x_rs6000_builtin_mask
;
24413 prev_flags
= prev_opt
->x_rs6000_isa_flags
;
24415 cur_opt
= TREE_TARGET_OPTION (cur_tree
);
24416 cur_flags
= cur_opt
->x_rs6000_isa_flags
;
24417 cur_bumask
= cur_opt
->x_rs6000_builtin_mask
;
24419 diff_bumask
= (prev_bumask
^ cur_bumask
);
24420 diff_flags
= (prev_flags
^ cur_flags
);
24422 if ((diff_flags
!= 0) || (diff_bumask
!= 0))
24424 /* Delete old macros. */
24425 rs6000_target_modify_macros_ptr (false,
24426 prev_flags
& diff_flags
,
24427 prev_bumask
& diff_bumask
);
24429 /* Define new macros. */
24430 rs6000_target_modify_macros_ptr (true,
24431 cur_flags
& diff_flags
,
24432 cur_bumask
& diff_bumask
);
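/* Illustrative sketch (not part of the original source): the pragma handled
   above changes the target options (and the predefined macros) for the code
   that follows, and "#pragma GCC pop_options" restores them.  Hypothetical
   example; not compiled here.  */
#if 0
#pragma GCC push_options
#pragma GCC target ("cpu=power9,vsx")

int
uses_power9 (int x)
{
  return x + 1;          /* compiled as if -mcpu=power9 -mvsx were given */
}

#pragma GCC pop_options  /* back to the command-line options */
#endif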
24440 /* Remember the last target of rs6000_set_current_function. */
24441 static GTY(()) tree rs6000_previous_fndecl
;
24443 /* Restore target's globals from NEW_TREE and invalidate the
24444 rs6000_previous_fndecl cache. */
24447 rs6000_activate_target_options (tree new_tree
)
24449 cl_target_option_restore (&global_options
, &global_options_set
,
24450 TREE_TARGET_OPTION (new_tree
));
24451 if (TREE_TARGET_GLOBALS (new_tree
))
24452 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
24453 else if (new_tree
== target_option_default_node
)
24454 restore_target_globals (&default_target_globals
);
24456 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
24457 rs6000_previous_fndecl
= NULL_TREE
;
24460 /* Establish appropriate back-end context for processing the function
24461 FNDECL. The argument might be NULL to indicate processing at top
24462 level, outside of any function scope. */
24464 rs6000_set_current_function (tree fndecl
)
24466 if (TARGET_DEBUG_TARGET
)
24468 fprintf (stderr
, "\n==================== rs6000_set_current_function");
24471 fprintf (stderr
, ", fndecl %s (%p)",
24472 (DECL_NAME (fndecl
)
24473 ? IDENTIFIER_POINTER (DECL_NAME (fndecl
))
24474 : "<unknown>"), (void *)fndecl
);
24476 if (rs6000_previous_fndecl
)
24477 fprintf (stderr
, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl
);
24479 fprintf (stderr
, "\n");
24482 /* Only change the context if the function changes. This hook is called
24483 several times in the course of compiling a function, and we don't want to
24484 slow things down too much or call target_reinit when it isn't safe. */
24485 if (fndecl
== rs6000_previous_fndecl
)
24489 if (rs6000_previous_fndecl
== NULL_TREE
)
24490 old_tree
= target_option_current_node
;
24491 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl
))
24492 old_tree
= DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl
);
24494 old_tree
= target_option_default_node
;
24497 if (fndecl
== NULL_TREE
)
24499 if (old_tree
!= target_option_current_node
)
24500 new_tree
= target_option_current_node
;
24502 new_tree
= NULL_TREE
;
24506 new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
24507 if (new_tree
== NULL_TREE
)
24508 new_tree
= target_option_default_node
;
24511 if (TARGET_DEBUG_TARGET
)
24515 fprintf (stderr
, "\nnew fndecl target specific options:\n");
24516 debug_tree (new_tree
);
24521 fprintf (stderr
, "\nold fndecl target specific options:\n");
24522 debug_tree (old_tree
);
24525 if (old_tree
!= NULL_TREE
|| new_tree
!= NULL_TREE
)
24526 fprintf (stderr
, "--------------------\n");
24529 if (new_tree
&& old_tree
!= new_tree
)
24530 rs6000_activate_target_options (new_tree
);
24533 rs6000_previous_fndecl
= fndecl
;
24537 /* Save the current options */
24540 rs6000_function_specific_save (struct cl_target_option
*ptr
,
24541 struct gcc_options
*opts
,
24542 struct gcc_options */
* opts_set */
)
24544 ptr
->x_rs6000_isa_flags
= opts
->x_rs6000_isa_flags
;
24545 ptr
->x_rs6000_isa_flags_explicit
= opts
->x_rs6000_isa_flags_explicit
;
24548 /* Restore the current options */
24551 rs6000_function_specific_restore (struct gcc_options
*opts
,
24552 struct gcc_options */
* opts_set */
,
24553 struct cl_target_option
*ptr
)
24556 opts
->x_rs6000_isa_flags
= ptr
->x_rs6000_isa_flags
;
24557 opts
->x_rs6000_isa_flags_explicit
= ptr
->x_rs6000_isa_flags_explicit
;
24558 (void) rs6000_option_override_internal (false);
24561 /* Print the current options */
24564 rs6000_function_specific_print (FILE *file
, int indent
,
24565 struct cl_target_option
*ptr
)
24567 rs6000_print_isa_options (file
, indent
, "Isa options set",
24568 ptr
->x_rs6000_isa_flags
);
24570 rs6000_print_isa_options (file
, indent
, "Isa options explicit",
24571 ptr
->x_rs6000_isa_flags_explicit
);
24574 /* Helper function to print the current isa or misc options on a line. */
24577 rs6000_print_options_internal (FILE *file
,
24579 const char *string
,
24580 HOST_WIDE_INT flags
,
24581 const char *prefix
,
24582 const struct rs6000_opt_mask
*opts
,
24583 size_t num_elements
)
24586 size_t start_column
= 0;
24588 size_t max_column
= 120;
24589 size_t prefix_len
= strlen (prefix
);
24590 size_t comma_len
= 0;
24591 const char *comma
= "";
24594 start_column
+= fprintf (file
, "%*s", indent
, "");
24598 fprintf (stderr
, DEBUG_FMT_S
, string
, "<none>");
24602 start_column
+= fprintf (stderr
, DEBUG_FMT_WX
, string
, flags
);
24604 /* Print the various mask options. */
24605 cur_column
= start_column
;
24606 for (i
= 0; i
< num_elements
; i
++)
24608 bool invert
= opts
[i
].invert
;
24609 const char *name
= opts
[i
].name
;
24610 const char *no_str
= "";
24611 HOST_WIDE_INT mask
= opts
[i
].mask
;
24612 size_t len
= comma_len
+ prefix_len
+ strlen (name
);
24616 if ((flags
& mask
) == 0)
24619 len
+= strlen ("no-");
24627 if ((flags
& mask
) != 0)
24630 len
+= strlen ("no-");
24637 if (cur_column
> max_column
)
24639 fprintf (stderr
, ", \\\n%*s", (int)start_column
, "");
24640 cur_column
= start_column
+ len
;
24644 fprintf (file
, "%s%s%s%s", comma
, prefix
, no_str
, name
);
24646 comma_len
= strlen (", ");
24649 fputs ("\n", file
);
24652 /* Helper function to print the current isa options on a line. */
24655 rs6000_print_isa_options (FILE *file
, int indent
, const char *string
,
24656 HOST_WIDE_INT flags
)
24658 rs6000_print_options_internal (file
, indent
, string
, flags
, "-m",
24659 &rs6000_opt_masks
[0],
24660 ARRAY_SIZE (rs6000_opt_masks
));
24664 rs6000_print_builtin_options (FILE *file
, int indent
, const char *string
,
24665 HOST_WIDE_INT flags
)
24667 rs6000_print_options_internal (file
, indent
, string
, flags
, "",
24668 &rs6000_builtin_mask_names
[0],
24669 ARRAY_SIZE (rs6000_builtin_mask_names
));
24672 /* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06,
24673 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
24674 -mupper-regs-df, etc.).
24676 If the user used -mno-power8-vector, we need to turn off all of the implicit
24677 ISA 2.07 and 3.0 options that relate to the vector unit.
24679 If the user used -mno-power9-vector, we need to turn off all of the implicit
24680 ISA 3.0 options that relate to the vector unit.
24682 This function does not handle explicit options such as the user specifying
24683 -mdirect-move. These are handled in rs6000_option_override_internal, and
24684 the appropriate error is given if needed.
24686 We return a mask of all of the implicit options that should not be enabled
24689 static HOST_WIDE_INT
24690 rs6000_disable_incompatible_switches (void)
24692 HOST_WIDE_INT ignore_masks
= rs6000_isa_flags_explicit
;
24695 static const struct {
24696 const HOST_WIDE_INT no_flag
; /* flag explicitly turned off. */
24697 const HOST_WIDE_INT dep_flags
; /* flags that depend on this option. */
24698 const char *const name
; /* name of the switch. */
24700 { OPTION_MASK_P9_VECTOR
, OTHER_P9_VECTOR_MASKS
, "power9-vector" },
24701 { OPTION_MASK_P8_VECTOR
, OTHER_P8_VECTOR_MASKS
, "power8-vector" },
24702 { OPTION_MASK_VSX
, OTHER_VSX_VECTOR_MASKS
, "vsx" },
24703 { OPTION_MASK_ALTIVEC
, OTHER_ALTIVEC_MASKS
, "altivec" },
24706 for (i
= 0; i
< ARRAY_SIZE (flags
); i
++)
24708 HOST_WIDE_INT no_flag
= flags
[i
].no_flag
;
24710 if ((rs6000_isa_flags
& no_flag
) == 0
24711 && (rs6000_isa_flags_explicit
& no_flag
) != 0)
24713 HOST_WIDE_INT dep_flags
= flags
[i
].dep_flags
;
24714 HOST_WIDE_INT set_flags
= (rs6000_isa_flags_explicit
24720 for (j
= 0; j
< ARRAY_SIZE (rs6000_opt_masks
); j
++)
24721 if ((set_flags
& rs6000_opt_masks
[j
].mask
) != 0)
24723 set_flags
&= ~rs6000_opt_masks
[j
].mask
;
24724 error ("%<-mno-%s%> turns off %<-m%s%>",
24726 rs6000_opt_masks
[j
].name
);
24729 gcc_assert (!set_flags
);
24732 rs6000_isa_flags
&= ~dep_flags
;
24733 ignore_masks
|= no_flag
| dep_flags
;
24737 return ignore_masks
;
24741 /* Helper function for printing the function name when debugging. */
24743 static const char *
24744 get_decl_name (tree fn
)
24751 name
= DECL_NAME (fn
);
24753 return "<no-name>";
24755 return IDENTIFIER_POINTER (name
);
24758 /* Return the clone id of the target we are compiling code for in a target
24759 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
24760 the priority list for the target clones (ordered from lowest to
24764 rs6000_clone_priority (tree fndecl
)
24766 tree fn_opts
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
24767 HOST_WIDE_INT isa_masks
;
24768 int ret
= CLONE_DEFAULT
;
24769 tree attrs
= lookup_attribute ("target", DECL_ATTRIBUTES (fndecl
));
24770 const char *attrs_str
= NULL
;
24772 attrs
= TREE_VALUE (TREE_VALUE (attrs
));
24773 attrs_str
= TREE_STRING_POINTER (attrs
);
24775 /* Return priority zero for default function. Return the ISA needed for the
24776 function if it is not the default. */
24777 if (strcmp (attrs_str
, "default") != 0)
24779 if (fn_opts
== NULL_TREE
)
24780 fn_opts
= target_option_default_node
;
24782 if (!fn_opts
|| !TREE_TARGET_OPTION (fn_opts
))
24783 isa_masks
= rs6000_isa_flags
;
24785 isa_masks
= TREE_TARGET_OPTION (fn_opts
)->x_rs6000_isa_flags
;
24787 for (ret
= CLONE_MAX
- 1; ret
!= 0; ret
--)
24788 if ((rs6000_clone_map
[ret
].isa_mask
& isa_masks
) != 0)
24792 if (TARGET_DEBUG_TARGET
)
24793 fprintf (stderr
, "rs6000_get_function_version_priority (%s) => %d\n",
24794 get_decl_name (fndecl
), ret
);
24799 /* This compares the priority of target features in function DECL1 and DECL2.
24800 It returns positive value if DECL1 is higher priority, negative value if
24801 DECL2 is higher priority and 0 if they are the same. Note, priorities are
24802 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
24805 rs6000_compare_version_priority (tree decl1
, tree decl2
)
24807 int priority1
= rs6000_clone_priority (decl1
);
24808 int priority2
= rs6000_clone_priority (decl2
);
24809 int ret
= priority1
- priority2
;
24811 if (TARGET_DEBUG_TARGET
)
24812 fprintf (stderr
, "rs6000_compare_version_priority (%s, %s) => %d\n",
24813 get_decl_name (decl1
), get_decl_name (decl2
), ret
);
24818 /* Make a dispatcher declaration for the multi-versioned function DECL.
24819 Calls to DECL function will be replaced with calls to the dispatcher
24820 by the front-end. Returns the decl of the dispatcher function. */
24823 rs6000_get_function_versions_dispatcher (void *decl
)
24825 tree fn
= (tree
) decl
;
24826 struct cgraph_node
*node
= NULL
;
24827 struct cgraph_node
*default_node
= NULL
;
24828 struct cgraph_function_version_info
*node_v
= NULL
;
24829 struct cgraph_function_version_info
*first_v
= NULL
;
24831 tree dispatch_decl
= NULL
;
24833 struct cgraph_function_version_info
*default_version_info
= NULL
;
24834 gcc_assert (fn
!= NULL
&& DECL_FUNCTION_VERSIONED (fn
));
24836 if (TARGET_DEBUG_TARGET
)
24837 fprintf (stderr
, "rs6000_get_function_versions_dispatcher (%s)\n",
24838 get_decl_name (fn
));
24840 node
= cgraph_node::get (fn
);
24841 gcc_assert (node
!= NULL
);
24843 node_v
= node
->function_version ();
24844 gcc_assert (node_v
!= NULL
);
24846 if (node_v
->dispatcher_resolver
!= NULL
)
24847 return node_v
->dispatcher_resolver
;
24849 /* Find the default version and make it the first node. */
24851 /* Go to the beginning of the chain. */
24852 while (first_v
->prev
!= NULL
)
24853 first_v
= first_v
->prev
;
24855 default_version_info
= first_v
;
24856 while (default_version_info
!= NULL
)
24858 const tree decl2
= default_version_info
->this_node
->decl
;
24859 if (is_function_default_version (decl2
))
24861 default_version_info
= default_version_info
->next
;
24864 /* If there is no default node, just return NULL. */
24865 if (default_version_info
== NULL
)
24868 /* Make default info the first node. */
24869 if (first_v
!= default_version_info
)
24871 default_version_info
->prev
->next
= default_version_info
->next
;
24872 if (default_version_info
->next
)
24873 default_version_info
->next
->prev
= default_version_info
->prev
;
24874 first_v
->prev
= default_version_info
;
24875 default_version_info
->next
= first_v
;
24876 default_version_info
->prev
= NULL
;
24879 default_node
= default_version_info
->this_node
;
24881 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
24882 error_at (DECL_SOURCE_LOCATION (default_node
->decl
),
24883 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
24884 "exports hardware capability bits");
24887 if (targetm
.has_ifunc_p ())
24889 struct cgraph_function_version_info
*it_v
= NULL
;
24890 struct cgraph_node
*dispatcher_node
= NULL
;
24891 struct cgraph_function_version_info
*dispatcher_version_info
= NULL
;
24893 /* Right now, the dispatching is done via ifunc. */
24894 dispatch_decl
= make_dispatcher_decl (default_node
->decl
);
24896 dispatcher_node
= cgraph_node::get_create (dispatch_decl
);
24897 gcc_assert (dispatcher_node
!= NULL
);
24898 dispatcher_node
->dispatcher_function
= 1;
24899 dispatcher_version_info
24900 = dispatcher_node
->insert_new_function_version ();
24901 dispatcher_version_info
->next
= default_version_info
;
24902 dispatcher_node
->definition
= 1;
24904 /* Set the dispatcher for all the versions. */
24905 it_v
= default_version_info
;
24906 while (it_v
!= NULL
)
24908 it_v
->dispatcher_resolver
= dispatch_decl
;
24914 error_at (DECL_SOURCE_LOCATION (default_node
->decl
),
24915 "multiversioning needs %<ifunc%> which is not supported "
24920 return dispatch_decl
;
24923 /* Make the resolver function decl to dispatch the versions of a multi-
24924 versioned function, DEFAULT_DECL. Create an empty basic block in the
24925 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
24929 make_resolver_func (const tree default_decl
,
24930 const tree dispatch_decl
,
24931 basic_block
*empty_bb
)
24933 /* Make the resolver function static. The resolver function returns
24935 tree decl_name
= clone_function_name (default_decl
, "resolver");
24936 const char *resolver_name
= IDENTIFIER_POINTER (decl_name
);
24937 tree type
= build_function_type_list (ptr_type_node
, NULL_TREE
);
24938 tree decl
= build_fn_decl (resolver_name
, type
);
24939 SET_DECL_ASSEMBLER_NAME (decl
, decl_name
);
24941 DECL_NAME (decl
) = decl_name
;
24942 TREE_USED (decl
) = 1;
24943 DECL_ARTIFICIAL (decl
) = 1;
24944 DECL_IGNORED_P (decl
) = 0;
24945 TREE_PUBLIC (decl
) = 0;
24946 DECL_UNINLINABLE (decl
) = 1;
24948 /* Resolver is not external, body is generated. */
24949 DECL_EXTERNAL (decl
) = 0;
24950 DECL_EXTERNAL (dispatch_decl
) = 0;
24952 DECL_CONTEXT (decl
) = NULL_TREE
;
24953 DECL_INITIAL (decl
) = make_node (BLOCK
);
24954 DECL_STATIC_CONSTRUCTOR (decl
) = 0;
24956 if (DECL_COMDAT_GROUP (default_decl
)
24957 || TREE_PUBLIC (default_decl
))
24959 /* In this case, each translation unit with a call to this
24960 versioned function will put out a resolver. Ensure it
24961 is comdat to keep just one copy. */
24962 DECL_COMDAT (decl
) = 1;
24963 make_decl_one_only (decl
, DECL_ASSEMBLER_NAME (decl
));
24966 TREE_PUBLIC (dispatch_decl
) = 0;
24968 /* Build result decl and add to function_decl. */
24969 tree t
= build_decl (UNKNOWN_LOCATION
, RESULT_DECL
, NULL_TREE
, ptr_type_node
);
24970 DECL_CONTEXT (t
) = decl
;
24971 DECL_ARTIFICIAL (t
) = 1;
24972 DECL_IGNORED_P (t
) = 1;
24973 DECL_RESULT (decl
) = t
;
24975 gimplify_function_tree (decl
);
24976 push_cfun (DECL_STRUCT_FUNCTION (decl
));
24977 *empty_bb
= init_lowered_empty_function (decl
, false,
24978 profile_count::uninitialized ());
24980 cgraph_node::add_new_function (decl
, true);
24981 symtab
->call_cgraph_insertion_hooks (cgraph_node::get_create (decl
));
24985 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
24986 DECL_ATTRIBUTES (dispatch_decl
)
24987 = make_attribute ("ifunc", resolver_name
, DECL_ATTRIBUTES (dispatch_decl
));
24989 cgraph_node::create_same_body_alias (dispatch_decl
, decl
);
24994 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
24995 return a pointer to VERSION_DECL if we are running on a machine that
24996 supports the index CLONE_ISA hardware architecture bits. This function will
24997 be called during version dispatch to decide which function version to
24998 execute. It returns the basic block at the end, to which more conditions
25002 add_condition_to_bb (tree function_decl
, tree version_decl
,
25003 int clone_isa
, basic_block new_bb
)
25005 push_cfun (DECL_STRUCT_FUNCTION (function_decl
));
25007 gcc_assert (new_bb
!= NULL
);
25008 gimple_seq gseq
= bb_seq (new_bb
);
25011 tree convert_expr
= build1 (CONVERT_EXPR
, ptr_type_node
,
25012 build_fold_addr_expr (version_decl
));
25013 tree result_var
= create_tmp_var (ptr_type_node
);
25014 gimple
*convert_stmt
= gimple_build_assign (result_var
, convert_expr
);
25015 gimple
*return_stmt
= gimple_build_return (result_var
);
25017 if (clone_isa
== CLONE_DEFAULT
)
25019 gimple_seq_add_stmt (&gseq
, convert_stmt
);
25020 gimple_seq_add_stmt (&gseq
, return_stmt
);
25021 set_bb_seq (new_bb
, gseq
);
25022 gimple_set_bb (convert_stmt
, new_bb
);
25023 gimple_set_bb (return_stmt
, new_bb
);
25028 tree bool_zero
= build_int_cst (bool_int_type_node
, 0);
25029 tree cond_var
= create_tmp_var (bool_int_type_node
);
25030 tree predicate_decl
= rs6000_builtin_decls
[(int) RS6000_BIF_CPU_SUPPORTS
];
25031 const char *arg_str
= rs6000_clone_map
[clone_isa
].name
;
25032 tree predicate_arg
= build_string_literal (strlen (arg_str
) + 1, arg_str
);
25033 gimple
*call_cond_stmt
= gimple_build_call (predicate_decl
, 1, predicate_arg
);
25034 gimple_call_set_lhs (call_cond_stmt
, cond_var
);
25036 gimple_set_block (call_cond_stmt
, DECL_INITIAL (function_decl
));
25037 gimple_set_bb (call_cond_stmt
, new_bb
);
25038 gimple_seq_add_stmt (&gseq
, call_cond_stmt
);
25040 gimple
*if_else_stmt
= gimple_build_cond (NE_EXPR
, cond_var
, bool_zero
,
25041 NULL_TREE
, NULL_TREE
);
25042 gimple_set_block (if_else_stmt
, DECL_INITIAL (function_decl
));
25043 gimple_set_bb (if_else_stmt
, new_bb
);
25044 gimple_seq_add_stmt (&gseq
, if_else_stmt
);
25046 gimple_seq_add_stmt (&gseq
, convert_stmt
);
25047 gimple_seq_add_stmt (&gseq
, return_stmt
);
25048 set_bb_seq (new_bb
, gseq
);
25050 basic_block bb1
= new_bb
;
25051 edge e12
= split_block (bb1
, if_else_stmt
);
25052 basic_block bb2
= e12
->dest
;
25053 e12
->flags
&= ~EDGE_FALLTHRU
;
25054 e12
->flags
|= EDGE_TRUE_VALUE
;
25056 edge e23
= split_block (bb2
, return_stmt
);
25057 gimple_set_bb (convert_stmt
, bb2
);
25058 gimple_set_bb (return_stmt
, bb2
);
25060 basic_block bb3
= e23
->dest
;
25061 make_edge (bb1
, bb3
, EDGE_FALSE_VALUE
);
25064 make_edge (bb2
, EXIT_BLOCK_PTR_FOR_FN (cfun
), 0);
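/* Illustrative sketch (not part of the original source): the gimple built
   above corresponds roughly to the following hand-written resolver, where
   each clone is tested with __builtin_cpu_supports and the default version
   is returned last.  The clone names and tested feature strings are
   hypothetical; not compiled here.  */
#if 0
extern int foo_power10 (int), foo_power9 (int), foo_default (int);

static void *
foo_resolver (void)
{
  if (__builtin_cpu_supports ("arch_3_1"))
    return (void *) foo_power10;
  if (__builtin_cpu_supports ("arch_3_00"))
    return (void *) foo_power9;
  return (void *) foo_default;
}
#endif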
25070 /* This function generates the dispatch function for multi-versioned functions.
25071 DISPATCH_DECL is the function which will contain the dispatch logic.
25072 FNDECLS are the function choices for dispatch, and is a tree chain.
25073 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
25074 code is generated. */
25077 dispatch_function_versions (tree dispatch_decl
,
25079 basic_block
*empty_bb
)
25083 vec
<tree
> *fndecls
;
25084 tree clones
[CLONE_MAX
];
25086 if (TARGET_DEBUG_TARGET
)
25087 fputs ("dispatch_function_versions, top\n", stderr
);
25089 gcc_assert (dispatch_decl
!= NULL
25090 && fndecls_p
!= NULL
25091 && empty_bb
!= NULL
);
25093 /* fndecls_p is actually a vector. */
25094 fndecls
= static_cast<vec
<tree
> *> (fndecls_p
);
25096 /* At least one more version other than the default. */
25097 gcc_assert (fndecls
->length () >= 2);
25099 /* The first version in the vector is the default decl. */
25100 memset ((void *) clones
, '\0', sizeof (clones
));
25101 clones
[CLONE_DEFAULT
] = (*fndecls
)[0];
25103 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
25104 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
25105 __builtin_cpu_support ensures that the TOC fields are setup by requiring a
25106 recent glibc. If we ever need to call __builtin_cpu_init, we would need
25107 to insert the code here to do the call. */
25109 for (ix
= 1; fndecls
->iterate (ix
, &ele
); ++ix
)
25111 int priority
= rs6000_clone_priority (ele
);
25112 if (!clones
[priority
])
25113 clones
[priority
] = ele
;
25116 for (ix
= CLONE_MAX
- 1; ix
>= 0; ix
--)
25119 if (TARGET_DEBUG_TARGET
)
25120 fprintf (stderr
, "dispatch_function_versions, clone %d, %s\n",
25121 ix
, get_decl_name (clones
[ix
]));
25123 *empty_bb
= add_condition_to_bb (dispatch_decl
, clones
[ix
], ix
,
25130 /* Generate the dispatching code body to dispatch multi-versioned function
25131 DECL. The target hook is called to process the "target" attributes and
25132 provide the code to dispatch the right function at run-time. NODE points
25133 to the dispatcher decl whose body will be created. */
25136 rs6000_generate_version_dispatcher_body (void *node_p
)
25139 basic_block empty_bb
;
25140 struct cgraph_node
*node
= (cgraph_node
*) node_p
;
25141 struct cgraph_function_version_info
*ninfo
= node
->function_version ();
25143 if (ninfo
->dispatcher_resolver
)
25144 return ninfo
->dispatcher_resolver
;
25146 /* node is going to be an alias, so remove the finalized bit. */
25147 node
->definition
= false;
25149 /* The first version in the chain corresponds to the default version. */
25150 ninfo
->dispatcher_resolver
= resolver
25151 = make_resolver_func (ninfo
->next
->this_node
->decl
, node
->decl
, &empty_bb
);
25153 if (TARGET_DEBUG_TARGET
)
25154 fprintf (stderr
, "rs6000_get_function_versions_dispatcher, %s\n",
25155 get_decl_name (resolver
));
25157 push_cfun (DECL_STRUCT_FUNCTION (resolver
));
25158 auto_vec
<tree
, 2> fn_ver_vec
;
25160 for (struct cgraph_function_version_info
*vinfo
= ninfo
->next
;
25162 vinfo
= vinfo
->next
)
25164 struct cgraph_node
*version
= vinfo
->this_node
;
25165 /* Check for virtual functions here again, as by this time it should
25166 have been determined if this function needs a vtable index or
25167 not. This happens for methods in derived classes that override
25168 virtual methods in base classes but are not explicitly marked as
25170 if (DECL_VINDEX (version
->decl
))
25171 sorry ("Virtual function multiversioning not supported");
25173 fn_ver_vec
.safe_push (version
->decl
);
25176 dispatch_function_versions (resolver
, &fn_ver_vec
, &empty_bb
);
25177 cgraph_edge::rebuild_edges ();
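/* Illustrative sketch (not part of the original source): the dispatcher
   machinery above backs the target_clones attribute on PowerPC; the ifunc
   resolver generated for it picks a clone at load time via the hardware
   capability bits exposed by glibc.  Hypothetical use; not compiled here.  */
#if 0
__attribute__ ((target_clones ("cpu=power10,cpu=power9,default")))
double
dot (const double *a, const double *b, int n)
{
  double sum = 0.0;
  for (int i = 0; i < n; i++)
    sum += a[i] * b[i];
  return sum;
}
#endif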
25182 /* Hook to decide if we need to scan function gimple statements to
25183 collect target specific information for inlining, and update the
25184 corresponding RS6000_FN_TARGET_INFO_* bit in INFO if we are able
25185 to predict which ISA feature is used at this time. Return true
25186 if we need to scan, otherwise return false. */
25189 rs6000_need_ipa_fn_target_info (const_tree decl
,
25190 unsigned int &info ATTRIBUTE_UNUSED
)
25192 tree target
= DECL_FUNCTION_SPECIFIC_TARGET (decl
);
25194 target
= target_option_default_node
;
25195 struct cl_target_option
*opts
= TREE_TARGET_OPTION (target
);
25197 /* See PR102059, we only handle HTM for now, so will only do
25198 the consequent scannings when HTM feature enabled. */
25199 if (opts
->x_rs6000_isa_flags
& OPTION_MASK_HTM
)
25205 /* Hook to update target specific information INFO for inlining by
25206 checking the given STMT. Return false if we don't need to scan
25207 any more, otherwise return true. */
25210 rs6000_update_ipa_fn_target_info (unsigned int &info
, const gimple
*stmt
)
25212 /* Assume inline asm can use any instruction features. */
25213 if (gimple_code (stmt
) == GIMPLE_ASM
)
25215 /* Should set any bits we concerned, for now OPTION_MASK_HTM is
25216 the only bit we care about. */
25217 info
|= RS6000_FN_TARGET_INFO_HTM
;
25220 else if (gimple_code (stmt
) == GIMPLE_CALL
)
25222 tree fndecl
= gimple_call_fndecl (stmt
);
25223 if (fndecl
&& fndecl_built_in_p (fndecl
, BUILT_IN_MD
))
25225 enum rs6000_gen_builtins fcode
25226 = (enum rs6000_gen_builtins
) DECL_MD_FUNCTION_CODE (fndecl
);
25227 /* HTM bifs definitely exploit HTM insns. */
25228 if (bif_is_htm (rs6000_builtin_info
[fcode
]))
25230 info
|= RS6000_FN_TARGET_INFO_HTM
;
25239 /* Hook to determine if one function can safely inline another. */
25242 rs6000_can_inline_p (tree caller
, tree callee
)
25245 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
25246 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
25248 /* If the callee has no option attributes, then it is ok to inline. */
25254 HOST_WIDE_INT caller_isa
;
25255 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
25256 HOST_WIDE_INT callee_isa
= callee_opts
->x_rs6000_isa_flags
;
25257 HOST_WIDE_INT explicit_isa
= callee_opts
->x_rs6000_isa_flags_explicit
;
25259 /* If the caller has option attributes, then use them.
25260 Otherwise, use the command line options. */
25262 caller_isa
= TREE_TARGET_OPTION (caller_tree
)->x_rs6000_isa_flags
;
25264 caller_isa
= rs6000_isa_flags
;
25266 cgraph_node
*callee_node
= cgraph_node::get (callee
);
25267 if (ipa_fn_summaries
&& ipa_fn_summaries
->get (callee_node
) != NULL
)
25269 unsigned int info
= ipa_fn_summaries
->get (callee_node
)->target_info
;
25270 if ((info
& RS6000_FN_TARGET_INFO_HTM
) == 0)
25272 callee_isa
&= ~OPTION_MASK_HTM
;
25273 explicit_isa
&= ~OPTION_MASK_HTM
;
25277 /* The callee's options must be a subset of the caller's options, i.e.
25278 a vsx function may inline an altivec function, but a no-vsx function
25279 must not inline a vsx function. However, for those options that the
25280 callee has explicitly enabled or disabled, then we must enforce that
25281 the callee's and caller's options match exactly; see PR70010. */
25282 if (((caller_isa
& callee_isa
) == callee_isa
)
25283 && (caller_isa
& explicit_isa
) == (callee_isa
& explicit_isa
))
25287 if (TARGET_DEBUG_TARGET
)
25288 fprintf (stderr
, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
25289 get_decl_name (caller
), get_decl_name (callee
),
25290 (ret
? "can" : "cannot"));
/* Allocate a stack temp and fixup the address so it meets the particular
   memory requirements (either offsettable or REG+REG addressing).  */

rtx
rs6000_allocate_stack_temp (machine_mode mode,
                            bool offsettable_p,
                            bool reg_reg_p)
{
  rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  rtx addr = XEXP (stack, 0);
  int strict_p = reload_completed;

  if (!legitimate_indirect_address_p (addr, strict_p))
    {
      if (offsettable_p
          && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
        stack = replace_equiv_address (stack, copy_addr_to_reg (addr));

      else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
        stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
    }

  return stack;
}
/* Given a memory reference, if it is not a reg or reg+reg addressing,
   convert to such a form to deal with memory reference instructions
   like STFIWX and LDBRX that only take reg+reg addressing.  */

rtx
rs6000_force_indexed_or_indirect_mem (rtx x)
{
  machine_mode mode = GET_MODE (x);

  gcc_assert (MEM_P (x));
  if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
    {
      rtx addr = XEXP (x, 0);
      if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
        {
          rtx reg = XEXP (addr, 0);
          HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
          rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
          gcc_assert (REG_P (reg));
          emit_insn (gen_add3_insn (reg, reg, size_rtx));
          addr = reg;
        }
      else if (GET_CODE (addr) == PRE_MODIFY)
        {
          rtx reg = XEXP (addr, 0);
          rtx expr = XEXP (addr, 1);
          gcc_assert (REG_P (reg));
          gcc_assert (GET_CODE (expr) == PLUS);
          emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
          addr = reg;
        }

      if (GET_CODE (addr) == PLUS)
        {
          rtx op0 = XEXP (addr, 0);
          rtx op1 = XEXP (addr, 1);
          op0 = force_reg (Pmode, op0);
          op1 = force_reg (Pmode, op1);
          x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
        }
      else
        x = replace_equiv_address (x, force_reg (Pmode, addr));
    }

  return x;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the RS/6000, all integer constants are acceptable, most won't be valid
   for particular insns, though.  Only easy FP constants are acceptable.  */

static bool
rs6000_legitimate_constant_p (machine_mode mode, rtx x)
{
  if (TARGET_ELF && tls_referenced_p (x))
    return false;

  if (CONST_DOUBLE_P (x))
    return easy_fp_constant (x, mode);

  if (GET_CODE (x) == CONST_VECTOR)
    return easy_vector_constant (x, mode);

  return true;
}
/* Implement TARGET_PRECOMPUTE_TLS_P.

   On AIX, TLS symbols are in the TOC, which is maintained in the
   constant pool.  AIX TOC TLS symbols need to be pre-computed, but
   must be considered legitimate constants.  */

static bool
rs6000_aix_precompute_tls_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return tls_referenced_p (x);
}
/* Return TRUE iff the sequence ending in LAST sets the static chain.  */

static bool
chain_already_loaded (rtx_insn *last)
{
  for (; last != NULL; last = PREV_INSN (last))
    {
      if (NONJUMP_INSN_P (last))
        {
          rtx patt = PATTERN (last);

          if (GET_CODE (patt) == SET)
            {
              rtx lhs = XEXP (patt, 0);

              if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
                return true;
            }
        }
    }
  return false;
}
/* Expand code to perform a call under the AIX or ELFv2 ABI.  */

void
rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
{
  rtx func = func_desc;
  rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
  rtx toc_load = NULL_RTX;
  rtx toc_restore = NULL_RTX;
  rtx func_addr;
  rtx abi_reg = NULL_RTX;
  rtx call[4];
  int n_call;
  rtx insn;
  bool is_pltseq_longcall;

  if (global_tlsarg)
    tlsarg = global_tlsarg;

  /* Handle longcall attributes.  */
  is_pltseq_longcall = false;
  if ((INTVAL (cookie) & CALL_LONG) != 0
      && GET_CODE (func_desc) == SYMBOL_REF)
    {
      func = rs6000_longcall_ref (func_desc, tlsarg);
      if (TARGET_PLTSEQ)
	is_pltseq_longcall = true;
    }

  /* Handle indirect calls.  */
  if (!SYMBOL_REF_P (func)
      || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
    {
      if (!rs6000_pcrel_p ())
	{
	  /* Save the TOC into its reserved slot before the call,
	     and prepare to restore it after the call.  */
	  rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
	  rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
						 gen_rtvec (1, stack_toc_offset),
						 UNSPEC_TOCSLOT);
	  toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);

	  /* Can we optimize saving the TOC in the prologue or
	     do we need to do it at every call?  */
	  if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
	    cfun->machine->save_toc_in_prologue = true;
	  else
	    {
	      rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
	      rtx stack_toc_mem = gen_frame_mem (Pmode,
						 gen_rtx_PLUS (Pmode, stack_ptr,
							       stack_toc_offset));
	      MEM_VOLATILE_P (stack_toc_mem) = 1;
	      if (is_pltseq_longcall)
		{
		  rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
		  rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
		  emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
		}
	      else
		emit_move_insn (stack_toc_mem, toc_reg);
	    }
	}

      if (DEFAULT_ABI == ABI_ELFv2)
	{
	  /* A function pointer in the ELFv2 ABI is just a plain address, but
	     the ABI requires it to be loaded into r12 before the call.  */
	  func_addr = gen_rtx_REG (Pmode, 12);
	  emit_move_insn (func_addr, func);
	  abi_reg = func_addr;
	  /* Indirect calls via CTR are strongly preferred over indirect
	     calls via LR, so move the address there.  Needed to mark
	     this insn for linker plt sequence editing too.  */
	  func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
	  if (is_pltseq_longcall)
	    {
	      rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
	      rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	      emit_insn (gen_rtx_SET (func_addr, mark_func));
	      v = gen_rtvec (2, func_addr, func_desc);
	      func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	    }
	  else
	    emit_move_insn (func_addr, abi_reg);
	}
      else
	{
	  /* A function pointer under AIX is a pointer to a data area whose
	     first word contains the actual address of the function, whose
	     second word contains a pointer to its TOC, and whose third word
	     contains a value to place in the static chain register (r11).
	     Note that if we load the static chain, our "trampoline" need
	     not have any executable code.  */

	  /* Load up address of the actual function.  */
	  func = force_reg (Pmode, func);
	  func_addr = gen_reg_rtx (Pmode);
	  emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));

	  /* Indirect calls via CTR are strongly preferred over indirect
	     calls via LR, so move the address there.  */
	  rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
	  emit_move_insn (ctr_reg, func_addr);
	  func_addr = ctr_reg;

	  /* Prepare to load the TOC of the called function.  Note that the
	     TOC load must happen immediately before the actual call so
	     that unwinding the TOC registers works correctly.  See the
	     comment in frob_update_context.  */
	  rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
	  rtx func_toc_mem = gen_rtx_MEM (Pmode,
					  gen_rtx_PLUS (Pmode, func,
							func_toc_offset));
	  toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);

	  /* If we have a static chain, load it up.  But, if the call was
	     originally direct, the 3rd word has not been written since no
	     trampoline has been built, so we ought not to load it, lest we
	     override a static chain value.  */
	  if (!(GET_CODE (func_desc) == SYMBOL_REF
		&& SYMBOL_REF_FUNCTION_P (func_desc))
	      && TARGET_POINTERS_TO_NESTED_FUNCTIONS
	      && !chain_already_loaded (get_current_sequence ()->next->last))
	    {
	      rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
	      rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
	      rtx func_sc_mem = gen_rtx_MEM (Pmode,
					     gen_rtx_PLUS (Pmode, func,
							   func_sc_offset));
	      emit_move_insn (sc_reg, func_sc_mem);
	      abi_reg = sc_reg;
	    }
	}
    }
  else
    {
      /* No TOC register needed for calls from PC-relative callers.  */
      if (!rs6000_pcrel_p ())
	/* Direct calls use the TOC: for local calls, the callee will
	   assume the TOC register is set; for non-local calls, the
	   PLT stub needs the TOC register.  */
	abi_reg = toc_reg;
      func_addr = func;
    }

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);
  call[1] = gen_rtx_USE (VOIDmode, cookie);
  n_call = 2;

  if (toc_load)
    call[n_call++] = toc_load;
  if (toc_restore)
    call[n_call++] = toc_restore;

  call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
  insn = emit_call_insn (insn);

  /* Mention all registers defined by the ABI to hold information
     as uses in CALL_INSN_FUNCTION_USAGE.  */
  if (abi_reg)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
}
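
/* Illustration of the data area dereferenced by rs6000_call_aix above for an
   AIX-style indirect call (a sketch for readability; it restates the layout
   described in the comment inside that function):

	word 0 (offset 0)			  - entry address of the function
	word 1 (offset GET_MODE_SIZE (Pmode))	  - the callee's TOC pointer
	word 2 (offset 2 * GET_MODE_SIZE (Pmode)) - static chain value for r11

   func_addr, func_toc_mem and func_sc_mem are built from exactly these three
   offsets.  */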
/* Expand code to perform a sibling call under the AIX or ELFv2 ABI.  */

void
rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
{
  rtx call[2];
  rtx insn;
  rtx r12 = NULL_RTX;
  rtx func_addr = func_desc;

  gcc_assert (INTVAL (cookie) == 0);

  if (global_tlsarg)
    tlsarg = global_tlsarg;

  /* For ELFv2, r12 and CTR need to hold the function address
     for an indirect call.  */
  if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
    {
      r12 = gen_rtx_REG (Pmode, 12);
      emit_move_insn (r12, func_desc);
      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
      emit_move_insn (func_addr, r12);
    }

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);

  call[1] = simple_return_rtx;

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
  insn = emit_call_insn (insn);

  /* Note use of the TOC register.  */
  if (!rs6000_pcrel_p ())
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
	     gen_rtx_REG (Pmode, TOC_REGNUM));

  /* Note use of r12.  */
  if (r12)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
}
/* Expand code to perform a call under the SYSV4 ABI.  */

void
rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
{
  rtx func = func_desc;
  rtx func_addr;
  rtx call[4];
  rtx insn;
  rtx abi_reg = NULL_RTX;
  int n;

  if (global_tlsarg)
    tlsarg = global_tlsarg;

  /* Handle longcall attributes.  */
  if ((INTVAL (cookie) & CALL_LONG) != 0
      && GET_CODE (func_desc) == SYMBOL_REF)
    {
      func = rs6000_longcall_ref (func_desc, tlsarg);
      /* If the longcall was implemented as an inline PLT call using
	 PLT unspecs then func will be REG:r11.  If not, func will be
	 a pseudo reg.  The inline PLT call sequence supports lazy
	 linking (and longcalls to functions in dlopen'd libraries).
	 The other style of longcalls don't.  The lazy linking entry
	 to the dynamic symbol resolver requires r11 be the function
	 address (as it is for linker generated PLT stubs).  Ensure
	 r11 stays valid to the bctrl by marking r11 used by the call.  */
      if (TARGET_PLTSEQ)
	abi_reg = func;
    }

  /* Handle indirect calls.  */
  if (GET_CODE (func) != SYMBOL_REF)
    {
      func = force_reg (Pmode, func);

      /* Indirect calls via CTR are strongly preferred over indirect
	 calls via LR, so move the address there.  That can't be left
	 to reload because we want to mark every instruction in an
	 inline PLT call sequence with a reloc, enabling the linker to
	 edit the sequence back to a direct call when that makes sense.  */
      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
      if (abi_reg)
	{
	  rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
	  rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	  emit_insn (gen_rtx_SET (func_addr, mark_func));
	  v = gen_rtvec (2, func_addr, func_desc);
	  func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	}
      else
	emit_move_insn (func_addr, func);
    }
  else
    func_addr = func;

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);
  call[1] = gen_rtx_USE (VOIDmode, cookie);
  n = 2;
  if (TARGET_SECURE_PLT
      && flag_pic
      && GET_CODE (func_addr) == SYMBOL_REF
      && !SYMBOL_REF_LOCAL_P (func_addr))
    call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);

  call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
  insn = emit_call_insn (insn);
  if (abi_reg)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
}
/* Expand code to perform a sibling call under the SysV4 ABI.  */

void
rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
{
  rtx func = func_desc;
  rtx func_addr;
  rtx call[3];
  rtx insn;
  rtx abi_reg = NULL_RTX;

  if (global_tlsarg)
    tlsarg = global_tlsarg;

  /* Handle longcall attributes.  */
  if ((INTVAL (cookie) & CALL_LONG) != 0
      && GET_CODE (func_desc) == SYMBOL_REF)
    {
      func = rs6000_longcall_ref (func_desc, tlsarg);
      /* If the longcall was implemented as an inline PLT call using
	 PLT unspecs then func will be REG:r11.  If not, func will be
	 a pseudo reg.  The inline PLT call sequence supports lazy
	 linking (and longcalls to functions in dlopen'd libraries).
	 The other style of longcalls don't.  The lazy linking entry
	 to the dynamic symbol resolver requires r11 be the function
	 address (as it is for linker generated PLT stubs).  Ensure
	 r11 stays valid to the bctr by marking r11 used by the call.  */
      if (TARGET_PLTSEQ)
	abi_reg = func;
    }

  /* Handle indirect calls.  */
  if (GET_CODE (func) != SYMBOL_REF)
    {
      func = force_reg (Pmode, func);

      /* Indirect sibcalls must go via CTR.  That can't be left to
	 reload because we want to mark every instruction in an inline
	 PLT call sequence with a reloc, enabling the linker to edit
	 the sequence back to a direct call when that makes sense.  */
      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
      if (abi_reg)
	{
	  rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
	  rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	  emit_insn (gen_rtx_SET (func_addr, mark_func));
	  v = gen_rtvec (2, func_addr, func_desc);
	  func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	}
      else
	emit_move_insn (func_addr, func);
    }
  else
    func_addr = func;

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);

  call[1] = gen_rtx_USE (VOIDmode, cookie);
  call[2] = simple_return_rtx;

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
  insn = emit_call_insn (insn);
  if (abi_reg)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
}
25789 /* Expand code to perform a call under the Darwin ABI.
25790 Modulo handling of mlongcall, this is much the same as sysv.
25791 if/when the longcall optimisation is removed, we could drop this
25792 code and use the sysv case (taking care to avoid the tls stuff).
25794 We can use this for sibcalls too, if needed. */
25797 rs6000_call_darwin_1 (rtx value
, rtx func_desc
, rtx tlsarg
,
25798 rtx cookie
, bool sibcall
)
25800 rtx func
= func_desc
;
25804 int cookie_val
= INTVAL (cookie
);
25805 bool make_island
= false;
  /* Handle longcall attributes, there are two cases for Darwin:
     1) Newer linkers are capable of synthesising any branch islands needed.
     2) We need a helper branch island synthesised by the compiler.
     The second case has mostly been retired and we don't use it for m64.
     In fact, it is an optimisation; we could just indirect as sysv does...
     ... however, keep it for backwards compatibility for now.
     If we're going to use this, then we need to keep the CALL_LONG bit set,
     so that we can pick up the special insn form later.  */
25815 if ((cookie_val
& CALL_LONG
) != 0
25816 && GET_CODE (func_desc
) == SYMBOL_REF
)
25818 /* FIXME: the longcall opt should not hang off this flag, it is most
25819 likely incorrect for kernel-mode code-generation. */
25820 if (darwin_symbol_stubs
&& TARGET_32BIT
)
25821 make_island
= true; /* Do nothing yet, retain the CALL_LONG flag. */
25824 /* The linker is capable of doing this, but the user explicitly
25825 asked for -mlongcall, so we'll do the 'normal' version. */
25826 func
= rs6000_longcall_ref (func_desc
, NULL_RTX
);
25827 cookie_val
&= ~CALL_LONG
; /* Handled, zap it. */
25831 /* Handle indirect calls. */
25832 if (GET_CODE (func
) != SYMBOL_REF
)
25834 func
= force_reg (Pmode
, func
);
25836 /* Indirect calls via CTR are strongly preferred over indirect
25837 calls via LR, and are required for indirect sibcalls, so move
25838 the address there. */
25839 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
25840 emit_move_insn (func_addr
, func
);
25845 /* Create the call. */
25846 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
25847 if (value
!= NULL_RTX
)
25848 call
[0] = gen_rtx_SET (value
, call
[0]);
25850 call
[1] = gen_rtx_USE (VOIDmode
, GEN_INT (cookie_val
));
25853 call
[2] = simple_return_rtx
;
25855 call
[2] = gen_hard_reg_clobber (Pmode
, LR_REGNO
);
25857 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (3, call
));
25858 insn
= emit_call_insn (insn
);
25859 /* Now we have the debug info in the insn, we can set up the branch island
25860 if we're using one. */
25863 tree funname
= get_identifier (XSTR (func_desc
, 0));
25865 if (no_previous_def (funname
))
25867 rtx label_rtx
= gen_label_rtx ();
25868 char *label_buf
, temp_buf
[256];
25869 ASM_GENERATE_INTERNAL_LABEL (temp_buf
, "L",
25870 CODE_LABEL_NUMBER (label_rtx
));
25871 label_buf
= temp_buf
[0] == '*' ? temp_buf
+ 1 : temp_buf
;
25872 tree labelname
= get_identifier (label_buf
);
25873 add_compiler_branch_island (labelname
, funname
,
25874 insn_line ((const rtx_insn
*)insn
));
25881 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED
, rtx func_desc ATTRIBUTE_UNUSED
,
25882 rtx tlsarg ATTRIBUTE_UNUSED
, rtx cookie ATTRIBUTE_UNUSED
)
25885 rs6000_call_darwin_1 (value
, func_desc
, tlsarg
, cookie
, false);
25893 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED
, rtx func_desc ATTRIBUTE_UNUSED
,
25894 rtx tlsarg ATTRIBUTE_UNUSED
, rtx cookie ATTRIBUTE_UNUSED
)
25897 rs6000_call_darwin_1 (value
, func_desc
, tlsarg
, cookie
, true);
/* Return whether we should generate PC-relative code for FNDECL.  */
bool
rs6000_fndecl_pcrel_p (const_tree fndecl)
{
  if (DEFAULT_ABI != ABI_ELFv2)
    return false;

  struct cl_target_option *opts = target_opts_for_fn (fndecl);

  return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
	  && TARGET_CMODEL == CMODEL_MEDIUM);
}

/* Return whether we should generate PC-relative code for *FN.  */
bool
rs6000_function_pcrel_p (struct function *fn)
{
  if (DEFAULT_ABI != ABI_ELFv2)
    return false;

  /* Optimize usual case.  */
  if (fn == cfun)
    return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
	    && TARGET_CMODEL == CMODEL_MEDIUM);

  return rs6000_fndecl_pcrel_p (fn->decl);
}

/* Return whether we should generate PC-relative code for the current
   function.  */
bool
rs6000_pcrel_p ()
{
  return (DEFAULT_ABI == ABI_ELFv2
	  && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
	  && TARGET_CMODEL == CMODEL_MEDIUM);
}
/* Given an address (ADDR), a mode (MODE), and what the format of the
   non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
   for the address.  */

enum insn_form
address_to_insn_form (rtx addr,
		      machine_mode mode,
		      enum non_prefixed_form non_prefixed_format)
{
  /* Single register is easy.  */
  if (REG_P (addr) || SUBREG_P (addr))
    return INSN_FORM_BASE_REG;

  /* If the non prefixed instruction format doesn't support offset addressing,
     make sure only indexed addressing is allowed.

     We special case SDmode so that the register allocator does not try to move
     SDmode through GPR registers, but instead uses the 32-bit integer load and
     store instructions for the floating point registers.  */
  if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
    {
      if (GET_CODE (addr) != PLUS)
	return INSN_FORM_BAD;

      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      if (!REG_P (op0) && !SUBREG_P (op0))
	return INSN_FORM_BAD;

      if (!REG_P (op1) && !SUBREG_P (op1))
	return INSN_FORM_BAD;

      return INSN_FORM_X;
    }

  /* Deal with update forms.  */
  if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
    return INSN_FORM_UPDATE;

  /* Handle PC-relative symbols and labels.  Check for both local and
     external symbols.  Assume labels are always local.  TLS symbols
     are not PC-relative for rs6000.  */
  if (TARGET_PCREL)
    {
      if (LABEL_REF_P (addr))
	return INSN_FORM_PCREL_LOCAL;

      if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
	{
	  if (!SYMBOL_REF_LOCAL_P (addr))
	    return INSN_FORM_PCREL_EXTERNAL;
	  else
	    return INSN_FORM_PCREL_LOCAL;
	}
    }

  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  /* Recognize LO_SUM addresses used with TOC and 32-bit addressing.  */
  if (GET_CODE (addr) == LO_SUM)
    return INSN_FORM_LO_SUM;

  /* Everything below must be an offset address of some form.  */
  if (GET_CODE (addr) != PLUS)
    return INSN_FORM_BAD;

  rtx op0 = XEXP (addr, 0);
  rtx op1 = XEXP (addr, 1);

  /* Check for indexed addresses.  */
  if (REG_P (op1) || SUBREG_P (op1))
    {
      if (REG_P (op0) || SUBREG_P (op0))
	return INSN_FORM_X;

      return INSN_FORM_BAD;
    }

  if (!CONST_INT_P (op1))
    return INSN_FORM_BAD;

  HOST_WIDE_INT offset = INTVAL (op1);
  if (!SIGNED_INTEGER_34BIT_P (offset))
    return INSN_FORM_BAD;

  /* Check for local and external PC-relative addresses.  Labels are always
     local.  TLS symbols are not PC-relative for rs6000.  */
  if (TARGET_PCREL)
    {
      if (LABEL_REF_P (op0))
	return INSN_FORM_PCREL_LOCAL;

      if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
	{
	  if (!SYMBOL_REF_LOCAL_P (op0))
	    return INSN_FORM_PCREL_EXTERNAL;
	  else
	    return INSN_FORM_PCREL_LOCAL;
	}
    }

  /* If it isn't PC-relative, the address must use a base register.  */
  if (!REG_P (op0) && !SUBREG_P (op0))
    return INSN_FORM_BAD;

  /* Large offsets must be prefixed.  */
  if (!SIGNED_INTEGER_16BIT_P (offset))
    {
      if (TARGET_PREFIXED)
	return INSN_FORM_PREFIXED_NUMERIC;

      return INSN_FORM_BAD;
    }

  /* We have a 16-bit offset, see what default instruction format to use.  */
  if (non_prefixed_format == NON_PREFIXED_DEFAULT)
    {
      unsigned size = GET_MODE_SIZE (mode);

      /* On 64-bit systems, assume 64-bit integers need to use DS form
	 addresses (for LD/STD).  VSX vectors need to use DQ form addresses
	 (for LXV and STXV).  TImode is problematical in that its normal usage
	 is expected to be GPRs where it wants a DS instruction format, but if
	 it goes into the vector registers, it wants a DQ instruction
	 format.  */
      if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
	non_prefixed_format = NON_PREFIXED_DS;

      else if (TARGET_VSX && size >= 16
	       && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)))
	non_prefixed_format = NON_PREFIXED_DQ;

      else
	non_prefixed_format = NON_PREFIXED_D;
    }

  /* Classify the D/DS/DQ-form addresses.  */
  switch (non_prefixed_format)
    {
      /* Instruction format D, all 16 bits are valid.  */
    case NON_PREFIXED_D:
      return INSN_FORM_D;

      /* Instruction format DS, bottom 2 bits must be 0.  */
    case NON_PREFIXED_DS:
      if ((offset & 3) == 0)
	return INSN_FORM_DS;

      else if (TARGET_PREFIXED)
	return INSN_FORM_PREFIXED_NUMERIC;

      else
	return INSN_FORM_BAD;

      /* Instruction format DQ, bottom 4 bits must be 0.  */
    case NON_PREFIXED_DQ:
      if ((offset & 15) == 0)
	return INSN_FORM_DQ;

      else if (TARGET_PREFIXED)
	return INSN_FORM_PREFIXED_NUMERIC;

      else
	return INSN_FORM_BAD;

    default:
      break;
    }

  return INSN_FORM_BAD;
}
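
/* Worked examples of the classification above, assuming a 64-bit target where
   TARGET_PREFIXED is enabled:

     (plus (reg 3) (const_int 32760)), DImode, NON_PREFIXED_DEFAULT
	-> DS form is chosen and (32760 & 3) == 0, so INSN_FORM_DS (ld/std).
     (plus (reg 3) (const_int 32761)), DImode, NON_PREFIXED_DEFAULT
	-> (32761 & 3) != 0, the offset cannot be encoded in a DS insn, so
	   INSN_FORM_PREFIXED_NUMERIC is returned instead.
     (plus (reg 3) (const_int 100000)), any mode
	-> the offset does not fit in 16 bits but does fit in 34 bits, so a
	   prefixed form is required (INSN_FORM_BAD without prefixed insns).  */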
/* Given address rtx ADDR for a load of MODE, is this legitimate for a
   non-prefixed D-form or X-form instruction?  NON_PREFIXED_FORMAT is
   given NON_PREFIXED_D or NON_PREFIXED_DS to indicate whether we want
   a D-form or DS-form instruction.  X-form and base_reg are always
   allowed.  */
bool
address_is_non_pfx_d_or_x (rtx addr, machine_mode mode,
			   enum non_prefixed_form non_prefixed_format)
{
  enum insn_form result_form;

  result_form = address_to_insn_form (addr, mode, non_prefixed_format);

  switch (non_prefixed_format)
    {
    case NON_PREFIXED_D:
      switch (result_form)
	{
	case INSN_FORM_X:
	case INSN_FORM_D:
	case INSN_FORM_DS:
	case INSN_FORM_BASE_REG:
	  return true;
	default:
	  return false;
	}
      break;
    case NON_PREFIXED_DS:
      switch (result_form)
	{
	case INSN_FORM_X:
	case INSN_FORM_DS:
	case INSN_FORM_BASE_REG:
	  return true;
	default:
	  return false;
	}
      break;
    default:
      break;
    }
  return false;
}

/* Return true if a REG with a given MODE that is loaded from or stored into a
   MEM location uses a non-prefixed D/DS/DQ-form address.  This is used to
   validate the load or store with the PCREL_OPT optimization to make sure it
   is an instruction that can be optimized.

   We need to specify the MODE separately from the REG to allow for loads that
   include zero/sign/float extension.  */

bool
pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem)
{
  /* If the instruction is indexed only like LFIWAX/LXSIWAX we cannot do the
     PCREL_OPT optimization.  */
  enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode);
  if (non_prefixed == NON_PREFIXED_X)
    return false;

  /* Check if this is a non-prefixed D/DS/DQ-form instruction.  */
  rtx addr = XEXP (mem, 0);
  enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed);
  return (iform == INSN_FORM_BASE_REG
	  || iform == INSN_FORM_D
	  || iform == INSN_FORM_DS
	  || iform == INSN_FORM_DQ);
}
/* Helper function to see if we're potentially looking at lfs/stfs.
   - PARALLEL containing a SET and a CLOBBER
   - stfs:
     - SET is from UNSPEC_SI_FROM_SF to MEM:SI
     - CLOBBER is a V4SF
   - lfs:
     - SET is from UNSPEC_SF_FROM_SI to REG:SF
     - CLOBBER is a DI.  */

static bool
is_lfs_stfs_insn (rtx_insn *insn)
{
  rtx pattern = PATTERN (insn);
  if (GET_CODE (pattern) != PARALLEL)
    return false;

  /* This should be a parallel with exactly one set and one clobber.  */
  if (XVECLEN (pattern, 0) != 2)
    return false;

  rtx set = XVECEXP (pattern, 0, 0);
  if (GET_CODE (set) != SET)
    return false;

  rtx clobber = XVECEXP (pattern, 0, 1);
  if (GET_CODE (clobber) != CLOBBER)
    return false;

  /* All we care about is that the destination of the SET is a mem:SI,
     the source should be an UNSPEC_SI_FROM_SF, and the clobber
     should be a scratch:V4SF.  */

  rtx dest = SET_DEST (set);
  rtx src = SET_SRC (set);
  rtx scratch = SET_DEST (clobber);

  if (GET_CODE (src) != UNSPEC)
    return false;

  /* stfs case.  */
  if (XINT (src, 1) == UNSPEC_SI_FROM_SF
      && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
      && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
    return true;

  /* lfs case.  */
  if (XINT (src, 1) == UNSPEC_SF_FROM_SI
      && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
      && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
    return true;

  return false;
}
/* Helper function to take a REG and a MODE and turn it into the non-prefixed
   instruction format (D/DS/DQ) used for offset memory.  */

enum non_prefixed_form
reg_to_non_prefixed (rtx reg, machine_mode mode)
{
  /* If it isn't a register, use the defaults.  */
  if (!REG_P (reg) && !SUBREG_P (reg))
    return NON_PREFIXED_DEFAULT;

  unsigned int r = reg_or_subregno (reg);

  /* If we have a pseudo, use the default instruction format.  */
  if (!HARD_REGISTER_NUM_P (r))
    return NON_PREFIXED_DEFAULT;

  unsigned size = GET_MODE_SIZE (mode);

  /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
     128-bit floating point, and 128-bit integers.  Before power9, only indexed
     addressing was available for vectors.  */
  if (FP_REGNO_P (r))
    {
      if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
	return NON_PREFIXED_D;

      else if (size < 8)
	return NON_PREFIXED_X;

      else if (TARGET_VSX && size >= 16
	       && (VECTOR_MODE_P (mode)
		   || VECTOR_ALIGNMENT_P (mode)
		   || mode == TImode || mode == CTImode))
	return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;

      else
	return NON_PREFIXED_DEFAULT;
    }

  /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
     128-bit floating point, and 128-bit integers.  Before power9, only indexed
     addressing was available.  */
  else if (ALTIVEC_REGNO_P (r))
    {
      if (!TARGET_P9_VECTOR)
	return NON_PREFIXED_X;

      if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
	return NON_PREFIXED_DS;

      else if (size < 8)
	return NON_PREFIXED_X;

      else if (TARGET_VSX && size >= 16
	       && (VECTOR_MODE_P (mode)
		   || VECTOR_ALIGNMENT_P (mode)
		   || mode == TImode || mode == CTImode))
	return NON_PREFIXED_DQ;

      else
	return NON_PREFIXED_DEFAULT;
    }

  /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
     otherwise.  Assume that any other register, such as LR, CRs, etc. will go
     through the GPR registers for memory operations.  */
  else if (TARGET_POWERPC64 && size >= 8)
    return NON_PREFIXED_DS;

  return NON_PREFIXED_D;
}
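
/* Summary of the mapping implemented above:

     non-registers and pseudos:	 NON_PREFIXED_DEFAULT.
     FPRs:			 D for SFmode, 8-byte scalars and 2-register
				 128-bit floats; X for smaller scalars; DQ for
				 16-byte vector-like modes with power9 vectors
				 (X without them); NON_PREFIXED_DEFAULT
				 otherwise.
     Altivec registers:		 X before power9; otherwise DS for scalars,
				 DQ for 16-byte vector-like modes, and
				 NON_PREFIXED_DEFAULT otherwise.
     GPRs and other registers:	 DS for 8-byte or larger items on 64-bit
				 targets, D otherwise.  */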
/* Whether a load instruction is a prefixed instruction.  This is called from
   the prefixed attribute processing.  */

bool
prefixed_load_p (rtx_insn *insn)
{
  /* Validate the insn to make sure it is a normal load insn.  */
  extract_insn_cached (insn);
  if (recog_data.n_operands < 2)
    return false;

  rtx reg = recog_data.operand[0];
  rtx mem = recog_data.operand[1];

  if (!REG_P (reg) && !SUBREG_P (reg))
    return false;

  if (!MEM_P (mem))
    return false;

  /* Prefixed load instructions do not support update or indexed forms.  */
  if (get_attr_indexed (insn) == INDEXED_YES
      || get_attr_update (insn) == UPDATE_YES)
    return false;

  /* LWA uses the DS format instead of the D format that LWZ uses.  */
  enum non_prefixed_form non_prefixed;
  machine_mode reg_mode = GET_MODE (reg);
  machine_mode mem_mode = GET_MODE (mem);

  if (mem_mode == SImode && reg_mode == DImode
      && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
    non_prefixed = NON_PREFIXED_DS;

  else
    non_prefixed = reg_to_non_prefixed (reg, mem_mode);

  if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
    return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
  else
    return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
}

/* Whether a store instruction is a prefixed instruction.  This is called from
   the prefixed attribute processing.  */

bool
prefixed_store_p (rtx_insn *insn)
{
  /* Validate the insn to make sure it is a normal store insn.  */
  extract_insn_cached (insn);
  if (recog_data.n_operands < 2)
    return false;

  rtx mem = recog_data.operand[0];
  rtx reg = recog_data.operand[1];

  if (!REG_P (reg) && !SUBREG_P (reg))
    return false;

  if (!MEM_P (mem))
    return false;

  /* Prefixed store instructions do not support update or indexed forms.  */
  if (get_attr_indexed (insn) == INDEXED_YES
      || get_attr_update (insn) == UPDATE_YES)
    return false;

  machine_mode mem_mode = GET_MODE (mem);
  rtx addr = XEXP (mem, 0);
  enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);

  /* Need to make sure we aren't looking at a stfs which doesn't look
     like the other things reg_to_non_prefixed/address_is_prefixed
     looks for.  */
  if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
    return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
  else
    return address_is_prefixed (addr, mem_mode, non_prefixed);
}
/* Whether a load immediate or add instruction is a prefixed instruction.  This
   is called from the prefixed attribute processing.  */

bool
prefixed_paddi_p (rtx_insn *insn)
{
  rtx set = single_set (insn);
  if (!set)
    return false;

  rtx dest = SET_DEST (set);
  rtx src = SET_SRC (set);

  if (!REG_P (dest) && !SUBREG_P (dest))
    return false;

  /* Is this a load immediate that can't be done with a simple ADDI or
     ADDIS?  */
  if (CONST_INT_P (src))
    return (satisfies_constraint_eI (src)
	    && !satisfies_constraint_I (src)
	    && !satisfies_constraint_L (src));

  /* Is this a PADDI instruction that can't be done with a simple ADDI or
     ADDIS?  */
  if (GET_CODE (src) == PLUS)
    {
      rtx op1 = XEXP (src, 1);

      return (CONST_INT_P (op1)
	      && satisfies_constraint_eI (op1)
	      && !satisfies_constraint_I (op1)
	      && !satisfies_constraint_L (op1));
    }

  /* If not, is it a load of a PC-relative address?  */
  if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
    return false;

  if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
    return false;

  enum insn_form iform = address_to_insn_form (src, Pmode,
					       NON_PREFIXED_DEFAULT);

  return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
}
/* Whether the next instruction needs a 'p' prefix issued before the
   instruction is printed out.  */
static bool prepend_p_to_next_insn;

/* Define FINAL_PRESCAN_INSN if some processing needs to be done before
   outputting the assembler code.  On the PowerPC, we remember if the current
   insn is a prefixed insn where we need to emit a 'p' before the insn.

   In addition, if the insn is part of a PC-relative reference to an external
   label optimization, this is recorded also.  */
void
rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
{
  prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn)
			    == MAYBE_PREFIXED_YES
			    && get_attr_prefixed (insn) == PREFIXED_YES);
}

/* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
   We use it to emit a 'p' for prefixed insns that is set in
   FINAL_PRESCAN_INSN.  */
void
rs6000_asm_output_opcode (FILE *stream)
{
  if (prepend_p_to_next_insn)
    {
      fprintf (stream, "p");

      /* Reset the flag in the case where there are separate insn lines in the
	 sequence, so the 'p' is only emitted for the first line.  This shows up
	 when we are doing the PCREL_OPT optimization, in that the label created
	 with %r<n> would have a leading 'p' printed.  */
      prepend_p_to_next_insn = false;
    }
}

/* Emit the relocation to tie the next instruction to a previous instruction
   that loads up an external address.  This is used to do the PCREL_OPT
   optimization.  Note, the label is generated after the PLD of the got
   pc-relative address to allow for the assembler to insert NOPs before the PLD
   instruction.  The operand is a constant integer that is the label
   number.  */

void
output_pcrel_opt_reloc (rtx label_num)
{
  rtx operands[1] = { label_num };
  output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
		   operands);
}
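
/* For example, with a label number of 42 the template above emits

	.reloc .Lpcrel42-8,R_PPC64_PCREL_OPT,.-(.Lpcrel42-8)

   which ties the PLD that defined .Lpcrel42 to the dependent instruction that
   follows this directive.  */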
/* Adjust the length of an INSN.  LENGTH is the currently-computed length and
   should be adjusted to reflect any required changes.  This macro is used when
   there is some systematic length adjustment required that would be difficult
   to express in the length attribute.

   In the PowerPC, we use this to adjust the length of an instruction if one or
   more prefixed instructions are generated, using the attribute
   num_prefixed_insns.  A prefixed instruction is 8 bytes instead of 4, but the
   hardware requires that a prefixed instruction does not cross a 64-byte
   boundary.  This means the compiler has to assume the length of the first
   prefixed instruction is 12 bytes instead of 8 bytes.  Since the length is
   already set for the non-prefixed instruction, we just need to update for the
   difference.  */

int
rs6000_adjust_insn_length (rtx_insn *insn, int length)
{
  if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
    {
      rtx pattern = PATTERN (insn);
      if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
	  && get_attr_prefixed (insn) == PREFIXED_YES)
	{
	  int num_prefixed = get_attr_max_prefixed_insns (insn);
	  length += 4 * (num_prefixed + 1);
	}
    }

  return length;
}
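
/* For example, a single prefixed load whose length was first computed as 4
   bytes has max_prefixed_insns of 1, so it ends up with a length of
   4 + 4 * (1 + 1) = 12 bytes: 8 bytes for the prefixed instruction itself
   plus a 4-byte allowance in case the assembler has to pad to keep it from
   crossing a 64-byte boundary.  */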
26528 #ifdef HAVE_GAS_HIDDEN
26529 # define USE_HIDDEN_LINKONCE 1
26531 # define USE_HIDDEN_LINKONCE 0
26534 /* Fills in the label name that should be used for a 476 link stack thunk. */
26537 get_ppc476_thunk_name (char name
[32])
26539 gcc_assert (TARGET_LINK_STACK
);
26541 if (USE_HIDDEN_LINKONCE
)
26542 sprintf (name
, "__ppc476.get_thunk");
26544 ASM_GENERATE_INTERNAL_LABEL (name
, "LPPC476_", 0);
26547 /* This function emits the simple thunk routine that is used to preserve
26548 the link stack on the 476 cpu. */
26550 static void rs6000_code_end (void) ATTRIBUTE_UNUSED
;
26552 rs6000_code_end (void)
26557 if (!TARGET_LINK_STACK
)
26560 get_ppc476_thunk_name (name
);
26562 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
, get_identifier (name
),
26563 build_function_type_list (void_type_node
, NULL_TREE
));
26564 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
26565 NULL_TREE
, void_type_node
);
26566 TREE_PUBLIC (decl
) = 1;
26567 TREE_STATIC (decl
) = 1;
26570 if (USE_HIDDEN_LINKONCE
&& !TARGET_XCOFF
)
26572 cgraph_node::create (decl
)->set_comdat_group (DECL_ASSEMBLER_NAME (decl
));
26573 targetm
.asm_out
.unique_section (decl
, 0);
26574 switch_to_section (get_named_section (decl
, NULL
, 0));
26575 DECL_WEAK (decl
) = 1;
26576 ASM_WEAKEN_DECL (asm_out_file
, decl
, name
, 0);
26577 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
26578 targetm
.asm_out
.assemble_visibility (decl
, VISIBILITY_HIDDEN
);
26579 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
26584 switch_to_section (text_section
);
26585 ASM_OUTPUT_LABEL (asm_out_file
, name
);
26588 DECL_INITIAL (decl
) = make_node (BLOCK
);
26589 current_function_decl
= decl
;
26590 allocate_struct_function (decl
, false);
26591 init_function_start (decl
);
26592 first_function_block_is_cold
= false;
26593 /* Make sure unwind info is emitted for the thunk if needed. */
26594 final_start_function (emit_barrier (), asm_out_file
, 1);
26596 fputs ("\tblr\n", asm_out_file
);
26598 final_end_function ();
26599 init_insn_lengths ();
26600 free_after_compilation (cfun
);
26602 current_function_decl
= NULL
;
/* Add r30 to hard reg set if the prologue sets it up and it is not
   pic_offset_table_rtx.  */

static void
rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
{
  if (!TARGET_SINGLE_PIC_BASE
      && TARGET_TOC
      && TARGET_MINIMAL_TOC
      && !constant_pool_empty_p ())
    add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
  if (cfun->machine->split_stack_argp_used)
    add_to_hard_reg_set (&set->set, Pmode, 12);

  /* Make sure the hard reg set doesn't include r2, which was possibly added
     via PIC_OFFSET_TABLE_REGNUM.  */
  if (TARGET_TOC)
    remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
}
/* Helper function for rs6000_split_logical to emit a logical instruction after
   splitting the operation to single GPR registers.

   DEST is the destination register.
   OP1 and OP2 are the input source registers.
   CODE is the base operation (AND, IOR, XOR, NOT).
   MODE is the machine mode.
   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */
26638 rs6000_split_logical_inner (rtx dest
,
26641 enum rtx_code code
,
26643 bool complement_final_p
,
26644 bool complement_op1_p
,
26645 bool complement_op2_p
)
26649 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
26650 if (op2
&& CONST_INT_P (op2
)
26651 && (mode
== SImode
|| (mode
== DImode
&& TARGET_POWERPC64
))
26652 && !complement_final_p
&& !complement_op1_p
&& !complement_op2_p
)
26654 HOST_WIDE_INT mask
= GET_MODE_MASK (mode
);
26655 HOST_WIDE_INT value
= INTVAL (op2
) & mask
;
26657 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
26662 emit_insn (gen_rtx_SET (dest
, const0_rtx
));
26666 else if (value
== mask
)
26668 if (!rtx_equal_p (dest
, op1
))
26669 emit_insn (gen_rtx_SET (dest
, op1
));
      /* Optimize IOR/XOR of 0 to be a simple move.  Split large operations
	 into separate ORI/ORIS or XORI/XORIS instructions.  */
26676 else if (code
== IOR
|| code
== XOR
)
26680 if (!rtx_equal_p (dest
, op1
))
26681 emit_insn (gen_rtx_SET (dest
, op1
));
26687 if (code
== AND
&& mode
== SImode
26688 && !complement_final_p
&& !complement_op1_p
&& !complement_op2_p
)
26690 emit_insn (gen_andsi3 (dest
, op1
, op2
));
26694 if (complement_op1_p
)
26695 op1
= gen_rtx_NOT (mode
, op1
);
26697 if (complement_op2_p
)
26698 op2
= gen_rtx_NOT (mode
, op2
);
26700 /* For canonical RTL, if only one arm is inverted it is the first. */
26701 if (!complement_op1_p
&& complement_op2_p
)
26702 std::swap (op1
, op2
);
26704 bool_rtx
= ((code
== NOT
)
26705 ? gen_rtx_NOT (mode
, op1
)
26706 : gen_rtx_fmt_ee (code
, mode
, op1
, op2
));
26708 if (complement_final_p
)
26709 bool_rtx
= gen_rtx_NOT (mode
, bool_rtx
);
26711 emit_insn (gen_rtx_SET (dest
, bool_rtx
));
26714 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
26715 operations are split immediately during RTL generation to allow for more
26716 optimizations of the AND/IOR/XOR.
26718 OPERANDS is an array containing the destination and two input operands.
26719 CODE is the base operation (AND, IOR, XOR, NOT).
26720 MODE is the machine mode.
26721 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26722 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26723 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
26724 CLOBBER_REG is either NULL or a scratch register of type CC to allow
26725 formation of the AND instructions. */
26728 rs6000_split_logical_di (rtx operands
[3],
26729 enum rtx_code code
,
26730 bool complement_final_p
,
26731 bool complement_op1_p
,
26732 bool complement_op2_p
)
26734 const HOST_WIDE_INT lower_32bits
= HOST_WIDE_INT_C(0xffffffff);
26735 const HOST_WIDE_INT upper_32bits
= ~ lower_32bits
;
26736 const HOST_WIDE_INT sign_bit
= HOST_WIDE_INT_C(0x80000000);
26737 enum hi_lo
{ hi
= 0, lo
= 1 };
26738 rtx op0_hi_lo
[2], op1_hi_lo
[2], op2_hi_lo
[2];
26741 op0_hi_lo
[hi
] = gen_highpart (SImode
, operands
[0]);
26742 op1_hi_lo
[hi
] = gen_highpart (SImode
, operands
[1]);
26743 op0_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[0]);
26744 op1_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[1]);
26747 op2_hi_lo
[hi
] = op2_hi_lo
[lo
] = NULL_RTX
;
26750 if (!CONST_INT_P (operands
[2]))
26752 op2_hi_lo
[hi
] = gen_highpart_mode (SImode
, DImode
, operands
[2]);
26753 op2_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[2]);
26757 HOST_WIDE_INT value
= INTVAL (operands
[2]);
26758 HOST_WIDE_INT value_hi_lo
[2];
26760 gcc_assert (!complement_final_p
);
26761 gcc_assert (!complement_op1_p
);
26762 gcc_assert (!complement_op2_p
);
26764 value_hi_lo
[hi
] = value
>> 32;
26765 value_hi_lo
[lo
] = value
& lower_32bits
;
26767 for (i
= 0; i
< 2; i
++)
26769 HOST_WIDE_INT sub_value
= value_hi_lo
[i
];
26771 if (sub_value
& sign_bit
)
26772 sub_value
|= upper_32bits
;
26774 op2_hi_lo
[i
] = GEN_INT (sub_value
);
26776 /* If this is an AND instruction, check to see if we need to load
26777 the value in a register. */
26778 if (code
== AND
&& sub_value
!= -1 && sub_value
!= 0
26779 && !and_operand (op2_hi_lo
[i
], SImode
))
26780 op2_hi_lo
[i
] = force_reg (SImode
, op2_hi_lo
[i
]);
26785 for (i
= 0; i
< 2; i
++)
26787 /* Split large IOR/XOR operations. */
26788 if ((code
== IOR
|| code
== XOR
)
26789 && CONST_INT_P (op2_hi_lo
[i
])
26790 && !complement_final_p
26791 && !complement_op1_p
26792 && !complement_op2_p
26793 && !logical_const_operand (op2_hi_lo
[i
], SImode
))
26795 HOST_WIDE_INT value
= INTVAL (op2_hi_lo
[i
]);
26796 HOST_WIDE_INT hi_16bits
= value
& HOST_WIDE_INT_C(0xffff0000);
26797 HOST_WIDE_INT lo_16bits
= value
& HOST_WIDE_INT_C(0x0000ffff);
26798 rtx tmp
= gen_reg_rtx (SImode
);
26800 /* Make sure the constant is sign extended. */
26801 if ((hi_16bits
& sign_bit
) != 0)
26802 hi_16bits
|= upper_32bits
;
26804 rs6000_split_logical_inner (tmp
, op1_hi_lo
[i
], GEN_INT (hi_16bits
),
26805 code
, SImode
, false, false, false);
26807 rs6000_split_logical_inner (op0_hi_lo
[i
], tmp
, GEN_INT (lo_16bits
),
26808 code
, SImode
, false, false, false);
26811 rs6000_split_logical_inner (op0_hi_lo
[i
], op1_hi_lo
[i
], op2_hi_lo
[i
],
26812 code
, SImode
, complement_final_p
,
26813 complement_op1_p
, complement_op2_p
);
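
/* As an example of the splitting above, an IOR with the constant 0x12345678
   cannot be done with a single ori or oris, so it is emitted as two inner
   operations: first with hi_16bits == 0x12340000 into a temporary, then with
   lo_16bits == 0x00005678 into the destination.  */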
26819 /* Split the insns that make up boolean operations operating on multiple GPR
26820 registers. The boolean MD patterns ensure that the inputs either are
26821 exactly the same as the output registers, or there is no overlap.
26823 OPERANDS is an array containing the destination and two input operands.
26824 CODE is the base operation (AND, IOR, XOR, NOT).
26825 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26826 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26827 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26830 rs6000_split_logical (rtx operands
[3],
26831 enum rtx_code code
,
26832 bool complement_final_p
,
26833 bool complement_op1_p
,
26834 bool complement_op2_p
)
26836 machine_mode mode
= GET_MODE (operands
[0]);
26837 machine_mode sub_mode
;
26839 int sub_size
, regno0
, regno1
, nregs
, i
;
26841 /* If this is DImode, use the specialized version that can run before
26842 register allocation. */
26843 if (mode
== DImode
&& !TARGET_POWERPC64
)
26845 rs6000_split_logical_di (operands
, code
, complement_final_p
,
26846 complement_op1_p
, complement_op2_p
);
26852 op2
= (code
== NOT
) ? NULL_RTX
: operands
[2];
26853 sub_mode
= (TARGET_POWERPC64
) ? DImode
: SImode
;
26854 sub_size
= GET_MODE_SIZE (sub_mode
);
26855 regno0
= REGNO (op0
);
26856 regno1
= REGNO (op1
);
26858 gcc_assert (reload_completed
);
26859 gcc_assert (IN_RANGE (regno0
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
26860 gcc_assert (IN_RANGE (regno1
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
26862 nregs
= rs6000_hard_regno_nregs
[(int)mode
][regno0
];
26863 gcc_assert (nregs
> 1);
26865 if (op2
&& REG_P (op2
))
26866 gcc_assert (IN_RANGE (REGNO (op2
), FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
26868 for (i
= 0; i
< nregs
; i
++)
26870 int offset
= i
* sub_size
;
26871 rtx sub_op0
= simplify_subreg (sub_mode
, op0
, mode
, offset
);
26872 rtx sub_op1
= simplify_subreg (sub_mode
, op1
, mode
, offset
);
26873 rtx sub_op2
= ((code
== NOT
)
26875 : simplify_subreg (sub_mode
, op2
, mode
, offset
));
26877 rs6000_split_logical_inner (sub_op0
, sub_op1
, sub_op2
, code
, sub_mode
,
26878 complement_final_p
, complement_op1_p
,
26885 /* Emit instructions to move SRC to DST. Called by splitters for
26886 multi-register moves. It will emit at most one instruction for
26887 each register that is accessed; that is, it won't emit li/lis pairs
26888 (or equivalent for 64-bit code). One of SRC or DST must be a hard
26892 rs6000_split_multireg_move (rtx dst
, rtx src
)
26894 /* The register number of the first register being moved. */
26896 /* The mode that is to be moved. */
26898 /* The mode that the move is being done in, and its size. */
26899 machine_mode reg_mode
;
26901 /* The number of registers that will be moved. */
26904 reg
= REG_P (dst
) ? REGNO (dst
) : REGNO (src
);
26905 mode
= GET_MODE (dst
);
26906 nregs
= hard_regno_nregs (reg
, mode
);
26908 /* If we have a vector quad register for MMA, and this is a load or store,
26909 see if we can use vector paired load/stores. */
26910 if (mode
== XOmode
&& TARGET_MMA
26911 && (MEM_P (dst
) || MEM_P (src
)))
26916 /* If we have a vector pair/quad mode, split it into two/four separate
26918 else if (mode
== OOmode
|| mode
== XOmode
)
26919 reg_mode
= V1TImode
;
26920 else if (FP_REGNO_P (reg
))
26921 reg_mode
= DECIMAL_FLOAT_MODE_P (mode
) ? DDmode
:
26922 (TARGET_HARD_FLOAT
? DFmode
: SFmode
);
26923 else if (ALTIVEC_REGNO_P (reg
))
26924 reg_mode
= V16QImode
;
26926 reg_mode
= word_mode
;
26927 reg_mode_size
= GET_MODE_SIZE (reg_mode
);
26929 gcc_assert (reg_mode_size
* nregs
== GET_MODE_SIZE (mode
));
  /* TDmode residing in FP registers is special, since the ISA requires that
     the lower-numbered word of a register pair is always the most significant
     word, even in little-endian mode.  This does not match the usual subreg
     semantics, so we cannot use simplify_gen_subreg in those cases.  Access
     the appropriate constituent registers "by hand" in little-endian mode.

     Note we do not need to check for destructive overlap here since TDmode
     can only reside in even/odd register pairs.  */
26939 if (FP_REGNO_P (reg
) && DECIMAL_FLOAT_MODE_P (mode
) && !BYTES_BIG_ENDIAN
)
26944 for (i
= 0; i
< nregs
; i
++)
26946 if (REG_P (src
) && FP_REGNO_P (REGNO (src
)))
26947 p_src
= gen_rtx_REG (reg_mode
, REGNO (src
) + nregs
- 1 - i
);
26949 p_src
= simplify_gen_subreg (reg_mode
, src
, mode
,
26950 i
* reg_mode_size
);
26952 if (REG_P (dst
) && FP_REGNO_P (REGNO (dst
)))
26953 p_dst
= gen_rtx_REG (reg_mode
, REGNO (dst
) + nregs
- 1 - i
);
26955 p_dst
= simplify_gen_subreg (reg_mode
, dst
, mode
,
26956 i
* reg_mode_size
);
26958 emit_insn (gen_rtx_SET (p_dst
, p_src
));
26964 /* The __vector_pair and __vector_quad modes are multi-register
26965 modes, so if we have to load or store the registers, we have to be
26966 careful to properly swap them if we're in little endian mode
26967 below. This means the last register gets the first memory
26968 location. We also need to be careful of using the right register
26969 numbers if we are splitting XO to OO. */
26970 if (mode
== OOmode
|| mode
== XOmode
)
26972 nregs
= hard_regno_nregs (reg
, mode
);
26973 int reg_mode_nregs
= hard_regno_nregs (reg
, reg_mode
);
26976 unsigned offset
= 0;
26977 unsigned size
= GET_MODE_SIZE (reg_mode
);
26979 /* If we are reading an accumulator register, we have to
26980 deprime it before we can access it. */
26982 && GET_MODE (src
) == XOmode
&& FP_REGNO_P (REGNO (src
)))
26983 emit_insn (gen_mma_xxmfacc (src
, src
));
26985 for (int i
= 0; i
< nregs
; i
+= reg_mode_nregs
)
26988 = WORDS_BIG_ENDIAN
? i
: (nregs
- reg_mode_nregs
- i
);
26989 rtx dst2
= adjust_address (dst
, reg_mode
, offset
);
26990 rtx src2
= gen_rtx_REG (reg_mode
, reg
+ subreg
);
26992 emit_insn (gen_rtx_SET (dst2
, src2
));
27000 unsigned offset
= 0;
27001 unsigned size
= GET_MODE_SIZE (reg_mode
);
27003 for (int i
= 0; i
< nregs
; i
+= reg_mode_nregs
)
27006 = WORDS_BIG_ENDIAN
? i
: (nregs
- reg_mode_nregs
- i
);
27007 rtx dst2
= gen_rtx_REG (reg_mode
, reg
+ subreg
);
27008 rtx src2
= adjust_address (src
, reg_mode
, offset
);
27010 emit_insn (gen_rtx_SET (dst2
, src2
));
27013 /* If we are writing an accumulator register, we have to
27014 prime it after we've written it. */
27016 && GET_MODE (dst
) == XOmode
&& FP_REGNO_P (REGNO (dst
)))
27017 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27022 if (GET_CODE (src
) == UNSPEC
27023 || GET_CODE (src
) == UNSPEC_VOLATILE
)
27025 gcc_assert (XINT (src
, 1) == UNSPEC_VSX_ASSEMBLE
27026 || XINT (src
, 1) == UNSPECV_MMA_ASSEMBLE
);
27027 gcc_assert (REG_P (dst
));
27028 if (GET_MODE (src
) == XOmode
)
27029 gcc_assert (FP_REGNO_P (REGNO (dst
)));
27030 if (GET_MODE (src
) == OOmode
)
27031 gcc_assert (VSX_REGNO_P (REGNO (dst
)));
27033 int nvecs
= XVECLEN (src
, 0);
27034 for (int i
= 0; i
< nvecs
; i
++)
27037 int regno
= reg
+ i
;
27039 if (WORDS_BIG_ENDIAN
)
27041 op
= XVECEXP (src
, 0, i
);
27043 /* If we are loading an even VSX register and the memory location
27044 is adjacent to the next register's memory location (if any),
27045 then we can load them both with one LXVP instruction. */
27046 if ((regno
& 1) == 0)
27048 rtx op2
= XVECEXP (src
, 0, i
+ 1);
27049 if (adjacent_mem_locations (op
, op2
) == op
)
27051 op
= adjust_address (op
, OOmode
, 0);
27052 /* Skip the next register, since we're going to
27053 load it together with this register. */
27060 op
= XVECEXP (src
, 0, nvecs
- i
- 1);
27062 /* If we are loading an even VSX register and the memory location
27063 is adjacent to the next register's memory location (if any),
27064 then we can load them both with one LXVP instruction. */
27065 if ((regno
& 1) == 0)
27067 rtx op2
= XVECEXP (src
, 0, nvecs
- i
- 2);
27068 if (adjacent_mem_locations (op2
, op
) == op2
)
27070 op
= adjust_address (op2
, OOmode
, 0);
27071 /* Skip the next register, since we're going to
27072 load it together with this register. */
27078 rtx dst_i
= gen_rtx_REG (GET_MODE (op
), regno
);
27079 emit_insn (gen_rtx_SET (dst_i
, op
));
27082 /* We are writing an accumulator register, so we have to
27083 prime it after we've written it. */
27084 if (GET_MODE (src
) == XOmode
)
27085 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27090 /* Register -> register moves can use common code. */
27093 if (REG_P (src
) && REG_P (dst
) && (REGNO (src
) < REGNO (dst
)))
27095 /* If we are reading an accumulator register, we have to
27096 deprime it before we can access it. */
27098 && GET_MODE (src
) == XOmode
&& FP_REGNO_P (REGNO (src
)))
27099 emit_insn (gen_mma_xxmfacc (src
, src
));
27101 /* Move register range backwards, if we might have destructive
27104 /* XO/OO are opaque so cannot use subregs. */
27105 if (mode
== OOmode
|| mode
== XOmode
)
27107 for (i
= nregs
- 1; i
>= 0; i
--)
27109 rtx dst_i
= gen_rtx_REG (reg_mode
, REGNO (dst
) + i
);
27110 rtx src_i
= gen_rtx_REG (reg_mode
, REGNO (src
) + i
);
27111 emit_insn (gen_rtx_SET (dst_i
, src_i
));
27116 for (i
= nregs
- 1; i
>= 0; i
--)
27117 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode
, dst
, mode
,
27118 i
* reg_mode_size
),
27119 simplify_gen_subreg (reg_mode
, src
, mode
,
27120 i
* reg_mode_size
)));
27123 /* If we are writing an accumulator register, we have to
27124 prime it after we've written it. */
27126 && GET_MODE (dst
) == XOmode
&& FP_REGNO_P (REGNO (dst
)))
27127 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27133 bool used_update
= false;
27134 rtx restore_basereg
= NULL_RTX
;
27136 if (MEM_P (src
) && INT_REGNO_P (reg
))
27140 if (GET_CODE (XEXP (src
, 0)) == PRE_INC
27141 || GET_CODE (XEXP (src
, 0)) == PRE_DEC
)
27144 breg
= XEXP (XEXP (src
, 0), 0);
27145 delta_rtx
= (GET_CODE (XEXP (src
, 0)) == PRE_INC
27146 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src
)))
27147 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src
))));
27148 emit_insn (gen_add3_insn (breg
, breg
, delta_rtx
));
27149 src
= replace_equiv_address (src
, breg
);
27151 else if (! rs6000_offsettable_memref_p (src
, reg_mode
, true))
27153 if (GET_CODE (XEXP (src
, 0)) == PRE_MODIFY
)
27155 rtx basereg
= XEXP (XEXP (src
, 0), 0);
27158 rtx ndst
= simplify_gen_subreg (reg_mode
, dst
, mode
, 0);
27159 emit_insn (gen_rtx_SET (ndst
,
27160 gen_rtx_MEM (reg_mode
,
27162 used_update
= true;
27165 emit_insn (gen_rtx_SET (basereg
,
27166 XEXP (XEXP (src
, 0), 1)));
27167 src
= replace_equiv_address (src
, basereg
);
27171 rtx basereg
= gen_rtx_REG (Pmode
, reg
);
27172 emit_insn (gen_rtx_SET (basereg
, XEXP (src
, 0)));
27173 src
= replace_equiv_address (src
, basereg
);
27177 breg
= XEXP (src
, 0);
27178 if (GET_CODE (breg
) == PLUS
|| GET_CODE (breg
) == LO_SUM
)
27179 breg
= XEXP (breg
, 0);
27181 /* If the base register we are using to address memory is
27182 also a destination reg, then change that register last. */
27184 && REGNO (breg
) >= REGNO (dst
)
27185 && REGNO (breg
) < REGNO (dst
) + nregs
)
27186 j
= REGNO (breg
) - REGNO (dst
);
27188 else if (MEM_P (dst
) && INT_REGNO_P (reg
))
27192 if (GET_CODE (XEXP (dst
, 0)) == PRE_INC
27193 || GET_CODE (XEXP (dst
, 0)) == PRE_DEC
)
27196 breg
= XEXP (XEXP (dst
, 0), 0);
27197 delta_rtx
= (GET_CODE (XEXP (dst
, 0)) == PRE_INC
27198 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst
)))
27199 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst
))));
27201 /* We have to update the breg before doing the store.
27202 Use store with update, if available. */
27206 rtx nsrc
= simplify_gen_subreg (reg_mode
, src
, mode
, 0);
27207 emit_insn (TARGET_32BIT
27208 ? (TARGET_POWERPC64
27209 ? gen_movdi_si_update (breg
, breg
, delta_rtx
, nsrc
)
27210 : gen_movsi_si_update (breg
, breg
, delta_rtx
, nsrc
))
27211 : gen_movdi_di_update (breg
, breg
, delta_rtx
, nsrc
));
27212 used_update
= true;
27215 emit_insn (gen_add3_insn (breg
, breg
, delta_rtx
));
27216 dst
= replace_equiv_address (dst
, breg
);
27218 else if (!rs6000_offsettable_memref_p (dst
, reg_mode
, true)
27219 && GET_CODE (XEXP (dst
, 0)) != LO_SUM
)
27221 if (GET_CODE (XEXP (dst
, 0)) == PRE_MODIFY
)
27223 rtx basereg
= XEXP (XEXP (dst
, 0), 0);
27226 rtx nsrc
= simplify_gen_subreg (reg_mode
, src
, mode
, 0);
27227 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode
,
27230 used_update
= true;
27233 emit_insn (gen_rtx_SET (basereg
,
27234 XEXP (XEXP (dst
, 0), 1)));
27235 dst
= replace_equiv_address (dst
, basereg
);
27239 rtx basereg
= XEXP (XEXP (dst
, 0), 0);
27240 rtx offsetreg
= XEXP (XEXP (dst
, 0), 1);
27241 gcc_assert (GET_CODE (XEXP (dst
, 0)) == PLUS
27243 && REG_P (offsetreg
)
27244 && REGNO (basereg
) != REGNO (offsetreg
));
27245 if (REGNO (basereg
) == 0)
27247 rtx tmp
= offsetreg
;
27248 offsetreg
= basereg
;
27251 emit_insn (gen_add3_insn (basereg
, basereg
, offsetreg
));
27252 restore_basereg
= gen_sub3_insn (basereg
, basereg
, offsetreg
);
27253 dst
= replace_equiv_address (dst
, basereg
);
27256 else if (GET_CODE (XEXP (dst
, 0)) != LO_SUM
)
27257 gcc_assert (rs6000_offsettable_memref_p (dst
, reg_mode
, true));
27260 /* If we are reading an accumulator register, we have to
27261 deprime it before we can access it. */
27262 if (TARGET_MMA
&& REG_P (src
)
27263 && GET_MODE (src
) == XOmode
&& FP_REGNO_P (REGNO (src
)))
27264 emit_insn (gen_mma_xxmfacc (src
, src
));
27266 for (i
= 0; i
< nregs
; i
++)
27268 /* Calculate index to next subword. */
27273 /* If compiler already emitted move of first word by
27274 store with update, no need to do anything. */
27275 if (j
== 0 && used_update
)
27278 /* XO/OO are opaque so cannot use subregs. */
27279 if (mode
== OOmode
|| mode
== XOmode
)
27281 rtx dst_i
= gen_rtx_REG (reg_mode
, REGNO (dst
) + j
);
27282 rtx src_i
= gen_rtx_REG (reg_mode
, REGNO (src
) + j
);
27283 emit_insn (gen_rtx_SET (dst_i
, src_i
));
27286 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode
, dst
, mode
,
27287 j
* reg_mode_size
),
27288 simplify_gen_subreg (reg_mode
, src
, mode
,
27289 j
* reg_mode_size
)));
27292 /* If we are writing an accumulator register, we have to
27293 prime it after we've written it. */
27294 if (TARGET_MMA
&& REG_P (dst
)
27295 && GET_MODE (dst
) == XOmode
&& FP_REGNO_P (REGNO (dst
)))
27296 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27298 if (restore_basereg
!= NULL_RTX
)
27299 emit_insn (restore_basereg
);
/* Return true if the peephole2 can combine a load involving a combination of
   an addis instruction and a load with an offset that can be fused together on
   a power8.  */

bool
fusion_gpr_load_p (rtx addis_reg,	/* register set via addis.  */
		   rtx addis_value,	/* addis value.  */
		   rtx target,		/* target register that is loaded.  */
		   rtx mem)		/* bottom part of the memory addr.  */
{
  rtx addr;
  rtx base_reg;

  /* Validate arguments.  */
  if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
    return false;

  if (!base_reg_operand (target, GET_MODE (target)))
    return false;

  if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
    return false;

  /* Allow sign/zero extension.  */
  if (GET_CODE (mem) == ZERO_EXTEND
      || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
    mem = XEXP (mem, 0);

  if (!MEM_P (mem))
    return false;

  if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
    return false;

  addr = XEXP (mem, 0);			/* either PLUS or LO_SUM.  */
  if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
    return false;

  /* Validate that the register used to load the high value is either the
     register being loaded, or we can safely replace its use.

     This function is only called from the peephole2 pass and we assume that
     there are 2 instructions in the peephole (addis and load), so we want to
     check if the target register was not used in the memory address and the
     register to hold the addis result is dead after the peephole.  */
  if (REGNO (addis_reg) != REGNO (target))
    {
      if (reg_mentioned_p (target, mem))
	return false;

      if (!peep2_reg_dead_p (2, addis_reg))
	return false;

      /* If the target register being loaded is the stack pointer, we must
	 avoid loading any other value into it, even temporarily.  */
      if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
	return false;
    }

  base_reg = XEXP (addr, 0);
  return REGNO (addis_reg) == REGNO (base_reg);
}
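
/* A typical sequence matched by this predicate looks like (the register
   numbers and the symbol are illustrative only):

	addis 9,2,.LC0@toc@ha
	lwz 9,.LC0@toc@l(9)

   where the addis result feeds the base register of the dependent load, the
   pattern that power8 can fuse.  */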
/* During the peephole2 pass, adjust and expand the insns for a load fusion
   sequence.  We adjust the addis register to use the target register.  If the
   load sign extends, we adjust the code to do the zero extending load, and an
   explicit sign extension later since the fusion only covers zero extending
   loads.

   The operands are:
	operands[0]	register set with addis (to be replaced with target)
	operands[1]	value set via addis
	operands[2]	target register being loaded
	operands[3]	D-form memory reference using operands[0].  */

void
expand_fusion_gpr_load (rtx *operands)
{
  rtx addis_value = operands[1];
  rtx target = operands[2];
  rtx orig_mem = operands[3];
  rtx new_addr, new_mem, orig_addr, offset;
  enum rtx_code plus_or_lo_sum;
  machine_mode target_mode = GET_MODE (target);
  machine_mode extend_mode = target_mode;
  machine_mode ptr_mode = Pmode;
  enum rtx_code extend = UNKNOWN;

  if (GET_CODE (orig_mem) == ZERO_EXTEND
      || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
    {
      extend = GET_CODE (orig_mem);
      orig_mem = XEXP (orig_mem, 0);
      target_mode = GET_MODE (orig_mem);
    }

  gcc_assert (MEM_P (orig_mem));

  orig_addr = XEXP (orig_mem, 0);
  plus_or_lo_sum = GET_CODE (orig_addr);
  gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);

  offset = XEXP (orig_addr, 1);
  new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
  new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);

  if (extend != UNKNOWN)
    new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);

  new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
			    UNSPEC_FUSION_GPR);
  emit_insn (gen_rtx_SET (target, new_mem));

  if (extend == SIGN_EXTEND)
    {
      int sub_off = ((BYTES_BIG_ENDIAN)
		     ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
		     : 0);
      rtx sign_reg
	= simplify_subreg (target_mode, target, extend_mode, sub_off);

      emit_insn (gen_rtx_SET (target,
			      gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
    }
}

/* Emit the addis instruction that will be part of a fused instruction
   sequence.  */

void
emit_fusion_addis (rtx target, rtx addis_value)
{
  rtx fuse_ops[10];
  const char *addis_str = NULL;

  /* Emit the addis instruction.  */
  fuse_ops[0] = target;
  if (satisfies_constraint_L (addis_value))
    {
      fuse_ops[1] = addis_value;
      addis_str = "lis %0,%v1";
    }

  else if (GET_CODE (addis_value) == PLUS)
    {
      rtx op0 = XEXP (addis_value, 0);
      rtx op1 = XEXP (addis_value, 1);

      if (REG_P (op0) && CONST_INT_P (op1)
	  && satisfies_constraint_L (op1))
	{
	  fuse_ops[1] = op0;
	  fuse_ops[2] = op1;
	  addis_str = "addis %0,%1,%v2";
	}
    }

  else if (GET_CODE (addis_value) == HIGH)
    {
      rtx value = XEXP (addis_value, 0);
      if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
	{
	  fuse_ops[1] = XVECEXP (value, 0, 0);		/* symbol ref.  */
	  fuse_ops[2] = XVECEXP (value, 0, 1);		/* TOC register.  */
	  if (TARGET_ELF)
	    addis_str = "addis %0,%2,%1@toc@ha";

	  else if (TARGET_XCOFF)
	    addis_str = "addis %0,%1@u(%2)";

	  else
	    gcc_unreachable ();
	}

      else if (GET_CODE (value) == PLUS)
	{
	  rtx op0 = XEXP (value, 0);
	  rtx op1 = XEXP (value, 1);

	  if (GET_CODE (op0) == UNSPEC
	      && XINT (op0, 1) == UNSPEC_TOCREL
	      && CONST_INT_P (op1))
	    {
	      fuse_ops[1] = XVECEXP (op0, 0, 0);	/* symbol ref.  */
	      fuse_ops[2] = XVECEXP (op0, 0, 1);	/* TOC register.  */
	      fuse_ops[3] = op1;
	      if (TARGET_ELF)
		addis_str = "addis %0,%2,%1+%3@toc@ha";

	      else if (TARGET_XCOFF)
		addis_str = "addis %0,%1+%3@u(%2)";

	      else
		gcc_unreachable ();
	    }
	}

      else if (satisfies_constraint_L (value))
	{
	  fuse_ops[1] = value;
	  addis_str = "lis %0,%v1";
	}

      else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
	{
	  fuse_ops[1] = value;
	  addis_str = "lis %0,%1@ha";
	}
    }

  if (!addis_str)
    fatal_insn ("Could not generate addis value for fusion", addis_value);

  output_asm_insn (addis_str, fuse_ops);
}

/* Emit a D-form load or store instruction that is the second instruction
   of a fusion sequence.  */

static void
emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
{
  rtx fuse_ops[10];
  char insn_template[80];

  fuse_ops[0] = load_reg;
  fuse_ops[1] = addis_reg;

  if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
    {
      sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
      fuse_ops[2] = offset;
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (GET_CODE (offset) == UNSPEC
	   && XINT (offset, 1) == UNSPEC_TOCREL)
    {
      if (TARGET_ELF)
	sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);

      else if (TARGET_XCOFF)
	sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);

      else
	gcc_unreachable ();

      fuse_ops[2] = XVECEXP (offset, 0, 0);
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (GET_CODE (offset) == PLUS
	   && GET_CODE (XEXP (offset, 0)) == UNSPEC
	   && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
	   && CONST_INT_P (XEXP (offset, 1)))
    {
      rtx tocrel_unspec = XEXP (offset, 0);
      if (TARGET_ELF)
	sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);

      else if (TARGET_XCOFF)
	sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);

      else
	gcc_unreachable ();

      fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
      fuse_ops[3] = XEXP (offset, 1);
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
    {
      sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);

      fuse_ops[2] = offset;
      output_asm_insn (insn_template, fuse_ops);
    }

  else
    fatal_insn ("Unable to generate load/store offset for fusion", offset);
}

/* Given an address, convert it into the addis and load offset parts.  Addresses
   created during the peephole2 process look like:
	(lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
		(unspec [(...)] UNSPEC_TOCREL))  */

static void
fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
{
  rtx hi, lo;

  if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
    {
      hi = XEXP (addr, 0);
      lo = XEXP (addr, 1);
    }
  else
    gcc_unreachable ();

  *p_hi = hi;
  *p_lo = lo;
}

/* Return a string to fuse an addis instruction with a gpr load to the same
   register that we loaded up the addis instruction.  The address that is used
   is the logical address that was formed during peephole2:
	(lo_sum (high) (low-part))

   The code is complicated, so we call output_asm_insn directly, and just
   return "".  */

const char *
emit_fusion_gpr_load (rtx target, rtx mem)
{
  rtx addis_value;
  rtx load_offset;
  rtx addr;
  machine_mode mode;
  const char *load_str = NULL;

  if (GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  gcc_assert (REG_P (target) && MEM_P (mem));

  addr = XEXP (mem, 0);
  fusion_split_address (addr, &addis_value, &load_offset);

  /* Now emit the load instruction to the same register.  */
  mode = GET_MODE (mem);
  switch (mode)
    {
    case E_QImode:
      load_str = "lbz";
      break;

    case E_HImode:
      load_str = "lhz";
      break;

    case E_SImode:
    case E_SFmode:
      load_str = "lwz";
      break;

    case E_DImode:
    case E_DFmode:
      gcc_assert (TARGET_POWERPC64);
      load_str = "ld";
      break;

    default:
      fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
    }

  /* Emit the addis instruction.  */
  emit_fusion_addis (target, addis_value);

  /* Emit the D-form load instruction.  */
  emit_fusion_load (target, target, load_offset, load_str);

  return "";
}

#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
	{
	  atomic_hold_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feholdexcept"),
			  build_function_type_list (void_type_node,
						    double_ptr_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_hold_decl) = 1;
	  DECL_EXTERNAL (atomic_hold_decl) = 1;
	}

      if (atomic_clear_decl == NULL_TREE)
	{
	  atomic_clear_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feclearexcept"),
			  build_function_type_list (void_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_clear_decl) = 1;
	  DECL_EXTERNAL (atomic_clear_decl) = 1;
	}

      tree const_double = build_qualified_type (double_type_node,
						TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
	{
	  atomic_update_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feupdateenv"),
			  build_function_type_list (void_type_node,
						    const_double_ptr,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_update_decl) = 1;
	  DECL_EXTERNAL (atomic_update_decl) = 1;
	}

      tree fenv_var = create_tmp_var_raw (double_type_node);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
			       build4 (TARGET_EXPR, double_type_node, fenv_var,
				       void_node, NULL_TREE, NULL_TREE));

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
				 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }

  tree mffs = rs6000_builtin_decls[RS6000_BIF_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BIF_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var)

     *fenv_var = __builtin_mffs ();
     double fenv_hold;
     *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
     __builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask
    = HOST_WIDE_INT_C (0xffffffff00000007);

  tree fenv_var = create_tmp_var_raw (double_type_node);

  tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
			   NULL_TREE, NULL_TREE);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			      build_int_cst (uint64_type_node,
					     hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
				     build_int_cst (unsigned_type_node, 0xff),
				     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

     double fenv_clear = __builtin_mffs ();
     *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
     __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT clear_exception_mask
    = HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
			    call_mffs, NULL_TREE, NULL_TREE);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
				    fenv_clean_llu,
				    build_int_cst (uint64_type_node,
						   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
				      build_int_cst (unsigned_type_node, 0xff),
				      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var)

     double old_fenv = __builtin_mffs ();
     double fenv_update;
     *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
				(*(uint64_t*)fenv_var & 0x1ff80fff);
     __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask
    = HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask
    = HOST_WIDE_INT_C (0x1ff80fff);

  tree old_fenv = create_tmp_var_raw (double_type_node);

  tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
			     call_mffs, NULL_TREE, NULL_TREE);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
			     build_int_cst (uint64_type_node,
					    update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			     build_int_cst (uint64_type_node,
					    new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
			      old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
				       build_int_cst (unsigned_type_node, 0xff),
				       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}

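/* An illustrative sketch, not from the GCC sources, of the FPSCR bit
   manipulation built by the three expansions above, written as plain C on
   the raw 64-bit FPSCR image.  mffs_bits and mtfsf_bits are hypothetical
   stand-ins for the mffs/mtfsf built-ins:

     uint64_t saved = mffs_bits ();			   // hold: save env
     mtfsf_bits (saved & 0xffffffff00000007ULL);	   // keep RN/NI only
     mtfsf_bits (mffs_bits () & 0xffffffff00000000ULL);	   // clear exceptions
     mtfsf_bits ((mffs_bits () & 0xffffffff1fffff00ULL)	   // update: merge the
		 | (saved & 0x1ff80fffULL));		   // saved flags back

   The three masks correspond to hold_exception_mask, clear_exception_mask,
   and update_exception_mask/new_exception_mask used above.  */
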
void
rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DFmode);
  rtx_tmp1 = gen_reg_rtx (V2DFmode);

  /* The destination of the vmrgew instruction layout is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
     vmrgew instruction will be correct.  */
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
					   GEN_INT (0)));
      emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
					   GEN_INT (3)));
    }
  else
    {
      emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
      emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
    }

  rtx_tmp2 = gen_reg_rtx (V4SFmode);
  rtx_tmp3 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
  emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
  else
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
}

void
rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DImode);
  rtx_tmp1 = gen_reg_rtx (V2DImode);

  /* The destination of the vmrgew instruction layout is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
     vmrgew instruction will be correct.  */
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
    }
  else
    {
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
    }

  rtx_tmp2 = gen_reg_rtx (V4SFmode);
  rtx_tmp3 = gen_reg_rtx (V4SFmode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
    }

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
  else
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
}

void
rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
			       rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DFmode);
  rtx_tmp1 = gen_reg_rtx (V2DFmode);

  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));

  rtx_tmp2 = gen_reg_rtx (V4SImode);
  rtx_tmp3 = gen_reg_rtx (V4SImode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
    }

  emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
}

/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
			  optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return (opt_type == OPTIMIZE_FOR_SPEED
	      && RS6000_RECIP_AUTO_RSQRTE_P (mode1));

    default:
      return true;
    }
}

/* Implement TARGET_CONSTANT_ALIGNMENT.  */

static HOST_WIDE_INT
rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST
      && (STRICT_ALIGNMENT || !optimize_size))
    return MAX (align, BITS_PER_WORD);
  return align;
}

/* Implement TARGET_STARTING_FRAME_OFFSET.  */

static HOST_WIDE_INT
rs6000_starting_frame_offset (void)
{
  if (FRAME_GROWS_DOWNWARD)
    return 0;
  return RS6000_STARTING_FRAME_OFFSET;
}

/* On 64-bit Linux and Freebsd systems, possibly switch the long double library
   function names from <foo>l to <foo>f128 if the default long double type is
   IEEE 128-bit.  Typically, with the C and C++ languages, the standard math.h
   include file switches the names on systems that support long double as IEEE
   128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
   In the future, glibc will export names like __ieee128_sinf128 and we can
   switch to using those instead of using sinf128, which pollutes the user's
   namespace.

   This will switch the names for Fortran math functions as well (which doesn't
   use math.h).  However, Fortran needs other changes to the compiler and
   library before you can switch the real*16 type at compile time.

   We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name.  We
   only do this transformation if the __float128 type is enabled.  This
   prevents us from doing the transformation on older 32-bit ports that might
   have enabled using IEEE 128-bit floating point as the default long double
   type.  */

static tree
rs6000_mangle_decl_assembler_name (tree decl, tree id)
{
  if (TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
      && TREE_CODE (decl) == FUNCTION_DECL
      && DECL_IS_UNDECLARED_BUILTIN (decl)
      && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
    {
      size_t len = IDENTIFIER_LENGTH (id);
      const char *name = IDENTIFIER_POINTER (id);
      char *newname = NULL;

      /* See if it is one of the built-in functions with an unusual name.  */
      switch (DECL_FUNCTION_CODE (decl))
	{
	case BUILT_IN_DREML:
	  newname = xstrdup ("__remainderieee128");
	  break;

	case BUILT_IN_GAMMAL:
	  newname = xstrdup ("__lgammaieee128");
	  break;

	case BUILT_IN_GAMMAL_R:
	case BUILT_IN_LGAMMAL_R:
	  newname = xstrdup ("__lgammaieee128_r");
	  break;

	case BUILT_IN_NEXTTOWARD:
	  newname = xstrdup ("__nexttoward_to_ieee128");
	  break;

	case BUILT_IN_NEXTTOWARDF:
	  newname = xstrdup ("__nexttowardf_to_ieee128");
	  break;

	case BUILT_IN_NEXTTOWARDL:
	  newname = xstrdup ("__nexttowardieee128");
	  break;

	case BUILT_IN_POW10L:
	  newname = xstrdup ("__exp10ieee128");
	  break;

	case BUILT_IN_SCALBL:
	  newname = xstrdup ("__scalbieee128");
	  break;

	case BUILT_IN_SIGNIFICANDL:
	  newname = xstrdup ("__significandieee128");
	  break;

	case BUILT_IN_SINCOSL:
	  newname = xstrdup ("__sincosieee128");
	  break;

	default:
	  break;
	}

      /* Update the __builtin_*printf and __builtin_*scanf functions.  */
      if (!newname)
	{
	  size_t printf_len = strlen ("printf");
	  size_t scanf_len = strlen ("scanf");

	  if (len >= printf_len
	      && strcmp (name + len - printf_len, "printf") == 0)
	    newname = xasprintf ("__%sieee128", name);

	  else if (len >= scanf_len
		   && strcmp (name + len - scanf_len, "scanf") == 0)
	    newname = xasprintf ("__isoc99_%sieee128", name);

	  else if (name[len - 1] == 'l')
	    {
	      bool uses_ieee128_p = false;
	      tree type = TREE_TYPE (decl);
	      machine_mode ret_mode = TYPE_MODE (type);

	      /* See if the function returns an IEEE 128-bit floating point type
		 or complex type.  */
	      if (ret_mode == TFmode || ret_mode == TCmode)
		uses_ieee128_p = true;
	      else
		{
		  function_args_iterator args_iter;
		  tree arg;

		  /* See if the function passes an IEEE 128-bit floating point
		     type or complex type.  */
		  FOREACH_FUNCTION_ARGS (type, arg, args_iter)
		    {
		      machine_mode arg_mode = TYPE_MODE (arg);
		      if (arg_mode == TFmode || arg_mode == TCmode)
			{
			  uses_ieee128_p = true;
			  break;
			}
		    }
		}

	      /* If we passed or returned an IEEE 128-bit floating point type,
		 change the name.  Use __<name>ieee128, instead of <name>l.  */
	      if (uses_ieee128_p)
		newname = xasprintf ("__%.*sieee128", (int)(len - 1), name);
	    }
	}

      if (newname)
	{
	  if (TARGET_DEBUG_BUILTIN)
	    fprintf (stderr, "Map %s => %s\n", name, newname);

	  id = get_identifier (newname);
	  free (newname);
	}
    }

  return id;
}

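/* Illustrative examples, not from the GCC sources, of the renaming performed
   above when long double defaults to IEEE 128-bit:

     sinl    -> __sinieee128		(generic <foo>l case, TFmode return)
     printf  -> __printfieee128		(printf family)
     scanf   -> __isoc99_scanfieee128	(scanf family)
     sincosl -> __sincosieee128		(explicit entry in the switch above)

   The generic <foo>l rename is only applied when the function actually
   passes or returns a TFmode or TCmode value.  */
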
/* Predict whether the given loop in gimple will be transformed in the RTL
   doloop_optimize pass.  */

static bool
rs6000_predict_doloop_p (struct loop *loop)
{
  gcc_assert (loop);

  /* On rs6000, targetm.can_use_doloop_p is actually
     can_use_doloop_if_innermost.  Just ensure the loop is innermost.  */
  if (loop->inner != NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Predict doloop failure due to"
			    " loop nesting.\n");
      return false;
    }

  return true;
}

/* Implement TARGET_PREFERRED_DOLOOP_MODE.  */

static machine_mode
rs6000_preferred_doloop_mode (machine_mode)
{
  return word_mode;
}

/* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P.  */

static bool
rs6000_cannot_substitute_mem_equiv_p (rtx mem)
{
  gcc_assert (MEM_P (mem));

  /* curr_insn_transform()'s handling of subregs cannot handle altivec AND:
     type addresses, so don't allow MEMs with those address types to be
     substituted as an equivalent expression.  See PR93974 for details.  */
  if (GET_CODE (XEXP (mem, 0)) == AND)
    return true;

  return false;
}

/* Implement TARGET_INVALID_CONVERSION.  */

static const char *
rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
{
  /* Make sure we're working with the canonical types.  */
  if (TYPE_CANONICAL (fromtype) != NULL_TREE)
    fromtype = TYPE_CANONICAL (fromtype);
  if (TYPE_CANONICAL (totype) != NULL_TREE)
    totype = TYPE_CANONICAL (totype);

  machine_mode frommode = TYPE_MODE (fromtype);
  machine_mode tomode = TYPE_MODE (totype);

  if (frommode != tomode)
    {
      /* Do not allow conversions to/from XOmode and OOmode types.  */
      if (frommode == XOmode)
	return N_("invalid conversion from type %<__vector_quad%>");
      if (tomode == XOmode)
	return N_("invalid conversion to type %<__vector_quad%>");
      if (frommode == OOmode)
	return N_("invalid conversion from type %<__vector_pair%>");
      if (tomode == OOmode)
	return N_("invalid conversion to type %<__vector_pair%>");
    }
  else if (POINTER_TYPE_P (fromtype) && POINTER_TYPE_P (totype))
    {
      /* We really care about the modes of the base types.  */
      frommode = TYPE_MODE (TREE_TYPE (fromtype));
      tomode = TYPE_MODE (TREE_TYPE (totype));

      /* Do not allow conversions to/from XOmode and OOmode pointer
	 types, except to/from void pointers.  */
      if (frommode != tomode
	  && frommode != VOIDmode
	  && tomode != VOIDmode)
	{
	  if (frommode == XOmode)
	    return N_("invalid conversion from type %<* __vector_quad%>");
	  if (tomode == XOmode)
	    return N_("invalid conversion to type %<* __vector_quad%>");
	  if (frommode == OOmode)
	    return N_("invalid conversion from type %<* __vector_pair%>");
	  if (tomode == OOmode)
	    return N_("invalid conversion to type %<* __vector_pair%>");
	}
    }

  /* Conversion allowed.  */
  return NULL;
}

/* Convert a SFmode constant to the integer bit pattern.  */

long
rs6000_const_f32_to_i32 (rtx operand)
{
  long value;
  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);

  gcc_assert (GET_MODE (operand) == SFmode);
  REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
  return value;
}

void
rs6000_emit_xxspltidp_v2df (rtx dst, long value)
{
  if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
    inform (input_location,
	    "the result for the xxspltidp instruction "
	    "is undefined for subnormal input values");
  emit_insn (gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
}

/* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC.  */

static bool
rs6000_gen_pic_addr_diff_vec (void)
{
  return rs6000_relative_jumptables;
}

void
rs6000_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t";
  char buf[100];

  fprintf (file, "%s", directive);
  ASM_GENERATE_INTERNAL_LABEL (buf, "L", value);
  assemble_name (file, buf);
  fprintf (file, "\n");
}

/* Copy an integer constant to the vector constant structure.  */

static void
constant_int_to_128bit_vector (rtx op,
			       machine_mode mode,
			       size_t byte_num,
			       vec_const_128bit_type *info)
{
  unsigned HOST_WIDE_INT uvalue = UINTVAL (op);
  unsigned bitsize = GET_MODE_BITSIZE (mode);

  for (int shift = bitsize - 8; shift >= 0; shift -= 8)
    info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
}

/* Copy a floating point constant to the vector constant structure.  */

static void
constant_fp_to_128bit_vector (rtx op,
			      machine_mode mode,
			      size_t byte_num,
			      vec_const_128bit_type *info)
{
  unsigned bitsize = GET_MODE_BITSIZE (mode);
  unsigned num_words = bitsize / 32;
  const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op);
  long real_words[VECTOR_128BIT_WORDS];

  /* Make sure we don't overflow the real_words array and that it is
     filled completely.  */
  gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0);

  real_to_target (real_words, rtype, mode);

  /* Iterate over each 32-bit word in the floating point constant.  The
     real_to_target function puts out words in target endian fashion.  We need
     to arrange the order so that the bytes are written in big endian order.  */
  for (unsigned num = 0; num < num_words; num++)
    {
      unsigned endian_num = (BYTES_BIG_ENDIAN
			     ? num
			     : num_words - 1 - num);

      unsigned uvalue = real_words[endian_num];
      for (int shift = 32 - 8; shift >= 0; shift -= 8)
	info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
    }

  /* Mark that this constant involves floating point.  */
  info->fp_constant_p = true;
}

/* Convert a vector constant OP with mode MODE to a vector 128-bit constant
   structure INFO.

   Break the constant out to bytes, half words, words, and double words.
   Return true if we have successfully converted the constant.

   We handle CONST_INT, CONST_DOUBLE, CONST_VECTOR, and VEC_DUPLICATE of
   constants.  Integer and floating point scalar constants are splatted to fill
   out the rest of the vector.  */

bool
vec_const_128bit_to_bytes (rtx op,
			   machine_mode mode,
			   vec_const_128bit_type *info)
{
  /* Initialize the constant structure.  */
  memset ((void *)info, 0, sizeof (vec_const_128bit_type));

  /* Assume CONST_INTs are DImode.  */
  if (mode == VOIDmode)
    mode = CONST_INT_P (op) ? DImode : GET_MODE (op);

  if (mode == VOIDmode)
    return false;

  unsigned size = GET_MODE_SIZE (mode);
  bool splat_p = false;

  if (size > VECTOR_128BIT_BYTES)
    return false;

  /* Set up the bits.  */
  switch (GET_CODE (op))
    {
      /* Integer constants, default to double word.  */
    case CONST_INT:
      constant_int_to_128bit_vector (op, mode, 0, info);
      splat_p = true;
      break;

      /* Floating point constants.  */
    case CONST_DOUBLE:
      {
	/* Fail if the floating point constant is the wrong mode.  */
	if (GET_MODE (op) != mode)
	  return false;

	/* SFmode stored as scalars are stored in DFmode format.  */
	if (mode == SFmode)
	  {
	    mode = DFmode;
	    size = GET_MODE_SIZE (DFmode);
	  }

	constant_fp_to_128bit_vector (op, mode, 0, info);
	splat_p = true;
	break;
      }

      /* Vector constants, iterate over each element.  On little endian
	 systems, we have to reverse the element numbers.  */
    case CONST_VECTOR:
      {
	/* Fail if the vector constant is the wrong mode or size.  */
	if (GET_MODE (op) != mode
	    || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
	  return false;

	machine_mode ele_mode = GET_MODE_INNER (mode);
	size_t ele_size = GET_MODE_SIZE (ele_mode);
	size_t nunits = GET_MODE_NUNITS (mode);

	for (size_t num = 0; num < nunits; num++)
	  {
	    rtx ele = CONST_VECTOR_ELT (op, num);
	    size_t byte_num = (BYTES_BIG_ENDIAN
			       ? num
			       : nunits - 1 - num) * ele_size;

	    if (CONST_INT_P (ele))
	      constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
	    else if (CONST_DOUBLE_P (ele))
	      constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
	    else
	      return false;
	  }

	break;
      }

      /* Treat VEC_DUPLICATE of a constant just like a vector constant.
	 Since we are duplicating the element, we don't have to worry about
	 endian issues.  */
    case VEC_DUPLICATE:
      {
	/* Fail if the vector duplicate is the wrong mode or size.  */
	if (GET_MODE (op) != mode
	    || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
	  return false;

	machine_mode ele_mode = GET_MODE_INNER (mode);
	size_t ele_size = GET_MODE_SIZE (ele_mode);
	rtx ele = XEXP (op, 0);
	size_t nunits = GET_MODE_NUNITS (mode);

	if (!CONST_INT_P (ele) && !CONST_DOUBLE_P (ele))
	  return false;

	for (size_t num = 0; num < nunits; num++)
	  {
	    size_t byte_num = num * ele_size;

	    if (CONST_INT_P (ele))
	      constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
	    else
	      constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
	  }

	break;
      }

      /* Anything else, just return failure.  */
    default:
      return false;
    }

  /* Splat the constant to fill 128 bits if desired.  */
  if (splat_p && size < VECTOR_128BIT_BYTES)
    {
      if ((VECTOR_128BIT_BYTES % size) != 0)
	return false;

      for (size_t offset = size;
	   offset < VECTOR_128BIT_BYTES;
	   offset += size)
	memcpy ((void *) &info->bytes[offset],
		(void *) &info->bytes[0],
		size);
    }

  /* Remember original size.  */
  info->original_size = size;

  /* Determine if the bytes are all the same.  */
  unsigned char first_byte = info->bytes[0];
  info->all_bytes_same = true;
  for (size_t i = 1; i < VECTOR_128BIT_BYTES; i++)
    if (first_byte != info->bytes[i])
      {
	info->all_bytes_same = false;
	break;
      }

  /* Pack half words together & determine if all of the half words are the
     same.  */
  for (size_t i = 0; i < VECTOR_128BIT_HALF_WORDS; i++)
    info->half_words[i] = ((info->bytes[i * 2] << 8)
			   | info->bytes[(i * 2) + 1]);

  unsigned short first_hword = info->half_words[0];
  info->all_half_words_same = true;
  for (size_t i = 1; i < VECTOR_128BIT_HALF_WORDS; i++)
    if (first_hword != info->half_words[i])
      {
	info->all_half_words_same = false;
	break;
      }

  /* Pack words together & determine if all of the words are the same.  */
  for (size_t i = 0; i < VECTOR_128BIT_WORDS; i++)
    info->words[i] = ((info->bytes[i * 4] << 24)
		      | (info->bytes[(i * 4) + 1] << 16)
		      | (info->bytes[(i * 4) + 2] << 8)
		      | info->bytes[(i * 4) + 3]);

  info->all_words_same
    = (info->words[0] == info->words[1]
       && info->words[0] == info->words[2]
       && info->words[0] == info->words[3]);

  /* Pack double words together & determine if all of the double words are the
     same.  */
  for (size_t i = 0; i < VECTOR_128BIT_DOUBLE_WORDS; i++)
    {
      unsigned HOST_WIDE_INT d_word = 0;
      for (size_t j = 0; j < 8; j++)
	d_word = (d_word << 8) | info->bytes[(i * 8) + j];

      info->double_words[i] = d_word;
    }

  info->all_double_words_same
    = (info->double_words[0] == info->double_words[1]);

  return true;
}

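/* An illustrative example, not from the GCC sources, of the decomposition
   performed above.  A V4SImode CONST_VECTOR whose four elements are all
   0x12345678 produces:

     bytes[]        = 12 34 56 78 repeated four times
     half_words[]   = 0x1234, 0x5678, ...	(all_half_words_same = false)
     words[]        = 0x12345678 x 4		(all_words_same = true)
     double_words[] = 0x1234567812345678 x 2	(all_double_words_same = true)
     all_bytes_same = false, original_size = 16.  */
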
/* Determine if an IEEE 128-bit constant can be loaded with LXVKQ.  Return zero
   if the LXVKQ instruction cannot be used.  Otherwise return the immediate
   value to be used with the LXVKQ instruction.  */

int
constant_generates_lxvkq (vec_const_128bit_type *vsx_const)
{
  /* The LXVKQ instruction is only supported when generating power10 code with
     IEEE 128-bit floating point hardware and VSX registers available.  */
  if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
      || !TARGET_VSX)
    return 0;

  /* All of the constants that are generated by LXVKQ have the bottom 3 words
     equal to 0.  */
  if (vsx_const->words[1] != 0
      || vsx_const->words[2] != 0
      || vsx_const->words[3] != 0)
    return 0;

  /* See if we have a match for the first word.  */
  switch (vsx_const->words[0])
    {
    case 0x3FFF0000U: return 1;		/* IEEE 128-bit +1.0.  */
    case 0x40000000U: return 2;		/* IEEE 128-bit +2.0.  */
    case 0x40008000U: return 3;		/* IEEE 128-bit +3.0.  */
    case 0x40010000U: return 4;		/* IEEE 128-bit +4.0.  */
    case 0x40014000U: return 5;		/* IEEE 128-bit +5.0.  */
    case 0x40018000U: return 6;		/* IEEE 128-bit +6.0.  */
    case 0x4001C000U: return 7;		/* IEEE 128-bit +7.0.  */
    case 0x7FFF0000U: return 8;		/* IEEE 128-bit +Infinity.  */
    case 0x7FFF8000U: return 9;		/* IEEE 128-bit quiet NaN.  */
    case 0x80000000U: return 16;	/* IEEE 128-bit -0.0.  */
    case 0xBFFF0000U: return 17;	/* IEEE 128-bit -1.0.  */
    case 0xC0000000U: return 18;	/* IEEE 128-bit -2.0.  */
    case 0xC0008000U: return 19;	/* IEEE 128-bit -3.0.  */
    case 0xC0010000U: return 20;	/* IEEE 128-bit -4.0.  */
    case 0xC0014000U: return 21;	/* IEEE 128-bit -5.0.  */
    case 0xC0018000U: return 22;	/* IEEE 128-bit -6.0.  */
    case 0xC001C000U: return 23;	/* IEEE 128-bit -7.0.  */
    case 0xFFFF0000U: return 24;	/* IEEE 128-bit -Infinity.  */

      /* Anything else cannot be loaded.  */
    default:
      return 0;
    }
}

/* Determine if a vector constant can be loaded with XXSPLTIW.  Return zero if
   the XXSPLTIW instruction cannot be used.  Otherwise return the immediate
   value to be used with the XXSPLTIW instruction.  */

int
constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
{
  if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
    return 0;

  if (!vsx_const->all_words_same)
    return 0;

  /* If we can use XXSPLTIB, don't generate XXSPLTIW.  */
  if (vsx_const->all_bytes_same)
    return 0;

  /* See if we can use VSPLTISH or VSPLTISW.  */
  if (vsx_const->all_half_words_same)
    {
      unsigned short h_word = vsx_const->half_words[0];
      short sign_h_word = ((h_word & 0xffff) ^ 0x8000) - 0x8000;
      if (EASY_VECTOR_15 (sign_h_word))
	return 0;
    }

  unsigned int word = vsx_const->words[0];
  int sign_word = ((word & 0xffffffff) ^ 0x80000000) - 0x80000000;
  if (EASY_VECTOR_15 (sign_word))
    return 0;

  return vsx_const->words[0];
}

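/* An illustrative example, not from the GCC sources.  For a constant whose
   four words are all 0x12345678, the bytes and half words are not all
   identical and the sign-extended word is not an EASY_VECTOR_15 value, so
   the function above returns 0x12345678 as the XXSPLTIW immediate.  A splat
   of 0x00000001, by contrast, returns 0 because VSPLTISW already handles
   it.  */
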
/* Determine if a vector constant can be loaded with XXSPLTIDP.  Return zero if
   the XXSPLTIDP instruction cannot be used.  Otherwise return the immediate
   value to be used with the XXSPLTIDP instruction.  */

int
constant_generates_xxspltidp (vec_const_128bit_type *vsx_const)
{
  if (!TARGET_SPLAT_FLOAT_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
    return 0;

  /* Reject if the two 64-bit segments are not the same.  */
  if (!vsx_const->all_double_words_same)
    return 0;

  /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP.
     Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW).  */
  if (vsx_const->all_bytes_same
      || vsx_const->all_half_words_same
      || vsx_const->all_words_same)
    return 0;

  unsigned HOST_WIDE_INT value = vsx_const->double_words[0];

  /* Avoid values that look like DFmode NaN's, except for the normal NaN bit
     pattern and the signalling NaN bit pattern.  Recognize infinity and
     negative infinity.  */

  /* Bit representation of DFmode normal quiet NaN.  */
#define RS6000_CONST_DF_NAN	HOST_WIDE_INT_UC (0x7ff8000000000000)

  /* Bit representation of DFmode normal signaling NaN.  */
#define RS6000_CONST_DF_NANS	HOST_WIDE_INT_UC (0x7ff4000000000000)

  /* Bit representation of DFmode positive infinity.  */
#define RS6000_CONST_DF_INF	HOST_WIDE_INT_UC (0x7ff0000000000000)

  /* Bit representation of DFmode negative infinity.  */
#define RS6000_CONST_DF_NEG_INF	HOST_WIDE_INT_UC (0xfff0000000000000)

  if (value != RS6000_CONST_DF_NAN
      && value != RS6000_CONST_DF_NANS
      && value != RS6000_CONST_DF_INF
      && value != RS6000_CONST_DF_NEG_INF)
    {
      /* The IEEE 754 64-bit floating format has 1 bit for sign, 11 bits for
	 the exponent, and 52 bits for the mantissa (not counting the hidden
	 bit used for normal numbers).  NaN values have the exponent set to all
	 1 bits, and the mantissa non-zero (mantissa == 0 is infinity).  */

      int df_exponent = (value >> 52) & 0x7ff;
      unsigned HOST_WIDE_INT
	df_mantissa = value & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U);

      if (df_exponent == 0x7ff && df_mantissa != 0)	/* other NaNs.  */
	return 0;

      /* Avoid values that are DFmode subnormal values.  Subnormal numbers have
	 the exponent all 0 bits, and the mantissa non-zero.  If the value is
	 subnormal, then the hidden bit in the mantissa is not set.  */
      if (df_exponent == 0 && df_mantissa != 0)		/* subnormal.  */
	return 0;
    }

  /* Change the representation to DFmode constant.  */
  long df_words[2] = { vsx_const->words[0], vsx_const->words[1] };

  /* real_from_target takes the target words in target order.  */
  if (!BYTES_BIG_ENDIAN)
    std::swap (df_words[0], df_words[1]);

  REAL_VALUE_TYPE rv_type;
  real_from_target (&rv_type, df_words, DFmode);

  const REAL_VALUE_TYPE *rv = &rv_type;

  /* Validate that the number can be stored as a SFmode value.  */
  if (!exact_real_truncate (SFmode, rv))
    return 0;

  /* Validate that the number is not a SFmode subnormal value (exponent is 0,
     mantissa field is non-zero) which is undefined for the XXSPLTIDP
     instruction.  */
  long sf_value;
  real_to_target (&sf_value, rv, SFmode);

  /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent,
     and 23 bits for the mantissa.  Subnormal numbers have the exponent all
     0 bits, and the mantissa non-zero.  */
  long sf_exponent = (sf_value >> 23) & 0xFF;
  long sf_mantissa = sf_value & 0x7FFFFF;

  if (sf_exponent == 0 && sf_mantissa != 0)
    return 0;

  /* Return the immediate to be used.  */
  return sf_value;
}

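/* An illustrative example, not from the GCC sources.  For a V2DF constant
   { 1.0, 1.0 } both double words are 0x3ff0000000000000; the value is not a
   NaN, infinity, or subnormal, and 1.0 truncates exactly to SFmode, so the
   function above returns the SFmode image 0x3f800000 as the XXSPLTIDP
   immediate.  A value such as 0.1, which does not truncate exactly to
   SFmode, is rejected and 0 is returned.  */
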
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"