// SPDX-License-Identifier: GPL-3.0-or-later
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2024 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "gimple-walk.h"
#include "ssa.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
#include "intl.h"
#include "tm-constrs.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "context.h"
#include "tree-pass.h"
#include "symbol-summary.h"
#include "sreal.h"
#include "ipa-cp.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "except.h"
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "rs6000-internal.h"
#include "opts.h"

/* This file should be included last.  */
#include "target-def.h"

extern tree rs6000_builtin_mask_for_load (void);
extern tree rs6000_builtin_md_vectorized_function (tree, tree, tree);
extern tree rs6000_builtin_reciprocal (tree);

/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif

/* Don't enable PC-relative addressing if the target does not support it.  */
#ifndef PCREL_SUPPORTED_BY_OS
#define PCREL_SUPPORTED_BY_OS 0
#endif

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

/* Track use of r13 in 64-bit AIX TLS.  */
static bool xcoff_tls_exec_model_detected = false;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in the
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

section *toc_section = 0;

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV    = 0x001,   /* Use divide estimate.  */
  RECIP_DF_DIV    = 0x002,
  RECIP_V4SF_DIV  = 0x004,
  RECIP_V2DF_DIV  = 0x008,

  RECIP_SF_RSQRT  = 0x010,   /* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT  = 0x020,
  RECIP_V4SF_RSQRT = 0x040,
  RECIP_V2DF_RSQRT = 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE = 0,
  RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
	       | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
	       | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;	/* option name.  */
  unsigned int mask;	/* mask bits to set.  */
} recip_options[] = {
  { "all",	RECIP_ALL },
  { "none",	RECIP_NONE },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};

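/* Illustrative sketch (an assumption for exposition, not code GCC runs): a
   -mrecip=<name> argument could be mapped to its mask bits by scanning the
   recip_options table above.  The real parsing is done during option
   override; this example only shows how the table is meant to be read.  */
#if 0
static unsigned int
example_recip_mask_for_option (const char *name)
{
  for (size_t i = 0; i < ARRAY_SIZE (recip_options); i++)
    if (strcmp (recip_options[i].string, name) == 0)
      return recip_options[i].mask;	/* e.g. "divd" -> DF/V2DF divide bits.  */
  return RECIP_NONE;			/* Unrecognized -mrecip= argument.  */
}
#endif
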
/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT = 0,		/* default clone.  */
  CLONE_ISA_2_05,		/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,		/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,		/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,		/* ISA 3.0 (power9).  */
  CLONE_ISA_3_1,		/* ISA 3.1 (power10).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask.  */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0,				"" },		/* Default options.  */
  { OPTION_MASK_CMPB,		"arch_2_05" },	/* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD,	"arch_2_06" },	/* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR,	"arch_2_07" },	/* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR,	"arch_3_00" },	/* ISA 3.0 (power9).  */
  { OPTION_MASK_POWER10,	"arch_3_1" },	/* ISA 3.1 (power10).  */
};

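/* Example (illustrative, not part of this file): the clone table above backs
   the "target_clones" attribute.  A user function such as

     __attribute__ ((target_clones ("default", "cpu=power9", "cpu=power10")))
     int f (void);

   gets one body per clone, and the runtime resolver picks the newest clone
   whose HWCAP name tests true, e.g. __builtin_cpu_supports ("arch_3_1") for
   the power10 clone.  */
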

/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p = false;

/* Pointer to function (in rs6000-c.cc) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.cc, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)

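/* For example, the range checks above rely on the enum order:
   IS_STD_REG_TYPE accepts GPR_REG_TYPE (2) through FPR_REG_TYPE (5), so
   SPR_REG_TYPE and CR_REG_TYPE fall outside the range, while
   IS_FP_VECT_REG_TYPE accepts only the VSX/Altivec/FPR subrange.  */
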

/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;		/* Register class name.  */
  int reg;			/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */

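/* For example (illustrative values; the real masks are computed when options
   are processed): a GPR that can hold DImode with reg+reg, reg+offset, and
   pre-increment/decrement/modify addressing would have the mask
   RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET
   | RELOAD_REG_PRE_INCDEC | RELOAD_REG_PRE_MODIFY, i.e. 0x3d.  */
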
/* For each machine mode, the reload insns to use and, for each reload
   register type, the mask of valid addressing modes.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG];	/* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}

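/* Illustrative use of the predicates above (an example, not code this file
   contains): an address check might accept a PRE_INC address only when the
   mode allows it, e.g.

     if (GET_CODE (addr) == PRE_INC && !mode_supports_pre_incdec_p (mode))
       return false;  */
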
/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;
		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  return store_data_bypass_p (out_insn, in_insn);
}

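/* For example (illustrative RTL, an assumption for exposition): an IN_INSN
   of the form

     (parallel [(set (mem:DI (reg:DI 9)) (reg:DI 3))
		(set (reg:DI 10) (plus:DI (reg:DI 9) (const_int 8)))])

   is not a single SET, so it is scanned element by element; because every
   element is a SET, USE, or CLOBBER, the final call to the generic
   store_data_bypass_p is still made, while any other element code would
   make the function return false first.  */
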
\f
/* Processor costs (relative to an add).  */

const struct processor_costs *rs6000_cost;

/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

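/* Note: the two "size" cost tables above charge every operation exactly one
   add, so when optimizing for size only the instruction count matters; the
   l1/l2 cache sizes and stream counts are deliberately zero.  */
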
/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,	/* mulsi */
  COSTS_N_INSNS (6/2),		/* mulsi_const */
  COSTS_N_INSNS (6/2),		/* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,	/* muldi */
  COSTS_N_INSNS (38/2),		/* divsi */
  COSTS_N_INSNS (70/2),		/* divdi */
  COSTS_N_INSNS (10/2),		/* fp */
  COSTS_N_INSNS (10/2),		/* dmul */
  COSTS_N_INSNS (74/2),		/* sdiv */
  COSTS_N_INSNS (74/2),		/* ddiv */
  128,				/* cache line size */
  32,				/* l1 cache */
  512,				/* l2 cache */
  6,				/* streams */
  0,				/* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER10/POWER11 processors.  */
static const
struct processor_costs power10_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (2),	/* fp */
  COSTS_N_INSNS (2),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (13),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  16,			/* prefetch streams */
  COSTS_N_INSNS (2),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

\f
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool,
					       code_helper = ERROR_MARK);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, int * = nullptr);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
bool easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
#if TARGET_MACHO
static tree get_prev_label (tree);
#endif
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t,
						  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

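/* Note (descriptive, believed accurate): the three function pointers above
   normally point at the plain implementations; when -mdebug=addr is given,
   option handling redirects them to the rs6000_debug_* wrappers, so callers
   pick up tracing without any checks at the call sites.  */
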
const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;


\f
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif

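/* Note (descriptive): alt_reg_names is used when -mregnames is in effect;
   it makes the assembler output spell registers symbolically, e.g. %r3 and
   %f1 instead of the bare numbers in rs6000_reg_names above.  */
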
/* Table of valid machine attributes.  */

static const attribute_spec rs6000_gnu_attributes[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",	 1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",	 0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall", 0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
};

static const scoped_attribute_specs rs6000_gnu_attribute_table =
{
  "gnu", { rs6000_gnu_attributes }
};

static const scoped_attribute_specs *const rs6000_attribute_table[] =
{
  &rs6000_gnu_attribute_table
};
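
/* Example (illustrative) uses of the GNU attributes registered above:

     typedef int v4si __attribute__ ((altivec (vector__)));
     void far_away (void) __attribute__ ((longcall));

   "altivec" is what the vector keyword expands to internally; "longcall"
   forces an indirect call sequence for functions that may be out of range
   of a direct branch.  */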
\f
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
#define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
  rs6000_print_patchable_function_entry

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
#define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_CREATE_COSTS
#define TARGET_VECTORIZE_CREATE_COSTS rs6000_vectorize_create_costs

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_TRANSLATE_MODE_ATTRIBUTE
#define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  rs6000_libgcc_floating_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE rs6000_floatn_mode

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
  rs6000_builtin_md_vectorized_function

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
#endif

/* Use a 32-bit anchor range.  This leads to sequences like:

	addis	tmp,anchor,high
	add	dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_PREDICT_DOLOOP_P
#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p

#undef TARGET_HAVE_COUNT_REG_DECR_P
#define TARGET_HAVE_COUNT_REG_DECR_P true
1683
1684 /* 1000000000 is infinite cost in IVOPTs. */
1685 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1686 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1687
1688 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1689 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1690
1691 #undef TARGET_PREFERRED_DOLOOP_MODE
1692 #define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode
1693
1694 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1695 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1696
1697 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1698 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1699 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1700 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1701 #undef TARGET_UNWIND_WORD_MODE
1702 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1703
1704 #undef TARGET_OFFLOAD_OPTIONS
1705 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1706
1707 #undef TARGET_C_MODE_FOR_SUFFIX
1708 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1709
1710 #undef TARGET_INVALID_BINARY_OP
1711 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1712
1713 #undef TARGET_OPTAB_SUPPORTED_P
1714 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1715
1716 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1717 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1718
1719 #undef TARGET_COMPARE_VERSION_PRIORITY
1720 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1721
1722 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1723 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1724 rs6000_generate_version_dispatcher_body
1725
1726 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1727 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1728 rs6000_get_function_versions_dispatcher
1729
1730 #undef TARGET_OPTION_FUNCTION_VERSIONS
1731 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1732
1733 #undef TARGET_HARD_REGNO_NREGS
1734 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1735 #undef TARGET_HARD_REGNO_MODE_OK
1736 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1737
1738 #undef TARGET_MODES_TIEABLE_P
1739 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1740
1741 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1742 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1743 rs6000_hard_regno_call_part_clobbered
1744
1745 #undef TARGET_SLOW_UNALIGNED_ACCESS
1746 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1747
1748 #undef TARGET_CAN_CHANGE_MODE_CLASS
1749 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1750
1751 #undef TARGET_CONSTANT_ALIGNMENT
1752 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1753
1754 #undef TARGET_STARTING_FRAME_OFFSET
1755 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1756
1757 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1758 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1759
1760 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1761 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1762
1763 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1764 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1765 rs6000_cannot_substitute_mem_equiv_p
1766
1767 #undef TARGET_INVALID_CONVERSION
1768 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
1769
1770 #undef TARGET_NEED_IPA_FN_TARGET_INFO
1771 #define TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info
1772
1773 #undef TARGET_UPDATE_IPA_FN_TARGET_INFO
1774 #define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info
1775
1776 #undef TARGET_CONST_ANCHOR
1777 #define TARGET_CONST_ANCHOR 0x8000
1778
1779 \f
1780
1781 /* Processor table. */
1782 struct rs6000_ptt
1783 {
1784 const char *const name; /* Canonical processor name. */
1785 const enum processor_type processor; /* Processor type enum value. */
1786 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1787 };
1788
1789 static struct rs6000_ptt const processor_target_table[] =
1790 {
1791 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1792 #include "rs6000-cpus.def"
1793 #undef RS6000_CPU
1794 };
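
/* Each RS6000_CPU line in rs6000-cpus.def becomes one entry here; as an
   illustrative (not verbatim) example, a line of the form

     RS6000_CPU ("power9", PROCESSOR_POWER9, MASK_POWERPC64 | ...)

   expands via the RS6000_CPU macro above into the initializer
   { "power9", PROCESSOR_POWER9, ... }. */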
1795
1796 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1797 name is invalid. */
1798
1799 static int
1800 rs6000_cpu_name_lookup (const char *name)
1801 {
1802 size_t i;
1803
1804 if (name != NULL)
1805 {
1806 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1807 if (! strcmp (name, processor_target_table[i].name))
1808 return (int)i;
1809 }
1810
1811 return -1;
1812 }
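
/* For example, rs6000_cpu_name_lookup ("power9") returns the index of the
   "power9" entry in processor_target_table, while a NULL name or an
   unrecognized one returns -1. The match is an exact strcmp, so "POWER9"
   would not be found. */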
1813
1814 \f
1815 /* Return number of consecutive hard regs needed starting at reg REGNO
1816 to hold something of mode MODE.
1817 This is ordinarily the length in words of a value of mode MODE
1818 but can be less for certain modes in special long registers.
1819
1820 POWER and PowerPC GPRs hold 32 bits worth;
1821    PowerPC64 GPRs and FPRs hold 64 bits worth. */
1822
1823 static int
1824 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1825 {
1826 unsigned HOST_WIDE_INT reg_size;
1827
1828 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1829 128-bit floating point that can go in vector registers, which has VSX
1830 memory addressing. */
1831 if (FP_REGNO_P (regno))
1832 reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
1833 ? UNITS_PER_VSX_WORD
1834 : UNITS_PER_FP_WORD);
1835
1836 else if (ALTIVEC_REGNO_P (regno))
1837 reg_size = UNITS_PER_ALTIVEC_WORD;
1838
1839 else
1840 reg_size = UNITS_PER_WORD;
1841
1842 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1843 }
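
/* Illustration of the rounding-up division above: on a 64-bit target
   (UNITS_PER_WORD == 8), a 16-byte TImode value needs (16 + 8 - 1) / 8 == 2
   GPRs, while an 8-byte DFmode value fits in a single FPR. */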
1844
1845 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1846 MODE. */
1847 static int
1848 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1849 {
1850 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1851
1852 if (COMPLEX_MODE_P (mode))
1853 mode = GET_MODE_INNER (mode);
1854
1855 /* Vector pair modes need even/odd VSX register pairs. Only allow vector
1856 registers. */
1857 if (mode == OOmode)
1858 return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
1859
1860 /* MMA accumulator modes need FPR registers divisible by 4. */
1861 if (mode == XOmode)
1862 return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
1863
1864    /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1865       register pairs, and we use PTImode where we need to deal with quad word
1866       memory operations. Don't allow quad words in the argument or frame
1867       pointer registers, just registers 0..31. */
1868 if (mode == PTImode)
1869 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1870 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1871 && ((regno & 1) == 0));
1872
1873 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1874 implementations. Don't allow an item to be split between a FP register
1875 and an Altivec register. Allow TImode in all VSX registers if the user
1876 asked for it. */
1877 if (TARGET_VSX && VSX_REGNO_P (regno)
1878 && (VECTOR_MEM_VSX_P (mode)
1879 || VECTOR_ALIGNMENT_P (mode)
1880 || reg_addr[mode].scalar_in_vmx_p
1881 || mode == TImode
1882 || (TARGET_VADDUQM && mode == V1TImode)))
1883 {
1884 if (FP_REGNO_P (regno))
1885 return FP_REGNO_P (last_regno);
1886
1887 if (ALTIVEC_REGNO_P (regno))
1888 {
1889 if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
1890 return 0;
1891
1892 return ALTIVEC_REGNO_P (last_regno);
1893 }
1894 }
1895
1896 /* The GPRs can hold any mode, but values bigger than one register
1897 cannot go past R31. */
1898 if (INT_REGNO_P (regno))
1899 return INT_REGNO_P (last_regno);
1900
1901 /* The float registers (except for VSX vector modes) can only hold floating
1902 modes and DImode. */
1903 if (FP_REGNO_P (regno))
1904 {
1905 if (VECTOR_ALIGNMENT_P (mode))
1906 return false;
1907
1908 if (SCALAR_FLOAT_MODE_P (mode)
1909 && (mode != TDmode || (regno % 2) == 0)
1910 && FP_REGNO_P (last_regno))
1911 return 1;
1912
1913 if (GET_MODE_CLASS (mode) == MODE_INT)
1914 {
1915          if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1916 return 1;
1917
1918 if (TARGET_POPCNTD && mode == SImode)
1919 return 1;
1920
1921 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1922 return 1;
1923 }
1924
1925 return 0;
1926 }
1927
1928 /* The CR register can only hold CC modes. */
1929 if (CR_REGNO_P (regno))
1930 return GET_MODE_CLASS (mode) == MODE_CC;
1931
1932 if (CA_REGNO_P (regno))
1933 return mode == Pmode || mode == SImode;
1934
1935    /* AltiVec vectors go only in AltiVec registers. */
1936 if (ALTIVEC_REGNO_P (regno))
1937 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1938 || mode == V1TImode);
1939
1940    /* We cannot put non-VSX TImode or PTImode anywhere except general registers,
1941       and the value must fit within the register set. */
1942
1943 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1944 }
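
/* A few concrete consequences of the checks above: OOmode is accepted only
   at even VSX register numbers, XOmode only at FPR numbers divisible by 4,
   PTImode only at even GPRs, and a TDmode value must start at an even
   FPR. */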
1945
1946 /* Implement TARGET_HARD_REGNO_NREGS. */
1947
1948 static unsigned int
1949 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1950 {
1951 return rs6000_hard_regno_nregs[mode][regno];
1952 }
1953
1954 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1955
1956 static bool
1957 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1958 {
1959 return rs6000_hard_regno_mode_ok_p[mode][regno];
1960 }
1961
1962 /* Implement TARGET_MODES_TIEABLE_P.
1963
1964 PTImode cannot tie with other modes because PTImode is restricted to even
1965 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1966 57744).
1967
1968 Similarly, don't allow OOmode (vector pair, restricted to even VSX
1969 registers) or XOmode (vector quad, restricted to FPR registers divisible
1970 by 4) to tie with other modes.
1971
1972 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1973 128-bit floating point on VSX systems ties with other vectors. */
1974
1975 static bool
1976 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1977 {
1978 if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
1979 || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
1980 return mode1 == mode2;
1981
1982 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1983 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1984 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1985 return false;
1986
1987 if (SCALAR_FLOAT_MODE_P (mode1))
1988 return SCALAR_FLOAT_MODE_P (mode2);
1989 if (SCALAR_FLOAT_MODE_P (mode2))
1990 return false;
1991
1992 if (GET_MODE_CLASS (mode1) == MODE_CC)
1993 return GET_MODE_CLASS (mode2) == MODE_CC;
1994 if (GET_MODE_CLASS (mode2) == MODE_CC)
1995 return false;
1996
1997 return true;
1998 }
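
/* For example, SFmode and DFmode are tieable (both scalar float), and
   V4SImode and V2DFmode are tieable (both AltiVec/VSX vectors), but DFmode
   does not tie with DImode, and CC modes tie only with other CC modes. */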
1999
2000 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
2001
2002 static bool
2003 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
2004 machine_mode mode)
2005 {
2006 if (TARGET_32BIT
2007 && TARGET_POWERPC64
2008 && GET_MODE_SIZE (mode) > 4
2009 && INT_REGNO_P (regno))
2010 return true;
2011
2012 if (TARGET_VSX
2013 && FP_REGNO_P (regno)
2014 && GET_MODE_SIZE (mode) > 8
2015 && !FLOAT128_2REG_P (mode))
2016 return true;
2017
2018 return false;
2019 }
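
/* Two illustrative cases of the tests above: with -m32 -mpowerpc64, a
   DImode value occupies one 64-bit GPR, but the 32-bit ABI only saves and
   restores 32 bits of it across calls, so the register is only partly
   preserved. Likewise, under VSX a 16-byte value in a traditional FPR is
   only preserved for the 8 bytes the FP save/restore handles. */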
2020
2021 /* Print interesting facts about registers. */
2022 static void
2023 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2024 {
2025 int r, m;
2026
2027 for (r = first_regno; r <= last_regno; ++r)
2028 {
2029 const char *comma = "";
2030 int len;
2031
2032 if (first_regno == last_regno)
2033 fprintf (stderr, "%s:\t", reg_name);
2034 else
2035 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2036
2037 len = 8;
2038 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2039 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2040 {
2041 if (len > 70)
2042 {
2043 fprintf (stderr, ",\n\t");
2044 len = 8;
2045 comma = "";
2046 }
2047
2048 if (rs6000_hard_regno_nregs[m][r] > 1)
2049 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2050 rs6000_hard_regno_nregs[m][r]);
2051 else
2052 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2053
2054 comma = ", ";
2055 }
2056
2057 if (call_used_or_fixed_reg_p (r))
2058 {
2059 if (len > 70)
2060 {
2061 fprintf (stderr, ",\n\t");
2062 len = 8;
2063 comma = "";
2064 }
2065
2066 len += fprintf (stderr, "%s%s", comma, "call-used");
2067 comma = ", ";
2068 }
2069
2070 if (fixed_regs[r])
2071 {
2072 if (len > 70)
2073 {
2074 fprintf (stderr, ",\n\t");
2075 len = 8;
2076 comma = "";
2077 }
2078
2079 len += fprintf (stderr, "%s%s", comma, "fixed");
2080 comma = ", ";
2081 }
2082
2083 if (len > 70)
2084 {
2085 fprintf (stderr, ",\n\t");
2086 comma = "";
2087 }
2088
2089 len += fprintf (stderr, "%sreg-class = %s", comma,
2090 reg_class_names[(int)rs6000_regno_regclass[r]]);
2091 comma = ", ";
2092
2093 if (len > 70)
2094 {
2095 fprintf (stderr, ",\n\t");
2096 comma = "";
2097 }
2098
2099 fprintf (stderr, "%sregno = %d\n", comma, r);
2100 }
2101 }
2102
2103 static const char *
2104 rs6000_debug_vector_unit (enum rs6000_vector v)
2105 {
2106 const char *ret;
2107
2108 switch (v)
2109 {
2110 case VECTOR_NONE: ret = "none"; break;
2111 case VECTOR_ALTIVEC: ret = "altivec"; break;
2112 case VECTOR_VSX: ret = "vsx"; break;
2113 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2114 default: ret = "unknown"; break;
2115 }
2116
2117 return ret;
2118 }
2119
2120 /* Inner function printing just the address mask for a particular reload
2121 register class. */
2122 DEBUG_FUNCTION char *
2123 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2124 {
2125 static char ret[8];
2126 char *p = ret;
2127
2128 if ((mask & RELOAD_REG_VALID) != 0)
2129 *p++ = 'v';
2130 else if (keep_spaces)
2131 *p++ = ' ';
2132
2133 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2134 *p++ = 'm';
2135 else if (keep_spaces)
2136 *p++ = ' ';
2137
2138 if ((mask & RELOAD_REG_INDEXED) != 0)
2139 *p++ = 'i';
2140 else if (keep_spaces)
2141 *p++ = ' ';
2142
2143 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2144 *p++ = 'O';
2145 else if ((mask & RELOAD_REG_OFFSET) != 0)
2146 *p++ = 'o';
2147 else if (keep_spaces)
2148 *p++ = ' ';
2149
2150 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2151 *p++ = '+';
2152 else if (keep_spaces)
2153 *p++ = ' ';
2154
2155 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2156 *p++ = '+';
2157 else if (keep_spaces)
2158 *p++ = ' ';
2159
2160 if ((mask & RELOAD_REG_AND_M16) != 0)
2161 *p++ = '&';
2162 else if (keep_spaces)
2163 *p++ = ' ';
2164
2165 *p = '\0';
2166
2167 return ret;
2168 }
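
/* Illustrative output: a mask with only RELOAD_REG_VALID,
   RELOAD_REG_INDEXED and RELOAD_REG_OFFSET set prints as "vio" when
   KEEP_SPACES is false; with KEEP_SPACES, each absent flag becomes a blank
   column so the masks line up when printed in a table. */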
2169
2170 /* Print the address masks in a human readable fashion. */
2171 DEBUG_FUNCTION void
2172 rs6000_debug_print_mode (ssize_t m)
2173 {
2174 ssize_t rc;
2175 int spaces = 0;
2176
2177 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2178 for (rc = 0; rc < N_RELOAD_REG; rc++)
2179 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2180 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2181
2182 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2183 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2184 {
2185 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2186 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2187 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2188 spaces = 0;
2189 }
2190 else
2191 spaces += strlen (" Reload=sl");
2192
2193 if (reg_addr[m].scalar_in_vmx_p)
2194 {
2195 fprintf (stderr, "%*s Upper=y", spaces, "");
2196 spaces = 0;
2197 }
2198 else
2199 spaces += strlen (" Upper=y");
2200
2201 if (rs6000_vector_unit[m] != VECTOR_NONE
2202 || rs6000_vector_mem[m] != VECTOR_NONE)
2203 {
2204 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2205 spaces, "",
2206 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2207 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2208 }
2209
2210 fputs ("\n", stderr);
2211 }
2212
2213 #define DEBUG_FMT_ID "%-32s= "
2214 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2215 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2216 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2217
2218 /* Print various interesting information with -mdebug=reg. */
2219 static void
2220 rs6000_debug_reg_global (void)
2221 {
2222 static const char *const tf[2] = { "false", "true" };
2223 const char *nl = (const char *)0;
2224 int m;
2225 size_t m1, m2, v;
2226 char costly_num[20];
2227 char nop_num[20];
2228 char flags_buffer[40];
2229 const char *costly_str;
2230 const char *nop_str;
2231 const char *trace_str;
2232 const char *abi_str;
2233 const char *cmodel_str;
2234 struct cl_target_option cl_opts;
2235
2236 /* Modes we want tieable information on. */
2237 static const machine_mode print_tieable_modes[] = {
2238 QImode,
2239 HImode,
2240 SImode,
2241 DImode,
2242 TImode,
2243 PTImode,
2244 SFmode,
2245 DFmode,
2246 TFmode,
2247 IFmode,
2248 KFmode,
2249 SDmode,
2250 DDmode,
2251 TDmode,
2252 V2SImode,
2253 V2SFmode,
2254 V16QImode,
2255 V8HImode,
2256 V4SImode,
2257 V2DImode,
2258 V1TImode,
2259 V32QImode,
2260 V16HImode,
2261 V8SImode,
2262 V4DImode,
2263 V2TImode,
2264 V4SFmode,
2265 V2DFmode,
2266 V8SFmode,
2267 V4DFmode,
2268 OOmode,
2269 XOmode,
2270 CCmode,
2271 CCUNSmode,
2272 CCEQmode,
2273 CCFPmode,
2274 };
2275
2276 /* Virtual regs we are interested in. */
2277   static const struct {
2278 int regno; /* register number. */
2279 const char *name; /* register name. */
2280 } virtual_regs[] = {
2281 { STACK_POINTER_REGNUM, "stack pointer:" },
2282 { TOC_REGNUM, "toc: " },
2283 { STATIC_CHAIN_REGNUM, "static chain: " },
2284 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2285 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2286 { ARG_POINTER_REGNUM, "arg pointer: " },
2287 { FRAME_POINTER_REGNUM, "frame pointer:" },
2288 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2289 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2290 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2291 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2292 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2293 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2294 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2295     { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2296 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2297 };
2298
2299 fputs ("\nHard register information:\n", stderr);
2300 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2301 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2302 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2303 LAST_ALTIVEC_REGNO,
2304 "vs");
2305 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2306 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2307 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2308 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2309 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2310 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2311
2312 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2313 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2314 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2315
2316 fprintf (stderr,
2317 "\n"
2318 "d reg_class = %s\n"
2319 "v reg_class = %s\n"
2320 "wa reg_class = %s\n"
2321 "we reg_class = %s\n"
2322 "wr reg_class = %s\n"
2323 "wx reg_class = %s\n"
2324 "wA reg_class = %s\n"
2325 "\n",
2326 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2327 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2328 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2329 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2330 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2331 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2332 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2333
2334 nl = "\n";
2335 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2336 rs6000_debug_print_mode (m);
2337
2338 fputs ("\n", stderr);
2339
2340 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2341 {
2342 machine_mode mode1 = print_tieable_modes[m1];
2343 bool first_time = true;
2344
2345 nl = (const char *)0;
2346 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2347 {
2348 machine_mode mode2 = print_tieable_modes[m2];
2349 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2350 {
2351 if (first_time)
2352 {
2353 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2354 nl = "\n";
2355 first_time = false;
2356 }
2357
2358 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2359 }
2360 }
2361
2362 if (!first_time)
2363 fputs ("\n", stderr);
2364 }
2365
2366 if (nl)
2367 fputs (nl, stderr);
2368
2369 if (rs6000_recip_control)
2370 {
2371 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2372
2373 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2374 if (rs6000_recip_bits[m])
2375 {
2376 fprintf (stderr,
2377 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2378 GET_MODE_NAME (m),
2379 (RS6000_RECIP_AUTO_RE_P (m)
2380 ? "auto"
2381 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2382 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2383 ? "auto"
2384 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2385 }
2386
2387 fputs ("\n", stderr);
2388 }
2389
2390 if (rs6000_cpu_index >= 0)
2391 {
2392 const char *name = processor_target_table[rs6000_cpu_index].name;
2393 HOST_WIDE_INT flags
2394 = processor_target_table[rs6000_cpu_index].target_enable;
2395
2396 sprintf (flags_buffer, "-mcpu=%s flags", name);
2397 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2398 }
2399 else
2400 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2401
2402 if (rs6000_tune_index >= 0)
2403 {
2404 const char *name = processor_target_table[rs6000_tune_index].name;
2405 HOST_WIDE_INT flags
2406 = processor_target_table[rs6000_tune_index].target_enable;
2407
2408 sprintf (flags_buffer, "-mtune=%s flags", name);
2409 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2410 }
2411 else
2412 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2413
2414 cl_target_option_save (&cl_opts, &global_options, &global_options_set);
2415 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2416 rs6000_isa_flags);
2417
2418 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2419 rs6000_isa_flags_explicit);
2420
2421 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2422
2423 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2424 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2425
2426 switch (rs6000_sched_costly_dep)
2427 {
2428 case max_dep_latency:
2429 costly_str = "max_dep_latency";
2430 break;
2431
2432 case no_dep_costly:
2433 costly_str = "no_dep_costly";
2434 break;
2435
2436 case all_deps_costly:
2437 costly_str = "all_deps_costly";
2438 break;
2439
2440 case true_store_to_load_dep_costly:
2441 costly_str = "true_store_to_load_dep_costly";
2442 break;
2443
2444 case store_to_load_dep_costly:
2445 costly_str = "store_to_load_dep_costly";
2446 break;
2447
2448 default:
2449 costly_str = costly_num;
2450 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2451 break;
2452 }
2453
2454 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2455
2456 switch (rs6000_sched_insert_nops)
2457 {
2458 case sched_finish_regroup_exact:
2459 nop_str = "sched_finish_regroup_exact";
2460 break;
2461
2462 case sched_finish_pad_groups:
2463 nop_str = "sched_finish_pad_groups";
2464 break;
2465
2466 case sched_finish_none:
2467 nop_str = "sched_finish_none";
2468 break;
2469
2470 default:
2471 nop_str = nop_num;
2472 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2473 break;
2474 }
2475
2476 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2477
2478 switch (rs6000_sdata)
2479 {
2480 default:
2481 case SDATA_NONE:
2482 break;
2483
2484 case SDATA_DATA:
2485 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2486 break;
2487
2488 case SDATA_SYSV:
2489 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2490 break;
2491
2492 case SDATA_EABI:
2493 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2494 break;
2495
2496 }
2497
2498 switch (rs6000_traceback)
2499 {
2500 case traceback_default: trace_str = "default"; break;
2501 case traceback_none: trace_str = "none"; break;
2502 case traceback_part: trace_str = "part"; break;
2503 case traceback_full: trace_str = "full"; break;
2504 default: trace_str = "unknown"; break;
2505 }
2506
2507 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2508
2509 switch (rs6000_current_cmodel)
2510 {
2511 case CMODEL_SMALL: cmodel_str = "small"; break;
2512 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2513 case CMODEL_LARGE: cmodel_str = "large"; break;
2514 default: cmodel_str = "unknown"; break;
2515 }
2516
2517 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2518
2519 switch (rs6000_current_abi)
2520 {
2521 case ABI_NONE: abi_str = "none"; break;
2522 case ABI_AIX: abi_str = "aix"; break;
2523 case ABI_ELFv2: abi_str = "ELFv2"; break;
2524 case ABI_V4: abi_str = "V4"; break;
2525 case ABI_DARWIN: abi_str = "darwin"; break;
2526 default: abi_str = "unknown"; break;
2527 }
2528
2529 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2530
2531 if (rs6000_altivec_abi)
2532 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2533
2534 if (rs6000_aix_extabi)
2535 fprintf (stderr, DEBUG_FMT_S, "AIX vec-extabi", "true");
2536
2537 if (rs6000_darwin64_abi)
2538 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2539
2540 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2541 (TARGET_SOFT_FLOAT ? "true" : "false"));
2542
2543 if (TARGET_LINK_STACK)
2544 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2545
2546 if (TARGET_P8_FUSION)
2547 {
2548 char options[80];
2549
2550 strcpy (options, "power8");
2551 if (TARGET_P8_FUSION_SIGN)
2552 strcat (options, ", sign");
2553
2554 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2555 }
2556
2557 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2558 TARGET_SECURE_PLT ? "secure" : "bss");
2559 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2560 aix_struct_return ? "aix" : "sysv");
2561 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2562 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2563 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2564 tf[!!rs6000_align_branch_targets]);
2565 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2566 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2567 rs6000_long_double_type_size);
2568 if (rs6000_long_double_type_size > 64)
2569 {
2570 fprintf (stderr, DEBUG_FMT_S, "long double type",
2571 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2572 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2573 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2574 }
2575 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2576 (int)rs6000_sched_restricted_insns_priority);
2577 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2578 (int)END_BUILTINS);
2579
2580 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2581 (int)TARGET_FLOAT128_ENABLE_TYPE);
2582
2583 if (TARGET_VSX)
2584 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2585 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2586
2587 if (TARGET_DIRECT_MOVE_128)
2588 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2589 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2590 }
2591
2592 \f
2593 /* Update the addr mask bits in reg_addr to help secondary reload and the
2594    legitimate address support figure out the appropriate addressing to
2595    use. */
2596
2597 static void
2598 rs6000_setup_reg_addr_masks (void)
2599 {
2600 ssize_t rc, reg, m, nregs;
2601 addr_mask_type any_addr_mask, addr_mask;
2602
2603 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2604 {
2605 machine_mode m2 = (machine_mode) m;
2606 bool complex_p = false;
2607 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2608 size_t msize;
2609
2610 if (COMPLEX_MODE_P (m2))
2611 {
2612 complex_p = true;
2613 m2 = GET_MODE_INNER (m2);
2614 }
2615
2616 msize = GET_MODE_SIZE (m2);
2617
2618       /* SDmode is special in that we want to access it only via REG+REG
2619          addressing on power7 and above, since we want to use the LFIWZX and
2620          STFIWX instructions to load and store it. */
2621 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2622
2623 any_addr_mask = 0;
2624 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2625 {
2626 addr_mask = 0;
2627 reg = reload_reg_map[rc].reg;
2628
2629 /* Can mode values go in the GPR/FPR/Altivec registers? */
2630 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2631 {
2632 bool small_int_vsx_p = (small_int_p
2633 && (rc == RELOAD_REG_FPR
2634 || rc == RELOAD_REG_VMX));
2635
2636 nregs = rs6000_hard_regno_nregs[m][reg];
2637 addr_mask |= RELOAD_REG_VALID;
2638
2639 /* Indicate if the mode takes more than 1 physical register. If
2640 it takes a single register, indicate it can do REG+REG
2641 addressing. Small integers in VSX registers can only do
2642 REG+REG addressing. */
2643 if (small_int_vsx_p)
2644 addr_mask |= RELOAD_REG_INDEXED;
2645 else if (nregs > 1 || m == BLKmode || complex_p)
2646 addr_mask |= RELOAD_REG_MULTIPLE;
2647 else
2648 addr_mask |= RELOAD_REG_INDEXED;
2649
2650 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2651 addressing. If we allow scalars into Altivec registers,
2652 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2653
2654 For VSX systems, we don't allow update addressing for
2655 DFmode/SFmode if those registers can go in both the
2656 traditional floating point registers and Altivec registers.
2657 The load/store instructions for the Altivec registers do not
2658 have update forms. If we allowed update addressing, it seems
2659 to break IV-OPT code using floating point if the index type is
2660 int instead of long (PR target/81550 and target/84042). */
2661
2662 if (TARGET_UPDATE
2663 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2664 && msize <= 8
2665 && !VECTOR_MODE_P (m2)
2666 && !VECTOR_ALIGNMENT_P (m2)
2667 && !complex_p
2668 && (m != E_DFmode || !TARGET_VSX)
2669 && (m != E_SFmode || !TARGET_P8_VECTOR)
2670 && !small_int_vsx_p)
2671 {
2672 addr_mask |= RELOAD_REG_PRE_INCDEC;
2673
2674 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2675 we don't allow PRE_MODIFY for some multi-register
2676 operations. */
2677 switch (m)
2678 {
2679 default:
2680 addr_mask |= RELOAD_REG_PRE_MODIFY;
2681 break;
2682
2683 case E_DImode:
2684 if (TARGET_POWERPC64)
2685 addr_mask |= RELOAD_REG_PRE_MODIFY;
2686 break;
2687
2688 case E_DFmode:
2689 case E_DDmode:
2690 if (TARGET_HARD_FLOAT)
2691 addr_mask |= RELOAD_REG_PRE_MODIFY;
2692 break;
2693 }
2694 }
2695 }
2696
2697 /* GPR and FPR registers can do REG+OFFSET addressing, except
2698 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2699 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2700 if ((addr_mask != 0) && !indexed_only_p
2701 && msize <= 8
2702 && (rc == RELOAD_REG_GPR
2703 || ((msize == 8 || m2 == SFmode)
2704 && (rc == RELOAD_REG_FPR
2705 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2706 addr_mask |= RELOAD_REG_OFFSET;
2707
2708          /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2709             instructions are enabled. The offset field for 128-bit VSX registers is
2710             only 12 bits. While GPRs can handle the full offset range, VSX
2711 registers can only handle the restricted range. */
2712 else if ((addr_mask != 0) && !indexed_only_p
2713 && msize == 16 && TARGET_P9_VECTOR
2714 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2715 || (m2 == TImode && TARGET_VSX)))
2716 {
2717 addr_mask |= RELOAD_REG_OFFSET;
2718 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2719 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2720 }
2721
2722          /* Vector pairs can do both indexed and offset loads if the
2723             instructions are enabled; otherwise they can only do offset loads,
2724             since the access will be split into two vector moves. Vector quads can
2725 only do offset loads. */
2726 else if ((addr_mask != 0) && TARGET_MMA
2727 && (m2 == OOmode || m2 == XOmode))
2728 {
2729 addr_mask |= RELOAD_REG_OFFSET;
2730 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2731 {
2732 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2733 if (m2 == OOmode)
2734 addr_mask |= RELOAD_REG_INDEXED;
2735 }
2736 }
2737
2738 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2739 addressing on 128-bit types. */
2740 if (rc == RELOAD_REG_VMX && msize == 16
2741 && (addr_mask & RELOAD_REG_VALID) != 0)
2742 addr_mask |= RELOAD_REG_AND_M16;
2743
2744 reg_addr[m].addr_mask[rc] = addr_mask;
2745 any_addr_mask |= addr_mask;
2746 }
2747
2748 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2749 }
2750 }
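
/* Rough example of the result (assuming a power9-like configuration):
   V2DFmode in the FPR/VMX reload classes is valid, indexed, and
   offsettable with the quad (DQ-form) restriction, and the VMX class
   additionally allows the (REG & -16) AltiVec form; SImode in the FPR or
   VMX classes is indexed-only, since small integers in VSX registers
   cannot use REG+OFFSET addressing. */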
2751
2752 \f
2753 /* Initialize the various global tables that are based on register size. */
2754 static void
2755 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2756 {
2757 ssize_t r, m, c;
2758 int align64;
2759 int align32;
2760
2761 /* Precalculate REGNO_REG_CLASS. */
2762 rs6000_regno_regclass[0] = GENERAL_REGS;
2763 for (r = 1; r < 32; ++r)
2764 rs6000_regno_regclass[r] = BASE_REGS;
2765
2766 for (r = 32; r < 64; ++r)
2767 rs6000_regno_regclass[r] = FLOAT_REGS;
2768
2769 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2770 rs6000_regno_regclass[r] = NO_REGS;
2771
2772 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2773 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2774
2775 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2776 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2777 rs6000_regno_regclass[r] = CR_REGS;
2778
2779 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2780 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2781 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2782 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2783 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2784 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2785 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2786
2787 /* Precalculate register class to simpler reload register class. We don't
2788 need all of the register classes that are combinations of different
2789 classes, just the simple ones that have constraint letters. */
2790 for (c = 0; c < N_REG_CLASSES; c++)
2791 reg_class_to_reg_type[c] = NO_REG_TYPE;
2792
2793 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2794 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2795 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2796 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2797 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2798 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2799 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2800 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2801 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2802 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2803
2804 if (TARGET_VSX)
2805 {
2806 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2807 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2808 }
2809 else
2810 {
2811 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2812 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2813 }
2814
2815   /* Precalculate the valid memory formats as well as the vector information;
2816 this must be set up before the rs6000_hard_regno_nregs_internal calls
2817 below. */
2818 gcc_assert ((int)VECTOR_NONE == 0);
2819 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2820 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2821
2822 gcc_assert ((int)CODE_FOR_nothing == 0);
2823 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2824
2825 gcc_assert ((int)NO_REGS == 0);
2826 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2827
2828   /* The VSX hardware allows native alignment for vectors, but TARGET_VSX_ALIGN_128
2829      controls whether the compiler uses native alignment or still uses 128-bit alignment. */
2830 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2831 {
2832 align64 = 64;
2833 align32 = 32;
2834 }
2835 else
2836 {
2837 align64 = 128;
2838 align32 = 128;
2839 }
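
/* The rs6000_vector_align values are alignments in bits: 64/32 allow the
   natural element alignment for 64-bit and 32-bit element vectors, while
   128 forces full 128-bit vector alignment. */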
2840
2841 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2842 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2843 if (TARGET_FLOAT128_TYPE)
2844 {
2845 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2846 rs6000_vector_align[KFmode] = 128;
2847
2848 if (FLOAT128_IEEE_P (TFmode))
2849 {
2850 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2851 rs6000_vector_align[TFmode] = 128;
2852 }
2853 }
2854
2855 /* V2DF mode, VSX only. */
2856 if (TARGET_VSX)
2857 {
2858 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2859 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2860 rs6000_vector_align[V2DFmode] = align64;
2861 }
2862
2863 /* V4SF mode, either VSX or Altivec. */
2864 if (TARGET_VSX)
2865 {
2866 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2867 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2868 rs6000_vector_align[V4SFmode] = align32;
2869 }
2870 else if (TARGET_ALTIVEC)
2871 {
2872 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2873 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2874 rs6000_vector_align[V4SFmode] = align32;
2875 }
2876
2877 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2878 and stores. */
2879 if (TARGET_ALTIVEC)
2880 {
2881 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2882 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2883 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2884 rs6000_vector_align[V4SImode] = align32;
2885 rs6000_vector_align[V8HImode] = align32;
2886 rs6000_vector_align[V16QImode] = align32;
2887
2888 if (TARGET_VSX)
2889 {
2890 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2891 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2892 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2893 }
2894 else
2895 {
2896 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2897 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2898 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2899 }
2900 }
2901
2902 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2903 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2904 if (TARGET_VSX)
2905 {
2906 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2907 rs6000_vector_unit[V2DImode]
2908 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2909 rs6000_vector_align[V2DImode] = align64;
2910
2911 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2912 rs6000_vector_unit[V1TImode]
2913 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2914 rs6000_vector_align[V1TImode] = 128;
2915 }
2916
2917 /* DFmode, see if we want to use the VSX unit. Memory is handled
2918 differently, so don't set rs6000_vector_mem. */
2919 if (TARGET_VSX)
2920 {
2921 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2922 rs6000_vector_align[DFmode] = 64;
2923 }
2924
2925 /* SFmode, see if we want to use the VSX unit. */
2926 if (TARGET_P8_VECTOR)
2927 {
2928 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2929 rs6000_vector_align[SFmode] = 32;
2930 }
2931
2932 /* Allow TImode in VSX register and set the VSX memory macros. */
2933 if (TARGET_VSX)
2934 {
2935 rs6000_vector_mem[TImode] = VECTOR_VSX;
2936 rs6000_vector_align[TImode] = align64;
2937 }
2938
2939 /* Add support for vector pairs and vector quad registers. */
2940 if (TARGET_MMA)
2941 {
2942 rs6000_vector_unit[OOmode] = VECTOR_NONE;
2943 rs6000_vector_mem[OOmode] = VECTOR_VSX;
2944 rs6000_vector_align[OOmode] = 256;
2945
2946 rs6000_vector_unit[XOmode] = VECTOR_NONE;
2947 rs6000_vector_mem[XOmode] = VECTOR_VSX;
2948 rs6000_vector_align[XOmode] = 512;
2949 }
2950
2951 /* Register class constraints for the constraints that depend on compile
2952 switches. When the VSX code was added, different constraints were added
2953 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2954 of the VSX registers are used. The register classes for scalar floating
2955 point types is set, based on whether we allow that type into the upper
2956 (Altivec) registers. GCC has register classes to target the Altivec
2957 registers for load/store operations, to select using a VSX memory
2958 operation instead of the traditional floating point operation. The
2959 constraints are:
2960
2961 d - Register class to use with traditional DFmode instructions.
2962 v - Altivec register.
2963 wa - Any VSX register.
2964 wc - Reserved to represent individual CR bits (used in LLVM).
2965 wn - always NO_REGS.
2966 wr - GPR if 64-bit mode is permitted.
2967 wx - Float register if we can do 32-bit int stores. */
2968
2969 if (TARGET_HARD_FLOAT)
2970 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS;
2971 if (TARGET_ALTIVEC)
2972 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2973 if (TARGET_VSX)
2974 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2975
2976 if (TARGET_POWERPC64)
2977 {
2978 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2979 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2980 }
2981
2982 if (TARGET_STFIWX)
2983 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2984
2985   /* Support for new direct moves (ISA 3.0 + 64-bit). */
2986 if (TARGET_DIRECT_MOVE_128)
2987 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2988
2989 /* Set up the reload helper and direct move functions. */
2990 if (TARGET_VSX || TARGET_ALTIVEC)
2991 {
2992 if (TARGET_64BIT)
2993 {
2994 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2995 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2996 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2997 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2998 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2999 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3000 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3001 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3002 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3003 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3004 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3005 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3006 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3007 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3008 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3009 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3010 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3011 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3012 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3013 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3014
3015 if (FLOAT128_VECTOR_P (KFmode))
3016 {
3017 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3018 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3019 }
3020
3021 if (FLOAT128_VECTOR_P (TFmode))
3022 {
3023 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3024 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3025 }
3026
3027 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3028 available. */
3029 if (TARGET_NO_SDMODE_STACK)
3030 {
3031 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3032 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3033 }
3034
3035 if (TARGET_VSX)
3036 {
3037 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3038 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3039 }
3040
3041 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3042 {
3043 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3044 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3045 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3046 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3047 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3048 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3049 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3050 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3051 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3052
3053 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3054 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3055 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3056 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3057 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3058 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3059 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3060 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3061 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3062
3063 if (FLOAT128_VECTOR_P (KFmode))
3064 {
3065 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3066 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3067 }
3068
3069 if (FLOAT128_VECTOR_P (TFmode))
3070 {
3071 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3072 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3073 }
3074
3075 if (TARGET_MMA)
3076 {
3077 reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
3078 reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
3079 reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
3080 reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
3081 }
3082 }
3083 }
3084 else
3085 {
3086 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3087 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3088 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3089 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3090 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3091 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3092 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3093 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3094 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3095 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3096 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3097 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3098 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3099 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3100 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3101 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3102 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3103 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3104 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3105 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3106
3107 if (FLOAT128_VECTOR_P (KFmode))
3108 {
3109 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3110 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3111 }
3112
3113 if (FLOAT128_IEEE_P (TFmode))
3114 {
3115 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3116 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3117 }
3118
3119 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3120 available. */
3121 if (TARGET_NO_SDMODE_STACK)
3122 {
3123 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3124 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3125 }
3126
3127 if (TARGET_VSX)
3128 {
3129 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3130 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3131 }
3132
3133 if (TARGET_DIRECT_MOVE)
3134 {
3135 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3136 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3137 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3138 }
3139 }
3140
3141 reg_addr[DFmode].scalar_in_vmx_p = true;
3142 reg_addr[DImode].scalar_in_vmx_p = true;
3143
3144 if (TARGET_P8_VECTOR)
3145 {
3146 reg_addr[SFmode].scalar_in_vmx_p = true;
3147 reg_addr[SImode].scalar_in_vmx_p = true;
3148
3149 if (TARGET_P9_VECTOR)
3150 {
3151 reg_addr[HImode].scalar_in_vmx_p = true;
3152 reg_addr[QImode].scalar_in_vmx_p = true;
3153 }
3154 }
3155 }
3156
3157 /* Precalculate HARD_REGNO_NREGS. */
3158 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3159 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3160 rs6000_hard_regno_nregs[m][r]
3161 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3162
3163 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3164 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3165 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3166 rs6000_hard_regno_mode_ok_p[m][r]
3167 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3168
3169 /* Precalculate CLASS_MAX_NREGS sizes. */
3170 for (c = 0; c < LIM_REG_CLASSES; ++c)
3171 {
3172 int reg_size;
3173
3174 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3175 reg_size = UNITS_PER_VSX_WORD;
3176
3177 else if (c == ALTIVEC_REGS)
3178 reg_size = UNITS_PER_ALTIVEC_WORD;
3179
3180 else if (c == FLOAT_REGS)
3181 reg_size = UNITS_PER_FP_WORD;
3182
3183 else
3184 reg_size = UNITS_PER_WORD;
3185
3186 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3187 {
3188 machine_mode m2 = (machine_mode)m;
3189 int reg_size2 = reg_size;
3190
3191          /* TDmode & IBM 128-bit floating point always take 2 registers, even
3192 in VSX. */
3193 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3194 reg_size2 = UNITS_PER_FP_WORD;
3195
3196 rs6000_class_max_nregs[m][c]
3197 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3198 }
3199 }
3200
3201   /* Calculate for which modes to automatically generate code that uses the
3202      reciprocal divide and square root instructions. In the future, possibly
3203      automatically generate the instructions even if the user did not specify
3204      -mrecip. The double precision reciprocal sqrt estimate on older machines
3205      is not accurate enough. */
3206 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3207 if (TARGET_FRES)
3208 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3209 if (TARGET_FRE)
3210 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3211 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3212 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3213 if (VECTOR_UNIT_VSX_P (V2DFmode))
3214 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3215
3216 if (TARGET_FRSQRTES)
3217 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3218 if (TARGET_FRSQRTE)
3219 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3220 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3221 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3222 if (VECTOR_UNIT_VSX_P (V2DFmode))
3223 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3224
3225 if (rs6000_recip_control)
3226 {
3227 if (!flag_finite_math_only)
3228 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3229 "-ffast-math");
3230 if (flag_trapping_math)
3231 warning (0, "%qs requires %qs or %qs", "-mrecip",
3232 "-fno-trapping-math", "-ffast-math");
3233 if (!flag_reciprocal_math)
3234 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3235 "-ffast-math");
3236 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3237 {
3238 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3239 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3240 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3241
3242 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3243 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3244 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3245
3246 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3247 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3248 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3249
3250 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3251 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3252 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3253
3254 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3255 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3256 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3257
3258 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3259 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3260 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3261
3262 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3263 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3264 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3265
3266 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3267 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3268 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3269 }
3270 }
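
/* For instance, with -ffast-math (which implies -ffinite-math-only,
   -fno-trapping-math and -freciprocal-math) plus -mrecip enabling
   RECIP_DF_DIV, the AUTO_RE bit is set for DFmode, allowing a double
   divide to be expanded as a reciprocal-estimate sequence. */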
3271
3272   /* Update the addr mask bits in reg_addr to help secondary reload and the
3273      legitimate address support figure out the appropriate addressing to
3274      use. */
3275 rs6000_setup_reg_addr_masks ();
3276
3277 if (global_init_p || TARGET_DEBUG_TARGET)
3278 {
3279 if (TARGET_DEBUG_REG)
3280 rs6000_debug_reg_global ();
3281
3282 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3283 fprintf (stderr,
3284 "SImode variable mult cost = %d\n"
3285 "SImode constant mult cost = %d\n"
3286 "SImode short constant mult cost = %d\n"
3287 "DImode multipliciation cost = %d\n"
3288 "SImode division cost = %d\n"
3289 "DImode division cost = %d\n"
3290 "Simple fp operation cost = %d\n"
3291 "DFmode multiplication cost = %d\n"
3292 "SFmode division cost = %d\n"
3293 "DFmode division cost = %d\n"
3294 "cache line size = %d\n"
3295 "l1 cache size = %d\n"
3296 "l2 cache size = %d\n"
3297 "simultaneous prefetches = %d\n"
3298 "\n",
3299 rs6000_cost->mulsi,
3300 rs6000_cost->mulsi_const,
3301 rs6000_cost->mulsi_const9,
3302 rs6000_cost->muldi,
3303 rs6000_cost->divsi,
3304 rs6000_cost->divdi,
3305 rs6000_cost->fp,
3306 rs6000_cost->dmul,
3307 rs6000_cost->sdiv,
3308 rs6000_cost->ddiv,
3309 rs6000_cost->cache_line_size,
3310 rs6000_cost->l1_cache_size,
3311 rs6000_cost->l2_cache_size,
3312 rs6000_cost->simultaneous_prefetches);
3313 }
3314 }
3315
3316 #if TARGET_MACHO
3317 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3318
3319 static void
3320 darwin_rs6000_override_options (void)
3321 {
3322 /* The Darwin ABI always includes AltiVec; it cannot be (validly) turned
3323 off. */
3324 rs6000_altivec_abi = 1;
3325 TARGET_ALTIVEC_VRSAVE = 1;
3326 rs6000_current_abi = ABI_DARWIN;
3327
3328 if (DEFAULT_ABI == ABI_DARWIN
3329 && TARGET_64BIT)
3330 darwin_one_byte_bool = 1;
3331
3332 if (TARGET_64BIT && ! TARGET_POWERPC64)
3333 {
3334 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3335 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3336 }
3337
3338 /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3339 optimisation, and it will not work with the most generic case (where the
3340 symbol is undefined external, but there is no symbol stub). */
3341 if (TARGET_64BIT)
3342 rs6000_default_long_calls = 0;
3343
3344 /* ld_classic is (so far) still used for kernel (static) code, and supports
3345 the JBSR longcall / branch islands. */
3346 if (flag_mkernel)
3347 {
3348 rs6000_default_long_calls = 1;
3349
3350 /* Allow a kext author to do -mkernel -mhard-float. */
3351 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3352 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3353 }
3354
3355 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3356 Altivec. */
3357 if (!flag_mkernel && !flag_apple_kext
3358 && TARGET_64BIT
3359 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3360 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3361
3362 /* Unless the user (not the configurer) has explicitly overridden it
3363 with -mcpu=G3 or -mno-altivec, 10.5+ targets default to G4 unless
3364 targeting the kernel. */
3365 if (!flag_mkernel
3366 && !flag_apple_kext
3367 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3368 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3369 && ! OPTION_SET_P (rs6000_cpu_index))
3370 {
3371 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3372 }
3373 }
3374 #endif
3375
3376 /* If not otherwise specified by a target, make 'long double' equivalent to
3377 'double'. */
3378
3379 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3380 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3381 #endif
3382
3383 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3384 to clobber the XER[CA] bit because clobbering that bit without telling
3385 the compiler worked just fine with versions of GCC before GCC 5, and
3386 breaking a lot of older code in ways that are hard to track down is
3387 not such a great idea. */
3388
3389 static rtx_insn *
3390 rs6000_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
3391 vec<machine_mode> & /*input_modes*/,
3392 vec<const char *> & /*constraints*/,
3393 vec<rtx> &/*uses*/, vec<rtx> &clobbers,
3394 HARD_REG_SET &clobbered_regs, location_t /*loc*/)
3395 {
3396 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3397 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3398 return NULL;
3399 }
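/* For illustration only (assumed user code, not from this file): an
   old-style asm such as

     asm ("addic %0,%1,-1\n\tsubfe %0,%0,%1" : "=r" (x) : "r" (y));

   (a classic "x = (y != 0)" idiom) sets and reads the carry bit without
   declaring it; the unconditional CA clobber above keeps such code
   working.  */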
3400
3401 /* This target function is similar to the hook TARGET_OPTION_OVERRIDE
3402 but is called when the optimize level is changed via an attribute or
3403 pragma or when it is reset at the end of the code affected by the
3404 attribute or pragma. It is not called at the beginning of compilation
3405 when TARGET_OPTION_OVERRIDE is called so if you want to perform these
3406 actions then, you should have TARGET_OPTION_OVERRIDE call
3407 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
3408
3409 static void
3410 rs6000_override_options_after_change (void)
3411 {
3412 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3413 turns -frename-registers on. */
3414 if ((OPTION_SET_P (flag_unroll_loops) && flag_unroll_loops)
3415 || (OPTION_SET_P (flag_unroll_all_loops)
3416 && flag_unroll_all_loops))
3417 {
3418 if (!OPTION_SET_P (unroll_only_small_loops))
3419 unroll_only_small_loops = 0;
3420 if (!OPTION_SET_P (flag_rename_registers))
3421 flag_rename_registers = 1;
3422 if (!OPTION_SET_P (flag_cunroll_grow_size))
3423 flag_cunroll_grow_size = 1;
3424 }
3425 else if (!OPTION_SET_P (flag_cunroll_grow_size))
3426 flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
3427
3428 /* If we are inserting ROP-protect instructions, disable shrink wrap. */
3429 if (rs6000_rop_protect)
3430 flag_shrink_wrap = 0;
3431 }
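/* For example (illustrative user code, not from this file), this hook runs
   at both edges of a region such as

     #pragma GCC push_options
     #pragma GCC optimize ("unroll-loops")
     ... hot functions ...
     #pragma GCC pop_options

   so the unrolling-related flags above stay consistent inside and outside
   the region.  */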
3432
3433 #ifdef TARGET_USES_LINUX64_OPT
3434 static void
3435 rs6000_linux64_override_options ()
3436 {
3437 if (!OPTION_SET_P (rs6000_alignment_flags))
3438 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3439 if (rs6000_isa_flags & OPTION_MASK_64BIT)
3440 {
3441 if (DEFAULT_ABI != ABI_AIX)
3442 {
3443 rs6000_current_abi = ABI_AIX;
3444 error (INVALID_64BIT, "call");
3445 }
3446 dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
3447 if (ELFv2_ABI_CHECK)
3448 {
3449 rs6000_current_abi = ABI_ELFv2;
3450 if (dot_symbols)
3451 error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
3452 }
3453 if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
3454 {
3455 rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
3456 error (INVALID_64BIT, "relocatable");
3457 }
3458 if (rs6000_isa_flags & OPTION_MASK_EABI)
3459 {
3460 rs6000_isa_flags &= ~OPTION_MASK_EABI;
3461 error (INVALID_64BIT, "eabi");
3462 }
3463 if (TARGET_PROTOTYPE)
3464 {
3465 target_prototype = 0;
3466 error (INVALID_64BIT, "prototype");
3467 }
3468 if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
3469 {
3470 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3471 error ("%<-m64%> requires a PowerPC64 cpu");
3472 }
3473 if (!OPTION_SET_P (rs6000_current_cmodel))
3474 SET_CMODEL (CMODEL_MEDIUM);
3475 if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
3476 {
3477 if (OPTION_SET_P (rs6000_current_cmodel)
3478 && rs6000_current_cmodel != CMODEL_SMALL)
3479 error ("%<-mcmodel%> incompatible with other toc options");
3480 if (TARGET_MINIMAL_TOC)
3481 SET_CMODEL (CMODEL_SMALL);
3482 else if (TARGET_PCREL
3483 || (PCREL_SUPPORTED_BY_OS
3484 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0))
3485 /* Ignore -mno-minimal-toc. */
3486 ;
3487 else
3488 SET_CMODEL (CMODEL_SMALL);
3489 }
3490 if (rs6000_current_cmodel != CMODEL_SMALL)
3491 {
3492 if (!OPTION_SET_P (TARGET_NO_FP_IN_TOC))
3493 TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
3494 if (!OPTION_SET_P (TARGET_NO_SUM_IN_TOC))
3495 TARGET_NO_SUM_IN_TOC = 0;
3496 }
3497 if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
3498 {
3499 if (OPTION_SET_P (rs6000_pltseq))
3500 warning (0, "%qs unsupported for this ABI",
3501 "-mpltseq");
3502 rs6000_pltseq = false;
3503 }
3504 }
3505 else if (TARGET_64BIT)
3506 error (INVALID_32BIT, "32");
3507 else
3508 {
3509 if (TARGET_PROFILE_KERNEL)
3510 {
3511 profile_kernel = 0;
3512 error (INVALID_32BIT, "profile-kernel");
3513 }
3514 if (OPTION_SET_P (rs6000_current_cmodel))
3515 {
3516 SET_CMODEL (CMODEL_SMALL);
3517 error (INVALID_32BIT, "cmodel");
3518 }
3519 }
3520 }
3521 #endif
3522
3523 /* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
3524 This support is only present in little-endian GLIBC 2.32 or newer. */
3525 static bool
3526 glibc_supports_ieee_128bit (void)
3527 {
3528 #ifdef OPTION_GLIBC
3529 if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
3530 && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
3531 return true;
3532 #endif /* OPTION_GLIBC. */
3533
3534 return false;
3535 }
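/* For example, GLIBC 2.32 encodes as 2 * 1000 + 32 = 2032, so the check
   above accepts 2.32 and newer while rejecting, say, 2.31 (2031).  */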
3536
3537 /* Override command line options.
3538
3539 Combine build-specific configuration information with options
3540 specified on the command line to set various state variables which
3541 influence code generation, optimization, and expansion of built-in
3542 functions. Assure that command-line configuration preferences are
3543 compatible with each other and with the build configuration; issue
3544 warnings while adjusting configuration or error messages while
3545 rejecting configuration.
3546
3547 Upon entry to this function:
3548
3549 This function is called once at the beginning of
3550 compilation, and then again at the start and end of compiling
3551 each section of code that has a different configuration, as
3552 indicated, for example, by adding the
3553
3554 __attribute__((__target__("cpu=power9")))
3555
3556 qualifier to a function definition or, for example, by bracketing
3557 code between
3558
3559 #pragma GCC target("altivec")
3560
3561 and
3562
3563 #pragma GCC reset_options
3564
3565 directives. Parameter global_init_p is true for the initial
3566 invocation, which initializes global variables, and false for all
3567 subsequent invocations.
3568
3569
3570 Various global state information is assumed to be valid. This
3571 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3572 default CPU specified at build configure time, TARGET_DEFAULT,
3573 representing the default set of option flags for the default
3574 target, and OPTION_SET_P (rs6000_isa_flags), representing
3575 which options were requested on the command line.
3576
3577 Upon return from this function:
3578
3579 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3580 was set by name on the command line. Additionally, if certain
3581 attributes are automatically enabled or disabled by this function
3582 in order to assure compatibility between options and
3583 configuration, the flags associated with those attributes are
3584 also set. By setting these "explicit bits", we avoid the risk
3585 that other code might accidentally overwrite these particular
3586 attributes with "default values".
3587
3588 The various bits of rs6000_isa_flags are set to indicate the
3589 target options that have been selected for the most current
3590 compilation efforts. This has the effect of also turning on the
3591 associated TARGET_XXX values since these are macros which are
3592 generally defined to test the corresponding bit of the
3593 rs6000_isa_flags variable.
3594
3595 Various other global variables and fields of global structures
3596 (over 50 in all) are initialized to reflect the desired options
3597 for the most current compilation efforts. */
3598
3599 static bool
3600 rs6000_option_override_internal (bool global_init_p)
3601 {
3602 bool ret = true;
3603
3604 HOST_WIDE_INT set_masks;
3605 HOST_WIDE_INT ignore_masks;
3606 int cpu_index = -1;
3607 int tune_index;
3608 struct cl_target_option *main_target_opt
3609 = ((global_init_p || target_option_default_node == NULL)
3610 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3611
3612 /* Print defaults. */
3613 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3614 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3615
3616 /* Remember the explicit arguments. */
3617 if (global_init_p)
3618 rs6000_isa_flags_explicit = OPTION_SET_P (rs6000_isa_flags);
3619
3620 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3621 library functions, so warn about it. The flag may be useful for
3622 performance studies from time to time though, so don't disable it
3623 entirely. */
3624 if (OPTION_SET_P (rs6000_alignment_flags)
3625 && rs6000_alignment_flags == MASK_ALIGN_POWER
3626 && DEFAULT_ABI == ABI_DARWIN
3627 && TARGET_64BIT)
3628 warning (0, "%qs is not supported for 64-bit Darwin;"
3629 " it is incompatible with the installed C and C++ libraries",
3630 "-malign-power");
3631
3632 /* Numerous experiments show that IRA-based loop pressure
3633 calculation works better for RTL loop invariant motion on targets
3634 with enough (>= 32) registers. It is an expensive optimization,
3635 so it is enabled only when optimizing for peak performance (-O3). */
3636 if (optimize >= 3 && global_init_p
3637 && !OPTION_SET_P (flag_ira_loop_pressure))
3638 flag_ira_loop_pressure = 1;
3639
3640 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3641 for tracebacks to be complete, but not if -fasynchronous-unwind-tables
3642 was already specified explicitly. */
3643 if (flag_sanitize & SANITIZE_USER_ADDRESS
3644 && !OPTION_SET_P (flag_asynchronous_unwind_tables))
3645 flag_asynchronous_unwind_tables = 1;
3646
3647 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3648 loop unroller is active. It is only checked during unrolling, so
3649 we can just set it on by default. */
3650 if (!OPTION_SET_P (flag_variable_expansion_in_unroller))
3651 flag_variable_expansion_in_unroller = 1;
3652
3653 /* Set the pointer size. */
3654 if (TARGET_64BIT)
3655 {
3656 rs6000_pmode = DImode;
3657 rs6000_pointer_size = 64;
3658 }
3659 else
3660 {
3661 rs6000_pmode = SImode;
3662 rs6000_pointer_size = 32;
3663 }
3664
3665 /* Some OSs don't support saving Altivec registers. On those OSs, we don't
3666 touch the OPTION_MASK_ALTIVEC settings; if the user wants it, the user
3667 must explicitly specify it and we won't interfere with the user's
3668 specification. */
3669
3670 set_masks = POWERPC_MASKS;
3671 #ifdef OS_MISSING_ALTIVEC
3672 if (OS_MISSING_ALTIVEC)
3673 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3674 | OTHER_VSX_VECTOR_MASKS);
3675 #endif
3676
3677 /* Don't let the processor default override options given explicitly. */
3678 set_masks &= ~rs6000_isa_flags_explicit;
3679
3680 /* Unless -mpowerpc64 was specified explicitly, we need to ensure
3681 powerpc64 is always enabled for 64-bit here; otherwise some later
3682 checks would see an unexpected TARGET_POWERPC64 value. */
3683 if (!(rs6000_isa_flags_explicit & OPTION_MASK_POWERPC64)
3684 && TARGET_64BIT)
3685 {
3686 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3687 /* Need to stop powerpc64 from being unset in later processing,
3688 so clear it in set_masks. But as PR108240 shows, to keep the
3689 behavior consistent with before, we only do this if 64-bit was
3690 enabled explicitly. This is a hack; revisit it later. */
3691 if (rs6000_isa_flags_explicit & OPTION_MASK_64BIT)
3692 set_masks &= ~OPTION_MASK_POWERPC64;
3693 }
3694
3695 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3696 the cpu in a target attribute or pragma, but did not specify a tuning
3697 option, use the cpu for the tuning option rather than the option specified
3698 with -mtune on the command line. Process a '--with-cpu' configuration
3699 request as an implicit -mcpu. */
3700 if (rs6000_cpu_index >= 0)
3701 cpu_index = rs6000_cpu_index;
3702 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3703 cpu_index = main_target_opt->x_rs6000_cpu_index;
3704 else if (OPTION_TARGET_CPU_DEFAULT)
3705 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3706
3707 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3708 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3709 with those from the cpu, except for options that were explicitly set. If
3710 we don't have a cpu, do not override the target bits set in
3711 TARGET_DEFAULT. */
3712 if (cpu_index >= 0)
3713 {
3714 rs6000_cpu_index = cpu_index;
3715 rs6000_isa_flags &= ~set_masks;
3716 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3717 & set_masks);
3718 }
3719 else
3720 {
3721 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3722 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3723 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3724 to using rs6000_isa_flags, we need to do the initialization here.
3725
3726 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3727 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3728 HOST_WIDE_INT flags;
3729 if (TARGET_DEFAULT)
3730 flags = TARGET_DEFAULT;
3731 else
3732 {
3733 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3734 const char *default_cpu = (!TARGET_POWERPC64
3735 ? "powerpc"
3736 : (BYTES_BIG_ENDIAN
3737 ? "powerpc64"
3738 : "powerpc64le"));
3739 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3740 flags = processor_target_table[default_cpu_index].target_enable;
3741 }
3742 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3743 }
3744
3745 /* Don't expect powerpc64 to be enabled on OSes with OS_MISSING_POWERPC64,
3746 since they do not save and restore the high half of the GPRs correctly
3747 in all cases. If the user explicitly specifies it, we won't interfere
3748 with the user's specification. */
3749 #ifdef OS_MISSING_POWERPC64
3750 if (OS_MISSING_POWERPC64
3751 && TARGET_32BIT
3752 && TARGET_POWERPC64
3753 && !(rs6000_isa_flags_explicit & OPTION_MASK_POWERPC64))
3754 rs6000_isa_flags &= ~OPTION_MASK_POWERPC64;
3755 #endif
3756
3757 if (rs6000_tune_index >= 0)
3758 tune_index = rs6000_tune_index;
3759 else if (cpu_index >= 0)
3760 rs6000_tune_index = tune_index = cpu_index;
3761 else
3762 {
3763 size_t i;
3764 enum processor_type tune_proc
3765 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3766
3767 tune_index = -1;
3768 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3769 if (processor_target_table[i].processor == tune_proc)
3770 {
3771 tune_index = i;
3772 break;
3773 }
3774 }
3775
3776 if (cpu_index >= 0)
3777 rs6000_cpu = processor_target_table[cpu_index].processor;
3778 else
3779 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3780
3781 gcc_assert (tune_index >= 0);
3782 rs6000_tune = processor_target_table[tune_index].processor;
3783
3784 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3785 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3786 || rs6000_cpu == PROCESSOR_PPCE5500)
3787 {
3788 if (TARGET_ALTIVEC)
3789 error ("AltiVec not supported in this target");
3790 }
3791
3792 /* If we are optimizing big endian systems for space, use the load/store
3793 multiple instructions. */
3794 if (BYTES_BIG_ENDIAN && optimize_size)
3795 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3796
3797 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3798 because the hardware doesn't support the instructions used in little
3799 endian mode, and they cause an alignment trap. The 750 does not cause an
3800 alignment trap (except when the target address is unaligned). */
3801
3802 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3803 {
3804 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3805 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3806 warning (0, "%qs is not supported on little endian systems",
3807 "-mmultiple");
3808 }
3809
3810 /* If little-endian, default to -mstrict-align on older processors.
3811 Testing for direct_move matches power8 and later. */
3812 if (!BYTES_BIG_ENDIAN
3813 && !(processor_target_table[tune_index].target_enable
3814 & OPTION_MASK_DIRECT_MOVE))
3815 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3816
3817 /* Add some warnings for VSX. */
3818 if (TARGET_VSX)
3819 {
3820 const char *msg = NULL;
3821 if (!TARGET_HARD_FLOAT)
3822 {
3823 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3824 msg = N_("%<-mvsx%> requires hardware floating point");
3825 else
3826 {
3827 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3828 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3829 }
3830 }
3831 else if (TARGET_AVOID_XFORM > 0)
3832 msg = N_("%<-mvsx%> needs indexed addressing");
3833 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3834 & OPTION_MASK_ALTIVEC))
3835 {
3836 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3837 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3838 else
3839 msg = N_("%<-mno-altivec%> disables vsx");
3840 }
3841
3842 if (msg)
3843 {
3844 warning (0, msg);
3845 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3846 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3847 }
3848 }
3849
3850 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3851 the -mcpu setting to enable options that conflict. */
3852 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3853 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3854 | OPTION_MASK_ALTIVEC
3855 | OPTION_MASK_VSX)) != 0)
3856 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3857 | OPTION_MASK_DIRECT_MOVE)
3858 & ~rs6000_isa_flags_explicit);
3859
3860 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3861 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3862
3863 #ifdef XCOFF_DEBUGGING_INFO
3864 /* For AIX default to 64-bit DWARF. */
3865 if (!OPTION_SET_P (dwarf_offset_size))
3866 dwarf_offset_size = POINTER_SIZE_UNITS;
3867 #endif
3868
3869 /* Handle explicit -mno-{altivec,vsx} and turn off all of
3870 the options that depend on those flags. */
3871 ignore_masks = rs6000_disable_incompatible_switches ();
3872
3873 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3874 unless the user explicitly used the -mno-<option> to disable the code. */
3875 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3876 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3877 else if (TARGET_P9_MINMAX)
3878 {
3879 if (cpu_index >= 0)
3880 {
3881 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
3882 {
3883 /* Legacy behavior: allow -mcpu=power9 with certain
3884 capabilities explicitly disabled. */
3885 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3886 }
3887 else
3888 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3889 "for <xxx> less than power9", "-mcpu");
3890 }
3891 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3892 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3893 & rs6000_isa_flags_explicit))
3894 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3895 were explicitly cleared. */
3896 error ("%qs incompatible with explicitly disabled options",
3897 "-mpower9-minmax");
3898 else
3899 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3900 }
3901 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3902 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3903 else if (TARGET_VSX)
3904 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3905 else if (TARGET_POPCNTD)
3906 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3907 else if (TARGET_DFP)
3908 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3909 else if (TARGET_CMPB)
3910 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3911 else if (TARGET_FPRND)
3912 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3913 else if (TARGET_POPCNTB)
3914 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3915 else if (TARGET_ALTIVEC)
3916 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3917
3918 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3919 target attribute or pragma which automatically enables both options,
3920 unless the altivec ABI was set. This is set by default for 64-bit, but
3921 not for 32-bit. Don't move this before the above code using ignore_masks,
3922 since it can reset the cleared VSX/ALTIVEC flag again. */
3923 if (main_target_opt && !main_target_opt->x_rs6000_altivec_abi)
3924 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC)
3925 & ~rs6000_isa_flags_explicit);
3926
3927 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3928 {
3929 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3930 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3931 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3932 }
3933
3934 if (!TARGET_FPRND && TARGET_VSX)
3935 {
3936 if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
3937 /* TARGET_VSX = 1 implies Power 7 and newer */
3938 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3939 rs6000_isa_flags &= ~OPTION_MASK_FPRND;
3940 }
3941
3942 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3943 {
3944 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3945 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3946 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3947 }
3948
3949 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3950 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3951
3952 if (TARGET_P8_VECTOR && !TARGET_VSX)
3953 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3954
3955 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3956 {
3957 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3958 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3959 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3960 }
3961
3962 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3963 silently turn off quad memory mode. */
3964 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3965 {
3966 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3967 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3968
3969 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3970 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3971
3972 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3973 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3974 }
3975
3976 /* Non-atomic quad memory loads/stores are disabled for little endian, since
3977 the words are reversed, but atomic operations can still be done by
3978 swapping the words. */
3979 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3980 {
3981 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3982 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3983 "mode"));
3984
3985 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3986 }
3987
3988 /* Assume if the user asked for normal quad memory instructions, they want
3989 the atomic versions as well, unless they explicitly told us not to use quad
3990 word atomic instructions. */
3991 if (TARGET_QUAD_MEMORY
3992 && !TARGET_QUAD_MEMORY_ATOMIC
3993 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3994 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
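/* In instruction terms (for reference): -mquad-memory covers the lq/stq
   loads and stores, while -mquad-memory-atomic covers the lqarx/stqcx.
   pair used for 128-bit atomic operations.  */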
3995
3996 /* If we can shrink-wrap the TOC register save separately, then use
3997 -msave-toc-indirect unless explicitly disabled. */
3998 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
3999 && flag_shrink_wrap_separate
4000 && optimize_function_for_speed_p (cfun))
4001 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
4002
4003 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4004 generating power8 instructions. Power9 does not optimize power8 fusion
4005 cases. */
4006 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4007 {
4008 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
4009 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4010 else
4011 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4012 }
4013
4014 /* Setting additional fusion flags turns on base fusion. */
4015 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
4016 {
4017 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4018 {
4019 if (TARGET_P8_FUSION_SIGN)
4020 error ("%qs requires %qs", "-mpower8-fusion-sign",
4021 "-mpower8-fusion");
4022
4023 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4024 }
4025 else
4026 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4027 }
4028
4029 /* Power8 does not fuse sign-extended loads with the addis. If we are
4030 optimizing at high levels for speed, convert a sign-extended load into a
4031 zero-extending load plus an explicit sign extension. */
4032 if (TARGET_P8_FUSION
4033 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4034 && optimize_function_for_speed_p (cfun)
4035 && optimize >= 3)
4036 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
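/* An illustrative sketch (not the exact insns emitted): on power8,

     addis 9,2,var@toc@ha
     lha   3,var@toc@l(9)	sign-extending load, does not fuse

   is instead generated as

     addis 9,2,var@toc@ha
     lhz   3,var@toc@l(9)	zero-extending load, fuses with the addis
     extsh 3,3			explicit sign extension

   when this option is in effect.  */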
4037
4038 /* ISA 3.0 vector instructions include ISA 2.07. */
4039 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4040 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4041
4042 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4043 support. If we only have ISA 2.06 support and the user did not specify
4044 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4045 but we don't enable the full vectorization support. */
4046 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4047 TARGET_ALLOW_MOVMISALIGN = 1;
4048
4049 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4050 {
4051 if (TARGET_ALLOW_MOVMISALIGN > 0
4052 && OPTION_SET_P (TARGET_ALLOW_MOVMISALIGN))
4053 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4054
4055 TARGET_ALLOW_MOVMISALIGN = 0;
4056 }
4057
4058 /* Determine when unaligned vector accesses are permitted, and when
4059 they are preferred over masked Altivec loads. Note that if
4060 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4061 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4062 not true. */
4063 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4064 {
4065 if (!TARGET_VSX)
4066 {
4067 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4068 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4069
4070 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4071 }
4072
4073 else if (!TARGET_ALLOW_MOVMISALIGN)
4074 {
4075 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4076 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
4077 "-mallow-movmisalign");
4078
4079 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4080 }
4081 }
4082
4083 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX))
4084 {
4085 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4086 rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4087 else
4088 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4089 }
4090
4091 /* Use long double size to select the appropriate long double. We use
4092 TYPE_PRECISION to differentiate the 3 different long double types. We map
4093 128 into the precision used for TFmode. */
4094 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4095 ? 64
4096 : FLOAT_PRECISION_TFmode);
4097
4098 /* Set long double size before the IEEE 128-bit tests. */
4099 if (!OPTION_SET_P (rs6000_long_double_type_size))
4100 {
4101 if (main_target_opt != NULL
4102 && (main_target_opt->x_rs6000_long_double_type_size
4103 != default_long_double_size))
4104 error ("target attribute or pragma changes %<long double%> size");
4105 else
4106 rs6000_long_double_type_size = default_long_double_size;
4107 }
4108 else if (rs6000_long_double_type_size == FLOAT_PRECISION_TFmode)
4109 ; /* The option value can be seen when cl_target_option_restore is called. */
4110 else if (rs6000_long_double_type_size == 128)
4111 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
4112
4113 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4114 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4115 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4116 those systems will not pick up this default. Warn if the user changes the
4117 default unless -Wno-psabi. */
4118 if (!OPTION_SET_P (rs6000_ieeequad))
4119 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4120
4121 else if (TARGET_LONG_DOUBLE_128)
4122 {
4123 if (global_options.x_rs6000_ieeequad
4124 && (!TARGET_POPCNTD || !TARGET_VSX))
4125 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4126
4127 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT)
4128 {
4129 /* Determine if the user can change the default long double type at
4130 compilation time. You need GLIBC 2.32 or newer to be able to
4131 change the long double type. Only issue one warning. */
4132 static bool warned_change_long_double;
4133
4134 if (!warned_change_long_double && !glibc_supports_ieee_128bit ())
4135 {
4136 warned_change_long_double = true;
4137 if (TARGET_IEEEQUAD)
4138 warning (OPT_Wpsabi, "Using IEEE extended precision "
4139 "%<long double%>");
4140 else
4141 warning (OPT_Wpsabi, "Using IBM extended precision "
4142 "%<long double%>");
4143 }
4144 }
4145 }
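/* For example (an assumed typical invocation, not from this file), on a
   little endian system with GLIBC 2.32 or newer the user can select the
   IEEE 128-bit long double with

     gcc -mabi=ieeelongdouble -Wno-psabi file.c

   where -Wno-psabi silences the ABI-change warning issued above.  */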
4146
4147 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4148 systems. In GCC 7, we would enable the IEEE 128-bit floating point
4149 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4150 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4151 the keyword as well as the type. */
4152 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4153
4154 /* IEEE 128-bit floating point requires VSX support. */
4155 if (TARGET_FLOAT128_KEYWORD)
4156 {
4157 if (!TARGET_VSX)
4158 {
4159 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4160 error ("%qs requires VSX support", "-mfloat128");
4161
4162 TARGET_FLOAT128_TYPE = 0;
4163 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4164 | OPTION_MASK_FLOAT128_HW);
4165 }
4166 else if (!TARGET_FLOAT128_TYPE)
4167 {
4168 TARGET_FLOAT128_TYPE = 1;
4169 warning (0, "The %<-mfloat128%> option may not be fully supported");
4170 }
4171 }
4172
4173 /* Enable the __float128 keyword under Linux by default. */
4174 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4175 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4176 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4177
4178 /* If we are supporting the float128 type and have full ISA 3.0 support,
4179 enable -mfloat128-hardware by default. However, don't enable the
4180 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4181 because sometimes the compiler wants to put things in an integer
4182 container, and if we don't have __int128 support, it is impossible. */
4183 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4184 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4185 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4186 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4187
4188 if (TARGET_FLOAT128_HW
4189 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4190 {
4191 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4192 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4193
4194 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4195 }
4196
4197 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4198 {
4199 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4200 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4201
4202 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4203 }
4204
4205 /* Enable -mprefixed by default on power10 systems. */
4206 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
4207 rs6000_isa_flags |= OPTION_MASK_PREFIXED;
4208
4209 /* -mprefixed requires -mcpu=power10 (or later). */
4210 else if (TARGET_PREFIXED && !TARGET_POWER10)
4211 {
4212 if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
4213 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4214
4215 rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
4216 }
4217
4218 /* -mpcrel requires prefixed load/store addressing. */
4219 if (TARGET_PCREL && !TARGET_PREFIXED)
4220 {
4221 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4222 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4223
4224 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4225 }
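/* So, for example, a plain "gcc -O2 -mcpu=power10" compilation picks up
   -mprefixed from the default above, while "-mcpu=power10 -mno-prefixed"
   keeps prefixed addressing off; an explicit "-mno-prefixed -mpcrel"
   combination is rejected by the check just above.  */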
4226
4227 /* Print the options after updating the defaults. */
4228 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4229 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4230
4231 /* E500mc does "better" if we inline more aggressively. Respect the
4232 user's opinion, though. */
4233 if (rs6000_block_move_inline_limit == 0
4234 && (rs6000_tune == PROCESSOR_PPCE500MC
4235 || rs6000_tune == PROCESSOR_PPCE500MC64
4236 || rs6000_tune == PROCESSOR_PPCE5500
4237 || rs6000_tune == PROCESSOR_PPCE6500))
4238 rs6000_block_move_inline_limit = 128;
4239
4240 /* store_one_arg depends on expand_block_move to handle at least the
4241 size of reg_parm_stack_space. */
4242 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4243 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4244
4245 if (global_init_p)
4246 {
4247 /* If the appropriate debug option is enabled, replace the target hooks
4248 with debug versions that call the real version and then print
4249 debugging information. */
4250 if (TARGET_DEBUG_COST)
4251 {
4252 targetm.rtx_costs = rs6000_debug_rtx_costs;
4253 targetm.address_cost = rs6000_debug_address_cost;
4254 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4255 }
4256
4257 if (TARGET_DEBUG_ADDR)
4258 {
4259 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4260 targetm.legitimize_address = rs6000_debug_legitimize_address;
4261 rs6000_secondary_reload_class_ptr
4262 = rs6000_debug_secondary_reload_class;
4263 targetm.secondary_memory_needed
4264 = rs6000_debug_secondary_memory_needed;
4265 targetm.can_change_mode_class
4266 = rs6000_debug_can_change_mode_class;
4267 rs6000_preferred_reload_class_ptr
4268 = rs6000_debug_preferred_reload_class;
4269 rs6000_mode_dependent_address_ptr
4270 = rs6000_debug_mode_dependent_address;
4271 }
4272
4273 if (rs6000_veclibabi_name)
4274 {
4275 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4276 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4277 else
4278 {
4279 error ("unknown vectorization library ABI type in "
4280 "%<-mveclibabi=%s%>", rs6000_veclibabi_name);
4281 ret = false;
4282 }
4283 }
4284 }
4285
4286 /* Enable Altivec ABI for AIX -maltivec. */
4287 if (TARGET_XCOFF
4288 && (TARGET_ALTIVEC || TARGET_VSX)
4289 && !OPTION_SET_P (rs6000_altivec_abi))
4290 {
4291 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4292 error ("target attribute or pragma changes AltiVec ABI");
4293 else
4294 rs6000_altivec_abi = 1;
4295 }
4296
4297 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4298 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4299 be explicitly overridden in either case. */
4300 if (TARGET_ELF)
4301 {
4302 if (!OPTION_SET_P (rs6000_altivec_abi)
4303 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4304 {
4305 if (main_target_opt != NULL &&
4306 !main_target_opt->x_rs6000_altivec_abi)
4307 error ("target attribute or pragma changes AltiVec ABI");
4308 else
4309 rs6000_altivec_abi = 1;
4310 }
4311 }
4312
4313 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4314 So far, the only darwin64 targets are also MACH-O. */
4315 if (TARGET_MACHO
4316 && DEFAULT_ABI == ABI_DARWIN
4317 && TARGET_64BIT)
4318 {
4319 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4320 error ("target attribute or pragma changes darwin64 ABI");
4321 else
4322 {
4323 rs6000_darwin64_abi = 1;
4324 /* Default to natural alignment, for better performance. */
4325 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4326 }
4327 }
4328
4329 /* Place FP constants in the constant pool instead of the TOC
4330 if section anchors are enabled. */
4331 if (flag_section_anchors
4332 && !OPTION_SET_P (TARGET_NO_FP_IN_TOC))
4333 TARGET_NO_FP_IN_TOC = 1;
4334
4335 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4336 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4337
4338 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4339 SUBTARGET_OVERRIDE_OPTIONS;
4340 #endif
4341 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4342 SUBSUBTARGET_OVERRIDE_OPTIONS;
4343 #endif
4344 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4345 SUB3TARGET_OVERRIDE_OPTIONS;
4346 #endif
4347
4348 /* If the ABI has support for PC-relative relocations, enable it by default.
4349 This test depends on the sub-target tests above setting the code model to
4350 medium for ELF v2 systems. */
4351 if (PCREL_SUPPORTED_BY_OS
4352 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
4353 rs6000_isa_flags |= OPTION_MASK_PCREL;
4354
4355 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4356 after the subtarget override options are done. */
4357 else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4358 {
4359 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4360 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4361
4362 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4363 }
4364
4365 /* Enable -mmma by default on power10 systems. */
4366 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
4367 rs6000_isa_flags |= OPTION_MASK_MMA;
4368
4369 /* Turn off vector pair/mma options on non-power10 systems. */
4370 else if (!TARGET_POWER10 && TARGET_MMA)
4371 {
4372 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4373 error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4374
4375 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4376 }
4377
4378 /* Enable power10 fusion if we are tuning for power10, even if we aren't
4379 generating power10 instructions. */
4380 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION))
4381 {
4382 if (rs6000_tune == PROCESSOR_POWER10
4383 || rs6000_tune == PROCESSOR_POWER11)
4384 rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
4385 else
4386 rs6000_isa_flags &= ~OPTION_MASK_P10_FUSION;
4387 }
4388
4389 /* ISA 3.1 states that MMA requires SIMD support, and our implementation
4390 (for example, the "*movoo" pattern) uses vector pair accesses, which
4391 live in VSX registers. So make MMA require VSX support here. */
4392 if (TARGET_MMA && !TARGET_VSX)
4393 {
4394 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4395 error ("%qs requires %qs", "-mmma", "-mvsx");
4396 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4397 }
4398
4399 if (!TARGET_PCREL && TARGET_PCREL_OPT)
4400 rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
4401
4402 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4403 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4404
4405 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4406 && rs6000_tune != PROCESSOR_POWER5
4407 && rs6000_tune != PROCESSOR_POWER6
4408 && rs6000_tune != PROCESSOR_POWER7
4409 && rs6000_tune != PROCESSOR_POWER8
4410 && rs6000_tune != PROCESSOR_POWER9
4411 && rs6000_tune != PROCESSOR_POWER10
4412 && rs6000_tune != PROCESSOR_POWER11
4413 && rs6000_tune != PROCESSOR_PPCA2
4414 && rs6000_tune != PROCESSOR_CELL
4415 && rs6000_tune != PROCESSOR_PPC476);
4416 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4417 || rs6000_tune == PROCESSOR_POWER5
4418 || rs6000_tune == PROCESSOR_POWER7
4419 || rs6000_tune == PROCESSOR_POWER8);
4420 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4421 || rs6000_tune == PROCESSOR_POWER5
4422 || rs6000_tune == PROCESSOR_POWER6
4423 || rs6000_tune == PROCESSOR_POWER7
4424 || rs6000_tune == PROCESSOR_POWER8
4425 || rs6000_tune == PROCESSOR_POWER9
4426 || rs6000_tune == PROCESSOR_POWER10
4427 || rs6000_tune == PROCESSOR_POWER11
4428 || rs6000_tune == PROCESSOR_PPCE500MC
4429 || rs6000_tune == PROCESSOR_PPCE500MC64
4430 || rs6000_tune == PROCESSOR_PPCE5500
4431 || rs6000_tune == PROCESSOR_PPCE6500);
4432
4433 /* Allow debug switches to override the above settings. These are set to -1
4434 in rs6000.opt to indicate the user hasn't directly set the switch. */
4435 if (TARGET_ALWAYS_HINT >= 0)
4436 rs6000_always_hint = TARGET_ALWAYS_HINT;
4437
4438 if (TARGET_SCHED_GROUPS >= 0)
4439 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4440
4441 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4442 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4443
4444 rs6000_sched_restricted_insns_priority
4445 = (rs6000_sched_groups ? 1 : 0);
4446
4447 /* Handle -msched-costly-dep option. */
4448 rs6000_sched_costly_dep
4449 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4450
4451 if (rs6000_sched_costly_dep_str)
4452 {
4453 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4454 rs6000_sched_costly_dep = no_dep_costly;
4455 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4456 rs6000_sched_costly_dep = all_deps_costly;
4457 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4458 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4459 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4460 rs6000_sched_costly_dep = store_to_load_dep_costly;
4461 else
4462 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4463 atoi (rs6000_sched_costly_dep_str));
4464 }
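/* For example (illustrative command lines): "-msched-costly-dep=all"
   treats every dependence as costly, while a plain number such as
   "-msched-costly-dep=20" (handled by the atoi fallback above) treats a
   dependence as costly when its latency is at least that value.  */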
4465
4466 /* Handle -minsert-sched-nops option. */
4467 rs6000_sched_insert_nops
4468 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4469
4470 if (rs6000_sched_insert_nops_str)
4471 {
4472 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4473 rs6000_sched_insert_nops = sched_finish_none;
4474 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4475 rs6000_sched_insert_nops = sched_finish_pad_groups;
4476 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4477 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4478 else
4479 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4480 atoi (rs6000_sched_insert_nops_str));
4481 }
4482
4483 /* Handle the stack protector options. */
4484 if (!OPTION_SET_P (rs6000_stack_protector_guard))
4485 #ifdef TARGET_THREAD_SSP_OFFSET
4486 rs6000_stack_protector_guard = SSP_TLS;
4487 #else
4488 rs6000_stack_protector_guard = SSP_GLOBAL;
4489 #endif
4490
4491 #ifdef TARGET_THREAD_SSP_OFFSET
4492 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4493 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4494 #endif
4495
4496 if (OPTION_SET_P (rs6000_stack_protector_guard_offset_str))
4497 {
4498 char *endp;
4499 const char *str = rs6000_stack_protector_guard_offset_str;
4500
4501 errno = 0;
4502 long offset = strtol (str, &endp, 0);
4503 if (!*str || *endp || errno)
4504 error ("%qs is not a valid number in %qs", str,
4505 "-mstack-protector-guard-offset=");
4506
4507 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4508 || (TARGET_64BIT && (offset & 3)))
4509 error ("%qs is not a valid offset in %qs", str,
4510 "-mstack-protector-guard-offset=");
4511
4512 rs6000_stack_protector_guard_offset = offset;
4513 }
4514
4515 if (OPTION_SET_P (rs6000_stack_protector_guard_reg_str))
4516 {
4517 const char *str = rs6000_stack_protector_guard_reg_str;
4518 int reg = decode_reg_name (str);
4519
4520 if (!IN_RANGE (reg, 1, 31))
4521 error ("%qs is not a valid base register in %qs", str,
4522 "-mstack-protector-guard-reg=");
4523
4524 rs6000_stack_protector_guard_reg = reg;
4525 }
4526
4527 if (rs6000_stack_protector_guard == SSP_TLS
4528 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4529 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4530
4531 if (global_init_p)
4532 {
4533 #ifdef TARGET_REGNAMES
4534 /* If the user desires alternate register names, copy in the
4535 alternate names now. */
4536 if (TARGET_REGNAMES)
4537 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4538 #endif
4539
4540 /* Set aix_struct_return last, after the ABI is determined.
4541 If -maix-struct-return or -msvr4-struct-return was explicitly
4542 used, don't override with the ABI default. */
4543 if (!OPTION_SET_P (aix_struct_return))
4544 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4545
4546 #if 0
4547 /* IBM XL compiler defaults to unsigned bitfields. */
4548 if (TARGET_XL_COMPAT)
4549 flag_signed_bitfields = 0;
4550 #endif
4551
4552 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4553 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4554
4555 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4556
4557 /* We can only guarantee the availability of DI pseudo-ops when
4558 assembling for 64-bit targets. */
4559 if (!TARGET_64BIT)
4560 {
4561 targetm.asm_out.aligned_op.di = NULL;
4562 targetm.asm_out.unaligned_op.di = NULL;
4563 }
4564
4565
4566 /* Set branch target alignment, if not optimizing for size. */
4567 if (!optimize_size)
4568 {
4569 /* Cell wants to be 8-byte aligned for dual issue. Titan wants to be
4570 8-byte aligned to avoid misprediction by the branch predictor. */
4571 if (rs6000_tune == PROCESSOR_TITAN
4572 || rs6000_tune == PROCESSOR_CELL)
4573 {
4574 if (flag_align_functions && !str_align_functions)
4575 str_align_functions = "8";
4576 if (flag_align_jumps && !str_align_jumps)
4577 str_align_jumps = "8";
4578 if (flag_align_loops && !str_align_loops)
4579 str_align_loops = "8";
4580 }
4581 if (rs6000_align_branch_targets)
4582 {
4583 if (flag_align_functions && !str_align_functions)
4584 str_align_functions = "16";
4585 if (flag_align_jumps && !str_align_jumps)
4586 str_align_jumps = "16";
4587 if (flag_align_loops && !str_align_loops)
4588 {
4589 can_override_loop_align = 1;
4590 str_align_loops = "16";
4591 }
4592 }
4593 }
4594
4595 /* Arrange to save and restore machine status around nested functions. */
4596 init_machine_status = rs6000_init_machine_status;
4597
4598 /* We should always be splitting complex arguments, but we can't break
4599 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4600 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4601 targetm.calls.split_complex_arg = NULL;
4602
4603 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4604 if (DEFAULT_ABI == ABI_AIX)
4605 targetm.calls.custom_function_descriptors = 0;
4606 }
4607
4608 /* Initialize rs6000_cost with the appropriate target costs. */
4609 if (optimize_size)
4610 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4611 else
4612 switch (rs6000_tune)
4613 {
4614 case PROCESSOR_RS64A:
4615 rs6000_cost = &rs64a_cost;
4616 break;
4617
4618 case PROCESSOR_MPCCORE:
4619 rs6000_cost = &mpccore_cost;
4620 break;
4621
4622 case PROCESSOR_PPC403:
4623 rs6000_cost = &ppc403_cost;
4624 break;
4625
4626 case PROCESSOR_PPC405:
4627 rs6000_cost = &ppc405_cost;
4628 break;
4629
4630 case PROCESSOR_PPC440:
4631 rs6000_cost = &ppc440_cost;
4632 break;
4633
4634 case PROCESSOR_PPC476:
4635 rs6000_cost = &ppc476_cost;
4636 break;
4637
4638 case PROCESSOR_PPC601:
4639 rs6000_cost = &ppc601_cost;
4640 break;
4641
4642 case PROCESSOR_PPC603:
4643 rs6000_cost = &ppc603_cost;
4644 break;
4645
4646 case PROCESSOR_PPC604:
4647 rs6000_cost = &ppc604_cost;
4648 break;
4649
4650 case PROCESSOR_PPC604e:
4651 rs6000_cost = &ppc604e_cost;
4652 break;
4653
4654 case PROCESSOR_PPC620:
4655 rs6000_cost = &ppc620_cost;
4656 break;
4657
4658 case PROCESSOR_PPC630:
4659 rs6000_cost = &ppc630_cost;
4660 break;
4661
4662 case PROCESSOR_CELL:
4663 rs6000_cost = &ppccell_cost;
4664 break;
4665
4666 case PROCESSOR_PPC750:
4667 case PROCESSOR_PPC7400:
4668 rs6000_cost = &ppc750_cost;
4669 break;
4670
4671 case PROCESSOR_PPC7450:
4672 rs6000_cost = &ppc7450_cost;
4673 break;
4674
4675 case PROCESSOR_PPC8540:
4676 case PROCESSOR_PPC8548:
4677 rs6000_cost = &ppc8540_cost;
4678 break;
4679
4680 case PROCESSOR_PPCE300C2:
4681 case PROCESSOR_PPCE300C3:
4682 rs6000_cost = &ppce300c2c3_cost;
4683 break;
4684
4685 case PROCESSOR_PPCE500MC:
4686 rs6000_cost = &ppce500mc_cost;
4687 break;
4688
4689 case PROCESSOR_PPCE500MC64:
4690 rs6000_cost = &ppce500mc64_cost;
4691 break;
4692
4693 case PROCESSOR_PPCE5500:
4694 rs6000_cost = &ppce5500_cost;
4695 break;
4696
4697 case PROCESSOR_PPCE6500:
4698 rs6000_cost = &ppce6500_cost;
4699 break;
4700
4701 case PROCESSOR_TITAN:
4702 rs6000_cost = &titan_cost;
4703 break;
4704
4705 case PROCESSOR_POWER4:
4706 case PROCESSOR_POWER5:
4707 rs6000_cost = &power4_cost;
4708 break;
4709
4710 case PROCESSOR_POWER6:
4711 rs6000_cost = &power6_cost;
4712 break;
4713
4714 case PROCESSOR_POWER7:
4715 rs6000_cost = &power7_cost;
4716 break;
4717
4718 case PROCESSOR_POWER8:
4719 rs6000_cost = &power8_cost;
4720 break;
4721
4722 case PROCESSOR_POWER9:
4723 rs6000_cost = &power9_cost;
4724 break;
4725
4726 case PROCESSOR_POWER10:
4727 case PROCESSOR_POWER11:
4728 rs6000_cost = &power10_cost;
4729 break;
4730
4731 case PROCESSOR_PPCA2:
4732 rs6000_cost = &ppca2_cost;
4733 break;
4734
4735 default:
4736 gcc_unreachable ();
4737 }
4738
4739 if (global_init_p)
4740 {
4741 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4742 param_simultaneous_prefetches,
4743 rs6000_cost->simultaneous_prefetches);
4744 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4745 param_l1_cache_size,
4746 rs6000_cost->l1_cache_size);
4747 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4748 param_l1_cache_line_size,
4749 rs6000_cost->cache_line_size);
4750 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4751 param_l2_cache_size,
4752 rs6000_cost->l2_cache_size);
4753
4754 /* Increase loop peeling limits based on performance analysis. */
4755 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4756 param_max_peeled_insns, 400);
4757 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4758 param_max_completely_peeled_insns, 400);
4759
4760 /* The lxvl/stxvl instructions don't perform well before Power10. */
4761 if (TARGET_POWER10)
4762 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4763 param_vect_partial_vector_usage, 1);
4764 else
4765 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4766 param_vect_partial_vector_usage, 0);
4767
4768 /* Use the 'model' -fsched-pressure algorithm by default. */
4769 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4770 param_sched_pressure_algorithm,
4771 SCHED_PRESSURE_MODEL);
4772
4773 /* If using typedef char *va_list, signal that
4774 __builtin_va_start (&ap, 0) can be optimized to
4775 ap = __builtin_next_arg (0). */
4776 if (DEFAULT_ABI != ABI_V4)
4777 targetm.expand_builtin_va_start = NULL;
4778 }
4779
4780 rs6000_override_options_after_change ();
4781
4782 /* If not explicitly specified via option, decide whether to generate indexed
4783 load/store instructions. A value of -1 indicates that the
4784 initial value of this variable has not been overwritten. During
4785 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4786 if (TARGET_AVOID_XFORM == -1)
4787 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4788 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4789 need indexed accesses and the type used is the scalar type of the element
4790 being loaded or stored. */
4791 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4792 && !TARGET_ALTIVEC);
4793
4794 /* Set the -mrecip options. */
4795 if (rs6000_recip_name)
4796 {
4797 char *p = ASTRDUP (rs6000_recip_name);
4798 char *q;
4799 unsigned int mask, i;
4800 bool invert;
4801
4802 while ((q = strtok (p, ",")) != NULL)
4803 {
4804 p = NULL;
4805 if (*q == '!')
4806 {
4807 invert = true;
4808 q++;
4809 }
4810 else
4811 invert = false;
4812
4813 if (!strcmp (q, "default"))
4814 mask = ((TARGET_RECIP_PRECISION)
4815 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4816 else
4817 {
4818 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4819 if (!strcmp (q, recip_options[i].string))
4820 {
4821 mask = recip_options[i].mask;
4822 break;
4823 }
4824
4825 if (i == ARRAY_SIZE (recip_options))
4826 {
4827 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4828 invert = false;
4829 mask = 0;
4830 ret = false;
4831 }
4832 }
4833
4834 if (invert)
4835 rs6000_recip_control &= ~mask;
4836 else
4837 rs6000_recip_control |= mask;
4838 }
4839 }
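/* For example, "-mrecip=rsqrtf,!divd" turns on the single-precision
   reciprocal square root estimate and, via the '!' prefix parsed above,
   turns off the double-precision divide estimate; the names come from
   the recip_options table.  */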
4840
4841 /* Initialize all of the registers. */
4842 rs6000_init_hard_regno_mode_ok (global_init_p);
4843
4844 /* Save the initial options in case the user does function-specific options. */
4845 if (global_init_p)
4846 target_option_default_node = target_option_current_node
4847 = build_target_option_node (&global_options, &global_options_set);
4848
4849 /* If not explicitly specified via option, decide whether to generate the
4850 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
4851 if (TARGET_LINK_STACK == -1)
4852 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4853
4854 /* Deprecate use of -mno-speculate-indirect-jumps. */
4855 if (!rs6000_speculate_indirect_jumps)
4856 warning (0, "%qs is deprecated and not recommended in any circumstances",
4857 "-mno-speculate-indirect-jumps");
4858
4859 return ret;
4860 }
4861
4862 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4863 define the target cpu type. */
4864
4865 static void
4866 rs6000_option_override (void)
4867 {
4868 (void) rs6000_option_override_internal (true);
4869 }
4870
4871 \f
4872 /* Implement LOOP_ALIGN. */
4873 align_flags
4874 rs6000_loop_align (rtx label)
4875 {
4876 basic_block bb;
4877 int ninsns;
4878
4879 /* Don't override loop alignment if -falign-loops was specified. */
4880 if (!can_override_loop_align)
4881 return align_loops;
4882
4883 bb = BLOCK_FOR_INSN (label);
4884 ninsns = num_loop_insns (bb->loop_father);
4885
4886 /* Align small loops to 32 bytes (2**5, hence align_flags (5) below) to fit in an icache sector; otherwise return the default. */
4887 if (ninsns > 4 && ninsns <= 8
4888 && (rs6000_tune == PROCESSOR_POWER4
4889 || rs6000_tune == PROCESSOR_POWER5
4890 || rs6000_tune == PROCESSOR_POWER6
4891 || rs6000_tune == PROCESSOR_POWER7
4892 || rs6000_tune == PROCESSOR_POWER8))
4893 return align_flags (5);
4894 else
4895 return align_loops;
4896 }
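
/* A worked example for the above: align_flags (5) requests a 2**5 = 32
   byte alignment, i.e. one icache sector on the processors listed.  A
   loop of 6 insns (24 bytes) then fits in a single sector rather than
   possibly straddling two; loops of more than 8 insns (32 bytes) cannot
   fit and keep the default alignment.  */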
4897
4898 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4899    after applying N iterations.  This routine does not determine how many
4900    iterations are required to reach the desired alignment.  */
4901
4902 static bool
4903 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4904 {
4905 if (is_packed)
4906 return false;
4907
4908 if (TARGET_32BIT)
4909 {
4910 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4911 return true;
4912
4913 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4914 return true;
4915
4916 return false;
4917 }
4918 else
4919 {
4920 if (TARGET_MACHO)
4921 return false;
4922
4923       /* Assume that all other types are naturally aligned.  CHECKME!  */
4924 return true;
4925 }
4926 }
4927
4928 /* Return true if the vector misalignment factor is supported by the
4929 target. */
4930 static bool
4931 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4932 const_tree type,
4933 int misalignment,
4934 bool is_packed)
4935 {
4936 if (TARGET_VSX)
4937 {
4938 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4939 return true;
4940
4941       /* Return false if the movmisalign pattern is not supported for this mode.  */
4942 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4943 return false;
4944
4945 if (misalignment == -1)
4946 {
4947 	  /* The misalignment factor is unknown at compile time, but we know
4948 	     it is word aligned.  */
4949 if (rs6000_vector_alignment_reachable (type, is_packed))
4950 {
4951 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4952
4953 if (element_size == 64 || element_size == 32)
4954 return true;
4955 }
4956
4957 return false;
4958 }
4959
4960       /* VSX supports word-aligned vectors.  */
4961 if (misalignment % 4 == 0)
4962 return true;
4963 }
4964 return false;
4965 }
4966
4967 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4968 static int
4969 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4970 tree vectype, int misalign)
4971 {
4972 unsigned elements;
4973 tree elem_type;
4974
4975 switch (type_of_cost)
4976 {
4977 case scalar_stmt:
4978 case scalar_store:
4979 case vector_stmt:
4980 case vector_store:
4981 case vec_to_scalar:
4982 case scalar_to_vec:
4983 case cond_branch_not_taken:
4984 return 1;
4985 case scalar_load:
4986 case vector_load:
4987 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4988 return 2;
4989
4990 case vec_perm:
4991 /* Power7 has only one permute unit, make it a bit expensive. */
4992 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4993 return 3;
4994 else
4995 return 1;
4996
4997 case vec_promote_demote:
4998 /* Power7 has only one permute/pack unit, make it a bit expensive. */
4999 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5000 return 4;
5001 else
5002 return 1;
5003
5004 case cond_branch_taken:
5005 return 3;
5006
5007 case unaligned_load:
5008 case vector_gather_load:
5009 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5010 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5011 return 2;
5012
5013 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5014 {
5015 elements = TYPE_VECTOR_SUBPARTS (vectype);
5016 	  /* See PR102767; consider V1TI to keep consistency.  */
5017 if (elements == 2 || elements == 1)
5018 /* Double word aligned. */
5019 return 4;
5020
5021 if (elements == 4)
5022 {
5023 switch (misalign)
5024 {
5025 case 8:
5026 /* Double word aligned. */
5027 return 4;
5028
5029 case -1:
5030 /* Unknown misalignment. */
5031 case 4:
5032 case 12:
5033 /* Word aligned. */
5034 return 33;
5035
5036 default:
5037 gcc_unreachable ();
5038 }
5039 }
5040 }
5041
5042 if (TARGET_ALTIVEC)
5043 /* Misaligned loads are not supported. */
5044 gcc_unreachable ();
5045
5046 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5047 return 4;
5048
5049 case unaligned_store:
5050 case vector_scatter_store:
5051 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5052 return 1;
5053
5054 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5055 {
5056 elements = TYPE_VECTOR_SUBPARTS (vectype);
5057 	  /* See PR102767; consider V1TI to keep consistency.  */
5058 if (elements == 2 || elements == 1)
5059 /* Double word aligned. */
5060 return 2;
5061
5062 if (elements == 4)
5063 {
5064 switch (misalign)
5065 {
5066 case 8:
5067 /* Double word aligned. */
5068 return 2;
5069
5070 case -1:
5071 /* Unknown misalignment. */
5072 case 4:
5073 case 12:
5074 /* Word aligned. */
5075 return 23;
5076
5077 default:
5078 gcc_unreachable ();
5079 }
5080 }
5081 }
5082
5083 if (TARGET_ALTIVEC)
5084 /* Misaligned stores are not supported. */
5085 gcc_unreachable ();
5086
5087 return 2;
5088
5089 case vec_construct:
5090 /* This is a rough approximation assuming non-constant elements
5091 constructed into a vector via element insertion. FIXME:
5092 vec_construct is not granular enough for uniformly good
5093 decisions. If the initialization is a splat, this is
5094 cheaper than we estimate. Improve this someday. */
5095 elem_type = TREE_TYPE (vectype);
5096       /* 32-bit floats loaded into registers are stored as double
5097 	 precision, so we need 2 permutes, 2 converts, and 1 merge
5098 	 to construct a vector of short floats from them.  */
5099 if (SCALAR_FLOAT_TYPE_P (elem_type)
5100 && TYPE_PRECISION (elem_type) == 32)
5101 return 5;
5102 /* On POWER9, integer vector types are built up in GPRs and then
5103 use a direct move (2 cycles). For POWER8 this is even worse,
5104 as we need two direct moves and a merge, and the direct moves
5105 are five cycles. */
5106 else if (INTEGRAL_TYPE_P (elem_type))
5107 {
5108 if (TARGET_P9_VECTOR)
5109 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5110 else
5111 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5112 }
5113 else
5114 /* V2DFmode doesn't need a direct move. */
5115 return 2;
5116
5117 default:
5118 gcc_unreachable ();
5119 }
5120 }
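
/* A worked example of the costs above: on a VSX target without
   TARGET_EFFICIENT_UNALIGNED_VSX but with TARGET_ALLOW_MOVMISALIGN, an
   unaligned V4SF load with misalign == 4 (word aligned) costs 33,
   whereas an aligned vector_load costs 2 -- a deliberately large penalty
   that steers the vectorizer toward aligned accesses or peeling.  */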
5121
5122 /* Implement targetm.vectorize.preferred_simd_mode. */
5123
5124 static machine_mode
5125 rs6000_preferred_simd_mode (scalar_mode mode)
5126 {
5127 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
5128
5129 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
5130 return vmode.require ();
5131
5132 return word_mode;
5133 }
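
/* A worked example: for SFmode (4 bytes) this asks for a 16 / 4 = 4
   element vector, i.e. V4SFmode; if that vector mode is not enabled
   (VECTOR_MEM_NONE_P), word_mode is returned and the loop stays
   scalar.  */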
5134
5135 class rs6000_cost_data : public vector_costs
5136 {
5137 public:
5138 using vector_costs::vector_costs;
5139
5140 unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
5141 stmt_vec_info stmt_info, slp_tree, tree vectype,
5142 int misalign,
5143 vect_cost_model_location where) override;
5144 void finish_cost (const vector_costs *) override;
5145
5146 protected:
5147 void update_target_cost_per_stmt (vect_cost_for_stmt, stmt_vec_info,
5148 vect_cost_model_location, unsigned int);
5149 void density_test (loop_vec_info);
5150 void adjust_vect_cost_per_loop (loop_vec_info);
5151 unsigned int determine_suggested_unroll_factor (loop_vec_info);
5152
5153 /* Total number of vectorized stmts (loop only). */
5154 unsigned m_nstmts = 0;
5155 /* Total number of loads (loop only). */
5156 unsigned m_nloads = 0;
5157 /* Total number of stores (loop only). */
5158 unsigned m_nstores = 0;
5159 /* Reduction factor for suggesting unroll factor (loop only). */
5160 unsigned m_reduc_factor = 0;
5161 /* Possible extra penalized cost on vector construction (loop only). */
5162 unsigned m_extra_ctor_cost = 0;
5163 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5164 instruction is needed by the vectorization. */
5165 bool m_vect_nonmem = false;
5166 /* If this loop gets vectorized with emulated gather load. */
5167 bool m_gather_load = false;
5168 };
5169
5170 /* Test for likely overcommitment of vector hardware resources. If a
5171 loop iteration is relatively large, and too large a percentage of
5172 instructions in the loop are vectorized, the cost model may not
5173 adequately reflect delays from unavailable vector resources.
5174 Penalize the loop body cost for this case. */
5175
5176 void
5177 rs6000_cost_data::density_test (loop_vec_info loop_vinfo)
5178 {
5179   /* This density test only cares about the cost of the vector version of
5180      the loop, so immediately return if we are passed costing for the
5181      scalar version (namely computing the single scalar iteration cost).  */
5182 if (m_costing_for_scalar)
5183 return;
5184
5185 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5186 basic_block *bbs = get_loop_body (loop);
5187 int nbbs = loop->num_nodes;
5188 int vec_cost = m_costs[vect_body], not_vec_cost = 0;
5189
5190 for (int i = 0; i < nbbs; i++)
5191 {
5192 basic_block bb = bbs[i];
5193 gimple_stmt_iterator gsi;
5194
5195 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5196 {
5197 gimple *stmt = gsi_stmt (gsi);
5198 if (is_gimple_debug (stmt))
5199 continue;
5200
5201 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5202
5203 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5204 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5205 not_vec_cost++;
5206 }
5207 }
5208
5209 free (bbs);
5210 int density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5211
5212 if (density_pct > rs6000_density_pct_threshold
5213 && vec_cost + not_vec_cost > rs6000_density_size_threshold)
5214 {
5215 m_costs[vect_body] = vec_cost * (100 + rs6000_density_penalty) / 100;
5216 if (dump_enabled_p ())
5217 dump_printf_loc (MSG_NOTE, vect_location,
5218 "density %d%%, cost %d exceeds threshold, penalizing "
5219 "loop body cost by %u%%\n", density_pct,
5220 vec_cost + not_vec_cost, rs6000_density_penalty);
5221 }
5222
5223 /* Check whether we need to penalize the body cost to account
5224 for excess strided or elementwise loads. */
5225 if (m_extra_ctor_cost > 0)
5226 {
5227 gcc_assert (m_nloads <= m_nstmts);
5228 unsigned int load_pct = (m_nloads * 100) / m_nstmts;
5229
5230       /* Many scalar loads that are strided or elementwise loads into a
5231 	 vector are likely to be bounded by latency and execution resources
5232 	 if both conditions below hold:
5233 	 1. there are many loads, so it is easy to end up with a long wait
5234 	    for the load units;
5235 	 2. loads make up a big proportion of all vectorized statements, so
5236 	    it is not easy to schedule other statements to spread among the
5237 	    loads.
5238 	 One typical case is the innermost loop of the hotspot of SPEC2017
5239 	 503.bwaves_r without loop interchange.  */
5240 if (m_nloads > (unsigned int) rs6000_density_load_num_threshold
5241 && load_pct > (unsigned int) rs6000_density_load_pct_threshold)
5242 {
5243 m_costs[vect_body] += m_extra_ctor_cost;
5244 if (dump_enabled_p ())
5245 dump_printf_loc (MSG_NOTE, vect_location,
5246 "Found %u loads and "
5247 "load pct. %u%% exceed "
5248 "the threshold, "
5249 "penalizing loop body "
5250 "cost by extra cost %u "
5251 "for ctor.\n",
5252 m_nloads, load_pct,
5253 m_extra_ctor_cost);
5254 }
5255 }
5256 }
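
/* A worked example with illustrative numbers (the real thresholds come
   from the rs6000_density_* parameters): if vec_cost is 90 and
   not_vec_cost is 10, density_pct is (90 * 100) / 100 = 90.  Assuming an
   85% density threshold, a size threshold below 100 and a 10% penalty,
   the body cost becomes 90 * (100 + 10) / 100 = 99.  */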
5257
5258 /* Implement targetm.vectorize.create_costs. */
5259
5260 static vector_costs *
5261 rs6000_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
5262 {
5263 return new rs6000_cost_data (vinfo, costing_for_scalar);
5264 }
5265
5266 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5267    For some statements, we would like to further fine-tune the cost on top
5268    of the rs6000_builtin_vectorization_cost handling, which doesn't have any
5269    information on statement operation codes etc.  One typical case here is
5270    COND_EXPR: it takes the same cost as a simple FXU instruction when
5271    evaluated for scalar cost, but it should be priced more highly whether it
5272    is transformed to compare + branch or to compare + isel instructions.  */
5273
5274 static unsigned
5275 rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
5276 struct _stmt_vec_info *stmt_info)
5277 {
5278 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5279 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5280 {
5281 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5282 if (subcode == COND_EXPR)
5283 return 2;
5284 }
5285
5286 return 0;
5287 }
5288
5289 /* Helper function for add_stmt_cost. Check each statement cost
5290 entry, gather information and update the target_cost fields
5291 accordingly. */
5292 void
5293 rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
5294 stmt_vec_info stmt_info,
5295 vect_cost_model_location where,
5296 unsigned int orig_count)
5297 {
5299 /* Check whether we're doing something other than just a copy loop.
5300 Not all such loops may be profitably vectorized; see
5301 rs6000_finish_cost. */
5302 if (kind == vec_to_scalar
5303 || kind == vec_perm
5304 || kind == vec_promote_demote
5305 || kind == vec_construct
5306 || kind == scalar_to_vec
5307 || (where == vect_body && kind == vector_stmt))
5308 m_vect_nonmem = true;
5309
5310 /* Gather some information when we are costing the vectorized instruction
5311 for the statements located in a loop body. */
5312 if (!m_costing_for_scalar
5313 && is_a<loop_vec_info> (m_vinfo)
5314 && where == vect_body)
5315 {
5316 m_nstmts += orig_count;
5317
5318 if (kind == scalar_load
5319 || kind == vector_load
5320 || kind == unaligned_load
5321 || kind == vector_gather_load)
5322 {
5323 m_nloads += orig_count;
5324 if (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5325 m_gather_load = true;
5326 }
5327 else if (kind == scalar_store
5328 || kind == vector_store
5329 || kind == unaligned_store
5330 || kind == vector_scatter_store)
5331 m_nstores += orig_count;
5332 else if ((kind == scalar_stmt
5333 || kind == vector_stmt
5334 || kind == vec_to_scalar)
5335 && stmt_info
5336 && vect_is_reduction (stmt_info))
5337 {
5338 /* Loop body contains normal int or fp operations and epilogue
5339 contains vector reduction. For simplicity, we assume int
5340 operation takes one cycle and fp operation takes one more. */
5341 tree lhs = gimple_get_lhs (stmt_info->stmt);
5342 bool is_float = FLOAT_TYPE_P (TREE_TYPE (lhs));
5343 unsigned int basic_cost = is_float ? 2 : 1;
5344 m_reduc_factor = MAX (basic_cost * orig_count, m_reduc_factor);
5345 }
5346
5347 /* Power processors do not currently have instructions for strided
5348 and elementwise loads, and instead we must generate multiple
5349 scalar loads. This leads to undercounting of the cost. We
5350 account for this by scaling the construction cost by the number
5351 of elements involved, and saving this as extra cost that we may
5352 or may not need to apply. When finalizing the cost of the loop,
5353 the extra penalty is applied when the load density heuristics
5354 are satisfied. */
5355 if (kind == vec_construct && stmt_info
5356 && STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
5357 && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
5358 || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_STRIDED_SLP))
5359 {
5360 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5361 unsigned int nunits = vect_nunits_for_cost (vectype);
5362 	  /* As PR103702 shows, it's possible that the vectorizer wants to
5363 	     do costing for only one unit here; there is no need to penalize
5364 	     it, so simply return early.  */
5365 if (nunits == 1)
5366 return;
5367 	  /* The i386 port adopts nunits * stmt_cost as the penalized cost
5368 	     for this kind of construction.  We used to follow it but found
5369 	     it could result in an unreliable body cost, especially for
5370 	     V16QI/V8HI modes.  To improve this, we choose a new heuristic:
5371 	     for each scalar load, use 2 as the penalized cost when there
5372 	     are 2 nunits and 1 otherwise.  It's without much supporting
5373 	     theory, mainly concluded from broad performance evaluations on
5374 	     Power8, Power9 and Power10.  One possibly related point:
5375 	     vector construction for more units uses more insns, so there
5376 	     are more chances to schedule them better (they may even run in
5377 	     parallel when enough units are available), so it seems
5378 	     reasonable not to penalize them as much.  */
5380 unsigned int adjusted_cost = (nunits == 2) ? 2 : 1;
5381 unsigned int extra_cost = nunits * adjusted_cost;
5382 m_extra_ctor_cost += extra_cost;
5383 }
5384 }
5385 }
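
/* A worked example of the construction penalty above: an elementwise
   V2DI load has nunits == 2, so adjusted_cost is 2 and 2 * 2 = 4 is
   added to m_extra_ctor_cost; an elementwise V16QI load has
   nunits == 16, so adjusted_cost is 1 and 16 is added.  */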
5386
5387 unsigned
5388 rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind,
5389 stmt_vec_info stmt_info, slp_tree,
5390 tree vectype, int misalign,
5391 vect_cost_model_location where)
5392 {
5393 unsigned retval = 0;
5394
5395 if (flag_vect_cost_model)
5396 {
5397 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5398 misalign);
5399 stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info);
5400 /* Statements in an inner loop relative to the loop being
5401 vectorized are weighted more heavily. The value here is
5402 arbitrary and could potentially be improved with analysis. */
5403 unsigned int orig_count = count;
5404 retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
5405 m_costs[where] += retval;
5406
5407 update_target_cost_per_stmt (kind, stmt_info, where, orig_count);
5408 }
5409
5410 return retval;
5411 }
5412
5413 /* For target-specific vectorization costs which can't be handled per stmt,
5414    we check the requisite conditions and adjust the vectorization cost
5415    accordingly if they are satisfied.  One typical example is modeling the
5416    shift cost for vectors with length by counting the number of required
5417    lengths under the condition LOOP_VINFO_FULLY_WITH_LENGTH_P.  */
5418
5419 void
5420 rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo)
5421 {
5422 if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
5423 {
5424 rgroup_controls *rgc;
5425 unsigned int num_vectors_m1;
5426 unsigned int shift_cnt = 0;
5427 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
5428 if (rgc->type)
5429 /* Each length needs one shift to fill into bits 0-7. */
5430 shift_cnt += num_vectors_m1 + 1;
5431
5432 add_stmt_cost (shift_cnt, scalar_stmt, NULL, NULL,
5433 NULL_TREE, 0, vect_body);
5434 }
5435 }
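
/* A worked example: with two length rgroups whose num_vectors_m1 are 0
   and 1, shift_cnt is 1 + 2 = 3, so three scalar_stmt costs are added to
   the vector body cost for setting up the lengths.  */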
5436
5437 /* Determine the suggested unroll factor by considering the factors below:
5438
5439    - unroll option/pragma which can disable unrolling for this loop;
5440    - simple hardware resource model for non-memory vector insns;
5441    - aggressive heuristics when the iteration count is unknown:
5442      - reduction case to break cross-iteration dependency;
5443      - emulated gather load;
5444    - estimated iteration count when the iteration count is unknown.  */
5445
5448 unsigned int
5449 rs6000_cost_data::determine_suggested_unroll_factor (loop_vec_info loop_vinfo)
5450 {
5451 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5452
5453 /* Don't unroll if it's specified explicitly not to be unrolled. */
5454 if (loop->unroll == 1
5455 || (OPTION_SET_P (flag_unroll_loops) && !flag_unroll_loops)
5456 || (OPTION_SET_P (flag_unroll_all_loops) && !flag_unroll_all_loops))
5457 return 1;
5458
5459 unsigned int nstmts_nonldst = m_nstmts - m_nloads - m_nstores;
5460   /* Don't unroll if there are no vector instructions except memory accesses.  */
5461 if (nstmts_nonldst == 0)
5462 return 1;
5463
5464 /* Consider breaking cross iteration dependency for reduction. */
5465 unsigned int reduc_factor = m_reduc_factor > 1 ? m_reduc_factor : 1;
5466
5467   /* Use a simple hardware resource model of how many non-ld/st vector
5468      instructions can be issued per cycle.  */
5469 unsigned int issue_width = rs6000_vect_unroll_issue;
5470 unsigned int uf = CEIL (reduc_factor * issue_width, nstmts_nonldst);
5471 uf = MIN ((unsigned int) rs6000_vect_unroll_limit, uf);
5472 /* Make sure it is power of 2. */
5473 uf = 1 << ceil_log2 (uf);
5474
5475   /* If the iteration count is known, the costing is exact enough;
5476      don't worry that it could be worse.  */
5477 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5478 return uf;
5479
5480 /* Inspired by SPEC2017 parest_r, we want to aggressively unroll the
5481 loop if either condition is satisfied:
5482 - reduction factor exceeds the threshold;
5483 - emulated gather load adopted. */
5484 if (reduc_factor > (unsigned int) rs6000_vect_unroll_reduc_threshold
5485 || m_gather_load)
5486 return uf;
5487
5488   /* Check if we can conclude from the estimated iteration count that
5489      it's good to unroll.  */
5490 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
5491 unsigned int vf = vect_vf_for_cost (loop_vinfo);
5492 unsigned int unrolled_vf = vf * uf;
5493 if (est_niter == -1 || est_niter < unrolled_vf)
5494     /* When the estimated iteration count of this loop is unknown, it's
5495        possible that we are able to vectorize this loop with the original
5496        VF but fail to vectorize it with the unrolled VF if the actual
5497        iteration count is in between.  */
5498 return 1;
5499 else
5500 {
5501 unsigned int epil_niter_unr = est_niter % unrolled_vf;
5502 unsigned int epil_niter = est_niter % vf;
5503       /* Even if we have partial vector support, it can still be
5504 	 inefficient to calculate the length when the iteration count is
5505 	 unknown, so only expect unrolling to be good when the epilogue
5506 	 iteration count is not bigger than VF (only one length
5507 	 calculation).  */
5507 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5508 && epil_niter_unr <= vf)
5509 return uf;
5510 /* Without partial vector support, conservatively unroll this when
5511 the epilogue iteration count is less than the original one
5512 (epilogue execution time wouldn't be longer than before). */
5513 else if (!LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5514 && epil_niter_unr <= epil_niter)
5515 return uf;
5516 }
5517
5518 return 1;
5519 }
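
/* A worked example with illustrative values: with m_nstmts = 12,
   m_nloads = 4 and m_nstores = 2, nstmts_nonldst is 6.  Taking
   reduc_factor = 2 and rs6000_vect_unroll_issue = 4 gives
   uf = CEIL (2 * 4, 6) = 2, already a power of two, so 2 is suggested
   when the iteration count is known (otherwise the estimated-count
   checks above may still veto it).  */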
5520
5521 void
5522 rs6000_cost_data::finish_cost (const vector_costs *scalar_costs)
5523 {
5524 if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo))
5525 {
5526 adjust_vect_cost_per_loop (loop_vinfo);
5527 density_test (loop_vinfo);
5528
5529 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5530 that require versioning for any reason. The vectorization is at
5531 best a wash inside the loop, and the versioning checks make
5532 profitability highly unlikely and potentially quite harmful. */
5533 if (!m_vect_nonmem
5534 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) == 2
5535 && LOOP_REQUIRES_VERSIONING (loop_vinfo))
5536 m_costs[vect_body] += 10000;
5537
5538 m_suggested_unroll_factor
5539 = determine_suggested_unroll_factor (loop_vinfo);
5540 }
5541
5542 vector_costs::finish_cost (scalar_costs);
5543 }
5544
5545 /* Implement targetm.loop_unroll_adjust. */
5546
5547 static unsigned
5548 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5549 {
5550 if (unroll_only_small_loops)
5551 {
5552 /* TODO: These are hardcoded values right now. We probably should use
5553 a PARAM here. */
5554 if (loop->ninsns <= 6)
5555 return MIN (4, nunroll);
5556 if (loop->ninsns <= 10)
5557 return MIN (2, nunroll);
5558
5559 return 0;
5560 }
5561
5562 return nunroll;
5563 }
5564
5565 /* Returns a function decl for a vectorized version of the builtin function
5566 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5567 if it is not available.
5568
5569 Implement targetm.vectorize.builtin_vectorized_function. */
5570
5571 static tree
5572 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5573 tree type_in)
5574 {
5575 machine_mode in_mode, out_mode;
5576 int in_n, out_n;
5577
5578 if (TARGET_DEBUG_BUILTIN)
5579 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5580 combined_fn_name (combined_fn (fn)),
5581 GET_MODE_NAME (TYPE_MODE (type_out)),
5582 GET_MODE_NAME (TYPE_MODE (type_in)));
5583
5584 /* TODO: Should this be gcc_assert? */
5585 if (TREE_CODE (type_out) != VECTOR_TYPE
5586 || TREE_CODE (type_in) != VECTOR_TYPE)
5587 return NULL_TREE;
5588
5589 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5590 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5591 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5592 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5593
5594 switch (fn)
5595 {
5596 CASE_CFN_COPYSIGN:
5597 if (VECTOR_UNIT_VSX_P (V2DFmode)
5598 && out_mode == DFmode && out_n == 2
5599 && in_mode == DFmode && in_n == 2)
5600 return rs6000_builtin_decls[RS6000_BIF_CPSGNDP];
5601 if (VECTOR_UNIT_VSX_P (V4SFmode)
5602 && out_mode == SFmode && out_n == 4
5603 && in_mode == SFmode && in_n == 4)
5604 return rs6000_builtin_decls[RS6000_BIF_CPSGNSP];
5605 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5606 && out_mode == SFmode && out_n == 4
5607 && in_mode == SFmode && in_n == 4)
5608 return rs6000_builtin_decls[RS6000_BIF_COPYSIGN_V4SF];
5609 break;
5610 CASE_CFN_CEIL:
5611 if (VECTOR_UNIT_VSX_P (V2DFmode)
5612 && out_mode == DFmode && out_n == 2
5613 && in_mode == DFmode && in_n == 2)
5614 return rs6000_builtin_decls[RS6000_BIF_XVRDPIP];
5615 if (VECTOR_UNIT_VSX_P (V4SFmode)
5616 && out_mode == SFmode && out_n == 4
5617 && in_mode == SFmode && in_n == 4)
5618 return rs6000_builtin_decls[RS6000_BIF_XVRSPIP];
5619 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5620 && out_mode == SFmode && out_n == 4
5621 && in_mode == SFmode && in_n == 4)
5622 return rs6000_builtin_decls[RS6000_BIF_VRFIP];
5623 break;
5624 CASE_CFN_FLOOR:
5625 if (VECTOR_UNIT_VSX_P (V2DFmode)
5626 && out_mode == DFmode && out_n == 2
5627 && in_mode == DFmode && in_n == 2)
5628 return rs6000_builtin_decls[RS6000_BIF_XVRDPIM];
5629 if (VECTOR_UNIT_VSX_P (V4SFmode)
5630 && out_mode == SFmode && out_n == 4
5631 && in_mode == SFmode && in_n == 4)
5632 return rs6000_builtin_decls[RS6000_BIF_XVRSPIM];
5633 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5634 && out_mode == SFmode && out_n == 4
5635 && in_mode == SFmode && in_n == 4)
5636 return rs6000_builtin_decls[RS6000_BIF_VRFIM];
5637 break;
5638 CASE_CFN_FMA:
5639 if (VECTOR_UNIT_VSX_P (V2DFmode)
5640 && out_mode == DFmode && out_n == 2
5641 && in_mode == DFmode && in_n == 2)
5642 return rs6000_builtin_decls[RS6000_BIF_XVMADDDP];
5643 if (VECTOR_UNIT_VSX_P (V4SFmode)
5644 && out_mode == SFmode && out_n == 4
5645 && in_mode == SFmode && in_n == 4)
5646 return rs6000_builtin_decls[RS6000_BIF_XVMADDSP];
5647 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5648 && out_mode == SFmode && out_n == 4
5649 && in_mode == SFmode && in_n == 4)
5650 return rs6000_builtin_decls[RS6000_BIF_VMADDFP];
5651 break;
5652 CASE_CFN_TRUNC:
5653 if (VECTOR_UNIT_VSX_P (V2DFmode)
5654 && out_mode == DFmode && out_n == 2
5655 && in_mode == DFmode && in_n == 2)
5656 return rs6000_builtin_decls[RS6000_BIF_XVRDPIZ];
5657 if (VECTOR_UNIT_VSX_P (V4SFmode)
5658 && out_mode == SFmode && out_n == 4
5659 && in_mode == SFmode && in_n == 4)
5660 return rs6000_builtin_decls[RS6000_BIF_XVRSPIZ];
5661 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5662 && out_mode == SFmode && out_n == 4
5663 && in_mode == SFmode && in_n == 4)
5664 return rs6000_builtin_decls[RS6000_BIF_VRFIZ];
5665 break;
5666 CASE_CFN_NEARBYINT:
5667 if (VECTOR_UNIT_VSX_P (V2DFmode)
5668 && flag_unsafe_math_optimizations
5669 && out_mode == DFmode && out_n == 2
5670 && in_mode == DFmode && in_n == 2)
5671 return rs6000_builtin_decls[RS6000_BIF_XVRDPI];
5672 if (VECTOR_UNIT_VSX_P (V4SFmode)
5673 && flag_unsafe_math_optimizations
5674 && out_mode == SFmode && out_n == 4
5675 && in_mode == SFmode && in_n == 4)
5676 return rs6000_builtin_decls[RS6000_BIF_XVRSPI];
5677 break;
5678 CASE_CFN_RINT:
5679 if (VECTOR_UNIT_VSX_P (V2DFmode)
5680 && !flag_trapping_math
5681 && out_mode == DFmode && out_n == 2
5682 && in_mode == DFmode && in_n == 2)
5683 return rs6000_builtin_decls[RS6000_BIF_XVRDPIC];
5684 if (VECTOR_UNIT_VSX_P (V4SFmode)
5685 && !flag_trapping_math
5686 && out_mode == SFmode && out_n == 4
5687 && in_mode == SFmode && in_n == 4)
5688 return rs6000_builtin_decls[RS6000_BIF_XVRSPIC];
5689 break;
5690 default:
5691 break;
5692 }
5693
5694 /* Generate calls to libmass if appropriate. */
5695 if (rs6000_veclib_handler)
5696 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5697
5698 return NULL_TREE;
5699 }
5700
5701 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5702 library with vectorized intrinsics. */
5703
5704 static tree
5705 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5706 tree type_in)
5707 {
5708 char name[32];
5709 const char *suffix = NULL;
5710 tree fntype, new_fndecl, bdecl = NULL_TREE;
5711 int n_args = 1;
5712 const char *bname;
5713 machine_mode el_mode, in_mode;
5714 int n, in_n;
5715
5716   /* Libmass is suitable for unsafe math only, as it does not correctly
5717      support parts of IEEE (such as denormals) with the required precision.
5718      Only support it if we have VSX to use the simd d2 or f4 functions.
5719      XXX: Add variable length support.  */
5720 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5721 return NULL_TREE;
5722
5723 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5724 n = TYPE_VECTOR_SUBPARTS (type_out);
5725 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5726 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5727 if (el_mode != in_mode
5728 || n != in_n)
5729 return NULL_TREE;
5730
5731 switch (fn)
5732 {
5733 CASE_CFN_ATAN2:
5734 CASE_CFN_HYPOT:
5735 CASE_CFN_POW:
5736 n_args = 2;
5737 gcc_fallthrough ();
5738
5739 CASE_CFN_ACOS:
5740 CASE_CFN_ACOSH:
5741 CASE_CFN_ASIN:
5742 CASE_CFN_ASINH:
5743 CASE_CFN_ATAN:
5744 CASE_CFN_ATANH:
5745 CASE_CFN_CBRT:
5746 CASE_CFN_COS:
5747 CASE_CFN_COSH:
5748 CASE_CFN_ERF:
5749 CASE_CFN_ERFC:
5750 CASE_CFN_EXP2:
5751 CASE_CFN_EXP:
5752 CASE_CFN_EXPM1:
5753 CASE_CFN_LGAMMA:
5754 CASE_CFN_LOG10:
5755 CASE_CFN_LOG1P:
5756 CASE_CFN_LOG2:
5757 CASE_CFN_LOG:
5758 CASE_CFN_SIN:
5759 CASE_CFN_SINH:
5760 CASE_CFN_SQRT:
5761 CASE_CFN_TAN:
5762 CASE_CFN_TANH:
5763 if (el_mode == DFmode && n == 2)
5764 {
5765 bdecl = mathfn_built_in (double_type_node, fn);
5766 suffix = "d2"; /* pow -> powd2 */
5767 }
5768 else if (el_mode == SFmode && n == 4)
5769 {
5770 bdecl = mathfn_built_in (float_type_node, fn);
5771 suffix = "4"; /* powf -> powf4 */
5772 }
5773 else
5774 return NULL_TREE;
5775 if (!bdecl)
5776 return NULL_TREE;
5777 break;
5778
5779 default:
5780 return NULL_TREE;
5781 }
5782
5783 gcc_assert (suffix != NULL);
5784 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5785 if (!bname)
5786 return NULL_TREE;
5787
5788 strcpy (name, bname + strlen ("__builtin_"));
5789 strcat (name, suffix);
5790
5791 if (n_args == 1)
5792 fntype = build_function_type_list (type_out, type_in, NULL);
5793 else if (n_args == 2)
5794 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5795 else
5796 gcc_unreachable ();
5797
5798 /* Build a function declaration for the vectorized function. */
5799 new_fndecl = build_decl (BUILTINS_LOCATION,
5800 FUNCTION_DECL, get_identifier (name), fntype);
5801 TREE_PUBLIC (new_fndecl) = 1;
5802 DECL_EXTERNAL (new_fndecl) = 1;
5803 DECL_IS_NOVOPS (new_fndecl) = 1;
5804 TREE_READONLY (new_fndecl) = 1;
5805
5806 return new_fndecl;
5807 }
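
/* A worked example: for CFN_POW with V2DF types, bdecl is __builtin_pow,
   so bname is "__builtin_pow"; stripping "__builtin_" and appending the
   "d2" suffix yields the MASS routine name "powd2", declared as
   V2DF powd2 (V2DF, V2DF) since n_args is 2.  */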
5808
5809 \f
5810 /* Default CPU string for rs6000*_file_start functions. */
5811 static const char *rs6000_default_cpu;
5812
5813 #ifdef USING_ELFOS_H
5814 const char *rs6000_machine;
5815
5816 const char *
5817 rs6000_machine_from_flags (void)
5818 {
5819 /* e300 and e500 */
5820 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3)
5821 return "e300";
5822 if (rs6000_cpu == PROCESSOR_PPC8540 || rs6000_cpu == PROCESSOR_PPC8548)
5823 return "e500";
5824 if (rs6000_cpu == PROCESSOR_PPCE500MC)
5825 return "e500mc";
5826 if (rs6000_cpu == PROCESSOR_PPCE500MC64)
5827 return "e500mc64";
5828 if (rs6000_cpu == PROCESSOR_PPCE5500)
5829 return "e5500";
5830 if (rs6000_cpu == PROCESSOR_PPCE6500)
5831 return "e6500";
5832
5833 /* 400 series */
5834 if (rs6000_cpu == PROCESSOR_PPC403)
5835 return "\"403\"";
5836 if (rs6000_cpu == PROCESSOR_PPC405)
5837 return "\"405\"";
5838 if (rs6000_cpu == PROCESSOR_PPC440)
5839 return "\"440\"";
5840 if (rs6000_cpu == PROCESSOR_PPC476)
5841 return "\"476\"";
5842
5843 /* A2 */
5844 if (rs6000_cpu == PROCESSOR_PPCA2)
5845 return "a2";
5846
5847 /* Cell BE */
5848 if (rs6000_cpu == PROCESSOR_CELL)
5849 return "cell";
5850
5851 /* Titan */
5852 if (rs6000_cpu == PROCESSOR_TITAN)
5853 return "titan";
5854
5855 /* 500 series and 800 series */
5856 if (rs6000_cpu == PROCESSOR_MPCCORE)
5857 return "\"821\"";
5858
5859 #if 0
5860 /* This (and ppc64 below) are disabled here (for now at least) because
5861 PROCESSOR_POWERPC, PROCESSOR_POWERPC64, and PROCESSOR_COMMON
5862 are #define'd as some of these. Untangling that is a job for later. */
5863
5864 /* 600 series and 700 series, "classic" */
5865 if (rs6000_cpu == PROCESSOR_PPC601 || rs6000_cpu == PROCESSOR_PPC603
5866 || rs6000_cpu == PROCESSOR_PPC604 || rs6000_cpu == PROCESSOR_PPC604e
5867 || rs6000_cpu == PROCESSOR_PPC750)
5868 return "ppc";
5869 #endif
5870
5871 /* Classic with AltiVec, "G4" */
5872 if (rs6000_cpu == PROCESSOR_PPC7400 || rs6000_cpu == PROCESSOR_PPC7450)
5873 return "\"7450\"";
5874
5875 #if 0
5876 /* The older 64-bit CPUs */
5877 if (rs6000_cpu == PROCESSOR_PPC620 || rs6000_cpu == PROCESSOR_PPC630
5878 || rs6000_cpu == PROCESSOR_RS64A)
5879 return "ppc64";
5880 #endif
5881
5882 HOST_WIDE_INT flags = rs6000_isa_flags;
5883
5884 /* Disable the flags that should never influence the .machine selection. */
5885 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT | OPTION_MASK_ISEL);
5886
5887 if ((flags & (ISA_POWER11_MASKS_SERVER & ~ISA_3_1_MASKS_SERVER)) != 0)
5888 return "power11";
5889 if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5890 return "power10";
5891 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5892 return "power9";
5893 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5894 return "power8";
5895 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5896 return "power7";
5897 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5898 return "power6";
5899 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5900 return "power5";
5901 if ((flags & ISA_2_1_MASKS) != 0)
5902 return "power4";
5903 if ((flags & OPTION_MASK_POWERPC64) != 0)
5904 return "ppc64";
5905 return "ppc";
5906 }
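
/* A note on the cascade above: each test checks whether any ISA bit that
   is new at a given level (present in that level's mask but not the
   previous one) is enabled; the first level with such a bit wins, and
   only OPTION_MASK_POWERPC64 distinguishes the final "ppc64" and "ppc"
   fallbacks.  */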
5907
5908 void
5909 emit_asm_machine (void)
5910 {
5911 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5912 }
5913 #endif
5914
5915 /* Do anything needed at the start of the asm file. */
5916
5917 static void
5918 rs6000_file_start (void)
5919 {
5920 char buffer[80];
5921 const char *start = buffer;
5922 FILE *file = asm_out_file;
5923
5924 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5925
5926 default_file_start ();
5927
5928 if (flag_verbose_asm)
5929 {
5930 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5931
5932 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5933 {
5934 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5935 start = "";
5936 }
5937
5938 if (OPTION_SET_P (rs6000_cpu_index))
5939 {
5940 fprintf (file, "%s -mcpu=%s", start,
5941 processor_target_table[rs6000_cpu_index].name);
5942 start = "";
5943 }
5944
5945 if (OPTION_SET_P (rs6000_tune_index))
5946 {
5947 fprintf (file, "%s -mtune=%s", start,
5948 processor_target_table[rs6000_tune_index].name);
5949 start = "";
5950 }
5951
5952 if (PPC405_ERRATUM77)
5953 {
5954 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5955 start = "";
5956 }
5957
5958 #ifdef USING_ELFOS_H
5959 switch (rs6000_sdata)
5960 {
5961 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5962 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5963 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5964 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5965 }
5966
5967 if (rs6000_sdata && g_switch_value)
5968 {
5969 fprintf (file, "%s -G %d", start,
5970 g_switch_value);
5971 start = "";
5972 }
5973 #endif
5974
5975 if (*start == '\0')
5976 putc ('\n', file);
5977 }
5978
5979 #ifdef USING_ELFOS_H
5980 rs6000_machine = rs6000_machine_from_flags ();
5981 emit_asm_machine ();
5982 #endif
5983
5984 if (DEFAULT_ABI == ABI_ELFv2)
5985 fprintf (file, "\t.abiversion 2\n");
5986 }
5987
5988 \f
5989 /* Return nonzero if this function is known to have a null epilogue. */
5990
5991 int
5992 direct_return (void)
5993 {
5994 if (reload_completed)
5995 {
5996 rs6000_stack_t *info = rs6000_stack_info ();
5997
5998 if (info->first_gp_reg_save == 32
5999 && info->first_fp_reg_save == 64
6000 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6001 && ! info->lr_save_p
6002 && ! info->cr_save_p
6003 && info->vrsave_size == 0
6004 && ! info->push_p)
6005 return 1;
6006 }
6007
6008 return 0;
6009 }
6010
6011 /* Helper for num_insns_constant. Calculate number of instructions to
6012 load VALUE to a single gpr using combinations of addi, addis, ori,
6013 oris, sldi and rldimi instructions. */
6014
6015 static int
6016 num_insns_constant_gpr (HOST_WIDE_INT value)
6017 {
6018   /* Signed constant loadable with addi.  */
6019 if (SIGNED_INTEGER_16BIT_P (value))
6020 return 1;
6021
6022   /* Constant loadable with addis.  */
6023 else if ((value & 0xffff) == 0
6024 && (value >> 31 == -1 || value >> 31 == 0))
6025 return 1;
6026
6027 /* PADDI can support up to 34 bit signed integers. */
6028 else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
6029 return 1;
6030
6031 else if (TARGET_POWERPC64)
6032 {
6033 int num_insns = 0;
6034 rs6000_emit_set_long_const (nullptr, value, &num_insns);
6035 return num_insns;
6036 }
6037
6038 else
6039 return 2;
6040 }
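
/* Worked examples: 0x7fff is a signed 16-bit value, so one addi loads
   it; 0x12340000 has a zero low halfword and fits in 32 bits, so one
   addis loads it; with TARGET_PREFIXED, a 34-bit signed value such as
   0x1ffffffff takes a single paddi.  */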
6041
6042 /* Helper for num_insns_constant. Allow constants formed by the
6043 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
6044 and handle modes that require multiple gprs. */
6045
6046 static int
6047 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
6048 {
6049 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6050 int total = 0;
6051 while (nregs-- > 0)
6052 {
6053 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
6054 int insns = num_insns_constant_gpr (low);
6055 if (insns > 2
6056 /* We won't get more than 2 from num_insns_constant_gpr
6057 except when TARGET_POWERPC64 and mode is DImode or
6058 wider, so the register mode must be DImode. */
6059 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
6060 insns = 2;
6061 total += insns;
6062 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
6063 it all at once would be UB. */
6064 value >>= (BITS_PER_WORD - 1);
6065 value >>= 1;
6066 }
6067 return total;
6068 }
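
/* A minimal standalone sketch of the two-step shift above, assuming a
   64-bit HOST_WIDE_INT with BITS_PER_WORD == 64 (names here are
   illustrative, not part of the sources):  */
#if 0
static long long
advance_to_next_word (long long value)
{
  value >>= 64 - 1;	/* Shift by 63 bits: always well defined.  */
  value >>= 1;		/* Shift the one remaining bit.  */
  return value;		/* Same result as "value >> 64", which would be
			   undefined behavior if done in one step.  */
}
#endif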
6069
6070 /* Return the number of instructions it takes to form a constant in as
6071    many gprs as are needed for MODE.  */
6072
6073 int
6074 num_insns_constant (rtx op, machine_mode mode)
6075 {
6076 HOST_WIDE_INT val;
6077
6078 switch (GET_CODE (op))
6079 {
6080 case CONST_INT:
6081 val = INTVAL (op);
6082 break;
6083
6084 case CONST_WIDE_INT:
6085 {
6086 int insns = 0;
6087 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6088 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
6089 DImode);
6090 return insns;
6091 }
6092
6093 case CONST_DOUBLE:
6094 {
6095 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
6096
6097 if (mode == SFmode || mode == SDmode)
6098 {
6099 long l;
6100
6101 if (mode == SDmode)
6102 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
6103 else
6104 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
6105 /* See the first define_split in rs6000.md handling a
6106 const_double_operand. */
6107 val = l;
6108 mode = SImode;
6109 }
6110 else if (mode == DFmode || mode == DDmode)
6111 {
6112 long l[2];
6113
6114 if (mode == DDmode)
6115 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
6116 else
6117 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
6118
6119 /* See the second (32-bit) and third (64-bit) define_split
6120 in rs6000.md handling a const_double_operand. */
6121 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
6122 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
6123 mode = DImode;
6124 }
6125 else if (mode == TFmode || mode == TDmode
6126 || mode == KFmode || mode == IFmode)
6127 {
6128 long l[4];
6129 int insns;
6130
6131 if (mode == TDmode)
6132 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
6133 else
6134 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
6135
6136 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
6137 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
6138 insns = num_insns_constant_multi (val, DImode);
6139 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
6140 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
6141 insns += num_insns_constant_multi (val, DImode);
6142 return insns;
6143 }
6144 else
6145 gcc_unreachable ();
6146 }
6147 break;
6148
6149 default:
6150 gcc_unreachable ();
6151 }
6152
6153 return num_insns_constant_multi (val, mode);
6154 }
6155
6156 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6157 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6158 corresponding element of the vector, but for V4SFmode, the
6159 corresponding "float" is interpreted as an SImode integer. */
6160
6161 HOST_WIDE_INT
6162 const_vector_elt_as_int (rtx op, unsigned int elt)
6163 {
6164 rtx tmp;
6165
6166 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6167 gcc_assert (GET_MODE (op) != V2DImode
6168 && GET_MODE (op) != V2DFmode);
6169
6170 tmp = CONST_VECTOR_ELT (op, elt);
6171 if (GET_MODE (op) == V4SFmode)
6172 tmp = gen_lowpart (SImode, tmp);
6173 return INTVAL (tmp);
6174 }
6175
6176 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6177 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6178 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6179 all items are set to the same value and contain COPIES replicas of the
6180    vsplt's operand; if STEP > 1, one in every STEP elements is set to the
6181    vsplt's operand and the others are set to the value of the operand's msb.  */
6182
6183 static bool
6184 vspltis_constant (rtx op, unsigned step, unsigned copies)
6185 {
6186 machine_mode mode = GET_MODE (op);
6187 machine_mode inner = GET_MODE_INNER (mode);
6188
6189 unsigned i;
6190 unsigned nunits;
6191 unsigned bitsize;
6192 unsigned mask;
6193
6194 HOST_WIDE_INT val;
6195 HOST_WIDE_INT splat_val;
6196 HOST_WIDE_INT msb_val;
6197
6198 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6199 return false;
6200
6201 nunits = GET_MODE_NUNITS (mode);
6202 bitsize = GET_MODE_BITSIZE (inner);
6203 mask = GET_MODE_MASK (inner);
6204
6205 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6206 splat_val = val;
6207 msb_val = val >= 0 ? 0 : -1;
6208
6209 if (val == 0 && step > 1)
6210 {
6211       /* Special case for loading the most significant bit with step > 1.
6212 	 In that case, match 0s in all elements except those at positions i
6213 	 with i % step == step - 1, which must match EASY_VECTOR_MSB.  */
6214 for (i = 1; i < nunits; ++i)
6215 {
6216 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6217 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6218 if ((i & (step - 1)) == step - 1)
6219 {
6220 if (!EASY_VECTOR_MSB (elt_val, inner))
6221 break;
6222 }
6223 else if (elt_val)
6224 break;
6225 }
6226 if (i == nunits)
6227 return true;
6228 }
6229
6230 /* Construct the value to be splatted, if possible. If not, return 0. */
6231 for (i = 2; i <= copies; i *= 2)
6232 {
6233 HOST_WIDE_INT small_val;
6234 bitsize /= 2;
6235 small_val = splat_val >> bitsize;
6236 mask >>= bitsize;
6237 if (splat_val != ((HOST_WIDE_INT)
6238 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6239 | (small_val & mask)))
6240 return false;
6241 splat_val = small_val;
6242 inner = smallest_int_mode_for_size (bitsize);
6243 }
6244
6245 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6246 if (EASY_VECTOR_15 (splat_val))
6247 ;
6248
6249   /* Also check if we can splat, and then add the result to itself.  Do so if
6250      the value is positive, or if the splat instruction is using OP's mode;
6251      for splat_val < 0, the splat and the add should use the same mode.  */
6252 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6253 && (splat_val >= 0 || (step == 1 && copies == 1)))
6254 ;
6255
6256   /* Also check if we are loading up the most significant bit, which can be
6257      done by loading up -1 and shifting the value left by -1.  Only do this
6258      for step 1 here; for larger steps it is done earlier.  */
6259 else if (EASY_VECTOR_MSB (splat_val, inner) && step == 1)
6260 ;
6261
6262 else
6263 return false;
6264
6265 /* Check if VAL is present in every STEP-th element, and the
6266 other elements are filled with its most significant bit. */
6267 for (i = 1; i < nunits; ++i)
6268 {
6269 HOST_WIDE_INT desired_val;
6270 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6271 if ((i & (step - 1)) == 0)
6272 desired_val = val;
6273 else
6274 desired_val = msb_val;
6275
6276 if (desired_val != const_vector_elt_as_int (op, elt))
6277 return false;
6278 }
6279
6280 return true;
6281 }
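
/* Worked examples: a V8HI constant with all elements 5 is matched with
   step 1, copies 1 (a plain vspltish 5); a V4SI constant with each
   element 0x00030003 is matched with copies 2, since each 32-bit word is
   two copies of the halfword 3, so a vspltish of 3 synthesizes it when
   the vector is reinterpreted as V8HI.  */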
6282
6283 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6284 instruction, filling in the bottom elements with 0 or -1.
6285
6286 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6287 for the number of zeroes to shift in, or negative for the number of 0xff
6288 bytes to shift in.
6289
6290 OP is a CONST_VECTOR. */
6291
6292 int
6293 vspltis_shifted (rtx op)
6294 {
6295 machine_mode mode = GET_MODE (op);
6296 machine_mode inner = GET_MODE_INNER (mode);
6297
6298 unsigned i, j;
6299 unsigned nunits;
6300 unsigned mask;
6301
6302 HOST_WIDE_INT val;
6303
6304 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6305 return false;
6306
6307   /* We need to create pseudo registers to do the shift, so don't recognize
6308      shift vector constants after reload.  Don't match them even before RA
6309      once split1 is done, because there won't be a further splitting pass
6310      before RA to do the splitting.  */
6311 if (!can_create_pseudo_p ()
6312 || (cfun->curr_properties & PROP_rtl_split_insns))
6313 return false;
6314
6315 nunits = GET_MODE_NUNITS (mode);
6316 mask = GET_MODE_MASK (inner);
6317
6318 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6319
6320 /* Check if the value can really be the operand of a vspltis[bhw]. */
6321 if (EASY_VECTOR_15 (val))
6322 ;
6323
6324 /* Also check if we are loading up the most significant bit which can be done
6325 by loading up -1 and shifting the value left by -1. */
6326 else if (EASY_VECTOR_MSB (val, inner))
6327 ;
6328
6329 else
6330 return 0;
6331
6332   /* Check if VAL is present in every element until we find elements
6333      that are 0 or all 1 bits.  */
6334 for (i = 1; i < nunits; ++i)
6335 {
6336 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6337 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6338
6339 /* If the value isn't the splat value, check for the remaining elements
6340 being 0/-1. */
6341 if (val != elt_val)
6342 {
6343 if (elt_val == 0)
6344 {
6345 for (j = i+1; j < nunits; ++j)
6346 {
6347 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6348 if (const_vector_elt_as_int (op, elt2) != 0)
6349 return 0;
6350 }
6351
6352 return (nunits - i) * GET_MODE_SIZE (inner);
6353 }
6354
6355 else if ((elt_val & mask) == mask)
6356 {
6357 for (j = i+1; j < nunits; ++j)
6358 {
6359 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6360 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6361 return 0;
6362 }
6363
6364 return -((nunits - i) * GET_MODE_SIZE (inner));
6365 }
6366
6367 else
6368 return 0;
6369 }
6370 }
6371
6372 /* If all elements are equal, we don't need to do VSLDOI. */
6373 return 0;
6374 }
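
/* A worked example (big-endian): for the V4SI constant { 5, 0, 0, 0 },
   element 0 holds the splat value 5 and the remaining three words are
   zero, so the return value is (4 - 1) * 4 = 12 zero bytes to shift in;
   { 5, -1, -1, -1 } would instead return -12.  */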
6375
6376
6377 /* Return non-zero (element mode byte size) if OP is of the given MODE
6378 and can be synthesized with a vspltisb, vspltish or vspltisw. */
6379
6380 int
6381 easy_altivec_constant (rtx op, machine_mode mode)
6382 {
6383 unsigned step, copies;
6384
6385 if (mode == VOIDmode)
6386 mode = GET_MODE (op);
6387 else if (mode != GET_MODE (op))
6388 return 0;
6389
6390 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6391 constants. */
6392 if (mode == V2DFmode)
6393 return zero_constant (op, mode) ? 8 : 0;
6394
6395 else if (mode == V2DImode)
6396 {
6397 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6398 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6399 return 0;
6400
6401 if (zero_constant (op, mode))
6402 return 8;
6403
6404 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6405 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6406 return 8;
6407
6408 return 0;
6409 }
6410
6411 /* V1TImode is a special container for TImode. Ignore for now. */
6412 else if (mode == V1TImode)
6413 return 0;
6414
6415 /* Start with a vspltisw. */
6416 step = GET_MODE_NUNITS (mode) / 4;
6417 copies = 1;
6418
6419 if (vspltis_constant (op, step, copies))
6420 return 4;
6421
6422 /* Then try with a vspltish. */
6423 if (step == 1)
6424 copies <<= 1;
6425 else
6426 step >>= 1;
6427
6428 if (vspltis_constant (op, step, copies))
6429 return 2;
6430
6431 /* And finally a vspltisb. */
6432 if (step == 1)
6433 copies <<= 1;
6434 else
6435 step >>= 1;
6436
6437 if (vspltis_constant (op, step, copies))
6438 return 1;
6439
6440 if (vspltis_shifted (op) != 0)
6441 return GET_MODE_SIZE (GET_MODE_INNER (mode));
6442
6443 return 0;
6444 }
6445
6446 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6447 result is OP. Abort if it is not possible. */
6448
6449 rtx
6450 gen_easy_altivec_constant (rtx op)
6451 {
6452 machine_mode mode = GET_MODE (op);
6453 int nunits = GET_MODE_NUNITS (mode);
6454 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6455 unsigned step = nunits / 4;
6456 unsigned copies = 1;
6457
6458 /* Start with a vspltisw. */
6459 if (vspltis_constant (op, step, copies))
6460 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6461
6462 /* Then try with a vspltish. */
6463 if (step == 1)
6464 copies <<= 1;
6465 else
6466 step >>= 1;
6467
6468 if (vspltis_constant (op, step, copies))
6469 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6470
6471 /* And finally a vspltisb. */
6472 if (step == 1)
6473 copies <<= 1;
6474 else
6475 step >>= 1;
6476
6477 if (vspltis_constant (op, step, copies))
6478 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6479
6480 gcc_unreachable ();
6481 }
6482
6483 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6484 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6485
6486    Return the number of instructions needed (1 or 2) via the address
6487    pointed to by NUM_INSNS_PTR.
6488
6489 Return the constant that is being split via CONSTANT_PTR. */
6490
6491 bool
6492 xxspltib_constant_p (rtx op,
6493 machine_mode mode,
6494 int *num_insns_ptr,
6495 int *constant_ptr)
6496 {
6497 size_t nunits = GET_MODE_NUNITS (mode);
6498 size_t i;
6499 HOST_WIDE_INT value;
6500 rtx element;
6501
6502   /* Set the returned values to out-of-bounds values.  */
6503 *num_insns_ptr = -1;
6504 *constant_ptr = 256;
6505
6506 if (!TARGET_P9_VECTOR)
6507 return false;
6508
6509 if (mode == VOIDmode)
6510 mode = GET_MODE (op);
6511
6512 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6513 return false;
6514
6515 /* Handle (vec_duplicate <constant>). */
6516 if (GET_CODE (op) == VEC_DUPLICATE)
6517 {
6518 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6519 && mode != V2DImode)
6520 return false;
6521
6522 element = XEXP (op, 0);
6523 if (!CONST_INT_P (element))
6524 return false;
6525
6526 value = INTVAL (element);
6527 if (!IN_RANGE (value, -128, 127))
6528 return false;
6529 }
6530
6531 /* Handle (const_vector [...]). */
6532 else if (GET_CODE (op) == CONST_VECTOR)
6533 {
6534 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6535 && mode != V2DImode)
6536 return false;
6537
6538 element = CONST_VECTOR_ELT (op, 0);
6539 if (!CONST_INT_P (element))
6540 return false;
6541
6542 value = INTVAL (element);
6543 if (!IN_RANGE (value, -128, 127))
6544 return false;
6545
6546 for (i = 1; i < nunits; i++)
6547 {
6548 element = CONST_VECTOR_ELT (op, i);
6549 if (!CONST_INT_P (element))
6550 return false;
6551
6552 if (value != INTVAL (element))
6553 return false;
6554 }
6555 }
6556
6557   /* Handle integer constants being loaded into the upper part of the VSX
6558      register as a scalar.  If the value isn't 0/-1, only allow it if the mode
6559      can go in Altivec registers.  Prefer VSPLTISW/VUPKHSW over XXSPLTIB.  */
6560 else if (CONST_INT_P (op))
6561 {
6562 if (!SCALAR_INT_MODE_P (mode))
6563 return false;
6564
6565 value = INTVAL (op);
6566 if (!IN_RANGE (value, -128, 127))
6567 return false;
6568
6569 if (!IN_RANGE (value, -1, 0))
6570 {
6571 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6572 return false;
6573
6574 if (EASY_VECTOR_15 (value))
6575 return false;
6576 }
6577 }
6578
6579 else
6580 return false;
6581
6582 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6583 sign extend. Special case 0/-1 to allow getting any VSX register instead
6584 of an Altivec register. */
6585 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6586 && EASY_VECTOR_15 (value))
6587 return false;
6588
6589 /* Return # of instructions and the constant byte for XXSPLTIB. */
6590 if (mode == V16QImode)
6591 *num_insns_ptr = 1;
6592
6593 else if (IN_RANGE (value, -1, 0))
6594 *num_insns_ptr = 1;
6595
6596 /* Do not generate XXSPLTIB and a sign extend operation if we can generate a
6597 single XXSPLTIW or XXSPLTIDP instruction. */
6598 else if (vsx_prefixed_constant (op, mode))
6599 return false;
6600
6601   /* Return XXSPLTIB followed by a sign extend operation to convert the
6602      constant to V8HImode or V4SImode.  */
6603 else
6604 *num_insns_ptr = 2;
6605
6606 *constant_ptr = (int) value;
6607 return true;
6608 }
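
/* Worked examples: a V16QI splat of 0x42 needs one xxspltib
   (*num_insns_ptr == 1); a V8HI splat of 100 needs xxspltib plus a sign
   extend (*num_insns_ptr == 2), assuming no single prefixed instruction
   covers it; a V8HI splat of 5 returns false here because
   EASY_VECTOR_15 prefers a plain vspltish.  */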
6609
6610 /* Return true if OP's mode is V2DI and OP can be synthesized with the
6611    ISA 2.07 instructions vupkhsw and vspltisw.
6612
6613 Return the constant that is being split via CONSTANT_PTR. */
6614
6615 bool
6616 vspltisw_vupkhsw_constant_p (rtx op, machine_mode mode, int *constant_ptr)
6617 {
6618 HOST_WIDE_INT value;
6619 rtx elt;
6620
6621 if (!TARGET_P8_VECTOR)
6622 return false;
6623
6624 if (mode != V2DImode)
6625 return false;
6626
6627 if (!const_vec_duplicate_p (op, &elt))
6628 return false;
6629
6630 value = INTVAL (elt);
6631 if (value == 0 || value == 1
6632 || !EASY_VECTOR_15 (value))
6633 return false;
6634
6635 if (constant_ptr)
6636 *constant_ptr = (int) value;
6637 return true;
6638 }
6639
6640 const char *
6641 output_vec_const_move (rtx *operands)
6642 {
6643 int shift;
6644 machine_mode mode;
6645 rtx dest, vec;
6646
6647 dest = operands[0];
6648 vec = operands[1];
6649 mode = GET_MODE (dest);
6650
6651 if (TARGET_VSX)
6652 {
6653 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6654 int xxspltib_value = 256;
6655 int num_insns = -1;
6656
6657 if (zero_constant (vec, mode))
6658 {
6659 if (TARGET_P9_VECTOR)
6660 return "xxspltib %x0,0";
6661
6662 else if (dest_vmx_p)
6663 return "vspltisw %0,0";
6664
6665 else
6666 return "xxlxor %x0,%x0,%x0";
6667 }
6668
6669 if (all_ones_constant (vec, mode))
6670 {
6671 if (TARGET_P9_VECTOR)
6672 return "xxspltib %x0,255";
6673
6674 else if (dest_vmx_p)
6675 return "vspltisw %0,-1";
6676
6677 else if (TARGET_P8_VECTOR)
6678 return "xxlorc %x0,%x0,%x0";
6679
6680 else
6681 gcc_unreachable ();
6682 }
6683
6684 vec_const_128bit_type vsx_const;
6685 if (TARGET_POWER10 && vec_const_128bit_to_bytes (vec, mode, &vsx_const))
6686 {
6687 unsigned imm = constant_generates_lxvkq (&vsx_const);
6688 if (imm)
6689 {
6690 operands[2] = GEN_INT (imm);
6691 return "lxvkq %x0,%2";
6692 }
6693
6694 imm = constant_generates_xxspltiw (&vsx_const);
6695 if (imm)
6696 {
6697 operands[2] = GEN_INT (imm);
6698 return "xxspltiw %x0,%2";
6699 }
6700
6701 imm = constant_generates_xxspltidp (&vsx_const);
6702 if (imm)
6703 {
6704 operands[2] = GEN_INT (imm);
6705 return "xxspltidp %x0,%2";
6706 }
6707 }
6708
6709 if (TARGET_P9_VECTOR
6710 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6711 {
6712 if (num_insns == 1)
6713 {
6714 operands[2] = GEN_INT (xxspltib_value & 0xff);
6715 return "xxspltib %x0,%2";
6716 }
6717
6718 return "#";
6719 }
6720 }
6721
6722 if (TARGET_ALTIVEC)
6723 {
6724 rtx splat_vec;
6725
6726 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6727 if (zero_constant (vec, mode))
6728 return "vspltisw %0,0";
6729
6730 if (all_ones_constant (vec, mode))
6731 return "vspltisw %0,-1";
6732
6733 /* Do we need to construct a value using VSLDOI? */
6734 shift = vspltis_shifted (vec);
6735 if (shift != 0)
6736 return "#";
6737
6738 splat_vec = gen_easy_altivec_constant (vec);
6739 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6740 operands[1] = XEXP (splat_vec, 0);
6741 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6742 return "#";
6743
6744 switch (GET_MODE (splat_vec))
6745 {
6746 case E_V4SImode:
6747 return "vspltisw %0,%1";
6748
6749 case E_V8HImode:
6750 return "vspltish %0,%1";
6751
6752 case E_V16QImode:
6753 return "vspltisb %0,%1";
6754
6755 default:
6756 gcc_unreachable ();
6757 }
6758 }
6759
6760 gcc_unreachable ();
6761 }
6762
6763 /* Initialize vector TARGET to VALS. */
6764
6765 void
6766 rs6000_expand_vector_init (rtx target, rtx vals)
6767 {
6768 machine_mode mode = GET_MODE (target);
6769 machine_mode inner_mode = GET_MODE_INNER (mode);
6770 unsigned int n_elts = GET_MODE_NUNITS (mode);
6771 int n_var = 0, one_var = -1;
6772 bool all_same = true, all_const_zero = true;
6773 rtx x, mem;
6774 unsigned int i;
6775
6776 for (i = 0; i < n_elts; ++i)
6777 {
6778 x = XVECEXP (vals, 0, i);
6779 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6780 ++n_var, one_var = i;
6781 else if (x != CONST0_RTX (inner_mode))
6782 all_const_zero = false;
6783
6784 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6785 all_same = false;
6786 }
6787
6788 if (n_var == 0)
6789 {
6790 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6791 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6792 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6793 {
6794 /* Zero register. */
6795 emit_move_insn (target, CONST0_RTX (mode));
6796 return;
6797 }
6798 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6799 {
6800 /* Splat immediate. */
6801 emit_insn (gen_rtx_SET (target, const_vec));
6802 return;
6803 }
6804 else
6805 {
6806 /* Load from constant pool. */
6807 emit_move_insn (target, const_vec);
6808 return;
6809 }
6810 }
6811
6812 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6813 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6814 {
6815 rtx op[2];
6816 size_t i;
6817 size_t num_elements = all_same ? 1 : 2;
6818 for (i = 0; i < num_elements; i++)
6819 {
6820 op[i] = XVECEXP (vals, 0, i);
6821 /* Just in case there is a SUBREG with a smaller mode, do a
6822 conversion. */
6823 if (GET_MODE (op[i]) != inner_mode)
6824 {
6825 rtx tmp = gen_reg_rtx (inner_mode);
6826 convert_move (tmp, op[i], 0);
6827 op[i] = tmp;
6828 }
6829 /* Allow load with splat double word. */
6830 else if (MEM_P (op[i]))
6831 {
6832 if (!all_same)
6833 op[i] = force_reg (inner_mode, op[i]);
6834 }
6835 else if (!REG_P (op[i]))
6836 op[i] = force_reg (inner_mode, op[i]);
6837 }
6838
6839 if (all_same)
6840 {
6841 if (mode == V2DFmode)
6842 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6843 else
6844 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6845 }
6846 else
6847 {
6848 if (mode == V2DFmode)
6849 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6850 else
6851 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6852 }
6853 return;
6854 }
6855
6856 /* Special case initializing vector int if we are on 64-bit systems with
6857 direct move or we have the ISA 3.0 instructions. */
6858 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6859 && TARGET_DIRECT_MOVE_64BIT)
6860 {
6861 if (all_same)
6862 {
6863 rtx element0 = XVECEXP (vals, 0, 0);
6864 if (MEM_P (element0))
6865 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6866 else
6867 element0 = force_reg (SImode, element0);
6868
6869 if (TARGET_P9_VECTOR)
6870 emit_insn (gen_vsx_splat_v4si (target, element0));
6871 else
6872 {
6873 rtx tmp = gen_reg_rtx (DImode);
6874 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6875 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6876 }
6877 return;
6878 }
6879 else
6880 {
6881 rtx elements[4];
6882 size_t i;
6883
6884 for (i = 0; i < 4; i++)
6885 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6886
6887 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6888 elements[2], elements[3]));
6889 return;
6890 }
6891 }
6892
6893 /* With single-precision floating point on VSX, we know that internally
6894 single precision is actually represented as a double. Either make two
6895 V2DF vectors and convert those vectors to single precision, or do one
6896 conversion and splat the result to the other elements. */
6897 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6898 {
6899 if (all_same)
6900 {
6901 rtx element0 = XVECEXP (vals, 0, 0);
6902
6903 if (TARGET_P9_VECTOR)
6904 {
6905 if (MEM_P (element0))
6906 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6907
6908 emit_insn (gen_vsx_splat_v4sf (target, element0));
6909 }
6910
6911 else
6912 {
6913 rtx freg = gen_reg_rtx (V4SFmode);
6914 rtx sreg = force_reg (SFmode, element0);
6915 rtx cvt = (TARGET_XSCVDPSPN
6916 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6917 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6918
6919 emit_insn (cvt);
6920 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6921 const0_rtx));
6922 }
6923 }
6924 else
6925 {
6926 if (TARGET_P8_VECTOR && TARGET_POWERPC64)
6927 {
6928 rtx tmp_sf[4];
6929 rtx tmp_si[4];
6930 rtx tmp_di[4];
6931 rtx mrg_di[4];
6932 for (i = 0; i < 4; i++)
6933 {
6934 tmp_si[i] = gen_reg_rtx (SImode);
6935 tmp_di[i] = gen_reg_rtx (DImode);
6936 mrg_di[i] = gen_reg_rtx (DImode);
6937 tmp_sf[i] = force_reg (SFmode, XVECEXP (vals, 0, i));
6938 emit_insn (gen_movsi_from_sf (tmp_si[i], tmp_sf[i]));
6939 emit_insn (gen_zero_extendsidi2 (tmp_di[i], tmp_si[i]));
6940 }
6941
6942 if (!BYTES_BIG_ENDIAN)
6943 {
6944 std::swap (tmp_di[0], tmp_di[1]);
6945 std::swap (tmp_di[2], tmp_di[3]);
6946 }
6947
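/* Merge each pair of zero-extended 32-bit values into a single GPR,
   mrg = (first << 32) | second, honoring the element order fixed up
   above.  */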
6948 emit_insn (gen_ashldi3 (mrg_di[0], tmp_di[0], GEN_INT (32)));
6949 emit_insn (gen_iordi3 (mrg_di[1], mrg_di[0], tmp_di[1]));
6950 emit_insn (gen_ashldi3 (mrg_di[2], tmp_di[2], GEN_INT (32)));
6951 emit_insn (gen_iordi3 (mrg_di[3], mrg_di[2], tmp_di[3]));
6952
6953 rtx tmp_v2di = gen_reg_rtx (V2DImode);
6954 emit_insn (gen_vsx_concat_v2di (tmp_v2di, mrg_di[1], mrg_di[3]));
6955 emit_move_insn (target, gen_lowpart (V4SFmode, tmp_v2di));
6956 }
6957 else
6958 {
6959 rtx dbl_even = gen_reg_rtx (V2DFmode);
6960 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6961 rtx flt_even = gen_reg_rtx (V4SFmode);
6962 rtx flt_odd = gen_reg_rtx (V4SFmode);
6963 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6964 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6965 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6966 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6967
6968 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6969 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6970 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6971 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6972 rs6000_expand_extract_even (target, flt_even, flt_odd);
6973 }
6974 }
6975 return;
6976 }
6977
6978 /* Special case initializing vector short/char that are splats if we are on
6979 64-bit systems with direct move. */
6980 if (all_same && TARGET_DIRECT_MOVE_64BIT
6981 && (mode == V16QImode || mode == V8HImode))
6982 {
6983 rtx op0 = XVECEXP (vals, 0, 0);
6984 rtx di_tmp = gen_reg_rtx (DImode);
6985
6986 if (!REG_P (op0))
6987 op0 = force_reg (GET_MODE_INNER (mode), op0);
6988
6989 if (mode == V16QImode)
6990 {
6991 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6992 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6993 return;
6994 }
6995
6996 if (mode == V8HImode)
6997 {
6998 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6999 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
7000 return;
7001 }
7002 }
7003
7004 /* Store value to stack temp. Load vector element. Splat. However, splat
7005 of 64-bit items is not supported on Altivec. */
7006 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
7007 {
7008 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7009 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
7010 XVECEXP (vals, 0, 0));
7011 x = gen_rtx_UNSPEC (VOIDmode,
7012 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7013 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7014 gen_rtvec (2,
7015 gen_rtx_SET (target, mem),
7016 x)));
7017 x = gen_rtx_VEC_SELECT (inner_mode, target,
7018 gen_rtx_PARALLEL (VOIDmode,
7019 gen_rtvec (1, const0_rtx)));
7020 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
7021 return;
7022 }
7023
7024 /* One field is non-constant. Load constant then overwrite
7025 varying field. */
7026 if (n_var == 1)
7027 {
7028 rtx copy = copy_rtx (vals);
7029
7030 /* Load constant part of vector, substitute neighboring value for
7031 varying element. */
7032 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
7033 rs6000_expand_vector_init (target, copy);
7034
7035 /* Insert variable. */
7036 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var),
7037 GEN_INT (one_var));
7038 return;
7039 }
7040
7041 if (TARGET_DIRECT_MOVE && (mode == V16QImode || mode == V8HImode))
7042 {
7043 rtx op[16];
7044 /* Force the values into word_mode registers. */
7045 for (i = 0; i < n_elts; i++)
7046 {
7047 rtx tmp = force_reg (inner_mode, XVECEXP (vals, 0, i));
7048 machine_mode tmode = TARGET_POWERPC64 ? DImode : SImode;
7049 op[i] = simplify_gen_subreg (tmode, tmp, inner_mode, 0);
7050 }
7051
7052 /* Take unsigned char big endian on a 64-bit system as an example for the
7053 construction below; the input values are: A, B, C, D, ..., O, P. */
7054
7055 if (TARGET_DIRECT_MOVE_128)
7056 {
7057 /* Move to VSX register with vec_concat, each has 2 values.
7058 eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
7059 vr1[1] = { xxxxxxxC, xxxxxxxD };
7060 ...
7061 vr1[7] = { xxxxxxxO, xxxxxxxP }; */
7062 rtx vr1[8];
7063 for (i = 0; i < n_elts / 2; i++)
7064 {
7065 vr1[i] = gen_reg_rtx (V2DImode);
7066 emit_insn (gen_vsx_concat_v2di (vr1[i], op[i * 2],
7067 op[i * 2 + 1]));
7068 }
7069
7070 /* Pack vectors with 2 values into vectors with 4 values.
7071 eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
7072 vr2[1] = { xxxExxxF, xxxGxxxH };
7073 vr2[2] = { xxxIxxxJ, xxxKxxxL };
7074 vr2[3] = { xxxMxxxN, xxxOxxxP }; */
7075 rtx vr2[4];
7076 for (i = 0; i < n_elts / 4; i++)
7077 {
7078 vr2[i] = gen_reg_rtx (V4SImode);
7079 emit_insn (gen_altivec_vpkudum (vr2[i], vr1[i * 2],
7080 vr1[i * 2 + 1]));
7081 }
7082
7083 /* Pack vectors with 4 values into vectors with 8 values.
7084 eg: vr3[0] = { xAxBxCxD, xExFxGxH };
7085 vr3[1] = { xIxJxKxL, xMxNxOxP }; */
7086 rtx vr3[2];
7087 for (i = 0; i < n_elts / 8; i++)
7088 {
7089 vr3[i] = gen_reg_rtx (V8HImode);
7090 emit_insn (gen_altivec_vpkuwum (vr3[i], vr2[i * 2],
7091 vr2[i * 2 + 1]));
7092 }
7093
7094 /* If the mode is V8HImode, the result is complete; move it into TARGET. */
7095 if (mode == V8HImode)
7096 {
7097 emit_insn (gen_rtx_SET (target, vr3[0]));
7098 return;
7099 }
7100
7101 /* Pack vectors with 8 values into 16 values. */
7102 rtx res = gen_reg_rtx (V16QImode);
7103 emit_insn (gen_altivec_vpkuhum (res, vr3[0], vr3[1]));
7104 emit_insn (gen_rtx_SET (target, res));
7105 }
7106 else
7107 {
7108 rtx (*merge_v16qi) (rtx, rtx, rtx) = NULL;
7109 rtx (*merge_v8hi) (rtx, rtx, rtx) = NULL;
7110 rtx (*merge_v4si) (rtx, rtx, rtx) = NULL;
7111 rtx perm_idx;
7112
7113 /* Set up some common gen routines and values. */
7114 if (BYTES_BIG_ENDIAN)
7115 {
7116 if (mode == V16QImode)
7117 {
7118 merge_v16qi = gen_altivec_vmrghb;
7119 merge_v8hi = gen_altivec_vmrglh;
7120 }
7121 else
7122 merge_v8hi = gen_altivec_vmrghh;
7123
7124 merge_v4si = gen_altivec_vmrglw;
7125 perm_idx = GEN_INT (3);
7126 }
7127 else
7128 {
7129 if (mode == V16QImode)
7130 {
7131 merge_v16qi = gen_altivec_vmrglb;
7132 merge_v8hi = gen_altivec_vmrghh;
7133 }
7134 else
7135 merge_v8hi = gen_altivec_vmrglh;
7136
7137 merge_v4si = gen_altivec_vmrghw;
7138 perm_idx = GEN_INT (0);
7139 }
7140
7141 /* Move to VSX register with direct move.
7142 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
7143 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
7144 ...
7145 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
7146 rtx vr_qi[16];
7147 for (i = 0; i < n_elts; i++)
7148 {
7149 vr_qi[i] = gen_reg_rtx (V16QImode);
7150 if (TARGET_POWERPC64)
7151 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi[i], op[i]));
7152 else
7153 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi[i], op[i]));
7154 }
7155
7156 /* Merge/move to vector short.
7157 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
7158 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
7159 ...
7160 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
7161 rtx vr_hi[8];
7162 for (i = 0; i < 8; i++)
7163 {
7164 rtx tmp = vr_qi[i];
7165 if (mode == V16QImode)
7166 {
7167 tmp = gen_reg_rtx (V16QImode);
7168 emit_insn (merge_v16qi (tmp, vr_qi[2 * i], vr_qi[2 * i + 1]));
7169 }
7170 vr_hi[i] = gen_reg_rtx (V8HImode);
7171 emit_move_insn (vr_hi[i], gen_lowpart (V8HImode, tmp));
7172 }
7173
7174 /* Merge vector short to vector int.
7175 eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
7176 vr_si[1] = { xxxxxxxx, xxxxEFGH };
7177 ...
7178 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
7179 rtx vr_si[4];
7180 for (i = 0; i < 4; i++)
7181 {
7182 rtx tmp = gen_reg_rtx (V8HImode);
7183 emit_insn (merge_v8hi (tmp, vr_hi[2 * i], vr_hi[2 * i + 1]));
7184 vr_si[i] = gen_reg_rtx (V4SImode);
7185 emit_move_insn (vr_si[i], gen_lowpart (V4SImode, tmp));
7186 }
7187
7188 /* Merge vector int to vector long.
7189 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
7190 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
7191 rtx vr_di[2];
7192 for (i = 0; i < 2; i++)
7193 {
7194 rtx tmp = gen_reg_rtx (V4SImode);
7195 emit_insn (merge_v4si (tmp, vr_si[2 * i], vr_si[2 * i + 1]));
7196 vr_di[i] = gen_reg_rtx (V2DImode);
7197 emit_move_insn (vr_di[i], gen_lowpart (V2DImode, tmp));
7198 }
7199
7200 rtx res = gen_reg_rtx (V2DImode);
7201 emit_insn (gen_vsx_xxpermdi_v2di (res, vr_di[0], vr_di[1], perm_idx));
7202 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, res)));
7203 }
7204
7205 return;
7206 }
7207
7208 /* Construct the vector in memory one field at a time
7209 and load the whole vector. */
7210 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7211 for (i = 0; i < n_elts; i++)
7212 emit_move_insn (adjust_address_nv (mem, inner_mode,
7213 i * GET_MODE_SIZE (inner_mode)),
7214 XVECEXP (vals, 0, i));
7215 emit_move_insn (target, mem);
7216 }
7217
7218 /* Insert VAL into element IDX of TARGET; VAL's size equals the vector
7219 element size. IDX is variable and counts in element-size units, for p9 and above. */
7220
7221 static void
7222 rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
7223 {
7224 machine_mode mode = GET_MODE (target);
7225
7226 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7227
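/* The strategy, as a summary of the code below: rotate the target so
   the element to be set lands in element 0, insert VAL at element 0
   using a constant-index insert, then rotate the vector back.  */
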
7228 machine_mode inner_mode = GET_MODE (val);
7229
7230 int width = GET_MODE_SIZE (inner_mode);
7231
7232 gcc_assert (width >= 1 && width <= 8);
7233
7234 int shift = exact_log2 (width);
7235
7236 machine_mode idx_mode = GET_MODE (idx);
7237
7238 machine_mode shift_mode;
7239 /* Gen function pointers for shifting left and generation of permutation
7240 control vectors. */
7241 rtx (*gen_ashl) (rtx, rtx, rtx);
7242 rtx (*gen_pcvr1) (rtx, rtx);
7243 rtx (*gen_pcvr2) (rtx, rtx);
7244
7245 if (TARGET_POWERPC64)
7246 {
7247 shift_mode = DImode;
7248 gen_ashl = gen_ashldi3;
7249 gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_di
7250 : gen_altivec_lvsr_reg_di;
7251 gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_di
7252 : gen_altivec_lvsl_reg_di;
7253 }
7254 else
7255 {
7256 shift_mode = SImode;
7257 gen_ashl = gen_ashlsi3;
7258 gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_si
7259 : gen_altivec_lvsr_reg_si;
7260 gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_si
7261 : gen_altivec_lvsl_reg_si;
7262 }
7263 /* Generate the IDX for the permute shift, where width is the vector
7264 element size: idx = idx * width. */
7265 rtx tmp = gen_reg_rtx (shift_mode);
7266 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7267
7268 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7269
7270 /* Generate one permutation control vector used for rotating the element
7271 at the to-insert position to element zero in the target vector. lvsl is
7272 used for big endian while lvsr is used for little endian:
7273 lvs[lr] v1,0,idx. */
7274 rtx pcvr1 = gen_reg_rtx (V16QImode);
7275 emit_insn (gen_pcvr1 (pcvr1, tmp));
7276
7277 rtx sub_target = simplify_gen_subreg (V16QImode, target, mode, 0);
7278 rtx perm1 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target,
7279 pcvr1);
7280 emit_insn (perm1);
7281
7282 /* Insert val into element 0 of target vector. */
7283 rs6000_expand_vector_set (target, val, const0_rtx);
7284
7285 /* Rotate back with a reversed permutation control vector generated from:
7286 lvs[rl] v2,0,idx. */
7287 rtx pcvr2 = gen_reg_rtx (V16QImode);
7288 emit_insn (gen_pcvr2 (pcvr2, tmp));
7289
7290 rtx perm2 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target,
7291 pcvr2);
7292 emit_insn (perm2);
7293 }
7294
7295 /* Insert VAL into element IDX of TARGET; VAL's size equals the vector
7296 element size. IDX is variable and counts in element-size units, for p7 and p8. */
7297
7298 static void
7299 rs6000_expand_vector_set_var_p7 (rtx target, rtx val, rtx idx)
7300 {
7301 machine_mode mode = GET_MODE (target);
7302
7303 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7304
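/* The strategy, as a summary of the code below: build a byte mask that
   covers the element being replaced, permute both the mask and VAL to
   the element's byte position, then merge VAL into TARGET with xxsel.  */
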
7305 machine_mode inner_mode = GET_MODE (val);
7306 HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
7307
7308 int width = GET_MODE_SIZE (inner_mode);
7309 gcc_assert (width >= 1 && width <= 4);
7310
7311 int shift = exact_log2 (width);
7312
7313 machine_mode idx_mode = GET_MODE (idx);
7314
7315 machine_mode shift_mode;
7316 rtx (*gen_ashl)(rtx, rtx, rtx);
7317 rtx (*gen_add)(rtx, rtx, rtx);
7318 rtx (*gen_sub)(rtx, rtx, rtx);
7319 rtx (*gen_lvsl)(rtx, rtx);
7320
7321 if (TARGET_POWERPC64)
7322 {
7323 shift_mode = DImode;
7324 gen_ashl = gen_ashldi3;
7325 gen_add = gen_adddi3;
7326 gen_sub = gen_subdi3;
7327 gen_lvsl = gen_altivec_lvsl_reg_di;
7328 }
7329 else
7330 {
7331 shift_mode = SImode;
7332 gen_ashl = gen_ashlsi3;
7333 gen_add = gen_addsi3;
7334 gen_sub = gen_subsi3;
7335 gen_lvsl = gen_altivec_lvsl_reg_si;
7336 }
7337
7338 /* idx = idx * width. */
7339 rtx tmp = gen_reg_rtx (shift_mode);
7340 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7341
7342 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7343
7344 /* For LE: idx = idx + 8; for BE: idx = 24 - width - idx. */
7345 if (!BYTES_BIG_ENDIAN)
7346 emit_insn (gen_add (tmp, tmp, GEN_INT (8)));
7347 else
7348 emit_insn (gen_sub (tmp, GEN_INT (24 - width), tmp));
7349
7350 /* lxv vs33, mask.
7351 DImode: 0xffffffffffffffff0000000000000000
7352 SImode: 0x00000000ffffffff0000000000000000
7353 HImode: 0x000000000000ffff0000000000000000.
7354 QImode: 0x00000000000000ff0000000000000000. */
7355 rtx mask = gen_reg_rtx (V16QImode);
7356 rtx mask_v2di = gen_reg_rtx (V2DImode);
7357 rtvec v = rtvec_alloc (2);
7358 if (!BYTES_BIG_ENDIAN)
7359 {
7360 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
7361 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
7362 }
7363 else
7364 {
7365 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
7366 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
7367 }
7368 emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v)));
7369 rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0);
7370 emit_insn (gen_rtx_SET (mask, sub_mask));
7371
7372 /* mtvsrd[wz] f0,tmp_val. */
7373 rtx tmp_val = gen_reg_rtx (SImode);
7374 if (inner_mode == E_SFmode)
7375 if (TARGET_DIRECT_MOVE_64BIT)
7376 emit_insn (gen_movsi_from_sf (tmp_val, val));
7377 else
7378 {
7379 rtx stack = rs6000_allocate_stack_temp (SFmode, false, true);
7380 emit_insn (gen_movsf_hardfloat (stack, val));
7381 rtx stack2 = copy_rtx (stack);
7382 PUT_MODE (stack2, SImode);
7383 emit_move_insn (tmp_val, stack2);
7384 }
7385 else
7386 tmp_val = force_reg (SImode, val);
7387
7388 rtx val_v16qi = gen_reg_rtx (V16QImode);
7389 rtx val_v2di = gen_reg_rtx (V2DImode);
7390 rtvec vec_val = rtvec_alloc (2);
7391 if (!BYTES_BIG_ENDIAN)
7392 {
7393 RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0);
7394 RTVEC_ELT (vec_val, 1) = tmp_val;
7395 }
7396 else
7397 {
7398 RTVEC_ELT (vec_val, 0) = tmp_val;
7399 RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0);
7400 }
7401 emit_insn (
7402 gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, vec_val)));
7403 rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0);
7404 emit_insn (gen_rtx_SET (val_v16qi, sub_val));
7405
7406 /* lvsl 13,0,idx. */
7407 rtx pcv = gen_reg_rtx (V16QImode);
7408 emit_insn (gen_lvsl (pcv, tmp));
7409
7410 /* vperm 1,1,1,13. */
7411 /* vperm 0,0,0,13. */
7412 rtx val_perm = gen_reg_rtx (V16QImode);
7413 rtx mask_perm = gen_reg_rtx (V16QImode);
7414 emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv));
7415 emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
7416
7417 rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0);
7418
7419 /* xxsel 34,34,32,33. */
7420 emit_insn (
7421 gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, mask_perm));
7422 }
7423
7424 /* Set field ELT_RTX of TARGET to VAL. */
7425
7426 void
7427 rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
7428 {
7429 machine_mode mode = GET_MODE (target);
7430 machine_mode inner_mode = GET_MODE_INNER (mode);
7431 rtx reg = gen_reg_rtx (mode);
7432 rtx mask, mem, x;
7433 int width = GET_MODE_SIZE (inner_mode);
7434 int i;
7435
7436 val = force_reg (GET_MODE (val), val);
7437
7438 if (VECTOR_MEM_VSX_P (mode))
7439 {
7440 if (!CONST_INT_P (elt_rtx))
7441 {
7442 /* For V2DI/V2DF, we can leverage the P9 version to generate xxpermdi
7443 when elt_rtx is variable. */
7444 if ((TARGET_P9_VECTOR && TARGET_POWERPC64) || width == 8)
7445 {
7446 rs6000_expand_vector_set_var_p9 (target, val, elt_rtx);
7447 return;
7448 }
7449 else if (TARGET_VSX)
7450 {
7451 rs6000_expand_vector_set_var_p7 (target, val, elt_rtx);
7452 return;
7453 }
7454 else
7455 gcc_assert (CONST_INT_P (elt_rtx));
7456 }
7457
7458 rtx insn = NULL_RTX;
7459
7460 if (mode == V2DFmode)
7461 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7462
7463 else if (mode == V2DImode)
7464 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7465
7466 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
7467 {
7468 if (mode == V4SImode)
7469 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7470 else if (mode == V8HImode)
7471 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7472 else if (mode == V16QImode)
7473 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7474 else if (mode == V4SFmode)
7475 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
7476 }
7477
7478 if (insn)
7479 {
7480 emit_insn (insn);
7481 return;
7482 }
7483 }
7484
7485 /* Simplify setting single element vectors like V1TImode. */
7486 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode)
7487 && INTVAL (elt_rtx) == 0)
7488 {
7489 emit_move_insn (target, gen_lowpart (mode, val));
7490 return;
7491 }
7492
7493 /* Load single variable value. */
7494 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7495 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7496 x = gen_rtx_UNSPEC (VOIDmode,
7497 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7498 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7499 gen_rtvec (2,
7500 gen_rtx_SET (reg, mem),
7501 x)));
7502
7503 /* Linear sequence. */
7504 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7505 for (i = 0; i < 16; ++i)
7506 XVECEXP (mask, 0, i) = GEN_INT (i);
7507
7508 /* Set permute mask to insert element into target. */
7509 for (i = 0; i < width; ++i)
7510 XVECEXP (mask, 0, INTVAL (elt_rtx) * width + i) = GEN_INT (i + 0x10);
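/* For example, for V4SImode and ELT_RTX of 1, the selector bytes are
   { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15 }, so
   bytes 4..7 of the result come from the new value (selector values
   0x10 and up).  */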
7511 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7512
7513 if (BYTES_BIG_ENDIAN)
7514 x = gen_rtx_UNSPEC (mode,
7515 gen_rtvec (3, target, reg,
7516 force_reg (V16QImode, x)),
7517 UNSPEC_VPERM);
7518 else
7519 {
7520 if (TARGET_P9_VECTOR)
7521 x = gen_rtx_UNSPEC (mode,
7522 gen_rtvec (3, reg, target,
7523 force_reg (V16QImode, x)),
7524 UNSPEC_VPERMR);
7525 else
7526 {
7527 /* Invert selector. We prefer to generate VNAND on P8 so
7528 that future fusion opportunities can kick in, but must
7529 generate VNOR elsewhere. */
7530 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7531 rtx iorx = (TARGET_P8_VECTOR
7532 ? gen_rtx_IOR (V16QImode, notx, notx)
7533 : gen_rtx_AND (V16QImode, notx, notx));
7534 rtx tmp = gen_reg_rtx (V16QImode);
7535 emit_insn (gen_rtx_SET (tmp, iorx));
7536
7537 /* Permute with operands reversed and adjusted selector. */
7538 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7539 UNSPEC_VPERM);
7540 }
7541 }
7542
7543 emit_insn (gen_rtx_SET (target, x));
7544 }
7545
7546 /* Extract field ELT from VEC into TARGET. */
7547
7548 void
7549 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7550 {
7551 machine_mode mode = GET_MODE (vec);
7552 machine_mode inner_mode = GET_MODE_INNER (mode);
7553 rtx mem;
7554
7555 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7556 {
7557 switch (mode)
7558 {
7559 default:
7560 break;
7561 case E_V1TImode:
7562 emit_move_insn (target, gen_lowpart (TImode, vec));
7563 break;
7564 case E_V2DFmode:
7565 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7566 return;
7567 case E_V2DImode:
7568 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7569 return;
7570 case E_V4SFmode:
7571 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7572 return;
7573 case E_V16QImode:
7574 if (TARGET_DIRECT_MOVE_64BIT)
7575 {
7576 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7577 return;
7578 }
7579 else
7580 break;
7581 case E_V8HImode:
7582 if (TARGET_DIRECT_MOVE_64BIT)
7583 {
7584 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7585 return;
7586 }
7587 else
7588 break;
7589 case E_V4SImode:
7590 if (TARGET_DIRECT_MOVE_64BIT)
7591 {
7592 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7593 return;
7594 }
7595 break;
7596 }
7597 }
7598 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7599 && TARGET_DIRECT_MOVE_64BIT)
7600 {
7601 if (GET_MODE (elt) != DImode)
7602 {
7603 rtx tmp = gen_reg_rtx (DImode);
7604 convert_move (tmp, elt, 0);
7605 elt = tmp;
7606 }
7607 else if (!REG_P (elt))
7608 elt = force_reg (DImode, elt);
7609
7610 switch (mode)
7611 {
7612 case E_V1TImode:
7613 emit_move_insn (target, gen_lowpart (TImode, vec));
7614 return;
7615
7616 case E_V2DFmode:
7617 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7618 return;
7619
7620 case E_V2DImode:
7621 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7622 return;
7623
7624 case E_V4SFmode:
7625 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7626 return;
7627
7628 case E_V4SImode:
7629 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7630 return;
7631
7632 case E_V8HImode:
7633 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7634 return;
7635
7636 case E_V16QImode:
7637 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7638 return;
7639
7640 default:
7641 gcc_unreachable ();
7642 }
7643 }
7644
7645 /* Allocate mode-sized buffer. */
7646 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7647
7648 emit_move_insn (mem, vec);
7649 if (CONST_INT_P (elt))
7650 {
7651 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
7652
7653 /* Add offset to field within buffer matching vector element. */
7654 mem = adjust_address_nv (mem, inner_mode,
7655 modulo_elt * GET_MODE_SIZE (inner_mode));
7656 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7657 }
7658 else
7659 {
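/* Variable element number: mask it into range and scale it by the
   element size to form a byte offset into the buffer.  */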
7660 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
7661 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7662
7663 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
7664 if (ele_size > 1)
7665 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
7666 rtx new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
7667 new_addr = change_address (mem, inner_mode, new_addr);
7668 emit_move_insn (target, new_addr);
7669 }
7670 }
7671
7672 /* Return the offset within a memory object (MEM) of a vector type to a given
7673 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
7674 the element is constant, we return a constant integer.
7675
7676 Otherwise, we use a base register temporary to calculate the offset after
7677 masking it to fit within the bounds of the vector and scaling it. The
7678 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7679 built-in function. */
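/* For example, for a V4SImode vector a variable element number is masked
   with 3 and shifted left by 2, so element number 7 yields the in-bounds
   byte offset 12.  */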
7680
7681 static rtx
7682 get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
7683 {
7684 if (CONST_INT_P (element))
7685 return GEN_INT (INTVAL (element) * scalar_size);
7686
7687 /* All insns should use the 'Q' constraint (address is a single register) if
7688 the element number is not a constant. */
7689 gcc_assert (satisfies_constraint_Q (mem));
7690
7691 /* Mask the element to make sure the element number is between 0 and the
7692 maximum number of elements - 1 so that we don't generate an address
7693 outside the vector. */
7694 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
7695 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
7696 emit_insn (gen_rtx_SET (base_tmp, and_op));
7697
7698 /* Shift the element to get the byte offset from the element number. */
7699 int shift = exact_log2 (scalar_size);
7700 gcc_assert (shift >= 0);
7701
7702 if (shift > 0)
7703 {
7704 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
7705 emit_insn (gen_rtx_SET (base_tmp, shift_op));
7706 }
7707
7708 return base_tmp;
7709 }
7710
7711 /* Helper function to update PC-relative addresses when we are adjusting a
7712 memory address (ADDR) of a vector to point to a scalar field within the
7713 vector with a constant offset (ELEMENT_OFFSET). If the address is not
7714 valid, we can use the base register temporary (BASE_TMP) to form it. */
7715
7716 static rtx
7717 adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
7718 {
7719 rtx new_addr = NULL;
7720
7721 gcc_assert (CONST_INT_P (element_offset));
7722
7723 if (GET_CODE (addr) == CONST)
7724 addr = XEXP (addr, 0);
7725
7726 if (GET_CODE (addr) == PLUS)
7727 {
7728 rtx op0 = XEXP (addr, 0);
7729 rtx op1 = XEXP (addr, 1);
7730
7731 if (CONST_INT_P (op1))
7732 {
7733 HOST_WIDE_INT offset
7734 = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
7735
7736 if (offset == 0)
7737 new_addr = op0;
7738
7739 else
7740 {
7741 rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
7742 new_addr = gen_rtx_CONST (Pmode, plus);
7743 }
7744 }
7745
7746 else
7747 {
7748 emit_move_insn (base_tmp, addr);
7749 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7750 }
7751 }
7752
7753 else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
7754 {
7755 rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
7756 new_addr = gen_rtx_CONST (Pmode, plus);
7757 }
7758
7759 else
7760 gcc_unreachable ();
7761
7762 return new_addr;
7763 }
7764
7765 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7766 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7767 temporary (BASE_TMP) to fixup the address. Return the new memory address
7768 that is valid for reads or writes to a given register (SCALAR_REG).
7769
7770 This function is expected to be called after reload is completed when we are
7771 splitting insns. The temporary BASE_TMP might be set multiple times with
7772 this code. */
7773
7774 rtx
7775 rs6000_adjust_vec_address (rtx scalar_reg,
7776 rtx mem,
7777 rtx element,
7778 rtx base_tmp,
7779 machine_mode scalar_mode)
7780 {
7781 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7782 rtx addr = XEXP (mem, 0);
7783 rtx new_addr;
7784
7785 gcc_assert (!reg_mentioned_p (base_tmp, addr));
7786 gcc_assert (!reg_mentioned_p (base_tmp, element));
7787
7788 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7789 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7790
7791 /* Calculate what we need to add to the address to get the element
7792 address. */
7793 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
7794
7795 /* Create the new address pointing to the element within the vector. If we
7796 are adding 0, we don't have to change the address. */
7797 if (element_offset == const0_rtx)
7798 new_addr = addr;
7799
7800 /* A simple indirect address can be converted into a reg + offset
7801 address. */
7802 else if (REG_P (addr) || SUBREG_P (addr))
7803 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7804
7805 /* For references to local static variables, fold a constant offset into the
7806 address. */
7807 else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
7808 new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
7809
7810 /* Optimize D-FORM addresses with constant offset with a constant element, to
7811 include the element offset in the address directly. */
7812 else if (GET_CODE (addr) == PLUS)
7813 {
7814 rtx op0 = XEXP (addr, 0);
7815 rtx op1 = XEXP (addr, 1);
7816
7817 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7818 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7819 {
7820 /* op0 should never be r0, because r0+offset is not valid. But it
7821 doesn't hurt to make sure it is not r0. */
7822 gcc_assert (reg_or_subregno (op0) != 0);
7823
7824 /* D-FORM address with constant element number. */
7825 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7826 rtx offset_rtx = GEN_INT (offset);
7827 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7828 }
7829 else
7830 {
7831 /* If we don't have a D-FORM address with a constant element number,
7832 add the two elements in the current address. Then add the offset.
7833
7834 Previously, we tried to add the offset to OP1 and change the
7835 address to an X-FORM format adding OP0 and BASE_TMP, but it became
7836 complicated because we had to verify that op1 was not GPR0 and we
7837 had a constant element offset (due to the way ADDI is defined).
7838 By doing the add of OP0 and OP1 first, and then adding in the
7839 offset, it has the benefit that if D-FORM instructions are
7840 allowed, the offset is part of the memory access to the vector
7841 element. */
7842 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
7843 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7844 }
7845 }
7846
7847 else
7848 {
7849 emit_move_insn (base_tmp, addr);
7850 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7851 }
7852
7853 /* If the address isn't valid, move the address into the temporary base
7854 register. Some reasons it could not be valid include:
7855
7856 The address offset overflowed the 16 or 34 bit offset size;
7857 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
7858 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
7859 Only X-FORM loads can be done, and the address is D-FORM. */
7860
7861 enum insn_form iform
7862 = address_to_insn_form (new_addr, scalar_mode,
7863 reg_to_non_prefixed (scalar_reg, scalar_mode));
7864
7865 if (iform == INSN_FORM_BAD)
7866 {
7867 emit_move_insn (base_tmp, new_addr);
7868 new_addr = base_tmp;
7869 }
7870
7871 return change_address (mem, scalar_mode, new_addr);
7872 }
7873
7874 /* Split a variable vec_extract operation into the component instructions. */
7875
7876 void
7877 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7878 rtx tmp_altivec)
7879 {
7880 machine_mode mode = GET_MODE (src);
7881 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
7882 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7883 int byte_shift = exact_log2 (scalar_size);
7884
7885 gcc_assert (byte_shift >= 0);
7886
7887 /* If we are given a memory address, optimize to load just the element. We
7888 don't have to adjust the vector element number on little endian
7889 systems. */
7890 if (MEM_P (src))
7891 {
7892 emit_move_insn (dest,
7893 rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
7894 scalar_mode));
7895 return;
7896 }
7897
7898 else if (REG_P (src) || SUBREG_P (src))
7899 {
7900 int num_elements = GET_MODE_NUNITS (mode);
7901 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
7902 int bit_shift = 7 - exact_log2 (num_elements);
7903 rtx element2;
7904 unsigned int dest_regno = reg_or_subregno (dest);
7905 unsigned int src_regno = reg_or_subregno (src);
7906 unsigned int element_regno = reg_or_subregno (element);
7907
7908 gcc_assert (REG_P (tmp_gpr));
7909
7910 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7911 a general purpose register. */
7912 if (TARGET_P9_VECTOR
7913 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7914 && INT_REGNO_P (dest_regno)
7915 && ALTIVEC_REGNO_P (src_regno)
7916 && INT_REGNO_P (element_regno))
7917 {
7918 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7919 rtx element_si = gen_rtx_REG (SImode, element_regno);
7920
7921 if (mode == V16QImode)
7922 emit_insn (BYTES_BIG_ENDIAN
7923 ? gen_vextublx (dest_si, element_si, src)
7924 : gen_vextubrx (dest_si, element_si, src));
7925
7926 else if (mode == V8HImode)
7927 {
7928 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7929 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7930 emit_insn (BYTES_BIG_ENDIAN
7931 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7932 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7933 }
7934
7935
7936 else
7937 {
7938 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7939 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7940 emit_insn (BYTES_BIG_ENDIAN
7941 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7942 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7943 }
7944
7945 return;
7946 }
7947
7948
7949 gcc_assert (REG_P (tmp_altivec));
7950
7951 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7952 an XOR, otherwise we need to subtract. The shift amount is chosen so
7953 VSLO will shift the element into the upper position (adding 3 converts
7954 a byte shift into a bit shift). */
7955 if (scalar_size == 8)
7956 {
7957 if (!BYTES_BIG_ENDIAN)
7958 {
7959 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7960 element2 = tmp_gpr;
7961 }
7962 else
7963 element2 = element;
7964
7965 /* Generate RLDIC directly to shift left 6 bits and retrieve 1 bit,
7966 computing (element2 << 6) & 64: 0 bits for element 0, 64 bits (8 bytes) for element 1. */
7967 emit_insn (gen_rtx_SET (tmp_gpr,
7968 gen_rtx_AND (DImode,
7969 gen_rtx_ASHIFT (DImode,
7970 element2,
7971 GEN_INT (6)),
7972 GEN_INT (64))));
7973 }
7974 else
7975 {
7976 if (!BYTES_BIG_ENDIAN)
7977 {
7978 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7979
7980 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7981 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7982 element2 = tmp_gpr;
7983 }
7984 else
7985 element2 = element;
7986
7987 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7988 }
7989
7990 /* Get the value into the lower byte of the Altivec register where VSLO
7991 expects it. */
7992 if (TARGET_P9_VECTOR)
7993 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7994 else if (can_create_pseudo_p ())
7995 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7996 else
7997 {
7998 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7999 emit_move_insn (tmp_di, tmp_gpr);
8000 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
8001 }
8002
8003 /* Do the VSLO to get the value into the final location. */
8004 switch (mode)
8005 {
8006 case E_V2DFmode:
8007 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
8008 return;
8009
8010 case E_V2DImode:
8011 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
8012 return;
8013
8014 case E_V4SFmode:
8015 {
8016 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8017 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
8018 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8019 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8020 tmp_altivec));
8021
8022 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
8023 return;
8024 }
8025
8026 case E_V4SImode:
8027 case E_V8HImode:
8028 case E_V16QImode:
8029 {
8030 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8031 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8032 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
8033 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8034 tmp_altivec));
8035 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
8036 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
8037 GEN_INT (64 - bits_in_element)));
8038 return;
8039 }
8040
8041 default:
8042 gcc_unreachable ();
8043 }
8044
8045 return;
8046 }
8047 else
8048 gcc_unreachable ();
8049 }
8050
8051 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
8052 selects whether the alignment is ABI-mandated, optional, or
8053 both ABI-mandated and optional alignment. */
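/* For example, a vector type is raised to 128-bit alignment as an ABI
   requirement, while an array of QImode elements is raised to word
   alignment purely as an optimization.  */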
8054
8055 unsigned int
8056 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
8057 {
8058 if (how != align_opt)
8059 {
8060 if (VECTOR_TYPE_P (type) && align < 128)
8061 align = 128;
8062 }
8063
8064 if (how != align_abi)
8065 {
8066 if (TREE_CODE (type) == ARRAY_TYPE
8067 && TYPE_MODE (TREE_TYPE (type)) == QImode)
8068 {
8069 if (align < BITS_PER_WORD)
8070 align = BITS_PER_WORD;
8071 }
8072 }
8073
8074 return align;
8075 }
8076
8077 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
8078 instructions simply ignore the low bits; VSX memory instructions
8079 are aligned to 4 or 8 bytes. */
8080
8081 static bool
8082 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
8083 {
8084 return (STRICT_ALIGNMENT
8085 || (!TARGET_EFFICIENT_UNALIGNED_VSX
8086 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
8087 || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
8088 && (int) align < VECTOR_ALIGN (mode)))));
8089 }
8090
8091 /* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints. */
8092
8093 unsigned int
8094 rs6000_special_adjust_field_align (tree type, unsigned int computed)
8095 {
8096 if (computed <= 32 || TYPE_PACKED (type))
8097 return computed;
8098
8099 /* Strip initial arrays. */
8100 while (TREE_CODE (type) == ARRAY_TYPE)
8101 type = TREE_TYPE (type);
8102
8103 /* If RECORD or UNION, recursively find the first field. */
8104 while (AGGREGATE_TYPE_P (type))
8105 {
8106 tree field = TYPE_FIELDS (type);
8107
8108 /* Skip all non-field decls. */
8109 while (field != NULL
8110 && (TREE_CODE (field) != FIELD_DECL
8111 || DECL_FIELD_ABI_IGNORED (field)))
8112 field = DECL_CHAIN (field);
8113
8114 if (! field)
8115 break;
8116
8117 /* A packed field does not contribute any extra alignment. */
8118 if (DECL_PACKED (field))
8119 return computed;
8120
8121 type = TREE_TYPE (field);
8122
8123 /* Strip arrays. */
8124 while (TREE_CODE (type) == ARRAY_TYPE)
8125 type = TREE_TYPE (type);
8126 }
8127
8128 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
8129 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
8130 computed = MIN (computed, 32);
8131
8132 return computed;
8133 }
8134
8135 /* AIX increases natural record alignment to doubleword if the innermost first
8136 field is an FP double while the FP fields remain word aligned.
8137 Only called if TYPE initially is a RECORD or UNION. */
8138
8139 unsigned int
8140 rs6000_special_round_type_align (tree type, unsigned int computed,
8141 unsigned int specified)
8142 {
8143 unsigned int align = MAX (computed, specified);
8144
8145 if (TYPE_PACKED (type) || align >= 64)
8146 return align;
8147
8148 /* If RECORD or UNION, recursively find the first field. */
8149 do
8150 {
8151 tree field = TYPE_FIELDS (type);
8152
8153 /* Skip all non-field decls. */
8154 while (field != NULL
8155 && (TREE_CODE (field) != FIELD_DECL
8156 || DECL_FIELD_ABI_IGNORED (field)))
8157 field = DECL_CHAIN (field);
8158
8159 if (! field)
8160 break;
8161
8162 /* A packed field does not contribute any extra alignment. */
8163 if (DECL_PACKED (field))
8164 return align;
8165
8166 type = TREE_TYPE (field);
8167
8168 /* Strip arrays. */
8169 while (TREE_CODE (type) == ARRAY_TYPE)
8170 type = TREE_TYPE (type);
8171 } while (AGGREGATE_TYPE_P (type));
8172
8173 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
8174 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
8175 align = MAX (align, 64);
8176
8177 return align;
8178 }
8179
8180 /* Darwin increases record alignment to the natural alignment of
8181 the first field. */
8182
8183 unsigned int
8184 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
8185 unsigned int specified)
8186 {
8187 unsigned int align = MAX (computed, specified);
8188
8189 if (TYPE_PACKED (type))
8190 return align;
8191
8192 /* Find the first field, looking down into aggregates. */
8193 do {
8194 tree field = TYPE_FIELDS (type);
8195 /* Skip all non-field decls. */
8196 while (field != NULL
8197 && (TREE_CODE (field) != FIELD_DECL
8198 || DECL_FIELD_ABI_IGNORED (field)))
8199 field = DECL_CHAIN (field);
8200 if (! field)
8201 break;
8202 /* A packed field does not contribute any extra alignment. */
8203 if (DECL_PACKED (field))
8204 return align;
8205 type = TREE_TYPE (field);
8206 while (TREE_CODE (type) == ARRAY_TYPE)
8207 type = TREE_TYPE (type);
8208 } while (AGGREGATE_TYPE_P (type));
8209
8210 if (type != error_mark_node && ! AGGREGATE_TYPE_P (type)
8211 && ! TYPE_PACKED (type) && maximum_field_alignment == 0)
8212 align = MAX (align, TYPE_ALIGN (type));
8213
8214 return align;
8215 }
8216
8217 /* Return 1 for an operand in small memory on V.4/eabi. */
8218
8219 int
8220 small_data_operand (rtx op ATTRIBUTE_UNUSED,
8221 machine_mode mode ATTRIBUTE_UNUSED)
8222 {
8223 #if TARGET_ELF
8224 rtx sym_ref;
8225
8226 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
8227 return 0;
8228
8229 if (DEFAULT_ABI != ABI_V4)
8230 return 0;
8231
8232 if (SYMBOL_REF_P (op))
8233 sym_ref = op;
8234
8235 else if (GET_CODE (op) != CONST
8236 || GET_CODE (XEXP (op, 0)) != PLUS
8237 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
8238 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
8239 return 0;
8240
8241 else
8242 {
8243 rtx sum = XEXP (op, 0);
8244 HOST_WIDE_INT summand;
8245
8246 /* We have to be careful here, because it is the referenced address
8247 that must be 32k from _SDA_BASE_, not just the symbol. */
8248 summand = INTVAL (XEXP (sum, 1));
8249 if (summand < 0 || summand > g_switch_value)
8250 return 0;
8251
8252 sym_ref = XEXP (sum, 0);
8253 }
8254
8255 return SYMBOL_REF_SMALL_P (sym_ref);
8256 #else
8257 return 0;
8258 #endif
8259 }
8260
8261 /* Return true if either operand is a general purpose register. */
8262
8263 bool
8264 gpr_or_gpr_p (rtx op0, rtx op1)
8265 {
8266 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
8267 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
8268 }
8269
8270 /* Return true if this is a direct move operation between GPR registers and
8271 floating point/VSX registers. */
8272
8273 bool
8274 direct_move_p (rtx op0, rtx op1)
8275 {
8276 if (!REG_P (op0) || !REG_P (op1))
8277 return false;
8278
8279 if (!TARGET_DIRECT_MOVE)
8280 return false;
8281
8282 int regno0 = REGNO (op0);
8283 int regno1 = REGNO (op1);
8284 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
8285 return false;
8286
8287 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
8288 return true;
8289
8290 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
8291 return true;
8292
8293 return false;
8294 }
8295
8296 /* Return true if ADDR is an acceptable address for a quad memory
8297 operation of mode MODE (either LQ/STQ for general purpose registers, or
8298 LXV/STXV for vector registers under ISA 3.0). STRICT is true if the
8299 address must pass strict checking, i.e. only hard registers are valid
8300 as base registers. */
8301
8302 bool
8303 quad_address_p (rtx addr, machine_mode mode, bool strict)
8304 {
8305 rtx op0, op1;
8306
8307 if (GET_MODE_SIZE (mode) < 16)
8308 return false;
8309
8310 if (legitimate_indirect_address_p (addr, strict))
8311 return true;
8312
8313 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
8314 return false;
8315
8316 /* Is this a valid prefixed address? If the bottom four bits of the offset
8317 are non-zero, we could use a prefixed instruction (which does not have the
8318 DQ-form constraint that the traditional instruction had) instead of
8319 forcing the unaligned offset to a GPR. */
8320 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
8321 return true;
8322
8323 if (GET_CODE (addr) != PLUS)
8324 return false;
8325
8326 op0 = XEXP (addr, 0);
8327 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
8328 return false;
8329
8330 op1 = XEXP (addr, 1);
8331 if (!CONST_INT_P (op1))
8332 return false;
8333
8334 return quad_address_offset_p (INTVAL (op1));
8335 }
8336
8337 /* Return true if this is a load or store quad operation. This function does
8338 not handle the atomic quad memory instructions. */
8339
8340 bool
8341 quad_load_store_p (rtx op0, rtx op1)
8342 {
8343 bool ret;
8344
8345 if (!TARGET_QUAD_MEMORY)
8346 ret = false;
8347
8348 else if (REG_P (op0) && MEM_P (op1))
8349 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
8350 && quad_memory_operand (op1, GET_MODE (op1))
8351 && !reg_overlap_mentioned_p (op0, op1));
8352
8353 else if (MEM_P (op0) && REG_P (op1))
8354 ret = (quad_memory_operand (op0, GET_MODE (op0))
8355 && quad_int_reg_operand (op1, GET_MODE (op1)));
8356
8357 else
8358 ret = false;
8359
8360 if (TARGET_DEBUG_ADDR)
8361 {
8362 fprintf (stderr, "\n========== quad_load_store, return %s\n",
8363 ret ? "true" : "false");
8364 debug_rtx (gen_rtx_SET (op0, op1));
8365 }
8366
8367 return ret;
8368 }
8369
8370 /* Given an address, return a constant offset term if one exists. */
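/* For example, for (plus (reg) (const_int 8)) this returns
   (const_int 8), while for a plain (reg) it returns NULL_RTX.  */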
8371
8372 static rtx
8373 address_offset (rtx op)
8374 {
8375 if (GET_CODE (op) == PRE_INC
8376 || GET_CODE (op) == PRE_DEC)
8377 op = XEXP (op, 0);
8378 else if (GET_CODE (op) == PRE_MODIFY
8379 || GET_CODE (op) == LO_SUM)
8380 op = XEXP (op, 1);
8381
8382 if (GET_CODE (op) == CONST)
8383 op = XEXP (op, 0);
8384
8385 if (GET_CODE (op) == PLUS)
8386 op = XEXP (op, 1);
8387
8388 if (CONST_INT_P (op))
8389 return op;
8390
8391 return NULL_RTX;
8392 }
8393
8394 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
8395 the mode. If we can't find (or don't know) the alignment of the symbol
8396 we assume (optimistically) that it's sufficiently aligned [??? maybe we
8397 should be pessimistic]. Offsets are validated in the same way as for
8398 reg + offset. */
8399 static bool
8400 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
8401 {
8402 /* We should not get here with this. */
8403 gcc_checking_assert (! mode_supports_dq_form (mode));
8404
8405 if (GET_CODE (x) == CONST)
8406 x = XEXP (x, 0);
8407
8408 /* If we are building PIC code, then any symbol must be wrapped in an
8409 UNSPEC_MACHOPIC_OFFSET so that it will get the picbase subtracted. */
8410 bool machopic_offs_p = false;
8411 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
8412 {
8413 x = XVECEXP (x, 0, 0);
8414 machopic_offs_p = true;
8415 }
8416
8417 rtx sym = NULL_RTX;
8418 unsigned HOST_WIDE_INT offset = 0;
8419
8420 if (GET_CODE (x) == PLUS)
8421 {
8422 sym = XEXP (x, 0);
8423 if (! SYMBOL_REF_P (sym))
8424 return false;
8425 if (!CONST_INT_P (XEXP (x, 1)))
8426 return false;
8427 offset = INTVAL (XEXP (x, 1));
8428 }
8429 else if (SYMBOL_REF_P (x))
8430 sym = x;
8431 else if (CONST_INT_P (x))
8432 offset = INTVAL (x);
8433 else if (GET_CODE (x) == LABEL_REF)
8434 offset = 0; // We assume code labels are Pmode aligned
8435 else
8436 return false; // not sure what we have here.
8437
8438 /* If we don't know the alignment of the thing to which the symbol refers,
8439 we assume optimistically it is "enough".
8440 ??? maybe we should be pessimistic instead. */
8441 unsigned align = 0;
8442
8443 if (sym)
8444 {
8445 tree decl = SYMBOL_REF_DECL (sym);
8446 /* As noted above, PIC code cannot use a bare SYMBOL_REF. */
8447 if (TARGET_MACHO && flag_pic && !machopic_offs_p)
8448 return false;
8449 #if TARGET_MACHO
8450 if (MACHO_SYMBOL_INDIRECTION_P (sym))
8451 /* The decl in an indirection symbol is the original one, which might
8452 be less aligned than the indirection. Our indirections are always
8453 pointer-aligned. */
8454 ;
8455 else
8456 #endif
8457 if (decl && DECL_ALIGN (decl))
8458 align = DECL_ALIGN_UNIT (decl);
8459 }
8460
8461 unsigned int extra = 0;
8462 switch (mode)
8463 {
8464 case E_DFmode:
8465 case E_DDmode:
8466 case E_DImode:
8467 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8468 addressing. */
8469 if (VECTOR_MEM_VSX_P (mode))
8470 return false;
8471
8472 if (!TARGET_POWERPC64)
8473 extra = 4;
8474 else if ((offset & 3) || (align & 3))
8475 return false;
8476 break;
8477
8478 case E_TFmode:
8479 case E_IFmode:
8480 case E_KFmode:
8481 case E_TDmode:
8482 case E_TImode:
8483 case E_PTImode:
8484 extra = 8;
8485 if (!TARGET_POWERPC64)
8486 extra = 12;
8487 else if ((offset & 3) || (align & 3))
8488 return false;
8489 break;
8490
8491 default:
8492 break;
8493 }
8494
8495 /* We only care if the access(es) would cause a change to the high part. */
8496 offset = sext_hwi (offset, 16);
8497 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8498 }
8499
8500 /* Return true if the MEM operand is a memory operand suitable for use
8501 with a (full width, possibly multiple) gpr load/store. On
8502 powerpc64 this means the offset must be divisible by 4.
8503 Implements 'Y' constraint.
8504
8505 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8506 a constraint function we know the operand has satisfied a suitable
8507 memory predicate.
8508
8509 Offsetting a lo_sum should not be allowed, except where we know by
8510 alignment that a 32k boundary is not crossed. Note that by
8511 "offsetting" here we mean a further offset to access parts of the
8512 MEM. It's fine to have a lo_sum where the inner address is offset
8513 from a sym, since the same sym+offset will appear in the high part
8514 of the address calculation. */
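/* For example, on powerpc64 a DImode access uses the DS-form ld/std
   instructions, whose 16-bit displacement must have the bottom two bits
   clear; hence the offset must be divisible by 4.  */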
8515
8516 bool
8517 mem_operand_gpr (rtx op, machine_mode mode)
8518 {
8519 unsigned HOST_WIDE_INT offset;
8520 int extra;
8521 rtx addr = XEXP (op, 0);
8522
8523 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
8524 if (TARGET_UPDATE
8525 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
8526 && mode_supports_pre_incdec_p (mode)
8527 && legitimate_indirect_address_p (XEXP (addr, 0), false))
8528 return true;
8529
8530 /* Allow prefixed instructions if supported. If the bottom two bits of the
8531 offset are non-zero, we could use a prefixed instruction (which does not
8532 have the DS-form constraint that the traditional instruction had) instead
8533 of forcing the unaligned offset to a GPR. */
8534 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8535 return true;
8536
8537 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
8538 really OK. Doing this early avoids teaching all the other machinery
8539 about them. */
8540 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
8541 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
8542
8543 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
8544 if (!rs6000_offsettable_memref_p (op, mode, false))
8545 return false;
8546
8547 op = address_offset (addr);
8548 if (op == NULL_RTX)
8549 return true;
8550
8551 offset = INTVAL (op);
8552 if (TARGET_POWERPC64 && (offset & 3) != 0)
8553 return false;
8554
8555 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8556 if (extra < 0)
8557 extra = 0;
8558
8559 if (GET_CODE (addr) == LO_SUM)
8560 /* For lo_sum addresses, we must allow any offset except one that
8561 causes a wrap, so test only the low 16 bits. */
8562 offset = sext_hwi (offset, 16);
8563
8564 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8565 }
8566
8567 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8568 enforce an offset divisible by 4 even for 32-bit. */
8569
8570 bool
8571 mem_operand_ds_form (rtx op, machine_mode mode)
8572 {
8573 unsigned HOST_WIDE_INT offset;
8574 int extra;
8575 rtx addr = XEXP (op, 0);
8576
8577 /* Allow prefixed instructions if supported. If the bottom two bits of the
8578 offset are non-zero, we could use a prefixed instruction (which does not
8579 have the DS-form constraint that the traditional instruction had) instead
8580 of forcing the unaligned offset to a GPR. */
8581 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8582 return true;
8583
8584 if (!offsettable_address_p (false, mode, addr))
8585 return false;
8586
8587 op = address_offset (addr);
8588 if (op == NULL_RTX)
8589 return true;
8590
8591 offset = INTVAL (op);
8592 if ((offset & 3) != 0)
8593 return false;
8594
8595 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8596 if (extra < 0)
8597 extra = 0;
8598
8599 if (GET_CODE (addr) == LO_SUM)
8600 /* For lo_sum addresses, we must allow any offset except one that
8601 causes a wrap, so test only the low 16 bits. */
8602 offset = sext_hwi (offset, 16);
8603
8604 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8605 }
8606 \f
8607 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8608
8609 static bool
8610 reg_offset_addressing_ok_p (machine_mode mode)
8611 {
8612 switch (mode)
8613 {
8614 case E_V16QImode:
8615 case E_V8HImode:
8616 case E_V4SFmode:
8617 case E_V4SImode:
8618 case E_V2DFmode:
8619 case E_V2DImode:
8620 case E_V1TImode:
8621 case E_TImode:
8622 case E_TFmode:
8623 case E_KFmode:
8624 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8625 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8626 a vector mode, if we want to use the VSX registers to move it around,
8627 we need to restrict ourselves to reg+reg addressing. Similarly for
8628 IEEE 128-bit floating point that is passed in a single vector
8629 register. */
8630 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8631 return mode_supports_dq_form (mode);
8632 break;
8633
8634 /* The vector pair/quad types support offset addressing if the
8635 underlying vectors support offset addressing. */
8636 case E_OOmode:
8637 case E_XOmode:
8638 return TARGET_MMA;
8639
8640 case E_SDmode:
8641 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8642 addressing for the LFIWZX and STFIWX instructions. */
8643 if (TARGET_NO_SDMODE_STACK)
8644 return false;
8645 break;
8646
8647 default:
8648 break;
8649 }
8650
8651 return true;
8652 }
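/* Illustrative example (an assumption about typical CPU settings, not part
   of the original source): for V4SImode, mode_supports_dq_form is true on
   a power9/ISA 3.0 target where the DQ-form lxv/stxv instructions exist,
   so reg+offset addressing is allowed there, while on power8 the mode is
   limited to reg+reg addressing (lvx/stvx style).  */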
8653
8654 static bool
8655 virtual_stack_registers_memory_p (rtx op)
8656 {
8657 int regnum;
8658
8659 if (REG_P (op))
8660 regnum = REGNO (op);
8661
8662 else if (GET_CODE (op) == PLUS
8663 && REG_P (XEXP (op, 0))
8664 && CONST_INT_P (XEXP (op, 1)))
8665 regnum = REGNO (XEXP (op, 0));
8666
8667 else
8668 return false;
8669
8670 return (regnum >= FIRST_VIRTUAL_REGISTER
8671 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8672 }
8673
8674 /* Return true if a MODE sized memory access to OP plus OFFSET
8675 is known to not straddle a 32k boundary. This function is used
8676 to determine whether -mcmodel=medium code can use TOC pointer
8677 relative addressing for OP. This means the alignment of the TOC
8678 pointer must also be taken into account, and unfortunately that is
8679 only 8 bytes. */
8680
8681 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8682 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8683 #endif
8684
8685 static bool
8686 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8687 machine_mode mode)
8688 {
8689 tree decl;
8690 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8691
8692 if (!SYMBOL_REF_P (op))
8693 return false;
8694
8695 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8696 SYMBOL_REF. */
8697 if (mode_supports_dq_form (mode))
8698 return false;
8699
8700 dsize = GET_MODE_SIZE (mode);
8701 decl = SYMBOL_REF_DECL (op);
8702 if (!decl)
8703 {
8704 if (dsize == 0)
8705 return false;
8706
8707 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8708 replacing memory addresses with an anchor plus offset. We
8709 could find the decl by rummaging around in the block->objects
8710 VEC for the given offset but that seems like too much work. */
8711 dalign = BITS_PER_UNIT;
8712 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8713 && SYMBOL_REF_ANCHOR_P (op)
8714 && SYMBOL_REF_BLOCK (op) != NULL)
8715 {
8716 struct object_block *block = SYMBOL_REF_BLOCK (op);
8717
8718 dalign = block->alignment;
8719 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8720 }
8721 else if (CONSTANT_POOL_ADDRESS_P (op))
8722 {
8723 /* It would be nice to have get_pool_align ()... */
8724 machine_mode cmode = get_pool_mode (op);
8725
8726 dalign = GET_MODE_ALIGNMENT (cmode);
8727 }
8728 }
8729 else if (DECL_P (decl))
8730 {
8731 dalign = DECL_ALIGN (decl);
8732
8733 if (dsize == 0)
8734 {
8735 /* Allow BLKmode when the entire object is known to not
8736 cross a 32k boundary. */
8737 if (!DECL_SIZE_UNIT (decl))
8738 return false;
8739
8740 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8741 return false;
8742
8743 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8744 if (dsize > 32768)
8745 return false;
8746
8747 dalign /= BITS_PER_UNIT;
8748 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8749 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8750 return dalign >= dsize;
8751 }
8752 }
8753 else
8754 gcc_unreachable ();
8755
8756 /* Find how many bits of the alignment we know for this access. */
8757 dalign /= BITS_PER_UNIT;
8758 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8759 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8760 mask = dalign - 1;
8761 lsb = offset & -offset;
8762 mask &= lsb - 1;
8763 dalign = mask + 1;
8764
8765 return dalign >= dsize;
8766 }
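/* Worked example of the mask arithmetic above (illustrative, not part of
   the original source): for a decl aligned to 16 bytes, dalign is first
   capped to the 8-byte TOC pointer alignment.  With offset = 40 we get
   lsb = 40 & -40 = 8 and a known alignment of 8, so an 8-byte access
   passes; with offset = 4, lsb = 4 gives a known alignment of only 4 and
   the same access is rejected.  */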
8767
8768 static bool
8769 constant_pool_expr_p (rtx op)
8770 {
8771 rtx base, offset;
8772
8773 split_const (op, &base, &offset);
8774 return (SYMBOL_REF_P (base)
8775 && CONSTANT_POOL_ADDRESS_P (base)
8776 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8777 }
8778
8779 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
8780 use that as the register to put the HIGH value into if register allocation
8781 is already done. */
8782
8783 rtx
8784 create_TOC_reference (rtx symbol, rtx largetoc_reg)
8785 {
8786 rtx tocrel, tocreg, hi;
8787
8788 gcc_assert (TARGET_TOC);
8789
8790 if (TARGET_DEBUG_ADDR)
8791 {
8792 if (SYMBOL_REF_P (symbol))
8793 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
8794 XSTR (symbol, 0));
8795 else
8796 {
8797 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
8798 GET_RTX_NAME (GET_CODE (symbol)));
8799 debug_rtx (symbol);
8800 }
8801 }
8802
8803 if (!can_create_pseudo_p ())
8804 df_set_regs_ever_live (TOC_REGISTER, true);
8805
8806 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
8807 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
8808 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
8809 return tocrel;
8810
8811 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
8812 if (largetoc_reg != NULL)
8813 {
8814 emit_move_insn (largetoc_reg, hi);
8815 hi = largetoc_reg;
8816 }
8817 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
8818 }
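/* Sketch of the RTL produced (illustrative, not part of the original
   source): for -mcmodel=small the result is just
     (unspec:DI [(symbol_ref ("x")) (reg:DI 2)] UNSPEC_TOCREL)
   which is printed as x@toc(2), while for -mcmodel=medium/large after
   register allocation it is the split form
     (lo_sum:DI (high:DI (unspec ... UNSPEC_TOCREL))
                (unspec ... UNSPEC_TOCREL))
   matching an addis/addi (or addis/load) @toc@ha, @toc@l pair.  */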
8819
8820 /* These are only used to pass through from print_operand/print_operand_address
8821 to rs6000_output_addr_const_extra over the intervening function
8822 output_addr_const which is not target code. */
8823 static const_rtx tocrel_base_oac, tocrel_offset_oac;
8824
8825 /* Return true if OP is a toc pointer relative address (the output
8826 of create_TOC_reference). If STRICT, do not match non-split
8827 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8828 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8829 TOCREL_OFFSET_RET respectively. */
8830
8831 bool
8832 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
8833 const_rtx *tocrel_offset_ret)
8834 {
8835 if (!TARGET_TOC)
8836 return false;
8837
8838 if (TARGET_CMODEL != CMODEL_SMALL)
8839 {
8840 /* When strict, ensure we have everything tidy. */
8841 if (strict
8842 && !(GET_CODE (op) == LO_SUM
8843 && REG_P (XEXP (op, 0))
8844 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8845 return false;
8846
8847 /* When not strict, allow non-split TOC addresses and also allow
8848 (lo_sum (high ..)) TOC addresses created during reload. */
8849 if (GET_CODE (op) == LO_SUM)
8850 op = XEXP (op, 1);
8851 }
8852
8853 const_rtx tocrel_base = op;
8854 const_rtx tocrel_offset = const0_rtx;
8855
8856 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8857 {
8858 tocrel_base = XEXP (op, 0);
8859 tocrel_offset = XEXP (op, 1);
8860 }
8861
8862 if (tocrel_base_ret)
8863 *tocrel_base_ret = tocrel_base;
8864 if (tocrel_offset_ret)
8865 *tocrel_offset_ret = tocrel_offset;
8866
8867 return (GET_CODE (tocrel_base) == UNSPEC
8868 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
8869 && REG_P (XVECEXP (tocrel_base, 0, 1))
8870 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
8871 }
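/* For example (illustrative, not part of the original source), an
   offsettable TOC-relative access matches as
     (plus (unspec [(symbol_ref ("x")) (reg 2)] UNSPEC_TOCREL)
           (const_int 8))
   giving tocrel_base = the UNSPEC and tocrel_offset = (const_int 8).  */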
8872
8873 /* Return true if X is a constant pool address, and also for cmodel=medium
8874 if X is a toc-relative address known to be offsettable within MODE. */
8875
8876 bool
8877 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8878 bool strict)
8879 {
8880 const_rtx tocrel_base, tocrel_offset;
8881 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
8882 && (TARGET_CMODEL != CMODEL_MEDIUM
8883 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8884 || mode == QImode
8885 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8886 INTVAL (tocrel_offset), mode)));
8887 }
8888
8889 static bool
8890 legitimate_small_data_p (machine_mode mode, rtx x)
8891 {
8892 return (DEFAULT_ABI == ABI_V4
8893 && !flag_pic && !TARGET_TOC
8894 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
8895 && small_data_operand (x, mode));
8896 }
8897
8898 bool
8899 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8900 bool strict, bool worst_case)
8901 {
8902 unsigned HOST_WIDE_INT offset;
8903 unsigned int extra;
8904
8905 if (GET_CODE (x) != PLUS)
8906 return false;
8907 if (!REG_P (XEXP (x, 0)))
8908 return false;
8909 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8910 return false;
8911 if (mode_supports_dq_form (mode))
8912 return quad_address_p (x, mode, strict);
8913 if (!reg_offset_addressing_ok_p (mode))
8914 return virtual_stack_registers_memory_p (x);
8915 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8916 return true;
8917 if (!CONST_INT_P (XEXP (x, 1)))
8918 return false;
8919
8920 offset = INTVAL (XEXP (x, 1));
8921 extra = 0;
8922 switch (mode)
8923 {
8924 case E_DFmode:
8925 case E_DDmode:
8926 case E_DImode:
8927 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8928 addressing. */
8929 if (VECTOR_MEM_VSX_P (mode))
8930 return false;
8931
8932 if (!worst_case)
8933 break;
8934 if (!TARGET_POWERPC64)
8935 extra = 4;
8936 else if (offset & 3)
8937 return false;
8938 break;
8939
8940 case E_TFmode:
8941 case E_IFmode:
8942 case E_KFmode:
8943 case E_TDmode:
8944 case E_TImode:
8945 case E_PTImode:
8946 extra = 8;
8947 if (!worst_case)
8948 break;
8949 if (!TARGET_POWERPC64)
8950 extra = 12;
8951 else if (offset & 3)
8952 return false;
8953 break;
8954
8955 default:
8956 break;
8957 }
8958
8959 if (TARGET_PREFIXED)
8960 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
8961 else
8962 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8963 }
8964
8965 bool
8966 legitimate_indexed_address_p (rtx x, int strict)
8967 {
8968 rtx op0, op1;
8969
8970 if (GET_CODE (x) != PLUS)
8971 return false;
8972
8973 op0 = XEXP (x, 0);
8974 op1 = XEXP (x, 1);
8975
8976 return (REG_P (op0) && REG_P (op1)
8977 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8978 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8979 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8980 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8981 }
8982
8983 bool
8984 avoiding_indexed_address_p (machine_mode mode)
8985 {
8986 unsigned int msize = GET_MODE_SIZE (mode);
8987
8988 /* Avoid indexed addressing for modes that have non-indexed load/store
8989 instruction forms. On power10, vector pairs have an indexed
8990 form, but vector quads don't. */
8991 if (msize > 16)
8992 return msize != 32;
8993
8994 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8995 }
8996
8997 bool
8998 legitimate_indirect_address_p (rtx x, int strict)
8999 {
9000 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
9001 }
9002
9003 bool
9004 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
9005 {
9006 if (!TARGET_MACHO || !flag_pic
9007 || mode != SImode || !MEM_P (x))
9008 return false;
9009 x = XEXP (x, 0);
9010
9011 if (GET_CODE (x) != LO_SUM)
9012 return false;
9013 if (!REG_P (XEXP (x, 0)))
9014 return false;
9015 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
9016 return false;
9017 x = XEXP (x, 1);
9018
9019 return CONSTANT_P (x);
9020 }
9021
9022 static bool
9023 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
9024 {
9025 if (GET_CODE (x) != LO_SUM)
9026 return false;
9027 if (!REG_P (XEXP (x, 0)))
9028 return false;
9029 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
9030 return false;
9031 /* Quad word addresses are restricted, and we can't use LO_SUM. */
9032 if (mode_supports_dq_form (mode))
9033 return false;
9034 x = XEXP (x, 1);
9035
9036 if (TARGET_ELF)
9037 {
9038 bool large_toc_ok;
9039
9040 if (DEFAULT_ABI == ABI_V4 && flag_pic)
9041 return false;
9042 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
9043 push_reload from reload pass code.  LEGITIMIZE_RELOAD_ADDRESS
9044 recognizes some LO_SUM addresses as valid although this
9045 function says the opposite.  In most cases LRA can, through
9046 different transformations, generate correct code for address
9047 reloads; only some LO_SUM cases defeat it.  So we need to add
9048 code here saying that those addresses are still valid. */
9049 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
9050 && small_toc_ref (x, VOIDmode));
9051 if (TARGET_TOC && ! large_toc_ok)
9052 return false;
9053 if (GET_MODE_NUNITS (mode) != 1)
9054 return false;
9055 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
9056 && !(/* ??? Assume floating point reg based on mode? */
9057 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9058 return false;
9059
9060 return CONSTANT_P (x) || large_toc_ok;
9061 }
9062 else if (TARGET_MACHO)
9063 {
9064 if (GET_MODE_NUNITS (mode) != 1)
9065 return false;
9066 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
9067 && !(/* see above */
9068 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9069 return false;
9070 #if TARGET_MACHO
9071 if (MACHO_DYNAMIC_NO_PIC_P || !flag_pic)
9072 return CONSTANT_P (x);
9073 #endif
9074 /* Mach-O PIC code from here. */
9075 if (GET_CODE (x) == CONST)
9076 x = XEXP (x, 0);
9077
9078 /* SYMBOL_REFs need to be wrapped in an UNSPEC_MACHOPIC_OFFSET. */
9079 if (SYMBOL_REF_P (x))
9080 return false;
9081
9082 /* So this is OK if the wrapped object is const. */
9083 if (GET_CODE (x) == UNSPEC
9084 && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
9085 return CONSTANT_P (XVECEXP (x, 0, 0));
9086 return CONSTANT_P (x);
9087 }
9088 return false;
9089 }
9090
9091
9092 /* Try machine-dependent ways of modifying an illegitimate address
9093 to be legitimate. If we find one, return the new, valid address.
9094 This is used from only one place: `memory_address' in explow.cc.
9095
9096 OLDX is the address as it was before break_out_memory_refs was
9097 called. In some cases it is useful to look at this to decide what
9098 needs to be done.
9099
9100 It is always safe for this function to do nothing. It exists to
9101 recognize opportunities to optimize the output.
9102
9103 On RS/6000, first check for the sum of a register with a constant
9104 integer that is out of range. If so, generate code to add the
9105 constant with the low-order 16 bits masked to the register and force
9106 this result into another register (this can be done with `cau').
9107 Then generate an address of REG+(CONST&0xffff), allowing for the
9108 possibility of bit 16 being a one.
9109
9110 Then check for the sum of a register and something not constant; try to
9111 load the non-constant part into a register and return the sum. */
9112
9113 static rtx
9114 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
9115 machine_mode mode)
9116 {
9117 unsigned int extra;
9118
9119 if (!reg_offset_addressing_ok_p (mode)
9120 || mode_supports_dq_form (mode))
9121 {
9122 if (virtual_stack_registers_memory_p (x))
9123 return x;
9124
9125 /* In theory we should not be seeing addresses of the form reg+0,
9126 but just in case one is generated, optimize it away. */
9127 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
9128 return force_reg (Pmode, XEXP (x, 0));
9129
9130 /* For TImode with load/store quad, restrict addresses to just a single
9131 pointer, so it works with both GPRs and VSX registers. */
9132 /* Make sure both operands are registers. */
9133 else if (GET_CODE (x) == PLUS
9134 && (mode != TImode || !TARGET_VSX))
9135 return gen_rtx_PLUS (Pmode,
9136 force_reg (Pmode, XEXP (x, 0)),
9137 force_reg (Pmode, XEXP (x, 1)));
9138 else
9139 return force_reg (Pmode, x);
9140 }
9141 if (SYMBOL_REF_P (x) && !TARGET_MACHO)
9142 {
9143 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
9144 if (model != 0)
9145 return rs6000_legitimize_tls_address (x, model);
9146 }
9147
9148 extra = 0;
9149 switch (mode)
9150 {
9151 case E_TFmode:
9152 case E_TDmode:
9153 case E_TImode:
9154 case E_PTImode:
9155 case E_IFmode:
9156 case E_KFmode:
9157 /* As in legitimate_offset_address_p we do not assume
9158 worst-case. The mode here is just a hint as to the registers
9159 used. A TImode is usually in gprs, but may actually be in
9160 fprs. Leave worst-case scenario for reload to handle via
9161 insn constraints. PTImode is only GPRs. */
9162 extra = 8;
9163 break;
9164 default:
9165 break;
9166 }
9167
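/* Worked example of the high/low split below (illustrative, not part of
   the original source): for x = (plus reg (const_int 0x12348)) we get
   low_int = sext (0x2348) = 0x2348 and high_int = 0x10000, so an addis
   of 1 is emitted into a new register and (plus new_reg 0x2348) is
   returned, both parts now fitting in signed 16-bit fields.  */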
9168 if (GET_CODE (x) == PLUS
9169 && REG_P (XEXP (x, 0))
9170 && CONST_INT_P (XEXP (x, 1))
9171 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
9172 >= 0x10000 - extra))
9173 {
9174 HOST_WIDE_INT high_int, low_int;
9175 rtx sum;
9176 low_int = sext_hwi (INTVAL (XEXP (x, 1)), 16);
9177 if (low_int >= 0x8000 - extra)
9178 low_int = 0;
9179 high_int = INTVAL (XEXP (x, 1)) - low_int;
9180 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
9181 gen_int_mode (high_int, Pmode)), 0);
9182 return plus_constant (Pmode, sum, low_int);
9183 }
9184 else if (GET_CODE (x) == PLUS
9185 && REG_P (XEXP (x, 0))
9186 && !CONST_INT_P (XEXP (x, 1))
9187 && GET_MODE_NUNITS (mode) == 1
9188 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9189 || (/* ??? Assume floating point reg based on mode? */
9190 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9191 && !avoiding_indexed_address_p (mode))
9192 {
9193 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
9194 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
9195 }
9196 else if ((TARGET_ELF
9197 #if TARGET_MACHO
9198 || !MACHO_DYNAMIC_NO_PIC_P
9199 #endif
9200 )
9201 && TARGET_32BIT
9202 && TARGET_NO_TOC_OR_PCREL
9203 && !flag_pic
9204 && !CONST_INT_P (x)
9205 && !CONST_WIDE_INT_P (x)
9206 && !CONST_DOUBLE_P (x)
9207 && CONSTANT_P (x)
9208 && GET_MODE_NUNITS (mode) == 1
9209 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9210 || (/* ??? Assume floating point reg based on mode? */
9211 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
9212 {
9213 rtx reg = gen_reg_rtx (Pmode);
9214 if (TARGET_ELF)
9215 emit_insn (gen_elf_high (reg, x));
9216 else
9217 emit_insn (gen_macho_high (Pmode, reg, x));
9218 return gen_rtx_LO_SUM (Pmode, reg, x);
9219 }
9220 else if (TARGET_TOC
9221 && SYMBOL_REF_P (x)
9222 && constant_pool_expr_p (x)
9223 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
9224 return create_TOC_reference (x, NULL_RTX);
9225 else
9226 return x;
9227 }
9228
9229 /* Debug version of rs6000_legitimize_address. */
9230 static rtx
9231 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9232 {
9233 rtx ret;
9234 rtx_insn *insns;
9235
9236 start_sequence ();
9237 ret = rs6000_legitimize_address (x, oldx, mode);
9238 insns = get_insns ();
9239 end_sequence ();
9240
9241 if (ret != x)
9242 {
9243 fprintf (stderr,
9244 "\nrs6000_legitimize_address: mode %s, old code %s, "
9245 "new code %s, modified\n",
9246 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
9247 GET_RTX_NAME (GET_CODE (ret)));
9248
9249 fprintf (stderr, "Original address:\n");
9250 debug_rtx (x);
9251
9252 fprintf (stderr, "oldx:\n");
9253 debug_rtx (oldx);
9254
9255 fprintf (stderr, "New address:\n");
9256 debug_rtx (ret);
9257
9258 if (insns)
9259 {
9260 fprintf (stderr, "Insns added:\n");
9261 debug_rtx_list (insns, 20);
9262 }
9263 }
9264 else
9265 {
9266 fprintf (stderr,
9267 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9268 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
9269
9270 debug_rtx (x);
9271 }
9272
9273 if (insns)
9274 emit_insn (insns);
9275
9276 return ret;
9277 }
9278
9279 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9280 We need to emit DTP-relative relocations. */
9281
9282 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
9283 static void
9284 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
9285 {
9286 switch (size)
9287 {
9288 case 4:
9289 fputs ("\t.long\t", file);
9290 break;
9291 case 8:
9292 fputs (DOUBLE_INT_ASM_OP, file);
9293 break;
9294 default:
9295 gcc_unreachable ();
9296 }
9297 output_addr_const (file, x);
9298 if (TARGET_ELF)
9299 fputs ("@dtprel+0x8000", file);
9300 }
9301
9302 /* Return true if X is a symbol that refers to real (rather than emulated)
9303 TLS. */
9304
9305 static bool
9306 rs6000_real_tls_symbol_ref_p (rtx x)
9307 {
9308 return (SYMBOL_REF_P (x)
9309 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
9310 }
9311
9312 /* In the name of slightly smaller debug output, and to cater to
9313 general assembler lossage, recognize various UNSPEC sequences
9314 and turn them back into a direct symbol reference. */
9315
9316 static rtx
9317 rs6000_delegitimize_address (rtx orig_x)
9318 {
9319 rtx x, y, offset;
9320
9321 /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion. It
9322 encodes loading up the high part of the address of a TOC reference along
9323 with a load of a GPR using the same base register used for the load. We
9324 return the original SYMBOL_REF.
9325
9326 (set (reg:INT1 <reg>)
9327 (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR))
9328
9329 UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass. These
9330 UNSPECs include the external SYMBOL_REF along with the value being loaded.
9331 We return the original SYMBOL_REF.
9332
9333 (parallel [(set (reg:DI <base-reg>)
9334 (unspec:DI [(symbol_ref <symbol>)
9335 (const_int <marker>)]
9336 UNSPEC_PCREL_OPT_LD_ADDR))
9337 (set (reg:DI <load-reg>)
9338 (unspec:DI [(const_int 0)]
9339 UNSPEC_PCREL_OPT_LD_DATA))])
9340
9341 UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the
9342 GPR being loaded is the same as the GPR used to hold the external address.
9343
9344 (set (reg:DI <base-reg>)
9345 (unspec:DI [(symbol_ref <symbol>)
9346 (const_int <marker>)]
9347 UNSPEC_PCREL_OPT_LD_SAME_REG))
9348
9349 UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass.  This
9350 UNSPEC includes the external SYMBOL_REF along with the value being stored.
9351 We return the original SYMBOL_REF.
9352
9353 (parallel [(set (reg:DI <base-reg>)
9354 (unspec:DI [(symbol_ref <symbol>)
9355 (const_int <marker>)]
9356 UNSPEC_PCREL_OPT_ST_ADDR))
9357 (use (reg <store-reg>))]) */
9358
9359 if (GET_CODE (orig_x) == UNSPEC)
9360 switch (XINT (orig_x, 1))
9361 {
9362 case UNSPEC_FUSION_GPR:
9363 case UNSPEC_PCREL_OPT_LD_ADDR:
9364 case UNSPEC_PCREL_OPT_LD_SAME_REG:
9365 case UNSPEC_PCREL_OPT_ST_ADDR:
9366 orig_x = XVECEXP (orig_x, 0, 0);
9367 break;
9368
9369 default:
9370 break;
9371 }
9372
9373 orig_x = delegitimize_mem_from_attrs (orig_x);
9374
9375 x = orig_x;
9376 if (MEM_P (x))
9377 x = XEXP (x, 0);
9378
9379 y = x;
9380 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
9381 y = XEXP (y, 1);
9382
9383 offset = NULL_RTX;
9384 if (GET_CODE (y) == PLUS
9385 && GET_MODE (y) == Pmode
9386 && CONST_INT_P (XEXP (y, 1)))
9387 {
9388 offset = XEXP (y, 1);
9389 y = XEXP (y, 0);
9390 }
9391
9392 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
9393 {
9394 y = XVECEXP (y, 0, 0);
9395
9396 #ifdef HAVE_AS_TLS
9397 /* Do not associate thread-local symbols with the original
9398 constant pool symbol. */
9399 if (TARGET_XCOFF
9400 && SYMBOL_REF_P (y)
9401 && CONSTANT_POOL_ADDRESS_P (y)
9402 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
9403 return orig_x;
9404 #endif
9405
9406 if (offset != NULL_RTX)
9407 y = gen_rtx_PLUS (Pmode, y, offset);
9408 if (!MEM_P (orig_x))
9409 return y;
9410 else
9411 return replace_equiv_address_nv (orig_x, y);
9412 }
9413
9414 if (TARGET_MACHO
9415 && GET_CODE (orig_x) == LO_SUM
9416 && GET_CODE (XEXP (orig_x, 1)) == CONST)
9417 {
9418 y = XEXP (XEXP (orig_x, 1), 0);
9419 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9420 return XVECEXP (y, 0, 0);
9421 }
9422
9423 return orig_x;
9424 }
9425
9426 /* Return true if X shouldn't be emitted into the debug info.
9427 The linker doesn't like .toc section references from
9428 .debug_* sections, so reject .toc section symbols. */
9429
9430 static bool
9431 rs6000_const_not_ok_for_debug_p (rtx x)
9432 {
9433 if (GET_CODE (x) == UNSPEC)
9434 return true;
9435 if (SYMBOL_REF_P (x)
9436 && CONSTANT_POOL_ADDRESS_P (x))
9437 {
9438 rtx c = get_pool_constant (x);
9439 machine_mode cmode = get_pool_mode (x);
9440 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9441 return true;
9442 }
9443
9444 return false;
9445 }
9446
9447 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9448
9449 static bool
9450 rs6000_legitimate_combined_insn (rtx_insn *insn)
9451 {
9452 int icode = INSN_CODE (insn);
9453
9454 /* Reject creating doloop insns. Combine should not be allowed
9455 to create these for a number of reasons:
9456 1) In a nested loop, if combine creates one of these in an
9457 outer loop and the register allocator happens to allocate ctr
9458 to the outer loop insn, then the inner loop can't use ctr.
9459 Inner loops ought to be more highly optimized.
9460 2) Combine often wants to create one of these from what was
9461 originally a three insn sequence, first combining the three
9462 insns to two, then to ctrsi/ctrdi.  When ctrsi/ctrdi is not
9463 allocated ctr, the splitter takes us back to the three insn
9464 sequence.  It's better to stop combine at the two insn
9465 sequence.
9466 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
9467 insns, the register allocator sometimes uses floating point
9468 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9469 jump insn and output reloads are not implemented for jumps,
9470 the ctrsi/ctrdi splitters need to handle all possible cases.
9471 That's a pain, and it gets to be seriously difficult when a
9472 splitter that runs after reload needs memory to transfer from
9473 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9474 for the difficult case. It's better to not create problems
9475 in the first place. */
9476 if (icode != CODE_FOR_nothing
9477 && (icode == CODE_FOR_bdz_si
9478 || icode == CODE_FOR_bdz_di
9479 || icode == CODE_FOR_bdnz_si
9480 || icode == CODE_FOR_bdnz_di
9481 || icode == CODE_FOR_bdztf_si
9482 || icode == CODE_FOR_bdztf_di
9483 || icode == CODE_FOR_bdnztf_si
9484 || icode == CODE_FOR_bdnztf_di))
9485 return false;
9486
9487 return true;
9488 }
9489
9490 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9491
9492 static GTY(()) rtx rs6000_tls_symbol;
9493 static rtx
9494 rs6000_tls_get_addr (void)
9495 {
9496 if (!rs6000_tls_symbol)
9497 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9498
9499 return rs6000_tls_symbol;
9500 }
9501
9502 /* Construct the SYMBOL_REF for TLS GOT references. */
9503
9504 static GTY(()) rtx rs6000_got_symbol;
9505 rtx
9506 rs6000_got_sym (void)
9507 {
9508 if (!rs6000_got_symbol)
9509 {
9510 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9511 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9512 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9513 }
9514
9515 return rs6000_got_symbol;
9516 }
9517
9518 /* AIX Thread-Local Address support. */
9519
9520 static rtx
9521 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9522 {
9523 rtx sym, mem, tocref, tlsreg, tmpreg, dest;
9524 const char *name;
9525 char *tlsname;
9526
9527 /* Place addr into TOC constant pool. */
9528 sym = force_const_mem (GET_MODE (addr), addr);
9529
9530 /* Output the TOC entry and create the MEM referencing the value. */
9531 if (constant_pool_expr_p (XEXP (sym, 0))
9532 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9533 {
9534 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9535 mem = gen_const_mem (Pmode, tocref);
9536 set_mem_alias_set (mem, get_TOC_alias_set ());
9537 }
9538 else
9539 return sym;
9540
9541 /* Use global-dynamic for local-dynamic. */
9542 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9543 || model == TLS_MODEL_LOCAL_DYNAMIC)
9544 {
9545 /* Create new TOC reference for @m symbol. */
9546 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9547 tlsname = XALLOCAVEC (char, strlen (name) + 1);
9548 strcpy (tlsname, "*LCM");
9549 strcat (tlsname, name + 3);
9550 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9551 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9552 tocref = create_TOC_reference (modaddr, NULL_RTX);
9553 rtx modmem = gen_const_mem (Pmode, tocref);
9554 set_mem_alias_set (modmem, get_TOC_alias_set ());
9555
9556 rtx modreg = gen_reg_rtx (Pmode);
9557 emit_insn (gen_rtx_SET (modreg, modmem));
9558
9559 tmpreg = gen_reg_rtx (Pmode);
9560 emit_insn (gen_rtx_SET (tmpreg, mem));
9561
9562 dest = gen_reg_rtx (Pmode);
9563 if (TARGET_32BIT)
9564 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9565 else
9566 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9567 return dest;
9568 }
9569 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9570 else if (TARGET_32BIT)
9571 {
9572 tlsreg = gen_reg_rtx (SImode);
9573 emit_insn (gen_tls_get_tpointer (tlsreg));
9574 }
9575 else
9576 {
9577 tlsreg = gen_rtx_REG (DImode, 13);
9578 xcoff_tls_exec_model_detected = true;
9579 }
9580
9581 /* Load the TOC value into temporary register. */
9582 tmpreg = gen_reg_rtx (Pmode);
9583 emit_insn (gen_rtx_SET (tmpreg, mem));
9584 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9585 gen_rtx_MINUS (Pmode, addr, tlsreg));
9586
9587 /* Add TOC symbol value to TLS pointer. */
9588 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9589
9590 return dest;
9591 }
9592
9593 /* Passes the tls arg value from the global-dynamic and local-dynamic
9594 emit_library_call_value calls in rs6000_legitimize_tls_address to
9595 rs6000_call_aix and rs6000_call_sysv.  This is used to emit the
9596 marker relocs put on __tls_get_addr calls. */
9597 static rtx global_tlsarg;
9598
9599 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9600 this (thread-local) address. */
9601
9602 static rtx
9603 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9604 {
9605 rtx dest, insn;
9606
9607 if (TARGET_XCOFF)
9608 return rs6000_legitimize_tls_address_aix (addr, model);
9609
9610 dest = gen_reg_rtx (Pmode);
9611 if (model == TLS_MODEL_LOCAL_EXEC
9612 && (rs6000_tls_size == 16 || rs6000_pcrel_p ()))
9613 {
9614 rtx tlsreg;
9615
9616 if (TARGET_64BIT)
9617 {
9618 tlsreg = gen_rtx_REG (Pmode, 13);
9619 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9620 }
9621 else
9622 {
9623 tlsreg = gen_rtx_REG (Pmode, 2);
9624 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9625 }
9626 emit_insn (insn);
9627 }
9628 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9629 {
9630 rtx tlsreg, tmp;
9631
9632 tmp = gen_reg_rtx (Pmode);
9633 if (TARGET_64BIT)
9634 {
9635 tlsreg = gen_rtx_REG (Pmode, 13);
9636 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9637 }
9638 else
9639 {
9640 tlsreg = gen_rtx_REG (Pmode, 2);
9641 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9642 }
9643 emit_insn (insn);
9644 if (TARGET_64BIT)
9645 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9646 else
9647 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9648 emit_insn (insn);
9649 }
9650 else
9651 {
9652 rtx got, tga, tmp1, tmp2;
9653
9654 /* We currently use relocations like @got@tlsgd for tls, which
9655 means the linker will handle allocation of tls entries, placing
9656 them in the .got section. So use a pointer to the .got section,
9657 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9658 or to secondary GOT sections used by 32-bit -fPIC. */
9659 if (rs6000_pcrel_p ())
9660 got = const0_rtx;
9661 else if (TARGET_64BIT)
9662 got = gen_rtx_REG (Pmode, 2);
9663 else
9664 {
9665 if (flag_pic == 1)
9666 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9667 else
9668 {
9669 rtx gsym = rs6000_got_sym ();
9670 got = gen_reg_rtx (Pmode);
9671 if (flag_pic == 0)
9672 rs6000_emit_move (got, gsym, Pmode);
9673 else
9674 {
9675 rtx mem, lab;
9676
9677 tmp1 = gen_reg_rtx (Pmode);
9678 tmp2 = gen_reg_rtx (Pmode);
9679 mem = gen_const_mem (Pmode, tmp1);
9680 lab = gen_label_rtx ();
9681 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9682 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9683 if (TARGET_LINK_STACK)
9684 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9685 emit_move_insn (tmp2, mem);
9686 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9687 set_unique_reg_note (last, REG_EQUAL, gsym);
9688 }
9689 }
9690 }
9691
9692 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9693 {
9694 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
9695 UNSPEC_TLSGD);
9696 tga = rs6000_tls_get_addr ();
9697 rtx argreg = gen_rtx_REG (Pmode, 3);
9698 emit_insn (gen_rtx_SET (argreg, arg));
9699 global_tlsarg = arg;
9700 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
9701 global_tlsarg = NULL_RTX;
9702
9703 /* Make a note so that the result of this call can be CSEd. */
9704 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9705 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9706 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9707 }
9708 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9709 {
9710 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
9711 tga = rs6000_tls_get_addr ();
9712 tmp1 = gen_reg_rtx (Pmode);
9713 rtx argreg = gen_rtx_REG (Pmode, 3);
9714 emit_insn (gen_rtx_SET (argreg, arg));
9715 global_tlsarg = arg;
9716 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
9717 global_tlsarg = NULL_RTX;
9718
9719 /* Make a note so that the result of this call can be CSEd. */
9720 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9721 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9722 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9723
9724 if (rs6000_tls_size == 16 || rs6000_pcrel_p ())
9725 {
9726 if (TARGET_64BIT)
9727 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9728 else
9729 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9730 }
9731 else if (rs6000_tls_size == 32)
9732 {
9733 tmp2 = gen_reg_rtx (Pmode);
9734 if (TARGET_64BIT)
9735 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9736 else
9737 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9738 emit_insn (insn);
9739 if (TARGET_64BIT)
9740 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9741 else
9742 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9743 }
9744 else
9745 {
9746 tmp2 = gen_reg_rtx (Pmode);
9747 if (TARGET_64BIT)
9748 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9749 else
9750 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9751 emit_insn (insn);
9752 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9753 }
9754 emit_insn (insn);
9755 }
9756 else
9757 {
9758 /* IE, or 64-bit offset LE. */
9759 tmp2 = gen_reg_rtx (Pmode);
9760 if (TARGET_64BIT)
9761 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9762 else
9763 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9764 emit_insn (insn);
9765 if (rs6000_pcrel_p ())
9766 {
9767 if (TARGET_64BIT)
9768 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
9769 else
9770 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
9771 }
9772 else if (TARGET_64BIT)
9773 insn = gen_tls_tls_64 (dest, tmp2, addr);
9774 else
9775 insn = gen_tls_tls_32 (dest, tmp2, addr);
9776 emit_insn (insn);
9777 }
9778 }
9779
9780 return dest;
9781 }
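/* For reference (illustrative, not from the original source; the exact
   relocations depend on the ABI, code model and -mtls-size), the cases
   above correspond roughly to these 64-bit ELF sequences:
     local-exec:     addis 9,13,x@tprel@ha ; addi 3,9,x@tprel@l
     initial-exec:   ld 9,x@got@tprel(2)   ; add 3,9,x@tls
     global-dynamic: addis 3,2,x@got@tlsgd@ha ; addi 3,3,x@got@tlsgd@l
                     bl __tls_get_addr(x@tlsgd) ; nop  */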
9782
9783 /* Only create the global variable for the stack protect guard if we are using
9784 the global flavor of that guard. */
9785 static tree
9786 rs6000_init_stack_protect_guard (void)
9787 {
9788 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9789 return default_stack_protect_guard ();
9790
9791 return NULL_TREE;
9792 }
9793
9794 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9795
9796 static bool
9797 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9798 {
9799 /* If GET_CODE (x) is HIGH, the 'X' represents the high part of a symbol_ref.
9800 It cannot be put into a constant pool, e.g.
9801 (high:DI (unspec:DI [(symbol_ref/u:DI ("*.LC0")..)
9802 (high:DI (symbol_ref:DI ("var")..)). */
9803 if (GET_CODE (x) == HIGH)
9804 return true;
9805
9806 /* A TLS symbol in the TOC cannot contain a sum. */
9807 if (GET_CODE (x) == CONST
9808 && GET_CODE (XEXP (x, 0)) == PLUS
9809 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
9810 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9811 return true;
9812
9813 /* Allow AIX TOC TLS symbols in the constant pool,
9814 but not ELF TLS symbols. */
9815 return TARGET_ELF && tls_referenced_p (x);
9816 }
9817
9818 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9819 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9820 can be addressed relative to the toc pointer. */
9821
9822 static bool
9823 use_toc_relative_ref (rtx sym, machine_mode mode)
9824 {
9825 return ((constant_pool_expr_p (sym)
9826 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9827 get_pool_mode (sym)))
9828 || (TARGET_CMODEL == CMODEL_MEDIUM
9829 && SYMBOL_REF_LOCAL_P (sym)
9830 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9831 }
9832
9833 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9834 that is a valid memory address for an instruction.
9835 The MODE argument is the machine mode for the MEM expression
9836 that wants to use this address.
9837
9838 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9839 refers to a constant pool entry of an address (or the sum of it
9840 plus a constant), a short (16-bit signed) constant plus a register,
9841 the sum of two registers, or a register indirect, possibly with an
9842 auto-increment.  For DFmode, DDmode and DImode with a constant plus
9843 register, we must ensure that both words are addressable, or on
9844 PowerPC64 that the offset is word aligned.
9845
9846 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9847 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9848 because adjacent memory cells are accessed by adding word-sized offsets
9849 during assembly output. */
9850 static bool
9851 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict,
9852 code_helper ch = ERROR_MARK)
9853 {
9854 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9855 bool quad_offset_p = mode_supports_dq_form (mode);
9856
9857 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9858 return 0;
9859
9860 /* lxvl and stxvl don't support any addressing modes with PLUS. */
9861 if (ch.is_internal_fn ()
9862 && (ch == IFN_LEN_LOAD || ch == IFN_LEN_STORE)
9863 && GET_CODE (x) == PLUS)
9864 return 0;
9865
9866 /* Handle unaligned altivec lvx/stvx type addresses. */
9867 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
9868 && GET_CODE (x) == AND
9869 && CONST_INT_P (XEXP (x, 1))
9870 && INTVAL (XEXP (x, 1)) == -16)
9871 {
9872 x = XEXP (x, 0);
9873 return (legitimate_indirect_address_p (x, reg_ok_strict)
9874 || legitimate_indexed_address_p (x, reg_ok_strict)
9875 || virtual_stack_registers_memory_p (x));
9876 }
9877
9878 if (legitimate_indirect_address_p (x, reg_ok_strict))
9879 return 1;
9880 if (TARGET_UPDATE
9881 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9882 && mode_supports_pre_incdec_p (mode)
9883 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9884 return 1;
9885
9886 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
9887 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
9888 return 1;
9889
9890 /* Handle restricted vector d-form offsets in ISA 3.0. */
9891 if (quad_offset_p)
9892 {
9893 if (quad_address_p (x, mode, reg_ok_strict))
9894 return 1;
9895 }
9896 else if (virtual_stack_registers_memory_p (x))
9897 return 1;
9898
9899 else if (reg_offset_p)
9900 {
9901 if (legitimate_small_data_p (mode, x))
9902 return 1;
9903 if (legitimate_constant_pool_address_p (x, mode,
9904 reg_ok_strict || lra_in_progress))
9905 return 1;
9906 }
9907
9908 /* For TImode, if we have TImode in VSX registers, only allow register
9909 indirect addresses. This will allow the values to go in either GPRs
9910 or VSX registers without reloading. The vector types would tend to
9911 go into VSX registers, so we allow REG+REG, while TImode seems
9912 somewhat split, in that some uses are GPR based, and some VSX based. */
9913 /* FIXME: We could loosen this by changing the following to
9914 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9915 but currently we cannot allow REG+REG addressing for TImode. See
9916 PR72827 for complete details on how this ends up hoodwinking DSE. */
9917 if (mode == TImode && TARGET_VSX)
9918 return 0;
9919 /* If not REG_OK_STRICT (before reload), let any stack offset pass. */
9920 if (! reg_ok_strict
9921 && reg_offset_p
9922 && GET_CODE (x) == PLUS
9923 && REG_P (XEXP (x, 0))
9924 && (XEXP (x, 0) == virtual_stack_vars_rtx
9925 || XEXP (x, 0) == arg_pointer_rtx)
9926 && CONST_INT_P (XEXP (x, 1)))
9927 return 1;
9928 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9929 return 1;
9930 if (!FLOAT128_2REG_P (mode)
9931 && (TARGET_HARD_FLOAT
9932 || TARGET_POWERPC64
9933 || (mode != DFmode && mode != DDmode))
9934 && (TARGET_POWERPC64 || mode != DImode)
9935 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9936 && mode != PTImode
9937 && !avoiding_indexed_address_p (mode)
9938 && legitimate_indexed_address_p (x, reg_ok_strict))
9939 return 1;
9940 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9941 && mode_supports_pre_modify_p (mode)
9942 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9943 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9944 reg_ok_strict, false)
9945 || (!avoiding_indexed_address_p (mode)
9946 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9947 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9948 {
9949 /* There is no prefixed version of the load/store with update. */
9950 rtx addr = XEXP (x, 1);
9951 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
9952 }
9953 if (reg_offset_p && !quad_offset_p
9954 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9955 return 1;
9956 return 0;
9957 }
9958
9959 /* Debug version of rs6000_legitimate_address_p. */
9960 static bool
9961 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict,
9962 code_helper ch)
9963 {
9964 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict, ch);
9965 fprintf (stderr,
9966 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9967 "strict = %d, reload = %s, code = %s\n",
9968 ret ? "true" : "false",
9969 GET_MODE_NAME (mode),
9970 reg_ok_strict,
9971 (reload_completed ? "after" : "before"),
9972 GET_RTX_NAME (GET_CODE (x)));
9973 debug_rtx (x);
9974
9975 return ret;
9976 }
9977
9978 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9979
9980 static bool
9981 rs6000_mode_dependent_address_p (const_rtx addr,
9982 addr_space_t as ATTRIBUTE_UNUSED)
9983 {
9984 return rs6000_mode_dependent_address_ptr (addr);
9985 }
9986
9987 /* Go to LABEL if ADDR (a legitimate address expression)
9988 has an effect that depends on the machine mode it is used for.
9989
9990 On the RS/6000 this is true of all integral offsets (since AltiVec
9991 and VSX modes don't allow them) and of pre-increment or decrement addresses.
9992
9993 ??? Except that due to conceptual problems in offsettable_address_p
9994 we can't really report the problems of integral offsets. So leave
9995 this assuming that the adjustable offset must be valid for the
9996 sub-words of a TFmode operand, which is what we had before. */
9997
9998 static bool
9999 rs6000_mode_dependent_address (const_rtx addr)
10000 {
10001 switch (GET_CODE (addr))
10002 {
10003 case PLUS:
10004 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
10005 is considered a legitimate address before reload, so there
10006 are no offset restrictions in that case. Note that this
10007 condition is safe in strict mode because any address involving
10008 virtual_stack_vars_rtx or arg_pointer_rtx would already have
10009 been rejected as illegitimate. */
10010 if (XEXP (addr, 0) != virtual_stack_vars_rtx
10011 && XEXP (addr, 0) != arg_pointer_rtx
10012 && CONST_INT_P (XEXP (addr, 1)))
10013 {
10014 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
10015 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
10016 if (TARGET_PREFIXED)
10017 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
10018 else
10019 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
10020 }
10021 break;
10022
10023 case LO_SUM:
10024 /* Anything in the constant pool is sufficiently aligned that
10025 all bytes have the same high part address. */
10026 return !legitimate_constant_pool_address_p (addr, QImode, false);
10027
10028 /* Auto-increment cases are now treated generically in recog.cc. */
10029 case PRE_MODIFY:
10030 return TARGET_UPDATE;
10031
10032 /* AND is only allowed in Altivec loads. */
10033 case AND:
10034 return true;
10035
10036 default:
10037 break;
10038 }
10039
10040 return false;
10041 }
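/* Example (illustrative, not part of the original source): on 64-bit
   without prefixed insns, extra = 8, so (plus reg (const_int 32760)) is
   reported as mode-dependent because a multi-word access would also touch
   offset + 8 = 32768, which is out of range, whereas an offset of 32756
   is not mode-dependent.  */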
10042
10043 /* Debug version of rs6000_mode_dependent_address. */
10044 static bool
10045 rs6000_debug_mode_dependent_address (const_rtx addr)
10046 {
10047 bool ret = rs6000_mode_dependent_address (addr);
10048
10049 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
10050 ret ? "true" : "false");
10051 debug_rtx (addr);
10052
10053 return ret;
10054 }
10055
10056 /* Implement FIND_BASE_TERM. */
10057
10058 rtx
10059 rs6000_find_base_term (rtx op)
10060 {
10061 rtx base;
10062
10063 base = op;
10064 if (GET_CODE (base) == CONST)
10065 base = XEXP (base, 0);
10066 if (GET_CODE (base) == PLUS)
10067 base = XEXP (base, 0);
10068 if (GET_CODE (base) == UNSPEC)
10069 switch (XINT (base, 1))
10070 {
10071 case UNSPEC_TOCREL:
10072 case UNSPEC_MACHOPIC_OFFSET:
10073 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
10074 for aliasing purposes. */
10075 return XVECEXP (base, 0, 0);
10076 }
10077
10078 return op;
10079 }
10080
10081 /* More elaborate version of recog's offsettable_memref_p predicate
10082 that works around the ??? note of rs6000_mode_dependent_address.
10083 In particular it accepts
10084
10085 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
10086
10087 in 32-bit mode, which the recog predicate rejects. */
10088
10089 static bool
10090 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
10091 {
10092 bool worst_case;
10093
10094 if (!MEM_P (op))
10095 return false;
10096
10097 /* First mimic offsettable_memref_p. */
10098 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
10099 return true;
10100
10101 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10102 the latter predicate knows nothing about the mode of the memory
10103 reference and, therefore, assumes that it is the largest supported
10104 mode (TFmode). As a consequence, legitimate offsettable memory
10105 references are rejected. rs6000_legitimate_offset_address_p contains
10106 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10107 at least with a little bit of help here given that we know the
10108 actual registers used. */
10109 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
10110 || GET_MODE_SIZE (reg_mode) == 4);
10111 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
10112 strict, worst_case);
10113 }
10114
10115 /* Determine the reassociation width to be used in reassociate_bb.
10116 This takes into account how many parallel operations we
10117 can actually do of a given type, and also the latency.
10118 P8:
10119 int add/sub 6/cycle
10120 mul 2/cycle
10121 vect add/sub/mul 2/cycle
10122 fp add/sub/mul 2/cycle
10123 dfp 1/cycle
10124 */
10125
10126 static int
10127 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
10128 machine_mode mode)
10129 {
10130 switch (rs6000_tune)
10131 {
10132 case PROCESSOR_POWER8:
10133 case PROCESSOR_POWER9:
10134 case PROCESSOR_POWER10:
10135 case PROCESSOR_POWER11:
10136 if (DECIMAL_FLOAT_MODE_P (mode))
10137 return 1;
10138 if (VECTOR_MODE_P (mode))
10139 return 4;
10140 if (INTEGRAL_MODE_P (mode))
10141 return 1;
10142 if (FLOAT_MODE_P (mode))
10143 return 4;
10144 break;
10145 default:
10146 break;
10147 }
10148 return 1;
10149 }
10150
10151 /* Change register usage conditional on target flags. */
10152 static void
10153 rs6000_conditional_register_usage (void)
10154 {
10155 int i;
10156
10157 if (TARGET_DEBUG_TARGET)
10158 fprintf (stderr, "rs6000_conditional_register_usage called\n");
10159
10160 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10161 if (TARGET_64BIT)
10162 fixed_regs[13] = call_used_regs[13] = 1;
10163
10164 /* Conditionally disable FPRs. */
10165 if (TARGET_SOFT_FLOAT)
10166 for (i = 32; i < 64; i++)
10167 fixed_regs[i] = call_used_regs[i] = 1;
10168
10169 /* For non-PC-relative code, GPR2 is unavailable for register allocation. */
10170 if (FIXED_R2 && !rs6000_pcrel_p ())
10171 fixed_regs[2] = 1;
10172
10173 /* The TOC register is not killed across calls in a way that is
10174 visible to the compiler. */
10175 if (fixed_regs[2] && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2))
10176 call_used_regs[2] = 0;
10177
10178 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10179 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10180
10181 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10182 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10183 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10184
10185 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10186 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10187 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10188
10189 if (TARGET_TOC && TARGET_MINIMAL_TOC)
10190 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10191
10192 if (!TARGET_ALTIVEC && !TARGET_VSX)
10193 {
10194 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10195 fixed_regs[i] = call_used_regs[i] = 1;
10196 call_used_regs[VRSAVE_REGNO] = 1;
10197 }
10198
10199 if (TARGET_ALTIVEC || TARGET_VSX)
10200 global_regs[VSCR_REGNO] = 1;
10201
10202 if (TARGET_ALTIVEC_ABI)
10203 {
10204 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10205 call_used_regs[i] = 1;
10206
10207 /* AIX reserves VR20:31 in non-extended ABI mode. */
10208 if (TARGET_XCOFF && !rs6000_aix_extabi)
10209 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10210 fixed_regs[i] = call_used_regs[i] = 1;
10211 }
10212 }
10213
10214 \f
10215 /* Output insns to set DEST equal to the constant SOURCE as a series of
10216 lis, ori and shl instructions and return TRUE. */
10217
10218 bool
10219 rs6000_emit_set_const (rtx dest, rtx source)
10220 {
10221 machine_mode mode = GET_MODE (dest);
10222 rtx temp, set;
10223 rtx_insn *insn;
10224 HOST_WIDE_INT c;
10225
10226 gcc_checking_assert (CONST_INT_P (source));
10227 c = INTVAL (source);
10228 switch (mode)
10229 {
10230 case E_QImode:
10231 case E_HImode:
10232 emit_insn (gen_rtx_SET (dest, source));
10233 return true;
10234
10235 case E_SImode:
10236 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10237
10238 emit_insn (gen_rtx_SET (temp, GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10239 emit_insn (gen_rtx_SET (dest,
10240 gen_rtx_IOR (SImode, temp,
10241 GEN_INT (c & 0xffff))));
10242 break;
10243
10244 case E_DImode:
10245 if (!TARGET_POWERPC64)
10246 {
10247 rtx hi, lo;
10248
10249 hi = operand_subword_force (dest, WORDS_BIG_ENDIAN == 0, DImode);
10250 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0, DImode);
10251 emit_move_insn (hi, GEN_INT (c >> 32));
10252 c = sext_hwi (c, 32);
10253 emit_move_insn (lo, GEN_INT (c));
10254 }
10255 else
10256 rs6000_emit_set_long_const (dest, c);
10257 break;
10258
10259 default:
10260 gcc_unreachable ();
10261 }
10262
10263 insn = get_last_insn ();
10264 set = single_set (insn);
10265 if (! CONSTANT_P (SET_SRC (set)))
10266 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10267
10268 return true;
10269 }
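/* SImode example (illustrative, not part of the original source): for
   c = 0x12345678 the two SETs above amount to
     lis tmp, 0x1234      # tmp = 0x12340000
     ori dest, tmp, 0x5678
   and the REG_EQUAL note records the full constant on the last insn.  */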
10270
10271 /* Check if C can be rotated to a negative value that the 'lis' instruction
10272 is able to load: 1..1xx0..0.  If so, set *ROT to the number by which C is
10273 rotated, and return true.  Return false otherwise. */
10274
10275 static bool
10276 can_be_rotated_to_negative_lis (HOST_WIDE_INT c, int *rot)
10277 {
10278 /* case a. 1..1xxx0..01..1: up to 15 x's, at least 16 0's. */
10279 int leading_ones = clz_hwi (~c);
10280 int tailing_ones = ctz_hwi (~c);
10281 int middle_zeros = ctz_hwi (c >> tailing_ones);
10282 if (middle_zeros >= 16 && leading_ones + tailing_ones >= 33)
10283 {
10284 *rot = HOST_BITS_PER_WIDE_INT - tailing_ones;
10285 return true;
10286 }
10287
10288 /* case b. xx0..01..1xx: some of 15 x's (and some of 16 0's) are
10289 rotated over the highest bit. */
10290 int pos_one = clz_hwi ((c << 16) >> 16);
10291 middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_one));
10292 int middle_ones = clz_hwi (~(c << pos_one));
10293 if (middle_zeros >= 16 && middle_ones >= 33)
10294 {
10295 *rot = pos_one;
10296 return true;
10297 }
10298
10299 return false;
10300 }
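/* Worked example for case a (illustrative, not part of the original
   source): c = 0xffffffff00000001 has leading_ones = 32, tailing_ones = 1
   and middle_zeros = 31, so the test passes and *rot = 63; rotating C
   left by 63 yields 0xffffffff80000000, i.e. 'lis' with immediate
   -32768.  */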
10301
10302 /* Check if value C can be built by 2 instructions: one is 'li or lis',
10303 another is rotldi.
10304
10305 If so, *SHIFT is set to the shift operand of rotldi(rldicl), and *MASK
10306 is set to the mask operand of rotldi(rldicl), and return true.
10307 Return false otherwise. */
10308
10309 static bool
10310 can_be_built_by_li_lis_and_rotldi (HOST_WIDE_INT c, int *shift,
10311 HOST_WIDE_INT *mask)
10312 {
10313 /* If C or ~C contains at least 49 successive zeros, then C can be rotated
10314 to/from a positive or negative value that 'li' is able to load. */
10315 int n;
10316 if (can_be_rotated_to_lowbits (c, 15, &n)
10317 || can_be_rotated_to_lowbits (~c, 15, &n)
10318 || can_be_rotated_to_negative_lis (c, &n))
10319 {
10320 *mask = HOST_WIDE_INT_M1;
10321 *shift = HOST_BITS_PER_WIDE_INT - n;
10322 return true;
10323 }
10324
10325 return false;
10326 }
10327
10328 /* Check if value C can be built by 2 instructions: one is 'li or lis',
10329 another is rldicl.
10330
10331 If so, *SHIFT is set to the shift operand of rldicl, and *MASK is set to
10332 the mask operand of rldicl, and return true.
10333 Return false otherwise. */
10334
10335 static bool
10336 can_be_built_by_li_lis_and_rldicl (HOST_WIDE_INT c, int *shift,
10337 HOST_WIDE_INT *mask)
10338 {
10339 /* Leading zeros may be cleared by rldicl with a mask.  Change leading
10340 zeros to ones and then recheck it. */
10341 int lz = clz_hwi (c);
10342
10343 /* If lz == 0, the left shift is undefined. */
10344 if (!lz)
10345 return false;
10346
10347 HOST_WIDE_INT unmask_c
10348 = c | (HOST_WIDE_INT_M1U << (HOST_BITS_PER_WIDE_INT - lz));
10349 int n;
10350 if (can_be_rotated_to_lowbits (~unmask_c, 15, &n)
10351 || can_be_rotated_to_negative_lis (unmask_c, &n))
10352 {
10353 *mask = HOST_WIDE_INT_M1U >> lz;
10354 *shift = n == 0 ? 0 : HOST_BITS_PER_WIDE_INT - n;
10355 return true;
10356 }
10357
10358 return false;
10359 }
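/* Worked example (illustrative, not part of the original source):
   c = 0x00000000ffff0000 gives lz = 32 and
   unmask_c = 0xffffffffffff0000, itself a loadable negative lis value,
   so *shift = 0 and *mask = 0x00000000ffffffff.  The two insns are then
     lis reg, -1              # reg = 0xffffffffffff0000
     rldicl reg, reg, 0, 32   # clear the top 32 bits, yielding C  */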
10360
10361 /* Check if value C can be built by 2 instructions: one is 'li or lis',
10362 another is rldicr.
10363
10364 If so, *SHIFT is set to the shift operand of rldicr, and *MASK is set to
10365 the mask operand of rldicr, and return true.
10366 Return false otherwise. */
10367
10368 static bool
10369 can_be_built_by_li_lis_and_rldicr (HOST_WIDE_INT c, int *shift,
10370 HOST_WIDE_INT *mask)
10371 {
10372 /* Trailing zeros may be cleared by rldicr with a mask.  Change trailing
10373 zeros to ones and then recheck it. */
10374 int tz = ctz_hwi (c);
10375
10376 /* If tz == HOST_BITS_PER_WIDE_INT, the left shift is undefined. */
10377 if (tz >= HOST_BITS_PER_WIDE_INT)
10378 return false;
10379
10380 HOST_WIDE_INT unmask_c = c | ((HOST_WIDE_INT_1U << tz) - 1);
10381 int n;
10382 if (can_be_rotated_to_lowbits (~unmask_c, 15, &n)
10383 || can_be_rotated_to_negative_lis (unmask_c, &n))
10384 {
10385 *mask = HOST_WIDE_INT_M1U << tz;
10386 *shift = HOST_BITS_PER_WIDE_INT - n;
10387 return true;
10388 }
10389
10390 return false;
10391 }
10392
10393 /* Check if value C can be built by 2 instructions: one is 'li', another is
10394 rldic.
10395
10396 If so, *SHIFT is set to the 'shift' operand of rldic, *MASK is set to
10397 the mask value corresponding to the 'mb' operand of rldic, and true is
10398 returned.  Return false otherwise. */
10399
10400 static bool
10401 can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
10402 {
10403 /* A negative value loaded by 'li' has at least 49 successive ones. */
10404 int ones = 49;
10405
10406 /* 1..1xx1..1: negative value of li --> 0..01..1xx0..0:
10407 right bits are shifted in as 0's, and left 1's (and x's) are cleared. */
10408 int tz = ctz_hwi (c);
10409 int lz = clz_hwi (c);
10410
10411 /* If lz == HOST_BITS_PER_WIDE_INT, the left shift is undefined. */
10412 if (lz >= HOST_BITS_PER_WIDE_INT)
10413 return false;
10414
10415 int middle_ones = clz_hwi (~(c << lz));
10416 if (tz + lz + middle_ones >= ones
10417 && (tz - lz) < HOST_BITS_PER_WIDE_INT
10418 && tz < HOST_BITS_PER_WIDE_INT)
10419 {
10420 *mask = ((1LL << (HOST_BITS_PER_WIDE_INT - tz - lz)) - 1LL) << tz;
10421 *shift = tz;
10422 return true;
10423 }
10424
10425 /* 1..1xx1..1 --> 1..1xx0..01..1: some 1s (following the x's) are cleared. */
10426 int leading_ones = clz_hwi (~c);
10427 int tailing_ones = ctz_hwi (~c);
10428 int middle_zeros = ctz_hwi (c >> tailing_ones);
10429 if (leading_ones + tailing_ones + middle_zeros >= ones
10430 && middle_zeros < HOST_BITS_PER_WIDE_INT)
10431 {
10432 *mask = ~(((1ULL << middle_zeros) - 1ULL) << tailing_ones);
10433 *shift = tailing_ones + middle_zeros;
10434 return true;
10435 }
10436
10437 /* xx1..1xx --> xx0..01..1xx: some 1s (following the x's) are cleared. */
10438 /* Get the position of the first bit of the run of successive ones.
10439 Bit 24 is assumed to lie inside either a run of zeros or a run of ones. */
10440 HOST_WIDE_INT low_mask = (HOST_WIDE_INT_1U << 24) - HOST_WIDE_INT_1U;
10441 int pos_first_1 = ((c & (low_mask + 1)) == 0)
10442 ? clz_hwi (c & low_mask)
10443 : HOST_BITS_PER_WIDE_INT - ctz_hwi (~(c | low_mask));
10444
10445 /* Make sure the left and right shifts are defined. */
10446 if (!IN_RANGE (pos_first_1, 1, HOST_BITS_PER_WIDE_INT - 1))
10447 return false;
10448
10449 middle_ones = clz_hwi (~c << pos_first_1);
10450 middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_first_1));
10451 if (pos_first_1 < HOST_BITS_PER_WIDE_INT
10452 && middle_ones + middle_zeros < HOST_BITS_PER_WIDE_INT
10453 && middle_ones + middle_zeros >= ones)
10454 {
10455 *mask = ~(((1ULL << middle_zeros) - 1LL)
10456 << (HOST_BITS_PER_WIDE_INT - pos_first_1));
10457 *shift = HOST_BITS_PER_WIDE_INT - pos_first_1 + middle_zeros;
10458 return true;
10459 }
10460
10461 return false;
10462 }
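
/* Illustrative example: c = 0x00FFFFFFFFFFFF00 (48 ones between 8
   trailing and 8 leading zeros) matches the first case above with
   tz = lz = 8 and middle_ones = 48, so it can be built as

       li rD,-1
       rldic rD,rD,8,8

   i.e. *SHIFT = 8 and *MASK = 0x00FFFFFFFFFFFF00.  */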
10463
10464 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10465 Output insns to set DEST equal to the constant C as a series of
10466 lis, ori and shl instructions. If NUM_INSNS is not NULL, then
10467 only increase *NUM_INSNS as the number of insns, and do not emit
10468 any insns. */
10469
10470 static void
10471 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
10472 {
10473 HOST_WIDE_INT ud1, ud2, ud3, ud4;
10474
10475 ud1 = c & 0xffff;
10476 ud2 = (c >> 16) & 0xffff;
10477 ud3 = (c >> 32) & 0xffff;
10478 ud4 = (c >> 48) & 0xffff;
10479
10480 /* This lambda either emits one insn or just increases the insn count.
10481 When only counting insns, nothing is emitted. */
10482 auto count_or_emit_insn = [&num_insns] (rtx dest_or_insn, rtx src = nullptr) {
10483 if (num_insns)
10484 {
10485 (*num_insns)++;
10486 return;
10487 }
10488
10489 if (src)
10490 emit_move_insn (dest_or_insn, src);
10491 else
10492 emit_insn (dest_or_insn);
10493 };
10494
10495 if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (c))
10496 {
10497 /* li/lis/pli */
10498 count_or_emit_insn (dest, GEN_INT (c));
10499 return;
10500 }
10501
10502 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10503 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000)))
10504 {
10505 /* li */
10506 count_or_emit_insn (dest, GEN_INT (sext_hwi (ud1, 16)));
10507 return;
10508 }
10509
10510 rtx temp
10511 = (num_insns || !can_create_pseudo_p ()) ? dest : gen_reg_rtx (DImode);
10512
10513 if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10514 || (ud4 == 0 && ud3 == 0 && !(ud2 & 0x8000)))
10515 {
10516 /* lis[; ori] */
10517 count_or_emit_insn (ud1 != 0 ? temp : dest,
10518 GEN_INT (sext_hwi (ud2 << 16, 32)));
10519 if (ud1 != 0)
10520 count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
10521 return;
10522 }
10523
10524 if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0)
10525 {
10526 /* lis; xoris */
10527 count_or_emit_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32)));
10528 count_or_emit_insn (dest,
10529 gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000)));
10530 return;
10531 }
10532
10533 if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000))
10534 {
10535 /* li; xoris */
10536 count_or_emit_insn (temp, GEN_INT (sext_hwi (ud1, 16)));
10537 count_or_emit_insn (dest, gen_rtx_XOR (DImode, temp,
10538 GEN_INT ((ud2 ^ 0xffff) << 16)));
10539 return;
10540 }
10541
10542 int shift;
10543 HOST_WIDE_INT mask;
10544 if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask)
10545 || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask)
10546 || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask)
10547 || can_be_built_by_li_and_rldic (c, &shift, &mask))
10548 {
10549 /* li/lis; rldicX */
10550 unsigned HOST_WIDE_INT imm = (c | ~mask);
10551 imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));
10552
10553 count_or_emit_insn (temp, GEN_INT (imm));
10554 if (shift != 0)
10555 temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift));
10556 if (mask != HOST_WIDE_INT_M1)
10557 temp = gen_rtx_AND (DImode, temp, GEN_INT (mask));
10558 count_or_emit_insn (dest, temp);
10559
10560 return;
10561 }
10562
10563 if (ud3 == 0 && ud4 == 0)
10564 {
10565 gcc_assert ((ud2 & 0x8000) && ud1 != 0);
10566 if (!(ud1 & 0x8000))
10567 {
10568 /* li; oris */
10569 count_or_emit_insn (temp, GEN_INT (ud1));
10570 count_or_emit_insn (dest,
10571 gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
10572 return;
10573 }
10574
10575 /* lis; ori; rldicl */
10576 count_or_emit_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
10577 count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
10578 count_or_emit_insn (dest,
10579 gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
10580 return;
10581 }
10582
10583 if (ud1 == ud3 && ud2 == ud4)
10584 {
10585 /* Load the low 32 bits first, e.g. "lis; ori", then insert with "rldimi". */
10586 HOST_WIDE_INT num = (ud2 << 16) | ud1;
10587 rs6000_emit_set_long_const (temp, sext_hwi (num, 32), num_insns);
10588
10589 rtx rldimi = gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp,
10590 GEN_INT (0xffffffff));
10591 count_or_emit_insn (rldimi);
10592 return;
10593 }
10594
10595 if ((ud4 == 0xffff && (ud3 & 0x8000)) || (ud4 == 0 && !(ud3 & 0x8000)))
10596 {
10597 /* lis; [ori;] sldi 16 [; ori]. */
10598 count_or_emit_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32)));
10599 if (ud2 != 0)
10600 count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2)));
10601 count_or_emit_insn (ud1 != 0 ? temp : dest,
10602 gen_rtx_ASHIFT (DImode, temp, GEN_INT (16)));
10603 if (ud1 != 0)
10604 count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
10605 return;
10606 }
10607
10608 if (TARGET_PREFIXED)
10609 {
10610 if (can_create_pseudo_p ())
10611 {
10612 /* pli A,L; pli B,H; rldimi A,B,32,0. */
10613 rtx temp1 = num_insns ? nullptr : gen_reg_rtx (DImode);
10614 count_or_emit_insn (temp, GEN_INT ((ud4 << 16) | ud3));
10615 count_or_emit_insn (temp1, GEN_INT ((ud2 << 16) | ud1));
10616 rtx rldimi = gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp1,
10617 GEN_INT (0xffffffff));
10618 count_or_emit_insn (rldimi);
10619 return;
10620 }
10621
10622 /* pli A,H; sldi A,32; paddi A,A,L. */
10623 count_or_emit_insn (dest, GEN_INT ((ud4 << 16) | ud3));
10624 count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
10625
10626 bool can_use_paddi = dest ? REGNO (dest) != FIRST_GPR_REGNO : false;
10627 /* Use paddi for the low 32 bits. */
10628 if (ud2 != 0 && ud1 != 0 && can_use_paddi)
10629 count_or_emit_insn (dest, gen_rtx_PLUS (DImode, dest,
10630 GEN_INT ((ud2 << 16) | ud1)));
10631 /* Use oris, ori for low 32 bits. */
10632 if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
10633 count_or_emit_insn (dest,
10634 gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
10635 if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
10636 count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
10637 return;
10638 }
10639
10640 if (can_create_pseudo_p ())
10641 {
10642 /* lis HIGH,UD4 ; ori HIGH,UD3 ;
10643 lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0. */
10644 rtx high = num_insns ? nullptr : gen_reg_rtx (DImode);
10645 rtx low = num_insns ? nullptr : gen_reg_rtx (DImode);
10646 HOST_WIDE_INT num = (ud2 << 16) | ud1;
10647 rs6000_emit_set_long_const (low, sext_hwi (num, 32), num_insns);
10648 num = (ud4 << 16) | ud3;
10649 rs6000_emit_set_long_const (high, sext_hwi (num, 32), num_insns);
10650
10651 rtx rldimi = gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low,
10652 GEN_INT (0xffffffff));
10653 count_or_emit_insn (rldimi);
10654 return;
10655 }
10656
10657 /* lis DEST,UD4 ; ori DEST,UD3 ; sldi DEST,32 ;
10658 oris DEST,UD2 ; ori DEST,UD1. */
10659 count_or_emit_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32)));
10660 if (ud3 != 0)
10661 count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3)));
10662
10663 count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
10664 if (ud2 != 0)
10665 count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
10666 if (ud1 != 0)
10667 count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
10668
10669 return;
10670 }
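
/* Illustrative run of the generic pseudo-register path above:
   c = 0x123456789ABCDEF0 (no prefixed insns) comes out as

       lis rL,0x9ABC ; ori rL,rL,0xDEF0   # low 32 bits
       lis rH,0x1234 ; ori rH,rH,0x5678   # high 32 bits
       rldimi rL,rH,32,0                  # merge the halves into DEST

   where rL and rH stand for the LOW and HIGH pseudos; the sign-extension
   junk in the upper half of rL is overwritten by the rldimi.  */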
10671
10672 /* Helper for the following. Get rid of [r+r] memory refs
10673 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10674
10675 static void
10676 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10677 {
10678 if (MEM_P (operands[0])
10679 && !REG_P (XEXP (operands[0], 0))
10680 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10681 GET_MODE (operands[0]), false))
10682 operands[0]
10683 = replace_equiv_address (operands[0],
10684 copy_addr_to_reg (XEXP (operands[0], 0)));
10685
10686 if (MEM_P (operands[1])
10687 && !REG_P (XEXP (operands[1], 0))
10688 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10689 GET_MODE (operands[1]), false))
10690 operands[1]
10691 = replace_equiv_address (operands[1],
10692 copy_addr_to_reg (XEXP (operands[1], 0)));
10693 }
10694
10695 /* Generate a vector of constants to permute MODE for a little-endian
10696 storage operation by swapping the two halves of a vector. */
10697 static rtvec
10698 rs6000_const_vec (machine_mode mode)
10699 {
10700 int i, subparts;
10701 rtvec v;
10702
10703 switch (mode)
10704 {
10705 case E_V1TImode:
10706 subparts = 1;
10707 break;
10708 case E_V2DFmode:
10709 case E_V2DImode:
10710 subparts = 2;
10711 break;
10712 case E_V4SFmode:
10713 case E_V4SImode:
10714 subparts = 4;
10715 break;
10716 case E_V8HImode:
10717 subparts = 8;
10718 break;
10719 case E_V16QImode:
10720 subparts = 16;
10721 break;
10722 default:
10723 gcc_unreachable ();
10724 }
10725
10726 v = rtvec_alloc (subparts);
10727
10728 for (i = 0; i < subparts / 2; ++i)
10729 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10730 for (i = subparts / 2; i < subparts; ++i)
10731 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10732
10733 return v;
10734 }
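
/* For V4SImode, for instance, the vector returned is {2, 3, 0, 1}:
   element i of the permutation selects element (i + 2) mod 4 of the
   source, i.e. the two doubleword halves swap.  */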
10735
10736 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10737 store operation. */
10738 void
10739 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
10740 {
10741 gcc_assert (!altivec_indexed_or_indirect_operand (dest, mode));
10742 gcc_assert (!altivec_indexed_or_indirect_operand (source, mode));
10743
10744 /* Scalar permutations are easier to express in integer modes than in
10745 floating-point modes, so cast them here. We use V1TImode instead
10746 of TImode to ensure that the values don't go through GPRs. */
10747 if (FLOAT128_VECTOR_P (mode))
10748 {
10749 dest = gen_lowpart (V1TImode, dest);
10750 source = gen_lowpart (V1TImode, source);
10751 mode = V1TImode;
10752 }
10753
10754 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10755 scalar. */
10756 if (mode == TImode || mode == V1TImode)
10757 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
10758 GEN_INT (64))));
10759 else
10760 {
10761 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10762 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
10763 }
10764 }
10765
10766 /* Emit a little-endian load from vector memory location SOURCE to VSX
10767 register DEST in mode MODE. The load is done with two permuting
10768 insns that represent an lxvd2x and an xxpermdi. */
10769 void
10770 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10771 {
10772 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10773 V1TImode). */
10774 if (mode == TImode || mode == V1TImode)
10775 {
10776 mode = V2DImode;
10777 dest = gen_lowpart (V2DImode, dest);
10778 source = adjust_address (source, V2DImode, 0);
10779 }
10780
10781 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10782 rs6000_emit_le_vsx_permute (tmp, source, mode);
10783 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10784 }
10785
10786 /* Emit a little-endian store to vector memory location DEST from VSX
10787 register SOURCE in mode MODE. The store is done with two permuting
10788 insns that represent an xxpermdi and an stxvd2x. */
10789 void
10790 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10791 {
10792 /* This should never be called after LRA. */
10793 gcc_assert (can_create_pseudo_p ());
10794
10795 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10796 V1TImode). */
10797 if (mode == TImode || mode == V1TImode)
10798 {
10799 mode = V2DImode;
10800 dest = adjust_address (dest, V2DImode, 0);
10801 source = gen_lowpart (V2DImode, source);
10802 }
10803
10804 rtx tmp = gen_reg_rtx_and_attrs (source);
10805 rs6000_emit_le_vsx_permute (tmp, source, mode);
10806 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10807 }
10808
10809 /* Emit a sequence representing a little-endian VSX load or store,
10810 moving data from SOURCE to DEST in mode MODE. This is done
10811 separately from rs6000_emit_move to ensure it is called only
10812 during expand. LE VSX loads and stores introduced later are
10813 handled with a split. The expand-time RTL generation allows
10814 us to optimize away redundant pairs of register-permutes. */
10815 void
10816 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10817 {
10818 gcc_assert (!BYTES_BIG_ENDIAN
10819 && VECTOR_MEM_VSX_P (mode)
10820 && !TARGET_P9_VECTOR
10821 && !gpr_or_gpr_p (dest, source)
10822 && (MEM_P (source) ^ MEM_P (dest)));
10823
10824 if (MEM_P (source))
10825 {
10826 gcc_assert (REG_P (dest) || SUBREG_P (dest));
10827 rs6000_emit_le_vsx_load (dest, source, mode);
10828 }
10829 else
10830 {
10831 if (!REG_P (source))
10832 source = force_reg (mode, source);
10833 rs6000_emit_le_vsx_store (dest, source, mode);
10834 }
10835 }
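
/* Note why the double permute above is correct: on little-endian,
   lxvd2x/stxvd2x transfer the two doublewords in swapped order, and the
   extra xxpermdi generated by rs6000_emit_le_vsx_permute swaps them
   back, so DEST always ends up with the proper element order.  */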
10836
10837 /* Return whether a SFmode or SImode move can be done without converting one
10838 mode to another. This arises when we have:
10839
10840 (SUBREG:SF (REG:SI ...))
10841 (SUBREG:SI (REG:SF ...))
10842
10843 and one of the values is in a floating point/vector register, where SFmode
10844 scalars are stored in DFmode format. */
10845
10846 bool
10847 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10848 {
10849 if (TARGET_ALLOW_SF_SUBREG)
10850 return true;
10851
10852 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10853 return true;
10854
10855 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10856 return true;
10857
10858 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10859 if (SUBREG_P (dest))
10860 {
10861 rtx dest_subreg = SUBREG_REG (dest);
10862 rtx src_subreg = SUBREG_REG (src);
10863 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10864 }
10865
10866 return false;
10867 }
10868
10869
10870 /* Helper function to change moves with:
10871
10872 (SUBREG:SF (REG:SI)) and
10873 (SUBREG:SI (REG:SF))
10874
10875 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10876 values are stored as DFmode values in the VSX registers. We need to convert
10877 the bits before we can use a direct move or operate on the bits in the
10878 vector register as an integer type.
10879
10880 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
10881
10882 static bool
10883 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10884 {
10885 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
10886 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10887 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10888 {
10889 rtx inner_source = SUBREG_REG (source);
10890 machine_mode inner_mode = GET_MODE (inner_source);
10891
10892 if (mode == SImode && inner_mode == SFmode)
10893 {
10894 emit_insn (gen_movsi_from_sf (dest, inner_source));
10895 return true;
10896 }
10897
10898 if (mode == SFmode && inner_mode == SImode)
10899 {
10900 emit_insn (gen_movsf_from_si (dest, inner_source));
10901 return true;
10902 }
10903 }
10904
10905 return false;
10906 }
10907
10908 /* Emit a move from SOURCE to DEST in mode MODE. */
10909 void
10910 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10911 {
10912 rtx operands[2];
10913 operands[0] = dest;
10914 operands[1] = source;
10915
10916 if (TARGET_DEBUG_ADDR)
10917 {
10918 fprintf (stderr,
10919 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10920 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10921 GET_MODE_NAME (mode),
10922 lra_in_progress,
10923 reload_completed,
10924 can_create_pseudo_p ());
10925 debug_rtx (dest);
10926 fprintf (stderr, "source:\n");
10927 debug_rtx (source);
10928 }
10929
10930 /* Check that we get CONST_WIDE_INT only when we should. */
10931 if (CONST_WIDE_INT_P (operands[1])
10932 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10933 gcc_unreachable ();
10934
10935 #ifdef HAVE_AS_GNU_ATTRIBUTE
10936 /* If we use a long double type, set the flags in .gnu_attribute that say
10937 what the long double type is. This is to allow the linker's warning
10938 message for the wrong long double to be useful, even if the function does
10939 not do a call (for example, doing a 128-bit add on power9 if the long
10940 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128
10941 are used and they aren't the default long double type. */
10942 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
10943 {
10944 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
10945 rs6000_passes_float = rs6000_passes_long_double = true;
10946
10947 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
10948 rs6000_passes_float = rs6000_passes_long_double = true;
10949 }
10950 #endif
10951
10952 /* See if we need to special case SImode/SFmode SUBREG moves. */
10953 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10954 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10955 return;
10956
10957 /* Check if GCC is setting up a block move that will end up using FP
10958 registers as temporaries. We must make sure this is acceptable. */
10959 if (MEM_P (operands[0])
10960 && MEM_P (operands[1])
10961 && mode == DImode
10962 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
10963 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
10964 && ! (rs6000_slow_unaligned_access (SImode,
10965 (MEM_ALIGN (operands[0]) > 32
10966 ? 32 : MEM_ALIGN (operands[0])))
10967 || rs6000_slow_unaligned_access (SImode,
10968 (MEM_ALIGN (operands[1]) > 32
10969 ? 32 : MEM_ALIGN (operands[1]))))
10970 && ! MEM_VOLATILE_P (operands [0])
10971 && ! MEM_VOLATILE_P (operands [1]))
10972 {
10973 emit_move_insn (adjust_address (operands[0], SImode, 0),
10974 adjust_address (operands[1], SImode, 0));
10975 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10976 adjust_address (copy_rtx (operands[1]), SImode, 4));
10977 return;
10978 }
10979
10980 if (can_create_pseudo_p () && MEM_P (operands[0])
10981 && !gpc_reg_operand (operands[1], mode))
10982 operands[1] = force_reg (mode, operands[1]);
10983
10984 /* Recognize the case where operand[1] is a reference to thread-local
10985 data and load its address to a register. */
10986 if (tls_referenced_p (operands[1]))
10987 {
10988 enum tls_model model;
10989 rtx tmp = operands[1];
10990 rtx addend = NULL;
10991
10992 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10993 {
10994 addend = XEXP (XEXP (tmp, 0), 1);
10995 tmp = XEXP (XEXP (tmp, 0), 0);
10996 }
10997
10998 gcc_assert (SYMBOL_REF_P (tmp));
10999 model = SYMBOL_REF_TLS_MODEL (tmp);
11000 gcc_assert (model != 0);
11001
11002 tmp = rs6000_legitimize_tls_address (tmp, model);
11003 if (addend)
11004 {
11005 tmp = gen_rtx_PLUS (mode, tmp, addend);
11006 tmp = force_operand (tmp, operands[0]);
11007 }
11008 operands[1] = tmp;
11009 }
11010
11011 /* 128-bit constant floating-point values on Darwin should really be loaded
11012 as two parts. However, this premature splitting is a problem when DFmode
11013 values can go into Altivec registers. */
11014 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
11015 && !reg_addr[DFmode].scalar_in_vmx_p)
11016 {
11017 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
11018 simplify_gen_subreg (DFmode, operands[1], mode, 0),
11019 DFmode);
11020 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
11021 GET_MODE_SIZE (DFmode)),
11022 simplify_gen_subreg (DFmode, operands[1], mode,
11023 GET_MODE_SIZE (DFmode)),
11024 DFmode);
11025 return;
11026 }
11027
11028 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
11029 p1:SD) if p1 is not of floating point class and p0 is spilled as
11030 we can have no analogous movsd_store for this. */
11031 if (lra_in_progress && mode == DDmode
11032 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
11033 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
11034 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
11035 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
11036 {
11037 enum reg_class cl;
11038 int regno = REGNO (SUBREG_REG (operands[1]));
11039
11040 if (!HARD_REGISTER_NUM_P (regno))
11041 {
11042 cl = reg_preferred_class (regno);
11043 regno = reg_renumber[regno];
11044 if (regno < 0)
11045 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
11046 }
11047 if (regno >= 0 && ! FP_REGNO_P (regno))
11048 {
11049 mode = SDmode;
11050 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
11051 operands[1] = SUBREG_REG (operands[1]);
11052 }
11053 }
11054 if (lra_in_progress
11055 && mode == SDmode
11056 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
11057 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
11058 && (REG_P (operands[1])
11059 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
11060 {
11061 int regno = reg_or_subregno (operands[1]);
11062 enum reg_class cl;
11063
11064 if (!HARD_REGISTER_NUM_P (regno))
11065 {
11066 cl = reg_preferred_class (regno);
11067 gcc_assert (cl != NO_REGS);
11068 regno = reg_renumber[regno];
11069 if (regno < 0)
11070 regno = ira_class_hard_regs[cl][0];
11071 }
11072 if (FP_REGNO_P (regno))
11073 {
11074 if (GET_MODE (operands[0]) != DDmode)
11075 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
11076 emit_insn (gen_movsd_store (operands[0], operands[1]));
11077 }
11078 else if (INT_REGNO_P (regno))
11079 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
11080 else
11081 gcc_unreachable ();
11082 return;
11083 }
11084 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
11085 p:DD)) if p0 is not of floating point class and p1 is spilled as
11086 we can have no analogous movsd_load for this. */
11087 if (lra_in_progress && mode == DDmode
11088 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
11089 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
11090 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
11091 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
11092 {
11093 enum reg_class cl;
11094 int regno = REGNO (SUBREG_REG (operands[0]));
11095
11096 if (!HARD_REGISTER_NUM_P (regno))
11097 {
11098 cl = reg_preferred_class (regno);
11099 regno = reg_renumber[regno];
11100 if (regno < 0)
11101 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
11102 }
11103 if (regno >= 0 && ! FP_REGNO_P (regno))
11104 {
11105 mode = SDmode;
11106 operands[0] = SUBREG_REG (operands[0]);
11107 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
11108 }
11109 }
11110 if (lra_in_progress
11111 && mode == SDmode
11112 && (REG_P (operands[0])
11113 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
11114 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
11115 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
11116 {
11117 int regno = reg_or_subregno (operands[0]);
11118 enum reg_class cl;
11119
11120 if (!HARD_REGISTER_NUM_P (regno))
11121 {
11122 cl = reg_preferred_class (regno);
11123 gcc_assert (cl != NO_REGS);
11124 regno = reg_renumber[regno];
11125 if (regno < 0)
11126 regno = ira_class_hard_regs[cl][0];
11127 }
11128 if (FP_REGNO_P (regno))
11129 {
11130 if (GET_MODE (operands[1]) != DDmode)
11131 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
11132 emit_insn (gen_movsd_load (operands[0], operands[1]));
11133 }
11134 else if (INT_REGNO_P (regno))
11135 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
11136 else
11137 gcc_unreachable ();
11138 return;
11139 }
11140
11141 /* FIXME: In the long term, this switch statement should go away
11142 and be replaced by a sequence of tests based on things like
11143 mode == Pmode. */
11144 switch (mode)
11145 {
11146 case E_HImode:
11147 case E_QImode:
11148 if (CONSTANT_P (operands[1])
11149 && !CONST_INT_P (operands[1]))
11150 operands[1] = force_const_mem (mode, operands[1]);
11151 break;
11152
11153 case E_TFmode:
11154 case E_TDmode:
11155 case E_IFmode:
11156 case E_KFmode:
11157 if (FLOAT128_2REG_P (mode))
11158 rs6000_eliminate_indexed_memrefs (operands);
11159 /* fall through */
11160
11161 case E_DFmode:
11162 case E_DDmode:
11163 case E_SFmode:
11164 case E_SDmode:
11165 if (CONSTANT_P (operands[1])
11166 && ! easy_fp_constant (operands[1], mode))
11167 operands[1] = force_const_mem (mode, operands[1]);
11168 break;
11169
11170 case E_V16QImode:
11171 case E_V8HImode:
11172 case E_V4SFmode:
11173 case E_V4SImode:
11174 case E_V2DFmode:
11175 case E_V2DImode:
11176 case E_V1TImode:
11177 if (CONSTANT_P (operands[1])
11178 && !easy_vector_constant (operands[1], mode))
11179 operands[1] = force_const_mem (mode, operands[1]);
11180 break;
11181
11182 case E_OOmode:
11183 case E_XOmode:
11184 if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
11185 error ("%qs is an opaque type, and you cannot set it to other values",
11186 (mode == OOmode) ? "__vector_pair" : "__vector_quad");
11187 break;
11188
11189 case E_SImode:
11190 case E_DImode:
11191 /* Use the default pattern for the address of ELF small data. */
11192 if (TARGET_ELF
11193 && mode == Pmode
11194 && DEFAULT_ABI == ABI_V4
11195 && (SYMBOL_REF_P (operands[1])
11196 || GET_CODE (operands[1]) == CONST)
11197 && small_data_operand (operands[1], mode))
11198 {
11199 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11200 return;
11201 }
11202
11203 /* Use the default pattern for loading up PC-relative addresses. */
11204 if (TARGET_PCREL && mode == Pmode
11205 && pcrel_local_or_external_address (operands[1], Pmode))
11206 {
11207 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11208 return;
11209 }
11210
11211 if (DEFAULT_ABI == ABI_V4
11212 && mode == Pmode && mode == SImode
11213 && flag_pic == 1 && got_operand (operands[1], mode))
11214 {
11215 emit_insn (gen_movsi_got (operands[0], operands[1]));
11216 return;
11217 }
11218
11219 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
11220 && TARGET_NO_TOC_OR_PCREL
11221 && ! flag_pic
11222 && mode == Pmode
11223 && CONSTANT_P (operands[1])
11224 && GET_CODE (operands[1]) != HIGH
11225 && !CONST_INT_P (operands[1]))
11226 {
11227 rtx target = (!can_create_pseudo_p ()
11228 ? operands[0]
11229 : gen_reg_rtx (mode));
11230
11231 /* If this is a function address on -mcall-aixdesc,
11232 convert it to the address of the descriptor. */
11233 if (DEFAULT_ABI == ABI_AIX
11234 && SYMBOL_REF_P (operands[1])
11235 && XSTR (operands[1], 0)[0] == '.')
11236 {
11237 const char *name = XSTR (operands[1], 0);
11238 rtx new_ref;
11239 while (*name == '.')
11240 name++;
11241 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
11242 CONSTANT_POOL_ADDRESS_P (new_ref)
11243 = CONSTANT_POOL_ADDRESS_P (operands[1]);
11244 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
11245 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
11246 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
11247 operands[1] = new_ref;
11248 }
11249
11250 if (DEFAULT_ABI == ABI_DARWIN)
11251 {
11252 #if TARGET_MACHO
11253 /* This is not PIC code, but could require the subset of
11254 indirections used by mdynamic-no-pic. */
11255 if (MACHO_DYNAMIC_NO_PIC_P)
11256 {
11257 /* Take care of any required data indirection. */
11258 operands[1] = rs6000_machopic_legitimize_pic_address (
11259 operands[1], mode, operands[0]);
11260 if (operands[0] != operands[1])
11261 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11262 return;
11263 }
11264 #endif
11265 emit_insn (gen_macho_high (Pmode, target, operands[1]));
11266 emit_insn (gen_macho_low (Pmode, operands[0],
11267 target, operands[1]));
11268 return;
11269 }
11270
11271 emit_insn (gen_elf_high (target, operands[1]));
11272 emit_insn (gen_elf_low (operands[0], target, operands[1]));
11273 return;
11274 }
11275
11276 /* If this is a SYMBOL_REF that refers to a constant pool entry,
11277 and we have put it in the TOC, we just need to make a TOC-relative
11278 reference to it. */
11279 if (TARGET_TOC
11280 && SYMBOL_REF_P (operands[1])
11281 && use_toc_relative_ref (operands[1], mode))
11282 operands[1] = create_TOC_reference (operands[1], operands[0]);
11283 else if (mode == Pmode
11284 && CONSTANT_P (operands[1])
11285 && GET_CODE (operands[1]) != HIGH
11286 && ((REG_P (operands[0])
11287 && FP_REGNO_P (REGNO (operands[0])))
11288 || !CONST_INT_P (operands[1])
11289 || (num_insns_constant (operands[1], mode)
11290 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
11291 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
11292 && (TARGET_CMODEL == CMODEL_SMALL
11293 || can_create_pseudo_p ()
11294 || (REG_P (operands[0])
11295 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
11296 {
11297
11298 #if TARGET_MACHO
11299 /* Darwin uses a special PIC legitimizer. */
11300 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
11301 {
11302 operands[1] =
11303 rs6000_machopic_legitimize_pic_address (operands[1], mode,
11304 operands[0]);
11305 if (operands[0] != operands[1])
11306 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11307 return;
11308 }
11309 #endif
11310
11311 /* If we are to limit the number of things we put in the TOC and
11312 this is a symbol plus a constant we can add in one insn,
11313 just put the symbol in the TOC and add the constant. */
11314 if (GET_CODE (operands[1]) == CONST
11315 && TARGET_NO_SUM_IN_TOC
11316 && GET_CODE (XEXP (operands[1], 0)) == PLUS
11317 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
11318 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
11319 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
11320 && ! side_effects_p (operands[0]))
11321 {
11322 rtx sym =
11323 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
11324 rtx other = XEXP (XEXP (operands[1], 0), 1);
11325
11326 sym = force_reg (mode, sym);
11327 emit_insn (gen_add3_insn (operands[0], sym, other));
11328 return;
11329 }
11330
11331 operands[1] = force_const_mem (mode, operands[1]);
11332
11333 if (TARGET_TOC
11334 && SYMBOL_REF_P (XEXP (operands[1], 0))
11335 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
11336 {
11337 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
11338 operands[0]);
11339 operands[1] = gen_const_mem (mode, tocref);
11340 set_mem_alias_set (operands[1], get_TOC_alias_set ());
11341 }
11342 }
11343 break;
11344
11345 case E_TImode:
11346 if (!VECTOR_MEM_VSX_P (TImode))
11347 rs6000_eliminate_indexed_memrefs (operands);
11348 break;
11349
11350 case E_PTImode:
11351 rs6000_eliminate_indexed_memrefs (operands);
11352 break;
11353
11354 default:
11355 fatal_insn ("bad move", gen_rtx_SET (dest, source));
11356 }
11357
11358 /* Above, we may have called force_const_mem which may have returned
11359 an invalid address. If we can, fix this up; otherwise, reload will
11360 have to deal with it. */
11361 if (MEM_P (operands[1]))
11362 operands[1] = validize_mem (operands[1]);
11363
11364 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11365 }
11366 \f
11367
11368 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
11369 static void
11370 init_float128_ibm (machine_mode mode)
11371 {
11372 if (!TARGET_XL_COMPAT)
11373 {
11374 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
11375 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
11376 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
11377 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
11378
11379 if (!TARGET_HARD_FLOAT)
11380 {
11381 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
11382 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
11383 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
11384 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
11385 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
11386 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
11387 set_optab_libfunc (le_optab, mode, "__gcc_qle");
11388 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
11389
11390 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
11391 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
11392 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
11393 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
11394 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
11395 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
11396 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
11397 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
11398 }
11399 }
11400 else
11401 {
11402 set_optab_libfunc (add_optab, mode, "_xlqadd");
11403 set_optab_libfunc (sub_optab, mode, "_xlqsub");
11404 set_optab_libfunc (smul_optab, mode, "_xlqmul");
11405 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
11406 }
11407
11408 /* Add various conversions for IFmode to use the traditional TFmode
11409 names. */
11410 if (mode == IFmode)
11411 {
11412 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
11413 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
11414 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
11415 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
11416 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
11417 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
11418
11419 set_conv_libfunc (sfix_optab, DImode, mode, "__fixtfdi");
11420 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunstfdi");
11421
11422 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatditf");
11423 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatunditf");
11424
11425 if (TARGET_POWERPC64)
11426 {
11427 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
11428 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
11429 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
11430 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
11431 }
11432 }
11433 }
11434
11435 /* Set up IEEE 128-bit floating point routines. Use different names if the
11436 arguments can be passed in a vector register. The historical PowerPC
11437 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
11438 continue to use that if we aren't using vector registers to pass IEEE
11439 128-bit floating point. */
11440
11441 static void
11442 init_float128_ieee (machine_mode mode)
11443 {
11444 if (FLOAT128_VECTOR_P (mode))
11445 {
11446 set_optab_libfunc (add_optab, mode, "__addkf3");
11447 set_optab_libfunc (sub_optab, mode, "__subkf3");
11448 set_optab_libfunc (neg_optab, mode, "__negkf2");
11449 set_optab_libfunc (smul_optab, mode, "__mulkf3");
11450 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
11451 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
11452 set_optab_libfunc (abs_optab, mode, "__abskf2");
11453 set_optab_libfunc (powi_optab, mode, "__powikf2");
11454
11455 set_optab_libfunc (eq_optab, mode, "__eqkf2");
11456 set_optab_libfunc (ne_optab, mode, "__nekf2");
11457 set_optab_libfunc (gt_optab, mode, "__gtkf2");
11458 set_optab_libfunc (ge_optab, mode, "__gekf2");
11459 set_optab_libfunc (lt_optab, mode, "__ltkf2");
11460 set_optab_libfunc (le_optab, mode, "__lekf2");
11461 set_optab_libfunc (unord_optab, mode, "__unordkf2");
11462
11463 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
11464 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
11465 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
11466 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
11467
11468 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
11469 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11470 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
11471
11472 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
11473 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11474 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
11475
11476 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
11477 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
11478 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
11479 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
11480 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
11481 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
11482
11483 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
11484 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
11485 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
11486 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
11487
11488 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
11489 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
11490 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
11491 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
11492
11493 if (TARGET_POWERPC64)
11494 {
11495 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti_sw");
11496 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti_sw");
11497 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf_sw");
11498 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf_sw");
11499 }
11500 }
11501
11502 else
11503 {
11504 set_optab_libfunc (add_optab, mode, "_q_add");
11505 set_optab_libfunc (sub_optab, mode, "_q_sub");
11506 set_optab_libfunc (neg_optab, mode, "_q_neg");
11507 set_optab_libfunc (smul_optab, mode, "_q_mul");
11508 set_optab_libfunc (sdiv_optab, mode, "_q_div");
11509 if (TARGET_PPC_GPOPT)
11510 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
11511
11512 set_optab_libfunc (eq_optab, mode, "_q_feq");
11513 set_optab_libfunc (ne_optab, mode, "_q_fne");
11514 set_optab_libfunc (gt_optab, mode, "_q_fgt");
11515 set_optab_libfunc (ge_optab, mode, "_q_fge");
11516 set_optab_libfunc (lt_optab, mode, "_q_flt");
11517 set_optab_libfunc (le_optab, mode, "_q_fle");
11518
11519 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
11520 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
11521 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
11522 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
11523 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
11524 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
11525 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
11526 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
11527 }
11528 }
11529
11530 static void
11531 rs6000_init_libfuncs (void)
11532 {
11533 /* __float128 support. */
11534 if (TARGET_FLOAT128_TYPE)
11535 {
11536 init_float128_ibm (IFmode);
11537 init_float128_ieee (KFmode);
11538 }
11539
11540 /* AIX/Darwin/64-bit Linux quad floating point routines. */
11541 if (TARGET_LONG_DOUBLE_128)
11542 {
11543 if (!TARGET_IEEEQUAD)
11544 init_float128_ibm (TFmode);
11545
11546 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
11547 else
11548 init_float128_ieee (TFmode);
11549 }
11550 }
11551
11552 /* Emit a potentially record-form instruction, setting DST from SRC.
11553 If DOT is 0, that is all; otherwise, set CCREG to the result of the
11554 signed comparison of DST with zero. If DOT is 1, the generated RTL
11555 doesn't care about the DST result; if DOT is 2, it does. If CCREG
11556 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
11557 a separate COMPARE. */
11558
11559 void
11560 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
11561 {
11562 if (dot == 0)
11563 {
11564 emit_move_insn (dst, src);
11565 return;
11566 }
11567
11568 if (cc_reg_not_cr0_operand (ccreg, CCmode))
11569 {
11570 emit_move_insn (dst, src);
11571 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
11572 return;
11573 }
11574
11575 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
11576 if (dot == 1)
11577 {
11578 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
11579 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
11580 }
11581 else
11582 {
11583 rtx set = gen_rtx_SET (dst, src);
11584 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
11585 }
11586 }
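
/* Illustrative example: with SRC = (and:DI rX rY), DOT = 2 and
   CCREG = CR0, the PARALLEL above can match a record-form
   "and. rD,rX,rY", which writes rD and sets CR0 from a signed compare
   of the result with zero; with a CC register other than CR0 the
   fallback is the two-insn sequence "and rD,rX,rY ; cmpdi CCREG,rD,0".  */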
11587
11588 \f
11589 /* A validation routine: say whether CODE, a condition code, and MODE
11590 match. The other alternatives either don't make sense or should
11591 never be generated. */
11592
11593 void
11594 validate_condition_mode (enum rtx_code code, machine_mode mode)
11595 {
11596 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
11597 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
11598 && GET_MODE_CLASS (mode) == MODE_CC);
11599
11600 /* These don't make sense. */
11601 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
11602 || mode != CCUNSmode);
11603
11604 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
11605 || mode == CCUNSmode);
11606
11607 gcc_assert (mode == CCFPmode
11608 || (code != ORDERED && code != UNORDERED
11609 && code != UNEQ && code != LTGT
11610 && code != UNGT && code != UNLT
11611 && code != UNGE && code != UNLE));
11612
11613 /* These are invalid; the information is not there. */
11614 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
11615 }
11616
11617 \f
11618 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
11619 rldicl, rldicr, or rldic instruction in mode MODE. If so, and if E is
11620 non-null, store there the bit offset (counted from the right) where
11621 the single stretch of 1 bits begins; and similarly for B, the bit
11622 offset where it ends. */
11623
11624 bool
11625 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
11626 {
11627 unsigned HOST_WIDE_INT val = INTVAL (mask);
11628 unsigned HOST_WIDE_INT bit;
11629 int nb, ne;
11630 int n = GET_MODE_PRECISION (mode);
11631
11632 if (mode != DImode && mode != SImode)
11633 return false;
11634
11635 if (INTVAL (mask) >= 0)
11636 {
11637 bit = val & -val;
11638 ne = exact_log2 (bit);
11639 nb = exact_log2 (val + bit);
11640 }
11641 else if (val + 1 == 0)
11642 {
11643 nb = n;
11644 ne = 0;
11645 }
11646 else if (val & 1)
11647 {
11648 val = ~val;
11649 bit = val & -val;
11650 nb = exact_log2 (bit);
11651 ne = exact_log2 (val + bit);
11652 }
11653 else
11654 {
11655 bit = val & -val;
11656 ne = exact_log2 (bit);
11657 if (val + bit == 0)
11658 nb = n;
11659 else
11660 nb = 0;
11661 }
11662
11663 nb--;
11664
11665 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
11666 return false;
11667
11668 if (b)
11669 *b = nb;
11670 if (e)
11671 *e = ne;
11672
11673 return true;
11674 }
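
/* Illustrative examples: in SImode, MASK = 0x00FFFF00 is a single run
   of ones, so this returns true with *B = 23 and *E = 8; the
   wrap-around mask 0xFF0000FF also succeeds, with *B = 7 and *E = 24.
   MASK = 0x00FF00FF has two separate runs and returns false.  */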
11675
11676 bool
11677 rs6000_is_valid_rotate_dot_mask (rtx mask, machine_mode mode)
11678 {
11679 int nb, ne;
11680 if (rs6000_is_valid_mask (mask, &nb, &ne, mode) && nb >= ne && ne > 0)
11681 {
11682 if (TARGET_64BIT)
11683 return true;
11684 /* *rotldi3_mask_dot requires for -m32 -mpowerpc64 that the mask is
11685 <= 0x7fffffff. */
11686 return (UINTVAL (mask) << (63 - nb)) <= 0x7fffffff;
11687 }
11688
11689 return false;
11690 }
11691
11692 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
11693 or rldicr instruction, to implement an AND with it in mode MODE. */
11694
11695 bool
11696 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
11697 {
11698 int nb, ne;
11699
11700 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11701 return false;
11702
11703 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
11704 does not wrap. */
11705 if (mode == DImode)
11706 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
11707
11708 /* For SImode, rlwinm can do everything. */
11709 if (mode == SImode)
11710 return (nb < 32 && ne < 32);
11711
11712 return false;
11713 }
11714
11715 /* Return the instruction template for an AND with mask in mode MODE, with
11716 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11717
11718 const char *
11719 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
11720 {
11721 int nb, ne;
11722
11723 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
11724 gcc_unreachable ();
11725
11726 if (mode == DImode && ne == 0)
11727 {
11728 operands[3] = GEN_INT (63 - nb);
11729 if (dot)
11730 return "rldicl. %0,%1,0,%3";
11731 return "rldicl %0,%1,0,%3";
11732 }
11733
11734 if (mode == DImode && nb == 63)
11735 {
11736 operands[3] = GEN_INT (63 - ne);
11737 if (dot)
11738 return "rldicr. %0,%1,0,%3";
11739 return "rldicr %0,%1,0,%3";
11740 }
11741
11742 if (nb < 32 && ne < 32)
11743 {
11744 operands[3] = GEN_INT (31 - nb);
11745 operands[4] = GEN_INT (31 - ne);
11746 if (dot)
11747 return "rlwinm. %0,%1,0,%3,%4";
11748 return "rlwinm %0,%1,0,%3,%4";
11749 }
11750
11751 gcc_unreachable ();
11752 }
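
/* Illustrative examples: an AND with 0x00FFFFFFFFFFFFFF in DImode hits
   the first case (ne == 0) and emits "rldicl %0,%1,0,8"; an AND with
   0xFFFFFFFFFFFFFF00 hits the second (nb == 63) and emits
   "rldicr %0,%1,0,55".  */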
11753
11754 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
11755 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
11756 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
11757
11758 bool
11759 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
11760 {
11761 int nb, ne;
11762
11763 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11764 return false;
11765
11766 int n = GET_MODE_PRECISION (mode);
11767 int sh = -1;
11768
11769 if (CONST_INT_P (XEXP (shift, 1)))
11770 {
11771 sh = INTVAL (XEXP (shift, 1));
11772 if (sh < 0 || sh >= n)
11773 return false;
11774 }
11775
11776 rtx_code code = GET_CODE (shift);
11777
11778 /* Convert any shift by 0 to a rotate, to simplify below code. */
11779 if (sh == 0)
11780 code = ROTATE;
11781
11782 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11783 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11784 code = ASHIFT;
11785 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11786 {
11787 code = LSHIFTRT;
11788 sh = n - sh;
11789 }
11790
11791 /* DImode rotates need rld*. */
11792 if (mode == DImode && code == ROTATE)
11793 return (nb == 63 || ne == 0 || ne == sh);
11794
11795 /* SImode rotates need rlw*. */
11796 if (mode == SImode && code == ROTATE)
11797 return (nb < 32 && ne < 32 && sh < 32);
11798
11799 /* Wrap-around masks are only okay for rotates. */
11800 if (ne > nb)
11801 return false;
11802
11803 /* Variable shifts are only okay for rotates. */
11804 if (sh < 0)
11805 return false;
11806
11807 /* Don't allow ASHIFT if the mask is wrong for that. */
11808 if (code == ASHIFT && ne < sh)
11809 return false;
11810
11811 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
11812 if the mask is wrong for that. */
11813 if (nb < 32 && ne < 32 && sh < 32
11814 && !(code == LSHIFTRT && nb >= 32 - sh))
11815 return true;
11816
11817 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
11818 if the mask is wrong for that. */
11819 if (code == LSHIFTRT)
11820 sh = 64 - sh;
11821 if (nb == 63 || ne == 0 || ne == sh)
11822 return !(code == LSHIFTRT && nb >= sh);
11823
11824 return false;
11825 }
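
/* Illustrative example: in DImode, an ASHIFT by 8 under the mask
   0xFFFFFFFFFFFFFF00 is accepted (nb == 63, ne == 8 == sh), and
   rs6000_insn_for_shift_mask below renders it as "rldicr %0,%1,8,55",
   i.e. a plain sldi.  */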
11826
11827 /* Return the instruction template for a shift with mask in mode MODE, with
11828 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11829
11830 const char *
11831 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
11832 {
11833 int nb, ne;
11834
11835 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11836 gcc_unreachable ();
11837
11838 if (mode == DImode && ne == 0)
11839 {
11840 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11841 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
11842 operands[3] = GEN_INT (63 - nb);
11843 if (dot)
11844 return "rld%I2cl. %0,%1,%2,%3";
11845 return "rld%I2cl %0,%1,%2,%3";
11846 }
11847
11848 if (mode == DImode && nb == 63)
11849 {
11850 operands[3] = GEN_INT (63 - ne);
11851 if (dot)
11852 return "rld%I2cr. %0,%1,%2,%3";
11853 return "rld%I2cr %0,%1,%2,%3";
11854 }
11855
11856 if (mode == DImode
11857 && GET_CODE (operands[4]) != LSHIFTRT
11858 && CONST_INT_P (operands[2])
11859 && ne == INTVAL (operands[2]))
11860 {
11861 operands[3] = GEN_INT (63 - nb);
11862 if (dot)
11863 return "rld%I2c. %0,%1,%2,%3";
11864 return "rld%I2c %0,%1,%2,%3";
11865 }
11866
11867 if (nb < 32 && ne < 32)
11868 {
11869 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11870 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11871 operands[3] = GEN_INT (31 - nb);
11872 operands[4] = GEN_INT (31 - ne);
11873 /* This insn can also be a 64-bit rotate with mask that really makes
11874 it just a shift right (with mask); the %h below are to adjust for
11875 that situation (shift count is >= 32 in that case). */
11876 if (dot)
11877 return "rlw%I2nm. %0,%1,%h2,%3,%4";
11878 return "rlw%I2nm %0,%1,%h2,%3,%4";
11879 }
11880
11881 gcc_unreachable ();
11882 }
11883
11884 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
11885 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
11886 ASHIFT, or LSHIFTRT) in mode MODE. */
11887
11888 bool
11889 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
11890 {
11891 int nb, ne;
11892
11893 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11894 return false;
11895
11896 int n = GET_MODE_PRECISION (mode);
11897
11898 int sh = INTVAL (XEXP (shift, 1));
11899 if (sh < 0 || sh >= n)
11900 return false;
11901
11902 rtx_code code = GET_CODE (shift);
11903
11904 /* Convert any shift by 0 to a rotate, to simplify below code. */
11905 if (sh == 0)
11906 code = ROTATE;
11907
11908 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11909 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11910 code = ASHIFT;
11911 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11912 {
11913 code = LSHIFTRT;
11914 sh = n - sh;
11915 }
11916
11917 /* DImode rotates need rldimi. */
11918 if (mode == DImode && code == ROTATE)
11919 return (ne == sh);
11920
11921 /* SImode rotates need rlwimi. */
11922 if (mode == SImode && code == ROTATE)
11923 return (nb < 32 && ne < 32 && sh < 32);
11924
11925 /* Wrap-around masks are only okay for rotates. */
11926 if (ne > nb)
11927 return false;
11928
11929 /* Don't allow ASHIFT if the mask is wrong for that. */
11930 if (code == ASHIFT && ne < sh)
11931 return false;
11932
11933 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
11934 if the mask is wrong for that. */
11935 if (nb < 32 && ne < 32 && sh < 32
11936 && !(code == LSHIFTRT && nb >= 32 - sh))
11937 return true;
11938
11939 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
11940 if the mask is wrong for that. */
11941 if (code == LSHIFTRT)
11942 sh = 64 - sh;
11943 if (ne == sh)
11944 return !(code == LSHIFTRT && nb >= sh);
11945
11946 return false;
11947 }
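
/* Illustrative example: an insert with SHIFT = (ashift x 32) under the
   mask 0xFFFFFFFF00000000 in DImode satisfies ne == sh == 32, so it is
   a valid rldimi; rs6000_insn_for_insert_mask below emits
   "rldimi %0,%1,32,0", placing the low half of %1 into the high half
   of %0.  */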
11948
11949 /* Return the instruction template for an insert with mask in mode MODE, with
11950 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11951
11952 const char *
11953 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
11954 {
11955 int nb, ne;
11956
11957 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11958 gcc_unreachable ();
11959
11960 /* Prefer rldimi because rlwimi is cracked. */
11961 if (TARGET_POWERPC64
11962 && (!dot || mode == DImode)
11963 && GET_CODE (operands[4]) != LSHIFTRT
11964 && ne == INTVAL (operands[2]))
11965 {
11966 operands[3] = GEN_INT (63 - nb);
11967 if (dot)
11968 return "rldimi. %0,%1,%2,%3";
11969 return "rldimi %0,%1,%2,%3";
11970 }
11971
11972 if (nb < 32 && ne < 32)
11973 {
11974 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11975 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11976 operands[3] = GEN_INT (31 - nb);
11977 operands[4] = GEN_INT (31 - ne);
11978 if (dot)
11979 return "rlwimi. %0,%1,%2,%3,%4";
11980 return "rlwimi %0,%1,%2,%3,%4";
11981 }
11982
11983 gcc_unreachable ();
11984 }
11985
11986 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
11987 using two machine instructions. */
11988
11989 bool
11990 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
11991 {
11992 /* There are two kinds of AND we can handle with two insns:
11993 1) those we can do with two rl* insn;
11994 2) ori[s]; xori[s].
11995
11996 We do not handle that last case yet. */
11997
11998 /* If there is just one stretch of ones, we can do it. */
11999 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
12000 return true;
12001
12002 /* Otherwise, fill in the lowest "hole"; if we can do the result with
12003 one insn, we can do the whole thing with two. */
12004 unsigned HOST_WIDE_INT val = INTVAL (c);
12005 unsigned HOST_WIDE_INT bit1 = val & -val;
12006 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
12007 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
12008 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
12009 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
12010 }
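
/* Illustrative worked example: C = 0xFF0FF0 has two runs of ones.
   Above, bit1 = 0x10, bit2 = 0x1000 (the first zero bit of the hole),
   bit3 = 0x10000 (the first one bit above it), and
   C + bit3 - bit2 = 0xFFFFF0 is a single run, so the AND can be done
   with two rl* insns.  */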
12011
12012 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
12013 If EXPAND is true, split rotate-and-mask instructions we generate to
12014 their constituent parts as well (this is used during expand); if DOT
12015 is 1, make the last insn a record-form instruction clobbering the
12016 destination GPR and setting the CC reg (from operands[3]); if 2, set
12017 that GPR as well as the CC reg. */
12018
12019 void
12020 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
12021 {
12022 gcc_assert (!(expand && dot));
12023
12024 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
12025
12026 /* If it is one stretch of ones, it is DImode; shift left, mask, then
12027 shift right. This generates better code than doing the masks without
12028 shifts, or shifting first right and then left. */
12029 int nb, ne;
12030 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
12031 {
12032 gcc_assert (mode == DImode);
12033
12034 int shift = 63 - nb;
12035 if (expand)
12036 {
12037 rtx tmp1 = gen_reg_rtx (DImode);
12038 rtx tmp2 = gen_reg_rtx (DImode);
12039 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
12040 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
12041 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
12042 }
12043 else
12044 {
12045 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
12046 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
12047 emit_move_insn (operands[0], tmp);
12048 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
12049 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
12050 }
12051 return;
12052 }
12053
12054 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
12055 that does the rest. */
12056 unsigned HOST_WIDE_INT bit1 = val & -val;
12057 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
12058 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
12059 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
12060
12061 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
12062 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
12063
12064 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
12065
12066 /* Two "no-rotate"-and-mask instructions, for SImode. */
12067 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
12068 {
12069 gcc_assert (mode == SImode);
12070
12071 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
12072 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
12073 emit_move_insn (reg, tmp);
12074 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
12075 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
12076 return;
12077 }
12078
12079 gcc_assert (mode == DImode);
12080
12081 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
12082 insns; we have to do the first in SImode, because it wraps. */
12083 if (mask2 <= 0xffffffff
12084 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
12085 {
12086 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
12087 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
12088 GEN_INT (mask1));
12089 rtx reg_low = gen_lowpart (SImode, reg);
12090 emit_move_insn (reg_low, tmp);
12091 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
12092 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
12093 return;
12094 }
12095
12096 /* Two rld* insns: rotate, clear the hole in the middle (which now is
12097 at the top end), rotate back and clear the other hole. */
12098 int right = exact_log2 (bit3);
12099 int left = 64 - right;
12100
12101 /* Rotate the mask too. */
12102 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
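/* Continuing the val = 0xff00000000000ff0 example: right = 56, left = 8,
and the rotated mask1 is (0xff00000000000fff >> 56) | (0xfff << 8) =
0xfffff, a contiguous run. The sequence is then roughly: rotldi by 8,
AND with 0xfffff, rotldi by 56, AND with mask2. */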
12103
12104 if (expand)
12105 {
12106 rtx tmp1 = gen_reg_rtx (DImode);
12107 rtx tmp2 = gen_reg_rtx (DImode);
12108 rtx tmp3 = gen_reg_rtx (DImode);
12109 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
12110 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
12111 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
12112 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
12113 }
12114 else
12115 {
12116 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
12117 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
12118 emit_move_insn (operands[0], tmp);
12119 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
12120 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
12121 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
12122 }
12123 }
12124 \f
12125 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
12126 for lfq and stfq insns iff the registers are hard registers. */
12127
12128 int
12129 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
12130 {
12131 /* We might have been passed a SUBREG. */
12132 if (!REG_P (reg1) || !REG_P (reg2))
12133 return 0;
12134
12135 /* We might have been passed non-floating-point registers. */
12136 if (!FP_REGNO_P (REGNO (reg1))
12137 || !FP_REGNO_P (REGNO (reg2)))
12138 return 0;
12139
12140 return (REGNO (reg1) == REGNO (reg2) - 1);
12141 }
12142
12143 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
12144 addr1 and addr2 must be in consecutive memory locations
12145 (addr2 == addr1 + 8). */
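/* For example, two DFmode loads from 8(r3) and 16(r3) into adjacent FPRs
pass both this test and registers_ok_for_quad_peep, so the peephole can
combine them into a single lfq. */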
12146
12147 int
12148 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
12149 {
12150 rtx addr1, addr2;
12151 unsigned int reg1, reg2;
12152 int offset1, offset2;
12153
12154 /* The mems cannot be volatile. */
12155 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
12156 return 0;
12157
12158 addr1 = XEXP (mem1, 0);
12159 addr2 = XEXP (mem2, 0);
12160
12161 /* Extract an offset (if used) from the first addr. */
12162 if (GET_CODE (addr1) == PLUS)
12163 {
12164 /* If not a REG, return zero. */
12165 if (!REG_P (XEXP (addr1, 0)))
12166 return 0;
12167 else
12168 {
12169 reg1 = REGNO (XEXP (addr1, 0));
12170 /* The offset must be constant! */
12171 if (!CONST_INT_P (XEXP (addr1, 1)))
12172 return 0;
12173 offset1 = INTVAL (XEXP (addr1, 1));
12174 }
12175 }
12176 else if (!REG_P (addr1))
12177 return 0;
12178 else
12179 {
12180 reg1 = REGNO (addr1);
12181 /* This was a simple (mem (reg)) expression. Offset is 0. */
12182 offset1 = 0;
12183 }
12184
12185 /* And now for the second addr. */
12186 if (GET_CODE (addr2) == PLUS)
12187 {
12188 /* If not a REG, return zero. */
12189 if (!REG_P (XEXP (addr2, 0)))
12190 return 0;
12191 else
12192 {
12193 reg2 = REGNO (XEXP (addr2, 0));
12194 /* The offset must be constant. */
12195 if (!CONST_INT_P (XEXP (addr2, 1)))
12196 return 0;
12197 offset2 = INTVAL (XEXP (addr2, 1));
12198 }
12199 }
12200 else if (!REG_P (addr2))
12201 return 0;
12202 else
12203 {
12204 reg2 = REGNO (addr2);
12205 /* This was a simple (mem (reg)) expression. Offset is 0. */
12206 offset2 = 0;
12207 }
12208
12209 /* Both of these must have the same base register. */
12210 if (reg1 != reg2)
12211 return 0;
12212
12213 /* The offset for the second addr must be 8 more than the first addr. */
12214 if (offset2 != offset1 + 8)
12215 return 0;
12216
12217 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
12218 instructions. */
12219 return 1;
12220 }
12221 \f
12222 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
12223 need to use DDmode; in all other cases we can use the same mode. */
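/* That is, an SDmode value spilled during LRA gets an 8-byte DDmode slot,
matching the DDmode layout the value has while it sits in an FPR. */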
12224 static machine_mode
12225 rs6000_secondary_memory_needed_mode (machine_mode mode)
12226 {
12227 if (lra_in_progress && mode == SDmode)
12228 return DDmode;
12229 return mode;
12230 }
12231
12232 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
12233 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
12234 only work on the traditional altivec registers, note if an altivec register
12235 was chosen. */
12236
12237 static enum rs6000_reg_type
12238 register_to_reg_type (rtx reg, bool *is_altivec)
12239 {
12240 HOST_WIDE_INT regno;
12241 enum reg_class rclass;
12242
12243 if (SUBREG_P (reg))
12244 reg = SUBREG_REG (reg);
12245
12246 if (!REG_P (reg))
12247 return NO_REG_TYPE;
12248
12249 regno = REGNO (reg);
12250 if (!HARD_REGISTER_NUM_P (regno))
12251 {
12252 if (!lra_in_progress && !reload_completed)
12253 return PSEUDO_REG_TYPE;
12254
12255 regno = true_regnum (reg);
12256 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
12257 return PSEUDO_REG_TYPE;
12258 }
12259
12260 gcc_assert (regno >= 0);
12261
12262 if (is_altivec && ALTIVEC_REGNO_P (regno))
12263 *is_altivec = true;
12264
12265 rclass = rs6000_regno_regclass[regno];
12266 return reg_class_to_reg_type[(int)rclass];
12267 }
12268
12269 /* Helper function to return the cost of adding a TOC entry address. */
12270
12271 static inline int
12272 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
12273 {
12274 int ret;
12275
12276 if (TARGET_CMODEL != CMODEL_SMALL)
12277 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
12278
12279 else
12280 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
12281
12282 return ret;
12283 }
12284
12285 /* Helper function for rs6000_secondary_reload to determine whether the memory
12286 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
12287 needs reloading. Return negative if the memory is not handled by the memory
12288 helper functions and to try a different reload method, 0 if no additional
12289 instructions are needed, and positive to give the extra cost for the
12290 memory. */
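/* For example, a PRE_INC address for a register class without update
forms is still handled here at an extra cost of 1 (for the add the reload
helper must emit), while an address the helpers cannot fix up at all
returns -1. */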
12291
12292 static int
12293 rs6000_secondary_reload_memory (rtx addr,
12294 enum reg_class rclass,
12295 machine_mode mode)
12296 {
12297 int extra_cost = 0;
12298 rtx reg, and_arg, plus_arg0, plus_arg1;
12299 addr_mask_type addr_mask;
12300 const char *type = NULL;
12301 const char *fail_msg = NULL;
12302
12303 if (GPR_REG_CLASS_P (rclass))
12304 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
12305
12306 else if (rclass == FLOAT_REGS)
12307 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
12308
12309 else if (rclass == ALTIVEC_REGS)
12310 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
12311
12312 /* For the combined VSX_REGS, turn off Altivec AND -16. */
12313 else if (rclass == VSX_REGS)
12314 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
12315 & ~RELOAD_REG_AND_M16);
12316
12317 /* If the register allocator hasn't made up its mind yet on the register
12318 class to use, settle on defaults to use. */
12319 else if (rclass == NO_REGS)
12320 {
12321 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
12322 & ~RELOAD_REG_AND_M16);
12323
12324 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
12325 addr_mask &= ~(RELOAD_REG_INDEXED
12326 | RELOAD_REG_PRE_INCDEC
12327 | RELOAD_REG_PRE_MODIFY);
12328 }
12329
12330 else
12331 addr_mask = 0;
12332
12333 /* If the register isn't valid in this register class, just return now. */
12334 if ((addr_mask & RELOAD_REG_VALID) == 0)
12335 {
12336 if (TARGET_DEBUG_ADDR)
12337 {
12338 fprintf (stderr,
12339 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12340 "not valid in class\n",
12341 GET_MODE_NAME (mode), reg_class_names[rclass]);
12342 debug_rtx (addr);
12343 }
12344
12345 return -1;
12346 }
12347
12348 switch (GET_CODE (addr))
12349 {
12350 /* Does the register class support auto update forms for this mode? We
12351 don't need a scratch register, since the powerpc only supports
12352 PRE_INC, PRE_DEC, and PRE_MODIFY. */
12353 case PRE_INC:
12354 case PRE_DEC:
12355 reg = XEXP (addr, 0);
12356 if (!base_reg_operand (addr, GET_MODE (reg)))
12357 {
12358 fail_msg = "no base register #1";
12359 extra_cost = -1;
12360 }
12361
12362 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
12363 {
12364 extra_cost = 1;
12365 type = "update";
12366 }
12367 break;
12368
12369 case PRE_MODIFY:
12370 reg = XEXP (addr, 0);
12371 plus_arg1 = XEXP (addr, 1);
12372 if (!base_reg_operand (reg, GET_MODE (reg))
12373 || GET_CODE (plus_arg1) != PLUS
12374 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
12375 {
12376 fail_msg = "bad PRE_MODIFY";
12377 extra_cost = -1;
12378 }
12379
12380 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
12381 {
12382 extra_cost = 1;
12383 type = "update";
12384 }
12385 break;
12386
12387 /* Do we need to simulate AND -16 to clear the bottom address bits used
12388 in VMX load/stores? Only allow the AND for vector sizes. */
12389 case AND:
12390 and_arg = XEXP (addr, 0);
12391 if (GET_MODE_SIZE (mode) != 16
12392 || !CONST_INT_P (XEXP (addr, 1))
12393 || INTVAL (XEXP (addr, 1)) != -16)
12394 {
12395 fail_msg = "bad Altivec AND #1";
12396 extra_cost = -1;
12397 }
12398
12399 if (rclass != ALTIVEC_REGS)
12400 {
12401 if (legitimate_indirect_address_p (and_arg, false))
12402 extra_cost = 1;
12403
12404 else if (legitimate_indexed_address_p (and_arg, false))
12405 extra_cost = 2;
12406
12407 else
12408 {
12409 fail_msg = "bad Altivec AND #2";
12410 extra_cost = -1;
12411 }
12412
12413 type = "and";
12414 }
12415 break;
12416
12417 /* If this is an indirect address, make sure it is a base register. */
12418 case REG:
12419 case SUBREG:
12420 if (!legitimate_indirect_address_p (addr, false))
12421 {
12422 extra_cost = 1;
12423 type = "move";
12424 }
12425 break;
12426
12427 /* If this is an indexed address, make sure the register class can handle
12428 indexed addresses for this mode. */
12429 case PLUS:
12430 plus_arg0 = XEXP (addr, 0);
12431 plus_arg1 = XEXP (addr, 1);
12432
12433 /* (plus (plus (reg) (constant)) (constant)) is generated during
12434 push_reload processing, so handle it now. */
12435 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
12436 {
12437 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12438 {
12439 extra_cost = 1;
12440 type = "offset";
12441 }
12442 }
12443
12444 /* (plus (plus (reg) (constant)) (reg)) is also generated during
12445 push_reload processing, so handle it now. */
12446 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
12447 {
12448 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12449 {
12450 extra_cost = 1;
12451 type = "indexed #2";
12452 }
12453 }
12454
12455 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
12456 {
12457 fail_msg = "no base register #2";
12458 extra_cost = -1;
12459 }
12460
12461 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
12462 {
12463 if ((addr_mask & RELOAD_REG_INDEXED) == 0
12464 || !legitimate_indexed_address_p (addr, false))
12465 {
12466 extra_cost = 1;
12467 type = "indexed";
12468 }
12469 }
12470
12471 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
12472 && CONST_INT_P (plus_arg1))
12473 {
12474 if (!quad_address_offset_p (INTVAL (plus_arg1)))
12475 {
12476 extra_cost = 1;
12477 type = "vector d-form offset";
12478 }
12479 }
12480
12481 /* Make sure the register class can handle offset addresses. */
12482 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
12483 {
12484 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12485 {
12486 extra_cost = 1;
12487 type = "offset #2";
12488 }
12489 }
12490
12491 else
12492 {
12493 fail_msg = "bad PLUS";
12494 extra_cost = -1;
12495 }
12496
12497 break;
12498
12499 case LO_SUM:
12500 /* Quad offsets are restricted and can't handle normal addresses. */
12501 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12502 {
12503 extra_cost = -1;
12504 type = "vector d-form lo_sum";
12505 }
12506
12507 else if (!legitimate_lo_sum_address_p (mode, addr, false))
12508 {
12509 fail_msg = "bad LO_SUM";
12510 extra_cost = -1;
12511 }
12512
12513 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12514 {
12515 extra_cost = 1;
12516 type = "lo_sum";
12517 }
12518 break;
12519
12520 /* Static addresses need to create a TOC entry. */
12521 case CONST:
12522 case SYMBOL_REF:
12523 case LABEL_REF:
12524 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12525 {
12526 extra_cost = -1;
12527 type = "vector d-form lo_sum #2";
12528 }
12529
12530 else
12531 {
12532 type = "address";
12533 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
12534 }
12535 break;
12536
12537 /* TOC references look like offsettable memory. */
12538 case UNSPEC:
12539 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
12540 {
12541 fail_msg = "bad UNSPEC";
12542 extra_cost = -1;
12543 }
12544
12545 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12546 {
12547 extra_cost = -1;
12548 type = "vector d-form lo_sum #3";
12549 }
12550
12551 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12552 {
12553 extra_cost = 1;
12554 type = "toc reference";
12555 }
12556 break;
12557
12558 default:
12559 {
12560 fail_msg = "bad address";
12561 extra_cost = -1;
12562 }
12563 }
12564
12565 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
12566 {
12567 if (extra_cost < 0)
12568 fprintf (stderr,
12569 "rs6000_secondary_reload_memory error: mode = %s, "
12570 "class = %s, addr_mask = '%s', %s\n",
12571 GET_MODE_NAME (mode),
12572 reg_class_names[rclass],
12573 rs6000_debug_addr_mask (addr_mask, false),
12574 (fail_msg != NULL) ? fail_msg : "<bad address>");
12575
12576 else
12577 fprintf (stderr,
12578 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12579 "addr_mask = '%s', extra cost = %d, %s\n",
12580 GET_MODE_NAME (mode),
12581 reg_class_names[rclass],
12582 rs6000_debug_addr_mask (addr_mask, false),
12583 extra_cost,
12584 (type) ? type : "<none>");
12585
12586 debug_rtx (addr);
12587 }
12588
12589 return extra_cost;
12590 }
12591
12592 /* Helper function for rs6000_secondary_reload to return true if a move to a
12593 different register class is really a simple move. */
12594
12595 static bool
12596 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
12597 enum rs6000_reg_type from_type,
12598 machine_mode mode)
12599 {
12600 int size = GET_MODE_SIZE (mode);
12601
12602 /* Add support for various direct moves available. In this function, we only
12603 look at cases where we don't need any extra registers, and one or more
12604 simple move insns are issued. Originally small integers are not allowed
12605 in FPR/VSX registers. Single precision binary floating is not a simple
12606 move because we need to convert to the single precision memory layout.
12607 The 4-byte SDmode can be moved. TDmode values are disallowed since they
12608 need special direct move handling, which we do not support yet. */
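/* For example, on a 64-bit ISA 2.07 target a DImode move between a GPR
and a VSX register is a single mtvsrd/mfvsrd and so is simple; SFmode
is not, since it also needs the xscvdpspn/xscvspdpn conversions handled
in rs6000_secondary_reload_direct_move. */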
12609 if (TARGET_DIRECT_MOVE
12610 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12611 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
12612 {
12613 if (TARGET_POWERPC64)
12614 {
12615 /* ISA 2.07: MTVSRD or MFVSRD. */
12616 if (size == 8)
12617 return true;
12618
12619 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
12620 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
12621 return true;
12622 }
12623
12624 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12625 if (TARGET_P8_VECTOR)
12626 {
12627 if (mode == SImode)
12628 return true;
12629
12630 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
12631 return true;
12632 }
12633
12634 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12635 if (mode == SDmode)
12636 return true;
12637 }
12638
12639 /* Move to/from SPR. */
12640 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
12641 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
12642 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
12643 return true;
12644
12645 return false;
12646 }
12647
12648 /* Direct move helper function for rs6000_secondary_reload. Handle all of
12649 the special direct moves that involve allocating an extra register.
12650 Return true if there is such a helper (recording its insn code and extra
12651 cost in SRI), or false if not. */
12652
12653 static bool
12654 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
12655 enum rs6000_reg_type from_type,
12656 machine_mode mode,
12657 secondary_reload_info *sri,
12658 bool altivec_p)
12659 {
12660 bool ret = false;
12661 enum insn_code icode = CODE_FOR_nothing;
12662 int cost = 0;
12663 int size = GET_MODE_SIZE (mode);
12664
12665 if (TARGET_POWERPC64 && size == 16)
12666 {
12667 /* Handle moving 128-bit values from GPRs to VSX registers on
12668 ISA 2.07 (power8, power9) when running in 64-bit mode using
12669 XXPERMDI to glue the two 64-bit values back together. */
12670 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12671 {
12672 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
12673 icode = reg_addr[mode].reload_vsx_gpr;
12674 }
12675
12676 /* Handle moving 128-bit values from VSX registers to GPRs on
12677 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
12678 bottom 64-bit value. */
12679 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12680 {
12681 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
12682 icode = reg_addr[mode].reload_gpr_vsx;
12683 }
12684 }
12685
12686 else if (TARGET_POWERPC64 && mode == SFmode)
12687 {
12688 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12689 {
12690 cost = 3; /* xscvdpspn, mfvsrd, and. */
12691 icode = reg_addr[mode].reload_gpr_vsx;
12692 }
12693
12694 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12695 {
12696 cost = 2; /* mtvsrwz, xscvspdpn. */
12697 icode = reg_addr[mode].reload_vsx_gpr;
12698 }
12699 }
12700
12701 else if (!TARGET_POWERPC64 && size == 8)
12702 {
12703 /* Handle moving 64-bit values from GPRs to floating point registers on
12704 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
12705 32-bit values back together. Altivec register classes must be handled
12706 specially since a different instruction is used, and the secondary
12707 reload support requires a single instruction class in the scratch
12708 register constraint. However, right now TFmode is not allowed in
12709 Altivec registers, so the pattern will never match. */
12710 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
12711 {
12712 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
12713 icode = reg_addr[mode].reload_fpr_gpr;
12714 }
12715 }
12716
12717 if (icode != CODE_FOR_nothing)
12718 {
12719 ret = true;
12720 if (sri)
12721 {
12722 sri->icode = icode;
12723 sri->extra_cost = cost;
12724 }
12725 }
12726
12727 return ret;
12728 }
12729
12730 /* Return whether a move between two register classes can be done either
12731 directly (simple move) or via a pattern that uses a single extra temporary
12732 (using ISA 2.07's direct move in this case). */
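/* For example, a TImode move from GPRs to a VSX register on a 64-bit
ISA 2.07 target is not a simple move, but the direct-move helper below
reports the 2 mtvsrd + 1 xxpermdi sequence with an extra cost of 3. */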
12733
12734 static bool
12735 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
12736 enum rs6000_reg_type from_type,
12737 machine_mode mode,
12738 secondary_reload_info *sri,
12739 bool altivec_p)
12740 {
12741 /* Fall back to load/store reloads if either type is not a register. */
12742 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
12743 return false;
12744
12745 /* If we haven't allocated registers yet, assume the move can be done for the
12746 standard register types. */
12747 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
12748 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
12749 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
12750 return true;
12751
12752 /* A move within the same set of registers is a simple move for
12753 non-specialized registers. */
12754 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
12755 return true;
12756
12757 /* Check whether a simple move can be done directly. */
12758 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
12759 {
12760 if (sri)
12761 {
12762 sri->icode = CODE_FOR_nothing;
12763 sri->extra_cost = 0;
12764 }
12765 return true;
12766 }
12767
12768 /* Now check if we can do it in a few steps. */
12769 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
12770 altivec_p);
12771 }
12772
12773 /* Inform reload about cases where moving X with a mode MODE to a register in
12774 RCLASS requires an extra scratch or immediate register. Return the class
12775 needed for the immediate register.
12776
12777 For VSX and Altivec, we may need a register to convert sp+offset into
12778 reg+sp.
12779
12780 For misaligned 64-bit gpr loads and stores we need a register to
12781 convert an offset address to indirect. */
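/* For example, a DImode GPR load from 5(r3) is a legitimate address whose
offset is not the multiple of 4 that the DS instruction format requires,
so below we return CODE_FOR_reload_di_load with an extra cost of 2 and
let the scratch register rebuild the address. */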
12782
12783 static reg_class_t
12784 rs6000_secondary_reload (bool in_p,
12785 rtx x,
12786 reg_class_t rclass_i,
12787 machine_mode mode,
12788 secondary_reload_info *sri)
12789 {
12790 enum reg_class rclass = (enum reg_class) rclass_i;
12791 reg_class_t ret = ALL_REGS;
12792 enum insn_code icode;
12793 bool default_p = false;
12794 bool done_p = false;
12795
12796 /* Allow subreg of memory before/during reload. */
12797 bool memory_p = (MEM_P (x)
12798 || (!reload_completed && SUBREG_P (x)
12799 && MEM_P (SUBREG_REG (x))));
12800
12801 sri->icode = CODE_FOR_nothing;
12802 sri->t_icode = CODE_FOR_nothing;
12803 sri->extra_cost = 0;
12804 icode = ((in_p)
12805 ? reg_addr[mode].reload_load
12806 : reg_addr[mode].reload_store);
12807
12808 if (REG_P (x) || register_operand (x, mode))
12809 {
12810 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
12811 bool altivec_p = (rclass == ALTIVEC_REGS);
12812 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
12813
12814 if (!in_p)
12815 std::swap (to_type, from_type);
12816
12817 /* Can we do a direct move of some sort? */
12818 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
12819 altivec_p))
12820 {
12821 icode = (enum insn_code)sri->icode;
12822 default_p = false;
12823 done_p = true;
12824 ret = NO_REGS;
12825 }
12826 }
12827
12828 /* Make sure 0.0 is not reloaded or forced into memory. */
12829 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
12830 {
12831 ret = NO_REGS;
12832 default_p = false;
12833 done_p = true;
12834 }
12835
12836 /* If this is a scalar floating point value and we want to load it into the
12837 traditional Altivec registers, move it via a traditional floating
12838 point register, unless we have D-form addressing. Also make sure that
12839 non-zero constants use an FPR. */
12840 if (!done_p && reg_addr[mode].scalar_in_vmx_p
12841 && !mode_supports_vmx_dform (mode)
12842 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12843 && (memory_p || CONST_DOUBLE_P (x)))
12844 {
12845 ret = FLOAT_REGS;
12846 default_p = false;
12847 done_p = true;
12848 }
12849
12850 /* Handle reload of load/stores if we have reload helper functions. */
12851 if (!done_p && icode != CODE_FOR_nothing && memory_p)
12852 {
12853 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
12854 mode);
12855
12856 if (extra_cost >= 0)
12857 {
12858 done_p = true;
12859 ret = NO_REGS;
12860 if (extra_cost > 0)
12861 {
12862 sri->extra_cost = extra_cost;
12863 sri->icode = icode;
12864 }
12865 }
12866 }
12867
12868 /* Handle unaligned loads and stores of integer registers. */
12869 if (!done_p && TARGET_POWERPC64
12870 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12871 && memory_p
12872 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
12873 {
12874 rtx addr = XEXP (x, 0);
12875 rtx off = address_offset (addr);
12876
12877 if (off != NULL_RTX)
12878 {
12879 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12880 unsigned HOST_WIDE_INT offset = INTVAL (off);
12881
12882 /* We need a secondary reload when our legitimate_address_p
12883 says the address is good (as otherwise the entire address
12884 will be reloaded), and the offset is not a multiple of
12885 four or we have an address wrap. Address wrap will only
12886 occur for LO_SUMs since legitimate_offset_address_p
12887 rejects addresses for 16-byte mems that will wrap. */
12888 if (GET_CODE (addr) == LO_SUM
12889 ? (1 /* legitimate_address_p allows any offset for lo_sum */
12890 && ((offset & 3) != 0
12891 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
12892 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
12893 && (offset & 3) != 0))
12894 {
12895 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
12896 if (in_p)
12897 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
12898 : CODE_FOR_reload_di_load);
12899 else
12900 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
12901 : CODE_FOR_reload_di_store);
12902 sri->extra_cost = 2;
12903 ret = NO_REGS;
12904 done_p = true;
12905 }
12906 else
12907 default_p = true;
12908 }
12909 else
12910 default_p = true;
12911 }
12912
12913 if (!done_p && !TARGET_POWERPC64
12914 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12915 && memory_p
12916 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
12917 {
12918 rtx addr = XEXP (x, 0);
12919 rtx off = address_offset (addr);
12920
12921 if (off != NULL_RTX)
12922 {
12923 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12924 unsigned HOST_WIDE_INT offset = INTVAL (off);
12925
12926 /* We need a secondary reload when our legitimate_address_p
12927 says the address is good (as otherwise the entire address
12928 will be reloaded), and we have a wrap.
12929
12930 legitimate_lo_sum_address_p allows LO_SUM addresses to
12931 have any offset so test for wrap in the low 16 bits.
12932
12933 legitimate_offset_address_p checks for the range
12934 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
12935 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
12936 [0x7ff4,0x7fff] respectively, so test for the
12937 intersection of these ranges, [0x7ffc,0x7fff] and
12938 [0x7ff4,0x7ff7] respectively.
12939
12940 Note that the address we see here may have been
12941 manipulated by legitimize_reload_address. */
12942 if (GET_CODE (addr) == LO_SUM
12943 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
12944 : offset - (0x8000 - extra) < UNITS_PER_WORD)
12945 {
12946 if (in_p)
12947 sri->icode = CODE_FOR_reload_si_load;
12948 else
12949 sri->icode = CODE_FOR_reload_si_store;
12950 sri->extra_cost = 2;
12951 ret = NO_REGS;
12952 done_p = true;
12953 }
12954 else
12955 default_p = true;
12956 }
12957 else
12958 default_p = true;
12959 }
12960
12961 if (!done_p)
12962 default_p = true;
12963
12964 if (default_p)
12965 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
12966
12967 gcc_assert (ret != ALL_REGS);
12968
12969 if (TARGET_DEBUG_ADDR)
12970 {
12971 fprintf (stderr,
12972 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
12973 "mode = %s",
12974 reg_class_names[ret],
12975 in_p ? "true" : "false",
12976 reg_class_names[rclass],
12977 GET_MODE_NAME (mode));
12978
12979 if (reload_completed)
12980 fputs (", after reload", stderr);
12981
12982 if (!done_p)
12983 fputs (", done_p not set", stderr);
12984
12985 if (default_p)
12986 fputs (", default secondary reload", stderr);
12987
12988 if (sri->icode != CODE_FOR_nothing)
12989 fprintf (stderr, ", reload func = %s, extra cost = %d",
12990 insn_data[sri->icode].name, sri->extra_cost);
12991
12992 else if (sri->extra_cost > 0)
12993 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
12994
12995 fputs ("\n", stderr);
12996 debug_rtx (x);
12997 }
12998
12999 return ret;
13000 }
13001
13002 /* Better tracing for rs6000_secondary_reload_inner. */
13003
13004 static void
13005 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
13006 bool store_p)
13007 {
13008 rtx set, clobber;
13009
13010 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
13011
13012 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
13013 store_p ? "store" : "load");
13014
13015 if (store_p)
13016 set = gen_rtx_SET (mem, reg);
13017 else
13018 set = gen_rtx_SET (reg, mem);
13019
13020 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
13021 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
13022 }
13023
13024 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
13025 ATTRIBUTE_NORETURN;
13026
13027 static void
13028 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
13029 bool store_p)
13030 {
13031 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
13032 gcc_unreachable ();
13033 }
13034
13035 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
13036 reload helper functions. These were identified in
13037 rs6000_secondary_reload_memory, and if reload decided to use the secondary
13038 reload, it calls the insns:
13039 reload_<RELOAD:mode>_<P:mptrsize>_store
13040 reload_<RELOAD:mode>_<P:mptrsize>_load
13041
13042 which in turn call this function to do whatever is necessary to create
13043 valid addresses. */
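/* For example, for an (and (reg) (const_int -16)) Altivec-style address
in a register class without the AND-masked form, the masked value is
computed into SCRATCH below and the memory is rewritten to use SCRATCH
as a plain base register. */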
13044
13045 void
13046 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
13047 {
13048 int regno = true_regnum (reg);
13049 machine_mode mode = GET_MODE (reg);
13050 addr_mask_type addr_mask;
13051 rtx addr;
13052 rtx new_addr;
13053 rtx op_reg, op0, op1;
13054 rtx and_op;
13055 rtx cc_clobber;
13056 rtvec rv;
13057
13058 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
13059 || !base_reg_operand (scratch, GET_MODE (scratch)))
13060 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13061
13062 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
13063 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
13064
13065 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
13066 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
13067
13068 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
13069 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
13070
13071 else
13072 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13073
13074 /* Make sure the mode is valid in this register class. */
13075 if ((addr_mask & RELOAD_REG_VALID) == 0)
13076 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13077
13078 if (TARGET_DEBUG_ADDR)
13079 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
13080
13081 new_addr = addr = XEXP (mem, 0);
13082 switch (GET_CODE (addr))
13083 {
13084 /* Does the register class support auto update forms for this mode? If
13085 not, do the update now. We don't need a scratch register, since the
13086 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
13087 case PRE_INC:
13088 case PRE_DEC:
13089 op_reg = XEXP (addr, 0);
13090 if (!base_reg_operand (op_reg, Pmode))
13091 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13092
13093 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
13094 {
13095 int delta = GET_MODE_SIZE (mode);
13096 if (GET_CODE (addr) == PRE_DEC)
13097 delta = -delta;
13098 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
13099 new_addr = op_reg;
13100 }
13101 break;
13102
13103 case PRE_MODIFY:
13104 op0 = XEXP (addr, 0);
13105 op1 = XEXP (addr, 1);
13106 if (!base_reg_operand (op0, Pmode)
13107 || GET_CODE (op1) != PLUS
13108 || !rtx_equal_p (op0, XEXP (op1, 0)))
13109 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13110
13111 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
13112 {
13113 emit_insn (gen_rtx_SET (op0, op1));
13114 new_addr = reg;
13115 }
13116 break;
13117
13118 /* Do we need to simulate AND -16 to clear the bottom address bits used
13119 in VMX load/stores? */
13120 case AND:
13121 op0 = XEXP (addr, 0);
13122 op1 = XEXP (addr, 1);
13123 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
13124 {
13125 if (REG_P (op0) || SUBREG_P (op0))
13126 op_reg = op0;
13127
13128 else if (GET_CODE (op1) == PLUS)
13129 {
13130 emit_insn (gen_rtx_SET (scratch, op1));
13131 op_reg = scratch;
13132 }
13133
13134 else
13135 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13136
13137 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
13138 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
13139 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
13140 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
13141 new_addr = scratch;
13142 }
13143 break;
13144
13145 /* If this is an indirect address, make sure it is a base register. */
13146 case REG:
13147 case SUBREG:
13148 if (!base_reg_operand (addr, GET_MODE (addr)))
13149 {
13150 emit_insn (gen_rtx_SET (scratch, addr));
13151 new_addr = scratch;
13152 }
13153 break;
13154
13155 /* If this is an indexed address, make sure the register class can handle
13156 indexed addresses for this mode. */
13157 case PLUS:
13158 op0 = XEXP (addr, 0);
13159 op1 = XEXP (addr, 1);
13160 if (!base_reg_operand (op0, Pmode))
13161 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13162
13163 else if (int_reg_operand (op1, Pmode))
13164 {
13165 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
13166 {
13167 emit_insn (gen_rtx_SET (scratch, addr));
13168 new_addr = scratch;
13169 }
13170 }
13171
13172 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
13173 {
13174 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
13175 || !quad_address_p (addr, mode, false))
13176 {
13177 emit_insn (gen_rtx_SET (scratch, addr));
13178 new_addr = scratch;
13179 }
13180 }
13181
13182 /* Make sure the register class can handle offset addresses. */
13183 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
13184 {
13185 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
13186 {
13187 emit_insn (gen_rtx_SET (scratch, addr));
13188 new_addr = scratch;
13189 }
13190 }
13191
13192 else
13193 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13194
13195 break;
13196
13197 case LO_SUM:
13198 op0 = XEXP (addr, 0);
13199 op1 = XEXP (addr, 1);
13200 if (!base_reg_operand (op0, Pmode))
13201 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13202
13203 else if (int_reg_operand (op1, Pmode))
13204 {
13205 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
13206 {
13207 emit_insn (gen_rtx_SET (scratch, addr));
13208 new_addr = scratch;
13209 }
13210 }
13211
13212 /* Quad offsets are restricted and can't handle normal addresses. */
13213 else if (mode_supports_dq_form (mode))
13214 {
13215 emit_insn (gen_rtx_SET (scratch, addr));
13216 new_addr = scratch;
13217 }
13218
13219 /* Make sure the register class can handle offset addresses. */
13220 else if (legitimate_lo_sum_address_p (mode, addr, false))
13221 {
13222 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
13223 {
13224 emit_insn (gen_rtx_SET (scratch, addr));
13225 new_addr = scratch;
13226 }
13227 }
13228
13229 else
13230 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13231
13232 break;
13233
13234 case SYMBOL_REF:
13235 case CONST:
13236 case LABEL_REF:
13237 rs6000_emit_move (scratch, addr, Pmode);
13238 new_addr = scratch;
13239 break;
13240
13241 default:
13242 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13243 }
13244
13245 /* Adjust the address if it changed. */
13246 if (addr != new_addr)
13247 {
13248 mem = replace_equiv_address_nv (mem, new_addr);
13249 if (TARGET_DEBUG_ADDR)
13250 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
13251 }
13252
13253 /* Now create the move. */
13254 if (store_p)
13255 emit_insn (gen_rtx_SET (mem, reg));
13256 else
13257 emit_insn (gen_rtx_SET (reg, mem));
13258
13259 return;
13260 }
13261
13262 /* Convert reloads involving 64-bit gprs and misaligned offset
13263 addressing, or multiple 32-bit gprs and offsets that are too large,
13264 to use indirect addressing. */
13265
13266 void
13267 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
13268 {
13269 int regno = true_regnum (reg);
13270 enum reg_class rclass;
13271 rtx addr;
13272 rtx scratch_or_premodify = scratch;
13273
13274 if (TARGET_DEBUG_ADDR)
13275 {
13276 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
13277 store_p ? "store" : "load");
13278 fprintf (stderr, "reg:\n");
13279 debug_rtx (reg);
13280 fprintf (stderr, "mem:\n");
13281 debug_rtx (mem);
13282 fprintf (stderr, "scratch:\n");
13283 debug_rtx (scratch);
13284 }
13285
13286 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
13287 gcc_assert (MEM_P (mem));
13288 rclass = REGNO_REG_CLASS (regno);
13289 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
13290 addr = XEXP (mem, 0);
13291
13292 if (GET_CODE (addr) == PRE_MODIFY)
13293 {
13294 gcc_assert (REG_P (XEXP (addr, 0))
13295 && GET_CODE (XEXP (addr, 1)) == PLUS
13296 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
13297 scratch_or_premodify = XEXP (addr, 0);
13298 addr = XEXP (addr, 1);
13299 }
13300 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
13301
13302 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
13303
13304 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
13305
13306 /* Now create the move. */
13307 if (store_p)
13308 emit_insn (gen_rtx_SET (mem, reg));
13309 else
13310 emit_insn (gen_rtx_SET (reg, mem));
13311
13312 return;
13313 }
13314
13315 /* Given an rtx X being reloaded into a reg required to be
13316 in class CLASS, return the class of reg to actually use.
13317 In general this is just CLASS; but on some machines
13318 in some cases it is preferable to use a more restrictive class.
13319
13320 On the RS/6000, we have to return NO_REGS when we want to reload a
13321 floating-point CONST_DOUBLE to force it to be copied to memory.
13322
13323 We also don't want to reload integer values into floating-point
13324 registers if we can at all help it. In fact, this can
13325 cause reload to die, if it tries to generate a reload of CTR
13326 into a FP register and discovers it doesn't have the memory location
13327 required.
13328
13329 ??? Would it be a good idea to have reload do the converse, that is
13330 try to reload floating modes into FP registers if possible?
13331 */
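/* For example, a vector constant that easy_vector_constant accepts
prefers ALTIVEC_REGS, a CONST_INT of -1 keeps its VSX class once ISA
2.07's XXLORC is available, and other constants fall through to NO_REGS
so that they are forced to memory. */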
13332
13333 static enum reg_class
13334 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
13335 {
13336 machine_mode mode = GET_MODE (x);
13337 bool is_constant = CONSTANT_P (x);
13338
13339 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
13340 reload class for it. */
13341 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
13342 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
13343 return NO_REGS;
13344
13345 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
13346 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
13347 return NO_REGS;
13348
13349 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
13350 the reloading of address expressions using PLUS into floating point
13351 registers. */
13352 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
13353 {
13354 if (is_constant)
13355 {
13356 /* Zero is always allowed in all VSX registers. */
13357 if (x == CONST0_RTX (mode))
13358 return rclass;
13359
13360 /* If this is a vector constant that can be formed with a few Altivec
13361 instructions, we want altivec registers. */
13362 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
13363 return ALTIVEC_REGS;
13364
13365 /* If this is an integer constant that can easily be loaded into
13366 vector registers, allow it. */
13367 if (CONST_INT_P (x))
13368 {
13369 HOST_WIDE_INT value = INTVAL (x);
13370
13371 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
13372 2.06 can generate it in the Altivec registers with
13373 VSPLTI<x>. */
13374 if (value == -1)
13375 {
13376 if (TARGET_P8_VECTOR)
13377 return rclass;
13378 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
13379 return ALTIVEC_REGS;
13380 else
13381 return NO_REGS;
13382 }
13383
13384 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
13385 a sign extend in the Altivec registers. */
13386 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
13387 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
13388 return ALTIVEC_REGS;
13389 }
13390
13391 /* Force constant to memory. */
13392 return NO_REGS;
13393 }
13394
13395 /* D-form addressing can easily reload the value. */
13396 if (mode_supports_vmx_dform (mode)
13397 || mode_supports_dq_form (mode))
13398 return rclass;
13399
13400 /* If this is a scalar floating point value and we don't have D-form
13401 addressing, prefer the traditional floating point registers so that we
13402 can use D-form (register+offset) addressing. */
13403 if (rclass == VSX_REGS
13404 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
13405 return FLOAT_REGS;
13406
13407 /* Prefer the Altivec registers if Altivec is handling the vector
13408 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
13409 loads. */
13410 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
13411 || mode == V1TImode)
13412 return ALTIVEC_REGS;
13413
13414 return rclass;
13415 }
13416
13417 if (is_constant || GET_CODE (x) == PLUS)
13418 {
13419 if (reg_class_subset_p (GENERAL_REGS, rclass))
13420 return GENERAL_REGS;
13421 if (reg_class_subset_p (BASE_REGS, rclass))
13422 return BASE_REGS;
13423 return NO_REGS;
13424 }
13425
13426 /* For the vector pair and vector quad modes, prefer their natural register
13427 (VSX or FPR) rather than GPR registers. For other integer types, prefer
13428 the GPR registers. */
13429 if (rclass == GEN_OR_FLOAT_REGS)
13430 {
13431 if (mode == OOmode)
13432 return VSX_REGS;
13433
13434 if (mode == XOmode)
13435 return FLOAT_REGS;
13436
13437 if (GET_MODE_CLASS (mode) == MODE_INT)
13438 return GENERAL_REGS;
13439 }
13440
13441 return rclass;
13442 }
13443
13444 /* Debug version of rs6000_preferred_reload_class. */
13445 static enum reg_class
13446 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
13447 {
13448 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
13449
13450 fprintf (stderr,
13451 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
13452 "mode = %s, x:\n",
13453 reg_class_names[ret], reg_class_names[rclass],
13454 GET_MODE_NAME (GET_MODE (x)));
13455 debug_rtx (x);
13456
13457 return ret;
13458 }
13459
13460 /* If we are copying between FP or AltiVec registers and anything else, we need
13461 a memory location. The exception is when we are targeting ppc64 and the
13462 direct moves between FPRs and GPRs are available. Also, under VSX, you
13463 can copy vector registers from the FP register set to the Altivec register
13464 set and vice versa. */
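/* For example, an SImode copy between GENERAL_REGS and FLOAT_REGS needs
a stack slot on older processors, but not once the ISA 2.07 direct-move
instructions make it a simple move. */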
13465
13466 static bool
13467 rs6000_secondary_memory_needed (machine_mode mode,
13468 reg_class_t from_class,
13469 reg_class_t to_class)
13470 {
13471 enum rs6000_reg_type from_type, to_type;
13472 bool altivec_p = ((from_class == ALTIVEC_REGS)
13473 || (to_class == ALTIVEC_REGS));
13474
13475 /* If a simple/direct move is available, we don't need secondary memory. */
13476 from_type = reg_class_to_reg_type[(int)from_class];
13477 to_type = reg_class_to_reg_type[(int)to_class];
13478
13479 if (rs6000_secondary_reload_move (to_type, from_type, mode,
13480 (secondary_reload_info *)0, altivec_p))
13481 return false;
13482
13483 /* If we have a floating point or vector register class, we need to use
13484 memory to transfer the data. */
13485 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
13486 return true;
13487
13488 return false;
13489 }
13490
13491 /* Debug version of rs6000_secondary_memory_needed. */
13492 static bool
13493 rs6000_debug_secondary_memory_needed (machine_mode mode,
13494 reg_class_t from_class,
13495 reg_class_t to_class)
13496 {
13497 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
13498
13499 fprintf (stderr,
13500 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
13501 "to_class = %s, mode = %s\n",
13502 ret ? "true" : "false",
13503 reg_class_names[from_class],
13504 reg_class_names[to_class],
13505 GET_MODE_NAME (mode));
13506
13507 return ret;
13508 }
13509
13510 /* Return the register class of a scratch register needed to copy IN into
13511 or out of a register in RCLASS in MODE. If it can be done directly,
13512 NO_REGS is returned. */
13513
13514 static enum reg_class
13515 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
13516 rtx in)
13517 {
13518 int regno;
13519
13520 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
13521 #if TARGET_MACHO
13522 && MACHOPIC_INDIRECT
13523 #endif
13524 ))
13525 {
13526 /* We cannot copy a symbolic operand directly into anything
13527 other than BASE_REGS for TARGET_ELF. So indicate that a
13528 register from BASE_REGS is needed as an intermediate
13529 register.
13530
13531 On Darwin, pic addresses require a load from memory, which
13532 needs a base register. */
13533 if (rclass != BASE_REGS
13534 && (SYMBOL_REF_P (in)
13535 || GET_CODE (in) == HIGH
13536 || GET_CODE (in) == LABEL_REF
13537 || GET_CODE (in) == CONST))
13538 return BASE_REGS;
13539 }
13540
13541 if (REG_P (in))
13542 {
13543 regno = REGNO (in);
13544 if (!HARD_REGISTER_NUM_P (regno))
13545 {
13546 regno = true_regnum (in);
13547 if (!HARD_REGISTER_NUM_P (regno))
13548 regno = -1;
13549 }
13550 }
13551 else if (SUBREG_P (in))
13552 {
13553 regno = true_regnum (in);
13554 if (!HARD_REGISTER_NUM_P (regno))
13555 regno = -1;
13556 }
13557 else
13558 regno = -1;
13559
13560 /* If we have VSX register moves, prefer moving scalar values between
13561 Altivec registers and GPRs by going via an FPR (and then via memory)
13562 instead of reloading the secondary memory address for Altivec moves. */
13563 if (TARGET_VSX
13564 && GET_MODE_SIZE (mode) < 16
13565 && !mode_supports_vmx_dform (mode)
13566 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
13567 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
13568 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
13569 && (regno >= 0 && INT_REGNO_P (regno)))))
13570 return FLOAT_REGS;
13571
13572 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
13573 into anything. */
13574 if (rclass == GENERAL_REGS || rclass == BASE_REGS
13575 || (regno >= 0 && INT_REGNO_P (regno)))
13576 return NO_REGS;
13577
13578 /* Constants, memory, and VSX registers can go into VSX registers (both the
13579 traditional floating point and the altivec registers). */
13580 if (rclass == VSX_REGS
13581 && (regno == -1 || VSX_REGNO_P (regno)))
13582 return NO_REGS;
13583
13584 /* Constants, memory, and FP registers can go into FP registers. */
13585 if ((regno == -1 || FP_REGNO_P (regno))
13586 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
13587 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
13588
13589 /* Memory and AltiVec registers can go into AltiVec registers. */
13590 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
13591 && rclass == ALTIVEC_REGS)
13592 return NO_REGS;
13593
13594 /* We can copy among the CR registers. */
13595 if ((rclass == CR_REGS || rclass == CR0_REGS)
13596 && regno >= 0 && CR_REGNO_P (regno))
13597 return NO_REGS;
13598
13599 /* Otherwise, we need GENERAL_REGS. */
13600 return GENERAL_REGS;
13601 }
13602
13603 /* Debug version of rs6000_secondary_reload_class. */
13604 static enum reg_class
13605 rs6000_debug_secondary_reload_class (enum reg_class rclass,
13606 machine_mode mode, rtx in)
13607 {
13608 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
13609 fprintf (stderr,
13610 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
13611 "mode = %s, input rtx:\n",
13612 reg_class_names[ret], reg_class_names[rclass],
13613 GET_MODE_NAME (mode));
13614 debug_rtx (in);
13615
13616 return ret;
13617 }
13618
13619 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
13620
13621 static bool
13622 rs6000_can_change_mode_class (machine_mode from,
13623 machine_mode to,
13624 reg_class_t rclass)
13625 {
13626 unsigned from_size = GET_MODE_SIZE (from);
13627 unsigned to_size = GET_MODE_SIZE (to);
13628
13629 if (from_size != to_size)
13630 {
13631 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
13632
13633 if (reg_classes_intersect_p (xclass, rclass))
13634 {
13635 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
13636 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
13637 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
13638 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
13639
13640 /* Don't allow 64-bit types to overlap with 128-bit types that take a
13641 single register under VSX because the scalar part of the register
13642 is in the upper 64-bits, and not the lower 64-bits. Types like
13643 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
13644 IEEE floating point can't overlap, and neither can small
13645 values. */
13646
13647 if (to_float128_vector_p && from_float128_vector_p)
13648 return true;
13649
13650 else if (to_float128_vector_p || from_float128_vector_p)
13651 return false;
13652
13653 /* TDmode in floating-point registers must always go into a register
13654 pair with the most significant word in the even-numbered register
13655 to match ISA requirements. In little-endian mode, this does not
13656 match subreg numbering, so we cannot allow subregs. */
13657 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
13658 return false;
13659
13660 /* Allow SD<->DD changes, since SDmode values are stored in
13661 the low half of the DDmode, just like target-independent
13662 code expects. We need to allow at least SD->DD since
13663 rs6000_secondary_memory_needed_mode asks for that change
13664 to be made for SD reloads. */
13665 if ((to == DDmode && from == SDmode)
13666 || (to == SDmode && from == DDmode))
13667 return true;
13668
13669 if (from_size < 8 || to_size < 8)
13670 return false;
13671
13672 if (from_size == 8 && (8 * to_nregs) != to_size)
13673 return false;
13674
13675 if (to_size == 8 && (8 * from_nregs) != from_size)
13676 return false;
13677
13678 return true;
13679 }
13680 else
13681 return true;
13682 }
13683
13684 /* Since the VSX register set includes traditional floating point registers
13685 and altivec registers, just check for the size being different instead of
13686 trying to check whether the modes are vector modes. Otherwise we would
13687 not allow, say, DF and DI to change classes. For types like TFmode and
13688 TDmode that take 2 64-bit registers, rather than a single 128-bit
13689 register, don't allow subregs of those types to other 128-bit types. */
13690 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
13691 {
13692 unsigned num_regs = (from_size + 15) / 16;
13693 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
13694 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
13695 return false;
13696
13697 return (from_size == 8 || from_size == 16);
13698 }
13699
13700 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
13701 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
13702 return false;
13703
13704 return true;
13705 }
13706
13707 /* Debug version of rs6000_can_change_mode_class. */
13708 static bool
13709 rs6000_debug_can_change_mode_class (machine_mode from,
13710 machine_mode to,
13711 reg_class_t rclass)
13712 {
13713 bool ret = rs6000_can_change_mode_class (from, to, rclass);
13714
13715 fprintf (stderr,
13716 "rs6000_can_change_mode_class, return %s, from = %s, "
13717 "to = %s, rclass = %s\n",
13718 ret ? "true" : "false",
13719 GET_MODE_NAME (from), GET_MODE_NAME (to),
13720 reg_class_names[rclass]);
13721
13722 return ret;
13723 }
13724 \f
13725 /* Return a string to do a move operation of 128 bits of data. */
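/* For example, a V2DI copy between two VSX registers returns
"xxlor %x0,%x1,%x1", a GPR-to-GPR copy returns "#" so that the insn is
split after reload, and an Altivec load with no usable VSX form returns
"lvx %0,%y1". */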
13726
13727 const char *
13728 rs6000_output_move_128bit (rtx operands[])
13729 {
13730 rtx dest = operands[0];
13731 rtx src = operands[1];
13732 machine_mode mode = GET_MODE (dest);
13733 int dest_regno;
13734 int src_regno;
13735 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
13736 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
13737
13738 if (REG_P (dest))
13739 {
13740 dest_regno = REGNO (dest);
13741 dest_gpr_p = INT_REGNO_P (dest_regno);
13742 dest_fp_p = FP_REGNO_P (dest_regno);
13743 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
13744 dest_vsx_p = dest_fp_p | dest_vmx_p;
13745 }
13746 else
13747 {
13748 dest_regno = -1;
13749 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
13750 }
13751
13752 if (REG_P (src))
13753 {
13754 src_regno = REGNO (src);
13755 src_gpr_p = INT_REGNO_P (src_regno);
13756 src_fp_p = FP_REGNO_P (src_regno);
13757 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
13758 src_vsx_p = src_fp_p | src_vmx_p;
13759 }
13760 else
13761 {
13762 src_regno = -1;
13763 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
13764 }
13765
13766 /* Register moves. */
13767 if (dest_regno >= 0 && src_regno >= 0)
13768 {
13769 if (dest_gpr_p)
13770 {
13771 if (src_gpr_p)
13772 return "#";
13773
13774 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
13775 return (WORDS_BIG_ENDIAN
13776 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
13777 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
13778
13779 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
13780 return "#";
13781 }
13782
13783 else if (TARGET_VSX && dest_vsx_p)
13784 {
13785 if (src_vsx_p)
13786 return "xxlor %x0,%x1,%x1";
13787
13788 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
13789 return (WORDS_BIG_ENDIAN
13790 ? "mtvsrdd %x0,%1,%L1"
13791 : "mtvsrdd %x0,%L1,%1");
13792
13793 else if (TARGET_DIRECT_MOVE && src_gpr_p)
13794 return "#";
13795 }
13796
13797 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
13798 return "vor %0,%1,%1";
13799
13800 else if (dest_fp_p && src_fp_p)
13801 return "#";
13802 }
13803
13804 /* Loads. */
13805 else if (dest_regno >= 0 && MEM_P (src))
13806 {
13807 if (dest_gpr_p)
13808 {
13809 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13810 return "lq %0,%1";
13811 else
13812 return "#";
13813 }
13814
13815 else if (TARGET_ALTIVEC && dest_vmx_p
13816 && altivec_indexed_or_indirect_operand (src, mode))
13817 return "lvx %0,%y1";
13818
13819 else if (TARGET_VSX && dest_vsx_p)
13820 {
13821 if (mode_supports_dq_form (mode)
13822 && quad_address_p (XEXP (src, 0), mode, true))
13823 return "lxv %x0,%1";
13824
13825 else if (TARGET_P9_VECTOR)
13826 return "lxvx %x0,%y1";
13827
13828 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13829 return "lxvw4x %x0,%y1";
13830
13831 else
13832 return "lxvd2x %x0,%y1";
13833 }
13834
13835 else if (TARGET_ALTIVEC && dest_vmx_p)
13836 return "lvx %0,%y1";
13837
13838 else if (dest_fp_p)
13839 return "#";
13840 }
13841
13842 /* Stores. */
13843 else if (src_regno >= 0 && MEM_P (dest))
13844 {
13845 if (src_gpr_p)
13846 {
13847 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13848 return "stq %1,%0";
13849 else
13850 return "#";
13851 }
13852
13853 else if (TARGET_ALTIVEC && src_vmx_p
13854 && altivec_indexed_or_indirect_operand (dest, mode))
13855 return "stvx %1,%y0";
13856
13857 else if (TARGET_VSX && src_vsx_p)
13858 {
13859 if (mode_supports_dq_form (mode)
13860 && quad_address_p (XEXP (dest, 0), mode, true))
13861 return "stxv %x1,%0";
13862
13863 else if (TARGET_P9_VECTOR)
13864 return "stxvx %x1,%y0";
13865
13866 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13867 return "stxvw4x %x1,%y0";
13868
13869 else
13870 return "stxvd2x %x1,%y0";
13871 }
13872
13873 else if (TARGET_ALTIVEC && src_vmx_p)
13874 return "stvx %1,%y0";
13875
13876 else if (src_fp_p)
13877 return "#";
13878 }
13879
13880 /* Constants. */
13881 else if (dest_regno >= 0
13882 && (CONST_INT_P (src)
13883 || CONST_WIDE_INT_P (src)
13884 || CONST_DOUBLE_P (src)
13885 || GET_CODE (src) == CONST_VECTOR))
13886 {
13887 if (dest_gpr_p)
13888 return "#";
13889
13890 else if ((dest_vmx_p && TARGET_ALTIVEC)
13891 || (dest_vsx_p && TARGET_VSX))
13892 return output_vec_const_move (operands);
13893 }
13894
13895 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
13896 }
13897
13898 /* Validate a 128-bit move: at least one operand must be a register. */
13899 bool
13900 rs6000_move_128bit_ok_p (rtx operands[])
13901 {
13902 machine_mode mode = GET_MODE (operands[0]);
13903 return (gpc_reg_operand (operands[0], mode)
13904 || gpc_reg_operand (operands[1], mode));
13905 }
13906
13907 /* Return true if a 128-bit move needs to be split. */
13908 bool
13909 rs6000_split_128bit_ok_p (rtx operands[])
13910 {
13911 if (!reload_completed)
13912 return false;
13913
13914 if (!gpr_or_gpr_p (operands[0], operands[1]))
13915 return false;
13916
13917 if (quad_load_store_p (operands[0], operands[1]))
13918 return false;
13919
13920 return true;
13921 }
13922
13923 \f
13924 /* Given a comparison operation, return the bit number in CCR to test. We
13925 know this is a valid comparison.
13926
13927 SCC_P is 1 if this is for an scc. That means that %D will have been
13928 used instead of %C, so the bits will be in different places.
13929
13930 Return -1 if OP isn't a valid comparison for some reason. */
13931
13932 int
13933 ccr_bit (rtx op, int scc_p)
13934 {
13935 enum rtx_code code = GET_CODE (op);
13936 machine_mode cc_mode;
13937 int cc_regnum;
13938 int base_bit;
13939 rtx reg;
13940
13941 if (!COMPARISON_P (op))
13942 return -1;
13943
13944 reg = XEXP (op, 0);
13945
13946 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
13947 return -1;
13948
13949 cc_mode = GET_MODE (reg);
13950 cc_regnum = REGNO (reg);
13951 base_bit = 4 * (cc_regnum - CR0_REGNO);
13952
13953 validate_condition_mode (code, cc_mode);
13954
13955 /* When generating a sCOND operation, only positive conditions are
13956 allowed. */
13957 if (scc_p)
13958 switch (code)
13959 {
13960 case EQ:
13961 case GT:
13962 case LT:
13963 case UNORDERED:
13964 case GTU:
13965 case LTU:
13966 break;
13967 default:
13968 return -1;
13969 }
13970
13971 switch (code)
13972 {
13973 case NE:
13974 return scc_p ? base_bit + 3 : base_bit + 2;
13975 case EQ:
13976 return base_bit + 2;
13977 case GT: case GTU: case UNLE:
13978 return base_bit + 1;
13979 case LT: case LTU: case UNGE:
13980 return base_bit;
13981 case ORDERED: case UNORDERED:
13982 return base_bit + 3;
13983
13984 case GE: case GEU:
13985 /* If scc, we will have done a cror to put the bit in the
13986 unordered position. So test that bit. For integer, this is ! LT
13987 unless this is an scc insn. */
13988 return scc_p ? base_bit + 3 : base_bit;
13989
13990 case LE: case LEU:
13991 return scc_p ? base_bit + 3 : base_bit + 1;
13992
13993 default:
13994 return -1;
13995 }
13996 }
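
/* Worked example (illustrative): for a GT compare whose result lives
   in cr7, base_bit is 4 * (CR7_REGNO - CR0_REGNO) = 28, so ccr_bit
   returns 29 (GT is bit 1 within the LT/GT/EQ/SO field).  For an scc
   use of GE, a cror will have placed the result in the SO/unordered
   slot, so the function returns base_bit + 3 = 31 instead.  */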
13997 \f
13998 /* Return the GOT register. */
13999
14000 rtx
14001 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
14002 {
14003 /* The second flow pass currently (June 1999) can't update
14004 regs_ever_live without disturbing other parts of the compiler, so
14005 update it here to make the prolog/epilogue code happy. */
14006 if (!can_create_pseudo_p ()
14007 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
14008 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
14009
14010 crtl->uses_pic_offset_table = 1;
14011
14012 return pic_offset_table_rtx;
14013 }
14014 \f
14015 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
14016
14017 /* Write out a function code label. */
14018
14019 void
14020 rs6000_output_function_entry (FILE *file, const char *fname)
14021 {
14022 if (fname[0] != '.')
14023 {
14024 switch (DEFAULT_ABI)
14025 {
14026 default:
14027 gcc_unreachable ();
14028
14029 case ABI_AIX:
14030 if (DOT_SYMBOLS)
14031 putc ('.', file);
14032 else
14033 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
14034 break;
14035
14036 case ABI_ELFv2:
14037 case ABI_V4:
14038 case ABI_DARWIN:
14039 break;
14040 }
14041 }
14042
14043 RS6000_OUTPUT_BASENAME (file, fname);
14044 }
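
/* For example (sketch, assumed symbol name): on AIX with dot symbols
   the code entry point of "foo" is named ".foo", while plain "foo"
   names the function descriptor, so a call is emitted as

	bl .foo		# ABI_AIX with DOT_SYMBOLS

   whereas ELFv2, V4 and Darwin emit the basename unchanged:

	bl foo  */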
14045
14046 /* Print an operand. Recognize special options, documented below. */
14047
14048 #if TARGET_ELF
14049 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
14050 only introduced by the linker, when applying the sda21
14051 relocation. */
14052 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
14053 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
14054 #else
14055 #define SMALL_DATA_RELOC "sda21"
14056 #define SMALL_DATA_REG 0
14057 #endif
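
/* Illustration (assumed variable name): with -msdata=eabi a
   small-data access is emitted as

	lwz 3,var@sda21(0)

   and the linker fills in the base register when applying the sda21
   relocation; the sysv flavour instead uses the small-data base
   register r13 directly:

	lwz 3,var@sdarel(13)  */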
14058
14059 void
14060 print_operand (FILE *file, rtx x, int code)
14061 {
14062 int i;
14063 unsigned HOST_WIDE_INT uval;
14064
14065 switch (code)
14066 {
14067 /* %a is output_address. */
14068
14069 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
14070 output_operand. */
14071
14072 case 'A':
14073 /* Write the MMA accumulator number associated with VSX register X. */
14074 if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
14075 output_operand_lossage ("invalid %%A value");
14076 else
14077 fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
14078 return;
14079
14080 case 'D':
14081 /* Like 'J' but get to the GT bit only. */
14082 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14083 {
14084 output_operand_lossage ("invalid %%D value");
14085 return;
14086 }
14087
14088 /* Bit 1 is GT bit. */
14089 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
14090
14091 /* Add one for shift count in rlinm for scc. */
14092 fprintf (file, "%d", i + 1);
14093 return;
14094
14095 case 'e':
14096 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
14097 if (! INT_P (x))
14098 {
14099 output_operand_lossage ("invalid %%e value");
14100 return;
14101 }
14102
14103 uval = INTVAL (x);
14104 if ((uval & 0xffff) == 0 && uval != 0)
14105 putc ('s', file);
14106 return;
14107
14108 case 'E':
14109 /* X is a CR register. Print the number of the EQ bit of the CR. */
14110 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14111 output_operand_lossage ("invalid %%E value");
14112 else
14113 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
14114 return;
14115
14116 case 'f':
14117 /* X is a CR register. Print the shift count needed to move it
14118 to the high-order four bits. */
14119 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14120 output_operand_lossage ("invalid %%f value");
14121 else
14122 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
14123 return;
14124
14125 case 'F':
14126 /* Similar, but print the count for the rotate in the opposite
14127 direction. */
14128 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14129 output_operand_lossage ("invalid %%F value");
14130 else
14131 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
14132 return;
14133
14134 case 'G':
14135 /* X is a constant integer. If it is negative, print "m",
14136 otherwise print "z". This is to make an aze or ame insn. */
14137 if (!CONST_INT_P (x))
14138 output_operand_lossage ("invalid %%G value");
14139 else if (INTVAL (x) >= 0)
14140 putc ('z', file);
14141 else
14142 putc ('m', file);
14143 return;
14144
14145 case 'h':
14146 /* If constant, output low-order five bits. Otherwise, write
14147 normally. */
14148 if (INT_P (x))
14149 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
14150 else
14151 print_operand (file, x, 0);
14152 return;
14153
14154 case 'H':
14155 /* If constant, output low-order six bits. Otherwise, write
14156 normally. */
14157 if (INT_P (x))
14158 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
14159 else
14160 print_operand (file, x, 0);
14161 return;
14162
14163 case 'I':
14164 /* Print `i' if this is a constant, else nothing. */
14165 if (INT_P (x))
14166 putc ('i', file);
14167 return;
14168
14169 case 'j':
14170 /* Write the bit number in CCR for jump. */
14171 i = ccr_bit (x, 0);
14172 if (i == -1)
14173 output_operand_lossage ("invalid %%j code");
14174 else
14175 fprintf (file, "%d", i);
14176 return;
14177
14178 case 'J':
14179 /* Similar, but add one for shift count in rlinm for scc and pass
14180 scc flag to `ccr_bit'. */
14181 i = ccr_bit (x, 1);
14182 if (i == -1)
14183 output_operand_lossage ("invalid %%J code");
14184 else
14185 /* If we want bit 31, write a shift count of zero, not 32. */
14186 fprintf (file, "%d", i == 31 ? 0 : i + 1);
14187 return;
14188
14189 case 'k':
14190 /* X must be a constant. Write the 1's complement of the
14191 constant. */
14192 if (! INT_P (x))
14193 output_operand_lossage ("invalid %%k value");
14194 else
14195 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
14196 return;
14197
14198 case 'K':
14199 /* X must be a symbolic constant on ELF. Write an
14200 expression suitable for an 'addi' that adds in the low 16
14201 bits of the address. */
14202 if (GET_CODE (x) == CONST)
14203 {
14204 if (GET_CODE (XEXP (x, 0)) != PLUS
14205 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
14206 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
14207 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
14208 output_operand_lossage ("invalid %%K value");
14209 }
14210 print_operand_address (file, x);
14211 fputs ("@l", file);
14212 return;
14213
14214 /* %l is output_asm_label. */
14215
14216 case 'L':
14217 /* Write second word of DImode or DFmode reference. Works on register
14218 or non-indexed memory only. */
14219 if (REG_P (x))
14220 fputs (reg_names[REGNO (x) + 1], file);
14221 else if (MEM_P (x))
14222 {
14223 machine_mode mode = GET_MODE (x);
14224 /* Handle possible auto-increment. Since it is pre-increment and
14225 we have already done it, we can just use an offset of word. */
14226 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14227 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14228 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
14229 UNITS_PER_WORD));
14230 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14231 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
14232 UNITS_PER_WORD));
14233 else
14234 output_address (mode, XEXP (adjust_address_nv (x, SImode,
14235 UNITS_PER_WORD),
14236 0));
14237
14238 if (small_data_operand (x, GET_MODE (x)))
14239 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14240 reg_names[SMALL_DATA_REG]);
14241 }
14242 return;
14243
14244 case 'N': /* Unused */
14245 /* Write the number of elements in the vector times 4. */
14246 if (GET_CODE (x) != PARALLEL)
14247 output_operand_lossage ("invalid %%N value");
14248 else
14249 fprintf (file, "%d", XVECLEN (x, 0) * 4);
14250 return;
14251
14252 case 'O': /* Unused */
14253 /* Similar, but subtract 1 first. */
14254 if (GET_CODE (x) != PARALLEL)
14255 output_operand_lossage ("invalid %%O value");
14256 else
14257 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
14258 return;
14259
14260 case 'p':
14261 /* X is a CONST_INT that is a power of two. Output the logarithm. */
14262 if (! INT_P (x)
14263 || INTVAL (x) < 0
14264 || (i = exact_log2 (INTVAL (x))) < 0)
14265 output_operand_lossage ("invalid %%p value");
14266 else
14267 fprintf (file, "%d", i);
14268 return;
14269
14270 case 'P':
14271 /* The operand must be an indirect memory reference. The result
14272 is the register name. */
14273 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
14274 || REGNO (XEXP (x, 0)) >= 32)
14275 output_operand_lossage ("invalid %%P value");
14276 else
14277 fputs (reg_names[REGNO (XEXP (x, 0))], file);
14278 return;
14279
14280 case 'q':
14281 /* This outputs the logical code corresponding to a boolean
14282 expression. The expression may have one or both operands
14283 negated (if one, only the first one). For condition register
14284 logical operations, it will also treat the negated
14285 CR codes as NOTs, but not handle NOTs of them. */
14286 {
14287 const char *const *t = 0;
14288 const char *s;
14289 enum rtx_code code = GET_CODE (x);
14290 static const char * const tbl[3][3] = {
14291 { "and", "andc", "nor" },
14292 { "or", "orc", "nand" },
14293 { "xor", "eqv", "xor" } };
14294
14295 if (code == AND)
14296 t = tbl[0];
14297 else if (code == IOR)
14298 t = tbl[1];
14299 else if (code == XOR)
14300 t = tbl[2];
14301 else
14302 output_operand_lossage ("invalid %%q value");
14303
14304 if (GET_CODE (XEXP (x, 0)) != NOT)
14305 s = t[0];
14306 else
14307 {
14308 if (GET_CODE (XEXP (x, 1)) == NOT)
14309 s = t[2];
14310 else
14311 s = t[1];
14312 }
14313
14314 fputs (s, file);
14315 }
14316 return;
14317
14318 case 'Q':
14319 if (! TARGET_MFCRF)
14320 return;
14321 fputc (',', file);
14322 /* FALLTHRU */
14323
14324 case 'R':
14325 /* X is a CR register. Print the mask for `mtcrf'. */
14326 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14327 output_operand_lossage ("invalid %%R value");
14328 else
14329 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
14330 return;
14331
14332 case 's':
14333 /* Low 5 bits of 32 - value */
14334 if (! INT_P (x))
14335 output_operand_lossage ("invalid %%s value");
14336 else
14337 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
14338 return;
14339
14340 case 't':
14341 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
14342 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14343 {
14344 output_operand_lossage ("invalid %%t value");
14345 return;
14346 }
14347
14348 /* Bit 3 is OV bit. */
14349 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
14350
14351 /* If we want bit 31, write a shift count of zero, not 32. */
14352 fprintf (file, "%d", i == 31 ? 0 : i + 1);
14353 return;
14354
14355 case 'T':
14356 /* Print the symbolic name of a branch target register. */
14357 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14358 x = XVECEXP (x, 0, 0);
14359 if (!REG_P (x) || (REGNO (x) != LR_REGNO
14360 && REGNO (x) != CTR_REGNO))
14361 output_operand_lossage ("invalid %%T value");
14362 else if (REGNO (x) == LR_REGNO)
14363 fputs ("lr", file);
14364 else
14365 fputs ("ctr", file);
14366 return;
14367
14368 case 'u':
14369 /* High-order or low-order 16 bits of constant, whichever is non-zero,
14370 for use in unsigned operand. */
14371 if (! INT_P (x))
14372 {
14373 output_operand_lossage ("invalid %%u value");
14374 return;
14375 }
14376
14377 uval = INTVAL (x);
14378 if ((uval & 0xffff) == 0)
14379 uval >>= 16;
14380
14381 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
14382 return;
14383
14384 case 'v':
14385 /* High-order 16 bits of constant for use in signed operand. */
14386 if (! INT_P (x))
14387 output_operand_lossage ("invalid %%v value");
14388 else
14389 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
14390 (INTVAL (x) >> 16) & 0xffff);
14391 return;
14392
14393 case 'U':
14394 /* Print `u' if this has an auto-increment or auto-decrement. */
14395 if (MEM_P (x)
14396 && (GET_CODE (XEXP (x, 0)) == PRE_INC
14397 || GET_CODE (XEXP (x, 0)) == PRE_DEC
14398 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
14399 putc ('u', file);
14400 return;
14401
14402 case 'V':
14403 /* Print the trap code for this operand. */
14404 switch (GET_CODE (x))
14405 {
14406 case EQ:
14407 fputs ("eq", file); /* 4 */
14408 break;
14409 case NE:
14410 fputs ("ne", file); /* 24 */
14411 break;
14412 case LT:
14413 fputs ("lt", file); /* 16 */
14414 break;
14415 case LE:
14416 fputs ("le", file); /* 20 */
14417 break;
14418 case GT:
14419 fputs ("gt", file); /* 8 */
14420 break;
14421 case GE:
14422 fputs ("ge", file); /* 12 */
14423 break;
14424 case LTU:
14425 fputs ("llt", file); /* 2 */
14426 break;
14427 case LEU:
14428 fputs ("lle", file); /* 6 */
14429 break;
14430 case GTU:
14431 fputs ("lgt", file); /* 1 */
14432 break;
14433 case GEU:
14434 fputs ("lge", file); /* 5 */
14435 break;
14436 default:
14437 output_operand_lossage ("invalid %%V value");
14438 }
14439 break;
14440
14441 case 'w':
14442 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
14443 normally. */
14444 if (INT_P (x))
14445 fprintf (file, HOST_WIDE_INT_PRINT_DEC, sext_hwi (INTVAL (x), 16));
14446 else
14447 print_operand (file, x, 0);
14448 return;
14449
14450 case 'x':
14451 /* X is a FPR or Altivec register used in a VSX context. */
14452 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
14453 output_operand_lossage ("invalid %%x value");
14454 else
14455 {
14456 int reg = REGNO (x);
14457 int vsx_reg = (FP_REGNO_P (reg)
14458 ? reg - 32
14459 : reg - FIRST_ALTIVEC_REGNO + 32);
14460
14461 #ifdef TARGET_REGNAMES
14462 if (TARGET_REGNAMES)
14463 fprintf (file, "%%vs%d", vsx_reg);
14464 else
14465 #endif
14466 fprintf (file, "%d", vsx_reg);
14467 }
14468 return;
14469
14470 case 'X':
14471 if (MEM_P (x)
14472 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
14473 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
14474 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
14475 putc ('x', file);
14476 return;
14477
14478 case 'Y':
14479 /* Like 'L', for the third word of TImode/PTImode. */
14480 if (REG_P (x))
14481 fputs (reg_names[REGNO (x) + 2], file);
14482 else if (MEM_P (x))
14483 {
14484 machine_mode mode = GET_MODE (x);
14485 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14486 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14487 output_address (mode, plus_constant (Pmode,
14488 XEXP (XEXP (x, 0), 0), 8));
14489 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14490 output_address (mode, plus_constant (Pmode,
14491 XEXP (XEXP (x, 0), 0), 8));
14492 else
14493 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
14494 if (small_data_operand (x, GET_MODE (x)))
14495 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14496 reg_names[SMALL_DATA_REG]);
14497 }
14498 return;
14499
14500 case 'z':
14501 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14502 x = XVECEXP (x, 0, 1);
14503 /* X is a SYMBOL_REF. Write out the name preceded by a
14504 period and without any trailing data in brackets. Used for function
14505 names. If we are configured for System V (or the embedded ABI) on
14506 the PowerPC, do not emit the period, since those systems do not use
14507 TOCs and the like. */
14508 if (!SYMBOL_REF_P (x))
14509 {
14510 output_operand_lossage ("invalid %%z value");
14511 return;
14512 }
14513
14514 /* For macho, check to see if we need a stub. */
14515 if (TARGET_MACHO)
14516 {
14517 const char *name = XSTR (x, 0);
14518 #if TARGET_MACHO
14519 if (darwin_symbol_stubs
14520 && MACHOPIC_INDIRECT
14521 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14522 name = machopic_indirection_name (x, /*stub_p=*/true);
14523 #endif
14524 assemble_name (file, name);
14525 }
14526 else if (!DOT_SYMBOLS)
14527 assemble_name (file, XSTR (x, 0));
14528 else
14529 rs6000_output_function_entry (file, XSTR (x, 0));
14530 return;
14531
14532 case 'Z':
14533 /* Like 'L', for last word of TImode/PTImode. */
14534 if (REG_P (x))
14535 fputs (reg_names[REGNO (x) + 3], file);
14536 else if (MEM_P (x))
14537 {
14538 machine_mode mode = GET_MODE (x);
14539 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14540 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14541 output_address (mode, plus_constant (Pmode,
14542 XEXP (XEXP (x, 0), 0), 12));
14543 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14544 output_address (mode, plus_constant (Pmode,
14545 XEXP (XEXP (x, 0), 0), 12));
14546 else
14547 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
14548 if (small_data_operand (x, GET_MODE (x)))
14549 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14550 reg_names[SMALL_DATA_REG]);
14551 }
14552 return;
14553
14554 /* Print AltiVec memory operand. */
14555 case 'y':
14556 {
14557 rtx tmp;
14558
14559 gcc_assert (MEM_P (x));
14560
14561 tmp = XEXP (x, 0);
14562
14563 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
14564 && GET_CODE (tmp) == AND
14565 && CONST_INT_P (XEXP (tmp, 1))
14566 && INTVAL (XEXP (tmp, 1)) == -16)
14567 tmp = XEXP (tmp, 0);
14568 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
14569 && GET_CODE (tmp) == PRE_MODIFY)
14570 tmp = XEXP (tmp, 1);
14571 if (REG_P (tmp))
14572 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
14573 else
14574 {
14575 if (GET_CODE (tmp) != PLUS
14576 || !REG_P (XEXP (tmp, 0))
14577 || !REG_P (XEXP (tmp, 1)))
14578 {
14579 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
14580 break;
14581 }
14582
14583 if (REGNO (XEXP (tmp, 0)) == 0)
14584 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
14585 reg_names[ REGNO (XEXP (tmp, 0)) ]);
14586 else
14587 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
14588 reg_names[ REGNO (XEXP (tmp, 1)) ]);
14589 }
14590 break;
14591 }
14592
14593 case 0:
14594 if (REG_P (x))
14595 fprintf (file, "%s", reg_names[REGNO (x)]);
14596 else if (MEM_P (x))
14597 {
14598 /* We need to handle PRE_INC and PRE_DEC here, since we need to
14599 know the width from the mode. */
14600 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
14601 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
14602 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14603 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
14604 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
14605 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14606 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14607 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
14608 else
14609 output_address (GET_MODE (x), XEXP (x, 0));
14610 }
14611 else if (toc_relative_expr_p (x, false,
14612 &tocrel_base_oac, &tocrel_offset_oac))
14613 /* This hack along with a corresponding hack in
14614 rs6000_output_addr_const_extra arranges to output addends
14615 where the assembler expects to find them. eg.
14616 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
14617 without this hack would be output as "x@toc+4". We
14618 want "x+4@toc". */
14619 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14620 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
14621 output_addr_const (file, XVECEXP (x, 0, 0));
14622 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14623 output_addr_const (file, XVECEXP (x, 0, 1));
14624 else
14625 output_addr_const (file, x);
14626 return;
14627
14628 case '&':
14629 if (const char *name = get_some_local_dynamic_name ())
14630 assemble_name (file, name);
14631 else
14632 output_operand_lossage ("'%%&' used without any "
14633 "local dynamic TLS references");
14634 return;
14635
14636 default:
14637 output_operand_lossage ("invalid %%xn code");
14638 }
14639 }
14640 \f
14641 /* Print the address of an operand. */
14642
14643 void
14644 print_operand_address (FILE *file, rtx x)
14645 {
14646 if (REG_P (x))
14647 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
14648
14649 /* Is it a PC-relative address? */
14650 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
14651 {
14652 HOST_WIDE_INT offset;
14653
14654 if (GET_CODE (x) == CONST)
14655 x = XEXP (x, 0);
14656
14657 if (GET_CODE (x) == PLUS)
14658 {
14659 offset = INTVAL (XEXP (x, 1));
14660 x = XEXP (x, 0);
14661 }
14662 else
14663 offset = 0;
14664
14665 output_addr_const (file, x);
14666
14667 if (offset)
14668 fprintf (file, "%+" PRId64, offset);
14669
14670 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
14671 fprintf (file, "@got");
14672
14673 fprintf (file, "@pcrel");
14674 }
14675 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
14676 || GET_CODE (x) == LABEL_REF)
14677 {
14678 output_addr_const (file, x);
14679 if (small_data_operand (x, GET_MODE (x)))
14680 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14681 reg_names[SMALL_DATA_REG]);
14682 else
14683 gcc_assert (!TARGET_TOC);
14684 }
14685 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14686 && REG_P (XEXP (x, 1)))
14687 {
14688 if (REGNO (XEXP (x, 0)) == 0)
14689 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
14690 reg_names[ REGNO (XEXP (x, 0)) ]);
14691 else
14692 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
14693 reg_names[ REGNO (XEXP (x, 1)) ]);
14694 }
14695 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14696 && CONST_INT_P (XEXP (x, 1)))
14697 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
14698 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
14699 #if TARGET_MACHO
14700 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14701 && CONSTANT_P (XEXP (x, 1)))
14702 {
14703 fprintf (file, "lo16(");
14704 output_addr_const (file, XEXP (x, 1));
14705 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14706 }
14707 #endif
14708 #if TARGET_ELF
14709 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14710 && CONSTANT_P (XEXP (x, 1)))
14711 {
14712 output_addr_const (file, XEXP (x, 1));
14713 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14714 }
14715 #endif
14716 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
14717 {
14718 /* This hack along with a corresponding hack in
14719 rs6000_output_addr_const_extra arranges to output addends
14720 where the assembler expects to find them. eg.
14721 (lo_sum (reg 9)
14722 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
14723 without this hack would be output as "x@toc+8@l(9)". We
14724 want "x+8@toc@l(9)". */
14725 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14726 if (GET_CODE (x) == LO_SUM)
14727 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
14728 else
14729 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
14730 }
14731 else
14732 output_addr_const (file, x);
14733 }
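
/* Illustrative examples of the address forms printed above (register
   and offset values arbitrary):

	(reg 9)				->  0(9)
	(plus (reg 9) (const_int 16))	->  16(9)
	(plus (reg 9) (reg 10))		->  9,10
	(lo_sum (reg 9) (symbol "x"))	->  x@l(9)	[ELF]
	pc-relative symbol "x"		->  x@pcrel  */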
14734 \f
14735 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14736
14737 bool
14738 rs6000_output_addr_const_extra (FILE *file, rtx x)
14739 {
14740 if (GET_CODE (x) == UNSPEC)
14741 switch (XINT (x, 1))
14742 {
14743 case UNSPEC_TOCREL:
14744 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
14745 && REG_P (XVECEXP (x, 0, 1))
14746 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
14747 output_addr_const (file, XVECEXP (x, 0, 0));
14748 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
14749 {
14750 if (INTVAL (tocrel_offset_oac) >= 0)
14751 fprintf (file, "+");
14752 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
14753 }
14754 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
14755 {
14756 putc ('-', file);
14757 assemble_name (file, toc_label_name);
14758 need_toc_init = 1;
14759 }
14760 else if (TARGET_ELF)
14761 fputs ("@toc", file);
14762 return true;
14763
14764 #if TARGET_MACHO
14765 case UNSPEC_MACHOPIC_OFFSET:
14766 output_addr_const (file, XVECEXP (x, 0, 0));
14767 putc ('-', file);
14768 machopic_output_function_base_name (file);
14769 return true;
14770 #endif
14771 }
14772 return false;
14773 }
14774 \f
14775 /* Target hook for assembling integer objects. The PowerPC version has
14776 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
14777 is defined. It also needs to handle DI-mode objects on 64-bit
14778 targets. */
14779
14780 static bool
14781 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
14782 {
14783 #ifdef RELOCATABLE_NEEDS_FIXUP
14784 /* Special handling for SI values. */
14785 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
14786 {
14787 static int recurse = 0;
14788
14789 /* For -mrelocatable, we mark all addresses that need to be fixed up in
14790 the .fixup section. Since the TOC section is already relocated, we
14791 don't need to mark it here. We used to skip the text section, but it
14792 should never be valid for relocated addresses to be placed in the text
14793 section. */
14794 if (DEFAULT_ABI == ABI_V4
14795 && (TARGET_RELOCATABLE || flag_pic > 1)
14796 && in_section != toc_section
14797 && !recurse
14798 && !CONST_SCALAR_INT_P (x)
14799 && CONSTANT_P (x))
14800 {
14801 char buf[256];
14802
14803 recurse = 1;
14804 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
14805 fixuplabelno++;
14806 ASM_OUTPUT_LABEL (asm_out_file, buf);
14807 fprintf (asm_out_file, "\t.long\t(");
14808 output_addr_const (asm_out_file, x);
14809 fprintf (asm_out_file, ")@fixup\n");
14810 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
14811 ASM_OUTPUT_ALIGN (asm_out_file, 2);
14812 fprintf (asm_out_file, "\t.long\t");
14813 assemble_name (asm_out_file, buf);
14814 fprintf (asm_out_file, "\n\t.previous\n");
14815 recurse = 0;
14816 return true;
14817 }
14818 /* Remove initial .'s to turn a -mcall-aixdesc function
14819 address into the address of the descriptor, not the function
14820 itself. */
14821 else if (SYMBOL_REF_P (x)
14822 && XSTR (x, 0)[0] == '.'
14823 && DEFAULT_ABI == ABI_AIX)
14824 {
14825 const char *name = XSTR (x, 0);
14826 while (*name == '.')
14827 name++;
14828
14829 fprintf (asm_out_file, "\t.long\t%s\n", name);
14830 return true;
14831 }
14832 }
14833 #endif /* RELOCATABLE_NEEDS_FIXUP */
14834 return default_assemble_integer (x, size, aligned_p);
14835 }
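
/* Sketch of the -mrelocatable fixup output (label number arbitrary):
   an initialized pointer such as "void *p = &x;" is emitted roughly as

	.LCP0:
		.long	(x)@fixup
		.section ".fixup","aw"
		.align	2
		.long	.LCP0
		.previous  */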
14836
14837 /* Return a template string for assembly to emit when making an
14838 external call. FUNOP is the call mem argument operand number. */
14839
14840 static const char *
14841 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
14842 {
14843 /* -Wformat-overflow workaround, without which gcc thinks that %u
14844 might produce 10 digits. */
14845 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14846
14847 char arg[12];
14848 arg[0] = 0;
14849 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14850 {
14851 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14852 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
14853 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14854 sprintf (arg, "(%%&@tlsld)");
14855 }
14856
14857 /* The magic 32768 offset here corresponds to the offset of
14858 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
14859 char z[11];
14860 sprintf (z, "%%z%u%s", funop,
14861 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
14862 ? "+32768" : ""));
14863
14864 static char str[32]; /* 1 spare */
14865 if (rs6000_pcrel_p ())
14866 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
14867 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
14868 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14869 sibcall ? "" : "\n\tnop");
14870 else if (DEFAULT_ABI == ABI_V4)
14871 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14872 flag_pic ? "@plt" : "");
14873 #if TARGET_MACHO
14874 /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
14875 else if (DEFAULT_ABI == ABI_DARWIN)
14876 {
14877 /* The cookie is in operand func+2. */
14878 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
14879 int cookie = INTVAL (operands[funop + 2]);
14880 if (cookie & CALL_LONG)
14881 {
14882 tree funname = get_identifier (XSTR (operands[funop], 0));
14883 tree labelname = get_prev_label (funname);
14884 gcc_checking_assert (labelname && !sibcall);
14885
14886 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
14887 instruction will reach 'foo', otherwise link as 'bl L42'".
14888 "L42" should be a 'branch island', that will do a far jump to
14889 'foo'. Branch islands are generated in
14890 macho_branch_islands(). */
14891 sprintf (str, "jbsr %%z%u,%.10s", funop,
14892 IDENTIFIER_POINTER (labelname));
14893 }
14894 else
14895 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
14896 after the call. */
14897 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
14898 }
14899 #endif
14900 else
14901 gcc_unreachable ();
14902 return str;
14903 }
14904
14905 const char *
14906 rs6000_call_template (rtx *operands, unsigned int funop)
14907 {
14908 return rs6000_call_template_1 (operands, funop, false);
14909 }
14910
14911 const char *
14912 rs6000_sibcall_template (rtx *operands, unsigned int funop)
14913 {
14914 return rs6000_call_template_1 (operands, funop, true);
14915 }
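
/* Examples of the templates returned above (operand numbers
   illustrative):

	bl %z0@notoc		# pcrel call
	bl %z0
	nop			# ABI_AIX / ABI_ELFv2 call
	b %z0			# sibcall
	bl %z0@plt		# ABI_V4 with -fPIC  */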
14916
14917 /* As above, for indirect calls. */
14918
14919 static const char *
14920 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
14921 bool sibcall)
14922 {
14923 /* -Wformat-overflow workaround, without which gcc thinks that %u
14924 might produce 10 digits. Note that -Wformat-overflow will not
14925 currently warn here for str[], so do not rely on a warning to
14926 ensure str[] is correctly sized. */
14927 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14928
14929 /* Currently, funop is either 0 or 1. The maximum string is always
14930 a !speculate 64-bit __tls_get_addr call.
14931
14932 ABI_ELFv2, pcrel:
14933 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14934 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14935 . 9 crset 2\n\t
14936 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14937 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14938 . 8 beq%T1l-
14939 .---
14940 .142
14941
14942 ABI_AIX:
14943 . 9 ld 2,%3\n\t
14944 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14945 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14946 . 9 crset 2\n\t
14947 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14948 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14949 . 10 beq%T1l-\n\t
14950 . 10 ld 2,%4(1)
14951 .---
14952 .151
14953
14954 ABI_ELFv2:
14955 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14956 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14957 . 9 crset 2\n\t
14958 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14959 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14960 . 10 beq%T1l-\n\t
14961 . 10 ld 2,%3(1)
14962 .---
14963 .142
14964
14965 ABI_V4:
14966 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14967 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
14968 . 9 crset 2\n\t
14969 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14970 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
14971 . 8 beq%T1l-
14972 .---
14973 .141 */
14974 static char str[160]; /* 8 spare */
14975 char *s = str;
14976 const char *ptrload = TARGET_64BIT ? "d" : "wz";
14977
14978 if (DEFAULT_ABI == ABI_AIX)
14979 s += sprintf (s,
14980 "l%s 2,%%%u\n\t",
14981 ptrload, funop + 3);
14982
14983 /* We don't need the extra code to stop indirect call speculation if
14984 calling via LR. */
14985 bool speculate = (TARGET_MACHO
14986 || rs6000_speculate_indirect_jumps
14987 || (REG_P (operands[funop])
14988 && REGNO (operands[funop]) == LR_REGNO));
14989
14990 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
14991 {
14992 const char *rel64 = TARGET_64BIT ? "64" : "";
14993 char tls[29];
14994 tls[0] = 0;
14995 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14996 {
14997 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14998 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
14999 rel64, funop + 1);
15000 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
15001 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
15002 rel64);
15003 }
15004
15005 const char *notoc = rs6000_pcrel_p () ? "_NOTOC" : "";
15006 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
15007 && flag_pic == 2 ? "+32768" : "");
15008 if (!speculate)
15009 {
15010 s += sprintf (s,
15011 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
15012 tls, rel64, notoc, funop, addend);
15013 s += sprintf (s, "crset 2\n\t");
15014 }
15015 s += sprintf (s,
15016 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
15017 tls, rel64, notoc, funop, addend);
15018 }
15019 else if (!speculate)
15020 s += sprintf (s, "crset 2\n\t");
15021
15022 if (rs6000_pcrel_p ())
15023 {
15024 if (speculate)
15025 sprintf (s, "b%%T%ul", funop);
15026 else
15027 sprintf (s, "beq%%T%ul-", funop);
15028 }
15029 else if (DEFAULT_ABI == ABI_AIX)
15030 {
15031 if (speculate)
15032 sprintf (s,
15033 "b%%T%ul\n\t"
15034 "l%s 2,%%%u(1)",
15035 funop, ptrload, funop + 4);
15036 else
15037 sprintf (s,
15038 "beq%%T%ul-\n\t"
15039 "l%s 2,%%%u(1)",
15040 funop, ptrload, funop + 4);
15041 }
15042 else if (DEFAULT_ABI == ABI_ELFv2)
15043 {
15044 if (speculate)
15045 sprintf (s,
15046 "b%%T%ul\n\t"
15047 "l%s 2,%%%u(1)",
15048 funop, ptrload, funop + 3);
15049 else
15050 sprintf (s,
15051 "beq%%T%ul-\n\t"
15052 "l%s 2,%%%u(1)",
15053 funop, ptrload, funop + 3);
15054 }
15055 else
15056 {
15057 if (speculate)
15058 sprintf (s,
15059 "b%%T%u%s",
15060 funop, sibcall ? "" : "l");
15061 else
15062 sprintf (s,
15063 "beq%%T%u%s-%s",
15064 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
15065 }
15066 return str;
15067 }
15068
15069 const char *
15070 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
15071 {
15072 return rs6000_indirect_call_template_1 (operands, funop, false);
15073 }
15074
15075 const char *
15076 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
15077 {
15078 return rs6000_indirect_call_template_1 (operands, funop, true);
15079 }
15080
15081 #if HAVE_AS_PLTSEQ
15082 /* Output indirect call insns. WHICH identifies the type of sequence. */
15083 const char *
15084 rs6000_pltseq_template (rtx *operands, int which)
15085 {
15086 const char *rel64 = TARGET_64BIT ? "64" : "";
15087 char tls[30];
15088 tls[0] = 0;
15089 if (GET_CODE (operands[3]) == UNSPEC)
15090 {
15091 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
15092 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
15093 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
15094 off, rel64);
15095 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
15096 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
15097 off, rel64);
15098 }
15099
15100 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
15101 static char str[96]; /* 10 spare */
15102 char off = WORDS_BIG_ENDIAN ? '2' : '4';
15103 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
15104 && flag_pic == 2 ? "+32768" : "");
15105 switch (which)
15106 {
15107 case RS6000_PLTSEQ_TOCSAVE:
15108 sprintf (str,
15109 "st%s\n\t"
15110 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
15111 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
15112 tls, rel64);
15113 break;
15114 case RS6000_PLTSEQ_PLT16_HA:
15115 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
15116 sprintf (str,
15117 "lis %%0,0\n\t"
15118 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
15119 tls, off, rel64);
15120 else
15121 sprintf (str,
15122 "addis %%0,%%1,0\n\t"
15123 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
15124 tls, off, rel64, addend);
15125 break;
15126 case RS6000_PLTSEQ_PLT16_LO:
15127 sprintf (str,
15128 "l%s %%0,0(%%1)\n\t"
15129 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
15130 TARGET_64BIT ? "d" : "wz",
15131 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
15132 break;
15133 case RS6000_PLTSEQ_MTCTR:
15134 sprintf (str,
15135 "mtctr %%1\n\t"
15136 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
15137 tls, rel64, addend);
15138 break;
15139 case RS6000_PLTSEQ_PLT_PCREL34:
15140 sprintf (str,
15141 "pl%s %%0,0(0),1\n\t"
15142 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
15143 TARGET_64BIT ? "d" : "wz",
15144 tls, rel64);
15145 break;
15146 default:
15147 gcc_unreachable ();
15148 }
15149 return str;
15150 }
15151 #endif
15152 \f
15153 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
15154 /* Emit an assembler directive to set symbol visibility for DECL to
15155 VISIBILITY_TYPE. */
15156
15157 static void
15158 rs6000_assemble_visibility (tree decl, int vis)
15159 {
15160 if (TARGET_XCOFF)
15161 return;
15162
15163 /* Functions need to have their entry point symbol visibility set as
15164 well as their descriptor symbol visibility. */
15165 if (DEFAULT_ABI == ABI_AIX
15166 && DOT_SYMBOLS
15167 && TREE_CODE (decl) == FUNCTION_DECL)
15168 {
15169 static const char * const visibility_types[] = {
15170 NULL, "protected", "hidden", "internal"
15171 };
15172
15173 const char *name, *type;
15174
15175 name = ((* targetm.strip_name_encoding)
15176 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
15177 type = visibility_types[vis];
15178
15179 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
15180 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
15181 }
15182 else
15183 default_assemble_visibility (decl, vis);
15184 }
15185 #endif
15186 \f
15187 /* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
15188 entry. If RECORD_P is true and the target supports named sections,
15189 the location of the NOPs will be recorded in a special object section
15190 called "__patchable_function_entries". This routine may be called
15191 twice per function to put NOPs before and after the function
15192 entry. */
15193
15194 void
15195 rs6000_print_patchable_function_entry (FILE *file,
15196 unsigned HOST_WIDE_INT patch_area_size,
15197 bool record_p)
15198 {
15199 bool global_entry_needed_p = rs6000_global_entry_point_prologue_needed_p ();
15200 /* For a function which needs a global entry point, we emit the
15201 patchable area before and after the local entry point under the control
15202 of cfun->machine->global_entry_emitted; see the handling in
15203 rs6000_output_function_prologue. */
15204 if (!global_entry_needed_p || cfun->machine->global_entry_emitted)
15205 default_print_patchable_function_entry (file, patch_area_size, record_p);
15206 }
15207 \f
15208 enum rtx_code
15209 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
15210 {
15211 /* Reversal of FP compares takes care -- an ordered compare
15212 becomes an unordered compare and vice versa. */
15213 if (mode == CCFPmode
15214 && (!flag_finite_math_only
15215 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
15216 || code == UNEQ || code == LTGT))
15217 return reverse_condition_maybe_unordered (code);
15218 else
15219 return reverse_condition (code);
15220 }
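
/* E.g. reversing GE on a CCFPmode compare yields UNLT rather than LT,
   so that a NaN operand still transfers to the reversed branch, while
   an integer CCmode compare reverses GE to plain LT.  */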
15221
15222 /* Check if C (as a 64-bit integer) can be rotated to a constant which contains
15223 nonzero bits at the LOWBITS low bits only.
15224
15225 Return true if C can be rotated to such a constant; if so, the number of
15226 bits by which C is rotated is written to *ROT.
15227 Return false otherwise. */
15228
15229 bool
15230 can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
15231 {
15232 int clz = HOST_BITS_PER_WIDE_INT - lowbits;
15233
15234 /* case a. 0..0xxx: already at least clz zeros. */
15235 int lz = clz_hwi (c);
15236 if (lz >= clz)
15237 {
15238 *rot = 0;
15239 return true;
15240 }
15241
15242 /* case b. 0..0xxx0..0: at least clz zeros. */
15243 int tz = ctz_hwi (c);
15244 if (lz + tz >= clz)
15245 {
15246 *rot = HOST_BITS_PER_WIDE_INT - tz;
15247 return true;
15248 }
15249
15250 /* case c. xx10.....0xx: rotate 'clz - 1' bits first, then check case b.
15251 ^bit -> Vbit; afterwards the zeros are at the head or the tail:
15252 00...00xxx100, where 'clz - 1' >= 'bits of xxxx'. */
15253 const int rot_bits = lowbits + 1;
15254 unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1));
15255 tz = ctz_hwi (rc);
15256 if (clz_hwi (rc) + tz >= clz)
15257 {
15258 *rot = HOST_BITS_PER_WIDE_INT - (tz + rot_bits);
15259 return true;
15260 }
15261
15262 return false;
15263 }
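
/* Worked example (illustrative values): c = 0x00ff000000000000 with
   LOWBITS = 16 gives clz = 48, lz = 8 and tz = 48.  Since lz + tz >=
   clz, case b applies and *ROT = 64 - 48 = 16; rotating C left by 16
   indeed yields 0xff, which has nonzero bits only in the low 16
   bits.  */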
15264
15265 /* Check if C (as a 64-bit integer) can be rotated to a positive 16-bit constant,
15266 i.e. one with 48 leading zero bits above 16 bits of any value. */
15267
15268 bool
15269 can_be_rotated_to_positive_16bits (HOST_WIDE_INT c)
15270 {
15271 int rot = 0;
15272 bool res = can_be_rotated_to_lowbits (c, 16, &rot);
15273 return res && rot > 0;
15274 }
15275
15276 /* Check if C (as a 64-bit integer) can be rotated to a negative 15-bit constant,
15277 i.e. one with 49 leading one bits above 15 bits of any value. */
15278
15279 bool
15280 can_be_rotated_to_negative_15bits (HOST_WIDE_INT c)
15281 {
15282 int rot = 0;
15283 bool res = can_be_rotated_to_lowbits (~c, 15, &rot);
15284 return res && rot > 0;
15285 }
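
/* E.g. (illustrative) can_be_rotated_to_positive_16bits
   (0xffff000000000000) holds: rotating left by 16 yields 0xffff,
   i.e. 48 leading zero bits above a 16-bit value.  The negative
   variant applies the same test to ~C.  */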
15286
15287 /* Generate a compare for CODE. Return a brand-new rtx that
15288 represents the result of the compare. */
15289
15290 static rtx
15291 rs6000_generate_compare (rtx cmp, machine_mode mode)
15292 {
15293 machine_mode comp_mode;
15294 rtx compare_result;
15295 enum rtx_code code = GET_CODE (cmp);
15296 rtx op0 = XEXP (cmp, 0);
15297 rtx op1 = XEXP (cmp, 1);
15298
15299 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
15300 comp_mode = CCmode;
15301 else if (FLOAT_MODE_P (mode))
15302 comp_mode = CCFPmode;
15303 else if (code == GTU || code == LTU
15304 || code == GEU || code == LEU)
15305 comp_mode = CCUNSmode;
15306 else if ((code == EQ || code == NE)
15307 && unsigned_reg_p (op0)
15308 && (unsigned_reg_p (op1)
15309 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
15310 /* These are unsigned values; perhaps there will be a later
15311 ordering compare that can be shared with this one. */
15312 comp_mode = CCUNSmode;
15313 else
15314 comp_mode = CCmode;
15315
15316 /* If we have an unsigned compare, make sure we don't have a signed value as
15317 an immediate. */
15318 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
15319 && INTVAL (op1) < 0)
15320 {
15321 op0 = copy_rtx_if_shared (op0);
15322 op1 = force_reg (GET_MODE (op0), op1);
15323 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
15324 }
15325
15326 /* First, the compare. */
15327 compare_result = gen_reg_rtx (comp_mode);
15328
15329 /* Compare IEEE 128-bit values held in VSX registers when the
15330 comparison has no hardware support. */
15331 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
15332 {
15333 rtx libfunc = NULL_RTX;
15334 bool check_nan = false;
15335 rtx dest;
15336
15337 switch (code)
15338 {
15339 case EQ:
15340 case NE:
15341 libfunc = optab_libfunc (eq_optab, mode);
15342 break;
15343
15344 case GT:
15345 case GE:
15346 libfunc = optab_libfunc (ge_optab, mode);
15347 break;
15348
15349 case LT:
15350 case LE:
15351 libfunc = optab_libfunc (le_optab, mode);
15352 break;
15353
15354 case UNORDERED:
15355 case ORDERED:
15356 libfunc = optab_libfunc (unord_optab, mode);
15357 code = (code == UNORDERED) ? NE : EQ;
15358 break;
15359
15360 case UNGE:
15361 case UNGT:
15362 check_nan = true;
15363 libfunc = optab_libfunc (ge_optab, mode);
15364 code = (code == UNGE) ? GE : GT;
15365 break;
15366
15367 case UNLE:
15368 case UNLT:
15369 check_nan = true;
15370 libfunc = optab_libfunc (le_optab, mode);
15371 code = (code == UNLE) ? LE : LT;
15372 break;
15373
15374 case UNEQ:
15375 case LTGT:
15376 check_nan = true;
15377 libfunc = optab_libfunc (eq_optab, mode);
15378 code = (code == UNEQ) ? EQ : NE;
15379 break;
15380
15381 default:
15382 gcc_unreachable ();
15383 }
15384
15385 gcc_assert (libfunc);
15386
15387 if (!check_nan)
15388 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
15389 SImode, op0, mode, op1, mode);
15390
15391 /* The library signals an exception for signalling NaNs, so we need to
15392 handle isgreater, etc. by first checking isordered. */
15393 else
15394 {
15395 rtx ne_rtx, normal_dest, unord_dest;
15396 rtx unord_func = optab_libfunc (unord_optab, mode);
15397 rtx join_label = gen_label_rtx ();
15398 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
15399 rtx unord_cmp = gen_reg_rtx (comp_mode);
15400
15401
15402 /* Test for either value being a NaN. */
15403 gcc_assert (unord_func);
15404 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
15405 SImode, op0, mode, op1, mode);
15406
15407 /* Set the result to 1 if either value is a NaN, so that the final
15408 NE test against zero succeeds, and jump to the join label. */
15409 dest = gen_reg_rtx (SImode);
15410 emit_move_insn (dest, const1_rtx);
15411 emit_insn (gen_rtx_SET (unord_cmp,
15412 gen_rtx_COMPARE (comp_mode, unord_dest,
15413 const0_rtx)));
15414
15415 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
15416 emit_jump_insn (gen_rtx_SET (pc_rtx,
15417 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
15418 join_ref,
15419 pc_rtx)));
15420
15421 /* Do the normal comparison, knowing that the values are not
15422 NaNs. */
15423 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
15424 SImode, op0, mode, op1, mode);
15425
15426 emit_insn (gen_cstoresi4 (dest,
15427 gen_rtx_fmt_ee (code, SImode, normal_dest,
15428 const0_rtx),
15429 normal_dest, const0_rtx));
15430
15431 /* Join the NaN and non-NaN paths. Compare dest against 0. */
15432 emit_label (join_label);
15433 code = NE;
15434 }
15435
15436 emit_insn (gen_rtx_SET (compare_result,
15437 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
15438 }
15439
15440 else
15441 {
15442 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
15443 CLOBBERs to match cmptf_internal2 pattern. */
15444 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
15445 && FLOAT128_IBM_P (GET_MODE (op0))
15446 && TARGET_HARD_FLOAT)
15447 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15448 gen_rtvec (10,
15449 gen_rtx_SET (compare_result,
15450 gen_rtx_COMPARE (comp_mode, op0, op1)),
15451 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15452 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15453 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15454 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15455 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15456 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15457 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15458 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15459 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
15460 else if (GET_CODE (op1) == UNSPEC
15461 && XINT (op1, 1) == UNSPEC_SP_TEST)
15462 {
15463 rtx op1b = XVECEXP (op1, 0, 0);
15464 comp_mode = CCEQmode;
15465 compare_result = gen_reg_rtx (CCEQmode);
15466 if (TARGET_64BIT)
15467 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
15468 else
15469 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
15470 }
15471 else if (mode == V16QImode)
15472 {
15473 gcc_assert (code == EQ || code == NE);
15474
15475 rtx result_vector = gen_reg_rtx (V16QImode);
15476 rtx cc_bit = gen_reg_rtx (SImode);
15477 emit_insn (gen_altivec_vcmpequb_p (result_vector, op0, op1));
15478 emit_insn (gen_cr6_test_for_lt (cc_bit));
15479 emit_insn (gen_rtx_SET (compare_result,
15480 gen_rtx_COMPARE (comp_mode, cc_bit,
15481 const1_rtx)));
15482 }
15483 else
15484 emit_insn (gen_rtx_SET (compare_result,
15485 gen_rtx_COMPARE (comp_mode, op0, op1)));
15486 }
15487
15488 validate_condition_mode (code, GET_MODE (compare_result));
15489
15490 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
15491 }
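
/* E.g. (illustrative) an SImode GTU compare of two registers selects
   CCUNSmode, emits (set (reg:CCUNS cr0) (compare:CCUNS a b)) --
   typically a cmplw -- and returns (gtu (reg:CCUNS cr0) (const_int 0))
   for the caller to use in a branch or scc.  */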
15492
15493 \f
15494 /* Return the diagnostic message string if the binary operation OP is
15495 not permitted on TYPE1 and TYPE2, NULL otherwise. */
15496
15497 static const char*
15498 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
15499 const_tree type1,
15500 const_tree type2)
15501 {
15502 machine_mode mode1 = TYPE_MODE (type1);
15503 machine_mode mode2 = TYPE_MODE (type2);
15504
15505 /* For complex modes, use the inner type. */
15506 if (COMPLEX_MODE_P (mode1))
15507 mode1 = GET_MODE_INNER (mode1);
15508
15509 if (COMPLEX_MODE_P (mode2))
15510 mode2 = GET_MODE_INNER (mode2);
15511
15512 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
15513 double to intermix unless -mfloat128-convert. */
15514 if (mode1 == mode2)
15515 return NULL;
15516
15517 if (!TARGET_FLOAT128_CVT)
15518 {
15519 if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
15520 || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
15521 return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
15522 "point types");
15523 }
15524
15525 return NULL;
15526 }
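
/* For example, without -mfloat128-convert the following mixed-mode
   addition is rejected with the message above (sketch):

	__float128 a;
	__ibm128 b;
	... a + b ...	// IEEE 128-bit mixed with IBM 128-bit  */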
15527
15528 \f
15529 /* Expand floating point conversion to/from __float128 and __ibm128. */
15530
15531 void
15532 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
15533 {
15534 machine_mode dest_mode = GET_MODE (dest);
15535 machine_mode src_mode = GET_MODE (src);
15536 convert_optab cvt = unknown_optab;
15537 bool do_move = false;
15538 rtx libfunc = NULL_RTX;
15539 rtx dest2;
15540 typedef rtx (*rtx_2func_t) (rtx, rtx);
15541 rtx_2func_t hw_convert = (rtx_2func_t)0;
15542 size_t kf_or_tf;
15543
15544 struct hw_conv_t {
15545 rtx_2func_t from_df;
15546 rtx_2func_t from_sf;
15547 rtx_2func_t from_si_sign;
15548 rtx_2func_t from_si_uns;
15549 rtx_2func_t from_di_sign;
15550 rtx_2func_t from_di_uns;
15551 rtx_2func_t to_df;
15552 rtx_2func_t to_sf;
15553 rtx_2func_t to_si_sign;
15554 rtx_2func_t to_si_uns;
15555 rtx_2func_t to_di_sign;
15556 rtx_2func_t to_di_uns;
15557 } hw_conversions[2] = {
15558 /* conversions to/from KFmode. */
15559 {
15560 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
15561 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
15562 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
15563 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
15564 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
15565 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
15566 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
15567 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
15568 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
15569 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
15570 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
15571 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
15572 },
15573
15574 /* conversions to/from TFmode. */
15575 {
15576 gen_extenddftf2_hw, /* TFmode <- DFmode. */
15577 gen_extendsftf2_hw, /* TFmode <- SFmode. */
15578 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
15579 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
15580 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
15581 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
15582 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
15583 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
15584 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
15585 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
15586 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
15587 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
15588 },
15589 };
15590
15591 if (dest_mode == src_mode)
15592 gcc_unreachable ();
15593
15594 /* Eliminate memory operations. */
15595 if (MEM_P (src))
15596 src = force_reg (src_mode, src);
15597
15598 if (MEM_P (dest))
15599 {
15600 rtx tmp = gen_reg_rtx (dest_mode);
15601 rs6000_expand_float128_convert (tmp, src, unsigned_p);
15602 rs6000_emit_move (dest, tmp, dest_mode);
15603 return;
15604 }
15605
15606 /* Convert to IEEE 128-bit floating point. */
15607 if (FLOAT128_IEEE_P (dest_mode))
15608 {
15609 if (dest_mode == KFmode)
15610 kf_or_tf = 0;
15611 else if (dest_mode == TFmode)
15612 kf_or_tf = 1;
15613 else
15614 gcc_unreachable ();
15615
15616 switch (src_mode)
15617 {
15618 case E_DFmode:
15619 cvt = sext_optab;
15620 hw_convert = hw_conversions[kf_or_tf].from_df;
15621 break;
15622
15623 case E_SFmode:
15624 cvt = sext_optab;
15625 hw_convert = hw_conversions[kf_or_tf].from_sf;
15626 break;
15627
15628 case E_KFmode:
15629 case E_IFmode:
15630 case E_TFmode:
15631 if (FLOAT128_IBM_P (src_mode))
15632 cvt = sext_optab;
15633 else
15634 do_move = true;
15635 break;
15636
15637 case E_SImode:
15638 if (unsigned_p)
15639 {
15640 cvt = ufloat_optab;
15641 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
15642 }
15643 else
15644 {
15645 cvt = sfloat_optab;
15646 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
15647 }
15648 break;
15649
15650 case E_DImode:
15651 if (unsigned_p)
15652 {
15653 cvt = ufloat_optab;
15654 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
15655 }
15656 else
15657 {
15658 cvt = sfloat_optab;
15659 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
15660 }
15661 break;
15662
15663 default:
15664 gcc_unreachable ();
15665 }
15666 }
15667
15668 /* Convert from IEEE 128-bit floating point. */
15669 else if (FLOAT128_IEEE_P (src_mode))
15670 {
15671 if (src_mode == KFmode)
15672 kf_or_tf = 0;
15673 else if (src_mode == TFmode)
15674 kf_or_tf = 1;
15675 else
15676 gcc_unreachable ();
15677
15678 switch (dest_mode)
15679 {
15680 case E_DFmode:
15681 cvt = trunc_optab;
15682 hw_convert = hw_conversions[kf_or_tf].to_df;
15683 break;
15684
15685 case E_SFmode:
15686 cvt = trunc_optab;
15687 hw_convert = hw_conversions[kf_or_tf].to_sf;
15688 break;
15689
15690 case E_KFmode:
15691 case E_IFmode:
15692 case E_TFmode:
15693 if (FLOAT128_IBM_P (dest_mode))
15694 cvt = trunc_optab;
15695 else
15696 do_move = true;
15697 break;
15698
15699 case E_SImode:
15700 if (unsigned_p)
15701 {
15702 cvt = ufix_optab;
15703 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
15704 }
15705 else
15706 {
15707 cvt = sfix_optab;
15708 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
15709 }
15710 break;
15711
15712 case E_DImode:
15713 if (unsigned_p)
15714 {
15715 cvt = ufix_optab;
15716 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
15717 }
15718 else
15719 {
15720 cvt = sfix_optab;
15721 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
15722 }
15723 break;
15724
15725 default:
15726 gcc_unreachable ();
15727 }
15728 }
15729
15730 /* Both IBM format. */
15731 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
15732 do_move = true;
15733
15734 else
15735 gcc_unreachable ();
15736
15737 /* Handle conversion between TFmode/KFmode/IFmode. */
15738 if (do_move)
15739 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
15740
15741 /* Handle conversion if we have hardware support. */
15742 else if (TARGET_FLOAT128_HW && hw_convert)
15743 emit_insn ((hw_convert) (dest, src));
15744
15745 /* Call an external function to do the conversion. */
15746 else if (cvt != unknown_optab)
15747 {
15748 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
15749 gcc_assert (libfunc != NULL_RTX);
15750
15751 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
15752 src, src_mode);
15753
15754 gcc_assert (dest2 != NULL_RTX);
15755 if (!rtx_equal_p (dest, dest2))
15756 emit_move_insn (dest, dest2);
15757 }
15758
15759 else
15760 gcc_unreachable ();
15761
15762 return;
15763 }
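
/* E.g. a DFmode -> KFmode (double -> __float128) conversion uses
   gen_extenddfkf2_hw when IEEE 128-bit hardware support is enabled,
   and otherwise calls the libgcc routine returned by
   convert_optab_libfunc (sext_optab, KFmode, DFmode).  (Illustrative;
   the routine chosen depends on the optab tables.)  */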
15764
15765 \f
15766 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
15767 can be used as that dest register. Return the dest register. */
15768
15769 rtx
15770 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
15771 {
15772 if (op2 == const0_rtx)
15773 return op1;
15774
15775 if (GET_CODE (scratch) == SCRATCH)
15776 scratch = gen_reg_rtx (mode);
15777
15778 if (logical_operand (op2, mode))
15779 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
15780 else
15781 emit_insn (gen_rtx_SET (scratch,
15782 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
15783
15784 return scratch;
15785 }
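/* For illustration, with OP1 in r3 and OP2 = (const_int 10), which is a
   logical_operand, the code above emits roughly

     (set (reg scratch) (xor (reg r3) (const_int 10)))

   so SCRATCH becomes zero exactly when r3 == 10; a constant that is not
   a logical_operand is negated and added instead.  */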
15786
15787 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
15788 requires this. The result is mode MODE. */
15789 rtx
15790 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
15791 {
15792 rtx cond[2];
15793 int n = 0;
15794 if (code == LTGT || code == LE || code == UNLT)
15795 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
15796 if (code == LTGT || code == GE || code == UNGT)
15797 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
15798 if (code == LE || code == GE || code == UNEQ)
15799 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
15800 if (code == UNLT || code == UNGT || code == UNEQ)
15801 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
15802
15803 gcc_assert (n == 2);
15804
15805 rtx cc = gen_reg_rtx (CCEQmode);
15806 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
15807 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
15808
15809 return cc;
15810 }
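/* As a quick reference, the pairs of CR bits OR'ed together above are:

     LTGT = LT|GT    LE = LT|EQ    GE = GT|EQ
     UNLT = LT|UN    UNGT = GT|UN  UNEQ = EQ|UN

   (UN being the "unordered" bit), which is why the assert requires that
   exactly two of the four conditions matched.  */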
15811
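/* Emit code to set OPERANDS[0] to 1 if the comparison OPERANDS[1],
   performed in mode MODE, is true, and to 0 otherwise.  */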
15812 void
15813 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
15814 {
15815 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
15816 rtx_code cond_code = GET_CODE (condition_rtx);
15817
15818 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
15819 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
15820 ;
15821 else if (cond_code == NE
15822 || cond_code == GE || cond_code == LE
15823 || cond_code == GEU || cond_code == LEU
15824 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
15825 {
15826 rtx not_result = gen_reg_rtx (CCEQmode);
15827 rtx not_op, rev_cond_rtx;
15828 machine_mode cc_mode;
15829
15830 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
15831
15832 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
15833 SImode, XEXP (condition_rtx, 0), const0_rtx);
15834 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
15835 emit_insn (gen_rtx_SET (not_result, not_op));
15836 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
15837 }
15838
15839 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
15840 if (op_mode == VOIDmode)
15841 op_mode = GET_MODE (XEXP (operands[1], 1));
15842
15843 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
15844 {
15845 PUT_MODE (condition_rtx, DImode);
15846 convert_move (operands[0], condition_rtx, 0);
15847 }
15848 else
15849 {
15850 PUT_MODE (condition_rtx, SImode);
15851 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
15852 }
15853 }
15854
15855 /* Emit a conditional branch: test the comparison OPERANDS[0], done in mode MODE, and branch to the label in OPERANDS[3]. */
15856
15857 void
15858 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
15859 {
15860 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
15861 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
15862 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
15863 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
15864 }
15865
15866 /* Return the string to output a conditional branch to LABEL, which is
15867 the operand template of the label, or NULL if the branch is really a
15868 conditional return.
15869
15870 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
15871 condition code register and its mode specifies what kind of
15872 comparison we made.
15873
15874 REVERSED is nonzero if we should reverse the sense of the comparison.
15875
15876 INSN is the insn. */
15877
15878 char *
15879 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
15880 {
15881 static char string[64];
15882 enum rtx_code code = GET_CODE (op);
15883 rtx cc_reg = XEXP (op, 0);
15884 machine_mode mode = GET_MODE (cc_reg);
15885 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
15886 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
15887 int really_reversed = reversed ^ need_longbranch;
15888 char *s = string;
15889 const char *ccode;
15890 const char *pred;
15891 rtx note;
15892
15893 validate_condition_mode (code, mode);
15894
15895 /* Work out which way this really branches. We could use
15896 reverse_condition_maybe_unordered here always but this
15897 makes the resulting assembler clearer. */
15898 if (really_reversed)
15899 {
15900 /* Reversal of FP compares takes care -- an ordered compare
15901 becomes an unordered compare and vice versa. */
15902 if (mode == CCFPmode)
15903 code = reverse_condition_maybe_unordered (code);
15904 else
15905 code = reverse_condition (code);
15906 }
15907
15908 switch (code)
15909 {
15910 /* Not all of these are actually distinct opcodes, but
15911 we distinguish them for clarity of the resulting assembler. */
15912 case NE: case LTGT:
15913 ccode = "ne"; break;
15914 case EQ: case UNEQ:
15915 ccode = "eq"; break;
15916 case GE: case GEU:
15917 ccode = "ge"; break;
15918 case GT: case GTU: case UNGT:
15919 ccode = "gt"; break;
15920 case LE: case LEU:
15921 ccode = "le"; break;
15922 case LT: case LTU: case UNLT:
15923 ccode = "lt"; break;
15924 case UNORDERED: ccode = "un"; break;
15925 case ORDERED: ccode = "nu"; break;
15926 case UNGE: ccode = "nl"; break;
15927 case UNLE: ccode = "ng"; break;
15928 default:
15929 gcc_unreachable ();
15930 }
15931
15932 /* Maybe we have a guess as to how likely the branch is. */
15933 pred = "";
15934 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
15935 if (note != NULL_RTX)
15936 {
15937 /* PROB is the difference from 50%. */
15938 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
15939 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
15940
15941 /* Only hint for highly probable/improbable branches on newer cpus when
15942 we have real profile data, as static prediction overrides processor
15943 dynamic prediction. For older cpus we may as well always hint, but
15944 assume not taken for branches that are very close to 50% as a
15945 mispredicted taken branch is more expensive than a
15946 mispredicted not-taken branch. */
15947 if (rs6000_always_hint
15948 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
15949 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
15950 && br_prob_note_reliable_p (note)))
15951 {
15952 if (abs (prob) > REG_BR_PROB_BASE / 20
15953 && ((prob > 0) ^ need_longbranch))
15954 pred = "+";
15955 else
15956 pred = "-";
15957 }
15958 }
15959
15960 if (label == NULL)
15961 s += sprintf (s, "b%slr%s ", ccode, pred);
15962 else
15963 s += sprintf (s, "b%s%s ", ccode, pred);
15964
15965 /* We need to escape any '%' characters in the reg_names string.
15966 Assume they'd only be the first character.... */
15967 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
15968 *s++ = '%';
15969 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
15970
15971 if (label != NULL)
15972 {
15973 /* If the branch distance was too far, we may have to use an
15974 unconditional branch to go the distance. */
15975 if (need_longbranch)
15976 s += sprintf (s, ",$+8\n\tb %s", label);
15977 else
15978 s += sprintf (s, ",%s", label);
15979 }
15980
15981 return string;
15982 }
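/* Illustrative outputs (assumed label and default register names): an EQ
   test on CR7 with a reliable "likely taken" hint and a nearby label
   yields "beq+ 7,.L42"; if the label is out of conditional-branch range,
   the sense is reversed and we get "bne 7,$+8\n\tb .L42", a short branch
   around an unconditional one.  */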
15983
15984 /* Return insn for VSX or Altivec comparisons. */
15985
15986 static rtx
15987 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
15988 {
15989 rtx mask;
15990 machine_mode mode = GET_MODE (op0);
15991
15992 switch (code)
15993 {
15994 default:
15995 break;
15996
15997 case GE:
15998 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15999 return NULL_RTX;
16000 /* FALLTHRU */
16001
16002 case EQ:
16003 case GT:
16004 case GTU:
16005 case ORDERED:
16006 case UNORDERED:
16007 case UNEQ:
16008 case LTGT:
16009 mask = gen_reg_rtx (mode);
16010 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
16011 return mask;
16012 }
16013
16014 return NULL_RTX;
16015 }
16016
16017 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
16018 DMODE is the expected destination mode. This is a recursive function. */
16019
16020 static rtx
16021 rs6000_emit_vector_compare (enum rtx_code rcode,
16022 rtx op0, rtx op1,
16023 machine_mode dmode)
16024 {
16025 rtx mask;
16026 bool swap_operands = false;
16027 bool try_again = false;
16028
16029 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
16030 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
16031
16032 /* See if the comparison works as is. */
16033 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
16034 if (mask)
16035 return mask;
16036
16037 switch (rcode)
16038 {
16039 case LT:
16040 rcode = GT;
16041 swap_operands = true;
16042 try_again = true;
16043 break;
16044 case LTU:
16045 rcode = GTU;
16046 swap_operands = true;
16047 try_again = true;
16048 break;
16049 case NE:
16050 case UNLE:
16051 case UNLT:
16052 case UNGE:
16053 case UNGT:
16054 /* Invert condition and try again.
16055 e.g., A != B becomes ~(A==B). */
16056 {
16057 enum rtx_code rev_code;
16058 enum insn_code nor_code;
16059 rtx mask2;
16060
16061 rev_code = reverse_condition_maybe_unordered (rcode);
16062 if (rev_code == UNKNOWN)
16063 return NULL_RTX;
16064
16065 nor_code = optab_handler (one_cmpl_optab, dmode);
16066 if (nor_code == CODE_FOR_nothing)
16067 return NULL_RTX;
16068
16069 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
16070 if (!mask2)
16071 return NULL_RTX;
16072
16073 mask = gen_reg_rtx (dmode);
16074 emit_insn (GEN_FCN (nor_code) (mask, mask2));
16075 return mask;
16076 }
16077 break;
16078 case GE:
16079 case GEU:
16080 case LE:
16081 case LEU:
16082 /* Try GT/GTU/LT/LTU OR EQ */
16083 {
16084 rtx c_rtx, eq_rtx;
16085 enum insn_code ior_code;
16086 enum rtx_code new_code;
16087
16088 switch (rcode)
16089 {
16090 case GE:
16091 new_code = GT;
16092 break;
16093
16094 case GEU:
16095 new_code = GTU;
16096 break;
16097
16098 case LE:
16099 new_code = LT;
16100 break;
16101
16102 case LEU:
16103 new_code = LTU;
16104 break;
16105
16106 default:
16107 gcc_unreachable ();
16108 }
16109
16110 ior_code = optab_handler (ior_optab, dmode);
16111 if (ior_code == CODE_FOR_nothing)
16112 return NULL_RTX;
16113
16114 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
16115 if (!c_rtx)
16116 return NULL_RTX;
16117
16118 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
16119 if (!eq_rtx)
16120 return NULL_RTX;
16121
16122 mask = gen_reg_rtx (dmode);
16123 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
16124 return mask;
16125 }
16126 break;
16127 default:
16128 return NULL_RTX;
16129 }
16130
16131 if (try_again)
16132 {
16133 if (swap_operands)
16134 std::swap (op0, op1);
16135
16136 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
16137 if (mask)
16138 return mask;
16139 }
16140
16141 /* You only get two chances. */
16142 return NULL_RTX;
16143 }
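/* For illustration of the recursion above: LT is handled by swapping the
   operands and emitting GT; NE by emitting EQ and complementing the
   mask; and integer GE as an IOR of two compares:

     a >= b  ->  (a > b) | (a == b)
     a != b  ->  ~(a == b)

   each piece built from the directly supported vector compares.  */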
16144
16145 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
16146 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
16147 operands for the relation operation COND. */
16148
16149 int
16150 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
16151 rtx cond, rtx cc_op0, rtx cc_op1)
16152 {
16153 machine_mode dest_mode = GET_MODE (dest);
16154 machine_mode mask_mode = GET_MODE (cc_op0);
16155 enum rtx_code rcode = GET_CODE (cond);
16156 rtx mask;
16157 bool invert_move = false;
16158
16159 if (VECTOR_UNIT_NONE_P (dest_mode))
16160 return 0;
16161
16162 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
16163 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
16164
16165 switch (rcode)
16166 {
16167 /* Swap operands if we can, and fall back to doing the operation as
16168 specified, and doing a NOR to invert the test. */
16169 case NE:
16170 case UNLE:
16171 case UNLT:
16172 case UNGE:
16173 case UNGT:
16174 /* Invert condition and try again.
16175 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
16176 invert_move = true;
16177 rcode = reverse_condition_maybe_unordered (rcode);
16178 if (rcode == UNKNOWN)
16179 return 0;
16180 break;
16181
16182 case GE:
16183 case LE:
16184 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
16185 {
16186 /* Invert condition to avoid compound test. */
16187 invert_move = true;
16188 rcode = reverse_condition (rcode);
16189 }
16190 break;
16191
16192 case GTU:
16193 case GEU:
16194 case LTU:
16195 case LEU:
16196
16197 /* Invert condition to avoid compound test if necessary. */
16198 if (rcode == GEU || rcode == LEU)
16199 {
16200 invert_move = true;
16201 rcode = reverse_condition (rcode);
16202 }
16203 break;
16204
16205 default:
16206 break;
16207 }
16208
16209 /* Get the vector mask for the given relational operations. */
16210 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
16211
16212 if (!mask)
16213 return 0;
16214
16215 if (mask_mode != dest_mode)
16216 mask = simplify_gen_subreg (dest_mode, mask, mask_mode, 0);
16217
16218 if (invert_move)
16219 std::swap (op_true, op_false);
16220
16221 /* Optimize vec1 == vec2, using the knowledge that the mask generates -1/0. */
16222 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
16223 && (GET_CODE (op_true) == CONST_VECTOR
16224 || GET_CODE (op_false) == CONST_VECTOR))
16225 {
16226 rtx constant_0 = CONST0_RTX (dest_mode);
16227 rtx constant_m1 = CONSTM1_RTX (dest_mode);
16228
16229 if (op_true == constant_m1 && op_false == constant_0)
16230 {
16231 emit_move_insn (dest, mask);
16232 return 1;
16233 }
16234
16235 else if (op_true == constant_0 && op_false == constant_m1)
16236 {
16237 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
16238 return 1;
16239 }
16240
16241 /* If we can't use the vector comparison directly, perhaps we can use
16242 the mask for the true or false fields, instead of loading up a
16243 constant. */
16244 if (op_true == constant_m1)
16245 op_true = mask;
16246
16247 if (op_false == constant_0)
16248 op_false = mask;
16249 }
16250
16251 if (!REG_P (op_true) && !SUBREG_P (op_true))
16252 op_true = force_reg (dest_mode, op_true);
16253
16254 if (!REG_P (op_false) && !SUBREG_P (op_false))
16255 op_false = force_reg (dest_mode, op_false);
16256
16257 rtx tmp = gen_rtx_IOR (dest_mode,
16258 gen_rtx_AND (dest_mode, gen_rtx_NOT (dest_mode, mask),
16259 op_false),
16260 gen_rtx_AND (dest_mode, mask, op_true));
16261 emit_insn (gen_rtx_SET (dest, tmp));
16262 return 1;
16263 }
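/* The IOR/AND expression above is the classic branch-free select; as a
   plain C sketch of the per-element semantics (illustrative only):

     result = (mask & op_true) | (~mask & op_false);

   which maps onto a single xxsel/vsel because MASK is all-ones or
   all-zeros within each element.  */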
16264
16265 /* Possibly use the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to emit a
16266 maximum or minimum with "C" semantics.
16267
16268 Unless -ffast-math is used, you can't use these instructions to replace a
16269 condition that implicitly reverses the sense of the comparison, because
16270 the comparison might generate a NaN or signed zero.
16271
16272 I.e. the following can always be replaced:
16273 ret = (op1 > op2) ? op1 : op2 ; generate xsmaxcdp
16274 ret = (op1 >= op2) ? op1 : op2 ; generate xsmaxcdp
16275 ret = (op1 < op2) ? op1 : op2; ; generate xsmincdp
16276 ret = (op1 <= op2) ? op1 : op2; ; generate xsmincdp
16277
16278 The following can be replaced only if -ffast-math is used:
16279 ret = (op1 < op2) ? op2 : op1 ; generate xsmaxcdp
16280 ret = (op1 <= op2) ? op2 : op1 ; generate xsmaxcdp
16281 ret = (op1 > op2) ? op2 : op1; ; generate xsmincdp
16282 ret = (op1 >= op2) ? op2 : op1; ; generate xsmincdp
16283
16284 Move TRUE_COND to DEST if OP applied to the operands of the last comparison
16285 is nonzero/true, FALSE_COND if it is zero/false.
16286
16287 Return false if we can't generate the appropriate minimum or maximum, and
16288 true if we did generate it. */
16289
16290 static bool
16291 rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16292 {
16293 enum rtx_code code = GET_CODE (op);
16294 rtx op0 = XEXP (op, 0);
16295 rtx op1 = XEXP (op, 1);
16296 machine_mode compare_mode = GET_MODE (op0);
16297 machine_mode result_mode = GET_MODE (dest);
16298
16299 if (result_mode != compare_mode)
16300 return false;
16301
16302 /* As the comments above note, the checks below simply expect GE/GT/LE/LT;
16303 for the reversible equivalents UNLT/UNLE/UNGT/UNGE, we do the
16304 reversions first so that the following checks have fewer cases to
16305 support, like:
16306
16307 (a UNLT b) ? op1 : op2 => (a >= b) ? op2 : op1;
16308 (a UNLE b) ? op1 : op2 => (a > b) ? op2 : op1;
16309 (a UNGT b) ? op1 : op2 => (a <= b) ? op2 : op1;
16310 (a UNGE b) ? op1 : op2 => (a < b) ? op2 : op1;
16311
16312 By the way, if we see these UNLT/UNLE/UNGT/UNGE it's guaranteed
16313 that we have 4-way condition codes (LT/GT/EQ/UN), so we do not
16314 have to check for fast-math or the like. */
16315 if (code == UNGE || code == UNGT || code == UNLE || code == UNLT)
16316 {
16317 code = reverse_condition_maybe_unordered (code);
16318 std::swap (true_cond, false_cond);
16319 }
16320
16321 bool max_p;
16322 if (code == GE || code == GT)
16323 max_p = true;
16324 else if (code == LE || code == LT)
16325 max_p = false;
16326 else
16327 return false;
16328
16329 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
16330 ;
16331
16332 /* Only when NaNs and signed zeros are not honored can smax be used for
16333 `op0 < op1 ? op1 : op0`, and smin be used for
16334 `op0 > op1 ? op1 : op0`. */
16335 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
16336 && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
16337 max_p = !max_p;
16338
16339 else
16340 return false;
16341
16342 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
16343 return true;
16344 }
16345
16346 /* Possibly emit a floating point conditional move by generating a
16347 compare-and-set-mask instruction and an XXSEL select instruction.
16348
16349 Move TRUE_COND to DEST if OP applied to the operands of the last comparison
16350 is nonzero/true, FALSE_COND if it is zero/false.
16351
16352 Return false if the operation cannot be generated, and true if we could
16353 generate the instruction. */
16354
16355 static bool
16356 rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16357 {
16358 enum rtx_code code = GET_CODE (op);
16359 rtx op0 = XEXP (op, 0);
16360 rtx op1 = XEXP (op, 1);
16361 machine_mode compare_mode = GET_MODE (op0);
16362 machine_mode result_mode = GET_MODE (dest);
16363 rtx compare_rtx;
16364 rtx cmove_rtx;
16365 rtx clobber_rtx;
16366
16367 if (!can_create_pseudo_p ())
16368 return false;
16369
16370 /* We allow the comparison to be either SFmode or DFmode, and likewise the
16371 true/false condition to be either SFmode or DFmode. I.e. we allow:
16372
16373 float a, b;
16374 double c, d, r;
16375
16376 r = (a == b) ? c : d;
16377
16378 and:
16379
16380 double a, b;
16381 float c, d, r;
16382
16383 r = (a == b) ? c : d;
16384
16385 but we don't allow intermixing the IEEE 128-bit floating point types with
16386 the 32/64-bit scalar types. */
16387
16388 if (!(compare_mode == result_mode
16389 || (compare_mode == SFmode && result_mode == DFmode)
16390 || (compare_mode == DFmode && result_mode == SFmode)))
16391 return false;
16392
16393 switch (code)
16394 {
16395 case EQ:
16396 case GE:
16397 case GT:
16398 break;
16399
16400 case NE:
16401 case LT:
16402 case LE:
16403 code = swap_condition (code);
16404 std::swap (op0, op1);
16405 break;
16406
16407 default:
16408 return false;
16409 }
16410
16411 /* Generate: [(parallel [(set (dest)
16412 (if_then_else (op (cmp1) (cmp2))
16413 (true)
16414 (false)))
16415 (clobber (scratch))])]. */
16416
16417 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
16418 cmove_rtx = gen_rtx_SET (dest,
16419 gen_rtx_IF_THEN_ELSE (result_mode,
16420 compare_rtx,
16421 true_cond,
16422 false_cond));
16423
16424 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
16425 emit_insn (gen_rtx_PARALLEL (VOIDmode,
16426 gen_rtvec (2, cmove_rtx, clobber_rtx)));
16427
16428 return true;
16429 }
16430
16431 /* Helper function to return true if the target has a compare-and-set-mask
16432 instruction that can be used with XXSEL to implement a conditional move.
16433 It is also assumed that such a target supports the "C" minimum and
16434 maximum instructions. */
16435
16436 static bool
16437 have_compare_and_set_mask (machine_mode mode)
16438 {
16439 switch (mode)
16440 {
16441 case E_SFmode:
16442 case E_DFmode:
16443 return TARGET_P9_MINMAX;
16444
16445 case E_KFmode:
16446 case E_TFmode:
16447 return TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode);
16448
16449 default:
16450 break;
16451 }
16452
16453 return false;
16454 }
16455
16456 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
16457 operands of the last comparison is nonzero/true, FALSE_COND if it
16458 is zero/false. Return false if the hardware has no such operation. */
16459
16460 bool
16461 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16462 {
16463 enum rtx_code code = GET_CODE (op);
16464 rtx op0 = XEXP (op, 0);
16465 rtx op1 = XEXP (op, 1);
16466 machine_mode compare_mode = GET_MODE (op0);
16467 machine_mode result_mode = GET_MODE (dest);
16468 rtx temp;
16469 bool is_against_zero;
16470
16471 /* These modes should always match. */
16472 if (GET_MODE (op1) != compare_mode
16473 /* In the isel case however, we can use a compare immediate, so
16474 op1 may be a small constant. */
16475 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
16476 return false;
16477 if (GET_MODE (true_cond) != result_mode)
16478 return false;
16479 if (GET_MODE (false_cond) != result_mode)
16480 return false;
16481
16482 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
16483 instructions. */
16484 if (have_compare_and_set_mask (compare_mode)
16485 && have_compare_and_set_mask (result_mode))
16486 {
16487 if (rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond))
16488 return true;
16489
16490 if (rs6000_maybe_emit_fp_cmove (dest, op, true_cond, false_cond))
16491 return true;
16492 }
16493
16494 /* Don't allow using floating point comparisons for integer results for
16495 now. */
16496 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
16497 return false;
16498
16499 /* First, work out if the hardware can do this at all, or
16500 if it's too slow.... */
16501 if (!FLOAT_MODE_P (compare_mode))
16502 {
16503 if (TARGET_ISEL)
16504 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
16505 return false;
16506 }
16507
16508 is_against_zero = op1 == CONST0_RTX (compare_mode);
16509
16510 /* A floating-point subtract might overflow, underflow, or produce
16511 an inexact result, thus changing the floating-point flags, so it
16512 can't be generated if we care about that. It's safe if one side
16513 of the construct is zero, since then no subtract will be
16514 generated. */
16515 if (SCALAR_FLOAT_MODE_P (compare_mode)
16516 && flag_trapping_math && ! is_against_zero)
16517 return false;
16518
16519 /* Eliminate half of the comparisons by switching operands; this
16520 makes the remaining code simpler. */
16521 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
16522 || code == LTGT || code == LT || code == UNLE)
16523 {
16524 code = reverse_condition_maybe_unordered (code);
16525 temp = true_cond;
16526 true_cond = false_cond;
16527 false_cond = temp;
16528 }
16529
16530 /* UNEQ and LTGT take four instructions for a comparison with zero;
16531 it'll probably be faster to use a branch here too. */
16532 if (code == UNEQ && HONOR_NANS (compare_mode))
16533 return false;
16534
16535 /* We're going to try to implement comparisons by performing
16536 a subtract, then comparing against zero. Unfortunately,
16537 Inf - Inf is NaN which is not zero, and so if we don't
16538 know that the operand is finite and the comparison
16539 would treat EQ differently from UNORDERED, we can't do it. */
16540 if (HONOR_INFINITIES (compare_mode)
16541 && code != GT && code != UNGE
16542 && (!CONST_DOUBLE_P (op1)
16543 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
16544 /* Constructs of the form (a OP b ? a : b) are safe. */
16545 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
16546 || (! rtx_equal_p (op0, true_cond)
16547 && ! rtx_equal_p (op1, true_cond))))
16548 return false;
16549
16550 /* At this point we know we can use fsel. */
16551
16552 /* Don't allow compare_mode other than SFmode or DFmode, for others there
16553 is no fsel instruction. */
16554 if (compare_mode != SFmode && compare_mode != DFmode)
16555 return false;
16556
16557 /* Reduce the comparison to a comparison against zero. */
16558 if (! is_against_zero)
16559 {
16560 temp = gen_reg_rtx (compare_mode);
16561 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
16562 op0 = temp;
16563 op1 = CONST0_RTX (compare_mode);
16564 }
16565
16566 /* If we don't care about NaNs we can reduce some of the comparisons
16567 down to faster ones. */
16568 if (! HONOR_NANS (compare_mode))
16569 switch (code)
16570 {
16571 case GT:
16572 code = LE;
16573 temp = true_cond;
16574 true_cond = false_cond;
16575 false_cond = temp;
16576 break;
16577 case UNGE:
16578 code = GE;
16579 break;
16580 case UNEQ:
16581 code = EQ;
16582 break;
16583 default:
16584 break;
16585 }
16586
16587 /* Now, reduce everything down to a GE. */
16588 switch (code)
16589 {
16590 case GE:
16591 break;
16592
16593 case LE:
16594 temp = gen_reg_rtx (compare_mode);
16595 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16596 op0 = temp;
16597 break;
16598
16599 case ORDERED:
16600 temp = gen_reg_rtx (compare_mode);
16601 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
16602 op0 = temp;
16603 break;
16604
16605 case EQ:
16606 temp = gen_reg_rtx (compare_mode);
16607 emit_insn (gen_rtx_SET (temp,
16608 gen_rtx_NEG (compare_mode,
16609 gen_rtx_ABS (compare_mode, op0))));
16610 op0 = temp;
16611 break;
16612
16613 case UNGE:
16614 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
16615 temp = gen_reg_rtx (result_mode);
16616 emit_insn (gen_rtx_SET (temp,
16617 gen_rtx_IF_THEN_ELSE (result_mode,
16618 gen_rtx_GE (VOIDmode,
16619 op0, op1),
16620 true_cond, false_cond)));
16621 false_cond = true_cond;
16622 true_cond = temp;
16623
16624 temp = gen_reg_rtx (compare_mode);
16625 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16626 op0 = temp;
16627 break;
16628
16629 case GT:
16630 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
16631 temp = gen_reg_rtx (result_mode);
16632 emit_insn (gen_rtx_SET (temp,
16633 gen_rtx_IF_THEN_ELSE (result_mode,
16634 gen_rtx_GE (VOIDmode,
16635 op0, op1),
16636 true_cond, false_cond)));
16637 true_cond = false_cond;
16638 false_cond = temp;
16639
16640 temp = gen_reg_rtx (compare_mode);
16641 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16642 op0 = temp;
16643 break;
16644
16645 default:
16646 gcc_unreachable ();
16647 }
16648
16649 emit_insn (gen_rtx_SET (dest,
16650 gen_rtx_IF_THEN_ELSE (result_mode,
16651 gen_rtx_GE (VOIDmode,
16652 op0, op1),
16653 true_cond, false_cond)));
16654 return true;
16655 }
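/* For illustration of the fsel path above (assumed DFmode operands, and
   flags permitting the subtract):

     r = (a <= b) ? c : d;

   first becomes a comparison of t = a - b against zero, then the LE is
   turned into a GE by negating t, and the final if_then_else maps onto
   fsel, which selects c when b - a >= 0.0 and d otherwise.  */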
16656
16657 /* Same as above, but for ints (isel). */
16658
16659 bool
16660 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16661 {
16662 rtx condition_rtx, cr;
16663 machine_mode mode = GET_MODE (dest);
16664 enum rtx_code cond_code;
16665 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
16666 bool signedp;
16667
16668 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
16669 return false;
16670
16671 /* PR104335: We now need to expect CC-mode "comparisons"
16672 coming from ifcvt. The following code expects proper
16673 comparisons, so we bail out here. */
16674 if (GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_CC)
16675 return false;
16676
16677 /* We still have to do the compare, because isel doesn't do a
16678 compare; it just looks at the CRx bits set by a previous compare
16679 instruction. */
16680 condition_rtx = rs6000_generate_compare (op, mode);
16681 cond_code = GET_CODE (condition_rtx);
16682 cr = XEXP (condition_rtx, 0);
16683 signedp = GET_MODE (cr) == CCmode;
16684
16685 isel_func = (mode == SImode
16686 ? (signedp ? gen_isel_cc_si : gen_isel_ccuns_si)
16687 : (signedp ? gen_isel_cc_di : gen_isel_ccuns_di));
16688
16689 switch (cond_code)
16690 {
16691 case LT: case GT: case LTU: case GTU: case EQ:
16692 /* isel handles these directly. */
16693 break;
16694
16695 default:
16696 /* We need to swap the sense of the comparison. */
16697 {
16698 std::swap (false_cond, true_cond);
16699 PUT_CODE (condition_rtx, reverse_condition (cond_code));
16700 }
16701 break;
16702 }
16703
16704 false_cond = force_reg (mode, false_cond);
16705 if (true_cond != const0_rtx)
16706 true_cond = force_reg (mode, true_cond);
16707
16708 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
16709
16710 return true;
16711 }
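/* Illustrative expansion: for SImode

     x = (a < b) ? c : d;

   this emits a compare to set CR0 followed by an isel on the CR0.LT bit,
   since isel merely selects between its two source operands based on an
   existing CR bit and performs no comparison of its own.  */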
16712
16713 void
16714 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16715 {
16716 machine_mode mode = GET_MODE (op0);
16717 enum rtx_code c;
16718 rtx target;
16719
16720 /* VSX/altivec have direct min/max insns. */
16721 if ((code == SMAX || code == SMIN)
16722 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
16723 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))
16724 || (TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode))))
16725 {
16726 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
16727 return;
16728 }
16729
16730 if (code == SMAX || code == SMIN)
16731 c = GE;
16732 else
16733 c = GEU;
16734
16735 if (code == SMAX || code == UMAX)
16736 target = emit_conditional_move (dest, { c, op0, op1, mode },
16737 op0, op1, mode, 0);
16738 else
16739 target = emit_conditional_move (dest, { c, op0, op1, mode },
16740 op1, op0, mode, 0);
16741 gcc_assert (target);
16742 if (target != dest)
16743 emit_move_insn (dest, target);
16744 }
16745
16746 /* A subroutine of the atomic operation splitters. Jump to LABEL if
16747 COND is true. Mark the jump as unlikely to be taken. */
16748
16749 static void
16750 emit_unlikely_jump (rtx cond, rtx label)
16751 {
16752 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
16753 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
16754 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
16755 }
16756
16757 /* A subroutine of the atomic operation splitters. Emit a load-locked
16758 instruction in MODE. For QI/HImode, possibly use a pattern that includes
16759 the zero_extend operation. */
16760
16761 static void
16762 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
16763 {
16764 rtx (*fn) (rtx, rtx) = NULL;
16765
16766 switch (mode)
16767 {
16768 case E_QImode:
16769 fn = gen_load_lockedqi;
16770 break;
16771 case E_HImode:
16772 fn = gen_load_lockedhi;
16773 break;
16774 case E_SImode:
16775 if (GET_MODE (mem) == QImode)
16776 fn = gen_load_lockedqi_si;
16777 else if (GET_MODE (mem) == HImode)
16778 fn = gen_load_lockedhi_si;
16779 else
16780 fn = gen_load_lockedsi;
16781 break;
16782 case E_DImode:
16783 fn = gen_load_lockeddi;
16784 break;
16785 case E_TImode:
16786 fn = gen_load_lockedti;
16787 break;
16788 default:
16789 gcc_unreachable ();
16790 }
16791 emit_insn (fn (reg, mem));
16792 }
16793
16794 /* A subroutine of the atomic operation splitters. Emit a store-conditional
16795 instruction in MODE. */
16796
16797 static void
16798 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
16799 {
16800 rtx (*fn) (rtx, rtx, rtx) = NULL;
16801
16802 switch (mode)
16803 {
16804 case E_QImode:
16805 fn = gen_store_conditionalqi;
16806 break;
16807 case E_HImode:
16808 fn = gen_store_conditionalhi;
16809 break;
16810 case E_SImode:
16811 fn = gen_store_conditionalsi;
16812 break;
16813 case E_DImode:
16814 fn = gen_store_conditionaldi;
16815 break;
16816 case E_TImode:
16817 fn = gen_store_conditionalti;
16818 break;
16819 default:
16820 gcc_unreachable ();
16821 }
16822
16823 /* Emit sync before stwcx. to address PPC405 Erratum. */
16824 if (PPC405_ERRATUM77)
16825 emit_insn (gen_hwsync ());
16826
16827 emit_insn (fn (res, mem, val));
16828 }
16829
16830 /* Expand barriers before and after a load_locked/store_cond sequence. */
16831
16832 static rtx
16833 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
16834 {
16835 rtx addr = XEXP (mem, 0);
16836
16837 if (!legitimate_indirect_address_p (addr, reload_completed)
16838 && !legitimate_indexed_address_p (addr, reload_completed))
16839 {
16840 addr = force_reg (Pmode, addr);
16841 mem = replace_equiv_address_nv (mem, addr);
16842 }
16843
16844 switch (model)
16845 {
16846 case MEMMODEL_RELAXED:
16847 case MEMMODEL_CONSUME:
16848 case MEMMODEL_ACQUIRE:
16849 break;
16850 case MEMMODEL_RELEASE:
16851 case MEMMODEL_ACQ_REL:
16852 emit_insn (gen_lwsync ());
16853 break;
16854 case MEMMODEL_SEQ_CST:
16855 emit_insn (gen_hwsync ());
16856 break;
16857 default:
16858 gcc_unreachable ();
16859 }
16860 return mem;
16861 }
16862
16863 static void
16864 rs6000_post_atomic_barrier (enum memmodel model)
16865 {
16866 switch (model)
16867 {
16868 case MEMMODEL_RELAXED:
16869 case MEMMODEL_CONSUME:
16870 case MEMMODEL_RELEASE:
16871 break;
16872 case MEMMODEL_ACQUIRE:
16873 case MEMMODEL_ACQ_REL:
16874 case MEMMODEL_SEQ_CST:
16875 emit_insn (gen_isync ());
16876 break;
16877 default:
16878 gcc_unreachable ();
16879 }
16880 }
16881
16882 /* A subroutine of the various atomic expanders. For sub-word operations,
16883 we must adjust things to operate on SImode. Given the original MEM,
16884 return a new aligned memory. Also build and return the quantities by
16885 which to shift and mask. */
16886
16887 static rtx
16888 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
16889 {
16890 rtx addr, align, shift, mask, mem;
16891 HOST_WIDE_INT shift_mask;
16892 machine_mode mode = GET_MODE (orig_mem);
16893
16894 /* For smaller modes, we have to implement this via SImode. */
16895 shift_mask = (mode == QImode ? 0x18 : 0x10);
16896
16897 addr = XEXP (orig_mem, 0);
16898 addr = force_reg (GET_MODE (addr), addr);
16899
16900 /* Aligned memory containing subword. Generate a new memory. We
16901 do not want any of the existing MEM_ATTR data, as we're now
16902 accessing memory outside the original object. */
16903 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
16904 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16905 mem = gen_rtx_MEM (SImode, align);
16906 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
16907 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
16908 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
16909
16910 /* Shift amount for subword relative to aligned word. */
16911 shift = gen_reg_rtx (SImode);
16912 addr = gen_lowpart (SImode, addr);
16913 rtx tmp = gen_reg_rtx (SImode);
16914 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
16915 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
16916 if (BYTES_BIG_ENDIAN)
16917 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
16918 shift, 1, OPTAB_LIB_WIDEN);
16919 *pshift = shift;
16920
16921 /* Mask for insertion. */
16922 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
16923 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
16924 *pmask = mask;
16925
16926 return mem;
16927 }
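/* Worked example (little-endian, QImode byte at address 0x1003):

     align = 0x1003 & -4           = 0x1000  (word holding the byte)
     shift = (0x1003 << 3) & 0x18  = 24      (bit offset within the word)
     mask  = 0xff << 24

   On big-endian the shift is additionally XOR'ed with 0x18, giving 0
   here, because the byte order within the aligned word is reversed.  */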
16928
16929 /* A subroutine of the various atomic expanders. For sub-word operands,
16930 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
16931
16932 static rtx
16933 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
16934 {
16935 rtx x;
16936
16937 x = gen_reg_rtx (SImode);
16938 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
16939 gen_rtx_NOT (SImode, mask),
16940 oldval)));
16941
16942 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
16943
16944 return x;
16945 }
16946
16947 /* A subroutine of the various atomic expanders. For sub-word operands,
16948 extract WIDE to NARROW via SHIFT. */
16949
16950 static void
16951 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
16952 {
16953 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
16954 wide, 1, OPTAB_LIB_WIDEN);
16955 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
16956 }
16957
16958 /* Expand an atomic compare and swap operation. */
16959
16960 void
16961 rs6000_expand_atomic_compare_and_swap (rtx operands[])
16962 {
16963 rtx boolval, retval, mem, oldval, newval, cond;
16964 rtx label1, label2, x, mask, shift;
16965 machine_mode mode, orig_mode;
16966 enum memmodel mod_s, mod_f;
16967 bool is_weak;
16968
16969 boolval = operands[0];
16970 retval = operands[1];
16971 mem = operands[2];
16972 oldval = operands[3];
16973 newval = operands[4];
16974 is_weak = (INTVAL (operands[5]) != 0);
16975 mod_s = memmodel_base (INTVAL (operands[6]));
16976 mod_f = memmodel_base (INTVAL (operands[7]));
16977 orig_mode = mode = GET_MODE (mem);
16978
16979 mask = shift = NULL_RTX;
16980 if (mode == QImode || mode == HImode)
16981 {
16982 /* Before power8, we didn't have access to lbarx/lharx, so generate
16983 lwarx and shift/mask operations. With power8, we need to do the
16984 comparison in SImode, but the store is still done in QI/HImode. */
16985 oldval = convert_modes (SImode, mode, oldval, 1);
16986
16987 if (!TARGET_SYNC_HI_QI)
16988 {
16989 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16990
16991 /* Shift and mask OLDVAL into position within the word. */
16992 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
16993 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16994
16995 /* Shift and mask NEWVAL into position within the word. */
16996 newval = convert_modes (SImode, mode, newval, 1);
16997 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
16998 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16999 }
17000
17001 /* Prepare to adjust the return value. */
17002 retval = gen_reg_rtx (SImode);
17003 mode = SImode;
17004 }
17005 else if (reg_overlap_mentioned_p (retval, oldval))
17006 oldval = copy_to_reg (oldval);
17007
17008 if (mode != TImode && !reg_or_short_operand (oldval, mode))
17009 oldval = copy_to_mode_reg (mode, oldval);
17010
17011 if (reg_overlap_mentioned_p (retval, newval))
17012 newval = copy_to_reg (newval);
17013
17014 mem = rs6000_pre_atomic_barrier (mem, mod_s);
17015
17016 label1 = NULL_RTX;
17017 if (!is_weak)
17018 {
17019 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
17020 emit_label (XEXP (label1, 0));
17021 }
17022 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
17023
17024 emit_load_locked (mode, retval, mem);
17025
17026 x = retval;
17027 if (mask)
17028 x = expand_simple_binop (SImode, AND, retval, mask,
17029 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17030
17031 cond = gen_reg_rtx (CCmode);
17032 /* If we have TImode, synthesize a comparison. */
17033 if (mode != TImode)
17034 x = gen_rtx_COMPARE (CCmode, x, oldval);
17035 else
17036 {
17037 rtx xor1_result = gen_reg_rtx (DImode);
17038 rtx xor2_result = gen_reg_rtx (DImode);
17039 rtx or_result = gen_reg_rtx (DImode);
17040 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
17041 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
17042 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
17043 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
17044
17045 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
17046 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
17047 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
17048 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
17049 }
17050
17051 emit_insn (gen_rtx_SET (cond, x));
17052
17053 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
17054 emit_unlikely_jump (x, label2);
17055
17056 x = newval;
17057 if (mask)
17058 x = rs6000_mask_atomic_subword (retval, newval, mask);
17059
17060 emit_store_conditional (orig_mode, cond, mem, x);
17061
17062 if (!is_weak)
17063 {
17064 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
17065 emit_unlikely_jump (x, label1);
17066 }
17067
17068 if (!is_mm_relaxed (mod_f))
17069 emit_label (XEXP (label2, 0));
17070
17071 rs6000_post_atomic_barrier (mod_s);
17072
17073 if (is_mm_relaxed (mod_f))
17074 emit_label (XEXP (label2, 0));
17075
17076 if (shift)
17077 rs6000_finish_atomic_subword (operands[1], retval, shift);
17078 else if (mode != GET_MODE (operands[1]))
17079 convert_move (operands[1], retval, 1);
17080
17081 /* In all cases, CR0 contains EQ on success, and NE on failure. */
17082 x = gen_rtx_EQ (SImode, cond, const0_rtx);
17083 emit_insn (gen_rtx_SET (boolval, x));
17084 }
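/* For illustration, a strong SImode compare-and-swap with seq_cst memory
   order expands to roughly (assumed register assignments):

        hwsync
     1: lwarx   r10,0,r9        # load-locked current value
        cmpw    cr0,r10,r4      # compare against expected OLDVAL
        bne-    cr0,2f          # mismatch: fail
        stwcx.  r5,0,r9         # try to store NEWVAL
        bne-    cr0,1b          # reservation lost: retry
     2: isync

   after which CR0 holds EQ on success and NE on failure, from which
   BOOLVAL is set.  */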
17085
17086 /* Expand an atomic exchange operation. */
17087
17088 void
17089 rs6000_expand_atomic_exchange (rtx operands[])
17090 {
17091 rtx retval, mem, val, cond;
17092 machine_mode mode;
17093 enum memmodel model;
17094 rtx label, x, mask, shift;
17095
17096 retval = operands[0];
17097 mem = operands[1];
17098 val = operands[2];
17099 model = memmodel_base (INTVAL (operands[3]));
17100 mode = GET_MODE (mem);
17101
17102 mask = shift = NULL_RTX;
17103 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
17104 {
17105 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
17106
17107 /* Shift and mask VAL into position within the word. */
17108 val = convert_modes (SImode, mode, val, 1);
17109 val = expand_simple_binop (SImode, ASHIFT, val, shift,
17110 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17111
17112 /* Prepare to adjust the return value. */
17113 retval = gen_reg_rtx (SImode);
17114 mode = SImode;
17115 }
17116
17117 mem = rs6000_pre_atomic_barrier (mem, model);
17118
17119 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
17120 emit_label (XEXP (label, 0));
17121
17122 emit_load_locked (mode, retval, mem);
17123
17124 x = val;
17125 if (mask)
17126 x = rs6000_mask_atomic_subword (retval, val, mask);
17127
17128 cond = gen_reg_rtx (CCmode);
17129 emit_store_conditional (mode, cond, mem, x);
17130
17131 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
17132 emit_unlikely_jump (x, label);
17133
17134 rs6000_post_atomic_barrier (model);
17135
17136 if (shift)
17137 rs6000_finish_atomic_subword (operands[0], retval, shift);
17138 }
17139
17140 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
17141 to perform. MEM is the memory on which to operate. VAL is the second
17142 operand of the binary operator. BEFORE and AFTER are optional locations to
17143 return the value of MEM either before or after the operation. MODEL_RTX
17144 is a CONST_INT containing the memory model to use. */
17145
17146 void
17147 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
17148 rtx orig_before, rtx orig_after, rtx model_rtx)
17149 {
17150 enum memmodel model = memmodel_base (INTVAL (model_rtx));
17151 machine_mode mode = GET_MODE (mem);
17152 machine_mode store_mode = mode;
17153 rtx label, x, cond, mask, shift;
17154 rtx before = orig_before, after = orig_after;
17155
17156 mask = shift = NULL_RTX;
17157 /* On power8, we want to use SImode for the operation. On previous systems,
17158 do the operation on a full word and shift/mask to get the proper byte or
17159 halfword. */
17160 if (mode == QImode || mode == HImode)
17161 {
17162 if (TARGET_SYNC_HI_QI)
17163 {
17164 val = convert_modes (SImode, mode, val, 1);
17165
17166 /* Prepare to adjust the return value. */
17167 before = gen_reg_rtx (SImode);
17168 if (after)
17169 after = gen_reg_rtx (SImode);
17170 mode = SImode;
17171 }
17172 else
17173 {
17174 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
17175
17176 /* Shift and mask VAL into position within the word. */
17177 val = convert_modes (SImode, mode, val, 1);
17178 val = expand_simple_binop (SImode, ASHIFT, val, shift,
17179 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17180
17181 switch (code)
17182 {
17183 case IOR:
17184 case XOR:
17185 /* We've already zero-extended VAL. That is sufficient to
17186 make certain that it does not affect other bits. */
17187 mask = NULL;
17188 break;
17189
17190 case AND:
17191 /* If we make certain that all of the other bits in VAL are
17192 set, that will be sufficient to not affect other bits. */
17193 x = gen_rtx_NOT (SImode, mask);
17194 x = gen_rtx_IOR (SImode, x, val);
17195 emit_insn (gen_rtx_SET (val, x));
17196 mask = NULL;
17197 break;
17198
17199 case NOT:
17200 case PLUS:
17201 case MINUS:
17202 /* These will all affect bits outside the field and need
17203 adjustment via MASK within the loop. */
17204 break;
17205
17206 default:
17207 gcc_unreachable ();
17208 }
17209
17210 /* Prepare to adjust the return value. */
17211 before = gen_reg_rtx (SImode);
17212 if (after)
17213 after = gen_reg_rtx (SImode);
17214 store_mode = mode = SImode;
17215 }
17216 }
17217
17218 mem = rs6000_pre_atomic_barrier (mem, model);
17219
17220 label = gen_label_rtx ();
17221 emit_label (label);
17222 label = gen_rtx_LABEL_REF (VOIDmode, label);
17223
17224 if (before == NULL_RTX)
17225 before = gen_reg_rtx (mode);
17226
17227 emit_load_locked (mode, before, mem);
17228
17229 if (code == NOT)
17230 {
17231 x = expand_simple_binop (mode, AND, before, val,
17232 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17233 after = expand_simple_unop (mode, NOT, x, after, 1);
17234 }
17235 else
17236 {
17237 after = expand_simple_binop (mode, code, before, val,
17238 after, 1, OPTAB_LIB_WIDEN);
17239 }
17240
17241 x = after;
17242 if (mask)
17243 {
17244 x = expand_simple_binop (SImode, AND, after, mask,
17245 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17246 x = rs6000_mask_atomic_subword (before, x, mask);
17247 }
17248 else if (store_mode != mode)
17249 x = convert_modes (store_mode, mode, x, 1);
17250
17251 cond = gen_reg_rtx (CCmode);
17252 emit_store_conditional (store_mode, cond, mem, x);
17253
17254 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
17255 emit_unlikely_jump (x, label);
17256
17257 rs6000_post_atomic_barrier (model);
17258
17259 if (shift)
17260 {
17261 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
17262 then do the calculations in a SImode register. */
17263 if (orig_before)
17264 rs6000_finish_atomic_subword (orig_before, before, shift);
17265 if (orig_after)
17266 rs6000_finish_atomic_subword (orig_after, after, shift);
17267 }
17268 else if (store_mode != mode)
17269 {
17270 /* QImode/HImode on machines with lbarx/lharx where we do the native
17271 operation and then do the calculations in a SImode register. */
17272 if (orig_before)
17273 convert_move (orig_before, before, 1);
17274 if (orig_after)
17275 convert_move (orig_after, after, 1);
17276 }
17277 else if (orig_after && after != orig_after)
17278 emit_move_insn (orig_after, after);
17279 }
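/* For illustration, a word-sized atomic fetch-and-add with relaxed
   memory order reduces to just the retry loop (assumed register
   assignments):

     1: lwarx   r10,0,r9        # BEFORE value
        add     r11,r10,r4      # AFTER = BEFORE + VAL
        stwcx.  r11,0,r9
        bne-    cr0,1b

   with the pre/post barriers selected from MODEL added around it for
   the stronger memory orders.  */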
17280
17281 static GTY(()) alias_set_type TOC_alias_set = -1;
17282
17283 alias_set_type
17284 get_TOC_alias_set (void)
17285 {
17286 if (TOC_alias_set == -1)
17287 TOC_alias_set = new_alias_set ();
17288 return TOC_alias_set;
17289 }
17290
17291 /* The mode the ABI uses for a word. This is not the same as word_mode
17292 for -m32 -mpowerpc64. This is used to implement various target hooks. */
17293
17294 static scalar_int_mode
17295 rs6000_abi_word_mode (void)
17296 {
17297 return TARGET_32BIT ? SImode : DImode;
17298 }
17299
17300 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
17301 static char *
17302 rs6000_offload_options (void)
17303 {
17304 if (TARGET_64BIT)
17305 return xstrdup ("-foffload-abi=lp64");
17306 else
17307 return xstrdup ("-foffload-abi=ilp32");
17308 }
17309
17310 \f
17311 /* A quick summary of the various types of 'constant-pool tables'
17312 under PowerPC:
17313
17314 Target         Flags            Name             One table per
17315 AIX            (none)           AIX TOC          object file
17316 AIX            -mfull-toc      AIX TOC          object file
17317 AIX            -mminimal-toc   AIX minimal TOC  translation unit
17318 SVR4/EABI      (none)           SVR4 SDATA       object file
17319 SVR4/EABI      -fpic            SVR4 pic         object file
17320 SVR4/EABI      -fPIC            SVR4 PIC         translation unit
17321 SVR4/EABI      -mrelocatable    EABI TOC         function
17322 SVR4/EABI      -maix            AIX TOC          object file
17323 SVR4/EABI      -maix -mminimal-toc
17324                                 AIX minimal TOC  translation unit
17325
17326 Name              Reg.   Set by   entries   contains:
17327                                  made by   addrs?   fp?      sum?
17328
17329 AIX TOC            2     crt0     as        Y        option   option
17330 AIX minimal TOC   30     prolog   gcc       Y        Y        option
17331 SVR4 SDATA        13     crt0     gcc       N        Y        N
17332 SVR4 pic          30     prolog   ld        Y        not yet  N
17333 SVR4 PIC          30     prolog   gcc       Y        option   option
17334 EABI TOC          30     prolog   gcc       Y        option   option
17335
17336 */
17337
17338 /* Hash functions for the hash table. */
17339
17340 static unsigned
17341 rs6000_hash_constant (rtx k)
17342 {
17343 enum rtx_code code = GET_CODE (k);
17344 machine_mode mode = GET_MODE (k);
17345 unsigned result = (code << 3) ^ mode;
17346 const char *format;
17347 int flen, fidx;
17348
17349 format = GET_RTX_FORMAT (code);
17350 flen = strlen (format);
17351 fidx = 0;
17352
17353 switch (code)
17354 {
17355 case LABEL_REF:
17356 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
17357
17358 case CONST_WIDE_INT:
17359 {
17360 int i;
17361 flen = CONST_WIDE_INT_NUNITS (k);
17362 for (i = 0; i < flen; i++)
17363 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
17364 return result;
17365 }
17366
17367 case CONST_DOUBLE:
17368 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
17369
17370 case CODE_LABEL:
17371 fidx = 3;
17372 break;
17373
17374 default:
17375 break;
17376 }
17377
17378 for (; fidx < flen; fidx++)
17379 switch (format[fidx])
17380 {
17381 case 's':
17382 {
17383 unsigned i, len;
17384 const char *str = XSTR (k, fidx);
17385 len = strlen (str);
17386 result = result * 613 + len;
17387 for (i = 0; i < len; i++)
17388 result = result * 613 + (unsigned) str[i];
17389 break;
17390 }
17391 case 'u':
17392 case 'e':
17393 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
17394 break;
17395 case 'i':
17396 case 'n':
17397 result = result * 613 + (unsigned) XINT (k, fidx);
17398 break;
17399 case 'w':
17400 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
17401 result = result * 613 + (unsigned) XWINT (k, fidx);
17402 else
17403 {
17404 size_t i;
17405 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
17406 result = result * 613 + (unsigned) (XWINT (k, fidx)
17407 >> CHAR_BIT * i);
17408 }
17409 break;
17410 case '0':
17411 break;
17412 default:
17413 gcc_unreachable ();
17414 }
17415
17416 return result;
17417 }
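/* The mixing above is a simple polynomial hash; as a sketch of the
   scheme (illustrative only, eliding the exact field traversal):

     unsigned h = (code << 3) ^ mode;
     h = h * 613 + <each integer or string element>;
     h = h * 1231 + <hash of each rtx subexpression>;

   613 and 1231 are just odd multipliers that spread the bits; equal
   keys hash equally because the traversal is deterministic.  */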
17418
17419 hashval_t
17420 toc_hasher::hash (toc_hash_struct *thc)
17421 {
17422 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
17423 }
17424
17425 /* Compare H1 and H2 for equivalence. */
17426
17427 bool
17428 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
17429 {
17430 rtx r1 = h1->key;
17431 rtx r2 = h2->key;
17432
17433 if (h1->key_mode != h2->key_mode)
17434 return 0;
17435
17436 return rtx_equal_p (r1, r2);
17437 }
17438
17439 /* These are the names given by the C++ front-end to vtables and
17440 vtable-like objects. Ideally, this logic should not be here;
17441 instead, there should be some programmatic way of inquiring as
17442 to whether or not an object is a vtable. */
17443
17444 #define VTABLE_NAME_P(NAME) \
17445 (startswith (name, "_vt.") \
17446 || startswith (name, "_ZTV") \
17447 || startswith (name, "_ZTT") \
17448 || startswith (name, "_ZTI") \
17449 || startswith (name, "_ZTC"))
17450
17451 #ifdef NO_DOLLAR_IN_LABEL
17452 /* Return a GGC-allocated character string translating dollar signs in
17453 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
17454
17455 const char *
17456 rs6000_xcoff_strip_dollar (const char *name)
17457 {
17458 char *strip, *p;
17459 const char *q;
17460 size_t len;
17461
17462 q = (const char *) strchr (name, '$');
17463
17464 if (q == 0 || q == name)
17465 return name;
17466
17467 len = strlen (name);
17468 strip = XALLOCAVEC (char, len + 1);
17469 strcpy (strip, name);
17470 p = strip + (q - name);
17471 while (p)
17472 {
17473 *p = '_';
17474 p = strchr (p + 1, '$');
17475 }
17476
17477 return ggc_alloc_string (strip, len);
17478 }
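/* For example, a (hypothetical) name "foo$bar$baz" comes back as
   "foo_bar_baz"; a name that starts with '$' or contains no '$' at all
   is returned unchanged.  */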
17479 #endif
17480
17481 void
17482 rs6000_output_symbol_ref (FILE *file, rtx x)
17483 {
17484 const char *name = XSTR (x, 0);
17485
17486 /* Currently C++ toc references to vtables can be emitted before it
17487 is decided whether the vtable is public or private. If this is
17488 the case, then the linker will eventually complain that there is
17489 a reference to an unknown section. Thus, for vtables only,
17490 we emit the TOC reference to reference the identifier and not the
17491 symbol. */
17492 if (VTABLE_NAME_P (name))
17493 {
17494 RS6000_OUTPUT_BASENAME (file, name);
17495 }
17496 else
17497 assemble_name (file, name);
17498 }
17499
17500 /* Output a TOC entry. We derive the entry name from what is being
17501 written. */
17502
17503 void
17504 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
17505 {
17506 char buf[256];
17507 const char *name = buf;
17508 rtx base = x;
17509 HOST_WIDE_INT offset = 0;
17510
17511 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
17512
17513 /* When the linker won't eliminate them, don't output duplicate
17514 TOC entries (this happens on AIX if there is any kind of TOC,
17515 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
17516 CODE_LABELs. */
17517 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
17518 {
17519 struct toc_hash_struct *h;
17520
17521 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
17522 time because GGC is not initialized at that point. */
17523 if (toc_hash_table == NULL)
17524 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
17525
17526 h = ggc_alloc<toc_hash_struct> ();
17527 h->key = x;
17528 h->key_mode = mode;
17529 h->labelno = labelno;
17530
17531 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
17532 if (*found == NULL)
17533 *found = h;
17534 else /* This is indeed a duplicate.
17535 Set this label equal to that label. */
17536 {
17537 fputs ("\t.set ", file);
17538 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17539 fprintf (file, "%d,", labelno);
17540 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17541 fprintf (file, "%d\n", ((*found)->labelno));
17542
17543 #ifdef HAVE_AS_TLS
17544 if (TARGET_XCOFF && SYMBOL_REF_P (x)
17545 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
17546 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
17547 {
17548 fputs ("\t.set ", file);
17549 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17550 fprintf (file, "%d,", labelno);
17551 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17552 fprintf (file, "%d\n", ((*found)->labelno));
17553 }
17554 #endif
17555 return;
17556 }
17557 }
17558
17559 /* If we're going to put a double constant in the TOC, make sure it's
17560 aligned properly when strict alignment is on. */
17561 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
17562 && STRICT_ALIGNMENT
17563 && GET_MODE_BITSIZE (mode) >= 64
17564 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
17565 ASM_OUTPUT_ALIGN (file, 3);
17566
17567
17568 (*targetm.asm_out.internal_label) (file, "LC", labelno);
17569
17570 /* Handle FP constants specially. Note that if we have a minimal
17571 TOC, things we put here aren't actually in the TOC, so we can allow
17572 FP constants. */
17573 if (CONST_DOUBLE_P (x)
17574 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
17575 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
17576 {
17577 long k[4];
17578
17579 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17580 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
17581 else
17582 real_to_target (k, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x));
17583
17584 if (TARGET_64BIT)
17585 {
17586 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17587 fputs (DOUBLE_INT_ASM_OP, file);
17588 else
17589 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17590 k[0] & 0xffffffff, k[1] & 0xffffffff,
17591 k[2] & 0xffffffff, k[3] & 0xffffffff);
17592 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
17593 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17594 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
17595 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
17596 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
17597 return;
17598 }
17599 else
17600 {
17601 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17602 fputs ("\t.long ", file);
17603 else
17604 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17605 k[0] & 0xffffffff, k[1] & 0xffffffff,
17606 k[2] & 0xffffffff, k[3] & 0xffffffff);
17607 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17608 k[0] & 0xffffffff, k[1] & 0xffffffff,
17609 k[2] & 0xffffffff, k[3] & 0xffffffff);
17610 return;
17611 }
17612 }
17613 else if (CONST_DOUBLE_P (x)
17614 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
17615 {
17616 long k[2];
17617
17618 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17619 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
17620 else
17621 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17622
17623 if (TARGET_64BIT)
17624 {
17625 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17626 fputs (DOUBLE_INT_ASM_OP, file);
17627 else
17628 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17629 k[0] & 0xffffffff, k[1] & 0xffffffff);
17630 fprintf (file, "0x%lx%08lx\n",
17631 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17632 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
17633 return;
17634 }
17635 else
17636 {
17637 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17638 fputs ("\t.long ", file);
17639 else
17640 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17641 k[0] & 0xffffffff, k[1] & 0xffffffff);
17642 fprintf (file, "0x%lx,0x%lx\n",
17643 k[0] & 0xffffffff, k[1] & 0xffffffff);
17644 return;
17645 }
17646 }
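/* E.g. (a sketch): on 64-bit AIX without -mminimal-toc, a DFmode 1.0
   comes out as
       .tc FD_3ff00000_0[TC],0x3ff0000000000000
   with the value words encoded in the entry name; ELF and -mminimal-toc
   targets emit only the bare value after DOUBLE_INT_ASM_OP (typically
   .quad). */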
17647 else if (CONST_DOUBLE_P (x)
17648 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
17649 {
17650 long l;
17651
17652 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17653 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
17654 else
17655 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17656
17657 if (TARGET_64BIT)
17658 {
17659 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17660 fputs (DOUBLE_INT_ASM_OP, file);
17661 else
17662 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17663 if (WORDS_BIG_ENDIAN)
17664 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
17665 else
17666 fprintf (file, "0x%lx\n", l & 0xffffffff);
17667 return;
17668 }
17669 else
17670 {
17671 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17672 fputs ("\t.long ", file);
17673 else
17674 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17675 fprintf (file, "0x%lx\n", l & 0xffffffff);
17676 return;
17677 }
17678 }
17679 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
17680 {
17681 unsigned HOST_WIDE_INT low;
17682 HOST_WIDE_INT high;
17683
17684 low = INTVAL (x) & 0xffffffff;
17685 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
17686
17687 /* TOC entries are always Pmode-sized, so on big-endian targets
17688 smaller integer constants in the TOC need to be padded.
17689 (This is still a win over putting the constants in
17690 a separate constant pool, because then we'd have
17691 to have both a TOC entry _and_ the actual constant.)
17692
17693 For a 32-bit target, CONST_INT values are loaded and shifted
17694 entirely within `low' and can be stored in one TOC entry. */
17695
17696 /* It would be easy to make this work, but it isn't implemented now. */
17697 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
17698
17699 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
17700 {
17701 low |= high << 32;
17702 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
17703 high = (HOST_WIDE_INT) low >> 32;
17704 low &= 0xffffffff;
17705 }
17706
17707 if (TARGET_64BIT)
17708 {
17709 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17710 fputs (DOUBLE_INT_ASM_OP, file);
17711 else
17712 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17713 (long) high & 0xffffffff, (long) low & 0xffffffff);
17714 fprintf (file, "0x%lx%08lx\n",
17715 (long) high & 0xffffffff, (long) low & 0xffffffff);
17716 return;
17717 }
17718 else
17719 {
17720 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
17721 {
17722 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17723 fputs ("\t.long ", file);
17724 else
17725 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17726 (long) high & 0xffffffff, (long) low & 0xffffffff);
17727 fprintf (file, "0x%lx,0x%lx\n",
17728 (long) high & 0xffffffff, (long) low & 0xffffffff);
17729 }
17730 else
17731 {
17732 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17733 fputs ("\t.long ", file);
17734 else
17735 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
17736 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
17737 }
17738 return;
17739 }
17740 }
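/* E.g. (a sketch): on 64-bit AIX the DImode constant 0x100000001 becomes
       .tc ID_1_1[TC],0x100000001
   with the high and low halves encoded in the entry name. */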
17741
17742 if (GET_CODE (x) == CONST)
17743 {
17744 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
17745 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
17746
17747 base = XEXP (XEXP (x, 0), 0);
17748 offset = INTVAL (XEXP (XEXP (x, 0), 1));
17749 }
17750
17751 switch (GET_CODE (base))
17752 {
17753 case SYMBOL_REF:
17754 name = XSTR (base, 0);
17755 break;
17756
17757 case LABEL_REF:
17758 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
17759 CODE_LABEL_NUMBER (XEXP (base, 0)));
17760 break;
17761
17762 case CODE_LABEL:
17763 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
17764 break;
17765
17766 default:
17767 gcc_unreachable ();
17768 }
17769
17770 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17771 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
17772 else
17773 {
17774 fputs ("\t.tc ", file);
17775 RS6000_OUTPUT_BASENAME (file, name);
17776
17777 if (offset < 0)
17778 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
17779 else if (offset)
17780 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
17781
17782 /* Mark large TOC symbols on AIX with [TE] so they are mapped
17783 after other TOC symbols, reducing overflow of small TOC access
17784 to [TC] symbols. */
17785 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
17786 ? "[TE]," : "[TC],", file);
17787 }
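/* E.g. (a sketch): for symbol foo at offset 8 the entry reads
       .tc foo.P8[TC],foo+8
   and [TC] becomes [TE] on AIX when the code model is not small. */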
17788
17789 /* Currently C++ TOC references to vtables can be emitted before it
17790 is decided whether the vtable is public or private. If this is
17791 the case, then the linker will eventually complain that there is
17792 a TOC reference to an unknown section. Thus, for vtables only,
17793 we emit the TOC reference to reference the symbol and not the
17794 section. */
17795 if (VTABLE_NAME_P (name))
17796 {
17797 RS6000_OUTPUT_BASENAME (file, name);
17798 if (offset < 0)
17799 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
17800 else if (offset > 0)
17801 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
17802 }
17803 else
17804 output_addr_const (file, x);
17805
17806 #if HAVE_AS_TLS
17807 if (TARGET_XCOFF && SYMBOL_REF_P (base))
17808 {
17809 switch (SYMBOL_REF_TLS_MODEL (base))
17810 {
17811 case 0:
17812 break;
17813 case TLS_MODEL_LOCAL_EXEC:
17814 fputs ("@le", file);
17815 break;
17816 case TLS_MODEL_INITIAL_EXEC:
17817 fputs ("@ie", file);
17818 break;
17819 /* Use global-dynamic for local-dynamic. */
17820 case TLS_MODEL_GLOBAL_DYNAMIC:
17821 case TLS_MODEL_LOCAL_DYNAMIC:
17822 putc ('\n', file);
17823 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
17824 fputs ("\t.tc .", file);
17825 RS6000_OUTPUT_BASENAME (file, name);
17826 fputs ("[TC],", file);
17827 output_addr_const (file, x);
17828 fputs ("@m", file);
17829 break;
17830 default:
17831 gcc_unreachable ();
17832 }
17833 }
17834 #endif
17835
17836 putc ('\n', file);
17837 }
17838 \f
17839 /* Output an assembler pseudo-op to write an ASCII string of N characters
17840 starting at P to FILE.
17841
17842 On the RS/6000, we have to do this using the .byte operation and
17843 write out special characters outside the quoted string.
17844 Also, the assembler is broken; very long strings are truncated,
17845 so we must artificially break them up early. */
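/* For example (a sketch), output_ascii (file, "ab\n", 3) emits
	.byte "ab"
	.byte 10
   printable runs go inside a quoted .byte string, and all other bytes
   are written out as decimal values. */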
17846
17847 void
17848 output_ascii (FILE *file, const char *p, int n)
17849 {
17850 char c;
17851 int i, count_string;
17852 const char *for_string = "\t.byte \"";
17853 const char *for_decimal = "\t.byte ";
17854 const char *to_close = NULL;
17855
17856 count_string = 0;
17857 for (i = 0; i < n; i++)
17858 {
17859 c = *p++;
17860 if (c >= ' ' && c < 0177)
17861 {
17862 if (for_string)
17863 fputs (for_string, file);
17864 putc (c, file);
17865
17866 /* Write two quotes to get one. */
17867 if (c == '"')
17868 {
17869 putc (c, file);
17870 ++count_string;
17871 }
17872
17873 for_string = NULL;
17874 for_decimal = "\"\n\t.byte ";
17875 to_close = "\"\n";
17876 ++count_string;
17877
17878 if (count_string >= 512)
17879 {
17880 fputs (to_close, file);
17881
17882 for_string = "\t.byte \"";
17883 for_decimal = "\t.byte ";
17884 to_close = NULL;
17885 count_string = 0;
17886 }
17887 }
17888 else
17889 {
17890 if (for_decimal)
17891 fputs (for_decimal, file);
17892 fprintf (file, "%d", c);
17893
17894 for_string = "\n\t.byte \"";
17895 for_decimal = ", ";
17896 to_close = "\n";
17897 count_string = 0;
17898 }
17899 }
17900
17901 /* Now close the string if we have written one. Then end the line. */
17902 if (to_close)
17903 fputs (to_close, file);
17904 }
17905 \f
17906 /* Generate a unique section name for FILENAME for a section type
17907 represented by SECTION_DESC. Output goes into BUF.
17908
17909 SECTION_DESC can be any string, as long as it is different for each
17910 possible section type.
17911
17912 We name the section in the same manner as xlc. The name begins with an
17913 underscore followed by the filename (after stripping any leading directory
17914 names) with the last period replaced by the string SECTION_DESC. If
17915 FILENAME does not contain a period, SECTION_DESC is appended to the end of
17916 the name. */
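/* For example (a sketch): FILENAME "src/my-file.c" with SECTION_DESC "XO"
   yields "_myfileXO"; non-alphanumeric characters in the base name are
   dropped, and the last period plus everything after it is replaced by
   SECTION_DESC. */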
17917
17918 void
17919 rs6000_gen_section_name (char **buf, const char *filename,
17920 const char *section_desc)
17921 {
17922 const char *q, *after_last_slash, *last_period = 0;
17923 char *p;
17924 int len;
17925
17926 after_last_slash = filename;
17927 for (q = filename; *q; q++)
17928 {
17929 if (*q == '/')
17930 after_last_slash = q + 1;
17931 else if (*q == '.')
17932 last_period = q;
17933 }
17934
17935 len = strlen (after_last_slash) + strlen (section_desc) + 2;
17936 *buf = (char *) xmalloc (len);
17937
17938 p = *buf;
17939 *p++ = '_';
17940
17941 for (q = after_last_slash; *q; q++)
17942 {
17943 if (q == last_period)
17944 {
17945 strcpy (p, section_desc);
17946 p += strlen (section_desc);
17947 break;
17948 }
17949
17950 else if (ISALNUM (*q))
17951 *p++ = *q;
17952 }
17953
17954 if (last_period == 0)
17955 strcpy (p, section_desc);
17956 else
17957 *p = '\0';
17958 }
17959 \f
17960 /* Emit profile function. */
17961
17962 void
17963 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
17964 {
17965 /* Non-standard profiling for kernels, which just saves LR then calls
17966 _mcount without worrying about arg saves. The idea is to change
17967 the function prologue as little as possible as it isn't easy to
17968 account for arg save/restore code added just for _mcount. */
17969 if (TARGET_PROFILE_KERNEL)
17970 return;
17971
17972 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
17973 {
17974 #ifndef NO_PROFILE_COUNTERS
17975 # define NO_PROFILE_COUNTERS 0
17976 #endif
17977 if (NO_PROFILE_COUNTERS)
17978 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17979 LCT_NORMAL, VOIDmode);
17980 else
17981 {
17982 char buf[30];
17983 const char *label_name;
17984 rtx fun;
17985
17986 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17987 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
17988 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
17989
17990 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17991 LCT_NORMAL, VOIDmode, fun, Pmode);
17992 }
17993 }
17994 else if (DEFAULT_ABI == ABI_DARWIN)
17995 {
17996 const char *mcount_name = RS6000_MCOUNT;
17997 int caller_addr_regno = LR_REGNO;
17998
17999 /* Be conservative and always set this, at least for now. */
18000 crtl->uses_pic_offset_table = 1;
18001
18002 #if TARGET_MACHO
18003 /* For PIC code, set up a stub and collect the caller's address
18004 from r0, which is where the prologue puts it. */
18005 if (MACHOPIC_INDIRECT
18006 && crtl->uses_pic_offset_table)
18007 caller_addr_regno = 0;
18008 #endif
18009 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
18010 LCT_NORMAL, VOIDmode,
18011 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
18012 }
18013 }
18014
18015 /* Write function profiler code. */
18016
18017 void
18018 output_function_profiler (FILE *file, int labelno)
18019 {
18020 char buf[100];
18021
18022 switch (DEFAULT_ABI)
18023 {
18024 default:
18025 gcc_unreachable ();
18026
18027 case ABI_V4:
18028 if (!TARGET_32BIT)
18029 {
18030 warning (0, "no profiling of 64-bit code for this ABI");
18031 return;
18032 }
18033 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
18034 fprintf (file, "\tmflr %s\n", reg_names[0]);
18035 if (NO_PROFILE_COUNTERS)
18036 {
18037 asm_fprintf (file, "\tstw %s,4(%s)\n",
18038 reg_names[0], reg_names[1]);
18039 }
18040 else if (TARGET_SECURE_PLT && flag_pic)
18041 {
18042 if (TARGET_LINK_STACK)
18043 {
18044 char name[32];
18045 get_ppc476_thunk_name (name);
18046 asm_fprintf (file, "\tbl %s\n", name);
18047 }
18048 else
18049 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
18050 asm_fprintf (file, "\tstw %s,4(%s)\n",
18051 reg_names[0], reg_names[1]);
18052 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
18053 asm_fprintf (file, "\taddis %s,%s,",
18054 reg_names[12], reg_names[12]);
18055 assemble_name (file, buf);
18056 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
18057 assemble_name (file, buf);
18058 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
18059 }
18060 else if (flag_pic == 1)
18061 {
18062 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
18063 asm_fprintf (file, "\tstw %s,4(%s)\n",
18064 reg_names[0], reg_names[1]);
18065 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
18066 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
18067 assemble_name (file, buf);
18068 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
18069 }
18070 else if (flag_pic > 1)
18071 {
18072 asm_fprintf (file, "\tstw %s,4(%s)\n",
18073 reg_names[0], reg_names[1]);
18074 /* Now, we need to get the address of the label. */
18075 if (TARGET_LINK_STACK)
18076 {
18077 char name[32];
18078 get_ppc476_thunk_name (name);
18079 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
18080 assemble_name (file, buf);
18081 fputs ("-.\n1:", file);
18082 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
18083 asm_fprintf (file, "\taddi %s,%s,4\n",
18084 reg_names[11], reg_names[11]);
18085 }
18086 else
18087 {
18088 fputs ("\tbcl 20,31,1f\n\t.long ", file);
18089 assemble_name (file, buf);
18090 fputs ("-.\n1:", file);
18091 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
18092 }
18093 asm_fprintf (file, "\tlwz %s,0(%s)\n",
18094 reg_names[0], reg_names[11]);
18095 asm_fprintf (file, "\tadd %s,%s,%s\n",
18096 reg_names[0], reg_names[0], reg_names[11]);
18097 }
18098 else
18099 {
18100 asm_fprintf (file, "\tlis %s,", reg_names[12]);
18101 assemble_name (file, buf);
18102 fputs ("@ha\n", file);
18103 asm_fprintf (file, "\tstw %s,4(%s)\n",
18104 reg_names[0], reg_names[1]);
18105 asm_fprintf (file, "\tla %s,", reg_names[0]);
18106 assemble_name (file, buf);
18107 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
18108 }
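/* A sketch of the resulting non-PIC sequence, where LPn is the
   counter label generated above and _mcount is RS6000_MCOUNT:
	mflr 0
	lis 12,LPn@ha
	stw 0,4(1)
	la 0,LPn@l(12)
	bl _mcount  */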
18109
18110 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
18111 fprintf (file, "\tbl %s%s\n",
18112 RS6000_MCOUNT, flag_pic ? "@plt" : "");
18113 break;
18114
18115 case ABI_AIX:
18116 case ABI_ELFv2:
18117 case ABI_DARWIN:
18118 /* Don't do anything, done in output_profile_hook (). */
18119 break;
18120 }
18121 }
18122
18123 \f
18124
18125 /* The following variable value is the last issued insn. */
18126
18127 static rtx_insn *last_scheduled_insn;
18128
18129 /* The following variable helps to balance issuing of load and
18130 store instructions. */
18131
18132 static int load_store_pendulum;
18133
18134 /* The following variable helps pair divide insns during scheduling. */
18135 static int divide_cnt;
18136 /* The following variable helps pair and alternate vector and vector load
18137 insns during scheduling. */
18138 static int vec_pairing;
18139
18140
18141 /* Power4 load update and store update instructions are cracked into a
18142 load or store and an integer insn which are executed in the same cycle.
18143 Branches have their own dispatch slot which does not count against the
18144 GCC issue rate, but it changes the program flow so there are no other
18145 instructions to issue in this cycle. */
18146
18147 static int
18148 rs6000_variable_issue_1 (rtx_insn *insn, int more)
18149 {
18150 last_scheduled_insn = insn;
18151 if (GET_CODE (PATTERN (insn)) == USE
18152 || GET_CODE (PATTERN (insn)) == CLOBBER)
18153 {
18154 cached_can_issue_more = more;
18155 return cached_can_issue_more;
18156 }
18157
18158 if (insn_terminates_group_p (insn, current_group))
18159 {
18160 cached_can_issue_more = 0;
18161 return cached_can_issue_more;
18162 }
18163
18164 /* If the insn is not recognized it has no reservation; don't count it against the issue rate. */
18165 if (recog_memoized (insn) < 0)
18166 return more;
18167
18168 if (rs6000_sched_groups)
18169 {
18170 if (is_microcoded_insn (insn))
18171 cached_can_issue_more = 0;
18172 else if (is_cracked_insn (insn))
18173 cached_can_issue_more = more > 2 ? more - 2 : 0;
18174 else
18175 cached_can_issue_more = more - 1;
18176
18177 return cached_can_issue_more;
18178 }
18179
18180 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
18181 return 0;
18182
18183 cached_can_issue_more = more - 1;
18184 return cached_can_issue_more;
18185 }
18186
18187 static int
18188 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
18189 {
18190 int r = rs6000_variable_issue_1 (insn, more);
18191 if (verbose)
18192 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
18193 return r;
18194 }
18195
18196 /* Adjust the cost of a scheduling dependency. Return the new cost of
18197 the dependency of INSN on DEP_INSN. COST is the current cost. */
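/* For instance (a sketch of the cases handled below): a compare feeding
   its dependent branch gets COST + 2 on most tunes to keep the pair
   apart, and on Power6 a fixed-point result used in the address of a
   dependent load or store gets a larger cost to model address-generation
   delay. */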
18198
18199 static int
18200 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
18201 unsigned int)
18202 {
18203 enum attr_type attr_type;
18204
18205 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
18206 return cost;
18207
18208 switch (dep_type)
18209 {
18210 case REG_DEP_TRUE:
18211 {
18212 /* Data dependency; DEP_INSN writes a register that INSN reads
18213 some cycles later. */
18214
18215 /* Separate a load from a narrower, dependent store. */
18216 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
18217 || rs6000_tune == PROCESSOR_POWER10
18218 || rs6000_tune == PROCESSOR_POWER11)
18219 && GET_CODE (PATTERN (insn)) == SET
18220 && GET_CODE (PATTERN (dep_insn)) == SET
18221 && MEM_P (XEXP (PATTERN (insn), 1))
18222 && MEM_P (XEXP (PATTERN (dep_insn), 0))
18223 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
18224 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
18225 return cost + 14;
18226
18227 attr_type = get_attr_type (insn);
18228
18229 switch (attr_type)
18230 {
18231 case TYPE_JMPREG:
18232 /* Tell the first scheduling pass about the latency between
18233 a mtctr and bctr (and mtlr and br/blr). The first
18234 scheduling pass will not know about this latency since
18235 the mtctr instruction, which has the latency associated
18236 to it, will be generated by reload. */
18237 return 4;
18238 case TYPE_BRANCH:
18239 /* Leave some extra cycles between a compare and its
18240 dependent branch, to inhibit expensive mispredicts. */
18241 if ((rs6000_tune == PROCESSOR_PPC603
18242 || rs6000_tune == PROCESSOR_PPC604
18243 || rs6000_tune == PROCESSOR_PPC604e
18244 || rs6000_tune == PROCESSOR_PPC620
18245 || rs6000_tune == PROCESSOR_PPC630
18246 || rs6000_tune == PROCESSOR_PPC750
18247 || rs6000_tune == PROCESSOR_PPC7400
18248 || rs6000_tune == PROCESSOR_PPC7450
18249 || rs6000_tune == PROCESSOR_PPCE5500
18250 || rs6000_tune == PROCESSOR_PPCE6500
18251 || rs6000_tune == PROCESSOR_POWER4
18252 || rs6000_tune == PROCESSOR_POWER5
18253 || rs6000_tune == PROCESSOR_POWER7
18254 || rs6000_tune == PROCESSOR_POWER8
18255 || rs6000_tune == PROCESSOR_POWER9
18256 || rs6000_tune == PROCESSOR_POWER10
18257 || rs6000_tune == PROCESSOR_POWER11
18258 || rs6000_tune == PROCESSOR_CELL)
18259 && recog_memoized (dep_insn)
18260 && (INSN_CODE (dep_insn) >= 0))
18261
18262 switch (get_attr_type (dep_insn))
18263 {
18264 case TYPE_CMP:
18265 case TYPE_FPCOMPARE:
18266 case TYPE_CR_LOGICAL:
18267 return cost + 2;
18268 case TYPE_EXTS:
18269 case TYPE_MUL:
18270 if (get_attr_dot (dep_insn) == DOT_YES)
18271 return cost + 2;
18272 else
18273 break;
18274 case TYPE_SHIFT:
18275 if (get_attr_dot (dep_insn) == DOT_YES
18276 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
18277 return cost + 2;
18278 else
18279 break;
18280 default:
18281 break;
18282 }
18283 break;
18284
18285 case TYPE_STORE:
18286 case TYPE_FPSTORE:
18287 if ((rs6000_tune == PROCESSOR_POWER6)
18288 && recog_memoized (dep_insn)
18289 && (INSN_CODE (dep_insn) >= 0))
18290 {
18291
18292 if (GET_CODE (PATTERN (insn)) != SET)
18293 /* If this happens, we have to extend this to schedule
18294 optimally. Return default for now. */
18295 return cost;
18296
18297 /* Adjust the cost for the case where the value written
18298 by a fixed point operation is used as the address
18299 gen value on a store. */
18300 switch (get_attr_type (dep_insn))
18301 {
18302 case TYPE_LOAD:
18303 case TYPE_CNTLZ:
18304 {
18305 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18306 return get_attr_sign_extend (dep_insn)
18307 == SIGN_EXTEND_YES ? 6 : 4;
18308 break;
18309 }
18310 case TYPE_SHIFT:
18311 {
18312 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18313 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
18314 6 : 3;
18315 break;
18316 }
18317 case TYPE_INTEGER:
18318 case TYPE_ADD:
18319 case TYPE_LOGICAL:
18320 case TYPE_EXTS:
18321 case TYPE_INSERT:
18322 {
18323 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18324 return 3;
18325 break;
18326 }
18327 case TYPE_STORE:
18328 case TYPE_FPLOAD:
18329 case TYPE_FPSTORE:
18330 {
18331 if (get_attr_update (dep_insn) == UPDATE_YES
18332 && ! rs6000_store_data_bypass_p (dep_insn, insn))
18333 return 3;
18334 break;
18335 }
18336 case TYPE_MUL:
18337 {
18338 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18339 return 17;
18340 break;
18341 }
18342 case TYPE_DIV:
18343 {
18344 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18345 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
18346 break;
18347 }
18348 default:
18349 break;
18350 }
18351 }
18352 break;
18353
18354 case TYPE_LOAD:
18355 if ((rs6000_tune == PROCESSOR_POWER6)
18356 && recog_memoized (dep_insn)
18357 && (INSN_CODE (dep_insn) >= 0))
18358 {
18359
18360 /* Adjust the cost for the case where the value written
18361 by a fixed point instruction is used within the address
18362 gen portion of a subsequent load(u)(x). */
18363 switch (get_attr_type (dep_insn))
18364 {
18365 case TYPE_LOAD:
18366 case TYPE_CNTLZ:
18367 {
18368 if (set_to_load_agen (dep_insn, insn))
18369 return get_attr_sign_extend (dep_insn)
18370 == SIGN_EXTEND_YES ? 6 : 4;
18371 break;
18372 }
18373 case TYPE_SHIFT:
18374 {
18375 if (set_to_load_agen (dep_insn, insn))
18376 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
18377 6 : 3;
18378 break;
18379 }
18380 case TYPE_INTEGER:
18381 case TYPE_ADD:
18382 case TYPE_LOGICAL:
18383 case TYPE_EXTS:
18384 case TYPE_INSERT:
18385 {
18386 if (set_to_load_agen (dep_insn, insn))
18387 return 3;
18388 break;
18389 }
18390 case TYPE_STORE:
18391 case TYPE_FPLOAD:
18392 case TYPE_FPSTORE:
18393 {
18394 if (get_attr_update (dep_insn) == UPDATE_YES
18395 && set_to_load_agen (dep_insn, insn))
18396 return 3;
18397 break;
18398 }
18399 case TYPE_MUL:
18400 {
18401 if (set_to_load_agen (dep_insn, insn))
18402 return 17;
18403 break;
18404 }
18405 case TYPE_DIV:
18406 {
18407 if (set_to_load_agen (dep_insn, insn))
18408 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
18409 break;
18410 }
18411 default:
18412 break;
18413 }
18414 }
18415 break;
18416
18417 default:
18418 break;
18419 }
18420
18421 /* Fall out to return default cost. */
18422 }
18423 break;
18424
18425 case REG_DEP_OUTPUT:
18426 /* Output dependency; DEP_INSN writes a register that INSN writes some
18427 cycles later. */
18428 if ((rs6000_tune == PROCESSOR_POWER6)
18429 && recog_memoized (dep_insn)
18430 && (INSN_CODE (dep_insn) >= 0))
18431 {
18432 attr_type = get_attr_type (insn);
18433
18434 switch (attr_type)
18435 {
18436 case TYPE_FP:
18437 case TYPE_FPSIMPLE:
18438 if (get_attr_type (dep_insn) == TYPE_FP
18439 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
18440 return 1;
18441 break;
18442 default:
18443 break;
18444 }
18445 }
18446 /* Fall through, no cost for output dependency. */
18447 /* FALLTHRU */
18448
18449 case REG_DEP_ANTI:
18450 /* Anti dependency; DEP_INSN reads a register that INSN writes some
18451 cycles later. */
18452 return 0;
18453
18454 default:
18455 gcc_unreachable ();
18456 }
18457
18458 return cost;
18459 }
18460
18461 /* Debug version of rs6000_adjust_cost. */
18462
18463 static int
18464 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
18465 int cost, unsigned int dw)
18466 {
18467 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
18468
18469 if (ret != cost)
18470 {
18471 const char *dep;
18472
18473 switch (dep_type)
18474 {
18475 default: dep = "unknown dependency"; break;
18476 case REG_DEP_TRUE: dep = "data dependency"; break;
18477 case REG_DEP_OUTPUT: dep = "output dependency"; break;
18478 case REG_DEP_ANTI: dep = "anti dependency"; break;
18479 }
18480
18481 fprintf (stderr,
18482 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
18483 "%s, insn:\n", ret, cost, dep);
18484
18485 debug_rtx (insn);
18486 }
18487
18488 return ret;
18489 }
18490
18491 /* Return true if INSN is microcoded on the current processor;
18492 return false otherwise. */
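/* E.g. (a sketch): on Power4/Power5 an update-form indexed load such as
   lwzux is microcoded, as is mfcr; the Cell decides purely from the
   cell_micro instruction attribute. */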
18493
18494 static bool
18495 is_microcoded_insn (rtx_insn *insn)
18496 {
18497 if (!insn || !NONDEBUG_INSN_P (insn)
18498 || GET_CODE (PATTERN (insn)) == USE
18499 || GET_CODE (PATTERN (insn)) == CLOBBER)
18500 return false;
18501
18502 if (rs6000_tune == PROCESSOR_CELL)
18503 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
18504
18505 if (rs6000_sched_groups
18506 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
18507 {
18508 enum attr_type type = get_attr_type (insn);
18509 if ((type == TYPE_LOAD
18510 && get_attr_update (insn) == UPDATE_YES
18511 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
18512 || ((type == TYPE_LOAD || type == TYPE_STORE)
18513 && get_attr_update (insn) == UPDATE_YES
18514 && get_attr_indexed (insn) == INDEXED_YES)
18515 || type == TYPE_MFCR)
18516 return true;
18517 }
18518
18519 return false;
18520 }
18521
18522 /* Return true if INSN is cracked into 2 instructions by the
18523 processor (and therefore occupies 2 issue slots). */
18524
18525 static bool
18526 is_cracked_insn (rtx_insn *insn)
18527 {
18528 if (!insn || !NONDEBUG_INSN_P (insn)
18529 || GET_CODE (PATTERN (insn)) == USE
18530 || GET_CODE (PATTERN (insn)) == CLOBBER)
18531 return false;
18532
18533 if (rs6000_sched_groups
18534 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
18535 {
18536 enum attr_type type = get_attr_type (insn);
18537 if ((type == TYPE_LOAD
18538 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18539 && get_attr_update (insn) == UPDATE_NO)
18540 || (type == TYPE_LOAD
18541 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
18542 && get_attr_update (insn) == UPDATE_YES
18543 && get_attr_indexed (insn) == INDEXED_NO)
18544 || (type == TYPE_STORE
18545 && get_attr_update (insn) == UPDATE_YES
18546 && get_attr_indexed (insn) == INDEXED_NO)
18547 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
18548 && get_attr_update (insn) == UPDATE_YES)
18549 || (type == TYPE_CR_LOGICAL
18550 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
18551 || (type == TYPE_EXTS
18552 && get_attr_dot (insn) == DOT_YES)
18553 || (type == TYPE_SHIFT
18554 && get_attr_dot (insn) == DOT_YES
18555 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
18556 || (type == TYPE_MUL
18557 && get_attr_dot (insn) == DOT_YES)
18558 || type == TYPE_DIV
18559 || (type == TYPE_INSERT
18560 && get_attr_size (insn) == SIZE_32))
18561 return true;
18562 }
18563
18564 return false;
18565 }
18566
18567 /* Return true if INSN can be issued only from
18568 the branch slot. */
18569
18570 static bool
18571 is_branch_slot_insn (rtx_insn *insn)
18572 {
18573 if (!insn || !NONDEBUG_INSN_P (insn)
18574 || GET_CODE (PATTERN (insn)) == USE
18575 || GET_CODE (PATTERN (insn)) == CLOBBER)
18576 return false;
18577
18578 if (rs6000_sched_groups)
18579 {
18580 enum attr_type type = get_attr_type (insn);
18581 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
18582 return true;
18583 return false;
18584 }
18585
18586 return false;
18587 }
18588
18589 /* Return true if OUT_INSN sets a value that is used in the
18590 address generation computation of IN_INSN. */
18591 static bool
18592 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
18593 {
18594 rtx out_set, in_set;
18595
18596 /* For performance reasons, only handle the simple case where
18597 both insns are a single_set. */
18598 out_set = single_set (out_insn);
18599 if (out_set)
18600 {
18601 in_set = single_set (in_insn);
18602 if (in_set)
18603 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
18604 }
18605
18606 return false;
18607 }
18608
18609 /* Try to determine base/offset/size parts of the given MEM.
18610 Return true if successful, false if the values could not all
18611 be determined.
18612
18613 This function only looks for REG or REG+CONST address forms;
18614 for the REG+REG address form it returns false. */
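/* E.g. (a sketch, assuming the MEM's size attribute is known): a MEM of
   the form
       (mem:DI (plus:DI (reg:DI 9) (const_int 16)))
   yields *base = (reg:DI 9), *offset = 16 and *size = 8, while an
   indexed (reg + reg) address makes it return false. */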
18615
18616 static bool
18617 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
18618 HOST_WIDE_INT *size)
18619 {
18620 rtx addr_rtx;
18621 if (MEM_SIZE_KNOWN_P (mem))
18622 *size = MEM_SIZE (mem);
18623 else
18624 return false;
18625
18626 addr_rtx = (XEXP (mem, 0));
18627 if (GET_CODE (addr_rtx) == PRE_MODIFY)
18628 addr_rtx = XEXP (addr_rtx, 1);
18629
18630 *offset = 0;
18631 while (GET_CODE (addr_rtx) == PLUS
18632 && CONST_INT_P (XEXP (addr_rtx, 1)))
18633 {
18634 *offset += INTVAL (XEXP (addr_rtx, 1));
18635 addr_rtx = XEXP (addr_rtx, 0);
18636 }
18637 if (!REG_P (addr_rtx))
18638 return false;
18639
18640 *base = addr_rtx;
18641 return true;
18642 }
18643
18644 /* If the target storage locations of arguments MEM1 and MEM2 are
18645 adjacent, then return the argument that has the lower address.
18646 Otherwise, return NULL_RTX. */
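/* E.g. (a sketch): for an 8-byte store at r9+32 and another at r9+40,
   off1 + size1 == off2, so the first MEM (the lower address) is
   returned. */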
18647
18648 static rtx
18649 adjacent_mem_locations (rtx mem1, rtx mem2)
18650 {
18651 rtx reg1, reg2;
18652 HOST_WIDE_INT off1, size1, off2, size2;
18653
18654 if (MEM_P (mem1)
18655 && MEM_P (mem2)
18656 && get_memref_parts (mem1, &reg1, &off1, &size1)
18657 && get_memref_parts (mem2, &reg2, &off2, &size2)
18658 && REGNO (reg1) == REGNO (reg2))
18659 {
18660 if (off1 + size1 == off2)
18661 return mem1;
18662 else if (off2 + size2 == off1)
18663 return mem2;
18664 }
18665
18666 return NULL_RTX;
18667 }
18668
18669 /* This function returns true if it can be determined that the two MEM
18670 locations overlap by at least 1 byte based on base reg/offset/size. */
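/* E.g. (a sketch): 8 bytes at r9+32 and 4 bytes at r9+36 overlap
   (32 <= 36 and 32 + 8 > 36); 4 bytes at r9+36 and 4 at r9+40 do not. */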
18671
18672 static bool
18673 mem_locations_overlap (rtx mem1, rtx mem2)
18674 {
18675 rtx reg1, reg2;
18676 HOST_WIDE_INT off1, size1, off2, size2;
18677
18678 if (get_memref_parts (mem1, &reg1, &off1, &size1)
18679 && get_memref_parts (mem2, &reg2, &off2, &size2))
18680 return ((REGNO (reg1) == REGNO (reg2))
18681 && (((off1 <= off2) && (off1 + size1 > off2))
18682 || ((off2 <= off1) && (off2 + size2 > off1))));
18683
18684 return false;
18685 }
18686
18687 /* A C statement (sans semicolon) to update the integer scheduling
18688 priority INSN_PRIORITY (INSN). Increase the priority to execute the
18689 INSN earlier, reduce the priority to execute INSN later. Do not
18690 define this macro if you do not need to adjust the scheduling
18691 priorities of insns. */
18692
18693 static int
18694 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
18695 {
18696 rtx load_mem, str_mem;
18697 /* On machines (like the 750) which have asymmetric integer units,
18698 where one integer unit can do multiplies and divides and the other
18699 can't, reduce the priority of multiply/divide so it is scheduled
18700 before other integer operations. */
18701
18702 #if 0
18703 if (! INSN_P (insn))
18704 return priority;
18705
18706 if (GET_CODE (PATTERN (insn)) == USE)
18707 return priority;
18708
18709 switch (rs6000_tune) {
18710 case PROCESSOR_PPC750:
18711 switch (get_attr_type (insn))
18712 {
18713 default:
18714 break;
18715
18716 case TYPE_MUL:
18717 case TYPE_DIV:
18718 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
18719 priority, priority);
18720 if (priority >= 0 && priority < 0x01000000)
18721 priority >>= 3;
18722 break;
18723 }
18724 }
18725 #endif
18726
18727 if (insn_must_be_first_in_group (insn)
18728 && reload_completed
18729 && current_sched_info->sched_max_insns_priority
18730 && rs6000_sched_restricted_insns_priority)
18731 {
18732
18733 /* Prioritize insns that can be dispatched only in the first
18734 dispatch slot. */
18735 if (rs6000_sched_restricted_insns_priority == 1)
18736 /* Attach highest priority to insn. This means that in
18737 haifa-sched.cc:ready_sort(), dispatch-slot restriction considerations
18738 precede 'priority' (critical path) considerations. */
18739 return current_sched_info->sched_max_insns_priority;
18740 else if (rs6000_sched_restricted_insns_priority == 2)
18741 /* Increase priority of insn by a minimal amount. This means that in
18742 haifa-sched.cc:ready_sort(), only 'priority' (critical path)
18743 considerations precede dispatch-slot restriction considerations. */
18744 return (priority + 1);
18745 }
18746
18747 if (rs6000_tune == PROCESSOR_POWER6
18748 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
18749 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
18750 /* Attach highest priority to insn if the scheduler has just issued two
18751 stores and this instruction is a load, or two loads and this instruction
18752 is a store. Power6 wants loads and stores scheduled alternately
18753 when possible. */
18754 return current_sched_info->sched_max_insns_priority;
18755
18756 return priority;
18757 }
18758
18759 /* Return true if the instruction is nonpipelined on the Cell. */
18760 static bool
18761 is_nonpipeline_insn (rtx_insn *insn)
18762 {
18763 enum attr_type type;
18764 if (!insn || !NONDEBUG_INSN_P (insn)
18765 || GET_CODE (PATTERN (insn)) == USE
18766 || GET_CODE (PATTERN (insn)) == CLOBBER)
18767 return false;
18768
18769 type = get_attr_type (insn);
18770 if (type == TYPE_MUL
18771 || type == TYPE_DIV
18772 || type == TYPE_SDIV
18773 || type == TYPE_DDIV
18774 || type == TYPE_SSQRT
18775 || type == TYPE_DSQRT
18776 || type == TYPE_MFCR
18777 || type == TYPE_MFCRF
18778 || type == TYPE_MFJMPR)
18779 {
18780 return true;
18781 }
18782 return false;
18783 }
18784
18785
18786 /* Return how many instructions the machine can issue per cycle. */
18787
18788 static int
18789 rs6000_issue_rate (void)
18790 {
18791 /* Unless scheduling for register pressure, use issue rate of 1 for
18792 first scheduling pass to decrease degradation. */
18793 if (!reload_completed && !flag_sched_pressure)
18794 return 1;
18795
18796 switch (rs6000_tune) {
18797 case PROCESSOR_RS64A:
18798 case PROCESSOR_PPC601: /* ? */
18799 case PROCESSOR_PPC7450:
18800 return 3;
18801 case PROCESSOR_PPC440:
18802 case PROCESSOR_PPC603:
18803 case PROCESSOR_PPC750:
18804 case PROCESSOR_PPC7400:
18805 case PROCESSOR_PPC8540:
18806 case PROCESSOR_PPC8548:
18807 case PROCESSOR_CELL:
18808 case PROCESSOR_PPCE300C2:
18809 case PROCESSOR_PPCE300C3:
18810 case PROCESSOR_PPCE500MC:
18811 case PROCESSOR_PPCE500MC64:
18812 case PROCESSOR_PPCE5500:
18813 case PROCESSOR_PPCE6500:
18814 case PROCESSOR_TITAN:
18815 return 2;
18816 case PROCESSOR_PPC476:
18817 case PROCESSOR_PPC604:
18818 case PROCESSOR_PPC604e:
18819 case PROCESSOR_PPC620:
18820 case PROCESSOR_PPC630:
18821 return 4;
18822 case PROCESSOR_POWER4:
18823 case PROCESSOR_POWER5:
18824 case PROCESSOR_POWER6:
18825 case PROCESSOR_POWER7:
18826 return 5;
18827 case PROCESSOR_POWER8:
18828 return 7;
18829 case PROCESSOR_POWER9:
18830 return 6;
18831 case PROCESSOR_POWER10:
18832 case PROCESSOR_POWER11:
18833 return 8;
18834 default:
18835 return 1;
18836 }
18837 }
18838
18839 /* Return how many instructions to look ahead for better insn
18840 scheduling. */
18841
18842 static int
18843 rs6000_use_sched_lookahead (void)
18844 {
18845 switch (rs6000_tune)
18846 {
18847 case PROCESSOR_PPC8540:
18848 case PROCESSOR_PPC8548:
18849 return 4;
18850
18851 case PROCESSOR_CELL:
18852 return (reload_completed ? 8 : 0);
18853
18854 default:
18855 return 0;
18856 }
18857 }
18858
18859 /* We are choosing an insn from the ready queue. Return zero if INSN
18860 can be chosen. */
18861 static int
18862 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
18863 {
18864 if (ready_index == 0)
18865 return 0;
18866
18867 if (rs6000_tune != PROCESSOR_CELL)
18868 return 0;
18869
18870 gcc_assert (insn != NULL_RTX && INSN_P (insn));
18871
18872 if (!reload_completed
18873 || is_nonpipeline_insn (insn)
18874 || is_microcoded_insn (insn))
18875 return 1;
18876
18877 return 0;
18878 }
18879
18880 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18881 and return true. */
18882
18883 static bool
18884 find_mem_ref (rtx pat, rtx *mem_ref)
18885 {
18886 const char * fmt;
18887 int i, j;
18888
18889 /* stack_tie does not produce any real memory traffic. */
18890 if (tie_operand (pat, VOIDmode))
18891 return false;
18892
18893 if (MEM_P (pat))
18894 {
18895 *mem_ref = pat;
18896 return true;
18897 }
18898
18899 /* Recursively process the pattern. */
18900 fmt = GET_RTX_FORMAT (GET_CODE (pat));
18901
18902 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
18903 {
18904 if (fmt[i] == 'e')
18905 {
18906 if (find_mem_ref (XEXP (pat, i), mem_ref))
18907 return true;
18908 }
18909 else if (fmt[i] == 'E')
18910 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
18911 {
18912 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
18913 return true;
18914 }
18915 }
18916
18917 return false;
18918 }
18919
18920 /* Determine if PAT is a PATTERN of a load insn. */
18921
18922 static bool
18923 is_load_insn1 (rtx pat, rtx *load_mem)
18924 {
18925 if (!pat)
18926 return false;
18927
18928 if (GET_CODE (pat) == SET)
18929 {
18930 if (REG_P (SET_DEST (pat)))
18931 return find_mem_ref (SET_SRC (pat), load_mem);
18932 else
18933 return false;
18934 }
18935
18936 if (GET_CODE (pat) == PARALLEL)
18937 {
18938 int i;
18939
18940 for (i = 0; i < XVECLEN (pat, 0); i++)
18941 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
18942 return true;
18943 }
18944
18945 return false;
18946 }
18947
18948 /* Determine if INSN loads from memory. */
18949
18950 static bool
18951 is_load_insn (rtx insn, rtx *load_mem)
18952 {
18953 if (!insn || !INSN_P (insn))
18954 return false;
18955
18956 if (CALL_P (insn))
18957 return false;
18958
18959 return is_load_insn1 (PATTERN (insn), load_mem);
18960 }
18961
18962 /* Determine if PAT is a PATTERN of a store insn. */
18963
18964 static bool
18965 is_store_insn1 (rtx pat, rtx *str_mem)
18966 {
18967 if (!pat)
18968 return false;
18969
18970 if (GET_CODE (pat) == SET)
18971 {
18972 if (REG_P (SET_SRC (pat)) || SUBREG_P (SET_SRC (pat)))
18973 return find_mem_ref (SET_DEST (pat), str_mem);
18974 else
18975 return false;
18976 }
18977
18978 if (GET_CODE (pat) == PARALLEL)
18979 {
18980 int i;
18981
18982 for (i = 0; i < XVECLEN (pat, 0); i++)
18983 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
18984 return true;
18985 }
18986
18987 return false;
18988 }
18989
18990 /* Determine if INSN stores to memory. */
18991
18992 static bool
18993 is_store_insn (rtx insn, rtx *str_mem)
18994 {
18995 if (!insn || !INSN_P (insn))
18996 return false;
18997
18998 return is_store_insn1 (PATTERN (insn), str_mem);
18999 }
19000
19001 /* Return whether TYPE is a Power9 pairable vector instruction type. */
19002
19003 static bool
19004 is_power9_pairable_vec_type (enum attr_type type)
19005 {
19006 switch (type)
19007 {
19008 case TYPE_VECSIMPLE:
19009 case TYPE_VECCOMPLEX:
19010 case TYPE_VECDIV:
19011 case TYPE_VECCMP:
19012 case TYPE_VECPERM:
19013 case TYPE_VECFLOAT:
19014 case TYPE_VECFDIV:
19015 case TYPE_VECDOUBLE:
19016 return true;
19017 default:
19018 break;
19019 }
19020 return false;
19021 }
19022
19023 /* Returns whether the dependence between INSN and NEXT is considered
19024 costly by the given target. */
19025
19026 static bool
19027 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
19028 {
19029 rtx insn;
19030 rtx next;
19031 rtx load_mem, str_mem;
19032
19033 /* If the flag is not enabled - no dependence is considered costly;
19034 allow all dependent insns in the same group.
19035 This is the most aggressive option. */
19036 if (rs6000_sched_costly_dep == no_dep_costly)
19037 return false;
19038
19039 /* If the flag is set to 1 - a dependence is always considered costly;
19040 do not allow dependent instructions in the same group.
19041 This is the most conservative option. */
19042 if (rs6000_sched_costly_dep == all_deps_costly)
19043 return true;
19044
19045 insn = DEP_PRO (dep);
19046 next = DEP_CON (dep);
19047
19048 if (rs6000_sched_costly_dep == store_to_load_dep_costly
19049 && is_load_insn (next, &load_mem)
19050 && is_store_insn (insn, &str_mem))
19051 /* Prevent load after store in the same group. */
19052 return true;
19053
19054 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
19055 && is_load_insn (next, &load_mem)
19056 && is_store_insn (insn, &str_mem)
19057 && DEP_TYPE (dep) == REG_DEP_TRUE
19058 && mem_locations_overlap(str_mem, load_mem))
19059 /* Prevent load after store in the same group if it is a true
19060 dependence. */
19061 return true;
19062
19063 /* The flag is set to X; dependences with latency >= X are considered costly,
19064 and will not be scheduled in the same group. */
19065 if (rs6000_sched_costly_dep <= max_dep_latency
19066 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
19067 return true;
19068
19069 return false;
19070 }
19071
19072 /* Return the next insn after INSN that is found before TAIL is reached,
19073 skipping any "non-active" insns - insns that will not actually occupy
19074 an issue slot. Return NULL_RTX if such an insn is not found. */
19075
19076 static rtx_insn *
19077 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
19078 {
19079 if (insn == NULL_RTX || insn == tail)
19080 return NULL;
19081
19082 while (1)
19083 {
19084 insn = NEXT_INSN (insn);
19085 if (insn == NULL_RTX || insn == tail)
19086 return NULL;
19087
19088 if (CALL_P (insn)
19089 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
19090 || (NONJUMP_INSN_P (insn)
19091 && GET_CODE (PATTERN (insn)) != USE
19092 && GET_CODE (PATTERN (insn)) != CLOBBER
19093 && INSN_CODE (insn) != CODE_FOR_stack_tie))
19094 break;
19095 }
19096 return insn;
19097 }
19098
19099 /* Move instruction at POS to the end of the READY list. */
19100
19101 static void
19102 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
19103 {
19104 rtx_insn *tmp;
19105 int i;
19106
19107 tmp = ready[pos];
19108 for (i = pos; i < lastpos; i++)
19109 ready[i] = ready[i + 1];
19110 ready[lastpos] = tmp;
19111 }
19112
19113 /* Do Power6 specific sched_reorder2 reordering of ready list. */
19114
19115 static int
19116 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
19117 {
19118 /* For Power6, we need to handle some special cases to try and keep the
19119 store queue from overflowing and triggering expensive flushes.
19120
19121 This code monitors how load and store instructions are being issued
19122 and skews the ready list one way or the other to increase the likelihood
19123 that a desired instruction is issued at the proper time.
19124
19125 A couple of things are done. First, we maintain a "load_store_pendulum"
19126 to track the current state of load/store issue.
19127
19128 - If the pendulum is at zero, then no loads or stores have been
19129 issued in the current cycle so we do nothing.
19130
19131 - If the pendulum is 1, then a single load has been issued in this
19132 cycle and we attempt to locate another load in the ready list to
19133 issue with it.
19134
19135 - If the pendulum is -2, then two stores have already been
19136 issued in this cycle, so we increase the priority of the first load
19137 in the ready list to increase its likelihood of being chosen first
19138 in the next cycle.
19139
19140 - If the pendulum is -1, then a single store has been issued in this
19141 cycle and we attempt to locate another store in the ready list to
19142 issue with it, preferring a store to an adjacent memory location to
19143 facilitate store pairing in the store queue.
19144
19145 - If the pendulum is 2, then two loads have already been
19146 issued in this cycle, so we increase the priority of the first store
19147 in the ready list to increase its likelihood of being chosen first
19148 in the next cycle.
19149
19150 - If the pendulum < -2 or > 2, then do nothing.
19151
19152 Note: This code covers the most common scenarios. There exist
19153 non-load/store instructions which make use of the LSU and which
19154 would need to be accounted for to strictly model the behavior
19155 of the machine. Those instructions are currently unaccounted
19156 for to help minimize the compile-time overhead of this code.
19157 */
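/* Example (a sketch): after two stores issue in one cycle the pendulum
   sits at -2, so the first load on the ready list gets a priority bump
   for the next cycle, and the pendulum is then nudged to -3 so no
   further loads are boosted. */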
19158 int pos;
19159 rtx load_mem, str_mem;
19160
19161 if (is_store_insn (last_scheduled_insn, &str_mem))
19162 /* Issuing a store, swing the load_store_pendulum to the left. */
19163 load_store_pendulum--;
19164 else if (is_load_insn (last_scheduled_insn, &load_mem))
19165 /* Issuing a load, swing the load_store_pendulum to the right. */
19166 load_store_pendulum++;
19167 else
19168 return cached_can_issue_more;
19169
19170 /* If the pendulum is balanced, or there is only one instruction on
19171 the ready list, then all is well, so return. */
19172 if ((load_store_pendulum == 0) || (lastpos <= 0))
19173 return cached_can_issue_more;
19174
19175 if (load_store_pendulum == 1)
19176 {
19177 /* A load has been issued in this cycle. Scan the ready list
19178 for another load to issue with it. */
19179 pos = lastpos;
19180
19181 while (pos >= 0)
19182 {
19183 if (is_load_insn (ready[pos], &load_mem))
19184 {
19185 /* Found a load. Move it to the head of the ready list,
19186 and adjust its priority so that it is more likely to
19187 stay there. */
19188 move_to_end_of_ready (ready, pos, lastpos);
19189
19190 if (!sel_sched_p ()
19191 && INSN_PRIORITY_KNOWN (ready[lastpos]))
19192 INSN_PRIORITY (ready[lastpos])++;
19193 break;
19194 }
19195 pos--;
19196 }
19197 }
19198 else if (load_store_pendulum == -2)
19199 {
19200 /* Two stores have been issued in this cycle. Increase the
19201 priority of the first load in the ready list to favor it for
19202 issuing in the next cycle. */
19203 pos = lastpos;
19204
19205 while (pos >= 0)
19206 {
19207 if (is_load_insn (ready[pos], &load_mem)
19208 && !sel_sched_p ()
19209 && INSN_PRIORITY_KNOWN (ready[pos]))
19210 {
19211 INSN_PRIORITY (ready[pos])++;
19212
19213 /* Adjust the pendulum to account for the fact that a load
19214 was found and increased in priority. This is to prevent
19215 increasing the priority of multiple loads. */
19216 load_store_pendulum--;
19217
19218 break;
19219 }
19220 pos--;
19221 }
19222 }
19223 else if (load_store_pendulum == -1)
19224 {
19225 /* A store has been issued in this cycle. Scan the ready list for
19226 another store to issue with it, preferring a store to an adjacent
19227 memory location. */
19228 int first_store_pos = -1;
19229
19230 pos = lastpos;
19231
19232 while (pos >= 0)
19233 {
19234 if (is_store_insn (ready[pos], &str_mem))
19235 {
19236 rtx str_mem2;
19237 /* Maintain the index of the first store found on the
19238 list. */
19239 if (first_store_pos == -1)
19240 first_store_pos = pos;
19241
19242 if (is_store_insn (last_scheduled_insn, &str_mem2)
19243 && adjacent_mem_locations (str_mem, str_mem2))
19244 {
19245 /* Found an adjacent store. Move it to the head of the
19246 ready list, and adjust its priority so that it is
19247 more likely to stay there. */
19248 move_to_end_of_ready (ready, pos, lastpos);
19249
19250 if (!sel_sched_p ()
19251 && INSN_PRIORITY_KNOWN (ready[lastpos]))
19252 INSN_PRIORITY (ready[lastpos])++;
19253
19254 first_store_pos = -1;
19255
19256 break;
19257 }
19258 }
19259 pos--;
19260 }
19261
19262 if (first_store_pos >= 0)
19263 {
19264 /* An adjacent store wasn't found, but a non-adjacent store was,
19265 so move the non-adjacent store to the front of the ready
19266 list, and adjust its priority so that it is more likely to
19267 stay there. */
19268 move_to_end_of_ready (ready, first_store_pos, lastpos);
19269 if (!sel_sched_p ()
19270 && INSN_PRIORITY_KNOWN (ready[lastpos]))
19271 INSN_PRIORITY (ready[lastpos])++;
19272 }
19273 }
19274 else if (load_store_pendulum == 2)
19275 {
19276 /* Two loads have been issued in this cycle. Increase the priority
19277 of the first store in the ready list to favor it for issuing in
19278 the next cycle. */
19279 pos = lastpos;
19280
19281 while (pos >= 0)
19282 {
19283 if (is_store_insn (ready[pos], &str_mem)
19284 && !sel_sched_p ()
19285 && INSN_PRIORITY_KNOWN (ready[pos]))
19286 {
19287 INSN_PRIORITY (ready[pos])++;
19288
19289 /* Adjust the pendulum to account for the fact that a store
19290 was found and increased in priority. This is to prevent
19291 increasing the priority of multiple stores. */
19292 load_store_pendulum++;
19293
19294 break;
19295 }
19296 pos--;
19297 }
19298 }
19299
19300 return cached_can_issue_more;
19301 }
19302
19303 /* Do Power9 specific sched_reorder2 reordering of ready list. */
19304
19305 static int
19306 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
19307 {
19308 int pos;
19309 enum attr_type type, type2;
19310
19311 type = get_attr_type (last_scheduled_insn);
19312
19313 /* Try to issue fixed point divides back-to-back in pairs so they will be
19314 routed to separate execution units and execute in parallel. */
19315 if (type == TYPE_DIV && divide_cnt == 0)
19316 {
19317 /* First divide has been scheduled. */
19318 divide_cnt = 1;
19319
19320 /* Scan the ready list looking for another divide, if found move it
19321 to the end of the list so it is chosen next. */
19322 pos = lastpos;
19323 while (pos >= 0)
19324 {
19325 if (recog_memoized (ready[pos]) >= 0
19326 && get_attr_type (ready[pos]) == TYPE_DIV)
19327 {
19328 move_to_end_of_ready (ready, pos, lastpos);
19329 break;
19330 }
19331 pos--;
19332 }
19333 }
19334 else
19335 {
19336 /* Last insn was the 2nd divide or not a divide, reset the counter. */
19337 divide_cnt = 0;
19338
19339 /* The best dispatch throughput for vector and vector load insns can be
19340 achieved by interleaving a vector and vector load such that they'll
19341 dispatch to the same superslice. If this pairing cannot be achieved
19342 then it is best to pair vector insns together and vector load insns
19343 together.
19344
19345 To aid in this pairing, vec_pairing maintains the current state with
19346 the following values:
19347
19348 0 : Initial state, no vecload/vector pairing has been started.
19349
19350 1 : A vecload or vector insn has been issued and a candidate for
19351 pairing has been found and moved to the end of the ready
19352 list. */
19353 if (type == TYPE_VECLOAD)
19354 {
19355 /* Issued a vecload. */
19356 if (vec_pairing == 0)
19357 {
19358 int vecload_pos = -1;
19359 /* We issued a single vecload, look for a vector insn to pair it
19360 with. If one isn't found, try to pair another vecload. */
19361 pos = lastpos;
19362 while (pos >= 0)
19363 {
19364 if (recog_memoized (ready[pos]) >= 0)
19365 {
19366 type2 = get_attr_type (ready[pos]);
19367 if (is_power9_pairable_vec_type (type2))
19368 {
19369 /* Found a vector insn to pair with, move it to the
19370 end of the ready list so it is scheduled next. */
19371 move_to_end_of_ready (ready, pos, lastpos);
19372 vec_pairing = 1;
19373 return cached_can_issue_more;
19374 }
19375 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
19376 /* Remember position of first vecload seen. */
19377 vecload_pos = pos;
19378 }
19379 pos--;
19380 }
19381 if (vecload_pos >= 0)
19382 {
19383 /* Didn't find a vector to pair with but did find a vecload,
19384 move it to the end of the ready list. */
19385 move_to_end_of_ready (ready, vecload_pos, lastpos);
19386 vec_pairing = 1;
19387 return cached_can_issue_more;
19388 }
19389 }
19390 }
19391 else if (is_power9_pairable_vec_type (type))
19392 {
19393 /* Issued a vector operation. */
19394 if (vec_pairing == 0)
19395 {
19396 int vec_pos = -1;
19397 /* We issued a single vector insn, look for a vecload to pair it
19398 with. If one isn't found, try to pair another vector. */
19399 pos = lastpos;
19400 while (pos >= 0)
19401 {
19402 if (recog_memoized (ready[pos]) >= 0)
19403 {
19404 type2 = get_attr_type (ready[pos]);
19405 if (type2 == TYPE_VECLOAD)
19406 {
19407 /* Found a vecload insn to pair with, move it to the
19408 end of the ready list so it is scheduled next. */
19409 move_to_end_of_ready (ready, pos, lastpos);
19410 vec_pairing = 1;
19411 return cached_can_issue_more;
19412 }
19413 else if (is_power9_pairable_vec_type (type2)
19414 && vec_pos == -1)
19415 /* Remember position of first vector insn seen. */
19416 vec_pos = pos;
19417 }
19418 pos--;
19419 }
19420 if (vec_pos >= 0)
19421 {
19422 /* Didn't find a vecload to pair with but did find a vector
19423 insn, move it to the end of the ready list. */
19424 move_to_end_of_ready (ready, vec_pos, lastpos);
19425 vec_pairing = 1;
19426 return cached_can_issue_more;
19427 }
19428 }
19429 }
19430
19431 /* We've either finished a vec/vecload pair, couldn't find an insn to
19432 continue the current pair, or the last insn had nothing to do
19433 with pairing. In any case, reset the state. */
19434 vec_pairing = 0;
19435 }
19436
19437 return cached_can_issue_more;
19438 }
19439
19440 /* Determine if INSN is a store to memory that can be fused with a similar
19441 adjacent store. */
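/* E.g. (a sketch): two plain base+displacement "std" stores to r1+32 and
   r1+40 both qualify; prefixed, update-form or indexed stores never do. */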
19442
19443 static bool
19444 is_fusable_store (rtx_insn *insn, rtx *str_mem)
19445 {
19446 /* Insn must be a non-prefixed base+disp form store. */
19447 if (is_store_insn (insn, str_mem)
19448 && get_attr_prefixed (insn) == PREFIXED_NO
19449 && get_attr_update (insn) == UPDATE_NO
19450 && get_attr_indexed (insn) == INDEXED_NO)
19451 {
19452 /* Further restrictions by mode and size. */
19453 if (!MEM_SIZE_KNOWN_P (*str_mem))
19454 return false;
19455
19456 machine_mode mode = GET_MODE (*str_mem);
19457 HOST_WIDE_INT size = MEM_SIZE (*str_mem);
19458
19459 if (INTEGRAL_MODE_P (mode))
19460 /* Must be word or dword size. */
19461 return (size == 4 || size == 8);
19462 else if (FLOAT_MODE_P (mode))
19463 /* Must be dword size. */
19464 return (size == 8);
19465 }
19466
19467 return false;
19468 }
19469
19470 /* Do Power10 specific reordering of the ready list. */
19471
19472 static int
19473 power10_sched_reorder (rtx_insn **ready, int lastpos)
19474 {
19475 rtx mem1;
19476
19477 /* Do store fusion during sched2 only. */
19478 if (!reload_completed)
19479 return cached_can_issue_more;
19480
19481 /* If the prior insn finished off a store fusion pair then simply
19482 reset the counter and return, nothing more to do. */
19483 if (load_store_pendulum != 0)
19484 {
19485 load_store_pendulum = 0;
19486 return cached_can_issue_more;
19487 }
19488
19489 /* Try to pair certain store insns to adjacent memory locations
19490 so that the hardware will fuse them to a single operation. */
19491 if (TARGET_P10_FUSION && is_fusable_store (last_scheduled_insn, &mem1))
19492 {
19493
19494 /* A fusable store was just scheduled. Scan the ready list for another
19495 store that it can fuse with. */
19496 int pos = lastpos;
19497 while (pos >= 0)
19498 {
19499 rtx mem2;
19500 /* GPR stores can be fused at ascending or descending offsets; FPR/VSR
19501 stores must be ascending only. */
19502 if (is_fusable_store (ready[pos], &mem2)
19503 && ((INTEGRAL_MODE_P (GET_MODE (mem1))
19504 && adjacent_mem_locations (mem1, mem2))
19505 || (FLOAT_MODE_P (GET_MODE (mem1))
19506 && (adjacent_mem_locations (mem1, mem2) == mem1))))
19507 {
19508 /* Found a fusable store. Move it to the end of the ready list
19509 so it is scheduled next. */
19510 move_to_end_of_ready (ready, pos, lastpos);
19511
19512 load_store_pendulum = -1;
19513 break;
19514 }
19515 pos--;
19516 }
19517 }
19518
19519 return cached_can_issue_more;
19520 }
19521
19522 /* We are about to begin issuing insns for this clock cycle. */
19523
19524 static int
19525 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
19526 rtx_insn **ready ATTRIBUTE_UNUSED,
19527 int *pn_ready ATTRIBUTE_UNUSED,
19528 int clock_var ATTRIBUTE_UNUSED)
19529 {
19530 int n_ready = *pn_ready;
19531
19532 if (sched_verbose)
19533 fprintf (dump, "// rs6000_sched_reorder :\n");
19534
19535 /* Reorder the ready list if the next insn to issue (the last
19536 element of the ready list) is a nonpipelined insn. */
19537 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
19538 {
19539 if (is_nonpipeline_insn (ready[n_ready - 1])
19540 && (recog_memoized (ready[n_ready - 2]) > 0))
19541 /* Simply swap first two insns. */
19542 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
19543 }
19544
19545 if (rs6000_tune == PROCESSOR_POWER6)
19546 load_store_pendulum = 0;
19547
19548 /* Do Power10/Power11 dependent reordering. */
19549 if (last_scheduled_insn
19550 && (rs6000_tune == PROCESSOR_POWER10
19551 || rs6000_tune == PROCESSOR_POWER11))
19552 power10_sched_reorder (ready, n_ready - 1);
19553
19554 return rs6000_issue_rate ();
19555 }
19556
19557 /* Like rs6000_sched_reorder, but called after issuing each insn. */
19558
19559 static int
19560 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
19561 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
19562 {
19563 if (sched_verbose)
19564 fprintf (dump, "// rs6000_sched_reorder2 :\n");
19565
19566 /* Do Power6 dependent reordering if necessary. */
19567 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
19568 return power6_sched_reorder2 (ready, *pn_ready - 1);
19569
19570 /* Do Power9 dependent reordering if necessary. */
19571 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
19572 && recog_memoized (last_scheduled_insn) >= 0)
19573 return power9_sched_reorder2 (ready, *pn_ready - 1);
19574
19575 /* Do Power10/Power11 dependent reordering. */
19576 if (last_scheduled_insn
19577 && (rs6000_tune == PROCESSOR_POWER10
19578 || rs6000_tune == PROCESSOR_POWER11))
19579 return power10_sched_reorder (ready, *pn_ready - 1);
19580
19581 return cached_can_issue_more;
19582 }
19583
19584 /* Return whether the presence of INSN causes a dispatch group termination
19585 of group WHICH_GROUP.
19586
19587 If WHICH_GROUP == current_group, this function will return true if INSN
19588 causes the termination of the current group (i.e., the dispatch group to
19589 which INSN belongs). This means that INSN will be the last insn in the
19590 group it belongs to.
19591
19592 If WHICH_GROUP == previous_group, this function will return true if INSN
19593 causes the termination of the previous group (i.e., the dispatch group that
19594 precedes the group to which INSN belongs). This means that INSN will be
19595 the first insn in the group it belongs to. */
19596
19597 static bool
19598 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
19599 {
19600 bool first, last;
19601
19602 if (! insn)
19603 return false;
19604
19605 first = insn_must_be_first_in_group (insn);
19606 last = insn_must_be_last_in_group (insn);
19607
19608 if (first && last)
19609 return true;
19610
19611 if (which_group == current_group)
19612 return last;
19613 else if (which_group == previous_group)
19614 return first;
19615
19616 return false;
19617 }
19618
19619
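/* Return true if INSN must be the first insn in a dispatch group for the
   processor being tuned for. */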
19620 static bool
19621 insn_must_be_first_in_group (rtx_insn *insn)
19622 {
19623 enum attr_type type;
19624
19625 if (!insn
19626 || NOTE_P (insn)
19627 || DEBUG_INSN_P (insn)
19628 || GET_CODE (PATTERN (insn)) == USE
19629 || GET_CODE (PATTERN (insn)) == CLOBBER)
19630 return false;
19631
19632 switch (rs6000_tune)
19633 {
19634 case PROCESSOR_POWER5:
19635 if (is_cracked_insn (insn))
19636 return true;
19637 /* FALLTHRU */
19638 case PROCESSOR_POWER4:
19639 if (is_microcoded_insn (insn))
19640 return true;
19641
19642 if (!rs6000_sched_groups)
19643 return false;
19644
19645 type = get_attr_type (insn);
19646
19647 switch (type)
19648 {
19649 case TYPE_MFCR:
19650 case TYPE_MFCRF:
19651 case TYPE_MTCR:
19652 case TYPE_CR_LOGICAL:
19653 case TYPE_MTJMPR:
19654 case TYPE_MFJMPR:
19655 case TYPE_DIV:
19656 case TYPE_LOAD_L:
19657 case TYPE_STORE_C:
19658 case TYPE_ISYNC:
19659 case TYPE_SYNC:
19660 return true;
19661 default:
19662 break;
19663 }
19664 break;
19665 case PROCESSOR_POWER6:
19666 type = get_attr_type (insn);
19667
19668 switch (type)
19669 {
19670 case TYPE_EXTS:
19671 case TYPE_CNTLZ:
19672 case TYPE_TRAP:
19673 case TYPE_MUL:
19674 case TYPE_INSERT:
19675 case TYPE_FPCOMPARE:
19676 case TYPE_MFCR:
19677 case TYPE_MTCR:
19678 case TYPE_MFJMPR:
19679 case TYPE_MTJMPR:
19680 case TYPE_ISYNC:
19681 case TYPE_SYNC:
19682 case TYPE_LOAD_L:
19683 case TYPE_STORE_C:
19684 return true;
19685 case TYPE_SHIFT:
19686 if (get_attr_dot (insn) == DOT_NO
19687 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19688 return true;
19689 else
19690 break;
19691 case TYPE_DIV:
19692 if (get_attr_size (insn) == SIZE_32)
19693 return true;
19694 else
19695 break;
19696 case TYPE_LOAD:
19697 case TYPE_STORE:
19698 case TYPE_FPLOAD:
19699 case TYPE_FPSTORE:
19700 if (get_attr_update (insn) == UPDATE_YES)
19701 return true;
19702 else
19703 break;
19704 default:
19705 break;
19706 }
19707 break;
19708 case PROCESSOR_POWER7:
19709 type = get_attr_type (insn);
19710
19711 switch (type)
19712 {
19713 case TYPE_CR_LOGICAL:
19714 case TYPE_MFCR:
19715 case TYPE_MFCRF:
19716 case TYPE_MTCR:
19717 case TYPE_DIV:
19718 case TYPE_ISYNC:
19719 case TYPE_LOAD_L:
19720 case TYPE_STORE_C:
19721 case TYPE_MFJMPR:
19722 case TYPE_MTJMPR:
19723 return true;
19724 case TYPE_MUL:
19725 case TYPE_SHIFT:
19726 case TYPE_EXTS:
19727 if (get_attr_dot (insn) == DOT_YES)
19728 return true;
19729 else
19730 break;
19731 case TYPE_LOAD:
19732 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19733 || get_attr_update (insn) == UPDATE_YES)
19734 return true;
19735 else
19736 break;
19737 case TYPE_STORE:
19738 case TYPE_FPLOAD:
19739 case TYPE_FPSTORE:
19740 if (get_attr_update (insn) == UPDATE_YES)
19741 return true;
19742 else
19743 break;
19744 default:
19745 break;
19746 }
19747 break;
19748 case PROCESSOR_POWER8:
19749 type = get_attr_type (insn);
19750
19751 switch (type)
19752 {
19753 case TYPE_CR_LOGICAL:
19754 case TYPE_MFCR:
19755 case TYPE_MFCRF:
19756 case TYPE_MTCR:
19757 case TYPE_SYNC:
19758 case TYPE_ISYNC:
19759 case TYPE_LOAD_L:
19760 case TYPE_STORE_C:
19761 case TYPE_VECSTORE:
19762 case TYPE_MFJMPR:
19763 case TYPE_MTJMPR:
19764 return true;
19765 case TYPE_SHIFT:
19766 case TYPE_EXTS:
19767 case TYPE_MUL:
19768 if (get_attr_dot (insn) == DOT_YES)
19769 return true;
19770 else
19771 break;
19772 case TYPE_LOAD:
19773 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19774 || get_attr_update (insn) == UPDATE_YES)
19775 return true;
19776 else
19777 break;
19778 case TYPE_STORE:
19779 if (get_attr_update (insn) == UPDATE_YES
19780 && get_attr_indexed (insn) == INDEXED_YES)
19781 return true;
19782 else
19783 break;
19784 default:
19785 break;
19786 }
19787 break;
19788 default:
19789 break;
19790 }
19791
19792 return false;
19793 }
19794
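/* Return true if INSN must be the last insn in a dispatch group for the
   processor being tuned for. */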
19795 static bool
19796 insn_must_be_last_in_group (rtx_insn *insn)
19797 {
19798 enum attr_type type;
19799
19800 if (!insn
19801 || NOTE_P (insn)
19802 || DEBUG_INSN_P (insn)
19803 || GET_CODE (PATTERN (insn)) == USE
19804 || GET_CODE (PATTERN (insn)) == CLOBBER)
19805 return false;
19806
19807 switch (rs6000_tune)
    {
19808 case PROCESSOR_POWER4:
19809 case PROCESSOR_POWER5:
19810 if (is_microcoded_insn (insn))
19811 return true;
19812
19813 if (is_branch_slot_insn (insn))
19814 return true;
19815
19816 break;
19817 case PROCESSOR_POWER6:
19818 type = get_attr_type (insn);
19819
19820 switch (type)
19821 {
19822 case TYPE_EXTS:
19823 case TYPE_CNTLZ:
19824 case TYPE_TRAP:
19825 case TYPE_MUL:
19826 case TYPE_FPCOMPARE:
19827 case TYPE_MFCR:
19828 case TYPE_MTCR:
19829 case TYPE_MFJMPR:
19830 case TYPE_MTJMPR:
19831 case TYPE_ISYNC:
19832 case TYPE_SYNC:
19833 case TYPE_LOAD_L:
19834 case TYPE_STORE_C:
19835 return true;
19836 case TYPE_SHIFT:
19837 if (get_attr_dot (insn) == DOT_NO
19838 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19839 return true;
19840 else
19841 break;
19842 case TYPE_DIV:
19843 if (get_attr_size (insn) == SIZE_32)
19844 return true;
19845 else
19846 break;
19847 default:
19848 break;
19849 }
19850 break;
19851 case PROCESSOR_POWER7:
19852 type = get_attr_type (insn);
19853
19854 switch (type)
19855 {
19856 case TYPE_ISYNC:
19857 case TYPE_SYNC:
19858 case TYPE_LOAD_L:
19859 case TYPE_STORE_C:
19860 return true;
19861 case TYPE_LOAD:
19862 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19863 && get_attr_update (insn) == UPDATE_YES)
19864 return true;
19865 else
19866 break;
19867 case TYPE_STORE:
19868 if (get_attr_update (insn) == UPDATE_YES
19869 && get_attr_indexed (insn) == INDEXED_YES)
19870 return true;
19871 else
19872 break;
19873 default:
19874 break;
19875 }
19876 break;
19877 case PROCESSOR_POWER8:
19878 type = get_attr_type (insn);
19879
19880 switch (type)
19881 {
19882 case TYPE_MFCR:
19883 case TYPE_MTCR:
19884 case TYPE_ISYNC:
19885 case TYPE_SYNC:
19886 case TYPE_LOAD_L:
19887 case TYPE_STORE_C:
19888 return true;
19889 case TYPE_LOAD:
19890 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19891 && get_attr_update (insn) == UPDATE_YES)
19892 return true;
19893 else
19894 break;
19895 case TYPE_STORE:
19896 if (get_attr_update (insn) == UPDATE_YES
19897 && get_attr_indexed (insn) == INDEXED_YES)
19898 return true;
19899 else
19900 break;
19901 default:
19902 break;
19903 }
19904 break;
19905 default:
19906 break;
19907 }
19908
19909 return false;
19910 }
19911
19912 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
19913 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
19914
19915 static bool
19916 is_costly_group (rtx *group_insns, rtx next_insn)
19917 {
19918 int i;
19919 int issue_rate = rs6000_issue_rate ();
19920
19921 for (i = 0; i < issue_rate; i++)
19922 {
19923 sd_iterator_def sd_it;
19924 dep_t dep;
19925 rtx insn = group_insns[i];
19926
19927 if (!insn)
19928 continue;
19929
19930 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
19931 {
19932 rtx next = DEP_CON (dep);
19933
19934 if (next == next_insn
19935 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
19936 return true;
19937 }
19938 }
19939
19940 return false;
19941 }
19942
19943 /* Utility function used by redefine_groups.
19944 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
19945 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
19946 to keep it "far" (in a separate group) from GROUP_INSNS, following
19947 one of the following schemes, depending on the value of the flag
19948 -minsert-sched-nops=X:
19949 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
19950 in order to force NEXT_INSN into a separate group.
19951 (2) X < sched_finish_regroup_exact: insert exactly X nops.
19952 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
19953 insertion (has a group just ended, how many vacant issue slots remain in the
19954 last group, and how many dispatch groups were encountered so far). */
19955
19956 static int
19957 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
19958 rtx_insn *next_insn, bool *group_end, int can_issue_more,
19959 int *group_count)
19960 {
19961 rtx nop;
19962 bool force;
19963 int issue_rate = rs6000_issue_rate ();
19964 bool end = *group_end;
19965 int i;
19966
19967 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
19968 return can_issue_more;
19969
19970 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
19971 return can_issue_more;
19972
19973 force = is_costly_group (group_insns, next_insn);
19974 if (!force)
19975 return can_issue_more;
19976
19977 if (sched_verbose > 6)
19978 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
19979 *group_count, can_issue_more);
19980
19981 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
19982 {
19983 if (*group_end)
19984 can_issue_more = 0;
19985
19986 /* Since only a branch can be issued in the last issue_slot, it is
19987 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
19988 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
19989 in this case the last nop will start a new group and the branch
19990 will be forced to the new group. */
19991 if (can_issue_more && !is_branch_slot_insn (next_insn))
19992 can_issue_more--;
19993
19994 /* Do we have a special group ending nop? */
19995 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
19996 || rs6000_tune == PROCESSOR_POWER8)
19997 {
19998 nop = gen_group_ending_nop ();
19999 emit_insn_before (nop, next_insn);
20000 can_issue_more = 0;
20001 }
20002 else
20003 while (can_issue_more > 0)
20004 {
20005 nop = gen_nop ();
20006 emit_insn_before (nop, next_insn);
20007 can_issue_more--;
20008 }
20009
20010 *group_end = true;
20011 return 0;
20012 }
20013
20014 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
20015 {
20016 int n_nops = rs6000_sched_insert_nops;
20017
20018 /* Nops can't be issued from the branch slot, so the effective
20019 issue_rate for nops is 'issue_rate - 1'. */
20020 if (can_issue_more == 0)
20021 can_issue_more = issue_rate;
20022 can_issue_more--;
20023 if (can_issue_more == 0)
20024 {
20025 can_issue_more = issue_rate - 1;
20026 (*group_count)++;
20027 end = true;
20028 for (i = 0; i < issue_rate; i++)
20029 {
20030 group_insns[i] = 0;
20031 }
20032 }
20033
20034 while (n_nops > 0)
20035 {
20036 nop = gen_nop ();
20037 emit_insn_before (nop, next_insn);
20038 if (can_issue_more == issue_rate - 1) /* New group begins. */
20039 end = false;
20040 can_issue_more--;
20041 if (can_issue_more == 0)
20042 {
20043 can_issue_more = issue_rate - 1;
20044 (*group_count)++;
20045 end = true;
20046 for (i = 0; i < issue_rate; i++)
20047 {
20048 group_insns[i] = 0;
20049 }
20050 }
20051 n_nops--;
20052 }
20053
20054 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
20055 can_issue_more++;
20056
20057 /* Is next_insn going to start a new group? */
20058 *group_end
20059 = (end
20060 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
20061 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
20062 || (can_issue_more < issue_rate
20063 && insn_terminates_group_p (next_insn, previous_group)));
20064 if (*group_end && end)
20065 (*group_count)--;
20066
20067 if (sched_verbose > 6)
20068 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
20069 *group_count, can_issue_more);
20070 return can_issue_more;
20071 }
20072
20073 return can_issue_more;
20074 }
20075
20076 /* This function tries to synch the dispatch groups that the compiler "sees"
20077 with the dispatch groups that the processor dispatcher is expected to
20078 form in practice. It tries to achieve this synchronization by forcing the
20079 estimated processor grouping on the compiler (as opposed to the function
20080 'pad_groups' which tries to force the scheduler's grouping on the processor).
20081
20082 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
20083 examines the (estimated) dispatch groups that will be formed by the processor
20084 dispatcher. It marks these group boundaries to reflect the estimated
20085 processor grouping, overriding the grouping that the scheduler had marked.
20086 Depending on the value of the flag '-minsert-sched-nops' this function can
20087 force certain insns into separate groups or force a certain distance between
20088 them by inserting nops, for example, if there exists a "costly dependence"
20089 between the insns.
20090
20091 The function estimates the group boundaries that the processor will form as
20092 follows: It keeps track of how many vacant issue slots are available after
20093 each insn. A subsequent insn will start a new group if one of the following
20094 4 cases applies:
20095 - no more vacant issue slots remain in the current dispatch group.
20096 - only the last issue slot, which is the branch slot, is vacant, but the next
20097 insn is not a branch.
20098 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
20099 which means that a cracked insn (which occupies two issue slots) can't be
20100 issued in this group.
20101 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
20102 start a new group. */
20103
20104 static int
20105 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
20106 rtx_insn *tail)
20107 {
20108 rtx_insn *insn, *next_insn;
20109 int issue_rate;
20110 int can_issue_more;
20111 int slot, i;
20112 bool group_end;
20113 int group_count = 0;
20114 rtx *group_insns;
20115
20116 /* Initialize. */
20117 issue_rate = rs6000_issue_rate ();
20118 group_insns = XALLOCAVEC (rtx, issue_rate);
20119 for (i = 0; i < issue_rate; i++)
20120 {
20121 group_insns[i] = 0;
20122 }
20123 can_issue_more = issue_rate;
20124 slot = 0;
20125 insn = get_next_active_insn (prev_head_insn, tail);
20126 group_end = false;
20127
20128 while (insn != NULL_RTX)
20129 {
20130 slot = (issue_rate - can_issue_more);
20131 group_insns[slot] = insn;
20132 can_issue_more =
20133 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
20134 if (insn_terminates_group_p (insn, current_group))
20135 can_issue_more = 0;
20136
20137 next_insn = get_next_active_insn (insn, tail);
20138 if (next_insn == NULL_RTX)
20139 return group_count + 1;
20140
20141 /* Is next_insn going to start a new group? */
20142 group_end
20143 = (can_issue_more == 0
20144 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
20145 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
20146 || (can_issue_more < issue_rate
20147 && insn_terminates_group_p (next_insn, previous_group)));
20148
20149 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
20150 next_insn, &group_end, can_issue_more,
20151 &group_count);
20152
20153 if (group_end)
20154 {
20155 group_count++;
20156 can_issue_more = 0;
20157 for (i = 0; i < issue_rate; i++)
20158 {
20159 group_insns[i] = 0;
20160 }
20161 }
20162
20163 if (GET_MODE (next_insn) == TImode && can_issue_more)
20164 PUT_MODE (next_insn, VOIDmode);
20165 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
20166 PUT_MODE (next_insn, TImode);
20167
20168 insn = next_insn;
20169 if (can_issue_more == 0)
20170 can_issue_more = issue_rate;
20171 } /* while */
20172
20173 return group_count;
20174 }
20175
20176 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
20177 dispatch group boundaries that the scheduler had marked. Pad with nops
20178 any dispatch groups which have vacant issue slots, in order to force the
20179 scheduler's grouping on the processor dispatcher. The function
20180 returns the number of dispatch groups found. */
20181
20182 static int
20183 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
20184 rtx_insn *tail)
20185 {
20186 rtx_insn *insn, *next_insn;
20187 rtx nop;
20188 int issue_rate;
20189 int can_issue_more;
20190 int group_end;
20191 int group_count = 0;
20192
20193 /* Initialize issue_rate. */
20194 issue_rate = rs6000_issue_rate ();
20195 can_issue_more = issue_rate;
20196
20197 insn = get_next_active_insn (prev_head_insn, tail);
20198 next_insn = get_next_active_insn (insn, tail);
20199
20200 while (insn != NULL_RTX)
20201 {
20202 can_issue_more =
20203 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
20204
20205 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
20206
20207 if (next_insn == NULL_RTX)
20208 break;
20209
20210 if (group_end)
20211 {
20212 /* If the scheduler had marked group termination at this location
20213 (between insn and next_insn), and neither insn nor next_insn will
20214 force group termination, pad the group with nops to force group
20215 termination. */
20216 if (can_issue_more
20217 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
20218 && !insn_terminates_group_p (insn, current_group)
20219 && !insn_terminates_group_p (next_insn, previous_group))
20220 {
20221 if (!is_branch_slot_insn (next_insn))
20222 can_issue_more--;
20223
20224 while (can_issue_more)
20225 {
20226 nop = gen_nop ();
20227 emit_insn_before (nop, next_insn);
20228 can_issue_more--;
20229 }
20230 }
20231
20232 can_issue_more = issue_rate;
20233 group_count++;
20234 }
20235
20236 insn = next_insn;
20237 next_insn = get_next_active_insn (insn, tail);
20238 }
20239
20240 return group_count;
20241 }
20242
20243 /* We're beginning a new block. Initialize data structures as necessary. */
20244
20245 static void
20246 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
20247 int sched_verbose ATTRIBUTE_UNUSED,
20248 int max_ready ATTRIBUTE_UNUSED)
20249 {
20250 last_scheduled_insn = NULL;
20251 load_store_pendulum = 0;
20252 divide_cnt = 0;
20253 vec_pairing = 0;
20254 }
20255
20256 /* The following function is called at the end of scheduling BB.
20257 After reload, it inserts nops to enforce insn group bundling. */
20258
20259 static void
20260 rs6000_sched_finish (FILE *dump, int sched_verbose)
20261 {
20262 int n_groups;
20263
20264 if (sched_verbose)
20265 fprintf (dump, "=== Finishing schedule.\n");
20266
20267 if (reload_completed && rs6000_sched_groups)
20268 {
20269 /* Do not run the sched_finish hook when selective scheduling is enabled. */
20270 if (sel_sched_p ())
20271 return;
20272
20273 if (rs6000_sched_insert_nops == sched_finish_none)
20274 return;
20275
20276 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
20277 n_groups = pad_groups (dump, sched_verbose,
20278 current_sched_info->prev_head,
20279 current_sched_info->next_tail);
20280 else
20281 n_groups = redefine_groups (dump, sched_verbose,
20282 current_sched_info->prev_head,
20283 current_sched_info->next_tail);
20284
20285 if (sched_verbose >= 6)
20286 {
20287 fprintf (dump, "ngroups = %d\n", n_groups);
20288 print_rtl (dump, current_sched_info->prev_head);
20289 fprintf (dump, "Done finish_sched\n");
20290 }
20291 }
20292 }
20293
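/* A snapshot of the global scheduling state, saved and restored by the
   scheduling context routines below. */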
20294 struct rs6000_sched_context
20295 {
20296 short cached_can_issue_more;
20297 rtx_insn *last_scheduled_insn;
20298 int load_store_pendulum;
20299 int divide_cnt;
20300 int vec_pairing;
20301 };
20302
20303 typedef struct rs6000_sched_context rs6000_sched_context_def;
20304 typedef rs6000_sched_context_def *rs6000_sched_context_t;
20305
20306 /* Allocate storage for a new scheduling context. */
20307 static void *
20308 rs6000_alloc_sched_context (void)
20309 {
20310 return xmalloc (sizeof (rs6000_sched_context_def));
20311 }
20312
20313 /* If CLEAN_P is true, initialize _SC with clean data;
20314 otherwise, initialize it from the global context. */
20315 static void
20316 rs6000_init_sched_context (void *_sc, bool clean_p)
20317 {
20318 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
20319
20320 if (clean_p)
20321 {
20322 sc->cached_can_issue_more = 0;
20323 sc->last_scheduled_insn = NULL;
20324 sc->load_store_pendulum = 0;
20325 sc->divide_cnt = 0;
20326 sc->vec_pairing = 0;
20327 }
20328 else
20329 {
20330 sc->cached_can_issue_more = cached_can_issue_more;
20331 sc->last_scheduled_insn = last_scheduled_insn;
20332 sc->load_store_pendulum = load_store_pendulum;
20333 sc->divide_cnt = divide_cnt;
20334 sc->vec_pairing = vec_pairing;
20335 }
20336 }
20337
20338 /* Set the global scheduling context to the one pointed to by _SC. */
20339 static void
20340 rs6000_set_sched_context (void *_sc)
20341 {
20342 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
20343
20344 gcc_assert (sc != NULL);
20345
20346 cached_can_issue_more = sc->cached_can_issue_more;
20347 last_scheduled_insn = sc->last_scheduled_insn;
20348 load_store_pendulum = sc->load_store_pendulum;
20349 divide_cnt = sc->divide_cnt;
20350 vec_pairing = sc->vec_pairing;
20351 }
20352
20353 /* Free _SC. */
20354 static void
20355 rs6000_free_sched_context (void *_sc)
20356 {
20357 gcc_assert (_sc != NULL);
20358
20359 free (_sc);
20360 }
20361
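/* Return true if INSN may be scheduled speculatively; divide and
   square root insns may not. */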
20362 static bool
20363 rs6000_sched_can_speculate_insn (rtx_insn *insn)
20364 {
20365 switch (get_attr_type (insn))
20366 {
20367 case TYPE_DIV:
20368 case TYPE_SDIV:
20369 case TYPE_DDIV:
20370 case TYPE_VECDIV:
20371 case TYPE_SSQRT:
20372 case TYPE_DSQRT:
20373 return false;
20374
20375 default:
20376 return true;
20377 }
20378 }
20379 \f
20380 /* Length in units of the trampoline for entering a nested function. */
20381
20382 int
20383 rs6000_trampoline_size (void)
20384 {
20385 int ret = 0;
20386
20387 switch (DEFAULT_ABI)
20388 {
20389 default:
20390 gcc_unreachable ();
20391
20392 case ABI_AIX:
20393 ret = (TARGET_32BIT) ? 12 : 24;
20394 break;
20395
20396 case ABI_ELFv2:
20397 gcc_assert (!TARGET_32BIT);
20398 ret = 32;
20399 break;
20400
20401 case ABI_DARWIN:
20402 case ABI_V4:
20403 ret = (TARGET_32BIT) ? 40 : 48;
20404 break;
20405 }
20406
20407 return ret;
20408 }
20409
20410 /* Emit RTL insns to initialize the variable parts of a trampoline.
20411 FNADDR is an RTX for the address of the function's pure code.
20412 CXT is an RTX for the static chain value for the function. */
20413
20414 static void
20415 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
20416 {
20417 int regsize = (TARGET_32BIT) ? 4 : 8;
20418 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
20419 rtx ctx_reg = force_reg (Pmode, cxt);
20420 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
20421
20422 switch (DEFAULT_ABI)
20423 {
20424 default:
20425 gcc_unreachable ();
20426
20427 /* Under AIX, just build the 3-word function descriptor. */
20428 case ABI_AIX:
20429 {
20430 rtx fnmem, fn_reg, toc_reg;
20431
20432 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
20433 error ("you cannot take the address of a nested function if you use "
20434 "the %qs option", "-mno-pointers-to-nested-functions");
20435
20436 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
20437 fn_reg = gen_reg_rtx (Pmode);
20438 toc_reg = gen_reg_rtx (Pmode);
20439
20440 /* Macro to shorten the code expansions below. */
20441 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
20442
20443 m_tramp = replace_equiv_address (m_tramp, addr);
20444
20445 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
20446 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
20447 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
20448 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
20449 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
20450
20451 # undef MEM_PLUS
20452 }
20453 break;
20454
20455 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
20456 case ABI_ELFv2:
20457 case ABI_DARWIN:
20458 case ABI_V4:
20459 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
20460 LCT_NORMAL, VOIDmode,
20461 addr, Pmode,
20462 GEN_INT (rs6000_trampoline_size ()), SImode,
20463 fnaddr, Pmode,
20464 ctx_reg, Pmode);
20465 break;
20466 }
20467 }
20468
20469 \f
20470 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
20471 identifier as an argument, so the front end shouldn't look it up. */
20472
20473 static bool
20474 rs6000_attribute_takes_identifier_p (const_tree attr_id)
20475 {
20476 return is_attribute_p ("altivec", attr_id);
20477 }
20478
20479 /* Handle the "altivec" attribute. The attribute may have
20480 arguments as follows:
20481
20482 __attribute__((altivec(vector__)))
20483 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
20484 __attribute__((altivec(bool__))) (always followed by 'unsigned')
20485
20486 and may appear more than once (e.g., 'vector bool char') in a
20487 given declaration. */
20488
20489 static tree
20490 rs6000_handle_altivec_attribute (tree *node,
20491 tree name ATTRIBUTE_UNUSED,
20492 tree args,
20493 int flags ATTRIBUTE_UNUSED,
20494 bool *no_add_attrs)
20495 {
20496 tree type = *node, result = NULL_TREE;
20497 machine_mode mode;
20498 int unsigned_p;
20499 char altivec_type
20500 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
20501 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
20502 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
20503 : '?');
20504
20505 while (POINTER_TYPE_P (type)
20506 || TREE_CODE (type) == FUNCTION_TYPE
20507 || TREE_CODE (type) == METHOD_TYPE
20508 || TREE_CODE (type) == ARRAY_TYPE)
20509 type = TREE_TYPE (type);
20510
20511 mode = TYPE_MODE (type);
20512
20513 /* Check for invalid AltiVec type qualifiers. */
20514 if (type == long_double_type_node)
20515 error ("use of %<long double%> in AltiVec types is invalid");
20516 else if (type == boolean_type_node)
20517 error ("use of boolean types in AltiVec types is invalid");
20518 else if (TREE_CODE (type) == COMPLEX_TYPE)
20519 error ("use of %<complex%> in AltiVec types is invalid");
20520 else if (DECIMAL_FLOAT_MODE_P (mode))
20521 error ("use of decimal floating-point types in AltiVec types is invalid");
20522 else if (!TARGET_VSX)
20523 {
20524 if (type == long_unsigned_type_node || type == long_integer_type_node)
20525 {
20526 if (TARGET_64BIT)
20527 error ("use of %<long%> in AltiVec types is invalid for "
20528 "64-bit code without %qs", "-mvsx");
20529 else if (rs6000_warn_altivec_long)
20530 warning (0, "use of %<long%> in AltiVec types is deprecated; "
20531 "use %<int%>");
20532 }
20533 else if (type == long_long_unsigned_type_node
20534 || type == long_long_integer_type_node)
20535 error ("use of %<long long%> in AltiVec types is invalid without %qs",
20536 "-mvsx");
20537 else if (type == double_type_node)
20538 error ("use of %<double%> in AltiVec types is invalid without %qs",
20539 "-mvsx");
20540 }
20541
20542 switch (altivec_type)
20543 {
20544 case 'v':
20545 unsigned_p = TYPE_UNSIGNED (type);
20546 switch (mode)
20547 {
20548 case E_TImode:
20549 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
20550 break;
20551 case E_DImode:
20552 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
20553 break;
20554 case E_SImode:
20555 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
20556 break;
20557 case E_HImode:
20558 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
20559 break;
20560 case E_QImode:
20561 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
20562 break;
20563 case E_SFmode: result = V4SF_type_node; break;
20564 case E_DFmode: result = V2DF_type_node; break;
20565 /* If the user says 'vector int bool', we may be handed the 'bool'
20566 attribute _before_ the 'vector' attribute, and so select the
20567 proper type in the 'b' case below. */
20568 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
20569 case E_V2DImode: case E_V2DFmode:
20570 result = type;
20571 default: break;
20572 }
20573 break;
20574 case 'b':
20575 switch (mode)
20576 {
20577 case E_TImode: case E_V1TImode: result = bool_V1TI_type_node; break;
20578 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
20579 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
20580 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
20581 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
20582 default: break;
20583 }
20584 break;
20585 case 'p':
20586 switch (mode)
20587 {
20588 case E_V8HImode: result = pixel_V8HI_type_node;
20589 default: break;
20590 }
20591 default: break;
20592 }
20593
20594 /* Propagate qualifiers attached to the element type
20595 onto the vector type. */
20596 if (result && result != type && TYPE_QUALS (type))
20597 result = build_qualified_type (result, TYPE_QUALS (type));
20598
20599 *no_add_attrs = true; /* No need to hang on to the attribute. */
20600
20601 if (result)
20602 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
20603
20604 return NULL_TREE;
20605 }
20606
20607 /* AltiVec defines five built-in scalar types that serve as vector
20608 elements; we must teach the compiler how to mangle them. The 128-bit
20609 floating point mangling is target-specific as well. MMA defines
20610 two built-in types to be used as opaque vector types. */
20611
20612 static const char *
20613 rs6000_mangle_type (const_tree type)
20614 {
20615 type = TYPE_MAIN_VARIANT (type);
20616
20617 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
20618 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
20619 && TREE_CODE (type) != OPAQUE_TYPE)
20620 return NULL;
20621
20622 if (type == bool_char_type_node) return "U6__boolc";
20623 if (type == bool_short_type_node) return "U6__bools";
20624 if (type == pixel_type_node) return "u7__pixel";
20625 if (type == bool_int_type_node) return "U6__booli";
20626 if (type == bool_long_long_type_node) return "U6__boolx";
20627
20628 if (type == float128_type_node || type == float64x_type_node)
20629 return NULL;
20630
20631 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
20632 return "g";
20633 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
20634 return "u9__ieee128";
20635
20636 if (type == vector_pair_type_node)
20637 return "u13__vector_pair";
20638 if (type == vector_quad_type_node)
20639 return "u13__vector_quad";
20640
20641 /* For all other types, use the default mangling. */
20642 return NULL;
20643 }
20644
20645 /* Handle a "longcall" or "shortcall" attribute; arguments as in
20646 struct attribute_spec.handler. */
20647
20648 static tree
20649 rs6000_handle_longcall_attribute (tree *node, tree name,
20650 tree args ATTRIBUTE_UNUSED,
20651 int flags ATTRIBUTE_UNUSED,
20652 bool *no_add_attrs)
20653 {
20654 if (TREE_CODE (*node) != FUNCTION_TYPE
20655 && TREE_CODE (*node) != FIELD_DECL
20656 && TREE_CODE (*node) != TYPE_DECL)
20657 {
20658 warning (OPT_Wattributes, "%qE attribute only applies to functions",
20659 name);
20660 *no_add_attrs = true;
20661 }
20662
20663 return NULL_TREE;
20664 }
20665
20666 /* Set longcall attributes on all functions declared when
20667 rs6000_default_long_calls is true. */
20668 static void
20669 rs6000_set_default_type_attributes (tree type)
20670 {
20671 if (rs6000_default_long_calls
20672 && FUNC_OR_METHOD_TYPE_P (type))
20673 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
20674 NULL_TREE,
20675 TYPE_ATTRIBUTES (type));
20676
20677 #if TARGET_MACHO
20678 darwin_set_default_type_attributes (type);
20679 #endif
20680 }
20681
20682 /* Return a reference suitable for calling a function with the
20683 longcall attribute. */
20684
20685 static rtx
20686 rs6000_longcall_ref (rtx call_ref, rtx arg)
20687 {
20688 /* System V adds '.' to the internal name, so skip any leading '.'s. */
20689 const char *call_name = XSTR (call_ref, 0);
20690 if (*call_name == '.')
20691 {
20692 while (*call_name == '.')
20693 call_name++;
20694
20695 tree node = get_identifier (call_name);
20696 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
20697 }
20698
20699 if (TARGET_PLTSEQ)
20700 {
20701 rtx base = const0_rtx;
20702 int regno = 12;
20703 if (rs6000_pcrel_p ())
20704 {
20705 rtx reg = gen_rtx_REG (Pmode, regno);
20706 rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
20707 gen_rtvec (3, base, call_ref, arg),
20708 UNSPECV_PLT_PCREL);
20709 emit_insn (gen_rtx_SET (reg, u));
20710 return reg;
20711 }
20712
20713 if (DEFAULT_ABI == ABI_ELFv2)
20714 base = gen_rtx_REG (Pmode, TOC_REGISTER);
20715 else
20716 {
20717 if (flag_pic)
20718 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
20719 regno = 11;
20720 }
20721 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
20722 may be used by a function global entry point. For SysV4, r11
20723 is used by __glink_PLTresolve lazy resolver entry. */
20724 rtx reg = gen_rtx_REG (Pmode, regno);
20725 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
20726 UNSPEC_PLT16_HA);
20727 rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
20728 gen_rtvec (3, reg, call_ref, arg),
20729 UNSPECV_PLT16_LO);
20730 emit_insn (gen_rtx_SET (reg, hi));
20731 emit_insn (gen_rtx_SET (reg, lo));
20732 return reg;
20733 }
20734
20735 return force_reg (Pmode, call_ref);
20736 }
20737 \f
20738 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20739 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
20740 #endif
20741
20742 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20743 struct attribute_spec.handler. */
20744 static tree
20745 rs6000_handle_struct_attribute (tree *node, tree name,
20746 tree args ATTRIBUTE_UNUSED,
20747 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20748 {
20749 tree *type = NULL;
20750 if (DECL_P (*node))
20751 {
20752 if (TREE_CODE (*node) == TYPE_DECL)
20753 type = &TREE_TYPE (*node);
20754 }
20755 else
20756 type = node;
20757
20758 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20759 || TREE_CODE (*type) == UNION_TYPE)))
20760 {
20761 warning (OPT_Wattributes, "%qE attribute ignored", name);
20762 *no_add_attrs = true;
20763 }
20764
20765 else if ((is_attribute_p ("ms_struct", name)
20766 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20767 || ((is_attribute_p ("gcc_struct", name)
20768 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
20769 {
20770 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
20771 name);
20772 *no_add_attrs = true;
20773 }
20774
20775 return NULL_TREE;
20776 }
20777
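/* Return true if RECORD_TYPE should use the Microsoft bit-field layout:
   either MS layout is the target default and the type is not marked
   "gcc_struct", or the type is explicitly marked "ms_struct". */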
20778 static bool
20779 rs6000_ms_bitfield_layout_p (const_tree record_type)
20780 {
20781 return ((TARGET_USE_MS_BITFIELD_LAYOUT
20782 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20783 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
20784 }
20785 \f
20786 #ifdef USING_ELFOS_H
20787
20788 /* A get_unnamed_section callback, used for switching to toc_section. */
20789
20790 static void
20791 rs6000_elf_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
20792 {
20793 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20794 && TARGET_MINIMAL_TOC)
20795 {
20796 if (!toc_initialized)
20797 {
20798 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20799 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20800 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
20801 fprintf (asm_out_file, "\t.tc ");
20802 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
20803 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20804 fprintf (asm_out_file, "\n");
20805
20806 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20807 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20808 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20809 fprintf (asm_out_file, " = .+32768\n");
20810 toc_initialized = 1;
20811 }
20812 else
20813 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20814 }
20815 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20816 {
20817 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20818 if (!toc_initialized)
20819 {
20820 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20821 toc_initialized = 1;
20822 }
20823 }
20824 else
20825 {
20826 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20827 if (!toc_initialized)
20828 {
20829 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20830 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20831 fprintf (asm_out_file, " = .+32768\n");
20832 toc_initialized = 1;
20833 }
20834 }
20835 }
20836
20837 /* Implement TARGET_ASM_INIT_SECTIONS. */
20838
20839 static void
20840 rs6000_elf_asm_init_sections (void)
20841 {
20842 toc_section
20843 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
20844
20845 sdata2_section
20846 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
20847 SDATA2_SECTION_ASM_OP);
20848 }
20849
20850 /* Implement TARGET_SELECT_RTX_SECTION. */
20851
20852 static section *
20853 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
20854 unsigned HOST_WIDE_INT align)
20855 {
20856 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20857 return toc_section;
20858 else
20859 return default_elf_select_rtx_section (mode, x, align);
20860 }
20861 \f
20862 /* For a SYMBOL_REF, set generic flags and then perform some
20863 target-specific processing.
20864
20865 When the AIX ABI is requested on a non-AIX system, replace the
20866 function name with the real name (with a leading .) rather than the
20867 function descriptor name. This saves a lot of overriding code to
20868 read the prefixes. */
20869
20870 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
20871 static void
20872 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
20873 {
20874 default_encode_section_info (decl, rtl, first);
20875
20876 if (first
20877 && TREE_CODE (decl) == FUNCTION_DECL
20878 && !TARGET_AIX
20879 && DEFAULT_ABI == ABI_AIX)
20880 {
20881 rtx sym_ref = XEXP (rtl, 0);
20882 size_t len = strlen (XSTR (sym_ref, 0));
20883 char *str = XALLOCAVEC (char, len + 2);
20884 str[0] = '.';
20885 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
20886 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
20887 }
20888 }
20889
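/* Return true if SECTION names the section TEMPL itself, or a
   subsection of it (TEMPL followed by '.'). */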
20890 static inline bool
20891 compare_section_name (const char *section, const char *templ)
20892 {
20893 int len;
20894
20895 len = strlen (templ);
20896 return (strncmp (section, templ, len) == 0
20897 && (section[len] == 0 || section[len] == '.'));
20898 }
20899
20900 bool
20901 rs6000_elf_in_small_data_p (const_tree decl)
20902 {
20903 if (rs6000_sdata == SDATA_NONE)
20904 return false;
20905
20906 /* We want to merge strings, so we never consider them small data. */
20907 if (TREE_CODE (decl) == STRING_CST)
20908 return false;
20909
20910 /* Functions are never in the small data area. */
20911 if (TREE_CODE (decl) == FUNCTION_DECL)
20912 return false;
20913
20914 if (VAR_P (decl) && DECL_SECTION_NAME (decl))
20915 {
20916 const char *section = DECL_SECTION_NAME (decl);
20917 if (compare_section_name (section, ".sdata")
20918 || compare_section_name (section, ".sdata2")
20919 || compare_section_name (section, ".gnu.linkonce.s")
20920 || compare_section_name (section, ".sbss")
20921 || compare_section_name (section, ".sbss2")
20922 || compare_section_name (section, ".gnu.linkonce.sb")
20923 || strcmp (section, ".PPC.EMB.sdata0") == 0
20924 || strcmp (section, ".PPC.EMB.sbss0") == 0)
20925 return true;
20926 }
20927 else
20928 {
20929 /* If we are told not to put readonly data in sdata, then don't. */
20930 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
20931 && !rs6000_readonly_in_sdata)
20932 return false;
20933
20934 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
20935
20936 if (size > 0
20937 && size <= g_switch_value
20938 /* If it's not public, and we're not going to reference it there,
20939 there's no need to put it in the small data section. */
20940 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
20941 return true;
20942 }
20943
20944 return false;
20945 }
20946
20947 #endif /* USING_ELFOS_H */
20948 \f
20949 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
20950
20951 static bool
20952 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
20953 {
20954 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
20955 }
20956
20957 /* Do not place thread-local symbols refs in the object blocks. */
20958
20959 static bool
20960 rs6000_use_blocks_for_decl_p (const_tree decl)
20961 {
20962 return !DECL_THREAD_LOCAL_P (decl);
20963 }
20964 \f
20965 /* Return a REG that occurs in ADDR with coefficient 1.
20966 ADDR can be effectively incremented by incrementing REG.
20967
20968 r0 is special and we must not select it as an address
20969 register by this routine since our caller will try to
20970 increment the returned register via an "la" instruction. */
20971
20972 rtx
20973 find_addr_reg (rtx addr)
20974 {
20975 while (GET_CODE (addr) == PLUS)
20976 {
20977 if (REG_P (XEXP (addr, 0))
20978 && REGNO (XEXP (addr, 0)) != 0)
20979 addr = XEXP (addr, 0);
20980 else if (REG_P (XEXP (addr, 1))
20981 && REGNO (XEXP (addr, 1)) != 0)
20982 addr = XEXP (addr, 1);
20983 else if (CONSTANT_P (XEXP (addr, 0)))
20984 addr = XEXP (addr, 1);
20985 else if (CONSTANT_P (XEXP (addr, 1)))
20986 addr = XEXP (addr, 0);
20987 else
20988 gcc_unreachable ();
20989 }
20990 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
20991 return addr;
20992 }
20993
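/* Report a fatal error for the bad address OP. */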
20994 void
20995 rs6000_fatal_bad_address (rtx op)
20996 {
20997 fatal_insn ("bad address", op);
20998 }
20999
21000 #if TARGET_MACHO
21001
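/* Table recording the branch islands needed for far calls. */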
21002 vec<branch_island, va_gc> *branch_islands;
21003
21004 /* Remember to generate a branch island for far calls to the given
21005 function. */
21006
21007 static void
21008 add_compiler_branch_island (tree label_name, tree function_name,
21009 int line_number)
21010 {
21011 branch_island bi = {function_name, label_name, line_number};
21012 vec_safe_push (branch_islands, bi);
21013 }
21014
21015 /* NO_PREVIOUS_DEF checks whether the function name is already in the
21016 branch island list. */
21017
21018 static int
21019 no_previous_def (tree function_name)
21020 {
21021 branch_island *bi;
21022 unsigned ix;
21023
21024 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
21025 if (function_name == bi->function_name)
21026 return 0;
21027 return 1;
21028 }
21029
21030 /* GET_PREV_LABEL gets the label name from the previous definition of
21031 the function. */
21032
21033 static tree
21034 get_prev_label (tree function_name)
21035 {
21036 branch_island *bi;
21037 unsigned ix;
21038
21039 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
21040 if (function_name == bi->function_name)
21041 return bi->label_name;
21042 return NULL_TREE;
21043 }
21044
21045 /* Generate external symbol indirection stubs (PIC and non-PIC). */
21046
21047 void
21048 machopic_output_stub (FILE *file, const char *symb, const char *stub)
21049 {
21050 unsigned int length;
21051 char *symbol_name, *lazy_ptr_name;
21052 char *local_label_0;
21053 static unsigned label = 0;
21054
21055 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
21056 symb = (*targetm.strip_name_encoding) (symb);
21057
21058 length = strlen (symb);
21059 symbol_name = XALLOCAVEC (char, length + 32);
21060 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
21061
21062 lazy_ptr_name = XALLOCAVEC (char, length + 32);
21063 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
21064
21065 if (MACHOPIC_PURE)
21066 {
21067 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
21068 fprintf (file, "\t.align 5\n");
21069
21070 fprintf (file, "%s:\n", stub);
21071 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21072
21073 label++;
21074 local_label_0 = XALLOCAVEC (char, 16);
21075 sprintf (local_label_0, "L%u$spb", label);
21076
21077 fprintf (file, "\tmflr r0\n");
21078 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
21079 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
21080 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
21081 lazy_ptr_name, local_label_0);
21082 fprintf (file, "\tmtlr r0\n");
21083 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
21084 (TARGET_64BIT ? "ldu" : "lwzu"),
21085 lazy_ptr_name, local_label_0);
21086 fprintf (file, "\tmtctr r12\n");
21087 fprintf (file, "\tbctr\n");
21088 }
21089 else /* mdynamic-no-pic or mkernel. */
21090 {
21091 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
21092 fprintf (file, "\t.align 4\n");
21093
21094 fprintf (file, "%s:\n", stub);
21095 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21096
21097 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
21098 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
21099 (TARGET_64BIT ? "ldu" : "lwzu"),
21100 lazy_ptr_name);
21101 fprintf (file, "\tmtctr r12\n");
21102 fprintf (file, "\tbctr\n");
21103 }
21104
21105 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
21106 fprintf (file, "%s:\n", lazy_ptr_name);
21107 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21108 fprintf (file, "%sdyld_stub_binding_helper\n",
21109 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
21110 }
21111
21112 /* Legitimize PIC addresses. If the address is already
21113 position-independent, we return ORIG. Newly generated
21114 position-independent addresses go into a reg. This is REG if
21115 nonzero, otherwise we allocate register(s) as necessary. */
21116
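/* True iff the constant X fits in a signed 16-bit immediate field. */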
21117 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
21118
21119 rtx
21120 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
21121 rtx reg)
21122 {
21123 rtx base, offset;
21124
21125 if (reg == NULL && !reload_completed)
21126 reg = gen_reg_rtx (Pmode);
21127
21128 if (GET_CODE (orig) == CONST)
21129 {
21130 rtx reg_temp;
21131
21132 if (GET_CODE (XEXP (orig, 0)) == PLUS
21133 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
21134 return orig;
21135
21136 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
21137
21138 /* Use a different reg for the intermediate value, as
21139 it will be marked UNCHANGING. */
21140 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
21141 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
21142 Pmode, reg_temp);
21143 offset =
21144 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
21145 Pmode, reg);
21146
21147 if (CONST_INT_P (offset))
21148 {
21149 if (SMALL_INT (offset))
21150 return plus_constant (Pmode, base, INTVAL (offset));
21151 else if (!reload_completed)
21152 offset = force_reg (Pmode, offset);
21153 else
21154 {
21155 rtx mem = force_const_mem (Pmode, orig);
21156 return machopic_legitimize_pic_address (mem, Pmode, reg);
21157 }
21158 }
21159 return gen_rtx_PLUS (Pmode, base, offset);
21160 }
21161
21162 /* Fall back on generic machopic code. */
21163 return machopic_legitimize_pic_address (orig, mode, reg);
21164 }
21165
21166 /* Output a .machine directive for the Darwin assembler, and call
21167 the generic start_file routine. */
21168
21169 static void
21170 rs6000_darwin_file_start (void)
21171 {
21172 static const struct
21173 {
21174 const char *arg;
21175 const char *name;
21176 HOST_WIDE_INT if_set;
21177 } mapping[] = {
21178 { "ppc64", "ppc64", MASK_64BIT },
21179 { "970", "ppc970", OPTION_MASK_PPC_GPOPT | OPTION_MASK_MFCRF \
21180 | MASK_POWERPC64 },
21181 { "power4", "ppc970", 0 },
21182 { "G5", "ppc970", 0 },
21183 { "7450", "ppc7450", 0 },
21184 { "7400", "ppc7400", OPTION_MASK_ALTIVEC },
21185 { "G4", "ppc7400", 0 },
21186 { "750", "ppc750", 0 },
21187 { "740", "ppc750", 0 },
21188 { "G3", "ppc750", 0 },
21189 { "604e", "ppc604e", 0 },
21190 { "604", "ppc604", 0 },
21191 { "603e", "ppc603", 0 },
21192 { "603", "ppc603", 0 },
21193 { "601", "ppc601", 0 },
21194 { NULL, "ppc", 0 } };
21195 const char *cpu_id = "";
21196 size_t i;
21197
21198 rs6000_file_start ();
21199 darwin_file_start ();
21200
21201 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
21202
21203 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
21204 cpu_id = rs6000_default_cpu;
21205
21206 if (OPTION_SET_P (rs6000_cpu_index))
21207 cpu_id = processor_target_table[rs6000_cpu_index].name;
21208
21209 /* Look through the mapping array. Pick the first name that either
21210 matches the argument, has a bit set in IF_SET that is also set
21211 in the target flags, or has a NULL name. */
21212
21213 i = 0;
21214 while (mapping[i].arg != NULL
21215 && strcmp (mapping[i].arg, cpu_id) != 0
21216 && (mapping[i].if_set & rs6000_isa_flags) == 0)
21217 i++;
21218
21219 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
21220 }
21221
21222 #endif /* TARGET_MACHO */
21223
21224 #if TARGET_ELF
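/* Return a mask describing which relocations force data into read-write
   sections: all of them when generating PIC code, only those against
   global symbols for AIX/ELFv2, and none otherwise. */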
21225 static int
21226 rs6000_elf_reloc_rw_mask (void)
21227 {
21228 if (flag_pic)
21229 return 3;
21230 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21231 return 2;
21232 else
21233 return 0;
21234 }
21235
21236 /* Record an element in the table of global constructors. SYMBOL is
21237 a SYMBOL_REF of the function to be called; PRIORITY is a number
21238 between 0 and MAX_INIT_PRIORITY.
21239
21240 This differs from default_named_section_asm_out_constructor in
21241 that we have special handling for -mrelocatable. */
21242
21243 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
21244 static void
21245 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
21246 {
21247 const char *section = ".ctors";
21248 char buf[18];
21249
21250 if (priority != DEFAULT_INIT_PRIORITY)
21251 {
21252 sprintf (buf, ".ctors.%.5u",
21253 /* Invert the numbering so the linker puts us in the proper
21254 order; constructors are run from right to left, and the
21255 linker sorts in increasing order. */
21256 MAX_INIT_PRIORITY - priority);
21257 section = buf;
21258 }
21259
21260 switch_to_section (get_section (section, SECTION_WRITE, NULL));
21261 assemble_align (POINTER_SIZE);
21262
21263 if (DEFAULT_ABI == ABI_V4
21264 && (TARGET_RELOCATABLE || flag_pic > 1))
21265 {
21266 fputs ("\t.long (", asm_out_file);
21267 output_addr_const (asm_out_file, symbol);
21268 fputs (")@fixup\n", asm_out_file);
21269 }
21270 else
21271 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
21272 }
21273
21274 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
21275 static void
21276 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
21277 {
21278 const char *section = ".dtors";
21279 char buf[18];
21280
21281 if (priority != DEFAULT_INIT_PRIORITY)
21282 {
21283 sprintf (buf, ".dtors.%.5u",
21284 /* Invert the numbering so the linker puts us in the proper
21285 order; constructors are run from right to left, and the
21286 linker sorts in increasing order. */
21287 MAX_INIT_PRIORITY - priority);
21288 section = buf;
21289 }
21290
21291 switch_to_section (get_section (section, SECTION_WRITE, NULL));
21292 assemble_align (POINTER_SIZE);
21293
21294 if (DEFAULT_ABI == ABI_V4
21295 && (TARGET_RELOCATABLE || flag_pic > 1))
21296 {
21297 fputs ("\t.long (", asm_out_file);
21298 output_addr_const (asm_out_file, symbol);
21299 fputs (")@fixup\n", asm_out_file);
21300 }
21301 else
21302 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
21303 }
21304
21305 void
21306 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
21307 {
21308 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
21309 {
21310 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
21311 ASM_OUTPUT_LABEL (file, name);
21312 fputs (DOUBLE_INT_ASM_OP, file);
21313 rs6000_output_function_entry (file, name);
21314 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
21315 if (DOT_SYMBOLS)
21316 {
21317 fputs ("\t.size\t", file);
21318 assemble_name (file, name);
21319 fputs (",24\n\t.type\t.", file);
21320 assemble_name (file, name);
21321 fputs (",@function\n", file);
21322 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
21323 {
21324 fputs ("\t.globl\t.", file);
21325 assemble_name (file, name);
21326 putc ('\n', file);
21327 }
21328 }
21329 else
21330 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21331 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21332 rs6000_output_function_entry (file, name);
21333 fputs (":\n", file);
21334 assemble_function_label_final ();
21335 return;
21336 }
21337
21338 int uses_toc;
21339 if (DEFAULT_ABI == ABI_V4
21340 && (TARGET_RELOCATABLE || flag_pic > 1)
21341 && !TARGET_SECURE_PLT
21342 && (!constant_pool_empty_p () || crtl->profile)
21343 && (uses_toc = uses_TOC ()))
21344 {
21345 char buf[256];
21346
21347 if (uses_toc == 2)
21348 switch_to_other_text_partition ();
21349 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
21350
21351 fprintf (file, "\t.long ");
21352 assemble_name (file, toc_label_name);
21353 need_toc_init = 1;
21354 putc ('-', file);
21355 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
21356 assemble_name (file, buf);
21357 putc ('\n', file);
21358 if (uses_toc == 2)
21359 switch_to_other_text_partition ();
21360 }
21361
21362 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21363 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21364
21365 if (TARGET_CMODEL == CMODEL_LARGE
21366 && rs6000_global_entry_point_prologue_needed_p ())
21367 {
21368 char buf[256];
21369
21370 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
21371
21372 fprintf (file, "\t.quad .TOC.-");
21373 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
21374 assemble_name (file, buf);
21375 putc ('\n', file);
21376 }
21377
21378 if (DEFAULT_ABI == ABI_AIX)
21379 {
21380 const char *desc_name, *orig_name;
21381
21382 orig_name = (*targetm.strip_name_encoding) (name);
21383 desc_name = orig_name;
21384 while (*desc_name == '.')
21385 desc_name++;
21386
21387 if (TREE_PUBLIC (decl))
21388 fprintf (file, "\t.globl %s\n", desc_name);
21389
21390 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
21391 fprintf (file, "%s:\n", desc_name);
21392 fprintf (file, "\t.long %s\n", orig_name);
21393 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
21394 fputs ("\t.long 0\n", file);
21395 fprintf (file, "\t.previous\n");
21396 }
21397 ASM_OUTPUT_FUNCTION_LABEL (file, name, decl);
21398 }
21399
21400 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
21401 static void
21402 rs6000_elf_file_end (void)
21403 {
21404 #ifdef HAVE_AS_GNU_ATTRIBUTE
21405 /* ??? The value emitted depends on options active at file end.
21406 Assume anyone using #pragma or attributes that might change
21407 options knows what they are doing. */
21408 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
21409 && rs6000_passes_float)
21410 {
21411 int fp;
21412
21413 if (TARGET_HARD_FLOAT)
21414 fp = 1;
21415 else
21416 fp = 2;
21417 if (rs6000_passes_long_double)
21418 {
21419 if (!TARGET_LONG_DOUBLE_128)
21420 fp |= 2 * 4;
21421 else if (TARGET_IEEEQUAD)
21422 fp |= 3 * 4;
21423 else
21424 fp |= 1 * 4;
21425 }
21426 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
21427 }
21428 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
21429 {
21430 if (rs6000_passes_vector)
21431 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
21432 (TARGET_ALTIVEC_ABI ? 2 : 1));
21433 if (rs6000_returns_struct)
21434 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
21435 aix_struct_return ? 2 : 1);
21436 }
21437 #endif
21438 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
21439 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
21440 file_end_indicate_exec_stack ();
21441 #endif
21442
21443 if (flag_split_stack)
21444 file_end_indicate_split_stack ();
21445
21446 if (cpu_builtin_p)
21447 {
21448 /* We have expanded a CPU builtin, so we need to emit a reference to
21449 the special symbol that LIBC uses to declare that it supports the
21450 AT_PLATFORM and AT_HWCAP/AT_HWCAP2-in-the-TCB feature. */
21451 switch_to_section (data_section);
21452 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
21453 fprintf (asm_out_file, "\t%s %s\n",
21454 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
21455 }
21456 }
21457 #endif
21458
21459 #if TARGET_XCOFF
21460
21461 #ifndef HAVE_XCOFF_DWARF_EXTRAS
21462 #define HAVE_XCOFF_DWARF_EXTRAS 0
21463 #endif
21464
21465
21466 /* Names of bss and data sections. These should be unique names for each
21467 compilation unit. */
21468
21469 char *xcoff_bss_section_name;
21470 char *xcoff_private_data_section_name;
21471 char *xcoff_private_rodata_section_name;
21472 char *xcoff_tls_data_section_name;
21473 char *xcoff_read_only_section_name;
21474
21475 static enum unwind_info_type
21476 rs6000_xcoff_debug_unwind_info (void)
21477 {
21478 return UI_NONE;
21479 }
21480
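/* Implement TARGET_ASM_OUTPUT_ANCHOR.  Define a section anchor as the
   current location plus a constant byte offset, e.g. ".set foo,$ + 128",
   "$" being the XCOFF spelling of the current address.  */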
21481 static void
21482 rs6000_xcoff_asm_output_anchor (rtx symbol)
21483 {
21484 char buffer[100];
21485
21486 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
21487 SYMBOL_REF_BLOCK_OFFSET (symbol));
21488 fprintf (asm_out_file, "%s", SET_ASM_OP);
21489 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
21490 fprintf (asm_out_file, ",");
21491 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
21492 fprintf (asm_out_file, "\n");
21493 }
21494
21495 static void
21496 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
21497 {
21498 fputs (GLOBAL_ASM_OP, stream);
21499 RS6000_OUTPUT_BASENAME (stream, name);
21500 putc ('\n', stream);
21501 }
21502
21503 /* A get_unnamed_section callback, used for read-only sections. DIRECTIVE
21504 selects between the private rodata and the plain read-only section names. */
21505
21506 static void
21507 rs6000_xcoff_output_readonly_section_asm_op (const char *directive)
21508 {
21509 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
21510 directive
21511 ? xcoff_private_rodata_section_name
21512 : xcoff_read_only_section_name,
21513 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21514 }
21515
21516 /* Likewise for read-write sections. */
21517
21518 static void
21519 rs6000_xcoff_output_readwrite_section_asm_op (const char *)
21520 {
21521 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
21522 xcoff_private_data_section_name,
21523 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21524 }
21525
21526 static void
21527 rs6000_xcoff_output_tls_section_asm_op (const char *directive)
21528 {
21529 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
21530 directive
21531 ? xcoff_private_data_section_name
21532 : xcoff_tls_data_section_name,
21533 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21534 }
21535
21536 /* A get_unnamed_section callback, used for switching to toc_section. */
21537
21538 static void
21539 rs6000_xcoff_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
21540 {
21541 if (TARGET_MINIMAL_TOC)
21542 {
21543 /* toc_section is always selected at least once from
21544 rs6000_xcoff_file_start, so this is guaranteed to
21545 always be defined once and only once in each file. */
21546 if (!toc_initialized)
21547 {
21548 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
21549 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
21550 toc_initialized = 1;
21551 }
21552 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
21553 (TARGET_32BIT ? "" : ",3"));
21554 }
21555 else
21556 fputs ("\t.toc\n", asm_out_file);
21557 }
21558
21559 /* Implement TARGET_ASM_INIT_SECTIONS. */
21560
21561 static void
21562 rs6000_xcoff_asm_init_sections (void)
21563 {
21564 read_only_data_section
21565 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21566 NULL);
21567
21568 private_data_section
21569 = get_unnamed_section (SECTION_WRITE,
21570 rs6000_xcoff_output_readwrite_section_asm_op,
21571 NULL);
21572
21573 read_only_private_data_section
21574 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21575 "");
21576
21577 tls_data_section
21578 = get_unnamed_section (SECTION_TLS,
21579 rs6000_xcoff_output_tls_section_asm_op,
21580 NULL);
21581
21582 tls_private_data_section
21583 = get_unnamed_section (SECTION_TLS,
21584 rs6000_xcoff_output_tls_section_asm_op,
21585 "");
21586
21587 toc_section
21588 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
21589
21590 readonly_data_section = read_only_data_section;
21591 }
21592
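/* Implement TARGET_ASM_RELOC_RW_MASK.  AIX is always position
   independent, so return a mask of 3: data with either local or global
   relocations must be placed in read-write rather than read-only
   sections.  */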
21593 static int
21594 rs6000_xcoff_reloc_rw_mask (void)
21595 {
21596 return 3;
21597 }
21598
21599 static void
21600 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
21601 tree decl ATTRIBUTE_UNUSED)
21602 {
21603 int smclass;
21604 static const char * const suffix[7]
21605 = { "PR", "RO", "RW", "BS", "TL", "UL", "XO" };
21606
21607 if (flags & SECTION_EXCLUDE)
21608 smclass = 6;
21609 else if (flags & SECTION_DEBUG)
21610 {
21611 fprintf (asm_out_file, "\t.dwsect %s\n", name);
21612 return;
21613 }
21614 else if (flags & SECTION_CODE)
21615 smclass = 0;
21616 else if (flags & SECTION_TLS)
21617 {
21618 if (flags & SECTION_BSS)
21619 smclass = 5;
21620 else
21621 smclass = 4;
21622 }
21623 else if (flags & SECTION_WRITE)
21624 {
21625 if (flags & SECTION_BSS)
21626 smclass = 3;
21627 else
21628 smclass = 2;
21629 }
21630 else
21631 smclass = 1;
21632
21633 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
21634 (flags & SECTION_CODE) ? "." : "",
21635 name, suffix[smclass], flags & SECTION_ENTSIZE);
21636 }
21637
21638 #define IN_NAMED_SECTION(DECL) \
21639 ((TREE_CODE (DECL) == FUNCTION_DECL || VAR_P (DECL)) \
21640 && DECL_SECTION_NAME (DECL) != NULL)
21641
21642 static section *
21643 rs6000_xcoff_select_section (tree decl, int reloc,
21644 unsigned HOST_WIDE_INT align)
21645 {
21646 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
21647 named section. */
21648 if (align > BIGGEST_ALIGNMENT && VAR_OR_FUNCTION_DECL_P (decl))
21649 {
21650 resolve_unique_section (decl, reloc, true);
21651 if (IN_NAMED_SECTION (decl))
21652 return get_named_section (decl, NULL, reloc);
21653 }
21654
21655 if (decl_readonly_section (decl, reloc))
21656 {
21657 if (TREE_PUBLIC (decl))
21658 return read_only_data_section;
21659 else
21660 return read_only_private_data_section;
21661 }
21662 else
21663 {
21664 #if HAVE_AS_TLS
21665 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21666 {
21667 if (bss_initializer_p (decl))
21668 return tls_comm_section;
21669 else if (TREE_PUBLIC (decl))
21670 return tls_data_section;
21671 else
21672 return tls_private_data_section;
21673 }
21674 else
21675 #endif
21676 if (TREE_PUBLIC (decl))
21677 return data_section;
21678 else
21679 return private_data_section;
21680 }
21681 }
21682
21683 static void
21684 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
21685 {
21686 const char *name;
21687
21688 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
21689 name = (*targetm.strip_name_encoding) (name);
21690 set_decl_section_name (decl, name);
21691 }
21692
21693 /* Select section for constant in constant pool.
21694
21695 On RS/6000, all constants are in the private read-only data area.
21696 However, if this is being placed in the TOC it must be output as a
21697 toc entry. */
21698
21699 static section *
21700 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
21701 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
21702 {
21703 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
21704 return toc_section;
21705 else
21706 return read_only_private_data_section;
21707 }
21708
21709 /* Remove any trailing mapping class such as [DS] from the symbol name, e.g. "foo[DS]" becomes "foo". */
21710
21711 static const char *
21712 rs6000_xcoff_strip_name_encoding (const char *name)
21713 {
21714 size_t len;
21715 if (*name == '*')
21716 name++;
21717 len = strlen (name);
21718 if (name[len - 1] == ']')
21719 return ggc_alloc_string (name, len - 4);
21720 else
21721 return name;
21722 }
21723
21724 /* Section attributes. AIX is always PIC. */
21725
21726 static unsigned int
21727 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
21728 {
21729 unsigned int align;
21730 unsigned int flags = default_section_type_flags (decl, name, reloc);
21731
21732 if (decl && DECL_P (decl) && VAR_P (decl) && bss_initializer_p (decl))
21733 flags |= SECTION_BSS;
21734
21735 /* Align to at least the minimum word size. */
21736 if (!decl || !DECL_P (decl))
21737 align = MIN_UNITS_PER_WORD;
21738 /* Align code CSECT to at least 32 bytes. */
21739 else if ((flags & SECTION_CODE) != 0)
21740 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT), 32);
21741 else
21742 /* Increase alignment of large objects if not already stricter. */
21743 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
21744 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
21745 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
21746
21747 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
21748 }
21749
21750 /* Output at beginning of assembler file.
21751
21752 Initialize the section names for the RS/6000 at this point.
21753
21754 Specify filename, including full path, to assembler.
21755
21756 We want to go into the TOC section so at least one .toc will be emitted.
21757 Also, in order to output proper .bs/.es pairs, we need at least one static
21758 [RW] section emitted.
21759
21760 Finally, declare mcount when profiling to make the assembler happy. */
21761
21762 static void
21763 rs6000_xcoff_file_start (void)
21764 {
21765 rs6000_gen_section_name (&xcoff_bss_section_name,
21766 main_input_filename, ".bss_");
21767 rs6000_gen_section_name (&xcoff_private_data_section_name,
21768 main_input_filename, ".rw_");
21769 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
21770 main_input_filename, ".rop_");
21771 rs6000_gen_section_name (&xcoff_read_only_section_name,
21772 main_input_filename, ".ro_");
21773 rs6000_gen_section_name (&xcoff_tls_data_section_name,
21774 main_input_filename, ".tls_");
21775
21776 fputs ("\t.file\t", asm_out_file);
21777 output_quoted_string (asm_out_file, main_input_filename);
21778 fputc ('\n', asm_out_file);
21779 if (write_symbols != NO_DEBUG)
21780 switch_to_section (private_data_section);
21781 switch_to_section (toc_section);
21782 switch_to_section (text_section);
21783 if (profile_flag)
21784 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
21785 rs6000_file_start ();
21786 }
21787
21788 /* Output at end of assembler file.
21789 On the RS/6000, referencing data should automatically pull in text. */
21790
21791 static void
21792 rs6000_xcoff_file_end (void)
21793 {
21794 switch_to_section (text_section);
21795 if (xcoff_tls_exec_model_detected)
21796 {
21797 /* Add a .ref to __tls_get_addr to force libpthread dependency. */
21798 fputs ("\t.extern __tls_get_addr\n\t.ref __tls_get_addr\n", asm_out_file);
21799 }
21800 fputs ("_section_.text:\n", asm_out_file);
21801 switch_to_section (data_section);
21802 fputs (TARGET_32BIT
21803 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
21804 asm_out_file);
21805
21806 }
21807
21808 struct declare_alias_data
21809 {
21810 FILE *file;
21811 bool function_descriptor;
21812 };
21813
21814 /* Declare alias N. A helper callback for call_for_symbol_and_aliases. */
21815
21816 static bool
21817 rs6000_declare_alias (struct symtab_node *n, void *d)
21818 {
21819 struct declare_alias_data *data = (struct declare_alias_data *)d;
21820 /* The main symbol is output specially, because the varasm machinery does
21821 part of the job for us: we do not need to emit .globl/.lglobl and such. */
21822 if (!n->alias || n->weakref)
21823 return false;
21824
21825 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
21826 return false;
21827
21828 /* Prevent assemble_alias from trying to use .set pseudo operation
21829 that does not behave as expected by the middle-end. */
21830 TREE_ASM_WRITTEN (n->decl) = true;
21831
21832 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
21833 char *buffer = (char *) alloca (strlen (name) + 2);
21834 char *p;
21835 int dollar_inside = 0;
21836
21837 strcpy (buffer, name);
21838 p = strchr (buffer, '$');
21839 while (p) {
21840 *p = '_';
21841 dollar_inside++;
21842 p = strchr (p + 1, '$');
21843 }
21844 if (TREE_PUBLIC (n->decl))
21845 {
21846 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
21847 {
21848 if (dollar_inside) {
21849 if (data->function_descriptor)
21850 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21851 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21852 }
21853 if (data->function_descriptor)
21854 {
21855 fputs ("\t.globl .", data->file);
21856 RS6000_OUTPUT_BASENAME (data->file, buffer);
21857 putc ('\n', data->file);
21858 }
21859 fputs ("\t.globl ", data->file);
21860 assemble_name (data->file, buffer);
21861 putc ('\n', data->file);
21862 }
21863 #ifdef ASM_WEAKEN_DECL
21864 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
21865 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
21866 #endif
21867 }
21868 else
21869 {
21870 if (dollar_inside)
21871 {
21872 if (data->function_descriptor)
21873 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21874 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21875 }
21876 if (data->function_descriptor)
21877 {
21878 fputs ("\t.lglobl .", data->file);
21879 RS6000_OUTPUT_BASENAME (data->file, buffer);
21880 putc ('\n', data->file);
21881 }
21882 fputs ("\t.lglobl ", data->file);
21883 assemble_name (data->file, buffer);
21884 putc ('\n', data->file);
21885 }
21886 if (data->function_descriptor)
21887 putc ('.', data->file);
21888 ASM_OUTPUT_LABEL (data->file, buffer);
21889 return false;
21890 }
21891
21892
21893 #ifdef HAVE_GAS_HIDDEN
21894 /* Helper function to calculate visibility of a DECL
21895 and return the value as a const string. */
21896
21897 static const char *
21898 rs6000_xcoff_visibility (tree decl)
21899 {
21900 static const char * const visibility_types[] = {
21901 "", ",protected", ",hidden", ",internal"
21902 };
21903
21904 enum symbol_visibility vis = DECL_VISIBILITY (decl);
21905 return visibility_types[vis];
21906 }
21907 #endif
21908
21909
21910 /* This macro produces the initial definition of a function name.
21911 On the RS/6000, we need to place an extra '.' in the function name and
21912 output the function descriptor.
21913 Dollar signs are converted to underscores.
21914
21915 The csect for the function will have already been created when
21916 text_section was selected. We do have to go back to that csect, however.
21917
21918 The third and fourth parameters to the .function pseudo-op (emitted
21919 below as 2 and 0) are placeholders which no longer have any use.
21920
21921 Because AIX assembler's .set command has unexpected semantics, we output
21922 all aliases as alternative labels in front of the definition. */
21923
21924 void
21925 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
21926 {
21927 char *buffer = (char *) alloca (strlen (name) + 1);
21928 char *p;
21929 int dollar_inside = 0;
21930 struct declare_alias_data data = {file, false};
21931
21932 strcpy (buffer, name);
21933 p = strchr (buffer, '$');
21934 while (p) {
21935 *p = '_';
21936 dollar_inside++;
21937 p = strchr (p + 1, '$');
21938 }
21939 if (TREE_PUBLIC (decl))
21940 {
21941 if (!RS6000_WEAK || !DECL_WEAK (decl))
21942 {
21943 if (dollar_inside) {
21944 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21945 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21946 }
21947 fputs ("\t.globl .", file);
21948 RS6000_OUTPUT_BASENAME (file, buffer);
21949 #ifdef HAVE_GAS_HIDDEN
21950 fputs (rs6000_xcoff_visibility (decl), file);
21951 #endif
21952 putc ('\n', file);
21953 }
21954 }
21955 else
21956 {
21957 if (dollar_inside) {
21958 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21959 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21960 }
21961 fputs ("\t.lglobl .", file);
21962 RS6000_OUTPUT_BASENAME (file, buffer);
21963 putc ('\n', file);
21964 }
21965
21966 fputs ("\t.csect ", file);
21967 assemble_name (file, buffer);
21968 fputs (TARGET_32BIT ? "\n" : ",3\n", file);
21969
21970 ASM_OUTPUT_FUNCTION_LABEL (file, buffer, decl);
21971
21972 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21973 &data, true);
21974 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
21975 RS6000_OUTPUT_BASENAME (file, buffer);
21976 fputs (", TOC[tc0], 0\n", file);
21977
21978 in_section = NULL;
21979 switch_to_section (function_section (decl));
21980 putc ('.', file);
21981 ASM_OUTPUT_LABEL (file, buffer);
21982
21983 data.function_descriptor = true;
21984 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21985 &data, true);
21986 if (!DECL_IGNORED_P (decl))
21987 {
21988 if (dwarf_debuginfo_p ())
21989 {
21990 name = (*targetm.strip_name_encoding) (name);
21991 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
21992 }
21993 }
21994 return;
21995 }
21996
21997
21998 /* Output assembly language to globalize a symbol from a DECL,
21999 possibly with visibility. */
22000
22001 void
22002 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
22003 {
22004 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
22005 fputs (GLOBAL_ASM_OP, stream);
22006 assemble_name (stream, name);
22007 #ifdef HAVE_GAS_HIDDEN
22008 fputs (rs6000_xcoff_visibility (decl), stream);
22009 #endif
22010 putc ('\n', stream);
22011 }
22012
22013 /* Output assembly language to define a symbol as COMMON from a DECL,
22014 possibly with visibility. */
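/* For example, a public 16-byte object "buf" with 16-byte alignment
   is emitted as ".comm buf,16,4"; the last operand is the base-2
   logarithm of the alignment in bytes.  */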
22015
22016 void
22017 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
22018 tree decl ATTRIBUTE_UNUSED,
22019 const char *name,
22020 unsigned HOST_WIDE_INT size,
22021 unsigned int align)
22022 {
22023 unsigned int align2 = 2;
22024
22025 if (align == 0)
22026 align = DATA_ABI_ALIGNMENT (TREE_TYPE (decl), DECL_ALIGN (decl));
22027
22028 if (align > 32)
22029 align2 = floor_log2 (align / BITS_PER_UNIT);
22030 else if (size > 4)
22031 align2 = 3;
22032
22033 if (! DECL_COMMON (decl))
22034 {
22035 /* Forget section. */
22036 in_section = NULL;
22037
22038 /* Globalize TLS BSS. */
22039 if (TREE_PUBLIC (decl) && DECL_THREAD_LOCAL_P (decl))
22040 {
22041 fputs (GLOBAL_ASM_OP, stream);
22042 assemble_name (stream, name);
22043 fputc ('\n', stream);
22044 }
22045
22046 /* Switch to section and skip space. */
22047 fputs ("\t.csect ", stream);
22048 assemble_name (stream, name);
22049 fprintf (stream, ",%u\n", align2);
22050 ASM_DECLARE_OBJECT_NAME (stream, name, decl);
22051 ASM_OUTPUT_SKIP (stream, size ? size : 1);
22052 return;
22053 }
22054
22055 if (TREE_PUBLIC (decl))
22056 {
22057 fprintf (stream,
22058 "\t.comm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%u" ,
22059 name, size, align2);
22060
22061 #ifdef HAVE_GAS_HIDDEN
22062 if (decl != NULL)
22063 fputs (rs6000_xcoff_visibility (decl), stream);
22064 #endif
22065 putc ('\n', stream);
22066 }
22067 else
22068 fprintf (stream,
22069 "\t.lcomm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%s,%u\n",
22070 (*targetm.strip_name_encoding) (name), size, name, align2);
22071 }
22072
22073 /* This macro produces the initial definition of an object (variable) name.
22074 Because AIX assembler's .set command has unexpected semantics, we output
22075 all aliases as alternative labels in front of the definition. */
22076
22077 void
22078 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
22079 {
22080 struct declare_alias_data data = {file, false};
22081 ASM_OUTPUT_LABEL (file, name);
22082 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
22083 &data, true);
22084 }
22085
22086 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
22087
22088 void
22089 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
22090 {
22091 fputs (integer_asm_op (size, FALSE), file);
22092 assemble_name (file, label);
22093 fputs ("-$", file);
22094 }
22095
22096 /* Output a symbol offset relative to the dbase for the current object.
22097 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
22098 signed offsets.
22099
22100 __gcc_unwind_dbase is embedded in all executables/libraries through
22101 libgcc/config/rs6000/crtdbase.S. */
22102
22103 void
22104 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
22105 {
22106 fputs (integer_asm_op (size, FALSE), file);
22107 assemble_name (file, label);
22108 fputs("-__gcc_unwind_dbase", file);
22109 }
22110
22111 #ifdef HAVE_AS_TLS
22112 static void
22113 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
22114 {
22115 rtx symbol;
22116 int flags;
22117 const char *symname;
22118
22119 default_encode_section_info (decl, rtl, first);
22120
22121 /* Careful not to prod global register variables. */
22122 if (!MEM_P (rtl))
22123 return;
22124 symbol = XEXP (rtl, 0);
22125 if (!SYMBOL_REF_P (symbol))
22126 return;
22127
22128 flags = SYMBOL_REF_FLAGS (symbol);
22129
22130 if (VAR_P (decl) && DECL_THREAD_LOCAL_P (decl))
22131 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
22132
22133 SYMBOL_REF_FLAGS (symbol) = flags;
22134
22135 symname = XSTR (symbol, 0);
22136
22137 /* Append CSECT mapping class, unless the symbol already is qualified.
22138 Aliases are implemented as labels, so the symbol name should not add
22139 a mapping class. */
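/* For example, a function "foo" becomes "foo[DS]" (the csect of its
   function descriptor) and an uninitialized static "bar" becomes
   "bar[BS]".  */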
22140 if (decl
22141 && DECL_P (decl)
22142 && VAR_OR_FUNCTION_DECL_P (decl)
22143 && (symtab_node::get (decl) == NULL
22144 || symtab_node::get (decl)->alias == 0)
22145 && symname[strlen (symname) - 1] != ']')
22146 {
22147 const char *smclass = NULL;
22148
22149 if (TREE_CODE (decl) == FUNCTION_DECL)
22150 smclass = "[DS]";
22151 else if (DECL_THREAD_LOCAL_P (decl))
22152 {
22153 if (bss_initializer_p (decl))
22154 smclass = "[UL]";
22155 else if (flag_data_sections)
22156 smclass = "[TL]";
22157 }
22158 else if (DECL_EXTERNAL (decl))
22159 smclass = "[UA]";
22160 else if (bss_initializer_p (decl))
22161 smclass = "[BS]";
22162 else if (flag_data_sections)
22163 {
22164 /* This must exactly match the logic of select section. */
22165 if (decl_readonly_section (decl, compute_reloc_for_var (decl)))
22166 smclass = "[RO]";
22167 else
22168 smclass = "[RW]";
22169 }
22170
22171 if (smclass != NULL)
22172 {
22173 char *newname = XALLOCAVEC (char, strlen (symname) + 5);
22174
22175 strcpy (newname, symname);
22176 strcat (newname, smclass);
22177 XSTR (symbol, 0) = ggc_strdup (newname);
22178 }
22179 }
22180 }
22181 #endif /* HAVE_AS_TLS */
22182 #endif /* TARGET_XCOFF */
22183
22184 void
22185 rs6000_asm_weaken_decl (FILE *stream, tree decl,
22186 const char *name, const char *val)
22187 {
22188 fputs ("\t.weak\t", stream);
22189 assemble_name (stream, name);
22190 if (decl && TREE_CODE (decl) == FUNCTION_DECL
22191 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
22192 {
22193 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
22194 if (TARGET_XCOFF)
22195 fputs (rs6000_xcoff_visibility (decl), stream);
22196 #endif
22197 fputs ("\n\t.weak\t.", stream);
22198 RS6000_OUTPUT_BASENAME (stream, name);
22199 }
22200 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
22201 if (TARGET_XCOFF)
22202 fputs (rs6000_xcoff_visibility (decl), stream);
22203 #endif
22204 fputc ('\n', stream);
22205
22206 if (val)
22207 {
22208 #ifdef ASM_OUTPUT_DEF
22209 ASM_OUTPUT_DEF (stream, name, val);
22210 #endif
22211 if (decl && TREE_CODE (decl) == FUNCTION_DECL
22212 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
22213 {
22214 fputs ("\t.set\t.", stream);
22215 RS6000_OUTPUT_BASENAME (stream, name);
22216 fputs (",.", stream);
22217 RS6000_OUTPUT_BASENAME (stream, val);
22218 fputc ('\n', stream);
22219 }
22220 }
22221 }
22222
22223
22224 /* Return true if INSN should not be copied. */
22225
22226 static bool
22227 rs6000_cannot_copy_insn_p (rtx_insn *insn)
22228 {
22229 return recog_memoized (insn) >= 0
22230 && get_attr_cannot_copy (insn);
22231 }
22232
22233 /* Compute a (partial) cost for rtx X. Return true if the complete
22234 cost has been computed, and false if subexpressions should be
22235 scanned. In either case, *TOTAL contains the cost result. */
22236
22237 static bool
22238 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
22239 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
22240 {
22241 int code = GET_CODE (x);
22242
22243 switch (code)
22244 {
22245 /* On the RS/6000, if it is valid in the insn, it is free. */
22246 case CONST_INT:
22247 if (((outer_code == SET
22248 || outer_code == PLUS
22249 || outer_code == MINUS)
22250 && (satisfies_constraint_I (x)
22251 || satisfies_constraint_L (x)))
22252 || (outer_code == AND
22253 && (satisfies_constraint_K (x)
22254 || (mode == SImode
22255 ? satisfies_constraint_L (x)
22256 : satisfies_constraint_J (x))))
22257 || ((outer_code == IOR || outer_code == XOR)
22258 && (satisfies_constraint_K (x)
22259 || (mode == SImode
22260 ? satisfies_constraint_L (x)
22261 : satisfies_constraint_J (x))))
22262 || outer_code == ASHIFT
22263 || outer_code == ASHIFTRT
22264 || outer_code == LSHIFTRT
22265 || outer_code == ROTATE
22266 || outer_code == ROTATERT
22267 || outer_code == ZERO_EXTRACT
22268 || (outer_code == MULT
22269 && satisfies_constraint_I (x))
22270 || ((outer_code == DIV || outer_code == UDIV
22271 || outer_code == MOD || outer_code == UMOD)
22272 && exact_log2 (INTVAL (x)) >= 0)
22273 || (outer_code == COMPARE
22274 && (satisfies_constraint_I (x)
22275 || satisfies_constraint_K (x)))
22276 || ((outer_code == EQ || outer_code == NE)
22277 && (satisfies_constraint_I (x)
22278 || satisfies_constraint_K (x)
22279 || (mode == SImode
22280 ? satisfies_constraint_L (x)
22281 : satisfies_constraint_J (x))))
22282 || (outer_code == GTU
22283 && satisfies_constraint_I (x))
22284 || (outer_code == LTU
22285 && satisfies_constraint_P (x)))
22286 {
22287 *total = 0;
22288 return true;
22289 }
22290 else if ((outer_code == PLUS
22291 && reg_or_add_cint_operand (x, mode))
22292 || (outer_code == MINUS
22293 && reg_or_sub_cint_operand (x, mode))
22294 || ((outer_code == SET
22295 || outer_code == IOR
22296 || outer_code == XOR)
22297 && (INTVAL (x)
22298 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
22299 {
22300 *total = COSTS_N_INSNS (1);
22301 return true;
22302 }
22303 /* FALLTHRU */
22304
22305 case CONST_DOUBLE:
22306 case CONST_WIDE_INT:
22307 case CONST:
22308 case HIGH:
22309 case SYMBOL_REF:
22310 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22311 return true;
22312
22313 case MEM:
22314 /* When optimizing for size, a MEM should be slightly more expensive
22315 than generating its address, e.g., (plus (reg) (const)).
22316 L1 cache latency is about two instructions. */
22317 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22318 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
22319 *total += COSTS_N_INSNS (100);
22320 return true;
22321
22322 case LABEL_REF:
22323 *total = 0;
22324 return true;
22325
22326 case PLUS:
22327 case MINUS:
22328 if (FLOAT_MODE_P (mode))
22329 *total = rs6000_cost->fp;
22330 else
22331 *total = COSTS_N_INSNS (1);
22332 return false;
22333
22334 case MULT:
22335 if (CONST_INT_P (XEXP (x, 1))
22336 && satisfies_constraint_I (XEXP (x, 1)))
22337 {
22338 if (INTVAL (XEXP (x, 1)) >= -256
22339 && INTVAL (XEXP (x, 1)) <= 255)
22340 *total = rs6000_cost->mulsi_const9;
22341 else
22342 *total = rs6000_cost->mulsi_const;
22343 }
22344 else if (mode == SFmode)
22345 *total = rs6000_cost->fp;
22346 else if (FLOAT_MODE_P (mode))
22347 *total = rs6000_cost->dmul;
22348 else if (mode == DImode)
22349 *total = rs6000_cost->muldi;
22350 else
22351 *total = rs6000_cost->mulsi;
22352 return false;
22353
22354 case FMA:
22355 if (mode == SFmode)
22356 *total = rs6000_cost->fp;
22357 else
22358 *total = rs6000_cost->dmul;
22359 break;
22360
22361 case DIV:
22362 case MOD:
22363 if (FLOAT_MODE_P (mode))
22364 {
22365 *total = mode == DFmode ? rs6000_cost->ddiv
22366 : rs6000_cost->sdiv;
22367 return false;
22368 }
22369 /* FALLTHRU */
22370
22371 case UDIV:
22372 case UMOD:
22373 if (CONST_INT_P (XEXP (x, 1))
22374 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
22375 {
22376 if (code == DIV || code == MOD)
22377 /* Shift, addze */
22378 *total = COSTS_N_INSNS (2);
22379 else
22380 /* Shift */
22381 *total = COSTS_N_INSNS (1);
22382 }
22383 else
22384 {
22385 if (GET_MODE (XEXP (x, 1)) == DImode)
22386 *total = rs6000_cost->divdi;
22387 else
22388 *total = rs6000_cost->divsi;
22389 }
22390 /* Add in shift and subtract for MOD unless we have a mod instruction. */
22391 if ((!TARGET_MODULO
22392 || (RS6000_DISABLE_SCALAR_MODULO && SCALAR_INT_MODE_P (mode)))
22393 && (code == MOD || code == UMOD))
22394 *total += COSTS_N_INSNS (2);
22395 return false;
22396
22397 case CTZ:
22398 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
22399 return false;
22400
22401 case FFS:
22402 *total = COSTS_N_INSNS (4);
22403 return false;
22404
22405 case POPCOUNT:
22406 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
22407 return false;
22408
22409 case PARITY:
22410 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
22411 return false;
22412
22413 case NOT:
22414 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
22415 *total = 0;
22416 else
22417 *total = COSTS_N_INSNS (1);
22418 return false;
22419
22420 case AND:
22421 if (CONST_INT_P (XEXP (x, 1)))
22422 {
22423 rtx left = XEXP (x, 0);
22424 rtx_code left_code = GET_CODE (left);
22425
22426 /* rotate-and-mask: 1 insn. */
22427 if ((left_code == ROTATE
22428 || left_code == ASHIFT
22429 || left_code == LSHIFTRT)
22430 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
22431 {
22432 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
22433 if (!CONST_INT_P (XEXP (left, 1)))
22434 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
22435 *total += COSTS_N_INSNS (1);
22436 return true;
22437 }
22438
22439 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
22440 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
22441 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
22442 || (val & 0xffff) == val
22443 || (val & 0xffff0000) == val
22444 || ((val & 0xffff) == 0 && mode == SImode))
22445 {
22446 *total = rtx_cost (left, mode, AND, 0, speed);
22447 *total += COSTS_N_INSNS (1);
22448 return true;
22449 }
22450
22451 /* 2 insns. */
22452 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
22453 {
22454 *total = rtx_cost (left, mode, AND, 0, speed);
22455 *total += COSTS_N_INSNS (2);
22456 return true;
22457 }
22458 }
22459
22460 *total = COSTS_N_INSNS (1);
22461 return false;
22462
22463 case IOR:
22464 /* FIXME */
22465 *total = COSTS_N_INSNS (1);
22466 return true;
22467
22468 case CLZ:
22469 case XOR:
22470 case ZERO_EXTRACT:
22471 *total = COSTS_N_INSNS (1);
22472 return false;
22473
22474 case ASHIFT:
22475 /* The EXTSWSLI instruction is a combined instruction. Don't count both
22476 the sign extend and shift separately within the insn. */
22477 if (TARGET_EXTSWSLI && mode == DImode
22478 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
22479 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
22480 {
22481 *total = 0;
22482 return false;
22483 }
22484 /* fall through */
22485
22486 case ASHIFTRT:
22487 case LSHIFTRT:
22488 case ROTATE:
22489 case ROTATERT:
22490 /* Handle mul_highpart. */
22491 if (outer_code == TRUNCATE
22492 && GET_CODE (XEXP (x, 0)) == MULT)
22493 {
22494 if (mode == DImode)
22495 *total = rs6000_cost->muldi;
22496 else
22497 *total = rs6000_cost->mulsi;
22498 return true;
22499 }
22500 else if (outer_code == AND)
22501 *total = 0;
22502 else
22503 *total = COSTS_N_INSNS (1);
22504 return false;
22505
22506 case SIGN_EXTEND:
22507 case ZERO_EXTEND:
22508 if (MEM_P (XEXP (x, 0)))
22509 *total = 0;
22510 else
22511 *total = COSTS_N_INSNS (1);
22512 return false;
22513
22514 case COMPARE:
22515 case NEG:
22516 case ABS:
22517 if (!FLOAT_MODE_P (mode))
22518 {
22519 *total = COSTS_N_INSNS (1);
22520 return false;
22521 }
22522 /* FALLTHRU */
22523
22524 case FLOAT:
22525 case UNSIGNED_FLOAT:
22526 case FIX:
22527 case UNSIGNED_FIX:
22528 case FLOAT_TRUNCATE:
22529 *total = rs6000_cost->fp;
22530 return false;
22531
22532 case FLOAT_EXTEND:
22533 if (mode == DFmode)
22534 *total = rs6000_cost->sfdf_convert;
22535 else
22536 *total = rs6000_cost->fp;
22537 return false;
22538
22539 case CALL:
22540 case IF_THEN_ELSE:
22541 if (!speed)
22542 {
22543 *total = COSTS_N_INSNS (1);
22544 return true;
22545 }
22546 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
22547 {
22548 *total = rs6000_cost->fp;
22549 return false;
22550 }
22551 break;
22552
22553 case NE:
22554 case EQ:
22555 case GTU:
22556 case LTU:
22557 /* Carry bit requires mode == Pmode.
22558 NEG or PLUS already counted so only add one. */
22559 if (mode == Pmode
22560 && (outer_code == NEG || outer_code == PLUS))
22561 {
22562 *total = COSTS_N_INSNS (1);
22563 return true;
22564 }
22565 /* FALLTHRU */
22566
22567 case GT:
22568 case LT:
22569 case UNORDERED:
22570 if (outer_code == SET)
22571 {
22572 if (XEXP (x, 1) == const0_rtx)
22573 {
22574 *total = COSTS_N_INSNS (2);
22575 return true;
22576 }
22577 else
22578 {
22579 *total = COSTS_N_INSNS (3);
22580 return false;
22581 }
22582 }
22583 /* CC COMPARE. */
22584 if (outer_code == COMPARE)
22585 {
22586 *total = 0;
22587 return true;
22588 }
22589 break;
22590
22591 case UNSPEC:
22592 if (XINT (x, 1) == UNSPECV_MMA_XXSETACCZ)
22593 {
22594 *total = 0;
22595 return true;
22596 }
22597 break;
22598
22599 default:
22600 break;
22601 }
22602
22603 return false;
22604 }
22605
22606 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
22607
22608 static bool
22609 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
22610 int opno, int *total, bool speed)
22611 {
22612 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
22613
22614 fprintf (stderr,
22615 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
22616 "opno = %d, total = %d, speed = %s, x:\n",
22617 ret ? "complete" : "scan inner",
22618 GET_MODE_NAME (mode),
22619 GET_RTX_NAME (outer_code),
22620 opno,
22621 *total,
22622 speed ? "true" : "false");
22623
22624 debug_rtx (x);
22625
22626 return ret;
22627 }
22628
22629 static int
22630 rs6000_insn_cost (rtx_insn *insn, bool speed)
22631 {
22632 if (recog_memoized (insn) < 0)
22633 return 0;
22634
22635 /* If we are optimizing for size, just use the length. */
22636 if (!speed)
22637 return get_attr_length (insn);
22638
22639 /* Use the cost if provided. */
22640 int cost = get_attr_cost (insn);
22641 if (cost > 0)
22642 return cost;
22643
22644 /* If the insn tells us how many insns there are, use that. Otherwise use
22645 the length/4. Adjust the insn length to remove the extra size that
22646 prefixed instructions take. */
22647 int n = get_attr_num_insns (insn);
22648 if (n == 0)
22649 {
22650 int length = get_attr_length (insn);
22651 if (get_attr_prefixed (insn) == PREFIXED_YES)
22652 {
22653 int adjust = 0;
22654 ADJUST_INSN_LENGTH (insn, adjust);
22655 length -= adjust;
22656 }
22657
22658 n = length / 4;
22659 }
22660
22661 enum attr_type type = get_attr_type (insn);
22662
22663 switch (type)
22664 {
22665 case TYPE_LOAD:
22666 case TYPE_FPLOAD:
22667 case TYPE_VECLOAD:
22668 cost = COSTS_N_INSNS (n + 1);
22669 break;
22670
22671 case TYPE_MUL:
22672 switch (get_attr_size (insn))
22673 {
22674 case SIZE_8:
22675 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
22676 break;
22677 case SIZE_16:
22678 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
22679 break;
22680 case SIZE_32:
22681 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
22682 break;
22683 case SIZE_64:
22684 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
22685 break;
22686 default:
22687 gcc_unreachable ();
22688 }
22689 break;
22690 case TYPE_DIV:
22691 switch (get_attr_size (insn))
22692 {
22693 case SIZE_32:
22694 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
22695 break;
22696 case SIZE_64:
22697 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
22698 break;
22699 default:
22700 gcc_unreachable ();
22701 }
22702 break;
22703
22704 case TYPE_FP:
22705 cost = n * rs6000_cost->fp;
22706 break;
22707 case TYPE_DMUL:
22708 cost = n * rs6000_cost->dmul;
22709 break;
22710 case TYPE_SDIV:
22711 cost = n * rs6000_cost->sdiv;
22712 break;
22713 case TYPE_DDIV:
22714 cost = n * rs6000_cost->ddiv;
22715 break;
22716
22717 case TYPE_SYNC:
22718 case TYPE_LOAD_L:
22719 case TYPE_MFCR:
22720 case TYPE_MFCRF:
22721 cost = COSTS_N_INSNS (n + 2);
22722 break;
22723
22724 default:
22725 cost = COSTS_N_INSNS (n);
22726 }
22727
22728 return cost;
22729 }
22730
22731 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
22732
22733 static int
22734 rs6000_debug_address_cost (rtx x, machine_mode mode,
22735 addr_space_t as, bool speed)
22736 {
22737 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
22738
22739 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
22740 ret, speed ? "true" : "false");
22741 debug_rtx (x);
22742
22743 return ret;
22744 }
22745
22746
22747 /* A C expression returning the cost of moving data from a register of class
22748 CLASS1 to one of CLASS2. */
22749
22750 static int
22751 rs6000_register_move_cost (machine_mode mode,
22752 reg_class_t from, reg_class_t to)
22753 {
22754 int ret;
22755 reg_class_t rclass;
22756
22757 if (TARGET_DEBUG_COST)
22758 dbg_cost_ctrl++;
22759
22760 /* If we have VSX, we can easily move between FPR or Altivec registers,
22761 otherwise we can only easily move within classes.
22762 Do this first so we give best-case answers for union classes
22763 containing both gprs and vsx regs. */
22764 HARD_REG_SET to_vsx, from_vsx;
22765 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
22766 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
22767 if (!hard_reg_set_empty_p (to_vsx)
22768 && !hard_reg_set_empty_p (from_vsx)
22769 && (TARGET_VSX
22770 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
22771 {
22772 int reg = FIRST_FPR_REGNO;
22773 if (TARGET_VSX
22774 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
22775 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
22776 reg = FIRST_ALTIVEC_REGNO;
22777 ret = 2 * hard_regno_nregs (reg, mode);
22778 }
22779
22780 /* Moves from/to GENERAL_REGS. */
22781 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
22782 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
22783 {
22784 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
22785 {
22786 if (TARGET_DIRECT_MOVE)
22787 {
22788 /* Keep the cost for direct moves above that for within
22789 a register class even if the actual processor cost is
22790 comparable. We do this because a direct move insn
22791 can't be a nop, whereas with ideal register
22792 allocation a move within the same class might turn
22793 out to be a nop. */
22794 if (rs6000_tune == PROCESSOR_POWER9
22795 || rs6000_tune == PROCESSOR_POWER10
22796 || rs6000_tune == PROCESSOR_POWER11)
22797 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22798 else
22799 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22800 /* SFmode requires a conversion when moving between gprs
22801 and vsx. */
22802 if (mode == SFmode)
22803 ret += 2;
22804 }
22805 else
22806 ret = (rs6000_memory_move_cost (mode, rclass, false)
22807 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
22808 }
22809
22810 /* It's more expensive to move CR_REGS than CR0_REGS because of the
22811 shift. */
22812 else if (rclass == CR_REGS)
22813 ret = 4;
22814
22815 /* For those processors that have slow LR/CTR moves, make them more
22816 expensive than memory in order to bias spills to memory. */
22817 else if ((rs6000_tune == PROCESSOR_POWER6
22818 || rs6000_tune == PROCESSOR_POWER7
22819 || rs6000_tune == PROCESSOR_POWER8
22820 || rs6000_tune == PROCESSOR_POWER9)
22821 && reg_class_subset_p (rclass, SPECIAL_REGS))
22822 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22823
22824 else
22825 /* A move will cost one instruction per GPR moved. */
22826 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22827 }
22828
22829 /* Everything else has to go through GENERAL_REGS. */
22830 else
22831 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
22832 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
22833
22834 if (TARGET_DEBUG_COST)
22835 {
22836 if (dbg_cost_ctrl == 1)
22837 fprintf (stderr,
22838 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22839 ret, GET_MODE_NAME (mode), reg_class_names[from],
22840 reg_class_names[to]);
22841 dbg_cost_ctrl--;
22842 }
22843
22844 return ret;
22845 }
22846
22847 /* A C expression returning the cost of moving data of MODE from a register to
22848 or from memory. */
22849
22850 static int
22851 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
22852 bool in ATTRIBUTE_UNUSED)
22853 {
22854 int ret;
22855
22856 if (TARGET_DEBUG_COST)
22857 dbg_cost_ctrl++;
22858
22859 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
22860 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22861 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
22862 || reg_classes_intersect_p (rclass, VSX_REGS)))
22863 ret = 4 * hard_regno_nregs (FIRST_FPR_REGNO, mode);
22864 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
22865 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
22866 else
22867 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
22868
22869 if (TARGET_DEBUG_COST)
22870 {
22871 if (dbg_cost_ctrl == 1)
22872 fprintf (stderr,
22873 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22874 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
22875 dbg_cost_ctrl--;
22876 }
22877
22878 return ret;
22879 }
22880
22881 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22882
22883 The register allocator chooses GEN_OR_VSX_REGS for the allocno
22884 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22885 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
22886 move cost between GENERAL_REGS and VSX_REGS low.
22887
22888 It might seem reasonable to use a union class. After all, if usage
22889 of vsr is low and gpr high, it might make sense to spill gpr to vsr
22890 rather than memory. However, in cases where register pressure of
22891 both is high, like the cactus_adm spec test, allowing
22892 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22893 the first scheduling pass. This is partly due to an allocno of
22894 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22895 class, which gives too high a pressure for GENERAL_REGS and too low
22896 for VSX_REGS. So, force a choice of the subclass here.
22897
22898 The best class is also the union if GENERAL_REGS and VSX_REGS have
22899 the same cost. In that case we do use GEN_OR_VSX_REGS as the
22900 allocno class, since trying to narrow down the class by regno mode
22901 is prone to error. For example, SImode is allowed in VSX regs and
22902 in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
22903 it would be wrong to choose an allocno of GENERAL_REGS based on
22904 SImode. */
22905
22906 static reg_class_t
22907 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
22908 reg_class_t allocno_class,
22909 reg_class_t best_class)
22910 {
22911 switch (allocno_class)
22912 {
22913 case GEN_OR_VSX_REGS:
22914 /* best_class must be a subset of allocno_class. */
22915 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
22916 || best_class == GEN_OR_FLOAT_REGS
22917 || best_class == VSX_REGS
22918 || best_class == ALTIVEC_REGS
22919 || best_class == FLOAT_REGS
22920 || best_class == GENERAL_REGS
22921 || best_class == BASE_REGS);
22922 /* Use best_class but choose wider classes when copying from the
22923 wider class to best_class is cheap. This mimics IRA choice
22924 of allocno class. */
22925 if (best_class == BASE_REGS)
22926 return GENERAL_REGS;
22927 if (TARGET_VSX && best_class == FLOAT_REGS)
22928 return VSX_REGS;
22929 return best_class;
22930
22931 case VSX_REGS:
22932 if (best_class == ALTIVEC_REGS)
22933 return ALTIVEC_REGS;
22934
22935 default:
22936 break;
22937 }
22938
22939 return allocno_class;
22940 }
22941
22942 /* Load up a constant. If the mode is a vector mode, splat the value across
22943 all of the vector elements. */
22944
22945 static rtx
22946 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
22947 {
22948 rtx reg;
22949
22950 if (mode == SFmode || mode == DFmode)
22951 {
22952 rtx d = const_double_from_real_value (dconst, mode);
22953 reg = force_reg (mode, d);
22954 }
22955 else if (mode == V4SFmode)
22956 {
22957 rtx d = const_double_from_real_value (dconst, SFmode);
22958 rtvec v = gen_rtvec (4, d, d, d, d);
22959 reg = gen_reg_rtx (mode);
22960 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22961 }
22962 else if (mode == V2DFmode)
22963 {
22964 rtx d = const_double_from_real_value (dconst, DFmode);
22965 rtvec v = gen_rtvec (2, d, d);
22966 reg = gen_reg_rtx (mode);
22967 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22968 }
22969 else
22970 gcc_unreachable ();
22971
22972 return reg;
22973 }
22974
22975 /* Generate an FMA instruction. */
22976
22977 static void
22978 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
22979 {
22980 machine_mode mode = GET_MODE (target);
22981 rtx dst;
22982
22983 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
22984 gcc_assert (dst != NULL);
22985
22986 if (dst != target)
22987 emit_move_insn (target, dst);
22988 }
22989
22990 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
22991
22992 static void
22993 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
22994 {
22995 machine_mode mode = GET_MODE (dst);
22996 rtx r;
22997
22998 /* This is a tad more complicated, since the fnma_optab is for
22999 a different expression: fma(-m1, m2, a), which is the same
23000 thing except in the case of signed zeros.
23001
23002 Fortunately we know that if FMA is supported that FNMSUB is
23003 also supported in the ISA. Just expand it directly. */
23004
23005 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
23006
23007 r = gen_rtx_NEG (mode, a);
23008 r = gen_rtx_FMA (mode, m1, m2, r);
23009 r = gen_rtx_NEG (mode, r);
23010 emit_insn (gen_rtx_SET (dst, r));
23011 }
23012
23013 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
23014 add a reg_note saying that this was a division. Support both scalar and
23015 vector divide. Assumes no trapping math and finite arguments. */
23016
23017 void
23018 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
23019 {
23020 machine_mode mode = GET_MODE (dst);
23021 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
23022 int i;
23023
23024 /* Low precision estimates guarantee 5 bits of accuracy. High
23025 precision estimates guarantee 14 bits of accuracy. SFmode
23026 requires 23 bits of accuracy. DFmode requires 52 bits of
23027 accuracy. Each pass at least doubles the accuracy, leading
23028 to the following. */
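/* Concretely: from the 5-bit estimate, SFmode needs 3 passes
   (5 -> 10 -> 20 -> 40 >= 23 bits) and DFmode needs 4
   (5 -> 10 -> 20 -> 40 -> 80 >= 52 bits); with TARGET_RECIP_PRECISION
   one pass reaches 28 >= 23 bits for SFmode and two passes reach
   56 >= 52 bits for DFmode.  */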
23029 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
23030 if (mode == DFmode || mode == V2DFmode)
23031 passes++;
23032
23033 enum insn_code code = optab_handler (smul_optab, mode);
23034 insn_gen_fn gen_mul = GEN_FCN (code);
23035
23036 gcc_assert (code != CODE_FOR_nothing);
23037
23038 one = rs6000_load_constant_and_splat (mode, dconst1);
23039
23040 /* x0 = 1./d estimate */
23041 x0 = gen_reg_rtx (mode);
23042 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
23043 UNSPEC_FRES)));
23044
23045 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
23046 if (passes > 1) {
23047
23048 /* e0 = 1. - d * x0 */
23049 e0 = gen_reg_rtx (mode);
23050 rs6000_emit_nmsub (e0, d, x0, one);
23051
23052 /* x1 = x0 + e0 * x0 */
23053 x1 = gen_reg_rtx (mode);
23054 rs6000_emit_madd (x1, e0, x0, x0);
23055
23056 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
23057 ++i, xprev = xnext, eprev = enext) {
23058
23059 /* enext = eprev * eprev */
23060 enext = gen_reg_rtx (mode);
23061 emit_insn (gen_mul (enext, eprev, eprev));
23062
23063 /* xnext = xprev + enext * xprev */
23064 xnext = gen_reg_rtx (mode);
23065 rs6000_emit_madd (xnext, enext, xprev, xprev);
23066 }
23067
23068 } else
23069 xprev = x0;
23070
23071 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
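/* In terms of the code below:
   u + v * xprev = n * xprev + (n - d * n * xprev) * xprev
   = n * xprev * (2 - d * xprev).  */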
23072
23073 /* u = n * xprev */
23074 u = gen_reg_rtx (mode);
23075 emit_insn (gen_mul (u, n, xprev));
23076
23077 /* v = n - (d * u) */
23078 v = gen_reg_rtx (mode);
23079 rs6000_emit_nmsub (v, d, u, n);
23080
23081 /* dst = (v * xprev) + u */
23082 rs6000_emit_madd (dst, v, xprev, u);
23083
23084 if (note_p)
23085 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
23086 }
23087
23088 /* Goldschmidt's Algorithm for single/double-precision floating point
23089 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
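/* Starting from estimates g ~= sqrt(src) and h ~= 1/(2*sqrt(src)),
   each iteration computes the residual t = 1/2 - g*h and refines
   g' = g + g*t, h' = h + h*t, roughly doubling the number of correct
   bits; g converges to sqrt(src) and 2*h to 1/sqrt(src).  */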
23090
23091 void
23092 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
23093 {
23094 machine_mode mode = GET_MODE (src);
23095 rtx e = gen_reg_rtx (mode);
23096 rtx g = gen_reg_rtx (mode);
23097 rtx h = gen_reg_rtx (mode);
23098
23099 /* Low precision estimates guarantee 5 bits of accuracy. High
23100 precision estimates guarantee 14 bits of accuracy. SFmode
23101 requires 23 bits of accuracy. DFmode requires 52 bits of
23102 accuracy. Each pass at least doubles the accuracy, leading
23103 to the following. */
23104 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
23105 if (mode == DFmode || mode == V2DFmode)
23106 passes++;
23107
23108 int i;
23109 rtx mhalf;
23110 enum insn_code code = optab_handler (smul_optab, mode);
23111 insn_gen_fn gen_mul = GEN_FCN (code);
23112
23113 gcc_assert (code != CODE_FOR_nothing);
23114
23115 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
23116
23117 /* e = rsqrt estimate */
23118 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
23119 UNSPEC_RSQRT)));
23120
23121 /* If src == 0.0, filter out the infinite estimate so that sqrt (0.0) does not produce Inf * 0.0 = NaN. */
23122 if (!recip)
23123 {
23124 rtx zero = force_reg (mode, CONST0_RTX (mode));
23125
23126 if (mode == SFmode)
23127 {
23128 rtx target = emit_conditional_move (e, { GT, src, zero, mode },
23129 e, zero, mode, 0);
23130 if (target != e)
23131 emit_move_insn (e, target);
23132 }
23133 else
23134 {
23135 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
23136 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
23137 }
23138 }
23139
23140 /* g = sqrt estimate. */
23141 emit_insn (gen_mul (g, e, src));
23142 /* h = 1/(2*sqrt) estimate. */
23143 emit_insn (gen_mul (h, e, mhalf));
23144
23145 if (recip)
23146 {
23147 if (passes == 1)
23148 {
23149 rtx t = gen_reg_rtx (mode);
23150 rs6000_emit_nmsub (t, g, h, mhalf);
23151 /* Apply correction directly to 1/rsqrt estimate. */
23152 rs6000_emit_madd (dst, e, t, e);
23153 }
23154 else
23155 {
23156 for (i = 0; i < passes; i++)
23157 {
23158 rtx t1 = gen_reg_rtx (mode);
23159 rtx g1 = gen_reg_rtx (mode);
23160 rtx h1 = gen_reg_rtx (mode);
23161
23162 rs6000_emit_nmsub (t1, g, h, mhalf);
23163 rs6000_emit_madd (g1, g, t1, g);
23164 rs6000_emit_madd (h1, h, t1, h);
23165
23166 g = g1;
23167 h = h1;
23168 }
23169 /* Multiply by 2 for 1/rsqrt. */
23170 emit_insn (gen_add3_insn (dst, h, h));
23171 }
23172 }
23173 else
23174 {
23175 rtx t = gen_reg_rtx (mode);
23176 rs6000_emit_nmsub (t, g, h, mhalf);
23177 rs6000_emit_madd (dst, g, t, g);
23178 }
23179
23180 return;
23181 }
23182
23183 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
23184 (Power7) targets. DST is the target, and SRC is the argument operand. */
23185
23186 void
23187 rs6000_emit_popcount (rtx dst, rtx src)
23188 {
23189 machine_mode mode = GET_MODE (dst);
23190 rtx tmp1, tmp2;
23191
23192 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
23193 if (TARGET_POPCNTD)
23194 {
23195 if (mode == SImode)
23196 emit_insn (gen_popcntdsi2 (dst, src));
23197 else
23198 emit_insn (gen_popcntddi2 (dst, src));
23199 return;
23200 }
23201
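/* Fall back to popcntb, which counts the bits within each byte in
   parallel.  Multiplying by 0x01010101 (replicated twice for DImode)
   sums the byte counts into the most significant byte, which the
   final shift extracts.  E.g. for SImode src = 0x01020304, popcntb
   yields 0x01010201, the multiply leaves 0x05 in the top byte, and
   the shift right by 24 gives the popcount 5.  */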
23202 tmp1 = gen_reg_rtx (mode);
23203
23204 if (mode == SImode)
23205 {
23206 emit_insn (gen_popcntbsi2 (tmp1, src));
23207 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
23208 NULL_RTX, 0);
23209 tmp2 = force_reg (SImode, tmp2);
23210 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
23211 }
23212 else
23213 {
23214 emit_insn (gen_popcntbdi2 (tmp1, src));
23215 tmp2 = expand_mult (DImode, tmp1,
23216 GEN_INT ((HOST_WIDE_INT)
23217 0x01010101 << 32 | 0x01010101),
23218 NULL_RTX, 0);
23219 tmp2 = force_reg (DImode, tmp2);
23220 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
23221 }
23222 }
23223
23224
23225 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
23226 target, and SRC is the argument operand. */
23227
23228 void
23229 rs6000_emit_parity (rtx dst, rtx src)
23230 {
23231 machine_mode mode = GET_MODE (dst);
23232 rtx tmp;
23233
23234 tmp = gen_reg_rtx (mode);
23235
23236 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
23237 if (TARGET_CMPB)
23238 {
23239 if (mode == SImode)
23240 {
23241 emit_insn (gen_popcntbsi2 (tmp, src));
23242 emit_insn (gen_paritysi2_cmpb (dst, tmp));
23243 }
23244 else
23245 {
23246 emit_insn (gen_popcntbdi2 (tmp, src));
23247 emit_insn (gen_paritydi2_cmpb (dst, tmp));
23248 }
23249 return;
23250 }
23251
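/* Without prtyw/prtyd, xor-fold the per-byte counts from popcntb:
   each fold halves the width while preserving the low bit of the
   total count, and the final AND with 1 extracts that parity bit.  */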
23252 if (mode == SImode)
23253 {
23254 /* Is mult+shift >= shift+xor+shift+xor? */
23255 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
23256 {
23257 rtx tmp1, tmp2, tmp3, tmp4;
23258
23259 tmp1 = gen_reg_rtx (SImode);
23260 emit_insn (gen_popcntbsi2 (tmp1, src));
23261
23262 tmp2 = gen_reg_rtx (SImode);
23263 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
23264 tmp3 = gen_reg_rtx (SImode);
23265 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
23266
23267 tmp4 = gen_reg_rtx (SImode);
23268 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
23269 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
23270 }
23271 else
23272 rs6000_emit_popcount (tmp, src);
23273 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
23274 }
23275 else
23276 {
23277 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
23278 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
23279 {
23280 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
23281
23282 tmp1 = gen_reg_rtx (DImode);
23283 emit_insn (gen_popcntbdi2 (tmp1, src));
23284
23285 tmp2 = gen_reg_rtx (DImode);
23286 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
23287 tmp3 = gen_reg_rtx (DImode);
23288 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
23289
23290 tmp4 = gen_reg_rtx (DImode);
23291 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
23292 tmp5 = gen_reg_rtx (DImode);
23293 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
23294
23295 tmp6 = gen_reg_rtx (DImode);
23296 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
23297 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
23298 }
23299 else
23300 rs6000_emit_popcount (tmp, src);
23301 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
23302 }
23303 }
23304
23305 /* Expand an Altivec constant permutation for little endian mode.
23306 OP0 and OP1 are the input vectors and TARGET is the output vector.
23307 SEL specifies the constant permutation vector.
23308
23309 There are two issues: First, the two input operands must be
23310 swapped so that together they form a double-wide array in LE
23311 order. Second, the vperm instruction has surprising behavior
23312 in LE mode: it interprets the elements of the source vectors
23313 in BE mode ("left to right") and interprets the elements of
23314 the destination vector in LE mode ("right to left"). To
23315 correct for this, we must subtract each element of the permute
23316 control vector from 31.
23317
23318 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
23319 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
23320 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
23321 serve as the permute control vector. Then, in BE mode,
23322
23323 vperm 9,10,11,12
23324
23325 places the desired result in vr9. However, in LE mode the
23326 vector contents will be
23327
23328 vr10 = 00000003 00000002 00000001 00000000
23329 vr11 = 00000007 00000006 00000005 00000004
23330
23331 The result of the vperm using the same permute control vector is
23332
23333 vr9 = 05000000 07000000 01000000 03000000
23334
23335 That is, the leftmost 4 bytes of vr10 are interpreted as the
23336 source for the rightmost 4 bytes of vr9, and so on.
23337
23338 If we change the permute control vector to
23339
23340         vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
23341
23342 and issue
23343
23344 vperm 9,11,10,12
23345
23346 we get the desired
23347
23348 vr9 = 00000006 00000004 00000002 00000000. */
23349
23350 static void
23351 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
23352 const vec_perm_indices &sel)
23353 {
23354 unsigned int i;
23355 rtx perm[16];
23356 rtx constv, unspec;
23357
23358 /* Unpack and adjust the constant selector. */
23359 for (i = 0; i < 16; ++i)
23360 {
23361 unsigned int elt = 31 - (sel[i] & 31);
23362 perm[i] = GEN_INT (elt);
23363 }
23364
23365 /* Expand to a permute, swapping the inputs and using the
23366 adjusted selector. */
23367 if (!REG_P (op0))
23368 op0 = force_reg (V16QImode, op0);
23369 if (!REG_P (op1))
23370 op1 = force_reg (V16QImode, op1);
23371
23372 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
23373 constv = force_reg (V16QImode, constv);
23374 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
23375 UNSPEC_VPERM);
23376 if (!REG_P (target))
23377 {
23378 rtx tmp = gen_reg_rtx (V16QImode);
23379 emit_move_insn (tmp, unspec);
23380 unspec = tmp;
23381 }
23382
23383 emit_move_insn (target, unspec);
23384 }
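
/* Worked instance of the adjustment above, as a sketch: the BE control
   vector {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} from the example
   comment becomes {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4} here,
   i.e. each entry is rewritten as

     perm[i] = GEN_INT (31 - (sel[i] & 31));

   and the UNSPEC_VPERM is built with op1 and op0 exchanged, matching
   the "vperm 9,11,10,12" form shown above.  */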
23385
23386 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
23387 permute control vector. But here it's not a constant, so we must
23388 generate a vector NAND or NOR to do the adjustment. */
23389
23390 void
23391 altivec_expand_vec_perm_le (rtx operands[4])
23392 {
23393 rtx notx, iorx, unspec;
23394 rtx target = operands[0];
23395 rtx op0 = operands[1];
23396 rtx op1 = operands[2];
23397 rtx sel = operands[3];
23398 rtx tmp = target;
23399 rtx norreg = gen_reg_rtx (V16QImode);
23400 machine_mode mode = GET_MODE (target);
23401
23402 /* Get everything in regs so the pattern matches. */
23403 if (!REG_P (op0))
23404 op0 = force_reg (mode, op0);
23405 if (!REG_P (op1))
23406 op1 = force_reg (mode, op1);
23407 if (!REG_P (sel))
23408 sel = force_reg (V16QImode, sel);
23409 if (!REG_P (target))
23410 tmp = gen_reg_rtx (mode);
23411
23412 if (TARGET_P9_VECTOR)
23413 {
23414 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
23415 UNSPEC_VPERMR);
23416 }
23417 else
23418 {
23419 /* Invert the selector with a VNAND if available, else a VNOR.
23420 The VNAND is preferred for future fusion opportunities. */
23421 notx = gen_rtx_NOT (V16QImode, sel);
23422 iorx = (TARGET_P8_VECTOR
23423 ? gen_rtx_IOR (V16QImode, notx, notx)
23424 : gen_rtx_AND (V16QImode, notx, notx));
23425 emit_insn (gen_rtx_SET (norreg, iorx));
23426
23427 /* Permute with operands reversed and adjusted selector. */
23428 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
23429 UNSPEC_VPERM);
23430 }
23431
23432 /* Copy into target, possibly by way of a register. */
23433 if (!REG_P (target))
23434 {
23435 emit_move_insn (tmp, unspec);
23436 unspec = tmp;
23437 }
23438
23439 emit_move_insn (target, unspec);
23440 }
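
/* Note on the non-constant adjustment above, as a sketch: vperm only
   reads the low five bits of each selector byte, and for 0 <= e < 32
   we have 31 - e == (~e & 31), so the constant case's "subtract from
   31" is the same as a bitwise complement here.  E.g. e = 3 gives
   (~3 & 31) == 28 == 31 - 3.  Both ~sel | ~sel (matching vnand) and
   ~sel & ~sel (matching vnor) compute ~sel; the duplicated operand is
   only there so the RTL matches an existing two-input pattern.  */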
23441
23442 /* Expand an Altivec constant permutation. Return true if we match
23443 an efficient implementation; false to fall back to VPERM.
23444
23445 OP0 and OP1 are the input vectors and TARGET is the output vector.
23446 SEL specifies the constant permutation vector. */
23447
23448 static bool
23449 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
23450 const vec_perm_indices &sel)
23451 {
23452 struct altivec_perm_insn {
23453 HOST_WIDE_INT mask;
23454 enum insn_code impl;
23455 unsigned char perm[16];
23456 };
23457 static const struct altivec_perm_insn patterns[] = {
23458 {OPTION_MASK_ALTIVEC,
23459 CODE_FOR_altivec_vpkuhum_direct,
23460 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}},
23461 {OPTION_MASK_ALTIVEC,
23462 CODE_FOR_altivec_vpkuwum_direct,
23463 {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
23464 {OPTION_MASK_ALTIVEC,
23465 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
23466 : CODE_FOR_altivec_vmrglb_direct,
23467 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
23468 {OPTION_MASK_ALTIVEC,
23469 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
23470 : CODE_FOR_altivec_vmrglh_direct,
23471 {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
23472 {OPTION_MASK_ALTIVEC,
23473 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si
23474 : CODE_FOR_altivec_vmrglw_direct_v4si,
23475 {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
23476 {OPTION_MASK_ALTIVEC,
23477 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
23478 : CODE_FOR_altivec_vmrghb_direct,
23479 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
23480 {OPTION_MASK_ALTIVEC,
23481 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
23482 : CODE_FOR_altivec_vmrghh_direct,
23483 {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
23484 {OPTION_MASK_ALTIVEC,
23485 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si
23486 : CODE_FOR_altivec_vmrghw_direct_v4si,
23487 {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}},
23488 {OPTION_MASK_P8_VECTOR,
23489 BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
23490 : CODE_FOR_p8_vmrgow_v4sf_direct,
23491 {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}},
23492 {OPTION_MASK_P8_VECTOR,
23493 BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
23494 : CODE_FOR_p8_vmrgew_v4sf_direct,
23495 {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}},
23496 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23497 {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}},
23498 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23499 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
23500 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23501 {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}},
23502 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23503 {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}}};
23504
23505 unsigned int i, j, elt, which;
23506 unsigned char perm[16];
23507 rtx x;
23508 bool one_vec;
23509
23510 /* Unpack the constant selector. */
23511 for (i = which = 0; i < 16; ++i)
23512 {
23513 elt = sel[i] & 31;
23514 which |= (elt < 16 ? 1 : 2);
23515 perm[i] = elt;
23516 }
23517
23518 /* Simplify the constant selector based on operands. */
23519 switch (which)
23520 {
23521 default:
23522 gcc_unreachable ();
23523
23524 case 3:
23525 one_vec = false;
23526 if (!rtx_equal_p (op0, op1))
23527 break;
23528 /* FALLTHRU */
23529
23530 case 2:
23531 for (i = 0; i < 16; ++i)
23532 perm[i] &= 15;
23533 op0 = op1;
23534 one_vec = true;
23535 break;
23536
23537 case 1:
23538 op1 = op0;
23539 one_vec = true;
23540 break;
23541 }
23542
23543 /* Look for splat patterns. */
23544 if (one_vec)
23545 {
23546 elt = perm[0];
23547
23548 for (i = 0; i < 16; ++i)
23549 if (perm[i] != elt)
23550 break;
23551 if (i == 16)
23552 {
23553 if (!BYTES_BIG_ENDIAN)
23554 elt = 15 - elt;
23555 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
23556 return true;
23557 }
23558
23559 if (elt % 2 == 0)
23560 {
23561 for (i = 0; i < 16; i += 2)
23562 if (perm[i] != elt || perm[i + 1] != elt + 1)
23563 break;
23564 if (i == 16)
23565 {
23566 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
23567 x = gen_reg_rtx (V8HImode);
23568 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
23569 GEN_INT (field)));
23570 emit_move_insn (target, gen_lowpart (V16QImode, x));
23571 return true;
23572 }
23573 }
23574
23575 if (elt % 4 == 0)
23576 {
23577 for (i = 0; i < 16; i += 4)
23578 if (perm[i] != elt
23579 || perm[i + 1] != elt + 1
23580 || perm[i + 2] != elt + 2
23581 || perm[i + 3] != elt + 3)
23582 break;
23583 if (i == 16)
23584 {
23585 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
23586 x = gen_reg_rtx (V4SImode);
23587 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
23588 GEN_INT (field)));
23589 emit_move_insn (target, gen_lowpart (V16QImode, x));
23590 return true;
23591 }
23592 }
23593 }
23594
23595 /* Look for merge and pack patterns. */
23596 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
23597 {
23598 bool swapped;
23599
23600 if ((patterns[j].mask & rs6000_isa_flags) == 0)
23601 continue;
23602
23603 elt = patterns[j].perm[0];
23604 if (perm[0] == elt)
23605 swapped = false;
23606 else if (perm[0] == elt + 16)
23607 swapped = true;
23608 else
23609 continue;
23610 for (i = 1; i < 16; ++i)
23611 {
23612 elt = patterns[j].perm[i];
23613 if (swapped)
23614 elt = (elt >= 16 ? elt - 16 : elt + 16);
23615 else if (one_vec && elt >= 16)
23616 elt -= 16;
23617 if (perm[i] != elt)
23618 break;
23619 }
23620 if (i == 16)
23621 {
23622 enum insn_code icode = patterns[j].impl;
23623 machine_mode omode = insn_data[icode].operand[0].mode;
23624 machine_mode imode = insn_data[icode].operand[1].mode;
23625
23626 rtx perm_idx = GEN_INT (0);
23627 if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23628 {
23629 int perm_val = 0;
23630 if (one_vec)
23631 {
23632 if (perm[0] == 8)
23633 perm_val |= 2;
23634 if (perm[8] == 8)
23635 perm_val |= 1;
23636 }
23637 else
23638 {
23639 if (perm[0] != 0)
23640 perm_val |= 2;
23641 if (perm[8] != 16)
23642 perm_val |= 1;
23643 }
23644 perm_idx = GEN_INT (perm_val);
23645 }
23646
23647 /* For little-endian, don't use vpkuwum and vpkuhum if the
23648 underlying vector type is not V4SI and V8HI, respectively.
23649 For example, using vpkuwum with a V8HI picks up the even
23650 halfwords (BE numbering) when the even halfwords (LE
23651 numbering) are what we need. */
23652 if (!BYTES_BIG_ENDIAN
23653 && icode == CODE_FOR_altivec_vpkuwum_direct
23654 && ((REG_P (op0)
23655 && GET_MODE (op0) != V4SImode)
23656 || (SUBREG_P (op0)
23657 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
23658 continue;
23659 if (!BYTES_BIG_ENDIAN
23660 && icode == CODE_FOR_altivec_vpkuhum_direct
23661 && ((REG_P (op0)
23662 && GET_MODE (op0) != V8HImode)
23663 || (SUBREG_P (op0)
23664 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
23665 continue;
23666
23667 /* For little-endian, the two input operands must be swapped
23668 (or swapped back) to ensure proper right-to-left numbering
23669 from 0 to 2N-1. */
23670 if (swapped == BYTES_BIG_ENDIAN
23671 && icode != CODE_FOR_vsx_xxpermdi_v16qi)
23672 std::swap (op0, op1);
23673 if (imode != V16QImode)
23674 {
23675 op0 = gen_lowpart (imode, op0);
23676 op1 = gen_lowpart (imode, op1);
23677 }
23678 if (omode == V16QImode)
23679 x = target;
23680 else
23681 x = gen_reg_rtx (omode);
23682 if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23683 emit_insn (GEN_FCN (icode) (x, op0, op1, perm_idx));
23684 else
23685 emit_insn (GEN_FCN (icode) (x, op0, op1));
23686 if (omode != V16QImode)
23687 emit_move_insn (target, gen_lowpart (V16QImode, x));
23688 return true;
23689 }
23690 }
23691
23692 if (!BYTES_BIG_ENDIAN)
23693 {
23694 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
23695 return true;
23696 }
23697
23698 return false;
23699 }
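
/* Worked examples for the matcher above, as a sketch: a selector with
   all sixteen entries equal, say {5,5,...,5}, is caught by the splat
   loop and emitted as a single vspltb (with the lane flipped to
   15 - 5 = 10 on little-endian).  A selector of the form {4,5,4,5,...}
   matches the halfword splat and becomes vsplth on lane 2 (BE) or lane
   7 - 2 = 5 (LE).  Selectors matching a row of the patterns[] table,
   such as {0,16,1,17,...}, map to the corresponding merge or pack
   instruction instead of falling back to a full vperm.  */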
23700
23701 /* Expand a VSX Permute Doubleword constant permutation.
23702 Return true if we match an efficient implementation. */
23703
23704 static bool
23705 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
23706 unsigned char perm0, unsigned char perm1)
23707 {
23708 rtx x;
23709
23710 /* If both selectors come from the same operand, fold to single op. */
23711 if ((perm0 & 2) == (perm1 & 2))
23712 {
23713 if (perm0 & 2)
23714 op0 = op1;
23715 else
23716 op1 = op0;
23717 }
23718 /* If both operands are equal, fold to simpler permutation. */
23719 if (rtx_equal_p (op0, op1))
23720 {
23721 perm0 = perm0 & 1;
23722 perm1 = (perm1 & 1) + 2;
23723 }
23724 /* If the first selector comes from the second operand, swap. */
23725 else if (perm0 & 2)
23726 {
23727 if (perm1 & 2)
23728 return false;
23729 perm0 -= 2;
23730 perm1 += 2;
23731 std::swap (op0, op1);
23732 }
23733 /* If the second selector does not come from the second operand, fail. */
23734 else if ((perm1 & 2) == 0)
23735 return false;
23736
23737 /* Success! */
23738 if (target != NULL)
23739 {
23740 machine_mode vmode, dmode;
23741 rtvec v;
23742
23743 vmode = GET_MODE (target);
23744 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
23745 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
23746 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
23747 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
23748 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
23749 emit_insn (gen_rtx_SET (target, x));
23750 }
23751 return true;
23752 }
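
/* Worked instance of the encoding above, as a sketch: PERM0 and PERM1
   each name one of the four input doublewords {op0[0], op0[1], op1[0],
   op1[1]}, bit 1 selecting the operand and bit 0 the element.  For
   (perm0, perm1) = (1, 2) the selectors already come from op0 and op1
   respectively, so the function emits a VEC_SELECT of elements 1 and 2
   of the VEC_CONCAT of the operands -- the xxpermdi form.  A request
   like (3, 3) is first folded to a single-operand permutation.  */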
23753
23754 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
23755
23756 static bool
23757 rs6000_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
23758 rtx target, rtx op0, rtx op1,
23759 const vec_perm_indices &sel)
23760 {
23761 if (vmode != op_mode)
23762 return false;
23763
23764 bool testing_p = !target;
23765
23766 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
23767 if (TARGET_ALTIVEC && testing_p)
23768 return true;
23769
23770 if (op0)
23771 {
23772 rtx nop0 = force_reg (vmode, op0);
23773 if (op0 == op1)
23774 op1 = nop0;
23775 op0 = nop0;
23776 }
23777 if (op1)
23778 op1 = force_reg (vmode, op1);
23779
23780 /* Check for ps_merge* or xxpermdi insns. */
23781 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
23782 {
23783 if (testing_p)
23784 {
23785 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
23786 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
23787 }
23788 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
23789 return true;
23790 }
23791
23792 if (TARGET_ALTIVEC)
23793 {
23794 /* Force the target-independent code to lower to V16QImode. */
23795 if (vmode != V16QImode)
23796 return false;
23797 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
23798 return true;
23799 }
23800
23801 return false;
23802 }
23803
23804 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
23805 OP0 and OP1 are the input vectors and TARGET is the output vector.
23806 PERM specifies the constant permutation vector. */
23807
23808 static void
23809 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
23810 machine_mode vmode, const vec_perm_builder &perm)
23811 {
23812 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
23813 if (x != target)
23814 emit_move_insn (target, x);
23815 }
23816
23817 /* Expand an extract even operation. */
23818
23819 void
23820 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
23821 {
23822 machine_mode vmode = GET_MODE (target);
23823 unsigned i, nelt = GET_MODE_NUNITS (vmode);
23824 vec_perm_builder perm (nelt, nelt, 1);
23825
23826 for (i = 0; i < nelt; i++)
23827 perm.quick_push (i * 2);
23828
23829 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23830 }
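
/* Example of the selector built above, as a sketch: for a V4SI target
   (nelt == 4) the pushed indices are {0, 2, 4, 6}, i.e. the even
   elements of the double-wide concatenation of OP0 and OP1.  */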
23831
23832 /* Expand a vector interleave operation. */
23833
23834 void
23835 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
23836 {
23837 machine_mode vmode = GET_MODE (target);
23838 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
23839 vec_perm_builder perm (nelt, nelt, 1);
23840
23841 high = (highp ? 0 : nelt / 2);
23842 for (i = 0; i < nelt / 2; i++)
23843 {
23844 perm.quick_push (i + high);
23845 perm.quick_push (i + nelt + high);
23846 }
23847
23848 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23849 }
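
/* Example of the selector built above, as a sketch: for nelt == 4 and
   HIGHP the loop pushes {0, 4, 1, 5}, interleaving the high halves of
   OP0 and OP1; with !HIGHP it pushes {2, 6, 3, 7} for the low halves.  */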
23850
23851 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT.  */
23852 void
23853 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
23854 {
23855 HOST_WIDE_INT hwi_scale (scale);
23856 REAL_VALUE_TYPE r_pow;
23857 rtvec v = rtvec_alloc (2);
23858 rtx elt;
23859 rtx scale_vec = gen_reg_rtx (V2DFmode);
23860 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
23861 elt = const_double_from_real_value (r_pow, DFmode);
23862 RTVEC_ELT (v, 0) = elt;
23863 RTVEC_ELT (v, 1) = elt;
23864 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
23865 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
23866 }
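
/* Usage sketch: rs6000_scale_v2df (tgt, src, 3) materializes the
   constant vector {8.0, 8.0} (two to the third power) and emits one
   mulv2df3, multiplying each lane of SRC by 8.0.  */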
23867
23868 /* Return an RTX representing where to find the function value of a
23869 function returning MODE. */
23870 static rtx
23871 rs6000_complex_function_value (machine_mode mode)
23872 {
23873 unsigned int regno;
23874 rtx r1, r2;
23875 machine_mode inner = GET_MODE_INNER (mode);
23876 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
23877
23878 if (TARGET_FLOAT128_TYPE
23879 && (mode == KCmode
23880 || (mode == TCmode && TARGET_IEEEQUAD)))
23881 regno = ALTIVEC_ARG_RETURN;
23882
23883 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23884 regno = FP_ARG_RETURN;
23885
23886 else
23887 {
23888 regno = GP_ARG_RETURN;
23889
23890 /* 32-bit is OK since it'll go in r3/r4. */
23891 if (TARGET_32BIT && inner_bytes >= 4)
23892 return gen_rtx_REG (mode, regno);
23893 }
23894
23895 if (inner_bytes >= 8)
23896 return gen_rtx_REG (mode, regno);
23897
23898 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
23899 const0_rtx);
23900 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
23901 GEN_INT (inner_bytes));
23902 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
23903 }
23904
23905 /* Return an rtx describing a return value of MODE as a PARALLEL
23906 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
23907 stride REG_STRIDE. */
23908
23909 static rtx
23910 rs6000_parallel_return (machine_mode mode,
23911 int n_elts, machine_mode elt_mode,
23912 unsigned int regno, unsigned int reg_stride)
23913 {
23914 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
23915
23916 int i;
23917 for (i = 0; i < n_elts; i++)
23918 {
23919 rtx r = gen_rtx_REG (elt_mode, regno);
23920 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
23921 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
23922 regno += reg_stride;
23923 }
23924
23925 return par;
23926 }
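
/* Worked instance, as a sketch: rs6000_parallel_return (DImode, 2,
   SImode, GP_ARG_RETURN, 1) describes a 64-bit value returned as two
   32-bit halves, r3 at byte offset 0 and r4 at byte offset 4 -- the
   -m32 -mpowerpc64 split handled by the callers below.  */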
23927
23928 /* Target hook for TARGET_FUNCTION_VALUE.
23929
23930 An integer value is in r3 and a floating-point value is in fp1,
23931 unless -msoft-float. */
23932
23933 static rtx
23934 rs6000_function_value (const_tree valtype,
23935 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
23936 bool outgoing ATTRIBUTE_UNUSED)
23937 {
23938 machine_mode mode;
23939 unsigned int regno;
23940 machine_mode elt_mode;
23941 int n_elts;
23942
23943 /* Special handling for structs in darwin64. */
23944 if (TARGET_MACHO
23945 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
23946 {
23947 CUMULATIVE_ARGS valcum;
23948 rtx valret;
23949
23950 valcum.words = 0;
23951 valcum.fregno = FP_ARG_MIN_REG;
23952 valcum.vregno = ALTIVEC_ARG_MIN_REG;
23953 /* Do a trial code generation as if this were going to be passed as
23954 an argument; if any part goes in memory, we return NULL. */
23955 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
23956 if (valret)
23957 return valret;
23958 /* Otherwise fall through to standard ABI rules. */
23959 }
23960
23961 mode = TYPE_MODE (valtype);
23962
23963 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
23964 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
23965 {
23966 int first_reg, n_regs;
23967
23968 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
23969 {
23970 /* _Decimal128 must use even/odd register pairs. */
23971 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23972 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
23973 }
23974 else
23975 {
23976 first_reg = ALTIVEC_ARG_RETURN;
23977 n_regs = 1;
23978 }
23979
23980 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
23981 }
23982
23983   /* Some return value types need to be split with -mpowerpc64 in the 32-bit ABI.  */
23984 if (TARGET_32BIT && TARGET_POWERPC64)
23985 switch (mode)
23986 {
23987 default:
23988 break;
23989 case E_DImode:
23990 case E_SCmode:
23991 case E_DCmode:
23992 case E_TCmode:
23993 int count = GET_MODE_SIZE (mode) / 4;
23994 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
23995 }
23996
23997 if ((INTEGRAL_TYPE_P (valtype)
23998 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
23999 || POINTER_TYPE_P (valtype))
24000 mode = TARGET_32BIT ? SImode : DImode;
24001
24002 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
24003 /* _Decimal128 must use an even/odd register pair. */
24004 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
24005 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
24006 && !FLOAT128_VECTOR_P (mode))
24007 regno = FP_ARG_RETURN;
24008 else if (TREE_CODE (valtype) == COMPLEX_TYPE
24009 && targetm.calls.split_complex_arg)
24010 return rs6000_complex_function_value (mode);
24011 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
24012 return register is used in both cases, and we won't see V2DImode/V2DFmode
24013 for pure altivec, combine the two cases. */
24014 else if ((VECTOR_TYPE_P (valtype) || VECTOR_ALIGNMENT_P (mode))
24015 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
24016 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
24017 regno = ALTIVEC_ARG_RETURN;
24018 else
24019 regno = GP_ARG_RETURN;
24020
24021 return gen_rtx_REG (mode, regno);
24022 }
24023
24024 /* Define how to find the value returned by a library function
24025 assuming the value has mode MODE. */
24026 rtx
24027 rs6000_libcall_value (machine_mode mode)
24028 {
24029 unsigned int regno;
24030
24031   /* A long long return value needs to be split with -mpowerpc64 in the 32-bit ABI.  */
24032 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
24033 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
24034
24035 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
24036 /* _Decimal128 must use an even/odd register pair. */
24037 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
24038 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
24039 regno = FP_ARG_RETURN;
24040 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
24041 return register is used in both cases, and we won't see V2DImode/V2DFmode
24042 for pure altivec, combine the two cases. */
24043 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
24044 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
24045 regno = ALTIVEC_ARG_RETURN;
24046 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
24047 return rs6000_complex_function_value (mode);
24048 else
24049 regno = GP_ARG_RETURN;
24050
24051 return gen_rtx_REG (mode, regno);
24052 }
24053
24054 /* Compute register pressure classes. We implement the target hook to avoid
24055 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
24056    lead to incorrect estimates of the number of available registers and therefore
24057 increased register pressure/spill. */
24058 static int
24059 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
24060 {
24061 int n;
24062
24063 n = 0;
24064 pressure_classes[n++] = GENERAL_REGS;
24065 if (TARGET_ALTIVEC)
24066 pressure_classes[n++] = ALTIVEC_REGS;
24067 if (TARGET_VSX)
24068 pressure_classes[n++] = VSX_REGS;
24069 else
24070 {
24071 if (TARGET_HARD_FLOAT)
24072 pressure_classes[n++] = FLOAT_REGS;
24073 }
24074 pressure_classes[n++] = CR_REGS;
24075 pressure_classes[n++] = SPECIAL_REGS;
24076
24077 return n;
24078 }
24079
24080 /* Given FROM and TO register numbers, say whether this elimination is allowed.
24081 Frame pointer elimination is automatically handled.
24082
24083 For the RS/6000, if frame pointer elimination is being done, we would like
24084 to convert ap into fp, not sp.
24085
24086 We need r30 if -mminimal-toc was specified, and there are constant pool
24087 references. */
24088
24089 static bool
24090 rs6000_can_eliminate (const int from, const int to)
24091 {
24092 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
24093 ? ! frame_pointer_needed
24094 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
24095 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
24096 || constant_pool_empty_p ()
24097 : true);
24098 }
24099
24100 /* Define the offset between two registers, FROM to be eliminated and its
24101 replacement TO, at the start of a routine. */
24102 HOST_WIDE_INT
24103 rs6000_initial_elimination_offset (int from, int to)
24104 {
24105 rs6000_stack_t *info = rs6000_stack_info ();
24106 HOST_WIDE_INT offset;
24107
24108 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
24109 offset = info->push_p ? 0 : -info->total_size;
24110 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
24111 {
24112 offset = info->push_p ? 0 : -info->total_size;
24113 if (FRAME_GROWS_DOWNWARD)
24114 offset += info->fixed_size + info->vars_size + info->parm_size;
24115 }
24116 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
24117 offset = FRAME_GROWS_DOWNWARD
24118 ? info->fixed_size + info->vars_size + info->parm_size
24119 : 0;
24120 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
24121 offset = info->total_size;
24122 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
24123 offset = info->push_p ? info->total_size : 0;
24124 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
24125 offset = 0;
24126 else
24127 gcc_unreachable ();
24128
24129 return offset;
24130 }
24131
24132 /* Fill in the sizes of the registers used by the unwinder.  */
24133
24134 static void
24135 rs6000_init_dwarf_reg_sizes_extra (tree address)
24136 {
24137 if (TARGET_MACHO && ! TARGET_ALTIVEC)
24138 {
24139 int i;
24140 machine_mode mode = TYPE_MODE (char_type_node);
24141 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
24142 rtx mem = gen_rtx_MEM (BLKmode, addr);
24143 rtx value = gen_int_mode (16, mode);
24144
24145 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
24146 The unwinder still needs to know the size of Altivec registers. */
24147
24148 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
24149 {
24150 int column = DWARF_REG_TO_UNWIND_COLUMN
24151 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
24152 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
24153
24154 emit_move_insn (adjust_address (mem, mode, offset), value);
24155 }
24156 }
24157 }
24158
24159 /* Map internal gcc register numbers to debug format register numbers.
24160 FORMAT specifies the type of debug register number to use:
24161 0 -- debug information, except for frame-related sections
24162 1 -- DWARF .debug_frame section
24163 2 -- DWARF .eh_frame section */
24164
24165 unsigned int
24166 rs6000_debugger_regno (unsigned int regno, unsigned int format)
24167 {
24168 /* On some platforms, we use the standard DWARF register
24169 numbering for .debug_info and .debug_frame. */
24170 if ((format == 0 && dwarf_debuginfo_p ()) || format == 1)
24171 {
24172 #ifdef RS6000_USE_DWARF_NUMBERING
24173 if (regno <= 31)
24174 return regno;
24175 if (FP_REGNO_P (regno))
24176 return regno - FIRST_FPR_REGNO + 32;
24177 if (ALTIVEC_REGNO_P (regno))
24178 return regno - FIRST_ALTIVEC_REGNO + 1124;
24179 if (regno == LR_REGNO)
24180 return 108;
24181 if (regno == CTR_REGNO)
24182 return 109;
24183 if (regno == CA_REGNO)
24184 return 101; /* XER */
24185 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
24186 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
24187 The actual code emitted saves the whole of CR, so we map CR2_REGNO
24188 to the DWARF reg for CR. */
24189 if (format == 1 && regno == CR2_REGNO)
24190 return 64;
24191 if (CR_REGNO_P (regno))
24192 return regno - CR0_REGNO + 86;
24193 if (regno == VRSAVE_REGNO)
24194 return 356;
24195 if (regno == VSCR_REGNO)
24196 return 67;
24197
24198 /* These do not make much sense. */
24199 if (regno == FRAME_POINTER_REGNUM)
24200 return 111;
24201 if (regno == ARG_POINTER_REGNUM)
24202 return 67;
24203 if (regno == 64)
24204 return 100;
24205
24206 gcc_unreachable ();
24207 #endif
24208 }
24209
24210 /* We use the GCC 7 (and before) internal number for non-DWARF debug
24211 information, and also for .eh_frame. */
24212 /* Translate the regnos to their numbers in GCC 7 (and before). */
24213 if (regno <= 31)
24214 return regno;
24215 if (FP_REGNO_P (regno))
24216 return regno - FIRST_FPR_REGNO + 32;
24217 if (ALTIVEC_REGNO_P (regno))
24218 return regno - FIRST_ALTIVEC_REGNO + 77;
24219 if (regno == LR_REGNO)
24220 return 65;
24221 if (regno == CTR_REGNO)
24222 return 66;
24223 if (regno == CA_REGNO)
24224 return 76; /* XER */
24225 if (CR_REGNO_P (regno))
24226 return regno - CR0_REGNO + 68;
24227 if (regno == VRSAVE_REGNO)
24228 return 109;
24229 if (regno == VSCR_REGNO)
24230 return 110;
24231
24232 if (regno == FRAME_POINTER_REGNUM)
24233 return 111;
24234 if (regno == ARG_POINTER_REGNUM)
24235 return 67;
24236 if (regno == 64)
24237 return 64;
24238
24239 gcc_unreachable ();
24240 }
24241
24242 /* target hook eh_return_filter_mode */
24243 static scalar_int_mode
24244 rs6000_eh_return_filter_mode (void)
24245 {
24246 return TARGET_32BIT ? SImode : word_mode;
24247 }
24248
24249 /* Target hook for translate_mode_attribute. */
24250 static machine_mode
24251 rs6000_translate_mode_attribute (machine_mode mode)
24252 {
24253 if ((FLOAT128_IEEE_P (mode)
24254 && ieee128_float_type_node == long_double_type_node)
24255 || (FLOAT128_IBM_P (mode)
24256 && ibm128_float_type_node == long_double_type_node))
24257 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
24258 return mode;
24259 }
24260
24261 /* Target hook for scalar_mode_supported_p. */
24262 static bool
24263 rs6000_scalar_mode_supported_p (scalar_mode mode)
24264 {
24265 /* -m32 does not support TImode. This is the default, from
24266 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
24267 same ABI as for -m32. But default_scalar_mode_supported_p allows
24268 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
24269 for -mpowerpc64. */
24270 if (TARGET_32BIT && mode == TImode)
24271 return false;
24272
24273 if (DECIMAL_FLOAT_MODE_P (mode))
24274 return default_decimal_float_supported_p ();
24275 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
24276 return true;
24277 else
24278 return default_scalar_mode_supported_p (mode);
24279 }
24280
24281 /* Target hook for libgcc_floating_mode_supported_p. */
24282
24283 static bool
24284 rs6000_libgcc_floating_mode_supported_p (scalar_float_mode mode)
24285 {
24286 switch (mode)
24287 {
24288 case E_SFmode:
24289 case E_DFmode:
24290 case E_TFmode:
24291 return true;
24292
24293 /* We only return true for KFmode if IEEE 128-bit types are supported, and
24294 if long double does not use the IEEE 128-bit format. If long double
24295 uses the IEEE 128-bit format, it will use TFmode and not KFmode.
24296 Because the code will not use KFmode in that case, there will be aborts
24297 because it can't find KFmode in the Floatn types. */
24298 case E_KFmode:
24299 return TARGET_FLOAT128_TYPE && !TARGET_IEEEQUAD;
24300
24301 default:
24302 return false;
24303 }
24304 }
24305
24306 /* Target hook for vector_mode_supported_p. */
24307 static bool
24308 rs6000_vector_mode_supported_p (machine_mode mode)
24309 {
24310 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
24311 128-bit, the compiler might try to widen IEEE 128-bit to IBM
24312 double-double. */
24313 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
24314 return true;
24315
24316 else
24317 return false;
24318 }
24319
24320 /* Target hook for floatn_mode. */
24321 static opt_scalar_float_mode
24322 rs6000_floatn_mode (int n, bool extended)
24323 {
24324 if (extended)
24325 {
24326 switch (n)
24327 {
24328 case 32:
24329 return DFmode;
24330
24331 case 64:
24332 if (TARGET_FLOAT128_TYPE)
24333 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
24334 else
24335 return opt_scalar_float_mode ();
24336
24337 case 128:
24338 return opt_scalar_float_mode ();
24339
24340 default:
24341 /* Those are the only valid _FloatNx types. */
24342 gcc_unreachable ();
24343 }
24344 }
24345 else
24346 {
24347 switch (n)
24348 {
24349 case 32:
24350 return SFmode;
24351
24352 case 64:
24353 return DFmode;
24354
24355 case 128:
24356 if (TARGET_FLOAT128_TYPE)
24357 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
24358 else
24359 return opt_scalar_float_mode ();
24360
24361 default:
24362 return opt_scalar_float_mode ();
24363 }
24364 }
24365
24366 }
24367
24368 /* Target hook for c_mode_for_suffix. */
24369 static machine_mode
24370 rs6000_c_mode_for_suffix (char suffix)
24371 {
24372 if (TARGET_FLOAT128_TYPE)
24373 {
24374 if (suffix == 'q' || suffix == 'Q')
24375 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
24376
24377 /* At the moment, we are not defining a suffix for IBM extended double.
24378 If/when the default for -mabi=ieeelongdouble is changed, and we want
24379 to support __ibm128 constants in legacy library code, we may need to
24380          re-evaluate this decision.  Currently, c-lex.cc only supports 'w' and
24381 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
24382 __float80 constants. */
24383 }
24384
24385 return VOIDmode;
24386 }
24387
24388 /* Target hook for invalid_arg_for_unprototyped_fn. */
24389 static const char *
24390 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
24391 {
24392 return (!rs6000_darwin64_abi
24393 && typelist == 0
24394 && VECTOR_TYPE_P (TREE_TYPE (val))
24395 && (funcdecl == NULL_TREE
24396 || (TREE_CODE (funcdecl) == FUNCTION_DECL
24397 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD
24398 && !fndecl_built_in_p (funcdecl, BUILT_IN_CLASSIFY_TYPE))))
24399 ? N_("AltiVec argument passed to unprototyped function")
24400 : NULL;
24401 }
24402
24403 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
24404 setup by using __stack_chk_fail_local hidden function instead of
24405 calling __stack_chk_fail directly. Otherwise it is better to call
24406 __stack_chk_fail directly. */
24407
24408 static tree ATTRIBUTE_UNUSED
24409 rs6000_stack_protect_fail (void)
24410 {
24411 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
24412 ? default_hidden_stack_protect_fail ()
24413 : default_external_stack_protect_fail ();
24414 }
24415
24416 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
24417
24418 #if TARGET_ELF
24419 static unsigned HOST_WIDE_INT
24420 rs6000_asan_shadow_offset (void)
24421 {
24422 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
24423 }
24424 #endif
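
/* For reference, a sketch of the resulting constants: 1 << 41 is
   0x20000000000 (64-bit) and 1 << 29 is 0x20000000 (32-bit), the
   offsets ASan adds to (address >> 3) to locate shadow memory.  */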
24425 \f
24426 /* Mask options that we want to support inside of attribute((target)) and
24427 #pragma GCC target operations. Note, we do not include things like
24428 64/32-bit, endianness, hard/soft floating point, etc. that would have
24429 different calling sequences. */
24430
24431 struct rs6000_opt_mask {
24432 const char *name; /* option name */
24433 HOST_WIDE_INT mask; /* mask to set */
24434 bool invert; /* invert sense of mask */
24435 bool valid_target; /* option is a target option */
24436 };
24437
24438 static struct rs6000_opt_mask const rs6000_opt_masks[] =
24439 {
24440 { "altivec", OPTION_MASK_ALTIVEC, false, true },
24441 { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX,
24442 false, true },
24443 { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR,
24444 false, true },
24445 { "cmpb", OPTION_MASK_CMPB, false, true },
24446 { "crypto", OPTION_MASK_CRYPTO, false, true },
24447 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
24448 { "dlmzb", OPTION_MASK_DLMZB, false, true },
24449 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
24450 false, true },
24451 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
24452 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
24453 { "fprnd", OPTION_MASK_FPRND, false, true },
24454 { "power10", OPTION_MASK_POWER10, false, true },
24455 { "power11", OPTION_MASK_POWER11, false, false },
24456 { "hard-dfp", OPTION_MASK_DFP, false, true },
24457 { "htm", OPTION_MASK_HTM, false, true },
24458 { "isel", OPTION_MASK_ISEL, false, true },
24459 { "mfcrf", OPTION_MASK_MFCRF, false, true },
24460 { "mfpgpr", 0, false, true },
24461 { "mma", OPTION_MASK_MMA, false, true },
24462 { "modulo", OPTION_MASK_MODULO, false, true },
24463 { "mulhw", OPTION_MASK_MULHW, false, true },
24464 { "multiple", OPTION_MASK_MULTIPLE, false, true },
24465 { "pcrel", OPTION_MASK_PCREL, false, true },
24466 { "pcrel-opt", OPTION_MASK_PCREL_OPT, false, true },
24467 { "popcntb", OPTION_MASK_POPCNTB, false, true },
24468 { "popcntd", OPTION_MASK_POPCNTD, false, true },
24469 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
24470 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
24471 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
24472 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
24473 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
24474 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
24475 { "power10-fusion", OPTION_MASK_P10_FUSION, false, true },
24476 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
24477 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
24478 { "prefixed", OPTION_MASK_PREFIXED, false, true },
24479 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
24480 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
24481 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
24482 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
24483 { "string", 0, false, true },
24484 { "update", OPTION_MASK_NO_UPDATE, true , true },
24485 { "vsx", OPTION_MASK_VSX, false, true },
24486 #ifdef OPTION_MASK_64BIT
24487 #if TARGET_AIX_OS
24488 { "aix64", OPTION_MASK_64BIT, false, false },
24489 { "aix32", OPTION_MASK_64BIT, true, false },
24490 #else
24491 { "64", OPTION_MASK_64BIT, false, false },
24492 { "32", OPTION_MASK_64BIT, true, false },
24493 #endif
24494 #endif
24495 #ifdef OPTION_MASK_EABI
24496 { "eabi", OPTION_MASK_EABI, false, false },
24497 #endif
24498 #ifdef OPTION_MASK_LITTLE_ENDIAN
24499 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
24500 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
24501 #endif
24502 #ifdef OPTION_MASK_RELOCATABLE
24503 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
24504 #endif
24505 #ifdef OPTION_MASK_STRICT_ALIGN
24506 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
24507 #endif
24508 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
24509 { "string", 0, false, false },
24510 };
24511
24512 /* Option variables that we want to support inside attribute((target)) and
24513 #pragma GCC target operations. */
24514
24515 struct rs6000_opt_var {
24516 const char *name; /* option name */
24517 size_t global_offset; /* offset of the option in global_options. */
24518 size_t target_offset; /* offset of the option in target options. */
24519 };
24520
24521 static struct rs6000_opt_var const rs6000_opt_vars[] =
24522 {
24523 { "friz",
24524 offsetof (struct gcc_options, x_TARGET_FRIZ),
24525 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
24526 { "avoid-indexed-addresses",
24527 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
24528 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
24529 { "longcall",
24530 offsetof (struct gcc_options, x_rs6000_default_long_calls),
24531 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
24532 { "optimize-swaps",
24533 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
24534 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
24535 { "allow-movmisalign",
24536 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
24537 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
24538 { "sched-groups",
24539 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
24540 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
24541 { "always-hint",
24542 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
24543 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
24544 { "align-branch-targets",
24545 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
24546 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
24547 { "sched-prolog",
24548 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24549 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24550 { "sched-epilog",
24551 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24552 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24553 { "speculate-indirect-jumps",
24554 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
24555 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
24556 };
24557
24558 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
24559 parsing. Return true if there were no errors. */
24560
24561 static bool
24562 rs6000_inner_target_options (tree args, bool attr_p)
24563 {
24564 bool ret = true;
24565
24566 if (args == NULL_TREE)
24567 ;
24568
24569 else if (TREE_CODE (args) == STRING_CST)
24570 {
24571 char *p = ASTRDUP (TREE_STRING_POINTER (args));
24572 char *q;
24573
24574 while ((q = strtok (p, ",")) != NULL)
24575 {
24576 bool error_p = false;
24577 bool not_valid_p = false;
24578 const char *cpu_opt = NULL;
24579
24580 p = NULL;
24581 if (startswith (q, "cpu="))
24582 {
24583 int cpu_index = rs6000_cpu_name_lookup (q+4);
24584 if (cpu_index >= 0)
24585 rs6000_cpu_index = cpu_index;
24586 else
24587 {
24588 error_p = true;
24589 cpu_opt = q+4;
24590 }
24591 }
24592 else if (startswith (q, "tune="))
24593 {
24594 int tune_index = rs6000_cpu_name_lookup (q+5);
24595 if (tune_index >= 0)
24596 rs6000_tune_index = tune_index;
24597 else
24598 {
24599 error_p = true;
24600 cpu_opt = q+5;
24601 }
24602 }
24603 else
24604 {
24605 size_t i;
24606 bool invert = false;
24607 char *r = q;
24608
24609 error_p = true;
24610 if (startswith (r, "no-"))
24611 {
24612 invert = true;
24613 r += 3;
24614 }
24615
24616 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
24617 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
24618 {
24619 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
24620
24621 if (!rs6000_opt_masks[i].valid_target)
24622 not_valid_p = true;
24623 else
24624 {
24625 error_p = false;
24626 rs6000_isa_flags_explicit |= mask;
24627
24628 /* VSX needs altivec, so -mvsx automagically sets
24629 altivec and disables -mavoid-indexed-addresses. */
24630 if (!invert)
24631 {
24632 if (mask == OPTION_MASK_VSX)
24633 {
24634 mask |= OPTION_MASK_ALTIVEC;
24635 TARGET_AVOID_XFORM = 0;
24636 }
24637 }
24638
24639 if (rs6000_opt_masks[i].invert)
24640 invert = !invert;
24641
24642 if (invert)
24643 rs6000_isa_flags &= ~mask;
24644 else
24645 rs6000_isa_flags |= mask;
24646 }
24647 break;
24648 }
24649
24650 if (error_p && !not_valid_p)
24651 {
24652 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
24653 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
24654 {
24655 size_t j = rs6000_opt_vars[i].global_offset;
24656 *((int *) ((char *)&global_options + j)) = !invert;
24657 error_p = false;
24658 not_valid_p = false;
24659 break;
24660 }
24661 }
24662 }
24663
24664 if (error_p)
24665 {
24666 const char *eprefix, *esuffix;
24667
24668 ret = false;
24669 if (attr_p)
24670 {
24671 eprefix = "__attribute__((__target__(";
24672 esuffix = ")))";
24673 }
24674 else
24675 {
24676 eprefix = "#pragma GCC target ";
24677 esuffix = "";
24678 }
24679
24680 if (cpu_opt)
24681 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
24682 q, esuffix);
24683 else if (not_valid_p)
24684 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
24685 else
24686 error ("%s%qs%s is invalid", eprefix, q, esuffix);
24687 }
24688 }
24689 }
24690
24691 else if (TREE_CODE (args) == TREE_LIST)
24692 {
24693 do
24694 {
24695 tree value = TREE_VALUE (args);
24696 if (value)
24697 {
24698 bool ret2 = rs6000_inner_target_options (value, attr_p);
24699 if (!ret2)
24700 ret = false;
24701 }
24702 args = TREE_CHAIN (args);
24703 }
24704 while (args != NULL_TREE);
24705 }
24706
24707 else
24708 {
24709 error ("attribute %<target%> argument not a string");
24710 return false;
24711 }
24712
24713 return ret;
24714 }
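
/* Usage sketch for the parser above (the option strings are just
   examples):

     #pragma GCC target ("cpu=power10,no-vsx")
     __attribute__((__target__("popcntd,tune=power9")))

   Each comma-separated token is handled independently: "cpu=" and
   "tune=" set rs6000_cpu_index / rs6000_tune_index, a "no-" prefix
   inverts the sense, and the remainder must name an entry in
   rs6000_opt_masks or rs6000_opt_vars.  */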
24715
24716 /* Print out the target options as a list for -mdebug=target. */
24717
24718 static void
24719 rs6000_debug_target_options (tree args, const char *prefix)
24720 {
24721 if (args == NULL_TREE)
24722 fprintf (stderr, "%s<NULL>", prefix);
24723
24724 else if (TREE_CODE (args) == STRING_CST)
24725 {
24726 char *p = ASTRDUP (TREE_STRING_POINTER (args));
24727 char *q;
24728
24729 while ((q = strtok (p, ",")) != NULL)
24730 {
24731 p = NULL;
24732 fprintf (stderr, "%s\"%s\"", prefix, q);
24733 prefix = ", ";
24734 }
24735 }
24736
24737 else if (TREE_CODE (args) == TREE_LIST)
24738 {
24739 do
24740 {
24741 tree value = TREE_VALUE (args);
24742 if (value)
24743 {
24744 rs6000_debug_target_options (value, prefix);
24745 prefix = ", ";
24746 }
24747 args = TREE_CHAIN (args);
24748 }
24749 while (args != NULL_TREE);
24750 }
24751
24752 else
24753 gcc_unreachable ();
24754
24755 return;
24756 }
24757
24758 \f
24759 /* Hook to validate attribute((target("..."))). */
24760
24761 static bool
24762 rs6000_valid_attribute_p (tree fndecl,
24763 tree ARG_UNUSED (name),
24764 tree args,
24765 int flags)
24766 {
24767 struct cl_target_option cur_target;
24768 bool ret;
24769 tree old_optimize;
24770 tree new_target, new_optimize;
24771 tree func_optimize;
24772
24773 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
24774
24775 if (TARGET_DEBUG_TARGET)
24776 {
24777 tree tname = DECL_NAME (fndecl);
24778 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
24779 if (tname)
24780 fprintf (stderr, "function: %.*s\n",
24781 (int) IDENTIFIER_LENGTH (tname),
24782 IDENTIFIER_POINTER (tname));
24783 else
24784 fprintf (stderr, "function: unknown\n");
24785
24786 fprintf (stderr, "args:");
24787 rs6000_debug_target_options (args, " ");
24788 fprintf (stderr, "\n");
24789
24790 if (flags)
24791 fprintf (stderr, "flags: 0x%x\n", flags);
24792
24793 fprintf (stderr, "--------------------\n");
24794 }
24795
24796 /* attribute((target("default"))) does nothing, beyond
24797 affecting multi-versioning. */
24798 if (TREE_VALUE (args)
24799 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
24800 && TREE_CHAIN (args) == NULL_TREE
24801 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
24802 return true;
24803
24804 old_optimize = build_optimization_node (&global_options,
24805 &global_options_set);
24806 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
24807
24808 /* If the function changed the optimization levels as well as setting target
24809 options, start with the optimizations specified. */
24810 if (func_optimize && func_optimize != old_optimize)
24811 cl_optimization_restore (&global_options, &global_options_set,
24812 TREE_OPTIMIZATION (func_optimize));
24813
24814 /* The target attributes may also change some optimization flags, so update
24815 the optimization options if necessary. */
24816 cl_target_option_save (&cur_target, &global_options, &global_options_set);
24817 rs6000_cpu_index = rs6000_tune_index = -1;
24818 ret = rs6000_inner_target_options (args, true);
24819
24820 /* Set up any additional state. */
24821 if (ret)
24822 {
24823 ret = rs6000_option_override_internal (false);
24824 new_target = build_target_option_node (&global_options,
24825 &global_options_set);
24826 }
24827 else
24828 new_target = NULL;
24829
24830 new_optimize = build_optimization_node (&global_options,
24831 &global_options_set);
24832
24833 if (!new_target)
24834 ret = false;
24835
24836 else if (fndecl)
24837 {
24838 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
24839
24840 if (old_optimize != new_optimize)
24841 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
24842 }
24843
24844 cl_target_option_restore (&global_options, &global_options_set, &cur_target);
24845
24846 if (old_optimize != new_optimize)
24847 cl_optimization_restore (&global_options, &global_options_set,
24848 TREE_OPTIMIZATION (old_optimize));
24849
24850 return ret;
24851 }
24852
24853 \f
24854 /* Hook to validate the current #pragma GCC target and set the state, and
24855 update the macros based on what was changed. If ARGS is NULL, then
24856 POP_TARGET is used to reset the options. */
24857
24858 bool
24859 rs6000_pragma_target_parse (tree args, tree pop_target)
24860 {
24861 tree prev_tree = build_target_option_node (&global_options,
24862 &global_options_set);
24863 tree cur_tree;
24864 struct cl_target_option *prev_opt, *cur_opt;
24865 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
24866
24867 if (TARGET_DEBUG_TARGET)
24868 {
24869 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
24870 fprintf (stderr, "args:");
24871 rs6000_debug_target_options (args, " ");
24872 fprintf (stderr, "\n");
24873
24874 if (pop_target)
24875 {
24876 fprintf (stderr, "pop_target:\n");
24877 debug_tree (pop_target);
24878 }
24879 else
24880 fprintf (stderr, "pop_target: <NULL>\n");
24881
24882 fprintf (stderr, "--------------------\n");
24883 }
24884
24885 if (! args)
24886 {
24887 cur_tree = ((pop_target)
24888 ? pop_target
24889 : target_option_default_node);
24890 cl_target_option_restore (&global_options, &global_options_set,
24891 TREE_TARGET_OPTION (cur_tree));
24892 }
24893 else
24894 {
24895 rs6000_cpu_index = rs6000_tune_index = -1;
24896 if (!rs6000_inner_target_options (args, false)
24897 || !rs6000_option_override_internal (false)
24898 || (cur_tree = build_target_option_node (&global_options,
24899 &global_options_set))
24900 == NULL_TREE)
24901 {
24902 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
24903 fprintf (stderr, "invalid pragma\n");
24904
24905 return false;
24906 }
24907 }
24908
24909 target_option_current_node = cur_tree;
24910 rs6000_activate_target_options (target_option_current_node);
24911
24912 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
24913 change the macros that are defined. */
24914 if (rs6000_target_modify_macros_ptr)
24915 {
24916 prev_opt = TREE_TARGET_OPTION (prev_tree);
24917 prev_flags = prev_opt->x_rs6000_isa_flags;
24918
24919 cur_opt = TREE_TARGET_OPTION (cur_tree);
24920 cur_flags = cur_opt->x_rs6000_isa_flags;
24921
24922 diff_flags = (prev_flags ^ cur_flags);
24923
24924 if (diff_flags != 0)
24925 {
24926 /* Delete old macros. */
24927 rs6000_target_modify_macros_ptr (false,
24928 prev_flags & diff_flags);
24929
24930 /* Define new macros. */
24931 rs6000_target_modify_macros_ptr (true,
24932 cur_flags & diff_flags);
24933 }
24934 }
24935
24936 return true;
24937 }
24938
24939 \f
24940 /* Remember the last target of rs6000_set_current_function. */
24941 static GTY(()) tree rs6000_previous_fndecl;
24942
24943 /* Restore target's globals from NEW_TREE and invalidate the
24944 rs6000_previous_fndecl cache. */
24945
24946 void
24947 rs6000_activate_target_options (tree new_tree)
24948 {
24949 cl_target_option_restore (&global_options, &global_options_set,
24950 TREE_TARGET_OPTION (new_tree));
24951 if (TREE_TARGET_GLOBALS (new_tree))
24952 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
24953 else if (new_tree == target_option_default_node)
24954 restore_target_globals (&default_target_globals);
24955 else
24956 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
24957 rs6000_previous_fndecl = NULL_TREE;
24958 }
24959
24960 /* Establish appropriate back-end context for processing the function
24961 FNDECL. The argument might be NULL to indicate processing at top
24962 level, outside of any function scope. */
24963 static void
24964 rs6000_set_current_function (tree fndecl)
24965 {
24966 if (TARGET_DEBUG_TARGET)
24967 {
24968 fprintf (stderr, "\n==================== rs6000_set_current_function");
24969
24970 if (fndecl)
24971 fprintf (stderr, ", fndecl %s (%p)",
24972 (DECL_NAME (fndecl)
24973 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
24974 : "<unknown>"), (void *)fndecl);
24975
24976 if (rs6000_previous_fndecl)
24977 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
24978
24979 fprintf (stderr, "\n");
24980 }
24981
24982 /* Only change the context if the function changes. This hook is called
24983 several times in the course of compiling a function, and we don't want to
24984 slow things down too much or call target_reinit when it isn't safe. */
24985 if (fndecl == rs6000_previous_fndecl)
24986 return;
24987
24988 tree old_tree;
24989 if (rs6000_previous_fndecl == NULL_TREE)
24990 old_tree = target_option_current_node;
24991 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
24992 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
24993 else
24994 old_tree = target_option_default_node;
24995
24996 tree new_tree;
24997 if (fndecl == NULL_TREE)
24998 {
24999 if (old_tree != target_option_current_node)
25000 new_tree = target_option_current_node;
25001 else
25002 new_tree = NULL_TREE;
25003 }
25004 else
25005 {
25006 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
25007 if (new_tree == NULL_TREE)
25008 new_tree = target_option_default_node;
25009 }
25010
25011 if (TARGET_DEBUG_TARGET)
25012 {
25013 if (new_tree)
25014 {
25015 fprintf (stderr, "\nnew fndecl target specific options:\n");
25016 debug_tree (new_tree);
25017 }
25018
25019 if (old_tree)
25020 {
25021 fprintf (stderr, "\nold fndecl target specific options:\n");
25022 debug_tree (old_tree);
25023 }
25024
25025 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
25026 fprintf (stderr, "--------------------\n");
25027 }
25028
25029 if (new_tree && old_tree != new_tree)
25030 rs6000_activate_target_options (new_tree);
25031
25032 if (fndecl)
25033 rs6000_previous_fndecl = fndecl;
25034 }
25035
25036 \f
25037 /* Save the current options */
25038
25039 static void
25040 rs6000_function_specific_save (struct cl_target_option *ptr,
25041 struct gcc_options *opts,
25042 struct gcc_options */* opts_set */)
25043 {
25044 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
25045 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
25046 }
25047
25048 /* Restore the current options */
25049
25050 static void
25051 rs6000_function_specific_restore (struct gcc_options *opts,
25052 struct gcc_options */* opts_set */,
25053 struct cl_target_option *ptr)
25054
25055 {
25056 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
25057 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
25058 (void) rs6000_option_override_internal (false);
25059 }
25060
25061 /* Print the current options */
25062
25063 static void
25064 rs6000_function_specific_print (FILE *file, int indent,
25065 struct cl_target_option *ptr)
25066 {
25067 rs6000_print_isa_options (file, indent, "Isa options set",
25068 ptr->x_rs6000_isa_flags);
25069
25070 rs6000_print_isa_options (file, indent, "Isa options explicit",
25071 ptr->x_rs6000_isa_flags_explicit);
25072 }
25073
25074 /* Helper function to print the current isa or misc options on a line. */
25075
25076 static void
25077 rs6000_print_options_internal (FILE *file,
25078 int indent,
25079 const char *string,
25080 HOST_WIDE_INT flags,
25081 const char *prefix,
25082 const struct rs6000_opt_mask *opts,
25083 size_t num_elements)
25084 {
25085 size_t i;
25086 size_t start_column = 0;
25087 size_t cur_column;
25088 size_t max_column = 120;
25089 size_t prefix_len = strlen (prefix);
25090 size_t comma_len = 0;
25091 const char *comma = "";
25092
25093 if (indent)
25094 start_column += fprintf (file, "%*s", indent, "");
25095
25096 if (!flags)
25097 {
25098 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
25099 return;
25100 }
25101
25102 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
25103
25104 /* Print the various mask options. */
25105 cur_column = start_column;
25106 for (i = 0; i < num_elements; i++)
25107 {
25108 bool invert = opts[i].invert;
25109 const char *name = opts[i].name;
25110 const char *no_str = "";
25111 HOST_WIDE_INT mask = opts[i].mask;
25112 size_t len = comma_len + prefix_len + strlen (name);
25113
25114 if (!invert)
25115 {
25116 if ((flags & mask) == 0)
25117 {
25118 no_str = "no-";
25119 len += strlen ("no-");
25120 }
25121
25122 flags &= ~mask;
25123 }
25124
25125 else
25126 {
25127 if ((flags & mask) != 0)
25128 {
25129 no_str = "no-";
25130 len += strlen ("no-");
25131 }
25132
25133 flags |= mask;
25134 }
25135
25136 cur_column += len;
25137 if (cur_column > max_column)
25138 {
25139 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
25140 cur_column = start_column + len;
25141 comma = "";
25142 }
25143
25144 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
25145 comma = ", ";
25146 comma_len = strlen (", ");
25147 }
25148
25149 fputs ("\n", file);
25150 }
25151
25152 /* Helper function to print the current isa options on a line. */
25153
25154 static void
25155 rs6000_print_isa_options (FILE *file, int indent, const char *string,
25156 HOST_WIDE_INT flags)
25157 {
25158 rs6000_print_options_internal (file, indent, string, flags, "-m",
25159 &rs6000_opt_masks[0],
25160 ARRAY_SIZE (rs6000_opt_masks));
25161 }
25162
25163 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
25164 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
25165 -mupper-regs-df, etc.).
25166
25167 This function does not handle explicit options such as the user specifying
25168 -mdirect-move. These are handled in rs6000_option_override_internal, and
25169 the appropriate error is given if needed.
25170
25171 We return a mask of all of the implicit options that should not be enabled
25172 by default. */
25173
25174 static HOST_WIDE_INT
25175 rs6000_disable_incompatible_switches (void)
25176 {
25177 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
25178 size_t i, j;
25179
25180 static const struct {
25181 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
25182 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
25183 const char *const name; /* name of the switch. */
25184 } flags[] = {
25185 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
25186 { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" },
25187 };
25188
25189 for (i = 0; i < ARRAY_SIZE (flags); i++)
25190 {
25191 HOST_WIDE_INT no_flag = flags[i].no_flag;
25192
25193 if ((rs6000_isa_flags & no_flag) == 0
25194 && (rs6000_isa_flags_explicit & no_flag) != 0)
25195 {
25196 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
25197 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
25198 & rs6000_isa_flags
25199 & dep_flags);
25200
25201 if (set_flags)
25202 {
25203 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
25204 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
25205 {
25206 set_flags &= ~rs6000_opt_masks[j].mask;
25207 error ("%<-mno-%s%> turns off %<-m%s%>",
25208 flags[i].name,
25209 rs6000_opt_masks[j].name);
25210 }
25211
25212 gcc_assert (!set_flags);
25213 }
25214
25215 rs6000_isa_flags &= ~dep_flags;
25216 ignore_masks |= no_flag | dep_flags;
25217 }
25218 }
25219
25220 return ignore_masks;
25221 }
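/* Illustrative example (not part of the original source): with
   "-mcpu=power9 -mno-vsx", the implicit VSX-dependent flags are silently
   dropped here, whereas also passing an explicit dependent option, e.g.

       gcc -mcpu=power9 -mno-vsx -mdirect-move ...

   is diagnosed with an error along the lines of

       error: '-mno-vsx' turns off '-mdirect-move'

   because the explicitly enabled flag is in OTHER_VSX_VECTOR_MASKS.  */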
25222
25223 \f
25224 /* Helper function for printing the function name when debugging. */
25225
25226 static const char *
25227 get_decl_name (tree fn)
25228 {
25229 tree name;
25230
25231 if (!fn)
25232 return "<null>";
25233
25234 name = DECL_NAME (fn);
25235 if (!name)
25236 return "<no-name>";
25237
25238 return IDENTIFIER_POINTER (name);
25239 }
25240
25241 /* Return the clone id of the target we are compiling code for in a target
25242 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
25243 the priority list for the target clones (ordered from lowest to
25244 highest). */
25245
25246 static int
25247 rs6000_clone_priority (tree fndecl)
25248 {
25249 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
25250 HOST_WIDE_INT isa_masks;
25251 int ret = CLONE_DEFAULT;
25252 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
25253 const char *attrs_str = NULL;
25254
25255 attrs = TREE_VALUE (TREE_VALUE (attrs));
25256 attrs_str = TREE_STRING_POINTER (attrs);
25257
25258 /* Return priority zero for default function. Return the ISA needed for the
25259 function if it is not the default. */
25260 if (strcmp (attrs_str, "default") != 0)
25261 {
25262 if (fn_opts == NULL_TREE)
25263 fn_opts = target_option_default_node;
25264
25265 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
25266 isa_masks = rs6000_isa_flags;
25267 else
25268 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
25269
25270 for (ret = CLONE_MAX - 1; ret != 0; ret--)
25271 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
25272 break;
25273 }
25274
25275 if (TARGET_DEBUG_TARGET)
25276 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
25277 get_decl_name (fndecl), ret);
25278
25279 return ret;
25280 }
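/* Illustrative example (not part of the original source): for

       __attribute__ ((target_clones ("cpu=power9", "default")))
       long f (long x) { return x * 3; }

   the "default" clone gets priority CLONE_DEFAULT (0), while the
   "cpu=power9" clone matches the rs6000_clone_map entry for ISA 3.0 and
   so gets a higher priority, making its runtime test come first in the
   dispatcher.  */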
25281
25282 /* This compares the priority of target features in function DECL1 and DECL2.
25283 It returns positive value if DECL1 is higher priority, negative value if
25284 DECL2 is higher priority and 0 if they are the same. Note, priorities are
25285 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
25286
25287 static int
25288 rs6000_compare_version_priority (tree decl1, tree decl2)
25289 {
25290 int priority1 = rs6000_clone_priority (decl1);
25291 int priority2 = rs6000_clone_priority (decl2);
25292 int ret = priority1 - priority2;
25293
25294 if (TARGET_DEBUG_TARGET)
25295 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
25296 get_decl_name (decl1), get_decl_name (decl2), ret);
25297
25298 return ret;
25299 }
25300
25301 /* Make a dispatcher declaration for the multi-versioned function DECL.
25302 Calls to DECL function will be replaced with calls to the dispatcher
25303 by the front-end. Returns the decl of the dispatcher function. */
25304
25305 static tree
25306 rs6000_get_function_versions_dispatcher (void *decl)
25307 {
25308 tree fn = (tree) decl;
25309 struct cgraph_node *node = NULL;
25310 struct cgraph_node *default_node = NULL;
25311 struct cgraph_function_version_info *node_v = NULL;
25312 struct cgraph_function_version_info *first_v = NULL;
25313
25314 tree dispatch_decl = NULL;
25315
25316 struct cgraph_function_version_info *default_version_info = NULL;
25317 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
25318
25319 if (TARGET_DEBUG_TARGET)
25320 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
25321 get_decl_name (fn));
25322
25323 node = cgraph_node::get (fn);
25324 gcc_assert (node != NULL);
25325
25326 node_v = node->function_version ();
25327 gcc_assert (node_v != NULL);
25328
25329 if (node_v->dispatcher_resolver != NULL)
25330 return node_v->dispatcher_resolver;
25331
25332 /* Find the default version and make it the first node. */
25333 first_v = node_v;
25334 /* Go to the beginning of the chain. */
25335 while (first_v->prev != NULL)
25336 first_v = first_v->prev;
25337
25338 default_version_info = first_v;
25339 while (default_version_info != NULL)
25340 {
25341 const tree decl2 = default_version_info->this_node->decl;
25342 if (is_function_default_version (decl2))
25343 break;
25344 default_version_info = default_version_info->next;
25345 }
25346
25347 /* If there is no default node, just return NULL. */
25348 if (default_version_info == NULL)
25349 return NULL;
25350
25351 /* Make default info the first node. */
25352 if (first_v != default_version_info)
25353 {
25354 default_version_info->prev->next = default_version_info->next;
25355 if (default_version_info->next)
25356 default_version_info->next->prev = default_version_info->prev;
25357 first_v->prev = default_version_info;
25358 default_version_info->next = first_v;
25359 default_version_info->prev = NULL;
25360 }
25361
25362 default_node = default_version_info->this_node;
25363
25364 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
25365 error_at (DECL_SOURCE_LOCATION (default_node->decl),
25366 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
25367 "exports hardware capability bits");
25368 #else
25369
25370 if (targetm.has_ifunc_p ())
25371 {
25372 struct cgraph_function_version_info *it_v = NULL;
25373 struct cgraph_node *dispatcher_node = NULL;
25374 struct cgraph_function_version_info *dispatcher_version_info = NULL;
25375
25376 /* Right now, the dispatching is done via ifunc. */
25377 dispatch_decl = make_dispatcher_decl (default_node->decl);
25378 TREE_NOTHROW (dispatch_decl) = TREE_NOTHROW (fn);
25379
25380 dispatcher_node = cgraph_node::get_create (dispatch_decl);
25381 gcc_assert (dispatcher_node != NULL);
25382 dispatcher_node->dispatcher_function = 1;
25383 dispatcher_version_info
25384 = dispatcher_node->insert_new_function_version ();
25385 dispatcher_version_info->next = default_version_info;
25386 dispatcher_node->definition = 1;
25387
25388 /* Set the dispatcher for all the versions. */
25389 it_v = default_version_info;
25390 while (it_v != NULL)
25391 {
25392 it_v->dispatcher_resolver = dispatch_decl;
25393 it_v = it_v->next;
25394 }
25395 }
25396 else
25397 {
25398 error_at (DECL_SOURCE_LOCATION (default_node->decl),
25399 "multiversioning needs %<ifunc%> which is not supported "
25400 "on this target");
25401 }
25402 #endif
25403
25404 return dispatch_decl;
25405 }
25406
25407 /* Make the resolver function decl to dispatch the versions of a multi-
25408 versioned function, DEFAULT_DECL. Create an empty basic block in the
25409 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
25410 function. */
25411
25412 static tree
25413 make_resolver_func (const tree default_decl,
25414 const tree dispatch_decl,
25415 basic_block *empty_bb)
25416 {
25417 /* Make the resolver function static. The resolver function returns
25418 void *. */
25419 tree decl_name = clone_function_name (default_decl, "resolver");
25420 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
25421 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
25422 tree decl = build_fn_decl (resolver_name, type);
25423 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
25424
25425 DECL_NAME (decl) = decl_name;
25426 TREE_USED (decl) = 1;
25427 DECL_ARTIFICIAL (decl) = 1;
25428 DECL_IGNORED_P (decl) = 0;
25429 TREE_PUBLIC (decl) = 0;
25430 DECL_UNINLINABLE (decl) = 1;
25431
25432 /* Resolver is not external, body is generated. */
25433 DECL_EXTERNAL (decl) = 0;
25434 DECL_EXTERNAL (dispatch_decl) = 0;
25435
25436 DECL_CONTEXT (decl) = NULL_TREE;
25437 DECL_INITIAL (decl) = make_node (BLOCK);
25438 DECL_STATIC_CONSTRUCTOR (decl) = 0;
25439
25440 if (DECL_COMDAT_GROUP (default_decl)
25441 || TREE_PUBLIC (default_decl))
25442 {
25443 /* In this case, each translation unit with a call to this
25444 versioned function will put out a resolver. Ensure it
25445 is comdat to keep just one copy. */
25446 DECL_COMDAT (decl) = 1;
25447 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
25448 }
25449 else
25450 TREE_PUBLIC (dispatch_decl) = 0;
25451
25452 /* Build result decl and add to function_decl. */
25453 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
25454 DECL_CONTEXT (t) = decl;
25455 DECL_ARTIFICIAL (t) = 1;
25456 DECL_IGNORED_P (t) = 1;
25457 DECL_RESULT (decl) = t;
25458
25459 gimplify_function_tree (decl);
25460 push_cfun (DECL_STRUCT_FUNCTION (decl));
25461 *empty_bb = init_lowered_empty_function (decl, false,
25462 profile_count::uninitialized ());
25463
25464 cgraph_node::add_new_function (decl, true);
25465 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
25466
25467 pop_cfun ();
25468
25469 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
25470 DECL_ATTRIBUTES (dispatch_decl)
25471 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
25472
25473 cgraph_node::create_same_body_alias (dispatch_decl, decl);
25474
25475 return decl;
25476 }
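/* Sketch (illustrative only) of the artifacts this creates for a
   versioned function "foo": a local comdat resolver roughly equivalent to

       static void *foo.resolver (void);

   and "foo" itself turned into an ifunc naming that resolver,

       foo __attribute__ ((ifunc ("foo.resolver")));

   The resolver body is filled in later by dispatch_function_versions.  */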
25477
25478 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
25479 return a pointer to VERSION_DECL if we are running on a machine that
25480 supports the index CLONE_ISA hardware architecture bits. This function will
25481 be called during version dispatch to decide which function version to
25482 execute. It returns the basic block at the end, to which more conditions
25483 can be added. */
25484
25485 static basic_block
25486 add_condition_to_bb (tree function_decl, tree version_decl,
25487 int clone_isa, basic_block new_bb)
25488 {
25489 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
25490
25491 gcc_assert (new_bb != NULL);
25492 gimple_seq gseq = bb_seq (new_bb);
25493
25494
25495 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
25496 build_fold_addr_expr (version_decl));
25497 tree result_var = create_tmp_var (ptr_type_node);
25498 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
25499 gimple *return_stmt = gimple_build_return (result_var);
25500
25501 if (clone_isa == CLONE_DEFAULT)
25502 {
25503 gimple_seq_add_stmt (&gseq, convert_stmt);
25504 gimple_seq_add_stmt (&gseq, return_stmt);
25505 set_bb_seq (new_bb, gseq);
25506 gimple_set_bb (convert_stmt, new_bb);
25507 gimple_set_bb (return_stmt, new_bb);
25508 pop_cfun ();
25509 return new_bb;
25510 }
25511
25512 tree bool_zero = build_int_cst (bool_int_type_node, 0);
25513 tree cond_var = create_tmp_var (bool_int_type_node);
25514 tree predicate_decl = rs6000_builtin_decls[(int) RS6000_BIF_CPU_SUPPORTS];
25515 const char *arg_str = rs6000_clone_map[clone_isa].name;
25516 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
25517 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
25518 gimple_call_set_lhs (call_cond_stmt, cond_var);
25519
25520 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
25521 gimple_set_bb (call_cond_stmt, new_bb);
25522 gimple_seq_add_stmt (&gseq, call_cond_stmt);
25523
25524 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
25525 NULL_TREE, NULL_TREE);
25526 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
25527 gimple_set_bb (if_else_stmt, new_bb);
25528 gimple_seq_add_stmt (&gseq, if_else_stmt);
25529
25530 gimple_seq_add_stmt (&gseq, convert_stmt);
25531 gimple_seq_add_stmt (&gseq, return_stmt);
25532 set_bb_seq (new_bb, gseq);
25533
25534 basic_block bb1 = new_bb;
25535 edge e12 = split_block (bb1, if_else_stmt);
25536 basic_block bb2 = e12->dest;
25537 e12->flags &= ~EDGE_FALLTHRU;
25538 e12->flags |= EDGE_TRUE_VALUE;
25539
25540 edge e23 = split_block (bb2, return_stmt);
25541 gimple_set_bb (convert_stmt, bb2);
25542 gimple_set_bb (return_stmt, bb2);
25543
25544 basic_block bb3 = e23->dest;
25545 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
25546
25547 remove_edge (e23);
25548 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
25549
25550 pop_cfun ();
25551 return bb3;
25552 }
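/* The chain of conditions built by repeated calls to this function is
   roughly equivalent to the following C (illustrative only; the "arch_*"
   strings come from rs6000_clone_map[].name):

       if (__builtin_cpu_supports ("arch_3_00"))
         return (void *) foo_power9;
       if (__builtin_cpu_supports ("arch_2_07"))
         return (void *) foo_power8;
       return (void *) foo_default;  */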
25553
25554 /* This function generates the dispatch function for multi-versioned functions.
25555 DISPATCH_DECL is the function which will contain the dispatch logic.
25556 FNDECLS are the function choices for dispatch, passed as a vector of decls.
25557 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
25558 code is generated. */
25559
25560 static int
25561 dispatch_function_versions (tree dispatch_decl,
25562 void *fndecls_p,
25563 basic_block *empty_bb)
25564 {
25565 int ix;
25566 tree ele;
25567 vec<tree> *fndecls;
25568 tree clones[CLONE_MAX];
25569
25570 if (TARGET_DEBUG_TARGET)
25571 fputs ("dispatch_function_versions, top\n", stderr);
25572
25573 gcc_assert (dispatch_decl != NULL
25574 && fndecls_p != NULL
25575 && empty_bb != NULL);
25576
25577 /* fndecls_p is actually a vector. */
25578 fndecls = static_cast<vec<tree> *> (fndecls_p);
25579
25580 /* At least one more version other than the default. */
25581 gcc_assert (fndecls->length () >= 2);
25582
25583 /* The first version in the vector is the default decl. */
25584 memset ((void *) clones, '\0', sizeof (clones));
25585 clones[CLONE_DEFAULT] = (*fndecls)[0];
25586
25587 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
25588 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
25589 __builtin_cpu_supports ensures that the TOC fields are set up by requiring a
25590 recent glibc. If we ever need to call __builtin_cpu_init, we would need
25591 to insert the code here to do the call. */
25592
25593 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
25594 {
25595 int priority = rs6000_clone_priority (ele);
25596 if (!clones[priority])
25597 clones[priority] = ele;
25598 }
25599
25600 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
25601 if (clones[ix])
25602 {
25603 if (TARGET_DEBUG_TARGET)
25604 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
25605 ix, get_decl_name (clones[ix]));
25606
25607 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
25608 *empty_bb);
25609 }
25610
25611 return 0;
25612 }
25613
25614 /* Generate the dispatching code body to dispatch multi-versioned function
25615 DECL. The target hook is called to process the "target" attributes and
25616 provide the code to dispatch the right function at run-time. NODE points
25617 to the dispatcher decl whose body will be created. */
25618
25619 static tree
25620 rs6000_generate_version_dispatcher_body (void *node_p)
25621 {
25622 tree resolver;
25623 basic_block empty_bb;
25624 struct cgraph_node *node = (cgraph_node *) node_p;
25625 struct cgraph_function_version_info *ninfo = node->function_version ();
25626
25627 if (ninfo->dispatcher_resolver)
25628 return ninfo->dispatcher_resolver;
25629
25630 /* node is going to be an alias, so remove the finalized bit. */
25631 node->definition = false;
25632
25633 /* The first version in the chain corresponds to the default version. */
25634 ninfo->dispatcher_resolver = resolver
25635 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
25636
25637 if (TARGET_DEBUG_TARGET)
25638 fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
25639 get_decl_name (resolver));
25640
25641 push_cfun (DECL_STRUCT_FUNCTION (resolver));
25642 auto_vec<tree, 2> fn_ver_vec;
25643
25644 for (struct cgraph_function_version_info *vinfo = ninfo->next;
25645 vinfo;
25646 vinfo = vinfo->next)
25647 {
25648 struct cgraph_node *version = vinfo->this_node;
25649 /* Check for virtual functions here again, as by this time it should
25650 have been determined if this function needs a vtable index or
25651 not. This happens for methods in derived classes that override
25652 virtual methods in base classes but are not explicitly marked as
25653 virtual. */
25654 if (DECL_VINDEX (version->decl))
25655 sorry ("Virtual function multiversioning not supported");
25656
25657 fn_ver_vec.safe_push (version->decl);
25658 }
25659
25660 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
25661 cgraph_edge::rebuild_edges ();
25662 pop_cfun ();
25663 return resolver;
25664 }
25665
25666 /* Hook to decide if we need to scan function gimple statements to
25667 collect target specific information for inlining, and update the
25668 corresponding RS6000_FN_TARGET_INFO_* bit in INFO if we are able
25669 to predict which ISA feature is used at this time. Return true
25670 if we need to scan, otherwise return false. */
25671
25672 static bool
25673 rs6000_need_ipa_fn_target_info (const_tree decl,
25674 unsigned int &info ATTRIBUTE_UNUSED)
25675 {
25676 tree target = DECL_FUNCTION_SPECIFIC_TARGET (decl);
25677 if (!target)
25678 target = target_option_default_node;
25679 struct cl_target_option *opts = TREE_TARGET_OPTION (target);
25680
25681 /* See PR102059; we only handle HTM for now, so we only do the
25682 subsequent scanning when the HTM feature is enabled. */
25683 if (opts->x_rs6000_isa_flags & OPTION_MASK_HTM)
25684 return true;
25685
25686 return false;
25687 }
25688
25689 /* Hook to update target specific information INFO for inlining by
25690 checking the given STMT. Return false if we don't need to scan
25691 any more, otherwise return true. */
25692
25693 static bool
25694 rs6000_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt)
25695 {
25696 #ifndef HAVE_AS_POWER10_HTM
25697 /* Assume inline asm can use any instruction features. */
25698 if (gimple_code (stmt) == GIMPLE_ASM)
25699 {
25700 const char *asm_str = gimple_asm_string (as_a<const gasm *> (stmt));
25701 /* Ignore empty inline asm string. */
25702 if (strlen (asm_str) > 0)
25703 /* We should set any bits we are concerned with; for now
25704 OPTION_MASK_HTM is the only bit we care about. */
25705 info |= RS6000_FN_TARGET_INFO_HTM;
25706 return false;
25707 }
25708 #endif
25709
25710 if (gimple_code (stmt) == GIMPLE_CALL)
25711 {
25712 tree fndecl = gimple_call_fndecl (stmt);
25713 if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD))
25714 {
25715 enum rs6000_gen_builtins fcode
25716 = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
25717 /* HTM bifs definitely exploit HTM insns. */
25718 if (bif_is_htm (rs6000_builtin_info[fcode]))
25719 {
25720 info |= RS6000_FN_TARGET_INFO_HTM;
25721 return false;
25722 }
25723 }
25724 }
25725
25726 return true;
25727 }
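/* Illustrative example (not part of the original source): scanning a
   callee that contains

       __builtin_tbegin (0);

   hits an HTM builtin, sets RS6000_FN_TARGET_INFO_HTM, and stops the
   scan; a callee with no HTM builtins (and, without power10 HTM
   assembler support, no non-empty inline asm) leaves the bit clear, so
   rs6000_can_inline_p below can ignore its implicit HTM flag.  */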
25728
25729 /* Hook to determine if one function can safely inline another. */
25730
25731 static bool
25732 rs6000_can_inline_p (tree caller, tree callee)
25733 {
25734 bool ret = false;
25735 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
25736 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
25737
25738 /* If the caller/callee has option attributes, then use them.
25739 Otherwise, use the command line options. */
25740 if (!callee_tree)
25741 callee_tree = target_option_default_node;
25742 if (!caller_tree)
25743 caller_tree = target_option_default_node;
25744
25745 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
25746 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
25747
25748 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
25749 HOST_WIDE_INT caller_isa = caller_opts->x_rs6000_isa_flags;
25750 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
25751
25752 cgraph_node *callee_node = cgraph_node::get (callee);
25753 if (ipa_fn_summaries && ipa_fn_summaries->get (callee_node) != NULL)
25754 {
25755 unsigned int info = ipa_fn_summaries->get (callee_node)->target_info;
25756 if ((info & RS6000_FN_TARGET_INFO_HTM) == 0)
25757 {
25758 callee_isa &= ~OPTION_MASK_HTM;
25759 explicit_isa &= ~OPTION_MASK_HTM;
25760 }
25761 }
25762
25763 /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
25764 purposes. */
25765 callee_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
25766 explicit_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
25767
25768 /* The callee's options must be a subset of the caller's options, i.e.
25769 a vsx function may inline an altivec function, but a no-vsx function
25770 must not inline a vsx function. However, for those options that the
25771 callee has explicitly enabled or disabled, then we must enforce that
25772 the callee's and caller's options match exactly; see PR70010. */
25773 if (((caller_isa & callee_isa) == callee_isa)
25774 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
25775 ret = true;
25776
25777 if (TARGET_DEBUG_TARGET)
25778 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
25779 get_decl_name (caller), get_decl_name (callee),
25780 (ret ? "can" : "cannot"));
25781
25782 return ret;
25783 }
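/* Illustrative example (not part of the original source):

       __attribute__ ((target ("altivec"))) static int callee (void);
       __attribute__ ((target ("vsx"))) int caller (void);

   caller may inline callee, because the callee's ISA flags are a subset
   of the caller's; the reverse direction is rejected. Had the callee
   said target ("no-vsx") explicitly, that option would have to match
   the caller exactly (PR70010).  */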
25784 \f
25785 /* Allocate a stack temp and fixup the address so it meets the particular
25786 memory requirements (either offsettable or REG+REG addressing). */
25787
25788 rtx
25789 rs6000_allocate_stack_temp (machine_mode mode,
25790 bool offsettable_p,
25791 bool reg_reg_p)
25792 {
25793 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
25794 rtx addr = XEXP (stack, 0);
25795 int strict_p = reload_completed;
25796
25797 if (!legitimate_indirect_address_p (addr, strict_p))
25798 {
25799 if (offsettable_p
25800 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
25801 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25802
25803 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
25804 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25805 }
25806
25807 return stack;
25808 }
25809
25810 /* Given a memory reference, if it is not a reg or reg+reg addressing,
25811 convert to such a form to deal with memory reference instructions
25812 like STFIWX and LDBRX that only take reg+reg addressing. */
25813
25814 rtx
25815 rs6000_force_indexed_or_indirect_mem (rtx x)
25816 {
25817 machine_mode mode = GET_MODE (x);
25818
25819 gcc_assert (MEM_P (x));
25820 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
25821 {
25822 rtx addr = XEXP (x, 0);
25823 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
25824 {
25825 rtx reg = XEXP (addr, 0);
25826 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
25827 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
25828 gcc_assert (REG_P (reg));
25829 emit_insn (gen_add3_insn (reg, reg, size_rtx));
25830 addr = reg;
25831 }
25832 else if (GET_CODE (addr) == PRE_MODIFY)
25833 {
25834 rtx reg = XEXP (addr, 0);
25835 rtx expr = XEXP (addr, 1);
25836 gcc_assert (REG_P (reg));
25837 gcc_assert (GET_CODE (expr) == PLUS);
25838 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
25839 addr = reg;
25840 }
25841
25842 if (GET_CODE (addr) == PLUS)
25843 {
25844 rtx op0 = XEXP (addr, 0);
25845 rtx op1 = XEXP (addr, 1);
25846 op0 = force_reg (Pmode, op0);
25847 op1 = force_reg (Pmode, op1);
25848 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
25849 }
25850 else
25851 x = replace_equiv_address (x, force_reg (Pmode, addr));
25852 }
25853
25854 return x;
25855 }
25856
25857 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
25858
25859 On the RS/6000, all integer constants are acceptable; most won't be valid
25860 for particular insns, though. Only easy FP constants are acceptable. */
25861
25862 static bool
25863 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
25864 {
25865 if (TARGET_ELF && tls_referenced_p (x))
25866 return false;
25867
25868 if (CONST_DOUBLE_P (x))
25869 return easy_fp_constant (x, mode);
25870
25871 if (GET_CODE (x) == CONST_VECTOR)
25872 return easy_vector_constant (x, mode);
25873
25874 return true;
25875 }
25876
25877 #if TARGET_AIX_OS
25878 /* Implement TARGET_PRECOMPUTE_TLS_P.
25879
25880 On AIX, TLS symbols are in the TOC, which is maintained in the
25881 constant pool. AIX TOC TLS symbols need to be pre-computed, but
25882 must be considered legitimate constants. */
25883
25884 static bool
25885 rs6000_aix_precompute_tls_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
25886 {
25887 return tls_referenced_p (x);
25888 }
25889 #endif
25890
25891 \f
25892 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
25893
25894 static bool
25895 chain_already_loaded (rtx_insn *last)
25896 {
25897 for (; last != NULL; last = PREV_INSN (last))
25898 {
25899 if (NONJUMP_INSN_P (last))
25900 {
25901 rtx patt = PATTERN (last);
25902
25903 if (GET_CODE (patt) == SET)
25904 {
25905 rtx lhs = XEXP (patt, 0);
25906
25907 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
25908 return true;
25909 }
25910 }
25911 }
25912 return false;
25913 }
25914
25915 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
25916
25917 void
25918 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25919 {
25920 rtx func = func_desc;
25921 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
25922 rtx toc_load = NULL_RTX;
25923 rtx toc_restore = NULL_RTX;
25924 rtx func_addr;
25925 rtx abi_reg = NULL_RTX;
25926 rtx call[5];
25927 int n_call;
25928 rtx insn;
25929 bool is_pltseq_longcall;
25930
25931 if (global_tlsarg)
25932 tlsarg = global_tlsarg;
25933
25934 /* Handle longcall attributes. */
25935 is_pltseq_longcall = false;
25936 if ((INTVAL (cookie) & CALL_LONG) != 0
25937 && GET_CODE (func_desc) == SYMBOL_REF)
25938 {
25939 func = rs6000_longcall_ref (func_desc, tlsarg);
25940 if (TARGET_PLTSEQ)
25941 is_pltseq_longcall = true;
25942 }
25943
25944 /* Handle indirect calls. */
25945 if (!SYMBOL_REF_P (func)
25946 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
25947 {
25948 if (!rs6000_pcrel_p ())
25949 {
25950 /* Save the TOC into its reserved slot before the call,
25951 and prepare to restore it after the call. */
25952 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
25953 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
25954 gen_rtvec (1, stack_toc_offset),
25955 UNSPEC_TOCSLOT);
25956 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
25957
25958 /* Can we optimize saving the TOC in the prologue or
25959 do we need to do it at every call? */
25960 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
25961 cfun->machine->save_toc_in_prologue = true;
25962 else
25963 {
25964 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25965 rtx stack_toc_mem = gen_frame_mem (Pmode,
25966 gen_rtx_PLUS (Pmode, stack_ptr,
25967 stack_toc_offset));
25968 MEM_VOLATILE_P (stack_toc_mem) = 1;
25969 if (is_pltseq_longcall)
25970 {
25971 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
25972 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25973 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
25974 }
25975 else
25976 emit_move_insn (stack_toc_mem, toc_reg);
25977 }
25978 }
25979
25980 if (DEFAULT_ABI == ABI_ELFv2)
25981 {
25982 /* A function pointer in the ELFv2 ABI is just a plain address, but
25983 the ABI requires it to be loaded into r12 before the call. */
25984 func_addr = gen_rtx_REG (Pmode, 12);
25985 emit_move_insn (func_addr, func);
25986 abi_reg = func_addr;
25987 /* Indirect calls via CTR are strongly preferred over indirect
25988 calls via LR, so move the address there. Needed to mark
25989 this insn for linker plt sequence editing too. */
25990 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25991 if (is_pltseq_longcall)
25992 {
25993 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
25994 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25995 emit_insn (gen_rtx_SET (func_addr, mark_func));
25996 v = gen_rtvec (2, func_addr, func_desc);
25997 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25998 }
25999 else
26000 emit_move_insn (func_addr, abi_reg);
26001 }
26002 else
26003 {
26004 /* A function pointer under AIX is a pointer to a data area whose
26005 first word contains the actual address of the function, whose
26006 second word contains a pointer to its TOC, and whose third word
26007 contains a value to place in the static chain register (r11).
26008 Note that if we load the static chain, our "trampoline" need
26009 not have any executable code. */
26010
26011 /* Load up address of the actual function. */
26012 func = force_reg (Pmode, func);
26013 func_addr = gen_reg_rtx (Pmode);
26014 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
26015
26016 /* Indirect calls via CTR are strongly preferred over indirect
26017 calls via LR, so move the address there. */
26018 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
26019 emit_move_insn (ctr_reg, func_addr);
26020 func_addr = ctr_reg;
26021
26022 /* Prepare to load the TOC of the called function. Note that the
26023 TOC load must happen immediately before the actual call so
26024 that unwinding the TOC registers works correctly. See the
26025 comment in frob_update_context. */
26026 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
26027 rtx func_toc_mem = gen_rtx_MEM (Pmode,
26028 gen_rtx_PLUS (Pmode, func,
26029 func_toc_offset));
26030 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
26031
26032 /* If we have a static chain, load it up. But, if the call was
26033 originally direct, the 3rd word has not been written since no
26034 trampoline has been built, so we ought not to load it, lest we
26035 override a static chain value. */
26036 if (!(GET_CODE (func_desc) == SYMBOL_REF
26037 && SYMBOL_REF_FUNCTION_P (func_desc))
26038 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
26039 && !chain_already_loaded (get_current_sequence ()->next->last))
26040 {
26041 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
26042 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
26043 rtx func_sc_mem = gen_rtx_MEM (Pmode,
26044 gen_rtx_PLUS (Pmode, func,
26045 func_sc_offset));
26046 emit_move_insn (sc_reg, func_sc_mem);
26047 abi_reg = sc_reg;
26048 }
26049 }
26050 }
26051 else
26052 {
26053 /* No TOC register needed for calls from PC-relative callers. */
26054 if (!rs6000_pcrel_p ())
26055 /* Direct calls use the TOC: for local calls, the callee will
26056 assume the TOC register is set; for non-local calls, the
26057 PLT stub needs the TOC register. */
26058 abi_reg = toc_reg;
26059 func_addr = func;
26060 }
26061
26062 /* Create the call. */
26063 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26064 if (value != NULL_RTX)
26065 call[0] = gen_rtx_SET (value, call[0]);
26066 call[1] = gen_rtx_USE (VOIDmode, cookie);
26067 n_call = 2;
26068
26069 if (toc_load)
26070 call[n_call++] = toc_load;
26071 if (toc_restore)
26072 call[n_call++] = toc_restore;
26073
26074 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
26075
26076 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
26077 insn = emit_call_insn (insn);
26078
26079 /* Mention all registers defined by the ABI to hold information
26080 as uses in CALL_INSN_FUNCTION_USAGE. */
26081 if (abi_reg)
26082 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
26083 }
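/* Sketch (illustrative only) of the indirect call sequence emitted above
   for 64-bit AIX, with the function descriptor address in r9 (register
   choices other than r2/r11 are arbitrary):

       std r2,40(r1)      # save our TOC in its reserved slot
       ld  r10,0(r9)      # actual function address
       mtctr r10
       ld  r11,16(r9)     # static chain, when it must be loaded
       ld  r2,8(r9)       # callee's TOC, immediately before the call
       bctrl
       ld  r2,40(r1)      # restore our TOC  */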
26084
26085 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
26086
26087 void
26088 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
26089 {
26090 rtx call[2];
26091 rtx insn;
26092 rtx r12 = NULL_RTX;
26093 rtx func_addr = func_desc;
26094
26095 if (global_tlsarg)
26096 tlsarg = global_tlsarg;
26097
26098 /* Handle longcall attributes. */
26099 if (INTVAL (cookie) & CALL_LONG && SYMBOL_REF_P (func_desc))
26100 {
26101 /* PCREL can do a sibling call to a longcall function
26102 because we don't need to restore the TOC register. */
26103 gcc_assert (rs6000_pcrel_p ());
26104 func_desc = rs6000_longcall_ref (func_desc, tlsarg);
26105 }
26106 else
26107 gcc_assert (INTVAL (cookie) == 0);
26108
26109 /* For ELFv2, r12 and CTR need to hold the function address
26110 for an indirect call. */
26111 if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
26112 {
26113 r12 = gen_rtx_REG (Pmode, 12);
26114 emit_move_insn (r12, func_desc);
26115 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26116 emit_move_insn (func_addr, r12);
26117 }
26118
26119 /* Create the call. */
26120 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26121 if (value != NULL_RTX)
26122 call[0] = gen_rtx_SET (value, call[0]);
26123
26124 call[1] = simple_return_rtx;
26125
26126 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
26127 insn = emit_call_insn (insn);
26128
26129 /* Note use of the TOC register. */
26130 if (!rs6000_pcrel_p ())
26131 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
26132 gen_rtx_REG (Pmode, TOC_REGNUM));
26133
26134 /* Note use of r12. */
26135 if (r12)
26136 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
26137 }
26138
26139 /* Expand code to perform a call under the SYSV4 ABI. */
26140
26141 void
26142 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
26143 {
26144 rtx func = func_desc;
26145 rtx func_addr;
26146 rtx call[4];
26147 rtx insn;
26148 rtx abi_reg = NULL_RTX;
26149 int n;
26150
26151 if (global_tlsarg)
26152 tlsarg = global_tlsarg;
26153
26154 /* Handle longcall attributes. */
26155 if ((INTVAL (cookie) & CALL_LONG) != 0
26156 && GET_CODE (func_desc) == SYMBOL_REF)
26157 {
26158 func = rs6000_longcall_ref (func_desc, tlsarg);
26159 /* If the longcall was implemented as an inline PLT call using
26160 PLT unspecs then func will be REG:r11. If not, func will be
26161 a pseudo reg. The inline PLT call sequence supports lazy
26162 linking (and longcalls to functions in dlopen'd libraries).
26163 The other style of longcalls doesn't. The lazy linking entry
26164 to the dynamic symbol resolver requires r11 be the function
26165 address (as it is for linker generated PLT stubs). Ensure
26166 r11 stays valid to the bctrl by marking r11 used by the call. */
26167 if (TARGET_PLTSEQ)
26168 abi_reg = func;
26169 }
26170
26171 /* Handle indirect calls. */
26172 if (GET_CODE (func) != SYMBOL_REF)
26173 {
26174 func = force_reg (Pmode, func);
26175
26176 /* Indirect calls via CTR are strongly preferred over indirect
26177 calls via LR, so move the address there. That can't be left
26178 to reload because we want to mark every instruction in an
26179 inline PLT call sequence with a reloc, enabling the linker to
26180 edit the sequence back to a direct call when that makes sense. */
26181 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26182 if (abi_reg)
26183 {
26184 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
26185 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26186 emit_insn (gen_rtx_SET (func_addr, mark_func));
26187 v = gen_rtvec (2, func_addr, func_desc);
26188 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26189 }
26190 else
26191 emit_move_insn (func_addr, func);
26192 }
26193 else
26194 func_addr = func;
26195
26196 /* Create the call. */
26197 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26198 if (value != NULL_RTX)
26199 call[0] = gen_rtx_SET (value, call[0]);
26200
26201 call[1] = gen_rtx_USE (VOIDmode, cookie);
26202 n = 2;
26203 if (TARGET_SECURE_PLT
26204 && flag_pic
26205 && GET_CODE (func_addr) == SYMBOL_REF
26206 && !SYMBOL_REF_LOCAL_P (func_addr))
26207 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
26208
26209 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
26210
26211 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
26212 insn = emit_call_insn (insn);
26213 if (abi_reg)
26214 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
26215 }
26216
26217 /* Expand code to perform a sibling call under the SysV4 ABI. */
26218
26219 void
26220 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
26221 {
26222 rtx func = func_desc;
26223 rtx func_addr;
26224 rtx call[3];
26225 rtx insn;
26226 rtx abi_reg = NULL_RTX;
26227
26228 if (global_tlsarg)
26229 tlsarg = global_tlsarg;
26230
26231 /* Handle longcall attributes. */
26232 if ((INTVAL (cookie) & CALL_LONG) != 0
26233 && GET_CODE (func_desc) == SYMBOL_REF)
26234 {
26235 func = rs6000_longcall_ref (func_desc, tlsarg);
26236 /* If the longcall was implemented as an inline PLT call using
26237 PLT unspecs then func will be REG:r11. If not, func will be
26238 a pseudo reg. The inline PLT call sequence supports lazy
26239 linking (and longcalls to functions in dlopen'd libraries).
26240 The other style of longcalls doesn't. The lazy linking entry
26241 to the dynamic symbol resolver requires r11 be the function
26242 address (as it is for linker generated PLT stubs). Ensure
26243 r11 stays valid to the bctr by marking r11 used by the call. */
26244 if (TARGET_PLTSEQ)
26245 abi_reg = func;
26246 }
26247
26248 /* Handle indirect calls. */
26249 if (GET_CODE (func) != SYMBOL_REF)
26250 {
26251 func = force_reg (Pmode, func);
26252
26253 /* Indirect sibcalls must go via CTR. That can't be left to
26254 reload because we want to mark every instruction in an inline
26255 PLT call sequence with a reloc, enabling the linker to edit
26256 the sequence back to a direct call when that makes sense. */
26257 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26258 if (abi_reg)
26259 {
26260 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
26261 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26262 emit_insn (gen_rtx_SET (func_addr, mark_func));
26263 v = gen_rtvec (2, func_addr, func_desc);
26264 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26265 }
26266 else
26267 emit_move_insn (func_addr, func);
26268 }
26269 else
26270 func_addr = func;
26271
26272 /* Create the call. */
26273 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26274 if (value != NULL_RTX)
26275 call[0] = gen_rtx_SET (value, call[0]);
26276
26277 call[1] = gen_rtx_USE (VOIDmode, cookie);
26278 call[2] = simple_return_rtx;
26279
26280 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
26281 insn = emit_call_insn (insn);
26282 if (abi_reg)
26283 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
26284 }
26285
26286 #if TARGET_MACHO
26287
26288 /* Expand code to perform a call under the Darwin ABI.
26289 Modulo handling of mlongcall, this is much the same as sysv.
26290 if/when the longcall optimisation is removed, we could drop this
26291 code and use the sysv case (taking care to avoid the tls stuff).
26292
26293 We can use this for sibcalls too, if needed. */
26294
26295 void
26296 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
26297 rtx cookie, bool sibcall)
26298 {
26299 rtx func = func_desc;
26300 rtx func_addr;
26301 rtx call[3];
26302 rtx insn;
26303 int cookie_val = INTVAL (cookie);
26304 bool make_island = false;
26305
26306 /* Handle longcall attributes, there are two cases for Darwin:
26307 1) Newer linkers are capable of synthesising any branch islands needed.
26308 2) We need a helper branch island synthesised by the compiler.
26309 The second case has mostly been retired and we don't use it for m64.
26310 In fact, it is only an optimisation; we could just indirect as sysv does,
26311 but we keep it for backwards compatibility for now.
26312 If we're going to use this, then we need to keep the CALL_LONG bit set,
26313 so that we can pick up the special insn form later. */
26314 if ((cookie_val & CALL_LONG) != 0
26315 && GET_CODE (func_desc) == SYMBOL_REF)
26316 {
26317 /* FIXME: the longcall opt should not hang off this flag, it is most
26318 likely incorrect for kernel-mode code-generation. */
26319 if (darwin_symbol_stubs && TARGET_32BIT)
26320 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
26321 else
26322 {
26323 /* The linker is capable of doing this, but the user explicitly
26324 asked for -mlongcall, so we'll do the 'normal' version. */
26325 func = rs6000_longcall_ref (func_desc, NULL_RTX);
26326 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
26327 }
26328 }
26329
26330 /* Handle indirect calls. */
26331 if (GET_CODE (func) != SYMBOL_REF)
26332 {
26333 func = force_reg (Pmode, func);
26334
26335 /* Indirect calls via CTR are strongly preferred over indirect
26336 calls via LR, and are required for indirect sibcalls, so move
26337 the address there. */
26338 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26339 emit_move_insn (func_addr, func);
26340 }
26341 else
26342 func_addr = func;
26343
26344 /* Create the call. */
26345 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26346 if (value != NULL_RTX)
26347 call[0] = gen_rtx_SET (value, call[0]);
26348
26349 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
26350
26351 if (sibcall)
26352 call[2] = simple_return_rtx;
26353 else
26354 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
26355
26356 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
26357 insn = emit_call_insn (insn);
26358 /* Now we have the debug info in the insn, we can set up the branch island
26359 if we're using one. */
26360 if (make_island)
26361 {
26362 tree funname = get_identifier (XSTR (func_desc, 0));
26363
26364 if (no_previous_def (funname))
26365 {
26366 rtx label_rtx = gen_label_rtx ();
26367 char *label_buf, temp_buf[256];
26368 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
26369 CODE_LABEL_NUMBER (label_rtx));
26370 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
26371 tree labelname = get_identifier (label_buf);
26372 add_compiler_branch_island (labelname, funname,
26373 insn_line ((const rtx_insn*)insn));
26374 }
26375 }
26376 }
26377 #endif
26378
26379 void
26380 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
26381 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
26382 {
26383 #if TARGET_MACHO
26384 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
26385 #else
26386 gcc_unreachable();
26387 #endif
26388 }
26389
26390
26391 void
26392 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
26393 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
26394 {
26395 #if TARGET_MACHO
26396 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
26397 #else
26398 gcc_unreachable();
26399 #endif
26400 }
26401
26402 /* Return whether we should generate PC-relative code for FNDECL. */
26403 bool
26404 rs6000_fndecl_pcrel_p (const_tree fndecl)
26405 {
26406 if (DEFAULT_ABI != ABI_ELFv2)
26407 return false;
26408
26409 struct cl_target_option *opts = target_opts_for_fn (fndecl);
26410
26411 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26412 && TARGET_CMODEL == CMODEL_MEDIUM);
26413 }
26414
26415 /* Return whether we should generate PC-relative code for *FN. */
26416 bool
26417 rs6000_function_pcrel_p (struct function *fn)
26418 {
26419 if (DEFAULT_ABI != ABI_ELFv2)
26420 return false;
26421
26422 /* Optimize usual case. */
26423 if (fn == cfun)
26424 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26425 && TARGET_CMODEL == CMODEL_MEDIUM);
26426
26427 return rs6000_fndecl_pcrel_p (fn->decl);
26428 }
26429
26430 /* Return whether we should generate PC-relative code for the current
26431 function. */
26432 bool
26433 rs6000_pcrel_p ()
26434 {
26435 return (DEFAULT_ABI == ABI_ELFv2
26436 && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26437 && TARGET_CMODEL == CMODEL_MEDIUM);
26438 }
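/* Illustrative effect (not part of the original source): when these
   predicates are true, a global access that would classically need the
   TOC, e.g.

       addis r9,r2,x@toc@ha
       lwz   r3,x@toc@l(r9)

   can instead be a single prefixed PC-relative load, schematically

       plwz  r3,x@pcrel

   so the caller needs no TOC pointer at all.  */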
26439
26440 \f
26441 /* Given an address (ADDR), a mode (MODE), and what the format of the
26442 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
26443 for the address. */
26444
26445 enum insn_form
26446 address_to_insn_form (rtx addr,
26447 machine_mode mode,
26448 enum non_prefixed_form non_prefixed_format)
26449 {
26450 /* Single register is easy. */
26451 if (REG_P (addr) || SUBREG_P (addr))
26452 return INSN_FORM_BASE_REG;
26453
26454 /* If the non-prefixed instruction format doesn't support offset addressing,
26455 make sure only indexed addressing is allowed.
26456
26457 We special case SDmode so that the register allocator does not try to move
26458 SDmode through GPR registers, but instead uses the 32-bit integer load and
26459 store instructions for the floating point registers. */
26460 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
26461 {
26462 if (GET_CODE (addr) != PLUS)
26463 return INSN_FORM_BAD;
26464
26465 rtx op0 = XEXP (addr, 0);
26466 rtx op1 = XEXP (addr, 1);
26467 if (!REG_P (op0) && !SUBREG_P (op0))
26468 return INSN_FORM_BAD;
26469
26470 if (!REG_P (op1) && !SUBREG_P (op1))
26471 return INSN_FORM_BAD;
26472
26473 return INSN_FORM_X;
26474 }
26475
26476 /* Deal with update forms. */
26477 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
26478 return INSN_FORM_UPDATE;
26479
26480 /* Handle PC-relative symbols and labels. Check for both local and
26481 external symbols. Assume labels are always local. TLS symbols
26482 are not PC-relative for rs6000. */
26483 if (TARGET_PCREL)
26484 {
26485 if (LABEL_REF_P (addr))
26486 return INSN_FORM_PCREL_LOCAL;
26487
26488 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
26489 {
26490 if (!SYMBOL_REF_LOCAL_P (addr))
26491 return INSN_FORM_PCREL_EXTERNAL;
26492 else
26493 return INSN_FORM_PCREL_LOCAL;
26494 }
26495 }
26496
26497 if (GET_CODE (addr) == CONST)
26498 addr = XEXP (addr, 0);
26499
26500 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
26501 if (GET_CODE (addr) == LO_SUM)
26502 return INSN_FORM_LO_SUM;
26503
26504 /* Everything below must be an offset address of some form. */
26505 if (GET_CODE (addr) != PLUS)
26506 return INSN_FORM_BAD;
26507
26508 rtx op0 = XEXP (addr, 0);
26509 rtx op1 = XEXP (addr, 1);
26510
26511 /* Check for indexed addresses. */
26512 if (REG_P (op1) || SUBREG_P (op1))
26513 {
26514 if (REG_P (op0) || SUBREG_P (op0))
26515 return INSN_FORM_X;
26516
26517 return INSN_FORM_BAD;
26518 }
26519
26520 if (!CONST_INT_P (op1))
26521 return INSN_FORM_BAD;
26522
26523 HOST_WIDE_INT offset = INTVAL (op1);
26524 if (!SIGNED_INTEGER_34BIT_P (offset))
26525 return INSN_FORM_BAD;
26526
26527 /* Check for local and external PC-relative addresses. Labels are always
26528 local. TLS symbols are not PC-relative for rs6000. */
26529 if (TARGET_PCREL)
26530 {
26531 if (LABEL_REF_P (op0))
26532 return INSN_FORM_PCREL_LOCAL;
26533
26534 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
26535 {
26536 if (!SYMBOL_REF_LOCAL_P (op0))
26537 return INSN_FORM_PCREL_EXTERNAL;
26538 else
26539 return INSN_FORM_PCREL_LOCAL;
26540 }
26541 }
26542
26543 /* If it isn't PC-relative, the address must use a base register. */
26544 if (!REG_P (op0) && !SUBREG_P (op0))
26545 return INSN_FORM_BAD;
26546
26547 /* Large offsets must be prefixed. */
26548 if (!SIGNED_INTEGER_16BIT_P (offset))
26549 {
26550 if (TARGET_PREFIXED)
26551 return INSN_FORM_PREFIXED_NUMERIC;
26552
26553 return INSN_FORM_BAD;
26554 }
26555
26556 /* We have a 16-bit offset, see what default instruction format to use. */
26557 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
26558 {
26559 unsigned size = GET_MODE_SIZE (mode);
26560
26561 /* On 64-bit systems, assume 64-bit integers need to use DS form
26562 addresses (for LD/STD). VSX vectors need to use DQ form addresses
26563 (for LXV and STXV). TImode is problematical in that its normal usage
26564 is expected to be GPRs where it wants a DS instruction format, but if
26565 it goes into the vector registers, it wants a DQ instruction
26566 format. */
26567 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
26568 non_prefixed_format = NON_PREFIXED_DS;
26569
26570 else if (TARGET_VSX && size >= 16
26571 && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)))
26572 non_prefixed_format = NON_PREFIXED_DQ;
26573
26574 else
26575 non_prefixed_format = NON_PREFIXED_D;
26576 }
26577
26578 /* Classify the D/DS/DQ-form addresses. */
26579 switch (non_prefixed_format)
26580 {
26581 /* Instruction format D, all 16 bits are valid. */
26582 case NON_PREFIXED_D:
26583 return INSN_FORM_D;
26584
26585 /* Instruction format DS, bottom 2 bits must be 0. */
26586 case NON_PREFIXED_DS:
26587 if ((offset & 3) == 0)
26588 return INSN_FORM_DS;
26589
26590 else if (TARGET_PREFIXED)
26591 return INSN_FORM_PREFIXED_NUMERIC;
26592
26593 else
26594 return INSN_FORM_BAD;
26595
26596 /* Instruction format DQ, bottom 4 bits must be 0. */
26597 case NON_PREFIXED_DQ:
26598 if ((offset & 15) == 0)
26599 return INSN_FORM_DQ;
26600
26601 else if (TARGET_PREFIXED)
26602 return INSN_FORM_PREFIXED_NUMERIC;
26603
26604 else
26605 return INSN_FORM_BAD;
26606
26607 default:
26608 break;
26609 }
26610
26611 return INSN_FORM_BAD;
26612 }
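/* Illustrative classifications (not part of the original source) for a
   DImode access on a 64-bit target, where the default format is DS:

       (reg r3)                            -> INSN_FORM_BASE_REG
       (plus (reg r3) (reg r4))            -> INSN_FORM_X
       (plus (reg r3) (const_int 16))      -> INSN_FORM_DS
       (plus (reg r3) (const_int 17))      -> INSN_FORM_PREFIXED_NUMERIC
                                              with -mprefixed, else bad
       (plus (reg r3) (const_int 100000))  -> INSN_FORM_PREFIXED_NUMERIC
                                              with -mprefixed, else bad  */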
26613
26614 /* Given address rtx ADDR for a load of MODE, is this legitimate for a
26615 non-prefixed D-form or X-form instruction? NON_PREFIXED_FORMAT is
26616 given NON_PREFIXED_D or NON_PREFIXED_DS to indicate whether we want
26617 a D-form or DS-form instruction. X-form and base_reg are always
26618 allowed. */
26619 bool
26620 address_is_non_pfx_d_or_x (rtx addr, machine_mode mode,
26621 enum non_prefixed_form non_prefixed_format)
26622 {
26623 enum insn_form result_form;
26624
26625 result_form = address_to_insn_form (addr, mode, non_prefixed_format);
26626
26627 switch (non_prefixed_format)
26628 {
26629 case NON_PREFIXED_D:
26630 switch (result_form)
26631 {
26632 case INSN_FORM_X:
26633 case INSN_FORM_D:
26634 case INSN_FORM_DS:
26635 case INSN_FORM_BASE_REG:
26636 return true;
26637 default:
26638 return false;
26639 }
26640 break;
26641 case NON_PREFIXED_DS:
26642 switch (result_form)
26643 {
26644 case INSN_FORM_X:
26645 case INSN_FORM_DS:
26646 case INSN_FORM_BASE_REG:
26647 return true;
26648 default:
26649 return false;
26650 }
26651 break;
26652 default:
26653 break;
26654 }
26655 return false;
26656 }
26657
26658 /* Return true if a REG with a given MODE that is loaded from or stored into
26659 a MEM location uses a non-prefixed D/DS/DQ-form address. This is used to validate
26660 the load or store with the PCREL_OPT optimization to make sure it is an
26661 instruction that can be optimized.
26662
26663 We need to specify the MODE separately from the REG to allow for loads that
26664 include zero/sign/float extension. */
26665
26666 bool
26667 pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem)
26668 {
26669 /* If the instruction is indexed only like LFIWAX/LXSIWAX we cannot do the
26670 PCREL_OPT optimization. */
26671 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode);
26672 if (non_prefixed == NON_PREFIXED_X)
26673 return false;
26674
26675 /* Check if this is a non-prefixed D/DS/DQ-form instruction. */
26676 rtx addr = XEXP (mem, 0);
26677 enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed);
26678 return (iform == INSN_FORM_BASE_REG
26679 || iform == INSN_FORM_D
26680 || iform == INSN_FORM_DS
26681 || iform == INSN_FORM_DQ);
26682 }
26683
26684 /* Helper function to see if we're potentially looking at lfs/stfs.
26685 - PARALLEL containing a SET and a CLOBBER
26686 - stfs:
26687 - SET is from UNSPEC_SI_FROM_SF to MEM:SI
26688 - CLOBBER is a V4SF
26689 - lfs:
26690 - SET is from UNSPEC_SF_FROM_SI to REG:SF
26691 - CLOBBER is a DI
26692 */
26693
26694 static bool
26695 is_lfs_stfs_insn (rtx_insn *insn)
26696 {
26697 rtx pattern = PATTERN (insn);
26698 if (GET_CODE (pattern) != PARALLEL)
26699 return false;
26700
26701 /* This should be a parallel with exactly one set and one clobber. */
26702 if (XVECLEN (pattern, 0) != 2)
26703 return false;
26704
26705 rtx set = XVECEXP (pattern, 0, 0);
26706 if (GET_CODE (set) != SET)
26707 return false;
26708
26709 rtx clobber = XVECEXP (pattern, 0, 1);
26710 if (GET_CODE (clobber) != CLOBBER)
26711 return false;
26712
26713 /* All we care about is that the destination of the SET is a mem:SI,
26714 the source should be an UNSPEC_SI_FROM_SF, and the clobber
26715 should be a scratch:V4SF. */
26716
26717 rtx dest = SET_DEST (set);
26718 rtx src = SET_SRC (set);
26719 rtx scratch = SET_DEST (clobber);
26720
26721 if (GET_CODE (src) != UNSPEC)
26722 return false;
26723
26724 /* stfs case. */
26725 if (XINT (src, 1) == UNSPEC_SI_FROM_SF
26726 && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
26727 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
26728 return true;
26729
26730 /* lfs case. */
26731 if (XINT (src, 1) == UNSPEC_SF_FROM_SI
26732 && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
26733 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
26734 return true;
26735
26736 return false;
26737 }
26738
26739 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
26740 instruction format (D/DS/DQ) used for offset memory. */
26741
26742 enum non_prefixed_form
26743 reg_to_non_prefixed (rtx reg, machine_mode mode)
26744 {
26745 /* If it isn't a register, use the defaults. */
26746 if (!REG_P (reg) && !SUBREG_P (reg))
26747 return NON_PREFIXED_DEFAULT;
26748
26749 unsigned int r = reg_or_subregno (reg);
26750
26751 /* If we have a pseudo, use the default instruction format. */
26752 if (!HARD_REGISTER_NUM_P (r))
26753 return NON_PREFIXED_DEFAULT;
26754
26755 unsigned size = GET_MODE_SIZE (mode);
26756
26757 /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
26758 128-bit floating point, and 128-bit integers. Before power9, only indexed
26759 addressing was available for vectors. */
26760 if (FP_REGNO_P (r))
26761 {
26762 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26763 return NON_PREFIXED_D;
26764
26765 else if (size < 8)
26766 return NON_PREFIXED_X;
26767
26768 else if (TARGET_VSX && size >= 16
26769 && (VECTOR_MODE_P (mode)
26770 || VECTOR_ALIGNMENT_P (mode)
26771 || mode == TImode || mode == CTImode))
26772 return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
26773
26774 else
26775 return NON_PREFIXED_DEFAULT;
26776 }
26777
26778 /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
26779 128-bit floating point, and 128-bit integers. Before power9, only indexed
26780 addressing was available. */
26781 else if (ALTIVEC_REGNO_P (r))
26782 {
26783 if (!TARGET_P9_VECTOR)
26784 return NON_PREFIXED_X;
26785
26786 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26787 return NON_PREFIXED_DS;
26788
26789 else if (size < 8)
26790 return NON_PREFIXED_X;
26791
26792 else if (TARGET_VSX && size >= 16
26793 && (VECTOR_MODE_P (mode)
26794 || VECTOR_ALIGNMENT_P (mode)
26795 || mode == TImode || mode == CTImode))
26796 return NON_PREFIXED_DQ;
26797
26798 else
26799 return NON_PREFIXED_DEFAULT;
26800 }
26801
26802 /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
26803 otherwise. Assume that any other register, such as LR, CRs, etc. will go
26804 through the GPR registers for memory operations. */
26805 else if (TARGET_POWERPC64 && size >= 8)
26806 return NON_PREFIXED_DS;
26807
26808 return NON_PREFIXED_D;
26809 }
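/* Summary (illustrative) of the mapping above, assuming power9 or later:

       FPR,     SFmode/8-byte scalars        -> NON_PREFIXED_D
       FPR,     16-byte vector modes         -> NON_PREFIXED_DQ
       Altivec, SFmode/8-byte scalars        -> NON_PREFIXED_DS
       Altivec, 16-byte vector modes         -> NON_PREFIXED_DQ
       GPR,     8+ bytes on 64-bit           -> NON_PREFIXED_DS
       GPR,     otherwise                    -> NON_PREFIXED_D

   Before power9, FPR vectors and all Altivec accesses are indexed-only
   (NON_PREFIXED_X).  */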
26810
26811 \f
26812 /* Whether a load instruction is a prefixed instruction. This is called from
26813 the prefixed attribute processing. */
26814
26815 bool
26816 prefixed_load_p (rtx_insn *insn)
26817 {
26818 /* Validate the insn to make sure it is a normal load insn. */
26819 extract_insn_cached (insn);
26820 if (recog_data.n_operands < 2)
26821 return false;
26822
26823 rtx reg = recog_data.operand[0];
26824 rtx mem = recog_data.operand[1];
26825
26826 if (!REG_P (reg) && !SUBREG_P (reg))
26827 return false;
26828
26829 if (!MEM_P (mem))
26830 return false;
26831
26832 /* Prefixed load instructions do not support update or indexed forms. */
26833 if (get_attr_indexed (insn) == INDEXED_YES
26834 || get_attr_update (insn) == UPDATE_YES)
26835 return false;
26836
26837 /* LWA uses the DS format instead of the D format that LWZ uses. */
26838 enum non_prefixed_form non_prefixed;
26839 machine_mode reg_mode = GET_MODE (reg);
26840 machine_mode mem_mode = GET_MODE (mem);
26841
26842 if (mem_mode == SImode && reg_mode == DImode
26843 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
26844 non_prefixed = NON_PREFIXED_DS;
26845
26846 else
26847 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26848
26849 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26850 return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
26851 else
26852 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
26853 }
26854
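/* Example (editorial sketch): with a 34-bit displacement such as

       plwz r3,0x12345678(r4)

   the offset does not fit the 16-bit D form of lwz, so address_is_prefixed
   returns true and the insn is counted as prefixed (8 bytes, with the 'p'
   mnemonic prefix emitted by rs6000_asm_output_opcode below).  */
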
26855 /* Whether a store instruction is a prefixed instruction. This is called from
26856 the prefixed attribute processing. */
26857
26858 bool
26859 prefixed_store_p (rtx_insn *insn)
26860 {
26861 /* Validate the insn to make sure it is a normal store insn. */
26862 extract_insn_cached (insn);
26863 if (recog_data.n_operands < 2)
26864 return false;
26865
26866 rtx mem = recog_data.operand[0];
26867 rtx reg = recog_data.operand[1];
26868
26869 if (!REG_P (reg) && !SUBREG_P (reg))
26870 return false;
26871
26872 if (!MEM_P (mem))
26873 return false;
26874
26875 /* Prefixed store instructions do not support update or indexed forms. */
26876 if (get_attr_indexed (insn) == INDEXED_YES
26877 || get_attr_update (insn) == UPDATE_YES)
26878 return false;
26879
26880 machine_mode mem_mode = GET_MODE (mem);
26881 rtx addr = XEXP (mem, 0);
26882 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26883
26884 /* Need to make sure we aren't looking at a stfs which doesn't look
26885 like the other things reg_to_non_prefixed/address_is_prefixed
26886 looks for. */
26887 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26888 return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
26889 else
26890 return address_is_prefixed (addr, mem_mode, non_prefixed);
26891 }
26892
26893 /* Whether a load immediate or add instruction is a prefixed instruction. This
26894 is called from the prefixed attribute processing. */
26895
26896 bool
26897 prefixed_paddi_p (rtx_insn *insn)
26898 {
26899 rtx set = single_set (insn);
26900 if (!set)
26901 return false;
26902
26903 rtx dest = SET_DEST (set);
26904 rtx src = SET_SRC (set);
26905
26906 if (!REG_P (dest) && !SUBREG_P (dest))
26907 return false;
26908
26909 /* Is this a load immediate that can't be done with a simple ADDI or
26910 ADDIS? */
26911 if (CONST_INT_P (src))
26912 return (satisfies_constraint_eI (src)
26913 && !satisfies_constraint_I (src)
26914 && !satisfies_constraint_L (src));
26915
26916 /* Is this a PADDI instruction that can't be done with a simple ADDI or
26917 ADDIS? */
26918 if (GET_CODE (src) == PLUS)
26919 {
26920 rtx op1 = XEXP (src, 1);
26921
26922 return (CONST_INT_P (op1)
26923 && satisfies_constraint_eI (op1)
26924 && !satisfies_constraint_I (op1)
26925 && !satisfies_constraint_L (op1));
26926 }
26927
26928 /* If not, is it a load of a PC-relative address? */
26929 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
26930 return false;
26931
26932 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
26933 return false;
26934
26935 enum insn_form iform = address_to_insn_form (src, Pmode,
26936 NON_PREFIXED_DEFAULT);
26937
26938 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
26939 }
26940
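/* Example (editorial sketch): "li r3,0x123456" is not encodable, since li
   only takes a 16-bit signed immediate and lis/addis only a shifted 16-bit
   value, but the constant fits the 34-bit immediate of

       pli r3,0x123456       # alias of paddi r3,0,0x123456

   so a SET of that constant is recognized here as a prefixed paddi.  */
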
26941 /* Whether the next instruction needs a 'p' prefix issued before the
26942 instruction is printed out. */
26943 static bool prepend_p_to_next_insn;
26944
26945 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
26946 outputting the assembler code. On the PowerPC, we remember if the current
26947 insn is a prefixed insn where we need to emit a 'p' before the insn.
26948
26949 In addition, if the insn is part of the PCREL_OPT optimization (a
26950 PC-relative reference to an external label), that is recorded as well. */
26951 void
26952 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
26953 {
26954 prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn)
26955 == MAYBE_PREFIXED_YES
26956 && get_attr_prefixed (insn) == PREFIXED_YES);
26957 return;
26958 }
26959
26960 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
26961 We use it to emit a 'p' for prefixed insns that is set in
26962 FINAL_PRESCAN_INSN. */
26963 void
26964 rs6000_asm_output_opcode (FILE *stream)
26965 {
26966 if (prepend_p_to_next_insn)
26967 {
26968 fprintf (stream, "p");
26969
26970 /* Reset the flag in the case where there are separate insn lines in the
26971 sequence, so the 'p' is only emitted for the first line. This shows up
26972 when we are doing the PCREL_OPT optimization, where otherwise the label
26973 created with %r<n> would also get a leading 'p' printed. */
26974 prepend_p_to_next_insn = false;
26975 }
26976
26977 return;
26978 }
26979
26980 /* Emit the relocation to tie the next instruction to a previous instruction
26981 that loads up an external address. This is used to do the PCREL_OPT
26982 optimization. Note, the label is generated after the PLD of the got
26983 pc-relative address to allow for the assembler to insert NOPs before the PLD
26984 instruction. The operand is a constant integer that is the label
26985 number. */
26986
26987 void
26988 output_pcrel_opt_reloc (rtx label_num)
26989 {
26990 rtx operands[1] = { label_num };
26991 output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
26992 operands);
26993 }
26994
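/* For example (editorial note), with label number 42 this emits

       .reloc .Lpcrel42-8,R_PPC64_PCREL_OPT,.-(.Lpcrel42-8)

   i.e. the relocation names the pld that ends at .Lpcrel42 and records the
   distance to the dependent instruction being output, so the linker can
   rewrite the pair when PCREL_OPT applies.  */
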
26995 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
26996 should be adjusted to reflect any required changes. This macro is used when
26997 there is some systematic length adjustment required that would be difficult
26998 to express in the length attribute.
26999
27000 On the PowerPC, we use this to adjust the length of an instruction if one or
27001 more prefixed instructions are generated, using the attribute
27002 max_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
27003 hardware requires that a prefixed instruction does not cross a 64-byte
27004 boundary. This means the compiler has to assume the length of the first
27005 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
27006 already set for the non-prefixed instruction, we just need to update for the
27007 difference. */
27008
27009 int
27010 rs6000_adjust_insn_length (rtx_insn *insn, int length)
27011 {
27012 if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
27013 {
27014 rtx pattern = PATTERN (insn);
27015 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
27016 && get_attr_prefixed (insn) == PREFIXED_YES)
27017 {
27018 int num_prefixed = get_attr_max_prefixed_insns (insn);
27019 length += 4 * (num_prefixed + 1);
27020 }
27021 }
27022
27023 return length;
27024 }
27025
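/* Worked example (editorial note): an insn whose length attribute was
   computed as 4 for the non-prefixed encoding and whose max_prefixed_insns
   attribute is 1 is adjusted to

       4 + 4 * (1 + 1) = 12 bytes

   covering the 8-byte prefixed encoding plus up to 4 bytes of padding the
   assembler may insert to keep it from crossing a 64-byte boundary.  */
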
27026 \f
27027 #ifdef HAVE_GAS_HIDDEN
27028 # define USE_HIDDEN_LINKONCE 1
27029 #else
27030 # define USE_HIDDEN_LINKONCE 0
27031 #endif
27032
27033 /* Fills in the label name that should be used for a 476 link stack thunk. */
27034
27035 void
27036 get_ppc476_thunk_name (char name[32])
27037 {
27038 gcc_assert (TARGET_LINK_STACK);
27039
27040 if (USE_HIDDEN_LINKONCE)
27041 sprintf (name, "__ppc476.get_thunk");
27042 else
27043 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
27044 }
27045
27046 /* This function emits the simple thunk routine that is used to preserve
27047 the link stack on the 476 cpu. */
27048
27049 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
27050 static void
27051 rs6000_code_end (void)
27052 {
27053 char name[32];
27054 tree decl;
27055
27056 if (!TARGET_LINK_STACK)
27057 return;
27058
27059 get_ppc476_thunk_name (name);
27060
27061 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
27062 build_function_type_list (void_type_node, NULL_TREE));
27063 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
27064 NULL_TREE, void_type_node);
27065 TREE_PUBLIC (decl) = 1;
27066 TREE_STATIC (decl) = 1;
27067
27068 #if RS6000_WEAK
27069 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
27070 {
27071 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
27072 targetm.asm_out.unique_section (decl, 0);
27073 switch_to_section (get_named_section (decl, NULL, 0));
27074 DECL_WEAK (decl) = 1;
27075 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
27076 targetm.asm_out.globalize_label (asm_out_file, name);
27077 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
27078 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
27079 }
27080 else
27081 #endif
27082 {
27083 switch_to_section (text_section);
27084 ASM_OUTPUT_LABEL (asm_out_file, name);
27085 }
27086
27087 DECL_INITIAL (decl) = make_node (BLOCK);
27088 current_function_decl = decl;
27089 allocate_struct_function (decl, false);
27090 init_function_start (decl);
27091 first_function_block_is_cold = false;
27092 /* Make sure unwind info is emitted for the thunk if needed. */
27093 final_start_function (emit_barrier (), asm_out_file, 1);
27094
27095 fputs ("\tblr\n", asm_out_file);
27096
27097 final_end_function ();
27098 init_insn_lengths ();
27099 free_after_compilation (cfun);
27100 set_cfun (NULL);
27101 current_function_decl = NULL;
27102 }
27103
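/* Editorial sketch of the ELF output (assuming USE_HIDDEN_LINKONCE): the
   thunk body is just

       .weak   __ppc476.get_thunk
       .hidden __ppc476.get_thunk
   __ppc476.get_thunk:
       blr

   A caller does "bl __ppc476.get_thunk" followed by "mflr" to materialize
   its own address; the matching blr keeps the 476's branch link stack
   balanced.  */
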
27104 /* Add r30 to hard reg set if the prologue sets it up and it is not
27105 pic_offset_table_rtx. */
27106
27107 static void
27108 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
27109 {
27110 if (!TARGET_SINGLE_PIC_BASE
27111 && TARGET_TOC
27112 && TARGET_MINIMAL_TOC
27113 && !constant_pool_empty_p ())
27114 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
27115 if (cfun->machine->split_stack_argp_used)
27116 add_to_hard_reg_set (&set->set, Pmode, 12);
27117
27118 /* Make sure the hard reg set doesn't include r2, which was possibly added
27119 via PIC_OFFSET_TABLE_REGNUM. */
27120 if (TARGET_TOC)
27121 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
27122 }
27123
27124 \f
27125 /* Helper function for rs6000_split_logical to emit a logical instruction after
27126 splitting the operation into individual GPR registers.
27127
27128 DEST is the destination register.
27129 OP1 and OP2 are the input source registers.
27130 CODE is the base operation (AND, IOR, XOR, NOT).
27131 MODE is the machine mode.
27132 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27133 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27134 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
27135
27136 static void
27137 rs6000_split_logical_inner (rtx dest,
27138 rtx op1,
27139 rtx op2,
27140 enum rtx_code code,
27141 machine_mode mode,
27142 bool complement_final_p,
27143 bool complement_op1_p,
27144 bool complement_op2_p)
27145 {
27146 rtx bool_rtx;
27147
27148 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
27149 if (op2 && CONST_INT_P (op2)
27150 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
27151 && !complement_final_p && !complement_op1_p && !complement_op2_p)
27152 {
27153 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
27154 HOST_WIDE_INT value = INTVAL (op2) & mask;
27155
27156 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
27157 if (code == AND)
27158 {
27159 if (value == 0)
27160 {
27161 emit_insn (gen_rtx_SET (dest, const0_rtx));
27162 return;
27163 }
27164
27165 else if (value == mask)
27166 {
27167 if (!rtx_equal_p (dest, op1))
27168 emit_insn (gen_rtx_SET (dest, op1));
27169 return;
27170 }
27171 }
27172
27173 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
27174 into separate ORI/ORIS or XORI/XORIS instructions. */
27175 else if (code == IOR || code == XOR)
27176 {
27177 if (value == 0)
27178 {
27179 if (!rtx_equal_p (dest, op1))
27180 emit_insn (gen_rtx_SET (dest, op1));
27181 return;
27182 }
27183 }
27184 }
27185
27186 if (code == AND && mode == SImode
27187 && !complement_final_p && !complement_op1_p && !complement_op2_p)
27188 {
27189 emit_insn (gen_andsi3 (dest, op1, op2));
27190 return;
27191 }
27192
27193 if (complement_op1_p)
27194 op1 = gen_rtx_NOT (mode, op1);
27195
27196 if (complement_op2_p)
27197 op2 = gen_rtx_NOT (mode, op2);
27198
27199 /* For canonical RTL, if only one arm is inverted it is the first. */
27200 if (!complement_op1_p && complement_op2_p)
27201 std::swap (op1, op2);
27202
27203 bool_rtx = ((code == NOT)
27204 ? gen_rtx_NOT (mode, op1)
27205 : gen_rtx_fmt_ee (code, mode, op1, op2));
27206
27207 if (complement_final_p)
27208 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
27209
27210 emit_insn (gen_rtx_SET (dest, bool_rtx));
27211 }
27212
27213 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
27214 operations are split immediately during RTL generation to allow for more
27215 optimizations of the AND/IOR/XOR.
27216
27217 OPERANDS is an array containing the destination and two input operands.
27218 CODE is the base operation (AND, IOR, XOR, NOT).
27219 MODE is the machine mode.
27220 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27221 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27222 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
27223 CLOBBER_REG is either NULL or a scratch register of type CC to allow
27224 formation of the AND instructions. */
27225
27226 static void
27227 rs6000_split_logical_di (rtx operands[3],
27228 enum rtx_code code,
27229 bool complement_final_p,
27230 bool complement_op1_p,
27231 bool complement_op2_p)
27232 {
27233 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
27234 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
27235 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
27236 enum hi_lo { hi = 0, lo = 1 };
27237 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
27238 size_t i;
27239
27240 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
27241 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
27242 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
27243 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
27244
27245 if (code == NOT)
27246 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
27247 else
27248 {
27249 if (!CONST_INT_P (operands[2]))
27250 {
27251 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
27252 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
27253 }
27254 else
27255 {
27256 HOST_WIDE_INT value = INTVAL (operands[2]);
27257 HOST_WIDE_INT value_hi_lo[2];
27258
27259 gcc_assert (!complement_final_p);
27260 gcc_assert (!complement_op1_p);
27261 gcc_assert (!complement_op2_p);
27262
27263 value_hi_lo[hi] = value >> 32;
27264 value_hi_lo[lo] = value & lower_32bits;
27265
27266 for (i = 0; i < 2; i++)
27267 {
27268 HOST_WIDE_INT sub_value = value_hi_lo[i];
27269
27270 if (sub_value & sign_bit)
27271 sub_value |= upper_32bits;
27272
27273 op2_hi_lo[i] = GEN_INT (sub_value);
27274
27275 /* If this is an AND instruction, check to see if we need to load
27276 the value in a register. */
27277 if (code == AND && sub_value != -1 && sub_value != 0
27278 && !and_operand (op2_hi_lo[i], SImode))
27279 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
27280 }
27281 }
27282 }
27283
27284 for (i = 0; i < 2; i++)
27285 {
27286 /* Split large IOR/XOR operations. */
27287 if ((code == IOR || code == XOR)
27288 && CONST_INT_P (op2_hi_lo[i])
27289 && !complement_final_p
27290 && !complement_op1_p
27291 && !complement_op2_p
27292 && !logical_const_operand (op2_hi_lo[i], SImode))
27293 {
27294 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
27295 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
27296 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
27297 rtx tmp = gen_reg_rtx (SImode);
27298
27299 /* Make sure the constant is sign extended. */
27300 if ((hi_16bits & sign_bit) != 0)
27301 hi_16bits |= upper_32bits;
27302
27303 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
27304 code, SImode, false, false, false);
27305
27306 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
27307 code, SImode, false, false, false);
27308 }
27309 else
27310 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
27311 code, SImode, complement_final_p,
27312 complement_op1_p, complement_op2_p);
27313 }
27314
27315 return;
27316 }
27317
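/* Example (editorial sketch): on a 32-bit target, the DImode operation

       x ^= 0x12345678;

   has a zero high word, which becomes a simple move (or nothing), while the
   low word is not a 16-bit logical constant and is split into

       xoris rX,rX,0x1234
       xori  rX,rX,0x5678  */
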
27318 /* Split the insns that make up boolean operations operating on multiple GPR
27319 registers. The boolean MD patterns ensure that the inputs either are
27320 exactly the same as the output registers, or there is no overlap.
27321
27322 OPERANDS is an array containing the destination and two input operands.
27323 CODE is the base operation (AND, IOR, XOR, NOT).
27324 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27325 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27326 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
27327
27328 void
27329 rs6000_split_logical (rtx operands[3],
27330 enum rtx_code code,
27331 bool complement_final_p,
27332 bool complement_op1_p,
27333 bool complement_op2_p)
27334 {
27335 machine_mode mode = GET_MODE (operands[0]);
27336 machine_mode sub_mode;
27337 rtx op0, op1, op2;
27338 int sub_size, regno0, regno1, nregs, i;
27339
27340 /* If this is DImode, use the specialized version that can run before
27341 register allocation. */
27342 if (mode == DImode && !TARGET_POWERPC64)
27343 {
27344 rs6000_split_logical_di (operands, code, complement_final_p,
27345 complement_op1_p, complement_op2_p);
27346 return;
27347 }
27348
27349 op0 = operands[0];
27350 op1 = operands[1];
27351 op2 = (code == NOT) ? NULL_RTX : operands[2];
27352 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
27353 sub_size = GET_MODE_SIZE (sub_mode);
27354 regno0 = REGNO (op0);
27355 regno1 = REGNO (op1);
27356
27357 gcc_assert (reload_completed);
27358 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
27359 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
27360
27361 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
27362 gcc_assert (nregs > 1);
27363
27364 if (op2 && REG_P (op2))
27365 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
27366
27367 for (i = 0; i < nregs; i++)
27368 {
27369 int offset = i * sub_size;
27370 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
27371 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
27372 rtx sub_op2 = ((code == NOT)
27373 ? NULL_RTX
27374 : simplify_subreg (sub_mode, op2, mode, offset));
27375
27376 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
27377 complement_final_p, complement_op1_p,
27378 complement_op2_p);
27379 }
27380
27381 return;
27382 }
27383
27384 /* Emit instructions to move SRC to DST. Called by splitters for
27385 multi-register moves. It will emit at most one instruction for
27386 each register that is accessed; that is, it won't emit li/lis pairs
27387 (or equivalent for 64-bit code). One of SRC or DST must be a hard
27388 register. */
27389
27390 void
27391 rs6000_split_multireg_move (rtx dst, rtx src)
27392 {
27393 /* The register number of the first register being moved. */
27394 int reg;
27395 /* The mode that is to be moved. */
27396 machine_mode mode;
27397 /* The mode that the move is being done in, and its size. */
27398 machine_mode reg_mode;
27399 int reg_mode_size;
27400 /* The number of registers that will be moved. */
27401 int nregs;
27402
27403 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
27404 mode = GET_MODE (dst);
27405 nregs = hard_regno_nregs (reg, mode);
27406
27407 /* If we have a vector quad register for MMA, and this is a load or store,
27408 see if we can use vector paired load/stores. */
27409 if (mode == XOmode && TARGET_MMA
27410 && (MEM_P (dst) || MEM_P (src)))
27411 {
27412 reg_mode = OOmode;
27413 nregs /= 2;
27414 }
27415 /* If we have a vector pair/quad mode, split it into two/four separate
27416 vectors. */
27417 else if (mode == OOmode || mode == XOmode)
27418 reg_mode = V1TImode;
27419 else if (FP_REGNO_P (reg))
27420 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
27421 (TARGET_HARD_FLOAT ? DFmode : SFmode);
27422 else if (ALTIVEC_REGNO_P (reg))
27423 reg_mode = V16QImode;
27424 else
27425 reg_mode = word_mode;
27426 reg_mode_size = GET_MODE_SIZE (reg_mode);
27427
27428 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
27429
27430 /* TDmode residing in FP registers is special, since the ISA requires that
27431 the lower-numbered word of a register pair is always the most significant
27432 word, even in little-endian mode. This does not match the usual subreg
27433 semantics, so we cannot use simplify_gen_subreg in those cases. Access
27434 the appropriate constituent registers "by hand" in little-endian mode.
27435
27436 Note we do not need to check for destructive overlap here since TDmode
27437 can only reside in even/odd register pairs. */
27438 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
27439 {
27440 rtx p_src, p_dst;
27441 int i;
27442
27443 for (i = 0; i < nregs; i++)
27444 {
27445 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
27446 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
27447 else
27448 p_src = simplify_gen_subreg (reg_mode, src, mode,
27449 i * reg_mode_size);
27450
27451 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
27452 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
27453 else
27454 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
27455 i * reg_mode_size);
27456
27457 emit_insn (gen_rtx_SET (p_dst, p_src));
27458 }
27459
27460 return;
27461 }
27462
27463 /* The __vector_pair and __vector_quad modes are multi-register
27464 modes, so if we have to load or store the registers, we have to be
27465 careful to properly swap them if we're in little endian mode
27466 below. This means the last register gets the first memory
27467 location. We also need to be careful of using the right register
27468 numbers if we are splitting XO to OO. */
27469 if (mode == OOmode || mode == XOmode)
27470 {
27471 nregs = hard_regno_nregs (reg, mode);
27472 int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
27473 if (MEM_P (dst))
27474 {
27475 unsigned offset = 0;
27476 unsigned size = GET_MODE_SIZE (reg_mode);
27477
27478 /* If we are reading an accumulator register, we have to
27479 deprime it before we can access it. */
27480 if (TARGET_MMA
27481 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27482 emit_insn (gen_mma_xxmfacc (src, src));
27483
27484 for (int i = 0; i < nregs; i += reg_mode_nregs)
27485 {
27486 unsigned subreg
27487 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
27488 rtx dst2 = adjust_address (dst, reg_mode, offset);
27489 rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
27490 offset += size;
27491 emit_insn (gen_rtx_SET (dst2, src2));
27492 }
27493
27494 return;
27495 }
27496
27497 if (MEM_P (src))
27498 {
27499 unsigned offset = 0;
27500 unsigned size = GET_MODE_SIZE (reg_mode);
27501
27502 for (int i = 0; i < nregs; i += reg_mode_nregs)
27503 {
27504 unsigned subreg
27505 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
27506 rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
27507 rtx src2 = adjust_address (src, reg_mode, offset);
27508 offset += size;
27509 emit_insn (gen_rtx_SET (dst2, src2));
27510 }
27511
27512 /* If we are writing an accumulator register, we have to
27513 prime it after we've written it. */
27514 if (TARGET_MMA
27515 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27516 emit_insn (gen_mma_xxmtacc (dst, dst));
27517
27518 return;
27519 }
27520
27521 if (GET_CODE (src) == UNSPEC
27522 || GET_CODE (src) == UNSPEC_VOLATILE)
27523 {
27524 gcc_assert (XINT (src, 1) == UNSPEC_VSX_ASSEMBLE
27525 || XINT (src, 1) == UNSPECV_MMA_ASSEMBLE);
27526 gcc_assert (REG_P (dst));
27527 if (GET_MODE (src) == XOmode)
27528 gcc_assert (FP_REGNO_P (REGNO (dst)));
27529 if (GET_MODE (src) == OOmode)
27530 gcc_assert (VSX_REGNO_P (REGNO (dst)));
27531
27532 int nvecs = XVECLEN (src, 0);
27533 for (int i = 0; i < nvecs; i++)
27534 {
27535 rtx op;
27536 int regno = reg + i;
27537
27538 if (WORDS_BIG_ENDIAN)
27539 {
27540 op = XVECEXP (src, 0, i);
27541
27542 /* If we are loading an even VSX register and the memory location
27543 is adjacent to the next register's memory location (if any),
27544 then we can load them both with one LXVP instruction. */
27545 if ((regno & 1) == 0)
27546 {
27547 rtx op2 = XVECEXP (src, 0, i + 1);
27548 if (adjacent_mem_locations (op, op2) == op)
27549 {
27550 op = adjust_address (op, OOmode, 0);
27551 /* Skip the next register, since we're going to
27552 load it together with this register. */
27553 i++;
27554 }
27555 }
27556 }
27557 else
27558 {
27559 op = XVECEXP (src, 0, nvecs - i - 1);
27560
27561 /* If we are loading an even VSX register and the memory location
27562 is adjacent to the next register's memory location (if any),
27563 then we can load them both with one LXVP instruction. */
27564 if ((regno & 1) == 0)
27565 {
27566 rtx op2 = XVECEXP (src, 0, nvecs - i - 2);
27567 if (adjacent_mem_locations (op2, op) == op2)
27568 {
27569 op = adjust_address (op2, OOmode, 0);
27570 /* Skip the next register, since we're going to
27571 load it together with this register. */
27572 i++;
27573 }
27574 }
27575 }
27576
27577 rtx dst_i = gen_rtx_REG (GET_MODE (op), regno);
27578 emit_insn (gen_rtx_SET (dst_i, op));
27579 }
27580
27581 /* We are writing an accumulator register, so we have to
27582 prime it after we've written it. */
27583 if (GET_MODE (src) == XOmode)
27584 emit_insn (gen_mma_xxmtacc (dst, dst));
27585
27586 return;
27587 }
27588
27589 /* Register -> register moves can use common code. */
27590 }
27591
27592 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
27593 {
27594 /* If we are reading an accumulator register, we have to
27595 deprime it before we can access it. */
27596 if (TARGET_MMA
27597 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27598 emit_insn (gen_mma_xxmfacc (src, src));
27599
27600 /* Move register range backwards, if we might have destructive
27601 overlap. */
27602 int i;
27603 /* XO/OO are opaque so cannot use subregs. */
27604 if (mode == OOmode || mode == XOmode )
27605 {
27606 for (i = nregs - 1; i >= 0; i--)
27607 {
27608 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
27609 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
27610 emit_insn (gen_rtx_SET (dst_i, src_i));
27611 }
27612 }
27613 else
27614 {
27615 for (i = nregs - 1; i >= 0; i--)
27616 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27617 i * reg_mode_size),
27618 simplify_gen_subreg (reg_mode, src, mode,
27619 i * reg_mode_size)));
27620 }
27621
27622 /* If we are writing an accumulator register, we have to
27623 prime it after we've written it. */
27624 if (TARGET_MMA
27625 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27626 emit_insn (gen_mma_xxmtacc (dst, dst));
27627 }
27628 else
27629 {
27630 int i;
27631 int j = -1;
27632 bool used_update = false;
27633 rtx restore_basereg = NULL_RTX;
27634
27635 if (MEM_P (src) && INT_REGNO_P (reg))
27636 {
27637 rtx breg;
27638
27639 if (GET_CODE (XEXP (src, 0)) == PRE_INC
27640 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
27641 {
27642 rtx delta_rtx;
27643 breg = XEXP (XEXP (src, 0), 0);
27644 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
27645 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
27646 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
27647 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27648 src = replace_equiv_address (src, breg);
27649 }
27650 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
27651 {
27652 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
27653 {
27654 rtx basereg = XEXP (XEXP (src, 0), 0);
27655 if (TARGET_UPDATE)
27656 {
27657 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
27658 emit_insn (gen_rtx_SET (ndst,
27659 gen_rtx_MEM (reg_mode,
27660 XEXP (src, 0))));
27661 used_update = true;
27662 }
27663 else
27664 emit_insn (gen_rtx_SET (basereg,
27665 XEXP (XEXP (src, 0), 1)));
27666 src = replace_equiv_address (src, basereg);
27667 }
27668 else
27669 {
27670 rtx basereg = gen_rtx_REG (Pmode, reg);
27671 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
27672 src = replace_equiv_address (src, basereg);
27673 }
27674 }
27675
27676 breg = XEXP (src, 0);
27677 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
27678 breg = XEXP (breg, 0);
27679
27680 /* If the base register we are using to address memory is
27681 also a destination reg, then change that register last. */
27682 if (REG_P (breg)
27683 && REGNO (breg) >= REGNO (dst)
27684 && REGNO (breg) < REGNO (dst) + nregs)
27685 j = REGNO (breg) - REGNO (dst);
27686 }
27687 else if (MEM_P (dst) && INT_REGNO_P (reg))
27688 {
27689 rtx breg;
27690
27691 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
27692 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
27693 {
27694 rtx delta_rtx;
27695 breg = XEXP (XEXP (dst, 0), 0);
27696 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
27697 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
27698 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
27699
27700 /* We have to update the breg before doing the store.
27701 Use store with update, if available. */
27702
27703 if (TARGET_UPDATE)
27704 {
27705 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27706 emit_insn (TARGET_32BIT
27707 ? (TARGET_POWERPC64
27708 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
27709 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
27710 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
27711 used_update = true;
27712 }
27713 else
27714 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27715 dst = replace_equiv_address (dst, breg);
27716 }
27717 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
27718 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
27719 {
27720 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
27721 {
27722 rtx basereg = XEXP (XEXP (dst, 0), 0);
27723 if (TARGET_UPDATE)
27724 {
27725 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27726 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
27727 XEXP (dst, 0)),
27728 nsrc));
27729 used_update = true;
27730 }
27731 else
27732 emit_insn (gen_rtx_SET (basereg,
27733 XEXP (XEXP (dst, 0), 1)));
27734 dst = replace_equiv_address (dst, basereg);
27735 }
27736 else
27737 {
27738 rtx basereg = XEXP (XEXP (dst, 0), 0);
27739 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
27740 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
27741 && REG_P (basereg)
27742 && REG_P (offsetreg)
27743 && REGNO (basereg) != REGNO (offsetreg));
27744 if (REGNO (basereg) == 0)
27745 {
27746 rtx tmp = offsetreg;
27747 offsetreg = basereg;
27748 basereg = tmp;
27749 }
27750 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
27751 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
27752 dst = replace_equiv_address (dst, basereg);
27753 }
27754 }
27755 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
27756 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
27757 }
27758
27759 /* If we are reading an accumulator register, we have to
27760 deprime it before we can access it. */
27761 if (TARGET_MMA && REG_P (src)
27762 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27763 emit_insn (gen_mma_xxmfacc (src, src));
27764
27765 for (i = 0; i < nregs; i++)
27766 {
27767 /* Calculate index to next subword. */
27768 ++j;
27769 if (j == nregs)
27770 j = 0;
27771
27772 /* If compiler already emitted move of first word by
27773 store with update, no need to do anything. */
27774 if (j == 0 && used_update)
27775 continue;
27776
27777 /* XO/OO are opaque so cannot use subregs. */
27778 if (mode == OOmode || mode == XOmode )
27779 {
27780 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
27781 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
27782 emit_insn (gen_rtx_SET (dst_i, src_i));
27783 }
27784 else
27785 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27786 j * reg_mode_size),
27787 simplify_gen_subreg (reg_mode, src, mode,
27788 j * reg_mode_size)));
27789 }
27790
27791 /* If we are writing an accumulator register, we have to
27792 prime it after we've written it. */
27793 if (TARGET_MMA && REG_P (dst)
27794 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27795 emit_insn (gen_mma_xxmtacc (dst, dst));
27796
27797 if (restore_basereg != NULL_RTX)
27798 emit_insn (restore_basereg);
27799 }
27800 }
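
/* Editorial sketch: storing a primed __vector_quad (XOmode) held in
   accumulator 0 to memory on a little-endian Power10 comes out as roughly

       xxmfacc 0              # deprime the accumulator
       stxvp   vs2,0(r3)      # last register pair -> first memory location
       stxvp   vs0,32(r3)

   matching the endian swap described in the OOmode/XOmode comment above.  */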
27801 \f
27802 /* Return true if the peephole2 can combine a load involving a combination of
27803 an addis instruction and a load with an offset that can be fused together on
27804 a power8. */
27805
27806 bool
27807 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
27808 rtx addis_value, /* addis value. */
27809 rtx target, /* target register that is loaded. */
27810 rtx mem) /* bottom part of the memory addr. */
27811 {
27812 rtx addr;
27813 rtx base_reg;
27814
27815 /* Validate arguments. */
27816 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
27817 return false;
27818
27819 if (!base_reg_operand (target, GET_MODE (target)))
27820 return false;
27821
27822 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
27823 return false;
27824
27825 /* Allow sign/zero extension. */
27826 if (GET_CODE (mem) == ZERO_EXTEND
27827 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
27828 mem = XEXP (mem, 0);
27829
27830 if (!MEM_P (mem))
27831 return false;
27832
27833 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
27834 return false;
27835
27836 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
27837 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
27838 return false;
27839
27840 /* Validate that the register used to load the high value is either the
27841 register being loaded, or we can safely replace its use.
27842
27843 This function is only called from the peephole2 pass and we assume that
27844 there are 2 instructions in the peephole (addis and load), so we want to
27845 check if the target register was not used in the memory address and the
27846 register to hold the addis result is dead after the peephole. */
27847 if (REGNO (addis_reg) != REGNO (target))
27848 {
27849 if (reg_mentioned_p (target, mem))
27850 return false;
27851
27852 if (!peep2_reg_dead_p (2, addis_reg))
27853 return false;
27854
27855 /* If the target register being loaded is the stack pointer, we must
27856 avoid loading any other value into it, even temporarily. */
27857 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
27858 return false;
27859 }
27860
27861 base_reg = XEXP (addr, 0);
27862 return REGNO (addis_reg) == REGNO (base_reg);
27863 }
27864
27865 /* During the peephole2 pass, adjust and expand the insns for a load fusion
27866 sequence. We adjust the addis register to use the target register. If the
27867 load sign extends, we adjust the code to do the zero extending load, and an
27868 explicit sign extension later since the fusion only covers zero extending
27869 loads.
27870
27871 The operands are:
27872 operands[0] register set with addis (to be replaced with target)
27873 operands[1] value set via addis
27874 operands[2] target register being loaded
27875 operands[3] D-form memory reference using operands[0]. */
27876
27877 void
27878 expand_fusion_gpr_load (rtx *operands)
27879 {
27880 rtx addis_value = operands[1];
27881 rtx target = operands[2];
27882 rtx orig_mem = operands[3];
27883 rtx new_addr, new_mem, orig_addr, offset;
27884 enum rtx_code plus_or_lo_sum;
27885 machine_mode target_mode = GET_MODE (target);
27886 machine_mode extend_mode = target_mode;
27887 machine_mode ptr_mode = Pmode;
27888 enum rtx_code extend = UNKNOWN;
27889
27890 if (GET_CODE (orig_mem) == ZERO_EXTEND
27891 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
27892 {
27893 extend = GET_CODE (orig_mem);
27894 orig_mem = XEXP (orig_mem, 0);
27895 target_mode = GET_MODE (orig_mem);
27896 }
27897
27898 gcc_assert (MEM_P (orig_mem));
27899
27900 orig_addr = XEXP (orig_mem, 0);
27901 plus_or_lo_sum = GET_CODE (orig_addr);
27902 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
27903
27904 offset = XEXP (orig_addr, 1);
27905 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
27906 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
27907
27908 if (extend != UNKNOWN)
27909 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
27910
27911 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
27912 UNSPEC_FUSION_GPR);
27913 emit_insn (gen_rtx_SET (target, new_mem));
27914
27915 if (extend == SIGN_EXTEND)
27916 {
27917 int sub_off = ((BYTES_BIG_ENDIAN)
27918 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
27919 : 0);
27920 rtx sign_reg
27921 = simplify_subreg (target_mode, target, extend_mode, sub_off);
27922
27923 emit_insn (gen_rtx_SET (target,
27924 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
27925 }
27926
27927 return;
27928 }
27929
27930 /* Emit the addis instruction that will be part of a fused instruction
27931 sequence. */
27932
27933 void
27934 emit_fusion_addis (rtx target, rtx addis_value)
27935 {
27936 rtx fuse_ops[10];
27937 const char *addis_str = NULL;
27938
27939 /* Emit the addis instruction. */
27940 fuse_ops[0] = target;
27941 if (satisfies_constraint_L (addis_value))
27942 {
27943 fuse_ops[1] = addis_value;
27944 addis_str = "lis %0,%v1";
27945 }
27946
27947 else if (GET_CODE (addis_value) == PLUS)
27948 {
27949 rtx op0 = XEXP (addis_value, 0);
27950 rtx op1 = XEXP (addis_value, 1);
27951
27952 if (REG_P (op0) && CONST_INT_P (op1)
27953 && satisfies_constraint_L (op1))
27954 {
27955 fuse_ops[1] = op0;
27956 fuse_ops[2] = op1;
27957 addis_str = "addis %0,%1,%v2";
27958 }
27959 }
27960
27961 else if (GET_CODE (addis_value) == HIGH)
27962 {
27963 rtx value = XEXP (addis_value, 0);
27964 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
27965 {
27966 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
27967 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
27968 if (TARGET_ELF)
27969 addis_str = "addis %0,%2,%1@toc@ha";
27970
27971 else if (TARGET_XCOFF)
27972 addis_str = "addis %0,%1@u(%2)";
27973
27974 else
27975 gcc_unreachable ();
27976 }
27977
27978 else if (GET_CODE (value) == PLUS)
27979 {
27980 rtx op0 = XEXP (value, 0);
27981 rtx op1 = XEXP (value, 1);
27982
27983 if (GET_CODE (op0) == UNSPEC
27984 && XINT (op0, 1) == UNSPEC_TOCREL
27985 && CONST_INT_P (op1))
27986 {
27987 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
27988 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
27989 fuse_ops[3] = op1;
27990 if (TARGET_ELF)
27991 addis_str = "addis %0,%2,%1+%3@toc@ha";
27992
27993 else if (TARGET_XCOFF)
27994 addis_str = "addis %0,%1+%3@u(%2)";
27995
27996 else
27997 gcc_unreachable ();
27998 }
27999 }
28000
28001 else if (satisfies_constraint_L (value))
28002 {
28003 fuse_ops[1] = value;
28004 addis_str = "lis %0,%v1";
28005 }
28006
28007 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
28008 {
28009 fuse_ops[1] = value;
28010 addis_str = "lis %0,%1@ha";
28011 }
28012 }
28013
28014 if (!addis_str)
28015 fatal_insn ("Could not generate addis value for fusion", addis_value);
28016
28017 output_asm_insn (addis_str, fuse_ops);
28018 }
28019
28020 /* Emit a D-form load or store instruction that is the second instruction
28021 of a fusion sequence. */
28022
28023 static void
28024 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
28025 {
28026 rtx fuse_ops[10];
28027 char insn_template[80];
28028
28029 fuse_ops[0] = load_reg;
28030 fuse_ops[1] = addis_reg;
28031
28032 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
28033 {
28034 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
28035 fuse_ops[2] = offset;
28036 output_asm_insn (insn_template, fuse_ops);
28037 }
28038
28039 else if (GET_CODE (offset) == UNSPEC
28040 && XINT (offset, 1) == UNSPEC_TOCREL)
28041 {
28042 if (TARGET_ELF)
28043 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
28044
28045 else if (TARGET_XCOFF)
28046 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
28047
28048 else
28049 gcc_unreachable ();
28050
28051 fuse_ops[2] = XVECEXP (offset, 0, 0);
28052 output_asm_insn (insn_template, fuse_ops);
28053 }
28054
28055 else if (GET_CODE (offset) == PLUS
28056 && GET_CODE (XEXP (offset, 0)) == UNSPEC
28057 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
28058 && CONST_INT_P (XEXP (offset, 1)))
28059 {
28060 rtx tocrel_unspec = XEXP (offset, 0);
28061 if (TARGET_ELF)
28062 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
28063
28064 else if (TARGET_XCOFF)
28065 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
28066
28067 else
28068 gcc_unreachable ();
28069
28070 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
28071 fuse_ops[3] = XEXP (offset, 1);
28072 output_asm_insn (insn_template, fuse_ops);
28073 }
28074
28075 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
28076 {
28077 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
28078
28079 fuse_ops[2] = offset;
28080 output_asm_insn (insn_template, fuse_ops);
28081 }
28082
28083 else
28084 fatal_insn ("Unable to generate load/store offset for fusion", offset);
28085
28086 return;
28087 }
28088
28089 /* Given an address, convert it into the addis and load offset parts. Addresses
28090 created during the peephole2 process look like:
28091 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
28092 (unspec [(...)] UNSPEC_TOCREL)) */
28093
28094 static void
28095 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
28096 {
28097 rtx hi, lo;
28098
28099 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
28100 {
28101 hi = XEXP (addr, 0);
28102 lo = XEXP (addr, 1);
28103 }
28104 else
28105 gcc_unreachable ();
28106
28107 *p_hi = hi;
28108 *p_lo = lo;
28109 }
28110
28111 /* Return a string to fuse an addis instruction with a GPR load into the same
28112 register that the addis instruction set. The address that is used
28113 is the logical address that was formed during peephole2:
28114 (lo_sum (high) (low-part))
28115
28116 The code is complicated, so we call output_asm_insn directly, and just
28117 return "". */
28118
28119 const char *
28120 emit_fusion_gpr_load (rtx target, rtx mem)
28121 {
28122 rtx addis_value;
28123 rtx addr;
28124 rtx load_offset;
28125 const char *load_str = NULL;
28126 machine_mode mode;
28127
28128 if (GET_CODE (mem) == ZERO_EXTEND)
28129 mem = XEXP (mem, 0);
28130
28131 gcc_assert (REG_P (target) && MEM_P (mem));
28132
28133 addr = XEXP (mem, 0);
28134 fusion_split_address (addr, &addis_value, &load_offset);
28135
28136 /* Now emit the load instruction to the same register. */
28137 mode = GET_MODE (mem);
28138 switch (mode)
28139 {
28140 case E_QImode:
28141 load_str = "lbz";
28142 break;
28143
28144 case E_HImode:
28145 load_str = "lhz";
28146 break;
28147
28148 case E_SImode:
28149 case E_SFmode:
28150 load_str = "lwz";
28151 break;
28152
28153 case E_DImode:
28154 case E_DFmode:
28155 gcc_assert (TARGET_POWERPC64);
28156 load_str = "ld";
28157 break;
28158
28159 default:
28160 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
28161 }
28162
28163 /* Emit the addis instruction. */
28164 emit_fusion_addis (target, addis_value);
28165
28166 /* Emit the D-form load instruction. */
28167 emit_fusion_load (target, target, load_offset, load_str);
28168
28169 return "";
28170 }
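
/* Example (editorial sketch): for a TOC-relative reference on ELF the two
   pieces emitted above come out as the fusion pair

       addis r9,r2,sym@toc@ha
       lwz   r9,sym@toc@l(r9)

   with the load targeting the same register that the addis set, which is
   what makes the pair fusible on power8.  */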
28171 \f
28172 /* This is not inside an #ifdef RS6000_GLIBC_ATOMIC_FENV because gengtype
28173 would then ignore it. */
28174 static GTY(()) tree atomic_hold_decl;
28175 static GTY(()) tree atomic_clear_decl;
28176 static GTY(()) tree atomic_update_decl;
28177
28178 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
28179 static void
28180 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
28181 {
28182 if (!TARGET_HARD_FLOAT)
28183 {
28184 #ifdef RS6000_GLIBC_ATOMIC_FENV
28185 if (atomic_hold_decl == NULL_TREE)
28186 {
28187 atomic_hold_decl
28188 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
28189 get_identifier ("__atomic_feholdexcept"),
28190 build_function_type_list (void_type_node,
28191 double_ptr_type_node,
28192 NULL_TREE));
28193 TREE_PUBLIC (atomic_hold_decl) = 1;
28194 DECL_EXTERNAL (atomic_hold_decl) = 1;
28195 }
28196
28197 if (atomic_clear_decl == NULL_TREE)
28198 {
28199 atomic_clear_decl
28200 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
28201 get_identifier ("__atomic_feclearexcept"),
28202 build_function_type_list (void_type_node,
28203 NULL_TREE));
28204 TREE_PUBLIC (atomic_clear_decl) = 1;
28205 DECL_EXTERNAL (atomic_clear_decl) = 1;
28206 }
28207
28208 tree const_double = build_qualified_type (double_type_node,
28209 TYPE_QUAL_CONST);
28210 tree const_double_ptr = build_pointer_type (const_double);
28211 if (atomic_update_decl == NULL_TREE)
28212 {
28213 atomic_update_decl
28214 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
28215 get_identifier ("__atomic_feupdateenv"),
28216 build_function_type_list (void_type_node,
28217 const_double_ptr,
28218 NULL_TREE));
28219 TREE_PUBLIC (atomic_update_decl) = 1;
28220 DECL_EXTERNAL (atomic_update_decl) = 1;
28221 }
28222
28223 tree fenv_var = create_tmp_var_raw (double_type_node);
28224 TREE_ADDRESSABLE (fenv_var) = 1;
28225 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
28226 build4 (TARGET_EXPR, double_type_node, fenv_var,
28227 void_node, NULL_TREE, NULL_TREE));
28228
28229 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
28230 *clear = build_call_expr (atomic_clear_decl, 0);
28231 *update = build_call_expr (atomic_update_decl, 1,
28232 fold_convert (const_double_ptr, fenv_addr));
28233 #endif
28234 return;
28235 }
28236
28237 tree mffs = rs6000_builtin_decls[RS6000_BIF_MFFS];
28238 tree mtfsf = rs6000_builtin_decls[RS6000_BIF_MTFSF];
28239 tree call_mffs = build_call_expr (mffs, 0);
28240
28241 /* Generates the equivalent of feholdexcept (&fenv_var)
28242
28243 double fenv_var = __builtin_mffs ();
28244 double fenv_hold;
28245 *(uint64_t*)&fenv_hold = *(uint64_t*)&fenv_var & 0xffffffff00000007LL;
28246 __builtin_mtfsf (0xff, fenv_hold); */
28247
28248 /* Mask to clear everything except for the rounding modes and non-IEEE
28249 arithmetic flag. */
28250 const unsigned HOST_WIDE_INT hold_exception_mask
28251 = HOST_WIDE_INT_C (0xffffffff00000007);
28252
28253 tree fenv_var = create_tmp_var_raw (double_type_node);
28254
28255 tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
28256 NULL_TREE, NULL_TREE);
28257
28258 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
28259 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
28260 build_int_cst (uint64_type_node,
28261 hold_exception_mask));
28262
28263 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
28264 fenv_llu_and);
28265
28266 tree hold_mtfsf = build_call_expr (mtfsf, 2,
28267 build_int_cst (unsigned_type_node, 0xff),
28268 fenv_hold_mtfsf);
28269
28270 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
28271
28272 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
28273
28274 double fenv_clear = __builtin_mffs ();
28275 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
28276 __builtin_mtfsf (0xff, fenv_clear); */
28277
28278 /* Mask to clear the entire FPSCR image in the low word, including the
28279 rounding modes and exception bits. */
28280 const unsigned HOST_WIDE_INT clear_exception_mask
28281 = HOST_WIDE_INT_C (0xffffffff00000000);
28282
28283 tree fenv_clear = create_tmp_var_raw (double_type_node);
28284
28285 tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
28286 call_mffs, NULL_TREE, NULL_TREE);
28287
28288 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
28289 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
28290 fenv_clean_llu,
28291 build_int_cst (uint64_type_node,
28292 clear_exception_mask));
28293
28294 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
28295 fenv_clear_llu_and);
28296
28297 tree clear_mtfsf = build_call_expr (mtfsf, 2,
28298 build_int_cst (unsigned_type_node, 0xff),
28299 fenv_clear_mtfsf);
28300
28301 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
28302
28303 /* Generates the equivalent of feupdateenv (&fenv_var)
28304
28305 double old_fenv = __builtin_mffs ();
28306 double fenv_update;
28307 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
28308 (*(uint64_t*)&fenv_var & 0x1ff80fff);
28309 __builtin_mtfsf (0xff, fenv_update); */
28310
28311 const unsigned HOST_WIDE_INT update_exception_mask
28312 = HOST_WIDE_INT_C (0xffffffff1fffff00);
28313 const unsigned HOST_WIDE_INT new_exception_mask
28314 = HOST_WIDE_INT_C (0x1ff80fff);
28315
28316 tree old_fenv = create_tmp_var_raw (double_type_node);
28317 tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
28318 call_mffs, NULL_TREE, NULL_TREE);
28319
28320 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
28321 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
28322 build_int_cst (uint64_type_node,
28323 update_exception_mask));
28324
28325 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
28326 build_int_cst (uint64_type_node,
28327 new_exception_mask));
28328
28329 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
28330 old_llu_and, new_llu_and);
28331
28332 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
28333 new_llu_mask);
28334
28335 tree update_mtfsf = build_call_expr (mtfsf, 2,
28336 build_int_cst (unsigned_type_node, 0xff),
28337 fenv_update_mtfsf);
28338
28339 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
28340 }
28341
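/* These hold/clear/update trees are what the generic code wraps around a C11
   atomic compound assignment on floating-point operands, e.g. (editorial
   sketch)

       _Atomic double d;
       d += 1.0;

   so that FP exception flags raised by a compare-and-swap retry that is
   thrown away do not leak into the user-visible environment.  */
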
28342 void
28343 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
28344 {
28345 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28346
28347 rtx_tmp0 = gen_reg_rtx (V2DFmode);
28348 rtx_tmp1 = gen_reg_rtx (V2DFmode);
28349
28350 /* The destination of the vmrgew instruction layout is:
28351 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
28352 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28353 vmrgew instruction will be correct. */
28354 if (BYTES_BIG_ENDIAN)
28355 {
28356 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
28357 GEN_INT (0)));
28358 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
28359 GEN_INT (3)));
28360 }
28361 else
28362 {
28363 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
28364 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
28365 }
28366
28367 rtx_tmp2 = gen_reg_rtx (V4SFmode);
28368 rtx_tmp3 = gen_reg_rtx (V4SFmode);
28369
28370 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
28371 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
28372
28373 if (BYTES_BIG_ENDIAN)
28374 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
28375 else
28376 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
28377 }
28378
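/* Editorial sketch of the net effect: for vec_float2-style conversions the
   sequence above computes

       dst = { (float) src1[0], (float) src1[1],
               (float) src2[0], (float) src2[1] };

   with the endian-specific xxpermdi variants keeping this element order the
   same on big- and little-endian targets.  */
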
28379 void
28380 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
28381 {
28382 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28383
28384 rtx_tmp0 = gen_reg_rtx (V2DImode);
28385 rtx_tmp1 = gen_reg_rtx (V2DImode);
28386
28387 /* The destination of the vmrgew instruction layout is:
28388 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
28389 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28390 vmrgew instruction will be correct. */
28391 if (BYTES_BIG_ENDIAN)
28392 {
28393 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
28394 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
28395 }
28396 else
28397 {
28398 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
28399 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
28400 }
28401
28402 rtx_tmp2 = gen_reg_rtx (V4SFmode);
28403 rtx_tmp3 = gen_reg_rtx (V4SFmode);
28404
28405 if (signed_convert)
28406 {
28407 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
28408 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
28409 }
28410 else
28411 {
28412 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
28413 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
28414 }
28415
28416 if (BYTES_BIG_ENDIAN)
28417 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
28418 else
28419 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
28420 }
28421
28422 void
28423 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
28424 rtx src2)
28425 {
28426 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28427
28428 rtx_tmp0 = gen_reg_rtx (V2DFmode);
28429 rtx_tmp1 = gen_reg_rtx (V2DFmode);
28430
28431 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
28432 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
28433
28434 rtx_tmp2 = gen_reg_rtx (V4SImode);
28435 rtx_tmp3 = gen_reg_rtx (V4SImode);
28436
28437 if (signed_convert)
28438 {
28439 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
28440 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
28441 }
28442 else
28443 {
28444 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
28445 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
28446 }
28447
28448 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
28449 }
28450
28451 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
28452
28453 static bool
28454 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
28455 optimization_type opt_type)
28456 {
28457 switch (op)
28458 {
28459 case rsqrt_optab:
28460 return (opt_type == OPTIMIZE_FOR_SPEED
28461 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
28462
28463 default:
28464 return true;
28465 }
28466 }
28467
28468 /* Implement TARGET_CONSTANT_ALIGNMENT. */
28469
28470 static HOST_WIDE_INT
28471 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
28472 {
28473 if (TREE_CODE (exp) == STRING_CST
28474 && (STRICT_ALIGNMENT || !optimize_size))
28475 return MAX (align, BITS_PER_WORD);
28476 return align;
28477 }
28478
28479 /* Implement TARGET_STARTING_FRAME_OFFSET. */
28480
28481 static HOST_WIDE_INT
28482 rs6000_starting_frame_offset (void)
28483 {
28484 if (FRAME_GROWS_DOWNWARD)
28485 return 0;
28486 return RS6000_STARTING_FRAME_OFFSET;
28487 }
28488 \f
28489 /* Internal function to return the built-in function id for the complex
28490 multiply operation for a given mode. */
28491
28492 static inline built_in_function
28493 complex_multiply_builtin_code (machine_mode mode)
28494 {
28495 gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT));
28496 int func = BUILT_IN_COMPLEX_MUL_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
28497 return (built_in_function) func;
28498 }
28499
28500 /* Internal function to return the built-in function id for the complex divide
28501 operation for a given mode. */
28502
28503 static inline built_in_function
28504 complex_divide_builtin_code (machine_mode mode)
28505 {
28506 gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT));
28507 int func = BUILT_IN_COMPLEX_DIV_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
28508 return (built_in_function) func;
28509 }
28510
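/* Usage sketch (editorial note): each complex float mode gets a contiguous
   built-in slot, so for example

       built_in_function mul_kc = complex_multiply_builtin_code (KCmode);
       // mul_kc == BUILT_IN_COMPLEX_MUL_MIN + (KCmode - MIN_MODE_COMPLEX_FLOAT)

   and the mangling hook below compares DECL_FUNCTION_CODE against these
   values to pick names such as __mulkc3.  */
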
28511 /* On 64-bit Linux and FreeBSD systems, possibly switch the long double library
28512 function names from <foo>l to <foo>f128 if the default long double type is
28513 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
28514 include file switches the names on systems that support long double as IEEE
28515 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
28516 In the future, glibc will export names like __ieee128_sinf128 and we can
28517 switch to using those instead of using sinf128, which pollutes the user's
28518 namespace.
28519
28520 This will switch the names for Fortran math functions as well (which doesn't
28521 use math.h). However, Fortran needs other changes to the compiler and
28522 library before you can switch the real*16 type at compile time.
28523
28524 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
28525 only do this transformation if the __float128 type is enabled. This
28526 prevents us from doing the transformation on older 32-bit ports that might
28527 have enabled using IEEE 128-bit floating point as the default long double
28528 type.
28529
28530 We also use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change the
28531 function names used for complex multiply and divide to the appropriate
28532 names. */
28533
28534 static tree
28535 rs6000_mangle_decl_assembler_name (tree decl, tree id)
28536 {
28537 /* Handle complex multiply/divide. For IEEE 128-bit, use __mulkc3 or
28538      __divkc3, and for IBM 128-bit use __multc3 or __divtc3.  */
28539 if (TARGET_FLOAT128_TYPE
28540 && TREE_CODE (decl) == FUNCTION_DECL
28541 && DECL_IS_UNDECLARED_BUILTIN (decl)
28542 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
28543 {
28544 built_in_function id = DECL_FUNCTION_CODE (decl);
28545 const char *newname = NULL;
28546
28547 if (id == complex_multiply_builtin_code (KCmode))
28548 newname = "__mulkc3";
28549
28550 else if (id == complex_multiply_builtin_code (ICmode))
28551 newname = "__multc3";
28552
28553 else if (id == complex_multiply_builtin_code (TCmode))
28554 newname = (TARGET_IEEEQUAD) ? "__mulkc3" : "__multc3";
28555
28556 else if (id == complex_divide_builtin_code (KCmode))
28557 newname = "__divkc3";
28558
28559 else if (id == complex_divide_builtin_code (ICmode))
28560 newname = "__divtc3";
28561
28562 else if (id == complex_divide_builtin_code (TCmode))
28563 newname = (TARGET_IEEEQUAD) ? "__divkc3" : "__divtc3";
28564
28565 if (newname)
28566 {
28567 if (TARGET_DEBUG_BUILTIN)
28568 fprintf (stderr, "Map complex mul/div => %s\n", newname);
28569
28570 return get_identifier (newname);
28571 }
28572 }
28573
28574 /* Map long double built-in functions if long double is IEEE 128-bit. */
28575 if (TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
28576 && TREE_CODE (decl) == FUNCTION_DECL
28577 && DECL_IS_UNDECLARED_BUILTIN (decl)
28578 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
28579 {
28580 size_t len = IDENTIFIER_LENGTH (id);
28581 const char *name = IDENTIFIER_POINTER (id);
28582 char *newname = NULL;
28583
28584 /* See if it is one of the built-in functions with an unusual name. */
28585 switch (DECL_FUNCTION_CODE (decl))
28586 {
28587 case BUILT_IN_DREML:
28588 newname = xstrdup ("__remainderieee128");
28589 break;
28590
28591 case BUILT_IN_GAMMAL:
28592 newname = xstrdup ("__lgammaieee128");
28593 break;
28594
28595 case BUILT_IN_GAMMAL_R:
28596 case BUILT_IN_LGAMMAL_R:
28597 newname = xstrdup ("__lgammaieee128_r");
28598 break;
28599
28600 case BUILT_IN_NEXTTOWARD:
28601 newname = xstrdup ("__nexttoward_to_ieee128");
28602 break;
28603
28604 case BUILT_IN_NEXTTOWARDF:
28605 newname = xstrdup ("__nexttowardf_to_ieee128");
28606 break;
28607
28608 case BUILT_IN_NEXTTOWARDL:
28609 newname = xstrdup ("__nexttowardieee128");
28610 break;
28611
28612 case BUILT_IN_POW10L:
28613 newname = xstrdup ("__exp10ieee128");
28614 break;
28615
28616 case BUILT_IN_SCALBL:
28617 newname = xstrdup ("__scalbieee128");
28618 break;
28619
28620 case BUILT_IN_SIGNIFICANDL:
28621 newname = xstrdup ("__significandieee128");
28622 break;
28623
28624 case BUILT_IN_SINCOSL:
28625 newname = xstrdup ("__sincosieee128");
28626 break;
28627
28628 default:
28629 break;
28630 }
28631
28632 /* Update the __builtin_*printf and __builtin_*scanf functions. */
28633 if (!newname)
28634 {
28635 size_t printf_len = strlen ("printf");
28636 size_t scanf_len = strlen ("scanf");
28637 size_t printf_chk_len = strlen ("printf_chk");
28638
28639 if (len >= printf_len
28640 && strcmp (name + len - printf_len, "printf") == 0)
28641 newname = xasprintf ("__%sieee128", name);
28642
28643 else if (len >= scanf_len
28644 && strcmp (name + len - scanf_len, "scanf") == 0)
28645 newname = xasprintf ("__isoc99_%sieee128", name);
28646
28647 else if (len >= printf_chk_len
28648 && strcmp (name + len - printf_chk_len, "printf_chk") == 0)
28649 newname = xasprintf ("%sieee128", name);
28650
28651 else if (name[len - 1] == 'l')
28652 {
28653 bool uses_ieee128_p = false;
28654 tree type = TREE_TYPE (decl);
28655 machine_mode ret_mode = TYPE_MODE (type);
28656
28657 	  /* See if the function returns an IEEE 128-bit floating point type or
28658 complex type. */
28659 if (ret_mode == TFmode || ret_mode == TCmode)
28660 uses_ieee128_p = true;
28661 else
28662 {
28663 function_args_iterator args_iter;
28664 tree arg;
28665
28666 	      /* See if the function passes an IEEE 128-bit floating point type
28667 or complex type. */
28668 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
28669 {
28670 machine_mode arg_mode = TYPE_MODE (arg);
28671 if (arg_mode == TFmode || arg_mode == TCmode)
28672 {
28673 uses_ieee128_p = true;
28674 break;
28675 }
28676 }
28677 }
28678
28679 /* If we passed or returned an IEEE 128-bit floating point type,
28680 	     change the name.  Use __<name>ieee128 instead of <name>l.  */
28681 if (uses_ieee128_p)
28682 newname = xasprintf ("__%.*sieee128", (int)(len - 1), name);
28683 }
28684 }
28685
28686 if (newname)
28687 {
28688 if (TARGET_DEBUG_BUILTIN)
28689 fprintf (stderr, "Map %s => %s\n", name, newname);
28690
28691 id = get_identifier (newname);
28692 free (newname);
28693 }
28694 }
28695
28696 return id;
28697 }
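
/* For example (derived from the mappings above), when long double is IEEE
   128-bit, __builtin_sinl is emitted as __sinieee128, printf as
   __printfieee128, and scanf as __isoc99_scanfieee128.  */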
28698
28699 /* Predict whether the given loop in gimple will be transformed in the RTL
28700 doloop_optimize pass. */
28701
28702 static bool
28703 rs6000_predict_doloop_p (struct loop *loop)
28704 {
28705 gcc_assert (loop);
28706
28707 /* On rs6000, targetm.can_use_doloop_p is actually
28708 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
28709 if (loop->inner != NULL)
28710 {
28711 if (dump_file && (dump_flags & TDF_DETAILS))
28712 fprintf (dump_file, "Predict doloop failure due to"
28713 " loop nesting.\n");
28714 return false;
28715 }
28716
28717 return true;
28718 }
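
/* For example (illustrative), in a nest like

     for (i = 0; i < n; i++)		// loop->inner != NULL: no doloop
       for (j = 0; j < m; j++)		// innermost: predicted doloop
	 ...

   only the innermost loop is predicted to become a counted loop.  */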
28719
28720 /* Implement TARGET_PREFERRED_DOLOOP_MODE. */
28721
28722 static machine_mode
28723 rs6000_preferred_doloop_mode (machine_mode)
28724 {
28725 return word_mode;
28726 }
28727
28728 /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
28729
28730 static bool
28731 rs6000_cannot_substitute_mem_equiv_p (rtx mem)
28732 {
28733 gcc_assert (MEM_P (mem));
28734
28735   /* curr_insn_transform()'s handling of subregs cannot handle Altivec
28736      AND-style addresses, so don't allow MEMs with those address types to be
28737 substituted as an equivalent expression. See PR93974 for details. */
28738 if (GET_CODE (XEXP (mem, 0)) == AND)
28739 return true;
28740
28741 return false;
28742 }
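
/* For example, an Altivec style address that masks off the low four bits,

     (mem (and (reg) (const_int -16)))

   is rejected here, so such a MEM is never substituted as an equivalent
   expression.  */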
28743
28744 /* Implement TARGET_INVALID_CONVERSION. */
28745
28746 static const char *
28747 rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
28748 {
28749 /* Make sure we're working with the canonical types. */
28750 if (TYPE_CANONICAL (fromtype) != NULL_TREE)
28751 fromtype = TYPE_CANONICAL (fromtype);
28752 if (TYPE_CANONICAL (totype) != NULL_TREE)
28753 totype = TYPE_CANONICAL (totype);
28754
28755 machine_mode frommode = TYPE_MODE (fromtype);
28756 machine_mode tomode = TYPE_MODE (totype);
28757
28758 if (frommode != tomode)
28759 {
28760 /* Do not allow conversions to/from XOmode and OOmode types. */
28761 if (frommode == XOmode)
28762 return N_("invalid conversion from type %<__vector_quad%>");
28763 if (tomode == XOmode)
28764 return N_("invalid conversion to type %<__vector_quad%>");
28765 if (frommode == OOmode)
28766 return N_("invalid conversion from type %<__vector_pair%>");
28767 if (tomode == OOmode)
28768 return N_("invalid conversion to type %<__vector_pair%>");
28769 }
28770
28771 /* Conversion allowed. */
28772 return NULL;
28773 }
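
/* For example (illustrative), user code along the lines of

     __vector_quad vq;
     __vector_pair vp = (__vector_pair) vq;

   involves an XOmode to OOmode conversion, so the hook returns "invalid
   conversion from type %<__vector_quad%>".  */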
28774
28775 /* Convert an SFmode constant to the integer bit pattern.  */
28776
28777 long
28778 rs6000_const_f32_to_i32 (rtx operand)
28779 {
28780 long value;
28781 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);
28782
28783 gcc_assert (GET_MODE (operand) == SFmode);
28784 REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
28785 return value;
28786 }
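
/* For example, the SFmode constant 1.0f has the IEEE single precision bit
   pattern 0x3f800000, which is the value the function above returns for
   it.  */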
28787
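/* Emit the XXSPLTIDP instruction to splat VALUE, the bit pattern of an
   SFmode constant, into each double word of the V2DFmode register DST.  */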
28788 void
28789 rs6000_emit_xxspltidp_v2df (rtx dst, long value)
28790 {
28791 if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
28792 inform (input_location,
28793 "the result for the xxspltidp instruction "
28794 "is undefined for subnormal input values");
28795   emit_insn (gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
28796 }
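
/* For example, calling the above with VALUE 0x3f800000 (1.0f) splats the
   double precision value 1.0 into both elements of DST.  A subnormal
   immediate such as 0x00000001 (zero exponent field, non-zero mantissa)
   triggers the note above, since XXSPLTIDP's result is then undefined.  */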
28797
28798 /* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC. */
28799
28800 static bool
28801 rs6000_gen_pic_addr_diff_vec (void)
28802 {
28803 return rs6000_relative_jumptables;
28804 }
28805
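/* Output an element of an address vector (jump table) to FILE, as a
   reference to internal label "L" VALUE, using DOUBLE_INT_ASM_OP on
   64-bit targets and "\t.long\t" otherwise.  */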
28806 void
28807 rs6000_output_addr_vec_elt (FILE *file, int value)
28808 {
28809 const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t";
28810 char buf[100];
28811
28812 fprintf (file, "%s", directive);
28813 ASM_GENERATE_INTERNAL_LABEL (buf, "L", value);
28814 assemble_name (file, buf);
28815 fprintf (file, "\n");
28816 }
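
/* For example (illustrative, ELF syntax), VALUE 5 on a 64-bit target
   emits something like:

     .quad .L5  */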
28817
28818 \f
28819 /* Copy an integer constant to the vector constant structure. */
28820
28821 static void
28822 constant_int_to_128bit_vector (rtx op,
28823 machine_mode mode,
28824 size_t byte_num,
28825 vec_const_128bit_type *info)
28826 {
28827 unsigned HOST_WIDE_INT uvalue = UINTVAL (op);
28828 unsigned bitsize = GET_MODE_BITSIZE (mode);
28829
28830 for (int shift = bitsize - 8; shift >= 0; shift -= 8)
28831 info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
28832 }
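
/* For example, an SImode constant 0x12345678 stored at BYTE_NUM 0 fills
   the structure in big endian byte order (illustrative):

     info->bytes[0..3] == { 0x12, 0x34, 0x56, 0x78 }  */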
28833
28834 /* Copy a floating point constant to the vector constant structure. */
28835
28836 static void
28837 constant_fp_to_128bit_vector (rtx op,
28838 machine_mode mode,
28839 size_t byte_num,
28840 vec_const_128bit_type *info)
28841 {
28842 unsigned bitsize = GET_MODE_BITSIZE (mode);
28843 unsigned num_words = bitsize / 32;
28844 const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op);
28845 long real_words[VECTOR_128BIT_WORDS];
28846
28847 /* Make sure we don't overflow the real_words array and that it is
28848 filled completely. */
28849 gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0);
28850
28851 real_to_target (real_words, rtype, mode);
28852
28853 /* Iterate over each 32-bit word in the floating point constant. The
28854 real_to_target function puts out words in target endian fashion. We need
28855 to arrange the order so that the bytes are written in big endian order. */
28856 for (unsigned num = 0; num < num_words; num++)
28857 {
28858 unsigned endian_num = (BYTES_BIG_ENDIAN
28859 ? num
28860 : num_words - 1 - num);
28861
28862 unsigned uvalue = real_words[endian_num];
28863 for (int shift = 32 - 8; shift >= 0; shift -= 8)
28864 info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
28865 }
28866
28867 /* Mark that this constant involves floating point. */
28868 info->fp_constant_p = true;
28869 }
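
/* For example, the DFmode constant 1.0 has the bit pattern
   0x3ff0000000000000, so at BYTE_NUM 0 it produces bytes 0x3f, 0xf0,
   0x00, ..., 0x00, in big endian order regardless of the target's
   endianness.  */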
28870
28871 /* Convert a vector constant OP with mode MODE to a vector 128-bit constant
28872 structure INFO.
28873
28874    Break the constant out into bytes, half words, words, and double words.
28875 Return true if we have successfully converted the constant.
28876
28877 We handle CONST_INT, CONST_DOUBLE, CONST_VECTOR, and VEC_DUPLICATE of
28878 constants. Integer and floating point scalar constants are splatted to fill
28879 out the vector. */
28880
28881 bool
28882 vec_const_128bit_to_bytes (rtx op,
28883 machine_mode mode,
28884 vec_const_128bit_type *info)
28885 {
28886 /* Initialize the constant structure. */
28887 memset ((void *)info, 0, sizeof (vec_const_128bit_type));
28888
28889 /* Assume CONST_INTs are DImode. */
28890 if (mode == VOIDmode)
28891 mode = CONST_INT_P (op) ? DImode : GET_MODE (op);
28892
28893 if (mode == VOIDmode)
28894 return false;
28895
28896 unsigned size = GET_MODE_SIZE (mode);
28897 bool splat_p = false;
28898
28899 if (size > VECTOR_128BIT_BYTES)
28900 return false;
28901
28902 /* Set up the bits. */
28903 switch (GET_CODE (op))
28904 {
28905 /* Integer constants, default to double word. */
28906 case CONST_INT:
28907 {
28908 constant_int_to_128bit_vector (op, mode, 0, info);
28909 splat_p = true;
28910 break;
28911 }
28912
28913 /* Floating point constants. */
28914 case CONST_DOUBLE:
28915 {
28916 /* Fail if the floating point constant is the wrong mode. */
28917 if (GET_MODE (op) != mode)
28918 return false;
28919
28920 	/* SFmode constants stored as scalars are kept in DFmode format.  */
28921 if (mode == SFmode)
28922 {
28923 mode = DFmode;
28924 size = GET_MODE_SIZE (DFmode);
28925 }
28926
28927 constant_fp_to_128bit_vector (op, mode, 0, info);
28928 splat_p = true;
28929 break;
28930 }
28931
28932 /* Vector constants, iterate over each element. On little endian
28933 systems, we have to reverse the element numbers. */
28934 case CONST_VECTOR:
28935 {
28936 /* Fail if the vector constant is the wrong mode or size. */
28937 if (GET_MODE (op) != mode
28938 || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
28939 return false;
28940
28941 machine_mode ele_mode = GET_MODE_INNER (mode);
28942 size_t ele_size = GET_MODE_SIZE (ele_mode);
28943 size_t nunits = GET_MODE_NUNITS (mode);
28944
28945 for (size_t num = 0; num < nunits; num++)
28946 {
28947 rtx ele = CONST_VECTOR_ELT (op, num);
28948 size_t byte_num = (BYTES_BIG_ENDIAN
28949 ? num
28950 : nunits - 1 - num) * ele_size;
28951
28952 if (CONST_INT_P (ele))
28953 constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
28954 else if (CONST_DOUBLE_P (ele))
28955 constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
28956 else
28957 return false;
28958 }
28959
28960 break;
28961 }
28962
28963 /* Treat VEC_DUPLICATE of a constant just like a vector constant.
28964 Since we are duplicating the element, we don't have to worry about
28965 endian issues. */
28966 case VEC_DUPLICATE:
28967 {
28968 /* Fail if the vector duplicate is the wrong mode or size. */
28969 if (GET_MODE (op) != mode
28970 || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
28971 return false;
28972
28973 machine_mode ele_mode = GET_MODE_INNER (mode);
28974 size_t ele_size = GET_MODE_SIZE (ele_mode);
28975 rtx ele = XEXP (op, 0);
28976 size_t nunits = GET_MODE_NUNITS (mode);
28977
28978 if (!CONST_INT_P (ele) && !CONST_DOUBLE_P (ele))
28979 return false;
28980
28981 for (size_t num = 0; num < nunits; num++)
28982 {
28983 size_t byte_num = num * ele_size;
28984
28985 if (CONST_INT_P (ele))
28986 constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
28987 else
28988 constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
28989 }
28990
28991 break;
28992 }
28993
28994       /* Anything else, just return failure.  */
28995 default:
28996 return false;
28997 }
28998
28999 /* Splat the constant to fill 128 bits if desired. */
29000 if (splat_p && size < VECTOR_128BIT_BYTES)
29001 {
29002 if ((VECTOR_128BIT_BYTES % size) != 0)
29003 return false;
29004
29005 for (size_t offset = size;
29006 offset < VECTOR_128BIT_BYTES;
29007 offset += size)
29008 memcpy ((void *) &info->bytes[offset],
29009 (void *) &info->bytes[0],
29010 size);
29011 }
29012
29013 /* Remember original size. */
29014 info->original_size = size;
29015
29016 /* Determine if the bytes are all the same. */
29017 unsigned char first_byte = info->bytes[0];
29018 info->all_bytes_same = true;
29019 for (size_t i = 1; i < VECTOR_128BIT_BYTES; i++)
29020 if (first_byte != info->bytes[i])
29021 {
29022 info->all_bytes_same = false;
29023 break;
29024 }
29025
29026 /* Pack half words together & determine if all of the half words are the
29027 same. */
29028 for (size_t i = 0; i < VECTOR_128BIT_HALF_WORDS; i++)
29029 info->half_words[i] = ((info->bytes[i * 2] << 8)
29030 | info->bytes[(i * 2) + 1]);
29031
29032 unsigned short first_hword = info->half_words[0];
29033 info->all_half_words_same = true;
29034 for (size_t i = 1; i < VECTOR_128BIT_HALF_WORDS; i++)
29035 if (first_hword != info->half_words[i])
29036 {
29037 info->all_half_words_same = false;
29038 break;
29039 }
29040
29041 /* Pack words together & determine if all of the words are the same. */
29042 for (size_t i = 0; i < VECTOR_128BIT_WORDS; i++)
29043 info->words[i] = ((info->bytes[i * 4] << 24)
29044 | (info->bytes[(i * 4) + 1] << 16)
29045 | (info->bytes[(i * 4) + 2] << 8)
29046 | info->bytes[(i * 4) + 3]);
29047
29048 info->all_words_same
29049 = (info->words[0] == info->words[1]
29050 && info->words[0] == info->words[2]
29051 && info->words[0] == info->words[3]);
29052
29053 /* Pack double words together & determine if all of the double words are the
29054 same. */
29055 for (size_t i = 0; i < VECTOR_128BIT_DOUBLE_WORDS; i++)
29056 {
29057 unsigned HOST_WIDE_INT d_word = 0;
29058 for (size_t j = 0; j < 8; j++)
29059 d_word = (d_word << 8) | info->bytes[(i * 8) + j];
29060
29061 info->double_words[i] = d_word;
29062 }
29063
29064 info->all_double_words_same
29065 = (info->double_words[0] == info->double_words[1]);
29066
29067 return true;
29068 }
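
/* For example (illustrative), a DImode CONST_INT 0x0102030405060708 is
   splatted to fill all 128 bits, so both double words are
   0x0102030405060708 and all_double_words_same is set, while
   all_words_same, all_half_words_same and all_bytes_same are not.  */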
29069
29070 /* Determine if an IEEE 128-bit constant can be loaded with LXVKQ. Return zero
29071 if the LXVKQ instruction cannot be used. Otherwise return the immediate
29072 value to be used with the LXVKQ instruction. */
29073
29074 unsigned
29075 constant_generates_lxvkq (vec_const_128bit_type *vsx_const)
29076 {
29077   /* The instruction is only supported if power10 code generation, IEEE 128-bit
29078      floating point hardware, and VSX registers are all available.  */
29079 if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
29080 || !TARGET_VSX)
29081 return 0;
29082
29083 /* All of the constants that are generated by LXVKQ have the bottom 3 words
29084      equal to 0.  */
29085 if (vsx_const->words[1] != 0
29086 || vsx_const->words[2] != 0
29087 || vsx_const->words[3] != 0)
29088 return 0;
29089
29090 /* See if we have a match for the first word. */
29091 switch (vsx_const->words[0])
29092 {
29093 case 0x3FFF0000U: return 1; /* IEEE 128-bit +1.0. */
29094 case 0x40000000U: return 2; /* IEEE 128-bit +2.0. */
29095 case 0x40008000U: return 3; /* IEEE 128-bit +3.0. */
29096 case 0x40010000U: return 4; /* IEEE 128-bit +4.0. */
29097 case 0x40014000U: return 5; /* IEEE 128-bit +5.0. */
29098 case 0x40018000U: return 6; /* IEEE 128-bit +6.0. */
29099 case 0x4001C000U: return 7; /* IEEE 128-bit +7.0. */
29100 case 0x7FFF0000U: return 8; /* IEEE 128-bit +Infinity. */
29101 case 0x7FFF8000U: return 9; /* IEEE 128-bit quiet NaN. */
29102 case 0x80000000U: return 16; /* IEEE 128-bit -0.0. */
29103 case 0xBFFF0000U: return 17; /* IEEE 128-bit -1.0. */
29104 case 0xC0000000U: return 18; /* IEEE 128-bit -2.0. */
29105 case 0xC0008000U: return 19; /* IEEE 128-bit -3.0. */
29106 case 0xC0010000U: return 20; /* IEEE 128-bit -4.0. */
29107 case 0xC0014000U: return 21; /* IEEE 128-bit -5.0. */
29108 case 0xC0018000U: return 22; /* IEEE 128-bit -6.0. */
29109 case 0xC001C000U: return 23; /* IEEE 128-bit -7.0. */
29110 case 0xFFFF0000U: return 24; /* IEEE 128-bit -Infinity. */
29111
29112       /* Anything else cannot be loaded.  */
29113 default:
29114 break;
29115 }
29116
29117 return 0;
29118 }
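
/* For example, IEEE 128-bit +1.0 has words { 0x3fff0000, 0, 0, 0 }, so
   this returns 1 and the constant can be loaded with (assembly
   illustrative):

     lxvkq 34,1  */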
29119
29120 /* Determine if a vector constant can be loaded with XXSPLTIW. Return zero if
29121 the XXSPLTIW instruction cannot be used. Otherwise return the immediate
29122 value to be used with the XXSPLTIW instruction. */
29123
29124 unsigned
29125 constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
29126 {
29127 if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
29128 return 0;
29129
29130 if (!vsx_const->all_words_same)
29131 return 0;
29132
29133 /* If we can use XXSPLTIB, don't generate XXSPLTIW. */
29134 if (vsx_const->all_bytes_same)
29135 return 0;
29136
29137 /* See if we can use VSPLTISH or VSPLTISW. */
29138 if (vsx_const->all_half_words_same)
29139 {
29140 short sign_h_word = vsx_const->half_words[0];
29141 if (EASY_VECTOR_15 (sign_h_word))
29142 return 0;
29143 }
29144
29145 int sign_word = vsx_const->words[0];
29146 if (EASY_VECTOR_15 (sign_word))
29147 return 0;
29148
29149 return vsx_const->words[0];
29150 }
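
/* For example, a V4SImode constant with all four words equal to
   0x01020304 has differing bytes and half words and is outside the
   VSPLTISW range, so this returns 0x01020304 as the XXSPLTIW
   immediate.  */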
29151
29152 /* Determine if a vector constant can be loaded with XXSPLTIDP. Return zero if
29153 the XXSPLTIDP instruction cannot be used. Otherwise return the immediate
29154 value to be used with the XXSPLTIDP instruction. */
29155
29156 unsigned
29157 constant_generates_xxspltidp (vec_const_128bit_type *vsx_const)
29158 {
29159 if (!TARGET_SPLAT_FLOAT_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
29160 return 0;
29161
29162 /* Reject if the two 64-bit segments are not the same. */
29163 if (!vsx_const->all_double_words_same)
29164 return 0;
29165
29166 /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP.
29167 Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW). */
29168 if (vsx_const->all_bytes_same
29169 || vsx_const->all_half_words_same
29170 || vsx_const->all_words_same)
29171 return 0;
29172
29173 unsigned HOST_WIDE_INT value = vsx_const->double_words[0];
29174
29175   /* Avoid values that look like DFmode NaNs, except for the normal NaN bit
29176      pattern and the signaling NaN bit pattern.  Recognize infinity and
29177 negative infinity. */
29178
29179 /* Bit representation of DFmode normal quiet NaN. */
29180 #define RS6000_CONST_DF_NAN HOST_WIDE_INT_UC (0x7ff8000000000000)
29181
29182 /* Bit representation of DFmode normal signaling NaN. */
29183 #define RS6000_CONST_DF_NANS HOST_WIDE_INT_UC (0x7ff4000000000000)
29184
29185 /* Bit representation of DFmode positive infinity. */
29186 #define RS6000_CONST_DF_INF HOST_WIDE_INT_UC (0x7ff0000000000000)
29187
29188 /* Bit representation of DFmode negative infinity. */
29189 #define RS6000_CONST_DF_NEG_INF HOST_WIDE_INT_UC (0xfff0000000000000)
29190
29191 if (value != RS6000_CONST_DF_NAN
29192 && value != RS6000_CONST_DF_NANS
29193 && value != RS6000_CONST_DF_INF
29194 && value != RS6000_CONST_DF_NEG_INF)
29195 {
29196 /* The IEEE 754 64-bit floating format has 1 bit for sign, 11 bits for
29197 the exponent, and 52 bits for the mantissa (not counting the hidden
29198 bit used for normal numbers). NaN values have the exponent set to all
29199 1 bits, and the mantissa non-zero (mantissa == 0 is infinity). */
29200
29201 int df_exponent = (value >> 52) & 0x7ff;
29202 unsigned HOST_WIDE_INT
29203 df_mantissa = value & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U);
29204
29205 if (df_exponent == 0x7ff && df_mantissa != 0) /* other NaNs. */
29206 return 0;
29207
29208 /* Avoid values that are DFmode subnormal values. Subnormal numbers have
29209 the exponent all 0 bits, and the mantissa non-zero. If the value is
29210 subnormal, then the hidden bit in the mantissa is not set. */
29211 if (df_exponent == 0 && df_mantissa != 0) /* subnormal. */
29212 return 0;
29213 }
29214
29215   /* Change the representation to a DFmode constant.  */
29216 long df_words[2] = { vsx_const->words[0], vsx_const->words[1] };
29217
29218 /* real_from_target takes the target words in target order. */
29219 if (!BYTES_BIG_ENDIAN)
29220 std::swap (df_words[0], df_words[1]);
29221
29222 REAL_VALUE_TYPE rv_type;
29223 real_from_target (&rv_type, df_words, DFmode);
29224
29225 const REAL_VALUE_TYPE *rv = &rv_type;
29226
29227   /* Validate that the number can be stored as an SFmode value.  */
29228 if (!exact_real_truncate (SFmode, rv))
29229 return 0;
29230
29231   /* Validate that the number is not an SFmode subnormal value (exponent is 0,
29232 mantissa field is non-zero) which is undefined for the XXSPLTIDP
29233 instruction. */
29234 long sf_value;
29235 real_to_target (&sf_value, rv, SFmode);
29236
29237 /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent,
29238 and 23 bits for the mantissa. Subnormal numbers have the exponent all
29239 0 bits, and the mantissa non-zero. */
29240 long sf_exponent = (sf_value >> 23) & 0xFF;
29241 long sf_mantissa = sf_value & 0x7FFFFF;
29242
29243 if (sf_exponent == 0 && sf_mantissa != 0)
29244 return 0;
29245
29246 /* Return the immediate to be used. */
29247 return sf_value;
29248 }
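
/* For example, the V2DFmode constant { 1.5, 1.5 } has both double words
   equal to 0x3ff8000000000000; 1.5 truncates exactly to SFmode and is not
   subnormal, so this returns the single precision bit pattern 0x3fc00000
   as the XXSPLTIDP immediate.  */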
29249
29250 /* Now we have only two opaque types, the __vector_quad and
29251    __vector_pair built-in types.  They are target specific and
29252    only available when MMA is supported.  With MMA supported, this
29253    simply returns false.  Otherwise it checks whether the given
29254    gimple STMT is an assignment, asm or call stmt that uses either
29255    of these two opaque types unexpectedly; if so, it raises an
29256    error message and returns true, otherwise it returns false.  */
29257
29258 bool
29259 rs6000_opaque_type_invalid_use_p (gimple *stmt)
29260 {
29261 if (TARGET_MMA)
29262 return false;
29263
29264   /* If the given TYPE is one of the MMA opaque types, emit the corresponding
29265      error message and return true, otherwise return false.  */
29266 auto check_and_error_invalid_use = [](tree type)
29267 {
29268 tree mv = TYPE_MAIN_VARIANT (type);
29269 if (mv == vector_quad_type_node)
29270 {
29271 error ("type %<__vector_quad%> requires the %qs option", "-mmma");
29272 return true;
29273 }
29274 else if (mv == vector_pair_type_node)
29275 {
29276 error ("type %<__vector_pair%> requires the %qs option", "-mmma");
29277 return true;
29278 }
29279 return false;
29280 };
29281
29282 if (stmt)
29283 {
29284       /* The usage of MMA opaque types is very limited for now, so
29285 	 checking gassign, gasm and gcall statements is enough so far.  */
29286 if (gassign *ga = dyn_cast<gassign *> (stmt))
29287 {
29288 tree lhs = gimple_assign_lhs (ga);
29289 tree type = TREE_TYPE (lhs);
29290 if (check_and_error_invalid_use (type))
29291 return true;
29292 }
29293 else if (gasm *gs = dyn_cast<gasm *> (stmt))
29294 {
29295 unsigned ninputs = gimple_asm_ninputs (gs);
29296 for (unsigned i = 0; i < ninputs; i++)
29297 {
29298 tree op = gimple_asm_input_op (gs, i);
29299 tree val = TREE_VALUE (op);
29300 tree type = TREE_TYPE (val);
29301 if (check_and_error_invalid_use (type))
29302 return true;
29303 }
29304 unsigned noutputs = gimple_asm_noutputs (gs);
29305 for (unsigned i = 0; i < noutputs; i++)
29306 {
29307 tree op = gimple_asm_output_op (gs, i);
29308 tree val = TREE_VALUE (op);
29309 tree type = TREE_TYPE (val);
29310 if (check_and_error_invalid_use (type))
29311 return true;
29312 }
29313 }
29314 else if (gcall *gc = dyn_cast<gcall *> (stmt))
29315 {
29316 unsigned nargs = gimple_call_num_args (gc);
29317 for (unsigned i = 0; i < nargs; i++)
29318 {
29319 tree arg = gimple_call_arg (gc, i);
29320 tree type = TREE_TYPE (arg);
29321 if (check_and_error_invalid_use (type))
29322 return true;
29323 }
29324 }
29325 }
29326
29327 return false;
29328 }
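
/* For example (illustrative), without -mmma,

     __vector_quad acc, tmp;
     acc = tmp;

   is a gassign whose LHS type is __vector_quad, so the check above
   reports "type __vector_quad requires the -mmma option".  */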
29329
29330 struct gcc_target targetm = TARGET_INITIALIZER;
29331
29332 #include "gt-rs6000.h"